diff options
Diffstat (limited to 'tools/testing/selftests')
235 files changed, 10468 insertions, 1427 deletions
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index bc3299a20338..fb010a35d61a 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -49,6 +49,7 @@ TARGETS += proc TARGETS += pstore TARGETS += ptrace TARGETS += openat2 +TARGETS += rlimits TARGETS += rseq TARGETS += rtc TARGETS += seccomp diff --git a/tools/testing/selftests/arm64/fp/sve-probe-vls.c b/tools/testing/selftests/arm64/fp/sve-probe-vls.c index b29cbc642c57..76e138525d55 100644 --- a/tools/testing/selftests/arm64/fp/sve-probe-vls.c +++ b/tools/testing/selftests/arm64/fp/sve-probe-vls.c @@ -25,7 +25,7 @@ int main(int argc, char **argv) ksft_set_plan(2); if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) - ksft_exit_skip("SVE not available"); + ksft_exit_skip("SVE not available\n"); /* * Enumerate up to SVE_VQ_MAX vector lengths diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 4866f6a21901..addcfd8b615e 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -10,6 +10,7 @@ FEATURE-DUMP.libbpf fixdep test_dev_cgroup /test_progs* +!test_progs.h test_verifier_log feature test_sock @@ -30,10 +31,13 @@ test_sysctl xdping test_cpp *.skel.h +*.lskel.h /no_alu32 /bpf_gcc /tools /runqslower /bench *.ko +*.tmp xdpxceiver +xdp_redirect_multi diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 511259c2c6c5..f405b20c1e6c 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -54,6 +54,7 @@ TEST_FILES = xsk_prereqs.sh \ # Order correspond to 'make run_tests' order TEST_PROGS := test_kmod.sh \ test_xdp_redirect.sh \ + test_xdp_redirect_multi.sh \ test_xdp_meta.sh \ test_xdp_veth.sh \ test_offload.py \ @@ -84,7 +85,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \ TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \ flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user 
\ test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \ - xdpxceiver + xdpxceiver xdp_redirect_multi TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read @@ -312,6 +313,10 @@ SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \ linked_vars.skel.h linked_maps.skel.h +LSKELS := kfunc_call_test.c fentry_test.c fexit_test.c fexit_sleep.c \ + test_ksyms_module.c test_ringbuf.c atomics.c trace_printk.c +SKEL_BLACKLIST += $$(LSKELS) + test_static_linked.skel.h-deps := test_static_linked1.o test_static_linked2.o linked_funcs.skel.h-deps := linked_funcs1.o linked_funcs2.o linked_vars.skel.h-deps := linked_vars1.o linked_vars2.o @@ -339,6 +344,7 @@ TRUNNER_BPF_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, $$(TRUNNER_BPF_SRCS) TRUNNER_BPF_SKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.skel.h, \ $$(filter-out $(SKEL_BLACKLIST) $(LINKED_BPF_SRCS),\ $$(TRUNNER_BPF_SRCS))) +TRUNNER_BPF_LSKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.lskel.h, $$(LSKELS)) TRUNNER_BPF_SKELS_LINKED := $$(addprefix $$(TRUNNER_OUTPUT)/,$(LINKED_SKELS)) TEST_GEN_FILES += $$(TRUNNER_BPF_OBJS) @@ -380,6 +386,14 @@ $(TRUNNER_BPF_SKELS): %.skel.h: %.o $(BPFTOOL) | $(TRUNNER_OUTPUT) $(Q)diff $$(<:.o=.linked2.o) $$(<:.o=.linked3.o) $(Q)$$(BPFTOOL) gen skeleton $$(<:.o=.linked3.o) name $$(notdir $$(<:.o=)) > $$@ +$(TRUNNER_BPF_LSKELS): %.lskel.h: %.o $(BPFTOOL) | $(TRUNNER_OUTPUT) + $$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@) + $(Q)$$(BPFTOOL) gen object $$(<:.o=.linked1.o) $$< + $(Q)$$(BPFTOOL) gen object $$(<:.o=.linked2.o) $$(<:.o=.linked1.o) + $(Q)$$(BPFTOOL) gen object $$(<:.o=.linked3.o) $$(<:.o=.linked2.o) + $(Q)diff $$(<:.o=.linked2.o) $$(<:.o=.linked3.o) + $(Q)$$(BPFTOOL) gen skeleton -L $$(<:.o=.linked3.o) name $$(notdir $$(<:.o=)) > $$@ + $(TRUNNER_BPF_SKELS_LINKED): $(TRUNNER_BPF_OBJS) $(BPFTOOL) | $(TRUNNER_OUTPUT) $$(call msg,LINK-BPF,$(TRUNNER_BINARY),$$(@:.skel.h=.o)) $(Q)$$(BPFTOOL) gen object 
$$(@:.skel.h=.linked1.o) $$(addprefix $(TRUNNER_OUTPUT)/,$$($$(@F)-deps)) @@ -409,6 +423,7 @@ $(TRUNNER_TEST_OBJS): $(TRUNNER_OUTPUT)/%.test.o: \ $(TRUNNER_EXTRA_HDRS) \ $(TRUNNER_BPF_OBJS) \ $(TRUNNER_BPF_SKELS) \ + $(TRUNNER_BPF_LSKELS) \ $(TRUNNER_BPF_SKELS_LINKED) \ $$(BPFOBJ) | $(TRUNNER_OUTPUT) $$(call msg,TEST-OBJ,$(TRUNNER_BINARY),$$@) @@ -516,6 +531,6 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \ EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \ prog_tests/tests.h map_tests/tests.h verifier/tests.h \ feature \ - $(addprefix $(OUTPUT)/,*.o *.skel.h no_alu32 bpf_gcc bpf_testmod.ko) + $(addprefix $(OUTPUT)/,*.o *.skel.h *.lskel.h no_alu32 bpf_gcc bpf_testmod.ko) .PHONY: docs docs-clean diff --git a/tools/testing/selftests/bpf/Makefile.docs b/tools/testing/selftests/bpf/Makefile.docs index ccf260021e83..eb6a4fea8c79 100644 --- a/tools/testing/selftests/bpf/Makefile.docs +++ b/tools/testing/selftests/bpf/Makefile.docs @@ -52,7 +52,8 @@ $(OUTPUT)%.$2: $(OUTPUT)%.rst ifndef RST2MAN_DEP $$(error "rst2man not found, but required to generate man pages") endif - $$(QUIET_GEN)rst2man $$< > $$@ + $$(QUIET_GEN)rst2man --exit-status=1 $$< > $$@.tmp + $$(QUIET_GEN)mv $$@.tmp $$@ docs-clean-$1: $$(call QUIET_CLEAN, eBPF_$1-manpage) diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst index 3353778c30f8..8deec1ca9150 100644 --- a/tools/testing/selftests/bpf/README.rst +++ b/tools/testing/selftests/bpf/README.rst @@ -202,3 +202,22 @@ generate valid BTF information for weak variables. Please make sure you use Clang that contains the fix. __ https://reviews.llvm.org/D100362 + +Clang relocation changes +======================== + +Clang 13 patch `clang reloc patch`_ made some changes on relocations such +that existing relocation types are broken into more types and +each new type corresponds to only one way to resolve relocation. +See `kernel llvm reloc`_ for more explanation and some examples. 
+Using clang 13 to compile old libbpf which has static linker support, +there will be a compilation failure:: + + libbpf: ELF relo #0 in section #6 has unexpected type 2 in .../bpf_tcp_nogpl.o + +Here, ``type 2`` refers to new relocation type ``R_BPF_64_ABS64``. +To fix this issue, use newer libbpf. + +.. Links +.. _clang reloc patch: https://reviews.llvm.org/D102712 +.. _kernel llvm reloc: /Documentation/bpf/llvm_reloc.rst diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index 332ed2f7b402..6ea15b93a2f8 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -43,6 +43,7 @@ void setup_libbpf() { int err; + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); libbpf_set_print(libbpf_print_fn); err = bump_memlock_rlimit(); diff --git a/tools/testing/selftests/bpf/benchs/bench_rename.c b/tools/testing/selftests/bpf/benchs/bench_rename.c index a967674098ad..c7ec114eca56 100644 --- a/tools/testing/selftests/bpf/benchs/bench_rename.c +++ b/tools/testing/selftests/bpf/benchs/bench_rename.c @@ -65,7 +65,7 @@ static void attach_bpf(struct bpf_program *prog) struct bpf_link *link; link = bpf_program__attach(prog); - if (IS_ERR(link)) { + if (!link) { fprintf(stderr, "failed to attach program!\n"); exit(1); } diff --git a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c index bde6c9d4cbd4..d167bffac679 100644 --- a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c +++ b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c @@ -181,7 +181,7 @@ static void ringbuf_libbpf_setup() } link = bpf_program__attach(ctx->skel->progs.bench_ringbuf); - if (IS_ERR(link)) { + if (!link) { fprintf(stderr, "failed to attach program!\n"); exit(1); } @@ -271,7 +271,7 @@ static void ringbuf_custom_setup() } link = bpf_program__attach(ctx->skel->progs.bench_ringbuf); - if (IS_ERR(link)) { + if (!link) { fprintf(stderr, "failed to attach program\n"); exit(1); } @@ -430,7 
+430,7 @@ static void perfbuf_libbpf_setup() } link = bpf_program__attach(ctx->skel->progs.bench_perfbuf); - if (IS_ERR(link)) { + if (!link) { fprintf(stderr, "failed to attach program\n"); exit(1); } diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c index 2a0b6c9885a4..f41a491a8cc0 100644 --- a/tools/testing/selftests/bpf/benchs/bench_trigger.c +++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c @@ -60,7 +60,7 @@ static void attach_bpf(struct bpf_program *prog) struct bpf_link *link; link = bpf_program__attach(prog); - if (IS_ERR(link)) { + if (!link) { fprintf(stderr, "failed to attach program!\n"); exit(1); } diff --git a/tools/testing/selftests/bpf/prog_tests/atomics.c b/tools/testing/selftests/bpf/prog_tests/atomics.c index 21efe7bbf10d..ba0e1efe5a45 100644 --- a/tools/testing/selftests/bpf/prog_tests/atomics.c +++ b/tools/testing/selftests/bpf/prog_tests/atomics.c @@ -2,19 +2,19 @@ #include <test_progs.h> -#include "atomics.skel.h" +#include "atomics.lskel.h" static void test_add(struct atomics *skel) { int err, prog_fd; __u32 duration = 0, retval; - struct bpf_link *link; + int link_fd; - link = bpf_program__attach(skel->progs.add); - if (CHECK(IS_ERR(link), "attach(add)", "err: %ld\n", PTR_ERR(link))) + link_fd = atomics__add__attach(skel); + if (!ASSERT_GT(link_fd, 0, "attach(add)")) return; - prog_fd = bpf_program__fd(skel->progs.add); + prog_fd = skel->progs.add.prog_fd; err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, NULL, &retval, &duration); if (CHECK(err || retval, "test_run add", @@ -33,20 +33,20 @@ static void test_add(struct atomics *skel) ASSERT_EQ(skel->data->add_noreturn_value, 3, "add_noreturn_value"); cleanup: - bpf_link__destroy(link); + close(link_fd); } static void test_sub(struct atomics *skel) { int err, prog_fd; __u32 duration = 0, retval; - struct bpf_link *link; + int link_fd; - link = bpf_program__attach(skel->progs.sub); - if (CHECK(IS_ERR(link), 
"attach(sub)", "err: %ld\n", PTR_ERR(link))) + link_fd = atomics__sub__attach(skel); + if (!ASSERT_GT(link_fd, 0, "attach(sub)")) return; - prog_fd = bpf_program__fd(skel->progs.sub); + prog_fd = skel->progs.sub.prog_fd; err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, NULL, &retval, &duration); if (CHECK(err || retval, "test_run sub", @@ -66,20 +66,20 @@ static void test_sub(struct atomics *skel) ASSERT_EQ(skel->data->sub_noreturn_value, -1, "sub_noreturn_value"); cleanup: - bpf_link__destroy(link); + close(link_fd); } static void test_and(struct atomics *skel) { int err, prog_fd; __u32 duration = 0, retval; - struct bpf_link *link; + int link_fd; - link = bpf_program__attach(skel->progs.and); - if (CHECK(IS_ERR(link), "attach(and)", "err: %ld\n", PTR_ERR(link))) + link_fd = atomics__and__attach(skel); + if (!ASSERT_GT(link_fd, 0, "attach(and)")) return; - prog_fd = bpf_program__fd(skel->progs.and); + prog_fd = skel->progs.and.prog_fd; err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, NULL, &retval, &duration); if (CHECK(err || retval, "test_run and", @@ -94,20 +94,20 @@ static void test_and(struct atomics *skel) ASSERT_EQ(skel->data->and_noreturn_value, 0x010ull << 32, "and_noreturn_value"); cleanup: - bpf_link__destroy(link); + close(link_fd); } static void test_or(struct atomics *skel) { int err, prog_fd; __u32 duration = 0, retval; - struct bpf_link *link; + int link_fd; - link = bpf_program__attach(skel->progs.or); - if (CHECK(IS_ERR(link), "attach(or)", "err: %ld\n", PTR_ERR(link))) + link_fd = atomics__or__attach(skel); + if (!ASSERT_GT(link_fd, 0, "attach(or)")) return; - prog_fd = bpf_program__fd(skel->progs.or); + prog_fd = skel->progs.or.prog_fd; err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, NULL, &retval, &duration); if (CHECK(err || retval, "test_run or", @@ -123,20 +123,20 @@ static void test_or(struct atomics *skel) ASSERT_EQ(skel->data->or_noreturn_value, 0x111ull << 32, "or_noreturn_value"); cleanup: - bpf_link__destroy(link); + 
close(link_fd); } static void test_xor(struct atomics *skel) { int err, prog_fd; __u32 duration = 0, retval; - struct bpf_link *link; + int link_fd; - link = bpf_program__attach(skel->progs.xor); - if (CHECK(IS_ERR(link), "attach(xor)", "err: %ld\n", PTR_ERR(link))) + link_fd = atomics__xor__attach(skel); + if (!ASSERT_GT(link_fd, 0, "attach(xor)")) return; - prog_fd = bpf_program__fd(skel->progs.xor); + prog_fd = skel->progs.xor.prog_fd; err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, NULL, &retval, &duration); if (CHECK(err || retval, "test_run xor", @@ -151,20 +151,20 @@ static void test_xor(struct atomics *skel) ASSERT_EQ(skel->data->xor_noreturn_value, 0x101ull << 32, "xor_nxoreturn_value"); cleanup: - bpf_link__destroy(link); + close(link_fd); } static void test_cmpxchg(struct atomics *skel) { int err, prog_fd; __u32 duration = 0, retval; - struct bpf_link *link; + int link_fd; - link = bpf_program__attach(skel->progs.cmpxchg); - if (CHECK(IS_ERR(link), "attach(cmpxchg)", "err: %ld\n", PTR_ERR(link))) + link_fd = atomics__cmpxchg__attach(skel); + if (!ASSERT_GT(link_fd, 0, "attach(cmpxchg)")) return; - prog_fd = bpf_program__fd(skel->progs.cmpxchg); + prog_fd = skel->progs.cmpxchg.prog_fd; err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, NULL, &retval, &duration); if (CHECK(err || retval, "test_run add", @@ -180,20 +180,20 @@ static void test_cmpxchg(struct atomics *skel) ASSERT_EQ(skel->bss->cmpxchg32_result_succeed, 1, "cmpxchg_result_succeed"); cleanup: - bpf_link__destroy(link); + close(link_fd); } static void test_xchg(struct atomics *skel) { int err, prog_fd; __u32 duration = 0, retval; - struct bpf_link *link; + int link_fd; - link = bpf_program__attach(skel->progs.xchg); - if (CHECK(IS_ERR(link), "attach(xchg)", "err: %ld\n", PTR_ERR(link))) + link_fd = atomics__xchg__attach(skel); + if (!ASSERT_GT(link_fd, 0, "attach(xchg)")) return; - prog_fd = bpf_program__fd(skel->progs.xchg); + prog_fd = skel->progs.xchg.prog_fd; err = 
bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, NULL, &retval, &duration); if (CHECK(err || retval, "test_run add", @@ -207,7 +207,7 @@ static void test_xchg(struct atomics *skel) ASSERT_EQ(skel->bss->xchg32_result, 1, "xchg32_result"); cleanup: - bpf_link__destroy(link); + close(link_fd); } void test_atomics(void) diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c index 9dc4e3dfbcf3..ec11e20d2b92 100644 --- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c +++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c @@ -85,16 +85,14 @@ void test_attach_probe(void) kprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kprobe, false /* retprobe */, SYS_NANOSLEEP_KPROBE_NAME); - if (CHECK(IS_ERR(kprobe_link), "attach_kprobe", - "err %ld\n", PTR_ERR(kprobe_link))) + if (!ASSERT_OK_PTR(kprobe_link, "attach_kprobe")) goto cleanup; skel->links.handle_kprobe = kprobe_link; kretprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kretprobe, true /* retprobe */, SYS_NANOSLEEP_KPROBE_NAME); - if (CHECK(IS_ERR(kretprobe_link), "attach_kretprobe", - "err %ld\n", PTR_ERR(kretprobe_link))) + if (!ASSERT_OK_PTR(kretprobe_link, "attach_kretprobe")) goto cleanup; skel->links.handle_kretprobe = kretprobe_link; @@ -103,8 +101,7 @@ void test_attach_probe(void) 0 /* self pid */, "/proc/self/exe", uprobe_offset); - if (CHECK(IS_ERR(uprobe_link), "attach_uprobe", - "err %ld\n", PTR_ERR(uprobe_link))) + if (!ASSERT_OK_PTR(uprobe_link, "attach_uprobe")) goto cleanup; skel->links.handle_uprobe = uprobe_link; @@ -113,8 +110,7 @@ void test_attach_probe(void) -1 /* any pid */, "/proc/self/exe", uprobe_offset); - if (CHECK(IS_ERR(uretprobe_link), "attach_uretprobe", - "err %ld\n", PTR_ERR(uretprobe_link))) + if (!ASSERT_OK_PTR(uretprobe_link, "attach_uretprobe")) goto cleanup; skel->links.handle_uretprobe = uretprobe_link; diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c 
b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 2d3590cfb5e1..1f1aade56504 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -47,7 +47,7 @@ static void do_dummy_read(struct bpf_program *prog) int iter_fd, len; link = bpf_program__attach_iter(prog, NULL); - if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) + if (!ASSERT_OK_PTR(link, "attach_iter")) return; iter_fd = bpf_iter_create(bpf_link__fd(link)); @@ -201,7 +201,7 @@ static int do_btf_read(struct bpf_iter_task_btf *skel) int ret = 0; link = bpf_program__attach_iter(prog, NULL); - if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) + if (!ASSERT_OK_PTR(link, "attach_iter")) return ret; iter_fd = bpf_iter_create(bpf_link__fd(link)); @@ -396,7 +396,7 @@ static void test_file_iter(void) return; link = bpf_program__attach_iter(skel1->progs.dump_task, NULL); - if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) + if (!ASSERT_OK_PTR(link, "attach_iter")) goto out; /* unlink this path if it exists. 
*/ @@ -502,7 +502,7 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) skel->bss->map2_id = map_info.id; link = bpf_program__attach_iter(skel->progs.dump_bpf_map, NULL); - if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) + if (!ASSERT_OK_PTR(link, "attach_iter")) goto free_map2; iter_fd = bpf_iter_create(bpf_link__fd(link)); @@ -607,14 +607,12 @@ static void test_bpf_hash_map(void) opts.link_info = &linfo; opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts); - if (CHECK(!IS_ERR(link), "attach_iter", - "attach_iter for hashmap2 unexpected succeeded\n")) + if (!ASSERT_ERR_PTR(link, "attach_iter")) goto out; linfo.map.map_fd = bpf_map__fd(skel->maps.hashmap3); link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts); - if (CHECK(!IS_ERR(link), "attach_iter", - "attach_iter for hashmap3 unexpected succeeded\n")) + if (!ASSERT_ERR_PTR(link, "attach_iter")) goto out; /* hashmap1 should be good, update map values here */ @@ -636,7 +634,7 @@ static void test_bpf_hash_map(void) linfo.map.map_fd = map_fd; link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts); - if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) + if (!ASSERT_OK_PTR(link, "attach_iter")) goto out; iter_fd = bpf_iter_create(bpf_link__fd(link)); @@ -727,7 +725,7 @@ static void test_bpf_percpu_hash_map(void) opts.link_info = &linfo; opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_hash_map, &opts); - if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) + if (!ASSERT_OK_PTR(link, "attach_iter")) goto out; iter_fd = bpf_iter_create(bpf_link__fd(link)); @@ -798,7 +796,7 @@ static void test_bpf_array_map(void) opts.link_info = &linfo; opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.dump_bpf_array_map, &opts); - if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) + if (!ASSERT_OK_PTR(link, 
"attach_iter")) goto out; iter_fd = bpf_iter_create(bpf_link__fd(link)); @@ -894,7 +892,7 @@ static void test_bpf_percpu_array_map(void) opts.link_info = &linfo; opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_array_map, &opts); - if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) + if (!ASSERT_OK_PTR(link, "attach_iter")) goto out; iter_fd = bpf_iter_create(bpf_link__fd(link)); @@ -957,7 +955,7 @@ static void test_bpf_sk_storage_delete(void) opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.delete_bpf_sk_storage_map, &opts); - if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) + if (!ASSERT_OK_PTR(link, "attach_iter")) goto out; iter_fd = bpf_iter_create(bpf_link__fd(link)); @@ -1075,7 +1073,7 @@ static void test_bpf_sk_storage_map(void) opts.link_info = &linfo; opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.dump_bpf_sk_storage_map, &opts); - if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) + if (!ASSERT_OK_PTR(link, "attach_iter")) goto out; iter_fd = bpf_iter_create(bpf_link__fd(link)); @@ -1128,7 +1126,7 @@ static void test_rdonly_buf_out_of_bound(void) opts.link_info = &linfo; opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts); - if (CHECK(!IS_ERR(link), "attach_iter", "unexpected success\n")) + if (!ASSERT_ERR_PTR(link, "attach_iter")) bpf_link__destroy(link); bpf_iter_test_kern5__destroy(skel); @@ -1186,8 +1184,7 @@ static void test_task_vma(void) skel->links.proc_maps = bpf_program__attach_iter( skel->progs.proc_maps, NULL); - if (CHECK(IS_ERR(skel->links.proc_maps), "bpf_program__attach_iter", - "attach iterator failed\n")) { + if (!ASSERT_OK_PTR(skel->links.proc_maps, "bpf_program__attach_iter")) { skel->links.proc_maps = NULL; goto out; } diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c 
b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index e25917f04602..efe1e979affb 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c @@ -82,7 +82,7 @@ static void *server(void *arg) bytes, total_bytes, nr_sent, errno); done: - if (fd != -1) + if (fd >= 0) close(fd); if (err) { WRITE_ONCE(stop, 1); @@ -191,8 +191,7 @@ static void test_cubic(void) return; link = bpf_map__attach_struct_ops(cubic_skel->maps.cubic); - if (CHECK(IS_ERR(link), "bpf_map__attach_struct_ops", "err:%ld\n", - PTR_ERR(link))) { + if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops")) { bpf_cubic__destroy(cubic_skel); return; } @@ -213,8 +212,7 @@ static void test_dctcp(void) return; link = bpf_map__attach_struct_ops(dctcp_skel->maps.dctcp); - if (CHECK(IS_ERR(link), "bpf_map__attach_struct_ops", "err:%ld\n", - PTR_ERR(link))) { + if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops")) { bpf_dctcp__destroy(dctcp_skel); return; } diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 0457ae32b270..857e3f26086f 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -3811,7 +3811,7 @@ static void do_test_raw(unsigned int test_num) always_log); free(raw_btf); - err = ((btf_fd == -1) != test->btf_load_err); + err = ((btf_fd < 0) != test->btf_load_err); if (CHECK(err, "btf_fd:%d test->btf_load_err:%u", btf_fd, test->btf_load_err) || CHECK(test->err_str && !strstr(btf_log_buf, test->err_str), @@ -3820,7 +3820,7 @@ static void do_test_raw(unsigned int test_num) goto done; } - if (err || btf_fd == -1) + if (err || btf_fd < 0) goto done; create_attr.name = test->map_name; @@ -3834,16 +3834,16 @@ static void do_test_raw(unsigned int test_num) map_fd = bpf_create_map_xattr(&create_attr); - err = ((map_fd == -1) != test->map_create_err); + err = ((map_fd < 0) != test->map_create_err); CHECK(err, "map_fd:%d 
test->map_create_err:%u", map_fd, test->map_create_err); done: if (*btf_log_buf && (err || always_log)) fprintf(stderr, "\n%s", btf_log_buf); - if (btf_fd != -1) + if (btf_fd >= 0) close(btf_fd); - if (map_fd != -1) + if (map_fd >= 0) close(map_fd); } @@ -3941,7 +3941,7 @@ static int test_big_btf_info(unsigned int test_num) btf_fd = bpf_load_btf(raw_btf, raw_btf_size, btf_log_buf, BTF_LOG_BUF_SIZE, always_log); - if (CHECK(btf_fd == -1, "errno:%d", errno)) { + if (CHECK(btf_fd < 0, "errno:%d", errno)) { err = -1; goto done; } @@ -3987,7 +3987,7 @@ done: free(raw_btf); free(user_btf); - if (btf_fd != -1) + if (btf_fd >= 0) close(btf_fd); return err; @@ -4029,7 +4029,7 @@ static int test_btf_id(unsigned int test_num) btf_fd[0] = bpf_load_btf(raw_btf, raw_btf_size, btf_log_buf, BTF_LOG_BUF_SIZE, always_log); - if (CHECK(btf_fd[0] == -1, "errno:%d", errno)) { + if (CHECK(btf_fd[0] < 0, "errno:%d", errno)) { err = -1; goto done; } @@ -4043,7 +4043,7 @@ static int test_btf_id(unsigned int test_num) } btf_fd[1] = bpf_btf_get_fd_by_id(info[0].id); - if (CHECK(btf_fd[1] == -1, "errno:%d", errno)) { + if (CHECK(btf_fd[1] < 0, "errno:%d", errno)) { err = -1; goto done; } @@ -4071,7 +4071,7 @@ static int test_btf_id(unsigned int test_num) create_attr.btf_value_type_id = 2; map_fd = bpf_create_map_xattr(&create_attr); - if (CHECK(map_fd == -1, "errno:%d", errno)) { + if (CHECK(map_fd < 0, "errno:%d", errno)) { err = -1; goto done; } @@ -4094,7 +4094,7 @@ static int test_btf_id(unsigned int test_num) /* Test BTF ID is removed from the kernel */ btf_fd[0] = bpf_btf_get_fd_by_id(map_info.btf_id); - if (CHECK(btf_fd[0] == -1, "errno:%d", errno)) { + if (CHECK(btf_fd[0] < 0, "errno:%d", errno)) { err = -1; goto done; } @@ -4105,7 +4105,7 @@ static int test_btf_id(unsigned int test_num) close(map_fd); map_fd = -1; btf_fd[0] = bpf_btf_get_fd_by_id(map_info.btf_id); - if (CHECK(btf_fd[0] != -1, "BTF lingers")) { + if (CHECK(btf_fd[0] >= 0, "BTF lingers")) { err = -1; goto done; } @@ 
-4117,11 +4117,11 @@ done: fprintf(stderr, "\n%s", btf_log_buf); free(raw_btf); - if (map_fd != -1) + if (map_fd >= 0) close(map_fd); for (i = 0; i < 2; i++) { free(user_btf[i]); - if (btf_fd[i] != -1) + if (btf_fd[i] >= 0) close(btf_fd[i]); } @@ -4166,7 +4166,7 @@ static void do_test_get_info(unsigned int test_num) btf_fd = bpf_load_btf(raw_btf, raw_btf_size, btf_log_buf, BTF_LOG_BUF_SIZE, always_log); - if (CHECK(btf_fd == -1, "errno:%d", errno)) { + if (CHECK(btf_fd <= 0, "errno:%d", errno)) { err = -1; goto done; } @@ -4212,7 +4212,7 @@ done: free(raw_btf); free(user_btf); - if (btf_fd != -1) + if (btf_fd >= 0) close(btf_fd); } @@ -4249,8 +4249,9 @@ static void do_test_file(unsigned int test_num) return; btf = btf__parse_elf(test->file, &btf_ext); - if (IS_ERR(btf)) { - if (PTR_ERR(btf) == -ENOENT) { + err = libbpf_get_error(btf); + if (err) { + if (err == -ENOENT) { printf("%s:SKIP: No ELF %s found", __func__, BTF_ELF_SEC); test__skip(); return; @@ -4263,7 +4264,8 @@ static void do_test_file(unsigned int test_num) btf_ext__free(btf_ext); obj = bpf_object__open(test->file); - if (CHECK(IS_ERR(obj), "obj: %ld", PTR_ERR(obj))) + err = libbpf_get_error(obj); + if (CHECK(err, "obj: %d", err)) return; prog = bpf_program__next(NULL, obj); @@ -4298,7 +4300,7 @@ static void do_test_file(unsigned int test_num) info_len = sizeof(struct bpf_prog_info); err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); - if (CHECK(err == -1, "invalid get info (1st) errno:%d", errno)) { + if (CHECK(err < 0, "invalid get info (1st) errno:%d", errno)) { fprintf(stderr, "%s\n", btf_log_buf); err = -1; goto done; @@ -4330,7 +4332,7 @@ static void do_test_file(unsigned int test_num) err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); - if (CHECK(err == -1, "invalid get info (2nd) errno:%d", errno)) { + if (CHECK(err < 0, "invalid get info (2nd) errno:%d", errno)) { fprintf(stderr, "%s\n", btf_log_buf); err = -1; goto done; @@ -4886,7 +4888,7 @@ static void do_test_pprint(int 
test_num) always_log); free(raw_btf); - if (CHECK(btf_fd == -1, "errno:%d", errno)) { + if (CHECK(btf_fd < 0, "errno:%d", errno)) { err = -1; goto done; } @@ -4901,7 +4903,7 @@ static void do_test_pprint(int test_num) create_attr.btf_value_type_id = test->value_type_id; map_fd = bpf_create_map_xattr(&create_attr); - if (CHECK(map_fd == -1, "errno:%d", errno)) { + if (CHECK(map_fd < 0, "errno:%d", errno)) { err = -1; goto done; } @@ -4982,7 +4984,7 @@ static void do_test_pprint(int test_num) err = check_line(expected_line, nexpected_line, sizeof(expected_line), line); - if (err == -1) + if (err < 0) goto done; } @@ -4998,7 +5000,7 @@ static void do_test_pprint(int test_num) cpu, cmapv); err = check_line(expected_line, nexpected_line, sizeof(expected_line), line); - if (err == -1) + if (err < 0) goto done; cmapv = cmapv + rounded_value_size; @@ -5036,9 +5038,9 @@ done: fprintf(stderr, "OK"); if (*btf_log_buf && (err || always_log)) fprintf(stderr, "\n%s", btf_log_buf); - if (btf_fd != -1) + if (btf_fd >= 0) close(btf_fd); - if (map_fd != -1) + if (map_fd >= 0) close(map_fd); if (pin_file) fclose(pin_file); @@ -5950,7 +5952,7 @@ static int test_get_finfo(const struct prog_info_raw_test *test, /* get necessary lens */ info_len = sizeof(struct bpf_prog_info); err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); - if (CHECK(err == -1, "invalid get info (1st) errno:%d", errno)) { + if (CHECK(err < 0, "invalid get info (1st) errno:%d", errno)) { fprintf(stderr, "%s\n", btf_log_buf); return -1; } @@ -5980,7 +5982,7 @@ static int test_get_finfo(const struct prog_info_raw_test *test, info.func_info_rec_size = rec_size; info.func_info = ptr_to_u64(func_info); err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); - if (CHECK(err == -1, "invalid get info (2nd) errno:%d", errno)) { + if (CHECK(err < 0, "invalid get info (2nd) errno:%d", errno)) { fprintf(stderr, "%s\n", btf_log_buf); err = -1; goto done; @@ -6044,7 +6046,7 @@ static int test_get_linfo(const struct 
prog_info_raw_test *test, info_len = sizeof(struct bpf_prog_info); err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); - if (CHECK(err == -1, "err:%d errno:%d", err, errno)) { + if (CHECK(err < 0, "err:%d errno:%d", err, errno)) { err = -1; goto done; } @@ -6123,7 +6125,7 @@ static int test_get_linfo(const struct prog_info_raw_test *test, * Only recheck the info.*line_info* fields. * Other fields are not the concern of this test. */ - if (CHECK(err == -1 || + if (CHECK(err < 0 || info.nr_line_info != cnt || (jited_cnt && !info.jited_line_info) || info.nr_jited_line_info != jited_cnt || @@ -6260,7 +6262,7 @@ static void do_test_info_raw(unsigned int test_num) always_log); free(raw_btf); - if (CHECK(btf_fd == -1, "invalid btf_fd errno:%d", errno)) { + if (CHECK(btf_fd < 0, "invalid btf_fd errno:%d", errno)) { err = -1; goto done; } @@ -6273,7 +6275,8 @@ static void do_test_info_raw(unsigned int test_num) patched_linfo = patch_name_tbd(test->line_info, test->str_sec, linfo_str_off, test->str_sec_size, &linfo_size); - if (IS_ERR(patched_linfo)) { + err = libbpf_get_error(patched_linfo); + if (err) { fprintf(stderr, "error in creating raw bpf_line_info"); err = -1; goto done; @@ -6297,7 +6300,7 @@ static void do_test_info_raw(unsigned int test_num) } prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)); - err = ((prog_fd == -1) != test->expected_prog_load_failure); + err = ((prog_fd < 0) != test->expected_prog_load_failure); if (CHECK(err, "prog_fd:%d expected_prog_load_failure:%u errno:%d", prog_fd, test->expected_prog_load_failure, errno) || CHECK(test->err_str && !strstr(btf_log_buf, test->err_str), @@ -6306,7 +6309,7 @@ static void do_test_info_raw(unsigned int test_num) goto done; } - if (prog_fd == -1) + if (prog_fd < 0) goto done; err = test_get_finfo(test, prog_fd); @@ -6323,12 +6326,12 @@ done: if (*btf_log_buf && (err || always_log)) fprintf(stderr, "\n%s", btf_log_buf); - if (btf_fd != -1) + if (btf_fd >= 0) close(btf_fd); - if (prog_fd != -1) 
+ if (prog_fd >= 0) close(prog_fd); - if (!IS_ERR(patched_linfo)) + if (!libbpf_get_error(patched_linfo)) free(patched_linfo); } @@ -6839,9 +6842,9 @@ static void do_test_dedup(unsigned int test_num) return; test_btf = btf__new((__u8 *)raw_btf, raw_btf_size); + err = libbpf_get_error(test_btf); free(raw_btf); - if (CHECK(IS_ERR(test_btf), "invalid test_btf errno:%ld", - PTR_ERR(test_btf))) { + if (CHECK(err, "invalid test_btf errno:%d", err)) { err = -1; goto done; } @@ -6853,9 +6856,9 @@ static void do_test_dedup(unsigned int test_num) if (!raw_btf) return; expect_btf = btf__new((__u8 *)raw_btf, raw_btf_size); + err = libbpf_get_error(expect_btf); free(raw_btf); - if (CHECK(IS_ERR(expect_btf), "invalid expect_btf errno:%ld", - PTR_ERR(expect_btf))) { + if (CHECK(err, "invalid expect_btf errno:%d", err)) { err = -1; goto done; } @@ -6966,10 +6969,8 @@ static void do_test_dedup(unsigned int test_num) } done: - if (!IS_ERR(test_btf)) - btf__free(test_btf); - if (!IS_ERR(expect_btf)) - btf__free(expect_btf); + btf__free(test_btf); + btf__free(expect_btf); } void test_btf(void) diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c index 5e129dc2073c..1b90e684ff13 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c @@ -32,8 +32,9 @@ static int btf_dump_all_types(const struct btf *btf, int err = 0, id; d = btf_dump__new(btf, NULL, opts, btf_dump_printf); - if (IS_ERR(d)) - return PTR_ERR(d); + err = libbpf_get_error(d); + if (err) + return err; for (id = 1; id <= type_cnt; id++) { err = btf_dump__dump_type(d, id); @@ -56,8 +57,7 @@ static int test_btf_dump_case(int n, struct btf_dump_test_case *t) snprintf(test_file, sizeof(test_file), "%s.o", t->file); btf = btf__parse_elf(test_file, NULL); - if (CHECK(IS_ERR(btf), "btf_parse_elf", - "failed to load test BTF: %ld\n", PTR_ERR(btf))) { + if (!ASSERT_OK_PTR(btf, "btf_parse_elf")) { err = 
-PTR_ERR(btf); btf = NULL; goto done; diff --git a/tools/testing/selftests/bpf/prog_tests/btf_write.c b/tools/testing/selftests/bpf/prog_tests/btf_write.c index f36da15b134f..022c7d89d6f4 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_write.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_write.c @@ -4,8 +4,6 @@ #include <bpf/btf.h> #include "btf_helpers.h" -static int duration = 0; - void test_btf_write() { const struct btf_var_secinfo *vi; const struct btf_type *t; @@ -16,7 +14,7 @@ void test_btf_write() { int id, err, str_off; btf = btf__new_empty(); - if (CHECK(IS_ERR(btf), "new_empty", "failed: %ld\n", PTR_ERR(btf))) + if (!ASSERT_OK_PTR(btf, "new_empty")) return; str_off = btf__find_str(btf, "int"); diff --git a/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c index 643dfa35419c..876be0ecb654 100644 --- a/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c @@ -102,8 +102,7 @@ static void test_egress_only(int parent_cgroup_fd, int child_cgroup_fd) */ parent_link = bpf_program__attach_cgroup(obj->progs.egress, parent_cgroup_fd); - if (CHECK(IS_ERR(parent_link), "parent-cg-attach", - "err %ld", PTR_ERR(parent_link))) + if (!ASSERT_OK_PTR(parent_link, "parent-cg-attach")) goto close_bpf_object; err = connect_send(CHILD_CGROUP); if (CHECK(err, "first-connect-send", "errno %d", errno)) @@ -126,8 +125,7 @@ static void test_egress_only(int parent_cgroup_fd, int child_cgroup_fd) */ child_link = bpf_program__attach_cgroup(obj->progs.egress, child_cgroup_fd); - if (CHECK(IS_ERR(child_link), "child-cg-attach", - "err %ld", PTR_ERR(child_link))) + if (!ASSERT_OK_PTR(child_link, "child-cg-attach")) goto close_bpf_object; err = connect_send(CHILD_CGROUP); if (CHECK(err, "second-connect-send", "errno %d", errno)) @@ -147,10 +145,8 @@ static void test_egress_only(int parent_cgroup_fd, int child_cgroup_fd) goto close_bpf_object; 
close_bpf_object: - if (!IS_ERR(parent_link)) - bpf_link__destroy(parent_link); - if (!IS_ERR(child_link)) - bpf_link__destroy(child_link); + bpf_link__destroy(parent_link); + bpf_link__destroy(child_link); cg_storage_multi_egress_only__destroy(obj); } @@ -176,18 +172,15 @@ static void test_isolated(int parent_cgroup_fd, int child_cgroup_fd) */ parent_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1, parent_cgroup_fd); - if (CHECK(IS_ERR(parent_egress1_link), "parent-egress1-cg-attach", - "err %ld", PTR_ERR(parent_egress1_link))) + if (!ASSERT_OK_PTR(parent_egress1_link, "parent-egress1-cg-attach")) goto close_bpf_object; parent_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2, parent_cgroup_fd); - if (CHECK(IS_ERR(parent_egress2_link), "parent-egress2-cg-attach", - "err %ld", PTR_ERR(parent_egress2_link))) + if (!ASSERT_OK_PTR(parent_egress2_link, "parent-egress2-cg-attach")) goto close_bpf_object; parent_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress, parent_cgroup_fd); - if (CHECK(IS_ERR(parent_ingress_link), "parent-ingress-cg-attach", - "err %ld", PTR_ERR(parent_ingress_link))) + if (!ASSERT_OK_PTR(parent_ingress_link, "parent-ingress-cg-attach")) goto close_bpf_object; err = connect_send(CHILD_CGROUP); if (CHECK(err, "first-connect-send", "errno %d", errno)) @@ -221,18 +214,15 @@ static void test_isolated(int parent_cgroup_fd, int child_cgroup_fd) */ child_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1, child_cgroup_fd); - if (CHECK(IS_ERR(child_egress1_link), "child-egress1-cg-attach", - "err %ld", PTR_ERR(child_egress1_link))) + if (!ASSERT_OK_PTR(child_egress1_link, "child-egress1-cg-attach")) goto close_bpf_object; child_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2, child_cgroup_fd); - if (CHECK(IS_ERR(child_egress2_link), "child-egress2-cg-attach", - "err %ld", PTR_ERR(child_egress2_link))) + if (!ASSERT_OK_PTR(child_egress2_link, "child-egress2-cg-attach")) goto close_bpf_object; 
child_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress, child_cgroup_fd); - if (CHECK(IS_ERR(child_ingress_link), "child-ingress-cg-attach", - "err %ld", PTR_ERR(child_ingress_link))) + if (!ASSERT_OK_PTR(child_ingress_link, "child-ingress-cg-attach")) goto close_bpf_object; err = connect_send(CHILD_CGROUP); if (CHECK(err, "second-connect-send", "errno %d", errno)) @@ -264,18 +254,12 @@ static void test_isolated(int parent_cgroup_fd, int child_cgroup_fd) goto close_bpf_object; close_bpf_object: - if (!IS_ERR(parent_egress1_link)) - bpf_link__destroy(parent_egress1_link); - if (!IS_ERR(parent_egress2_link)) - bpf_link__destroy(parent_egress2_link); - if (!IS_ERR(parent_ingress_link)) - bpf_link__destroy(parent_ingress_link); - if (!IS_ERR(child_egress1_link)) - bpf_link__destroy(child_egress1_link); - if (!IS_ERR(child_egress2_link)) - bpf_link__destroy(child_egress2_link); - if (!IS_ERR(child_ingress_link)) - bpf_link__destroy(child_ingress_link); + bpf_link__destroy(parent_egress1_link); + bpf_link__destroy(parent_egress2_link); + bpf_link__destroy(parent_ingress_link); + bpf_link__destroy(child_egress1_link); + bpf_link__destroy(child_egress2_link); + bpf_link__destroy(child_ingress_link); cg_storage_multi_isolated__destroy(obj); } @@ -301,18 +285,15 @@ static void test_shared(int parent_cgroup_fd, int child_cgroup_fd) */ parent_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1, parent_cgroup_fd); - if (CHECK(IS_ERR(parent_egress1_link), "parent-egress1-cg-attach", - "err %ld", PTR_ERR(parent_egress1_link))) + if (!ASSERT_OK_PTR(parent_egress1_link, "parent-egress1-cg-attach")) goto close_bpf_object; parent_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2, parent_cgroup_fd); - if (CHECK(IS_ERR(parent_egress2_link), "parent-egress2-cg-attach", - "err %ld", PTR_ERR(parent_egress2_link))) + if (!ASSERT_OK_PTR(parent_egress2_link, "parent-egress2-cg-attach")) goto close_bpf_object; parent_ingress_link = 
bpf_program__attach_cgroup(obj->progs.ingress, parent_cgroup_fd); - if (CHECK(IS_ERR(parent_ingress_link), "parent-ingress-cg-attach", - "err %ld", PTR_ERR(parent_ingress_link))) + if (!ASSERT_OK_PTR(parent_ingress_link, "parent-ingress-cg-attach")) goto close_bpf_object; err = connect_send(CHILD_CGROUP); if (CHECK(err, "first-connect-send", "errno %d", errno)) @@ -338,18 +319,15 @@ static void test_shared(int parent_cgroup_fd, int child_cgroup_fd) */ child_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1, child_cgroup_fd); - if (CHECK(IS_ERR(child_egress1_link), "child-egress1-cg-attach", - "err %ld", PTR_ERR(child_egress1_link))) + if (!ASSERT_OK_PTR(child_egress1_link, "child-egress1-cg-attach")) goto close_bpf_object; child_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2, child_cgroup_fd); - if (CHECK(IS_ERR(child_egress2_link), "child-egress2-cg-attach", - "err %ld", PTR_ERR(child_egress2_link))) + if (!ASSERT_OK_PTR(child_egress2_link, "child-egress2-cg-attach")) goto close_bpf_object; child_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress, child_cgroup_fd); - if (CHECK(IS_ERR(child_ingress_link), "child-ingress-cg-attach", - "err %ld", PTR_ERR(child_ingress_link))) + if (!ASSERT_OK_PTR(child_ingress_link, "child-ingress-cg-attach")) goto close_bpf_object; err = connect_send(CHILD_CGROUP); if (CHECK(err, "second-connect-send", "errno %d", errno)) @@ -375,18 +353,12 @@ static void test_shared(int parent_cgroup_fd, int child_cgroup_fd) goto close_bpf_object; close_bpf_object: - if (!IS_ERR(parent_egress1_link)) - bpf_link__destroy(parent_egress1_link); - if (!IS_ERR(parent_egress2_link)) - bpf_link__destroy(parent_egress2_link); - if (!IS_ERR(parent_ingress_link)) - bpf_link__destroy(parent_ingress_link); - if (!IS_ERR(child_egress1_link)) - bpf_link__destroy(child_egress1_link); - if (!IS_ERR(child_egress2_link)) - bpf_link__destroy(child_egress2_link); - if (!IS_ERR(child_ingress_link)) - 
bpf_link__destroy(child_ingress_link); + bpf_link__destroy(parent_egress1_link); + bpf_link__destroy(parent_egress2_link); + bpf_link__destroy(parent_ingress_link); + bpf_link__destroy(child_egress1_link); + bpf_link__destroy(child_egress2_link); + bpf_link__destroy(child_ingress_link); cg_storage_multi_shared__destroy(obj); } diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c index 0a1fc9816cef..20bb8831dda6 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c @@ -167,7 +167,7 @@ void test_cgroup_attach_multi(void) prog_cnt = 2; CHECK_FAIL(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE, &attach_flags, - prog_ids, &prog_cnt) != -1); + prog_ids, &prog_cnt) >= 0); CHECK_FAIL(errno != ENOSPC); CHECK_FAIL(prog_cnt != 4); /* check that prog_ids are returned even when buffer is too small */ diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c index 736796e56ed1..9091524131d6 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c @@ -65,8 +65,7 @@ void test_cgroup_link(void) for (i = 0; i < cg_nr; i++) { links[i] = bpf_program__attach_cgroup(skel->progs.egress, cgs[i].fd); - if (CHECK(IS_ERR(links[i]), "cg_attach", "i: %d, err: %ld\n", - i, PTR_ERR(links[i]))) + if (!ASSERT_OK_PTR(links[i], "cg_attach")) goto cleanup; } @@ -121,8 +120,7 @@ void test_cgroup_link(void) links[last_cg] = bpf_program__attach_cgroup(skel->progs.egress, cgs[last_cg].fd); - if (CHECK(IS_ERR(links[last_cg]), "cg_attach", "err: %ld\n", - PTR_ERR(links[last_cg]))) + if (!ASSERT_OK_PTR(links[last_cg], "cg_attach")) goto cleanup; ping_and_check(cg_nr + 1, 0); @@ -147,7 +145,7 @@ void test_cgroup_link(void) /* attempt to mix in with multi-attach bpf_link */ tmp_link = 
bpf_program__attach_cgroup(skel->progs.egress, cgs[last_cg].fd); - if (CHECK(!IS_ERR(tmp_link), "cg_attach_fail", "unexpected success!\n")) { + if (!ASSERT_ERR_PTR(tmp_link, "cg_attach_fail")) { bpf_link__destroy(tmp_link); goto cleanup; } @@ -165,8 +163,7 @@ void test_cgroup_link(void) /* attach back link-based one */ links[last_cg] = bpf_program__attach_cgroup(skel->progs.egress, cgs[last_cg].fd); - if (CHECK(IS_ERR(links[last_cg]), "cg_attach", "err: %ld\n", - PTR_ERR(links[last_cg]))) + if (!ASSERT_OK_PTR(links[last_cg], "cg_attach")) goto cleanup; ping_and_check(cg_nr, 0); @@ -249,8 +246,7 @@ cleanup: BPF_CGROUP_INET_EGRESS); for (i = 0; i < cg_nr; i++) { - if (!IS_ERR(links[i])) - bpf_link__destroy(links[i]); + bpf_link__destroy(links[i]); } test_cgroup_link__destroy(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c index 464edc1c1708..b9dc4ec655b5 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c @@ -60,7 +60,7 @@ static void run_cgroup_bpf_test(const char *cg_path, int out_sk) goto cleanup; link = bpf_program__attach_cgroup(skel->progs.ingress_lookup, cgfd); - if (CHECK(IS_ERR(link), "cgroup_attach", "err: %ld\n", PTR_ERR(link))) + if (!ASSERT_OK_PTR(link, "cgroup_attach")) goto cleanup; run_lookup_test(&skel->bss->g_serv_port, out_sk); diff --git a/tools/testing/selftests/bpf/prog_tests/check_mtu.c b/tools/testing/selftests/bpf/prog_tests/check_mtu.c index b62a39315336..012068f33a0a 100644 --- a/tools/testing/selftests/bpf/prog_tests/check_mtu.c +++ b/tools/testing/selftests/bpf/prog_tests/check_mtu.c @@ -53,7 +53,7 @@ static void test_check_mtu_xdp_attach(void) prog = skel->progs.xdp_use_helper_basic; link = bpf_program__attach_xdp(prog, IFINDEX_LO); - if (CHECK(IS_ERR(link), "link_attach", "failed: %ld\n", PTR_ERR(link))) + if (!ASSERT_OK_PTR(link, "link_attach")) goto 
out; skel->links.xdp_use_helper_basic = link; diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index 607710826dca..d02e064c535f 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -369,8 +369,7 @@ static int setup_type_id_case_local(struct core_reloc_test_case *test) const char *name; int i; - if (CHECK(IS_ERR(local_btf), "local_btf", "failed: %ld\n", PTR_ERR(local_btf)) || - CHECK(IS_ERR(targ_btf), "targ_btf", "failed: %ld\n", PTR_ERR(targ_btf))) { + if (!ASSERT_OK_PTR(local_btf, "local_btf") || !ASSERT_OK_PTR(targ_btf, "targ_btf")) { btf__free(local_btf); btf__free(targ_btf); return -EINVAL; @@ -848,8 +847,7 @@ void test_core_reloc(void) } obj = bpf_object__open_file(test_case->bpf_obj_file, NULL); - if (CHECK(IS_ERR(obj), "obj_open", "failed to open '%s': %ld\n", - test_case->bpf_obj_file, PTR_ERR(obj))) + if (!ASSERT_OK_PTR(obj, "obj_open")) continue; probe_name = "raw_tracepoint/sys_enter"; @@ -899,8 +897,7 @@ void test_core_reloc(void) data->my_pid_tgid = my_pid_tgid; link = bpf_program__attach_raw_tracepoint(prog, tp_name); - if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", - PTR_ERR(link))) + if (!ASSERT_OK_PTR(link, "attach_raw_tp")) goto cleanup; /* trigger test run */ @@ -941,10 +938,8 @@ cleanup: CHECK_FAIL(munmap(mmap_data, mmap_sz)); mmap_data = NULL; } - if (!IS_ERR_OR_NULL(link)) { - bpf_link__destroy(link); - link = NULL; - } + bpf_link__destroy(link); + link = NULL; bpf_object__close(obj); } } diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c index 109d0345a2be..91154c2ba256 100644 --- a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c +++ b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c @@ -1,8 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2019 Facebook */ #include <test_progs.h> -#include 
"fentry_test.skel.h" -#include "fexit_test.skel.h" +#include "fentry_test.lskel.h" +#include "fexit_test.lskel.h" void test_fentry_fexit(void) { @@ -26,7 +26,7 @@ void test_fentry_fexit(void) if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err)) goto close_prog; - prog_fd = bpf_program__fd(fexit_skel->progs.test1); + prog_fd = fexit_skel->progs.test1.prog_fd; err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, NULL, &retval, &duration); CHECK(err || retval, "ipv6", diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c index 7cb111b11995..174c89e7456e 100644 --- a/tools/testing/selftests/bpf/prog_tests/fentry_test.c +++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c @@ -1,13 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2019 Facebook */ #include <test_progs.h> -#include "fentry_test.skel.h" +#include "fentry_test.lskel.h" static int fentry_test(struct fentry_test *fentry_skel) { int err, prog_fd, i; __u32 duration = 0, retval; - struct bpf_link *link; + int link_fd; __u64 *result; err = fentry_test__attach(fentry_skel); @@ -15,11 +15,11 @@ static int fentry_test(struct fentry_test *fentry_skel) return err; /* Check that already linked program can't be attached again. 
*/ - link = bpf_program__attach(fentry_skel->progs.test1); - if (!ASSERT_ERR_PTR(link, "fentry_attach_link")) + link_fd = fentry_test__test1__attach(fentry_skel); + if (!ASSERT_LT(link_fd, 0, "fentry_attach_link")) return -1; - prog_fd = bpf_program__fd(fentry_skel->progs.test1); + prog_fd = fentry_skel->progs.test1.prog_fd; err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, NULL, &retval, &duration); ASSERT_OK(err, "test_run"); diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c index 63990842d20f..73b4c76e6b86 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c @@ -146,10 +146,8 @@ static void test_fexit_bpf2bpf_common(const char *obj_file, close_prog: for (i = 0; i < prog_cnt; i++) - if (!IS_ERR_OR_NULL(link[i])) - bpf_link__destroy(link[i]); - if (!IS_ERR_OR_NULL(obj)) - bpf_object__close(obj); + bpf_link__destroy(link[i]); + bpf_object__close(obj); bpf_object__close(tgt_obj); free(link); free(prog); @@ -231,7 +229,7 @@ static int test_second_attach(struct bpf_object *obj) return err; link = bpf_program__attach_freplace(prog, tgt_fd, tgt_name); - if (CHECK(IS_ERR(link), "second_link", "failed to attach second link prog_fd %d tgt_fd %d\n", bpf_program__fd(prog), tgt_fd)) + if (!ASSERT_OK_PTR(link, "second_link")) goto out; err = bpf_prog_test_run(tgt_fd, 1, &pkt_v6, sizeof(pkt_v6), @@ -283,9 +281,7 @@ static void test_fmod_ret_freplace(void) opts.attach_prog_fd = pkt_fd; freplace_obj = bpf_object__open_file(freplace_name, &opts); - if (CHECK(IS_ERR_OR_NULL(freplace_obj), "freplace_obj_open", - "failed to open %s: %ld\n", freplace_name, - PTR_ERR(freplace_obj))) + if (!ASSERT_OK_PTR(freplace_obj, "freplace_obj_open")) goto out; err = bpf_object__load(freplace_obj); @@ -294,14 +290,12 @@ static void test_fmod_ret_freplace(void) prog = bpf_program__next(NULL, freplace_obj); freplace_link = 
bpf_program__attach_trace(prog); - if (CHECK(IS_ERR(freplace_link), "freplace_attach_trace", "failed to link\n")) + if (!ASSERT_OK_PTR(freplace_link, "freplace_attach_trace")) goto out; opts.attach_prog_fd = bpf_program__fd(prog); fmod_obj = bpf_object__open_file(fmod_ret_name, &opts); - if (CHECK(IS_ERR_OR_NULL(fmod_obj), "fmod_obj_open", - "failed to open %s: %ld\n", fmod_ret_name, - PTR_ERR(fmod_obj))) + if (!ASSERT_OK_PTR(fmod_obj, "fmod_obj_open")) goto out; err = bpf_object__load(fmod_obj); @@ -350,9 +344,7 @@ static void test_obj_load_failure_common(const char *obj_file, ); obj = bpf_object__open_file(obj_file, &opts); - if (CHECK(IS_ERR_OR_NULL(obj), "obj_open", - "failed to open %s: %ld\n", obj_file, - PTR_ERR(obj))) + if (!ASSERT_OK_PTR(obj, "obj_open")) goto close_prog; /* It should fail to load the program */ @@ -361,8 +353,7 @@ static void test_obj_load_failure_common(const char *obj_file, goto close_prog; close_prog: - if (!IS_ERR_OR_NULL(obj)) - bpf_object__close(obj); + bpf_object__close(obj); bpf_object__close(pkt_obj); } diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c index ccc7e8a34ab6..4e7f4b42ea29 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c @@ -6,7 +6,7 @@ #include <time.h> #include <sys/mman.h> #include <sys/syscall.h> -#include "fexit_sleep.skel.h" +#include "fexit_sleep.lskel.h" static int do_sleep(void *skel) { @@ -58,8 +58,8 @@ void test_fexit_sleep(void) * waiting for percpu_ref_kill to confirm). The other one * will be freed quickly. 
*/ - close(bpf_program__fd(fexit_skel->progs.nanosleep_fentry)); - close(bpf_program__fd(fexit_skel->progs.nanosleep_fexit)); + close(fexit_skel->progs.nanosleep_fentry.prog_fd); + close(fexit_skel->progs.nanosleep_fexit.prog_fd); fexit_sleep__detach(fexit_skel); /* kill the thread to unwind sys_nanosleep stack through the trampoline */ diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_test.c b/tools/testing/selftests/bpf/prog_tests/fexit_test.c index 6792e41f7f69..af3dba726701 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_test.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_test.c @@ -1,13 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2019 Facebook */ #include <test_progs.h> -#include "fexit_test.skel.h" +#include "fexit_test.lskel.h" static int fexit_test(struct fexit_test *fexit_skel) { int err, prog_fd, i; __u32 duration = 0, retval; - struct bpf_link *link; + int link_fd; __u64 *result; err = fexit_test__attach(fexit_skel); @@ -15,11 +15,11 @@ static int fexit_test(struct fexit_test *fexit_skel) return err; /* Check that already linked program can't be attached again. 
*/ - link = bpf_program__attach(fexit_skel->progs.test1); - if (!ASSERT_ERR_PTR(link, "fexit_attach_link")) + link_fd = fexit_test__test1__attach(fexit_skel); + if (!ASSERT_LT(link_fd, 0, "fexit_attach_link")) return -1; - prog_fd = bpf_program__fd(fexit_skel->progs.test1); + prog_fd = fexit_skel->progs.test1.prog_fd; err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, NULL, &retval, &duration); ASSERT_OK(err, "test_run"); diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c index cd6dc80edf18..225714f71ac6 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c @@ -541,7 +541,7 @@ static void test_skb_less_link_create(struct bpf_flow *skel, int tap_fd) return; link = bpf_program__attach_netns(skel->progs._dissect, net_fd); - if (CHECK(IS_ERR(link), "attach_netns", "err %ld\n", PTR_ERR(link))) + if (!ASSERT_OK_PTR(link, "attach_netns")) goto out_close; run_tests_skb_less(tap_fd, skel->maps.last_dissection); diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c index 172c586b6996..3931ede5c534 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c @@ -134,9 +134,9 @@ static void test_link_create_link_create(int netns, int prog1, int prog2) /* Expect failure creating link when another link exists */ errno = 0; link2 = bpf_link_create(prog2, netns, BPF_FLOW_DISSECTOR, &opts); - if (CHECK_FAIL(link2 != -1 || errno != E2BIG)) + if (CHECK_FAIL(link2 >= 0 || errno != E2BIG)) perror("bpf_prog_attach(prog2) expected E2BIG"); - if (link2 != -1) + if (link2 >= 0) close(link2); CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1)); @@ -159,9 +159,9 @@ static void test_prog_attach_link_create(int netns, int prog1, int prog2) /* Expect 
failure creating link when prog attached */ errno = 0; link = bpf_link_create(prog2, netns, BPF_FLOW_DISSECTOR, &opts); - if (CHECK_FAIL(link != -1 || errno != EEXIST)) + if (CHECK_FAIL(link >= 0 || errno != EEXIST)) perror("bpf_link_create(prog2) expected EEXIST"); - if (link != -1) + if (link >= 0) close(link); CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1)); @@ -623,7 +623,7 @@ static void run_tests(int netns) } out_close: for (i = 0; i < ARRAY_SIZE(progs); i++) { - if (progs[i] != -1) + if (progs[i] >= 0) CHECK_FAIL(close(progs[i])); } } diff --git a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c index 925722217edf..522237aa4470 100644 --- a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c +++ b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c @@ -121,12 +121,12 @@ void test_get_stack_raw_tp(void) goto close_prog; link = bpf_program__attach_raw_tracepoint(prog, "sys_enter"); - if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link))) + if (!ASSERT_OK_PTR(link, "attach_raw_tp")) goto close_prog; pb_opts.sample_cb = get_stack_print_output; pb = perf_buffer__new(bpf_map__fd(map), 8, &pb_opts); - if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb))) + if (!ASSERT_OK_PTR(pb, "perf_buf__new")) goto close_prog; /* trigger some syscall action */ @@ -141,9 +141,7 @@ void test_get_stack_raw_tp(void) } close_prog: - if (!IS_ERR_OR_NULL(link)) - bpf_link__destroy(link); - if (!IS_ERR_OR_NULL(pb)) - perf_buffer__free(pb); + bpf_link__destroy(link); + perf_buffer__free(pb); bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c b/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c index d884b2ed5bc5..8d5a6023a1bb 100644 --- a/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c @@ -48,8 +48,7 @@ 
void test_get_stackid_cannot_attach(void) skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu, pmu_fd); - CHECK(!IS_ERR(skel->links.oncpu), "attach_perf_event_no_callchain", - "should have failed\n"); + ASSERT_ERR_PTR(skel->links.oncpu, "attach_perf_event_no_callchain"); close(pmu_fd); /* add PERF_SAMPLE_CALLCHAIN, attach should succeed */ @@ -65,8 +64,7 @@ void test_get_stackid_cannot_attach(void) skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu, pmu_fd); - CHECK(IS_ERR(skel->links.oncpu), "attach_perf_event_callchain", - "err: %ld\n", PTR_ERR(skel->links.oncpu)); + ASSERT_OK_PTR(skel->links.oncpu, "attach_perf_event_callchain"); close(pmu_fd); /* add exclude_callchain_kernel, attach should fail */ @@ -82,8 +80,7 @@ void test_get_stackid_cannot_attach(void) skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu, pmu_fd); - CHECK(!IS_ERR(skel->links.oncpu), "attach_perf_event_exclude_callchain_kernel", - "should have failed\n"); + ASSERT_ERR_PTR(skel->links.oncpu, "attach_perf_event_exclude_callchain_kernel"); close(pmu_fd); cleanup: diff --git a/tools/testing/selftests/bpf/prog_tests/hashmap.c b/tools/testing/selftests/bpf/prog_tests/hashmap.c index 428d488830c6..4747ab18f97f 100644 --- a/tools/testing/selftests/bpf/prog_tests/hashmap.c +++ b/tools/testing/selftests/bpf/prog_tests/hashmap.c @@ -48,8 +48,7 @@ static void test_hashmap_generic(void) struct hashmap *map; map = hashmap__new(hash_fn, equal_fn, NULL); - if (CHECK(IS_ERR(map), "hashmap__new", - "failed to create map: %ld\n", PTR_ERR(map))) + if (!ASSERT_OK_PTR(map, "hashmap__new")) return; for (i = 0; i < ELEM_CNT; i++) { @@ -267,8 +266,7 @@ static void test_hashmap_multimap(void) /* force collisions */ map = hashmap__new(collision_hash_fn, equal_fn, NULL); - if (CHECK(IS_ERR(map), "hashmap__new", - "failed to create map: %ld\n", PTR_ERR(map))) + if (!ASSERT_OK_PTR(map, "hashmap__new")) return; /* set up multimap: @@ -339,8 +337,7 @@ static void 
test_hashmap_empty() /* force collisions */ map = hashmap__new(hash_fn, equal_fn, NULL); - if (CHECK(IS_ERR(map), "hashmap__new", - "failed to create map: %ld\n", PTR_ERR(map))) + if (!ASSERT_OK_PTR(map, "hashmap__new")) goto cleanup; if (CHECK(hashmap__size(map) != 0, "hashmap__size", diff --git a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c index d65107919998..ddfb6bf97152 100644 --- a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c +++ b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c @@ -97,15 +97,13 @@ void test_kfree_skb(void) goto close_prog; link = bpf_program__attach_raw_tracepoint(prog, NULL); - if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link))) + if (!ASSERT_OK_PTR(link, "attach_raw_tp")) goto close_prog; link_fentry = bpf_program__attach_trace(fentry); - if (CHECK(IS_ERR(link_fentry), "attach fentry", "err %ld\n", - PTR_ERR(link_fentry))) + if (!ASSERT_OK_PTR(link_fentry, "attach fentry")) goto close_prog; link_fexit = bpf_program__attach_trace(fexit); - if (CHECK(IS_ERR(link_fexit), "attach fexit", "err %ld\n", - PTR_ERR(link_fexit))) + if (!ASSERT_OK_PTR(link_fexit, "attach fexit")) goto close_prog; perf_buf_map = bpf_object__find_map_by_name(obj2, "perf_buf_map"); @@ -116,7 +114,7 @@ void test_kfree_skb(void) pb_opts.sample_cb = on_sample; pb_opts.ctx = &passed; pb = perf_buffer__new(bpf_map__fd(perf_buf_map), 1, &pb_opts); - if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb))) + if (!ASSERT_OK_PTR(pb, "perf_buf__new")) goto close_prog; memcpy(skb.cb, &cb, sizeof(cb)); @@ -144,12 +142,9 @@ void test_kfree_skb(void) CHECK_FAIL(!test_ok[0] || !test_ok[1]); close_prog: perf_buffer__free(pb); - if (!IS_ERR_OR_NULL(link)) - bpf_link__destroy(link); - if (!IS_ERR_OR_NULL(link_fentry)) - bpf_link__destroy(link_fentry); - if (!IS_ERR_OR_NULL(link_fexit)) - bpf_link__destroy(link_fexit); + bpf_link__destroy(link); + bpf_link__destroy(link_fentry); + 
bpf_link__destroy(link_fexit); bpf_object__close(obj); bpf_object__close(obj2); } diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c index 7fc0951ee75f..30a7b9b837bf 100644 --- a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c +++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c @@ -2,7 +2,7 @@ /* Copyright (c) 2021 Facebook */ #include <test_progs.h> #include <network_helpers.h> -#include "kfunc_call_test.skel.h" +#include "kfunc_call_test.lskel.h" #include "kfunc_call_test_subprog.skel.h" static void test_main(void) @@ -14,13 +14,13 @@ static void test_main(void) if (!ASSERT_OK_PTR(skel, "skel")) return; - prog_fd = bpf_program__fd(skel->progs.kfunc_call_test1); + prog_fd = skel->progs.kfunc_call_test1.prog_fd; err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), NULL, NULL, (__u32 *)&retval, NULL); ASSERT_OK(err, "bpf_prog_test_run(test1)"); ASSERT_EQ(retval, 12, "test1-retval"); - prog_fd = bpf_program__fd(skel->progs.kfunc_call_test2); + prog_fd = skel->progs.kfunc_call_test2.prog_fd; err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), NULL, NULL, (__u32 *)&retval, NULL); ASSERT_OK(err, "bpf_prog_test_run(test2)"); diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c index b58b775d19f3..67bebd324147 100644 --- a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c +++ b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c @@ -87,8 +87,7 @@ void test_ksyms_btf(void) struct btf *btf; btf = libbpf_find_kernel_btf(); - if (CHECK(IS_ERR(btf), "btf_exists", "failed to load kernel BTF: %ld\n", - PTR_ERR(btf))) + if (!ASSERT_OK_PTR(btf, "btf_exists")) return; percpu_datasec = btf__find_by_name_kind(btf, ".data..percpu", diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_module.c b/tools/testing/selftests/bpf/prog_tests/ksyms_module.c index 4c232b456479..2cd5cded543f 100644 --- 
a/tools/testing/selftests/bpf/prog_tests/ksyms_module.c +++ b/tools/testing/selftests/bpf/prog_tests/ksyms_module.c @@ -4,7 +4,7 @@ #include <test_progs.h> #include <bpf/libbpf.h> #include <bpf/btf.h> -#include "test_ksyms_module.skel.h" +#include "test_ksyms_module.lskel.h" static int duration; diff --git a/tools/testing/selftests/bpf/prog_tests/link_pinning.c b/tools/testing/selftests/bpf/prog_tests/link_pinning.c index a743288cf384..6fc97c45f71e 100644 --- a/tools/testing/selftests/bpf/prog_tests/link_pinning.c +++ b/tools/testing/selftests/bpf/prog_tests/link_pinning.c @@ -17,7 +17,7 @@ void test_link_pinning_subtest(struct bpf_program *prog, int err, i; link = bpf_program__attach(prog); - if (CHECK(IS_ERR(link), "link_attach", "err: %ld\n", PTR_ERR(link))) + if (!ASSERT_OK_PTR(link, "link_attach")) goto cleanup; bss->in = 1; @@ -51,7 +51,7 @@ void test_link_pinning_subtest(struct bpf_program *prog, /* re-open link from BPFFS */ link = bpf_link__open(link_pin_path); - if (CHECK(IS_ERR(link), "link_open", "err: %ld\n", PTR_ERR(link))) + if (!ASSERT_OK_PTR(link, "link_open")) goto cleanup; CHECK(strcmp(link_pin_path, bpf_link__pin_path(link)), "pin_path2", @@ -84,8 +84,7 @@ void test_link_pinning_subtest(struct bpf_program *prog, CHECK(i == 10000, "link_attached", "got to iteration #%d\n", i); cleanup: - if (!IS_ERR(link)) - bpf_link__destroy(link); + bpf_link__destroy(link); } void test_link_pinning(void) diff --git a/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c b/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c new file mode 100644 index 000000000000..beebfa9730e1 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c @@ -0,0 +1,288 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <test_progs.h> +#include "test_lookup_and_delete.skel.h" + +#define START_VALUE 1234 +#define NEW_VALUE 4321 +#define MAX_ENTRIES 2 + +static int duration; +static int nr_cpus; + +static int fill_values(int map_fd) +{ + __u64 
key, value = START_VALUE; + int err; + + for (key = 1; key < MAX_ENTRIES + 1; key++) { + err = bpf_map_update_elem(map_fd, &key, &value, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem")) + return -1; + } + + return 0; +} + +static int fill_values_percpu(int map_fd) +{ + __u64 key, value[nr_cpus]; + int i, err; + + for (i = 0; i < nr_cpus; i++) + value[i] = START_VALUE; + + for (key = 1; key < MAX_ENTRIES + 1; key++) { + err = bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem")) + return -1; + } + + return 0; +} + +static struct test_lookup_and_delete *setup_prog(enum bpf_map_type map_type, + int *map_fd) +{ + struct test_lookup_and_delete *skel; + int err; + + skel = test_lookup_and_delete__open(); + if (!ASSERT_OK_PTR(skel, "test_lookup_and_delete__open")) + return NULL; + + err = bpf_map__set_type(skel->maps.hash_map, map_type); + if (!ASSERT_OK(err, "bpf_map__set_type")) + goto cleanup; + + err = bpf_map__set_max_entries(skel->maps.hash_map, MAX_ENTRIES); + if (!ASSERT_OK(err, "bpf_map__set_max_entries")) + goto cleanup; + + err = test_lookup_and_delete__load(skel); + if (!ASSERT_OK(err, "test_lookup_and_delete__load")) + goto cleanup; + + *map_fd = bpf_map__fd(skel->maps.hash_map); + if (!ASSERT_GE(*map_fd, 0, "bpf_map__fd")) + goto cleanup; + + return skel; + +cleanup: + test_lookup_and_delete__destroy(skel); + return NULL; +} + +/* Triggers BPF program that updates map with given key and value */ +static int trigger_tp(struct test_lookup_and_delete *skel, __u64 key, + __u64 value) +{ + int err; + + skel->bss->set_pid = getpid(); + skel->bss->set_key = key; + skel->bss->set_value = value; + + err = test_lookup_and_delete__attach(skel); + if (!ASSERT_OK(err, "test_lookup_and_delete__attach")) + return -1; + + syscall(__NR_getpgid); + + test_lookup_and_delete__detach(skel); + + return 0; +} + +static void test_lookup_and_delete_hash(void) +{ + struct test_lookup_and_delete *skel; + __u64 key, value; + int 
map_fd, err; + + /* Setup program and fill the map. */ + skel = setup_prog(BPF_MAP_TYPE_HASH, &map_fd); + if (!ASSERT_OK_PTR(skel, "setup_prog")) + return; + + err = fill_values(map_fd); + if (!ASSERT_OK(err, "fill_values")) + goto cleanup; + + /* Lookup and delete element. */ + key = 1; + err = bpf_map_lookup_and_delete_elem(map_fd, &key, &value); + if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem")) + goto cleanup; + + /* Fetched value should match the initially set value. */ + if (CHECK(value != START_VALUE, "bpf_map_lookup_and_delete_elem", + "unexpected value=%lld\n", value)) + goto cleanup; + + /* Check that the entry is non existent. */ + err = bpf_map_lookup_elem(map_fd, &key, &value); + if (!ASSERT_ERR(err, "bpf_map_lookup_elem")) + goto cleanup; + +cleanup: + test_lookup_and_delete__destroy(skel); +} + +static void test_lookup_and_delete_percpu_hash(void) +{ + struct test_lookup_and_delete *skel; + __u64 key, val, value[nr_cpus]; + int map_fd, err, i; + + /* Setup program and fill the map. */ + skel = setup_prog(BPF_MAP_TYPE_PERCPU_HASH, &map_fd); + if (!ASSERT_OK_PTR(skel, "setup_prog")) + return; + + err = fill_values_percpu(map_fd); + if (!ASSERT_OK(err, "fill_values_percpu")) + goto cleanup; + + /* Lookup and delete element. */ + key = 1; + err = bpf_map_lookup_and_delete_elem(map_fd, &key, value); + if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem")) + goto cleanup; + + for (i = 0; i < nr_cpus; i++) { + val = value[i]; + + /* Fetched value should match the initially set value. */ + if (CHECK(val != START_VALUE, "map value", + "unexpected for cpu %d: %lld\n", i, val)) + goto cleanup; + } + + /* Check that the entry is non existent. 
*/ + err = bpf_map_lookup_elem(map_fd, &key, value); + if (!ASSERT_ERR(err, "bpf_map_lookup_elem")) + goto cleanup; + +cleanup: + test_lookup_and_delete__destroy(skel); +} + +static void test_lookup_and_delete_lru_hash(void) +{ + struct test_lookup_and_delete *skel; + __u64 key, value; + int map_fd, err; + + /* Setup program and fill the LRU map. */ + skel = setup_prog(BPF_MAP_TYPE_LRU_HASH, &map_fd); + if (!ASSERT_OK_PTR(skel, "setup_prog")) + return; + + err = fill_values(map_fd); + if (!ASSERT_OK(err, "fill_values")) + goto cleanup; + + /* Insert new element at key=3, should reuse LRU element. */ + key = 3; + err = trigger_tp(skel, key, NEW_VALUE); + if (!ASSERT_OK(err, "trigger_tp")) + goto cleanup; + + /* Lookup and delete element 3. */ + err = bpf_map_lookup_and_delete_elem(map_fd, &key, &value); + if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem")) + goto cleanup; + + /* Value should match the new value. */ + if (CHECK(value != NEW_VALUE, "bpf_map_lookup_and_delete_elem", + "unexpected value=%lld\n", value)) + goto cleanup; + + /* Check that entries 3 and 1 are non existent. */ + err = bpf_map_lookup_elem(map_fd, &key, &value); + if (!ASSERT_ERR(err, "bpf_map_lookup_elem")) + goto cleanup; + + key = 1; + err = bpf_map_lookup_elem(map_fd, &key, &value); + if (!ASSERT_ERR(err, "bpf_map_lookup_elem")) + goto cleanup; + +cleanup: + test_lookup_and_delete__destroy(skel); +} + +static void test_lookup_and_delete_lru_percpu_hash(void) +{ + struct test_lookup_and_delete *skel; + __u64 key, val, value[nr_cpus]; + int map_fd, err, i, cpucnt = 0; + + /* Setup program and fill the LRU map. */ + skel = setup_prog(BPF_MAP_TYPE_LRU_PERCPU_HASH, &map_fd); + if (!ASSERT_OK_PTR(skel, "setup_prog")) + return; + + err = fill_values_percpu(map_fd); + if (!ASSERT_OK(err, "fill_values_percpu")) + goto cleanup; + + /* Insert new element at key=3, should reuse LRU element 1. 
*/ + key = 3; + err = trigger_tp(skel, key, NEW_VALUE); + if (!ASSERT_OK(err, "trigger_tp")) + goto cleanup; + + /* Clean value. */ + for (i = 0; i < nr_cpus; i++) + value[i] = 0; + + /* Lookup and delete element 3. */ + err = bpf_map_lookup_and_delete_elem(map_fd, &key, value); + if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem")) { + goto cleanup; + } + + /* Check if only one CPU has set the value. */ + for (i = 0; i < nr_cpus; i++) { + val = value[i]; + if (val) { + if (CHECK(val != NEW_VALUE, "map value", + "unexpected for cpu %d: %lld\n", i, val)) + goto cleanup; + cpucnt++; + } + } + if (CHECK(cpucnt != 1, "map value", "set for %d CPUs instead of 1!\n", + cpucnt)) + goto cleanup; + + /* Check that entries 3 and 1 are non existent. */ + err = bpf_map_lookup_elem(map_fd, &key, &value); + if (!ASSERT_ERR(err, "bpf_map_lookup_elem")) + goto cleanup; + + key = 1; + err = bpf_map_lookup_elem(map_fd, &key, &value); + if (!ASSERT_ERR(err, "bpf_map_lookup_elem")) + goto cleanup; + +cleanup: + test_lookup_and_delete__destroy(skel); +} + +void test_lookup_and_delete(void) +{ + nr_cpus = bpf_num_possible_cpus(); + + if (test__start_subtest("lookup_and_delete")) + test_lookup_and_delete_hash(); + if (test__start_subtest("lookup_and_delete_percpu")) + test_lookup_and_delete_percpu_hash(); + if (test__start_subtest("lookup_and_delete_lru")) + test_lookup_and_delete_lru_hash(); + if (test__start_subtest("lookup_and_delete_lru_percpu")) + test_lookup_and_delete_lru_percpu_hash(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c b/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c new file mode 100644 index 000000000000..59adb4715394 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c @@ -0,0 +1,559 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Check if we can migrate child sockets. + * + * 1. call listen() for 4 server sockets. + * 2. call connect() for 25 client sockets. + * 3. 
call listen() for 1 server socket. (migration target) + * 4. update a map to migrate all child sockets + * to the last server socket (migrate_map[cookie] = 4) + * 5. call shutdown() for first 4 server sockets + * and migrate the requests in the accept queue + * to the last server socket. + * 6. call listen() for the second server socket. + * 7. call shutdown() for the last server + * and migrate the requests in the accept queue + * to the second server socket. + * 8. call listen() for the last server. + * 9. call shutdown() for the second server + * and migrate the requests in the accept queue + * to the last server socket. + * 10. call accept() for the last server socket. + * + * Author: Kuniyuki Iwashima <kuniyu@amazon.co.jp> + */ + +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + +#include "test_progs.h" +#include "test_migrate_reuseport.skel.h" +#include "network_helpers.h" + +#ifndef TCP_FASTOPEN_CONNECT +#define TCP_FASTOPEN_CONNECT 30 +#endif + +#define IFINDEX_LO 1 + +#define NR_SERVERS 5 +#define NR_CLIENTS (NR_SERVERS * 5) +#define MIGRATED_TO (NR_SERVERS - 1) + +/* fastopenq->max_qlen and sk->sk_max_ack_backlog */ +#define QLEN (NR_CLIENTS * 5) + +#define MSG "Hello World\0" +#define MSGLEN 12 + +static struct migrate_reuseport_test_case { + const char *name; + __s64 servers[NR_SERVERS]; + __s64 clients[NR_CLIENTS]; + struct sockaddr_storage addr; + socklen_t addrlen; + int family; + int state; + bool drop_ack; + bool expire_synack_timer; + bool fastopen; + struct bpf_link *link; +} test_cases[] = { + { + .name = "IPv4 TCP_ESTABLISHED inet_csk_listen_stop", + .family = AF_INET, + .state = BPF_TCP_ESTABLISHED, + .drop_ack = false, + .expire_synack_timer = false, + .fastopen = false, + }, + { + .name = "IPv4 TCP_SYN_RECV inet_csk_listen_stop", + .family = AF_INET, + .state = BPF_TCP_SYN_RECV, + .drop_ack = true, + .expire_synack_timer = false, + .fastopen = true, + }, + { + .name = "IPv4 TCP_NEW_SYN_RECV reqsk_timer_handler", + .family = AF_INET, + .state 
= BPF_TCP_NEW_SYN_RECV, + .drop_ack = true, + .expire_synack_timer = true, + .fastopen = false, + }, + { + .name = "IPv4 TCP_NEW_SYN_RECV inet_csk_complete_hashdance", + .family = AF_INET, + .state = BPF_TCP_NEW_SYN_RECV, + .drop_ack = true, + .expire_synack_timer = false, + .fastopen = false, + }, + { + .name = "IPv6 TCP_ESTABLISHED inet_csk_listen_stop", + .family = AF_INET6, + .state = BPF_TCP_ESTABLISHED, + .drop_ack = false, + .expire_synack_timer = false, + .fastopen = false, + }, + { + .name = "IPv6 TCP_SYN_RECV inet_csk_listen_stop", + .family = AF_INET6, + .state = BPF_TCP_SYN_RECV, + .drop_ack = true, + .expire_synack_timer = false, + .fastopen = true, + }, + { + .name = "IPv6 TCP_NEW_SYN_RECV reqsk_timer_handler", + .family = AF_INET6, + .state = BPF_TCP_NEW_SYN_RECV, + .drop_ack = true, + .expire_synack_timer = true, + .fastopen = false, + }, + { + .name = "IPv6 TCP_NEW_SYN_RECV inet_csk_complete_hashdance", + .family = AF_INET6, + .state = BPF_TCP_NEW_SYN_RECV, + .drop_ack = true, + .expire_synack_timer = false, + .fastopen = false, + } +}; + +static void init_fds(__s64 fds[], int len) +{ + int i; + + for (i = 0; i < len; i++) + fds[i] = -1; +} + +static void close_fds(__s64 fds[], int len) +{ + int i; + + for (i = 0; i < len; i++) { + if (fds[i] != -1) { + close(fds[i]); + fds[i] = -1; + } + } +} + +static int setup_fastopen(char *buf, int size, int *saved_len, bool restore) +{ + int err = 0, fd, len; + + fd = open("/proc/sys/net/ipv4/tcp_fastopen", O_RDWR); + if (!ASSERT_NEQ(fd, -1, "open")) + return -1; + + if (restore) { + len = write(fd, buf, *saved_len); + if (!ASSERT_EQ(len, *saved_len, "write - restore")) + err = -1; + } else { + *saved_len = read(fd, buf, size); + if (!ASSERT_GE(*saved_len, 1, "read")) { + err = -1; + goto close; + } + + err = lseek(fd, 0, SEEK_SET); + if (!ASSERT_OK(err, "lseek")) + goto close; + + /* (TFO_CLIENT_ENABLE | TFO_SERVER_ENABLE | + * TFO_CLIENT_NO_COOKIE | TFO_SERVER_COOKIE_NOT_REQD) + */ + len = write(fd, "519", 
3); + if (!ASSERT_EQ(len, 3, "write - setup")) + err = -1; + } + +close: + close(fd); + + return err; +} + +static int drop_ack(struct migrate_reuseport_test_case *test_case, + struct test_migrate_reuseport *skel) +{ + if (test_case->family == AF_INET) + skel->bss->server_port = ((struct sockaddr_in *) + &test_case->addr)->sin_port; + else + skel->bss->server_port = ((struct sockaddr_in6 *) + &test_case->addr)->sin6_port; + + test_case->link = bpf_program__attach_xdp(skel->progs.drop_ack, + IFINDEX_LO); + if (!ASSERT_OK_PTR(test_case->link, "bpf_program__attach_xdp")) + return -1; + + return 0; +} + +static int pass_ack(struct migrate_reuseport_test_case *test_case) +{ + int err; + + err = bpf_link__detach(test_case->link); + if (!ASSERT_OK(err, "bpf_link__detach")) + return -1; + + test_case->link = NULL; + + return 0; +} + +static int start_servers(struct migrate_reuseport_test_case *test_case, + struct test_migrate_reuseport *skel) +{ + int i, err, prog_fd, reuseport = 1, qlen = QLEN; + + prog_fd = bpf_program__fd(skel->progs.migrate_reuseport); + + make_sockaddr(test_case->family, + test_case->family == AF_INET ? 
"127.0.0.1" : "::1", 0, + &test_case->addr, &test_case->addrlen); + + for (i = 0; i < NR_SERVERS; i++) { + test_case->servers[i] = socket(test_case->family, SOCK_STREAM, + IPPROTO_TCP); + if (!ASSERT_NEQ(test_case->servers[i], -1, "socket")) + return -1; + + err = setsockopt(test_case->servers[i], SOL_SOCKET, + SO_REUSEPORT, &reuseport, sizeof(reuseport)); + if (!ASSERT_OK(err, "setsockopt - SO_REUSEPORT")) + return -1; + + err = bind(test_case->servers[i], + (struct sockaddr *)&test_case->addr, + test_case->addrlen); + if (!ASSERT_OK(err, "bind")) + return -1; + + if (i == 0) { + err = setsockopt(test_case->servers[i], SOL_SOCKET, + SO_ATTACH_REUSEPORT_EBPF, + &prog_fd, sizeof(prog_fd)); + if (!ASSERT_OK(err, + "setsockopt - SO_ATTACH_REUSEPORT_EBPF")) + return -1; + + err = getsockname(test_case->servers[i], + (struct sockaddr *)&test_case->addr, + &test_case->addrlen); + if (!ASSERT_OK(err, "getsockname")) + return -1; + } + + if (test_case->fastopen) { + err = setsockopt(test_case->servers[i], + SOL_TCP, TCP_FASTOPEN, + &qlen, sizeof(qlen)); + if (!ASSERT_OK(err, "setsockopt - TCP_FASTOPEN")) + return -1; + } + + /* All requests will be tied to the first four listeners */ + if (i != MIGRATED_TO) { + err = listen(test_case->servers[i], qlen); + if (!ASSERT_OK(err, "listen")) + return -1; + } + } + + return 0; +} + +static int start_clients(struct migrate_reuseport_test_case *test_case) +{ + char buf[MSGLEN] = MSG; + int i, err; + + for (i = 0; i < NR_CLIENTS; i++) { + test_case->clients[i] = socket(test_case->family, SOCK_STREAM, + IPPROTO_TCP); + if (!ASSERT_NEQ(test_case->clients[i], -1, "socket")) + return -1; + + /* The attached XDP program drops only the final ACK, so + * clients will transition to TCP_ESTABLISHED immediately. 
+ */ + err = settimeo(test_case->clients[i], 100); + if (!ASSERT_OK(err, "settimeo")) + return -1; + + if (test_case->fastopen) { + int fastopen = 1; + + err = setsockopt(test_case->clients[i], IPPROTO_TCP, + TCP_FASTOPEN_CONNECT, &fastopen, + sizeof(fastopen)); + if (!ASSERT_OK(err, + "setsockopt - TCP_FASTOPEN_CONNECT")) + return -1; + } + + err = connect(test_case->clients[i], + (struct sockaddr *)&test_case->addr, + test_case->addrlen); + if (!ASSERT_OK(err, "connect")) + return -1; + + err = write(test_case->clients[i], buf, MSGLEN); + if (!ASSERT_EQ(err, MSGLEN, "write")) + return -1; + } + + return 0; +} + +static int update_maps(struct migrate_reuseport_test_case *test_case, + struct test_migrate_reuseport *skel) +{ + int i, err, migrated_to = MIGRATED_TO; + int reuseport_map_fd, migrate_map_fd; + __u64 value; + + reuseport_map_fd = bpf_map__fd(skel->maps.reuseport_map); + migrate_map_fd = bpf_map__fd(skel->maps.migrate_map); + + for (i = 0; i < NR_SERVERS; i++) { + value = (__u64)test_case->servers[i]; + err = bpf_map_update_elem(reuseport_map_fd, &i, &value, + BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem - reuseport_map")) + return -1; + + err = bpf_map_lookup_elem(reuseport_map_fd, &i, &value); + if (!ASSERT_OK(err, "bpf_map_lookup_elem - reuseport_map")) + return -1; + + err = bpf_map_update_elem(migrate_map_fd, &value, &migrated_to, + BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem - migrate_map")) + return -1; + } + + return 0; +} + +static int migrate_dance(struct migrate_reuseport_test_case *test_case) +{ + int i, err; + + /* Migrate TCP_ESTABLISHED and TCP_SYN_RECV requests + * to the last listener based on eBPF. 
+ */ + for (i = 0; i < MIGRATED_TO; i++) { + err = shutdown(test_case->servers[i], SHUT_RDWR); + if (!ASSERT_OK(err, "shutdown")) + return -1; + } + + /* No dance for TCP_NEW_SYN_RECV to migrate based on eBPF */ + if (test_case->state == BPF_TCP_NEW_SYN_RECV) + return 0; + + /* Note that we use the second listener instead of the + * first one here. + * + * The first listener is bind()ed with port 0 and, + * SOCK_BINDPORT_LOCK is not set to sk_userlocks, so + * calling listen() again will bind() the first listener + * on a new ephemeral port and detach it from the existing + * reuseport group. (See: __inet_bind(), tcp_set_state()) + * + * OTOH, the second one is bind()ed with a specific port, + * and SOCK_BINDPORT_LOCK is set. Thus, re-listen() will + * resurrect the listener on the existing reuseport group. + */ + err = listen(test_case->servers[1], QLEN); + if (!ASSERT_OK(err, "listen")) + return -1; + + /* Migrate from the last listener to the second one. + * + * All listeners were detached out of the reuseport_map, + * so migration will be done by kernel random pick from here. 
+ */ + err = shutdown(test_case->servers[MIGRATED_TO], SHUT_RDWR); + if (!ASSERT_OK(err, "shutdown")) + return -1; + + /* Back to the existing reuseport group */ + err = listen(test_case->servers[MIGRATED_TO], QLEN); + if (!ASSERT_OK(err, "listen")) + return -1; + + /* Migrate back to the last one from the second one */ + err = shutdown(test_case->servers[1], SHUT_RDWR); + if (!ASSERT_OK(err, "shutdown")) + return -1; + + return 0; +} + +static void count_requests(struct migrate_reuseport_test_case *test_case, + struct test_migrate_reuseport *skel) +{ + struct sockaddr_storage addr; + socklen_t len = sizeof(addr); + int err, cnt = 0, client; + char buf[MSGLEN]; + + err = settimeo(test_case->servers[MIGRATED_TO], 4000); + if (!ASSERT_OK(err, "settimeo")) + goto out; + + for (; cnt < NR_CLIENTS; cnt++) { + client = accept(test_case->servers[MIGRATED_TO], + (struct sockaddr *)&addr, &len); + if (!ASSERT_NEQ(client, -1, "accept")) + goto out; + + memset(buf, 0, MSGLEN); + read(client, &buf, MSGLEN); + close(client); + + if (!ASSERT_STREQ(buf, MSG, "read")) + goto out; + } + +out: + ASSERT_EQ(cnt, NR_CLIENTS, "count in userspace"); + + switch (test_case->state) { + case BPF_TCP_ESTABLISHED: + cnt = skel->bss->migrated_at_close; + break; + case BPF_TCP_SYN_RECV: + cnt = skel->bss->migrated_at_close_fastopen; + break; + case BPF_TCP_NEW_SYN_RECV: + if (test_case->expire_synack_timer) + cnt = skel->bss->migrated_at_send_synack; + else + cnt = skel->bss->migrated_at_recv_ack; + break; + default: + cnt = 0; + } + + ASSERT_EQ(cnt, NR_CLIENTS, "count in BPF prog"); +} + +static void run_test(struct migrate_reuseport_test_case *test_case, + struct test_migrate_reuseport *skel) +{ + int err, saved_len; + char buf[16]; + + skel->bss->migrated_at_close = 0; + skel->bss->migrated_at_close_fastopen = 0; + skel->bss->migrated_at_send_synack = 0; + skel->bss->migrated_at_recv_ack = 0; + + init_fds(test_case->servers, NR_SERVERS); + init_fds(test_case->clients, NR_CLIENTS); + + if 
(test_case->fastopen) { + memset(buf, 0, sizeof(buf)); + + err = setup_fastopen(buf, sizeof(buf), &saved_len, false); + if (!ASSERT_OK(err, "setup_fastopen - setup")) + return; + } + + err = start_servers(test_case, skel); + if (!ASSERT_OK(err, "start_servers")) + goto close_servers; + + if (test_case->drop_ack) { + /* Drop the final ACK of the 3-way handshake and stick the + * in-flight requests on TCP_SYN_RECV or TCP_NEW_SYN_RECV. + */ + err = drop_ack(test_case, skel); + if (!ASSERT_OK(err, "drop_ack")) + goto close_servers; + } + + /* Tie requests to the first four listeners */ + err = start_clients(test_case); + if (!ASSERT_OK(err, "start_clients")) + goto close_clients; + + err = listen(test_case->servers[MIGRATED_TO], QLEN); + if (!ASSERT_OK(err, "listen")) + goto close_clients; + + err = update_maps(test_case, skel); + if (!ASSERT_OK(err, "fill_maps")) + goto close_clients; + + /* Migrate the requests in the accept queue only. + * TCP_NEW_SYN_RECV requests are not migrated at this point. + */ + err = migrate_dance(test_case); + if (!ASSERT_OK(err, "migrate_dance")) + goto close_clients; + + if (test_case->expire_synack_timer) { + /* Wait for SYN+ACK timers to expire so that + * reqsk_timer_handler() migrates TCP_NEW_SYN_RECV requests. 
+ */ + sleep(1); + } + + if (test_case->link) { + /* Resume 3WHS and migrate TCP_NEW_SYN_RECV requests */ + err = pass_ack(test_case); + if (!ASSERT_OK(err, "pass_ack")) + goto close_clients; + } + + count_requests(test_case, skel); + +close_clients: + close_fds(test_case->clients, NR_CLIENTS); + + if (test_case->link) { + err = pass_ack(test_case); + ASSERT_OK(err, "pass_ack - clean up"); + } + +close_servers: + close_fds(test_case->servers, NR_SERVERS); + + if (test_case->fastopen) { + err = setup_fastopen(buf, sizeof(buf), &saved_len, true); + ASSERT_OK(err, "setup_fastopen - restore"); + } +} + +void test_migrate_reuseport(void) +{ + struct test_migrate_reuseport *skel; + int i; + + skel = test_migrate_reuseport__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + for (i = 0; i < ARRAY_SIZE(test_cases); i++) { + test__start_subtest(test_cases[i].name); + run_test(&test_cases[i], skel); + } + + test_migrate_reuseport__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/obj_name.c b/tools/testing/selftests/bpf/prog_tests/obj_name.c index e178416bddad..6194b776a28b 100644 --- a/tools/testing/selftests/bpf/prog_tests/obj_name.c +++ b/tools/testing/selftests/bpf/prog_tests/obj_name.c @@ -38,13 +38,13 @@ void test_obj_name(void) fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)); CHECK((tests[i].success && fd < 0) || - (!tests[i].success && fd != -1) || + (!tests[i].success && fd >= 0) || (!tests[i].success && errno != tests[i].expected_errno), "check-bpf-prog-name", "fd %d(%d) errno %d(%d)\n", fd, tests[i].success, errno, tests[i].expected_errno); - if (fd != -1) + if (fd >= 0) close(fd); /* test different attr.map_name during BPF_MAP_CREATE */ @@ -59,13 +59,13 @@ void test_obj_name(void) memcpy(attr.map_name, tests[i].name, ncopy); fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr)); CHECK((tests[i].success && fd < 0) || - (!tests[i].success && fd != -1) || + (!tests[i].success && fd >= 0) || 
(!tests[i].success && errno != tests[i].expected_errno), "check-bpf-map-name", "fd %d(%d) errno %d(%d)\n", fd, tests[i].success, errno, tests[i].expected_errno); - if (fd != -1) + if (fd >= 0) close(fd); } } diff --git a/tools/testing/selftests/bpf/prog_tests/perf_branches.c b/tools/testing/selftests/bpf/prog_tests/perf_branches.c index e35c444902a7..12c4f45cee1a 100644 --- a/tools/testing/selftests/bpf/prog_tests/perf_branches.c +++ b/tools/testing/selftests/bpf/prog_tests/perf_branches.c @@ -74,7 +74,7 @@ static void test_perf_branches_common(int perf_fd, /* attach perf_event */ link = bpf_program__attach_perf_event(skel->progs.perf_branches, perf_fd); - if (CHECK(IS_ERR(link), "attach_perf_event", "err %ld\n", PTR_ERR(link))) + if (!ASSERT_OK_PTR(link, "attach_perf_event")) goto out_destroy_skel; /* generate some branches on cpu 0 */ @@ -119,7 +119,7 @@ static void test_perf_branches_hw(void) * Some setups don't support branch records (virtual machines, !x86), * so skip test in this case. 
*/ - if (pfd == -1) { + if (pfd < 0) { if (errno == ENOENT || errno == EOPNOTSUPP) { printf("%s:SKIP:no PERF_SAMPLE_BRANCH_STACK\n", __func__); diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c index ca9f0895ec84..6490e9673002 100644 --- a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c +++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c @@ -80,7 +80,7 @@ void test_perf_buffer(void) pb_opts.sample_cb = on_sample; pb_opts.ctx = &cpu_seen; pb = perf_buffer__new(bpf_map__fd(skel->maps.perf_buf_map), 1, &pb_opts); - if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb))) + if (!ASSERT_OK_PTR(pb, "perf_buf__new")) goto out_close; CHECK(perf_buffer__epoll_fd(pb) < 0, "epoll_fd", diff --git a/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c b/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c index 72c3690844fb..33144c9432ae 100644 --- a/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c +++ b/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c @@ -97,8 +97,7 @@ void test_perf_event_stackmap(void) skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu, pmu_fd); - if (CHECK(IS_ERR(skel->links.oncpu), "attach_perf_event", - "err %ld\n", PTR_ERR(skel->links.oncpu))) { + if (!ASSERT_OK_PTR(skel->links.oncpu, "attach_perf_event")) { close(pmu_fd); goto cleanup; } diff --git a/tools/testing/selftests/bpf/prog_tests/probe_user.c b/tools/testing/selftests/bpf/prog_tests/probe_user.c index 7aecfd9e87d1..95bd12097358 100644 --- a/tools/testing/selftests/bpf/prog_tests/probe_user.c +++ b/tools/testing/selftests/bpf/prog_tests/probe_user.c @@ -15,7 +15,7 @@ void test_probe_user(void) static const int zero = 0; obj = bpf_object__open_file(obj_file, &opts); - if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) + if (!ASSERT_OK_PTR(obj, "obj_open_file")) return; kprobe_prog = bpf_object__find_program_by_title(obj, prog_name); 
@@ -33,11 +33,8 @@ void test_probe_user(void) goto cleanup; kprobe_link = bpf_program__attach(kprobe_prog); - if (CHECK(IS_ERR(kprobe_link), "attach_kprobe", - "err %ld\n", PTR_ERR(kprobe_link))) { - kprobe_link = NULL; + if (!ASSERT_OK_PTR(kprobe_link, "attach_kprobe")) goto cleanup; - } memset(&curr, 0, sizeof(curr)); in->sin_family = AF_INET; diff --git a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c b/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c index 131d7f7eeb42..89fc98faf19e 100644 --- a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c +++ b/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c @@ -46,7 +46,7 @@ void test_prog_run_xattr(void) tattr.prog_fd = bpf_program__fd(skel->progs.test_pkt_access); err = bpf_prog_test_run_xattr(&tattr); - CHECK_ATTR(err != -1 || errno != ENOSPC || tattr.retval, "run", + CHECK_ATTR(err >= 0 || errno != ENOSPC || tattr.retval, "run", "err %d errno %d retval %d\n", err, errno, tattr.retval); CHECK_ATTR(tattr.data_size_out != sizeof(pkt_v4), "data_size_out", @@ -78,6 +78,6 @@ void test_prog_run_xattr(void) cleanup: if (skel) test_pkt_access__destroy(skel); - if (stats_fd != -1) + if (stats_fd >= 0) close(stats_fd); } diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c index c5fb191874ac..41720a62c4fa 100644 --- a/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c @@ -77,7 +77,7 @@ void test_raw_tp_test_run(void) /* invalid cpu ID should fail with ENXIO */ opts.cpu = 0xffffffff; err = bpf_prog_test_run_opts(prog_fd, &opts); - CHECK(err != -1 || errno != ENXIO, + CHECK(err >= 0 || errno != ENXIO, "test_run_opts_fail", "should failed with ENXIO\n"); @@ -85,7 +85,7 @@ void test_raw_tp_test_run(void) opts.cpu = 1; opts.flags = 0; err = bpf_prog_test_run_opts(prog_fd, &opts); - CHECK(err != -1 || errno != EINVAL, + CHECK(err >= 0 || errno != EINVAL, 
"test_run_opts_fail", "should failed with EINVAL\n"); diff --git a/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c b/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c index 563e12120e77..5f9eaa3ab584 100644 --- a/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c +++ b/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c @@ -30,7 +30,7 @@ void test_rdonly_maps(void) struct bss bss; obj = bpf_object__open_file(file, NULL); - if (CHECK(IS_ERR(obj), "obj_open", "err %ld\n", PTR_ERR(obj))) + if (!ASSERT_OK_PTR(obj, "obj_open")) return; err = bpf_object__load(obj); @@ -58,11 +58,8 @@ void test_rdonly_maps(void) goto cleanup; link = bpf_program__attach_raw_tracepoint(prog, "sys_enter"); - if (CHECK(IS_ERR(link), "attach_prog", "prog '%s', err %ld\n", - t->prog_name, PTR_ERR(link))) { - link = NULL; + if (!ASSERT_OK_PTR(link, "attach_prog")) goto cleanup; - } /* trigger probe */ usleep(1); diff --git a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c index ac1ee10cffd8..de2688166696 100644 --- a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c +++ b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c @@ -15,7 +15,7 @@ void test_reference_tracking(void) int err = 0; obj = bpf_object__open_file(file, &open_opts); - if (CHECK_FAIL(IS_ERR(obj))) + if (!ASSERT_OK_PTR(obj, "obj_open_file")) return; if (CHECK(strcmp(bpf_object__name(obj), obj_name), "obj_name", diff --git a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c index d3c2de2c24d1..f62361306f6d 100644 --- a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c +++ b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c @@ -76,7 +76,7 @@ __resolve_symbol(struct btf *btf, int type_id) } for (i = 0; i < ARRAY_SIZE(test_symbols); i++) { - if (test_symbols[i].id != -1) + if (test_symbols[i].id >= 0) continue; if (BTF_INFO_KIND(type->info) != 
test_symbols[i].type) diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c index f9a8ae331963..4706cee84360 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c @@ -12,7 +12,7 @@ #include <sys/sysinfo.h> #include <linux/perf_event.h> #include <linux/ring_buffer.h> -#include "test_ringbuf.skel.h" +#include "test_ringbuf.lskel.h" #define EDONE 7777 @@ -94,15 +94,13 @@ void test_ringbuf(void) if (CHECK(!skel, "skel_open", "skeleton open failed\n")) return; - err = bpf_map__set_max_entries(skel->maps.ringbuf, page_size); - if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n")) - goto cleanup; + skel->maps.ringbuf.max_entries = page_size; err = test_ringbuf__load(skel); if (CHECK(err != 0, "skel_load", "skeleton load failed\n")) goto cleanup; - rb_fd = bpf_map__fd(skel->maps.ringbuf); + rb_fd = skel->maps.ringbuf.map_fd; /* good read/write cons_pos */ mmap_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, rb_fd, 0); ASSERT_OK_PTR(mmap_ptr, "rw_cons_pos"); @@ -151,7 +149,7 @@ void test_ringbuf(void) /* only trigger BPF program for current process */ skel->bss->pid = getpid(); - ringbuf = ring_buffer__new(bpf_map__fd(skel->maps.ringbuf), + ringbuf = ring_buffer__new(skel->maps.ringbuf.map_fd, process_sample, NULL, NULL); if (CHECK(!ringbuf, "ringbuf_create", "failed to create ringbuf\n")) goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c index cef63e703924..167cd8a2edfd 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c @@ -63,7 +63,7 @@ void test_ringbuf_multi(void) goto cleanup; proto_fd = bpf_create_map(BPF_MAP_TYPE_RINGBUF, 0, 0, page_size, 0); - if (CHECK(proto_fd == -1, "bpf_create_map", "bpf_create_map failed\n")) + if (CHECK(proto_fd 
< 0, "bpf_create_map", "bpf_create_map failed\n")) goto cleanup; err = bpf_map__set_inner_map_fd(skel->maps.ringbuf_hash, proto_fd); diff --git a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c index 821b4146b7b6..4efd337d6a3c 100644 --- a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c +++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c @@ -78,7 +78,7 @@ static int create_maps(enum bpf_map_type inner_type) attr.max_entries = REUSEPORT_ARRAY_SIZE; reuseport_array = bpf_create_map_xattr(&attr); - RET_ERR(reuseport_array == -1, "creating reuseport_array", + RET_ERR(reuseport_array < 0, "creating reuseport_array", "reuseport_array:%d errno:%d\n", reuseport_array, errno); /* Creating outer_map */ @@ -89,7 +89,7 @@ static int create_maps(enum bpf_map_type inner_type) attr.max_entries = 1; attr.inner_map_fd = reuseport_array; outer_map = bpf_create_map_xattr(&attr); - RET_ERR(outer_map == -1, "creating outer_map", + RET_ERR(outer_map < 0, "creating outer_map", "outer_map:%d errno:%d\n", outer_map, errno); return 0; @@ -102,8 +102,9 @@ static int prepare_bpf_obj(void) int err; obj = bpf_object__open("test_select_reuseport_kern.o"); - RET_ERR(IS_ERR_OR_NULL(obj), "open test_select_reuseport_kern.o", - "obj:%p PTR_ERR(obj):%ld\n", obj, PTR_ERR(obj)); + err = libbpf_get_error(obj); + RET_ERR(err, "open test_select_reuseport_kern.o", + "obj:%p PTR_ERR(obj):%d\n", obj, err); map = bpf_object__find_map_by_name(obj, "outer_map"); RET_ERR(!map, "find outer_map", "!map\n"); @@ -116,31 +117,31 @@ static int prepare_bpf_obj(void) prog = bpf_program__next(NULL, obj); RET_ERR(!prog, "get first bpf_program", "!prog\n"); select_by_skb_data_prog = bpf_program__fd(prog); - RET_ERR(select_by_skb_data_prog == -1, "get prog fd", + RET_ERR(select_by_skb_data_prog < 0, "get prog fd", "select_by_skb_data_prog:%d\n", select_by_skb_data_prog); map = bpf_object__find_map_by_name(obj, "result_map"); 
RET_ERR(!map, "find result_map", "!map\n"); result_map = bpf_map__fd(map); - RET_ERR(result_map == -1, "get result_map fd", + RET_ERR(result_map < 0, "get result_map fd", "result_map:%d\n", result_map); map = bpf_object__find_map_by_name(obj, "tmp_index_ovr_map"); RET_ERR(!map, "find tmp_index_ovr_map\n", "!map"); tmp_index_ovr_map = bpf_map__fd(map); - RET_ERR(tmp_index_ovr_map == -1, "get tmp_index_ovr_map fd", + RET_ERR(tmp_index_ovr_map < 0, "get tmp_index_ovr_map fd", "tmp_index_ovr_map:%d\n", tmp_index_ovr_map); map = bpf_object__find_map_by_name(obj, "linum_map"); RET_ERR(!map, "find linum_map", "!map\n"); linum_map = bpf_map__fd(map); - RET_ERR(linum_map == -1, "get linum_map fd", + RET_ERR(linum_map < 0, "get linum_map fd", "linum_map:%d\n", linum_map); map = bpf_object__find_map_by_name(obj, "data_check_map"); RET_ERR(!map, "find data_check_map", "!map\n"); data_check_map = bpf_map__fd(map); - RET_ERR(data_check_map == -1, "get data_check_map fd", + RET_ERR(data_check_map < 0, "get data_check_map fd", "data_check_map:%d\n", data_check_map); return 0; @@ -237,7 +238,7 @@ static long get_linum(void) int err; err = bpf_map_lookup_elem(linum_map, &index_zero, &linum); - RET_ERR(err == -1, "lookup_elem(linum_map)", "err:%d errno:%d\n", + RET_ERR(err < 0, "lookup_elem(linum_map)", "err:%d errno:%d\n", err, errno); return linum; @@ -254,11 +255,11 @@ static void check_data(int type, sa_family_t family, const struct cmd *cmd, addrlen = sizeof(cli_sa); err = getsockname(cli_fd, (struct sockaddr *)&cli_sa, &addrlen); - RET_IF(err == -1, "getsockname(cli_fd)", "err:%d errno:%d\n", + RET_IF(err < 0, "getsockname(cli_fd)", "err:%d errno:%d\n", err, errno); err = bpf_map_lookup_elem(data_check_map, &index_zero, &result); - RET_IF(err == -1, "lookup_elem(data_check_map)", "err:%d errno:%d\n", + RET_IF(err < 0, "lookup_elem(data_check_map)", "err:%d errno:%d\n", err, errno); if (type == SOCK_STREAM) { @@ -347,7 +348,7 @@ static void check_results(void) for (i = 0; i < 
NR_RESULTS; i++) { err = bpf_map_lookup_elem(result_map, &i, &results[i]); - RET_IF(err == -1, "lookup_elem(result_map)", + RET_IF(err < 0, "lookup_elem(result_map)", "i:%u err:%d errno:%d\n", i, err, errno); } @@ -524,12 +525,12 @@ static void test_syncookie(int type, sa_family_t family) */ err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero, &tmp_index, BPF_ANY); - RET_IF(err == -1, "update_elem(tmp_index_ovr_map, 0, 1)", + RET_IF(err < 0, "update_elem(tmp_index_ovr_map, 0, 1)", "err:%d errno:%d\n", err, errno); do_test(type, family, &cmd, PASS); err = bpf_map_lookup_elem(tmp_index_ovr_map, &index_zero, &tmp_index); - RET_IF(err == -1 || tmp_index != -1, + RET_IF(err < 0 || tmp_index >= 0, "lookup_elem(tmp_index_ovr_map)", "err:%d errno:%d tmp_index:%d\n", err, errno, tmp_index); @@ -569,7 +570,7 @@ static void test_detach_bpf(int type, sa_family_t family) for (i = 0; i < NR_RESULTS; i++) { err = bpf_map_lookup_elem(result_map, &i, &tmp); - RET_IF(err == -1, "lookup_elem(result_map)", + RET_IF(err < 0, "lookup_elem(result_map)", "i:%u err:%d errno:%d\n", i, err, errno); nr_run_before += tmp; } @@ -584,7 +585,7 @@ static void test_detach_bpf(int type, sa_family_t family) for (i = 0; i < NR_RESULTS; i++) { err = bpf_map_lookup_elem(result_map, &i, &tmp); - RET_IF(err == -1, "lookup_elem(result_map)", + RET_IF(err < 0, "lookup_elem(result_map)", "i:%u err:%d errno:%d\n", i, err, errno); nr_run_after += tmp; } @@ -632,24 +633,24 @@ static void prepare_sk_fds(int type, sa_family_t family, bool inany) SO_ATTACH_REUSEPORT_EBPF, &select_by_skb_data_prog, sizeof(select_by_skb_data_prog)); - RET_IF(err == -1, "setsockopt(SO_ATTACH_REUEPORT_EBPF)", + RET_IF(err < 0, "setsockopt(SO_ATTACH_REUEPORT_EBPF)", "err:%d errno:%d\n", err, errno); } err = bind(sk_fds[i], (struct sockaddr *)&srv_sa, addrlen); - RET_IF(err == -1, "bind()", "sk_fds[%d] err:%d errno:%d\n", + RET_IF(err < 0, "bind()", "sk_fds[%d] err:%d errno:%d\n", i, err, errno); if (type == SOCK_STREAM) { err = 
listen(sk_fds[i], 10); - RET_IF(err == -1, "listen()", + RET_IF(err < 0, "listen()", "sk_fds[%d] err:%d errno:%d\n", i, err, errno); } err = bpf_map_update_elem(reuseport_array, &i, &sk_fds[i], BPF_NOEXIST); - RET_IF(err == -1, "update_elem(reuseport_array)", + RET_IF(err < 0, "update_elem(reuseport_array)", "sk_fds[%d] err:%d errno:%d\n", i, err, errno); if (i == first) { @@ -682,7 +683,7 @@ static void setup_per_test(int type, sa_family_t family, bool inany, prepare_sk_fds(type, family, inany); err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero, &ovr, BPF_ANY); - RET_IF(err == -1, "update_elem(tmp_index_ovr_map, 0, -1)", + RET_IF(err < 0, "update_elem(tmp_index_ovr_map, 0, -1)", "err:%d errno:%d\n", err, errno); /* Install reuseport_array to outer_map? */ @@ -691,7 +692,7 @@ static void setup_per_test(int type, sa_family_t family, bool inany, err = bpf_map_update_elem(outer_map, &index_zero, &reuseport_array, BPF_ANY); - RET_IF(err == -1, "update_elem(outer_map, 0, reuseport_array)", + RET_IF(err < 0, "update_elem(outer_map, 0, reuseport_array)", "err:%d errno:%d\n", err, errno); } @@ -720,18 +721,18 @@ static void cleanup_per_test(bool no_inner_map) return; err = bpf_map_delete_elem(outer_map, &index_zero); - RET_IF(err == -1, "delete_elem(outer_map)", + RET_IF(err < 0, "delete_elem(outer_map)", "err:%d errno:%d\n", err, errno); } static void cleanup(void) { - if (outer_map != -1) { + if (outer_map >= 0) { close(outer_map); outer_map = -1; } - if (reuseport_array != -1) { + if (reuseport_array >= 0) { close(reuseport_array); reuseport_array = -1; } diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c index 7043e6ded0e6..023cc532992d 100644 --- a/tools/testing/selftests/bpf/prog_tests/send_signal.c +++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c @@ -2,7 +2,7 @@ #include <test_progs.h> #include "test_send_signal_kern.skel.h" -static volatile int sigusr1_received = 0; +int 
sigusr1_received = 0; static void sigusr1_handler(int signum) { @@ -91,8 +91,7 @@ static void test_send_signal_common(struct perf_event_attr *attr, skel->links.send_signal_perf = bpf_program__attach_perf_event(skel->progs.send_signal_perf, pmu_fd); - if (CHECK(IS_ERR(skel->links.send_signal_perf), "attach_perf_event", - "err %ld\n", PTR_ERR(skel->links.send_signal_perf))) + if (!ASSERT_OK_PTR(skel->links.send_signal_perf, "attach_perf_event")) goto disable_pmu; } diff --git a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c index 45c82db3c58c..aee41547e7f4 100644 --- a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c +++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c @@ -480,7 +480,7 @@ static struct bpf_link *attach_lookup_prog(struct bpf_program *prog) } link = bpf_program__attach_netns(prog, net_fd); - if (CHECK(IS_ERR(link), "bpf_program__attach_netns", "failed\n")) { + if (!ASSERT_OK_PTR(link, "bpf_program__attach_netns")) { errno = -PTR_ERR(link); log_err("failed to attach program '%s' to netns", bpf_program__name(prog)); diff --git a/tools/testing/selftests/bpf/prog_tests/skeleton.c b/tools/testing/selftests/bpf/prog_tests/skeleton.c index fe87b77af459..f6f130c99b8c 100644 --- a/tools/testing/selftests/bpf/prog_tests/skeleton.c +++ b/tools/testing/selftests/bpf/prog_tests/skeleton.c @@ -82,10 +82,8 @@ void test_skeleton(void) CHECK(data->out2 != 2, "res2", "got %lld != exp %d\n", data->out2, 2); CHECK(bss->out3 != 3, "res3", "got %d != exp %d\n", (int)bss->out3, 3); CHECK(bss->out4 != 4, "res4", "got %lld != exp %d\n", bss->out4, 4); - CHECK(bss->handler_out5.a != 5, "res5", "got %d != exp %d\n", - bss->handler_out5.a, 5); - CHECK(bss->handler_out5.b != 6, "res6", "got %lld != exp %d\n", - bss->handler_out5.b, 6); + CHECK(bss->out5.a != 5, "res5", "got %d != exp %d\n", bss->out5.a, 5); + CHECK(bss->out5.b != 6, "res6", "got %lld != exp %d\n", bss->out5.b, 6); CHECK(bss->out6 != 14, "res7", "got 
%d != exp %d\n", bss->out6, 14); CHECK(bss->bpf_syscall != kcfg->CONFIG_BPF_SYSCALL, "ext1", diff --git a/tools/testing/selftests/bpf/prog_tests/sock_fields.c b/tools/testing/selftests/bpf/prog_tests/sock_fields.c index af87118e748e..577d619fb07e 100644 --- a/tools/testing/selftests/bpf/prog_tests/sock_fields.c +++ b/tools/testing/selftests/bpf/prog_tests/sock_fields.c @@ -97,12 +97,12 @@ static void check_result(void) err = bpf_map_lookup_elem(linum_map_fd, &egress_linum_idx, &egress_linum); - CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)", + CHECK(err < 0, "bpf_map_lookup_elem(linum_map_fd)", "err:%d errno:%d\n", err, errno); err = bpf_map_lookup_elem(linum_map_fd, &ingress_linum_idx, &ingress_linum); - CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)", + CHECK(err < 0, "bpf_map_lookup_elem(linum_map_fd)", "err:%d errno:%d\n", err, errno); memcpy(&srv_sk, &skel->bss->srv_sk, sizeof(srv_sk)); @@ -355,14 +355,12 @@ void test_sock_fields(void) egress_link = bpf_program__attach_cgroup(skel->progs.egress_read_sock_fields, child_cg_fd); - if (CHECK(IS_ERR(egress_link), "attach_cgroup(egress)", "err:%ld\n", - PTR_ERR(egress_link))) + if (!ASSERT_OK_PTR(egress_link, "attach_cgroup(egress)")) goto done; ingress_link = bpf_program__attach_cgroup(skel->progs.ingress_read_sock_fields, child_cg_fd); - if (CHECK(IS_ERR(ingress_link), "attach_cgroup(ingress)", "err:%ld\n", - PTR_ERR(ingress_link))) + if (!ASSERT_OK_PTR(ingress_link, "attach_cgroup(ingress)")) goto done; linum_map_fd = bpf_map__fd(skel->maps.linum_map); @@ -375,8 +373,8 @@ done: bpf_link__destroy(egress_link); bpf_link__destroy(ingress_link); test_sock_fields__destroy(skel); - if (child_cg_fd != -1) + if (child_cg_fd >= 0) close(child_cg_fd); - if (parent_cg_fd != -1) + if (parent_cg_fd >= 0) close(parent_cg_fd); } diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index ab77596b64e3..1352ec104149 100644 --- 
a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -88,11 +88,11 @@ static void test_sockmap_create_update_free(enum bpf_map_type map_type) int s, map, err; s = connected_socket_v4(); - if (CHECK_FAIL(s == -1)) + if (CHECK_FAIL(s < 0)) return; map = bpf_create_map(map_type, sizeof(int), sizeof(int), 1, 0); - if (CHECK_FAIL(map == -1)) { + if (CHECK_FAIL(map < 0)) { perror("bpf_create_map"); goto out; } @@ -245,7 +245,7 @@ static void test_sockmap_copy(enum bpf_map_type map_type) opts.link_info = &linfo; opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.copy, &opts); - if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) + if (!ASSERT_OK_PTR(link, "attach_iter")) goto out; iter_fd = bpf_iter_create(bpf_link__fd(link)); @@ -304,7 +304,7 @@ static void test_sockmap_skb_verdict_attach(enum bpf_attach_type first, } err = bpf_prog_attach(verdict, map, second, 0); - assert(err == -1 && errno == EBUSY); + ASSERT_EQ(err, -EBUSY, "prog_attach_fail"); err = bpf_prog_detach2(verdict, map, first); if (CHECK_FAIL(err)) { diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c index 06b86addc181..7a0d64fdc192 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c @@ -98,7 +98,7 @@ static void run_tests(int family, enum bpf_map_type map_type) int map; map = bpf_create_map(map_type, sizeof(int), sizeof(int), 1, 0); - if (CHECK_FAIL(map == -1)) { + if (CHECK_FAIL(map < 0)) { perror("bpf_map_create"); return; } diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c index 648d9ae898d2..515229f24a93 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c @@ -139,7 +139,7 @@ #define 
xbpf_map_delete_elem(fd, key) \ ({ \ int __ret = bpf_map_delete_elem((fd), (key)); \ - if (__ret == -1) \ + if (__ret < 0) \ FAIL_ERRNO("map_delete"); \ __ret; \ }) @@ -147,7 +147,7 @@ #define xbpf_map_lookup_elem(fd, key, val) \ ({ \ int __ret = bpf_map_lookup_elem((fd), (key), (val)); \ - if (__ret == -1) \ + if (__ret < 0) \ FAIL_ERRNO("map_lookup"); \ __ret; \ }) @@ -155,7 +155,7 @@ #define xbpf_map_update_elem(fd, key, val, flags) \ ({ \ int __ret = bpf_map_update_elem((fd), (key), (val), (flags)); \ - if (__ret == -1) \ + if (__ret < 0) \ FAIL_ERRNO("map_update"); \ __ret; \ }) @@ -164,7 +164,7 @@ ({ \ int __ret = \ bpf_prog_attach((prog), (target), (type), (flags)); \ - if (__ret == -1) \ + if (__ret < 0) \ FAIL_ERRNO("prog_attach(" #type ")"); \ __ret; \ }) @@ -172,7 +172,7 @@ #define xbpf_prog_detach2(prog, target, type) \ ({ \ int __ret = bpf_prog_detach2((prog), (target), (type)); \ - if (__ret == -1) \ + if (__ret < 0) \ FAIL_ERRNO("prog_detach2(" #type ")"); \ __ret; \ }) @@ -1610,6 +1610,7 @@ static void udp_redir_to_connected(int family, int sotype, int sock_mapfd, struct sockaddr_storage addr; int c0, c1, p0, p1; unsigned int pass; + int retries = 100; socklen_t len; int err, n; u64 value; @@ -1686,9 +1687,13 @@ static void udp_redir_to_connected(int family, int sotype, int sock_mapfd, if (pass != 1) FAIL("%s: want pass count 1, have %d", log_prefix, pass); +again: n = read(mode == REDIR_INGRESS ? 
p0 : c0, &b, 1); - if (n < 0) + if (n < 0) { + if (errno == EAGAIN && retries--) + goto again; FAIL_ERRNO("%s: read", log_prefix); + } if (n == 0) FAIL("%s: incomplete read", log_prefix); diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c index 11a769e18f5d..0a91d8d9954b 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c @@ -62,8 +62,7 @@ retry: skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu, pmu_fd); - if (CHECK(IS_ERR(skel->links.oncpu), "attach_perf_event", - "err %ld\n", PTR_ERR(skel->links.oncpu))) { + if (!ASSERT_OK_PTR(skel->links.oncpu, "attach_perf_event")) { close(pmu_fd); goto cleanup; } diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c index 37269d23df93..04b476bd62b9 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c @@ -21,7 +21,7 @@ void test_stacktrace_map(void) goto close_prog; link = bpf_program__attach_tracepoint(prog, "sched", "sched_switch"); - if (CHECK(IS_ERR(link), "attach_tp", "err %ld\n", PTR_ERR(link))) + if (!ASSERT_OK_PTR(link, "attach_tp")) goto close_prog; /* find map fds */ diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c index 404a5498e1a3..4fd30bb651ad 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c @@ -21,7 +21,7 @@ void test_stacktrace_map_raw_tp(void) goto close_prog; link = bpf_program__attach_raw_tracepoint(prog, "sched_switch"); - if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link))) + if (!ASSERT_OK_PTR(link, "attach_raw_tp")) goto close_prog; /* find map fds 
*/ @@ -59,7 +59,6 @@ void test_stacktrace_map_raw_tp(void) goto close_prog; close_prog: - if (!IS_ERR_OR_NULL(link)) - bpf_link__destroy(link); + bpf_link__destroy(link); bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/prog_tests/static_linked.c b/tools/testing/selftests/bpf/prog_tests/static_linked.c index 46556976dccc..5c4e3014e063 100644 --- a/tools/testing/selftests/bpf/prog_tests/static_linked.c +++ b/tools/testing/selftests/bpf/prog_tests/static_linked.c @@ -14,12 +14,7 @@ void test_static_linked(void) return; skel->rodata->rovar1 = 1; - skel->bss->static_var1 = 2; - skel->bss->static_var11 = 3; - skel->rodata->rovar2 = 4; - skel->bss->static_var2 = 5; - skel->bss->static_var22 = 6; err = test_static_linked__load(skel); if (!ASSERT_OK(err, "skel_load")) @@ -32,8 +27,8 @@ void test_static_linked(void) /* trigger */ usleep(1); - ASSERT_EQ(skel->bss->var1, 1 * 2 + 2 + 3, "var1"); - ASSERT_EQ(skel->bss->var2, 4 * 3 + 5 + 6, "var2"); + ASSERT_EQ(skel->data->var1, 1 * 2 + 2 + 3, "var1"); + ASSERT_EQ(skel->data->var2, 4 * 3 + 5 + 6, "var2"); cleanup: test_static_linked__destroy(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/syscall.c b/tools/testing/selftests/bpf/prog_tests/syscall.c new file mode 100644 index 000000000000..81e997a69f7a --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/syscall.c @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ +#include <test_progs.h> +#include "syscall.skel.h" + +struct args { + __u64 log_buf; + __u32 log_size; + int max_entries; + int map_fd; + int prog_fd; + int btf_fd; +}; + +void test_syscall(void) +{ + static char verifier_log[8192]; + struct args ctx = { + .max_entries = 1024, + .log_buf = (uintptr_t) verifier_log, + .log_size = sizeof(verifier_log), + }; + struct bpf_prog_test_run_attr tattr = { + .ctx_in = &ctx, + .ctx_size_in = sizeof(ctx), + }; + struct syscall *skel = NULL; + __u64 key = 12, value = 0; + int err; + + skel = 
syscall__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + tattr.prog_fd = bpf_program__fd(skel->progs.bpf_prog); + err = bpf_prog_test_run_xattr(&tattr); + ASSERT_EQ(err, 0, "err"); + ASSERT_EQ(tattr.retval, 1, "retval"); + ASSERT_GT(ctx.map_fd, 0, "ctx.map_fd"); + ASSERT_GT(ctx.prog_fd, 0, "ctx.prog_fd"); + ASSERT_OK(memcmp(verifier_log, "processed", sizeof("processed") - 1), + "verifier_log"); + + err = bpf_map_lookup_elem(ctx.map_fd, &key, &value); + ASSERT_EQ(err, 0, "map_lookup"); + ASSERT_EQ(value, 34, "map lookup value"); +cleanup: + syscall__destroy(skel); + if (ctx.prog_fd > 0) + close(ctx.prog_fd); + if (ctx.map_fd > 0) + close(ctx.map_fd); + if (ctx.btf_fd > 0) + close(ctx.btf_fd); +} diff --git a/tools/testing/selftests/bpf/prog_tests/tc_bpf.c b/tools/testing/selftests/bpf/prog_tests/tc_bpf.c new file mode 100644 index 000000000000..4a505a5adf4d --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/tc_bpf.c @@ -0,0 +1,395 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <test_progs.h> +#include <linux/pkt_cls.h> + +#include "test_tc_bpf.skel.h" + +#define LO_IFINDEX 1 + +#define TEST_DECLARE_OPTS(__fd) \ + DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_h, .handle = 1); \ + DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_p, .priority = 1); \ + DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_f, .prog_fd = __fd); \ + DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_hp, .handle = 1, .priority = 1); \ + DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_hf, .handle = 1, .prog_fd = __fd); \ + DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_pf, .priority = 1, .prog_fd = __fd); \ + DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_hpf, .handle = 1, .priority = 1, .prog_fd = __fd); \ + DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_hpi, .handle = 1, .priority = 1, .prog_id = 42); \ + DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_hpr, .handle = 1, .priority = 1, \ + .flags = BPF_TC_F_REPLACE); \ + DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_hpfi, .handle = 1, .priority = 1, .prog_fd = __fd, \ + .prog_id = 42); \ + 
DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_prio_max, .handle = 1, .priority = UINT16_MAX + 1); + +static int test_tc_bpf_basic(const struct bpf_tc_hook *hook, int fd) +{ + DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .handle = 1, .priority = 1, .prog_fd = fd); + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); + int ret; + + ret = bpf_obj_get_info_by_fd(fd, &info, &info_len); + if (!ASSERT_OK(ret, "bpf_obj_get_info_by_fd")) + return ret; + + ret = bpf_tc_attach(hook, &opts); + if (!ASSERT_OK(ret, "bpf_tc_attach")) + return ret; + + if (!ASSERT_EQ(opts.handle, 1, "handle set") || + !ASSERT_EQ(opts.priority, 1, "priority set") || + !ASSERT_EQ(opts.prog_id, info.id, "prog_id set")) + goto end; + + opts.prog_id = 0; + opts.flags = BPF_TC_F_REPLACE; + ret = bpf_tc_attach(hook, &opts); + if (!ASSERT_OK(ret, "bpf_tc_attach replace mode")) + goto end; + + opts.flags = opts.prog_fd = opts.prog_id = 0; + ret = bpf_tc_query(hook, &opts); + if (!ASSERT_OK(ret, "bpf_tc_query")) + goto end; + + if (!ASSERT_EQ(opts.handle, 1, "handle set") || + !ASSERT_EQ(opts.priority, 1, "priority set") || + !ASSERT_EQ(opts.prog_id, info.id, "prog_id set")) + goto end; + +end: + opts.flags = opts.prog_fd = opts.prog_id = 0; + ret = bpf_tc_detach(hook, &opts); + ASSERT_OK(ret, "bpf_tc_detach"); + return ret; +} + +static int test_tc_bpf_api(struct bpf_tc_hook *hook, int fd) +{ + DECLARE_LIBBPF_OPTS(bpf_tc_opts, attach_opts, .handle = 1, .priority = 1, .prog_fd = fd); + DECLARE_LIBBPF_OPTS(bpf_tc_hook, inv_hook, .attach_point = BPF_TC_INGRESS); + DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .handle = 1, .priority = 1); + int ret; + + ret = bpf_tc_hook_create(NULL); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_create invalid hook = NULL")) + return -EINVAL; + + /* hook ifindex = 0 */ + ret = bpf_tc_hook_create(&inv_hook); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_create invalid hook ifindex == 0")) + return -EINVAL; + + ret = bpf_tc_hook_destroy(&inv_hook); + if (!ASSERT_EQ(ret, -EINVAL, 
"bpf_tc_hook_destroy invalid hook ifindex == 0")) + return -EINVAL; + + ret = bpf_tc_attach(&inv_hook, &attach_opts); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid hook ifindex == 0")) + return -EINVAL; + attach_opts.prog_id = 0; + + ret = bpf_tc_detach(&inv_hook, &opts); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid hook ifindex == 0")) + return -EINVAL; + + ret = bpf_tc_query(&inv_hook, &opts); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid hook ifindex == 0")) + return -EINVAL; + + /* hook ifindex < 0 */ + inv_hook.ifindex = -1; + + ret = bpf_tc_hook_create(&inv_hook); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_create invalid hook ifindex < 0")) + return -EINVAL; + + ret = bpf_tc_hook_destroy(&inv_hook); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_destroy invalid hook ifindex < 0")) + return -EINVAL; + + ret = bpf_tc_attach(&inv_hook, &attach_opts); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid hook ifindex < 0")) + return -EINVAL; + attach_opts.prog_id = 0; + + ret = bpf_tc_detach(&inv_hook, &opts); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid hook ifindex < 0")) + return -EINVAL; + + ret = bpf_tc_query(&inv_hook, &opts); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid hook ifindex < 0")) + return -EINVAL; + + inv_hook.ifindex = LO_IFINDEX; + + /* hook.attach_point invalid */ + inv_hook.attach_point = 0xabcd; + ret = bpf_tc_hook_create(&inv_hook); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_create invalid hook.attach_point")) + return -EINVAL; + + ret = bpf_tc_hook_destroy(&inv_hook); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_destroy invalid hook.attach_point")) + return -EINVAL; + + ret = bpf_tc_attach(&inv_hook, &attach_opts); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid hook.attach_point")) + return -EINVAL; + + ret = bpf_tc_detach(&inv_hook, &opts); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid hook.attach_point")) + return -EINVAL; + + ret = bpf_tc_query(&inv_hook, &opts); + if 
(!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid hook.attach_point")) + return -EINVAL; + + inv_hook.attach_point = BPF_TC_INGRESS; + + /* hook.attach_point valid, but parent invalid */ + inv_hook.parent = TC_H_MAKE(1UL << 16, 10); + ret = bpf_tc_hook_create(&inv_hook); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_create invalid hook parent")) + return -EINVAL; + + ret = bpf_tc_hook_destroy(&inv_hook); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_destroy invalid hook parent")) + return -EINVAL; + + ret = bpf_tc_attach(&inv_hook, &attach_opts); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid hook parent")) + return -EINVAL; + + ret = bpf_tc_detach(&inv_hook, &opts); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid hook parent")) + return -EINVAL; + + ret = bpf_tc_query(&inv_hook, &opts); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid hook parent")) + return -EINVAL; + + inv_hook.attach_point = BPF_TC_CUSTOM; + inv_hook.parent = 0; + /* These return EOPNOTSUPP instead of EINVAL as parent is checked after + * attach_point of the hook. 
+ */ + ret = bpf_tc_hook_create(&inv_hook); + if (!ASSERT_EQ(ret, -EOPNOTSUPP, "bpf_tc_hook_create invalid hook parent")) + return -EINVAL; + + ret = bpf_tc_hook_destroy(&inv_hook); + if (!ASSERT_EQ(ret, -EOPNOTSUPP, "bpf_tc_hook_destroy invalid hook parent")) + return -EINVAL; + + ret = bpf_tc_attach(&inv_hook, &attach_opts); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid hook parent")) + return -EINVAL; + + ret = bpf_tc_detach(&inv_hook, &opts); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid hook parent")) + return -EINVAL; + + ret = bpf_tc_query(&inv_hook, &opts); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid hook parent")) + return -EINVAL; + + inv_hook.attach_point = BPF_TC_INGRESS; + + /* detach */ + { + TEST_DECLARE_OPTS(fd); + + ret = bpf_tc_detach(NULL, &opts_hp); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid hook = NULL")) + return -EINVAL; + + ret = bpf_tc_detach(hook, NULL); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid opts = NULL")) + return -EINVAL; + + ret = bpf_tc_detach(hook, &opts_hpr); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid flags set")) + return -EINVAL; + + ret = bpf_tc_detach(hook, &opts_hpf); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid prog_fd set")) + return -EINVAL; + + ret = bpf_tc_detach(hook, &opts_hpi); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid prog_id set")) + return -EINVAL; + + ret = bpf_tc_detach(hook, &opts_p); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid handle unset")) + return -EINVAL; + + ret = bpf_tc_detach(hook, &opts_h); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid priority unset")) + return -EINVAL; + + ret = bpf_tc_detach(hook, &opts_prio_max); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid priority > UINT16_MAX")) + return -EINVAL; + } + + /* query */ + { + TEST_DECLARE_OPTS(fd); + + ret = bpf_tc_query(NULL, &opts); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid hook = NULL")) + return -EINVAL; + + ret = 
bpf_tc_query(hook, NULL); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid opts = NULL")) + return -EINVAL; + + ret = bpf_tc_query(hook, &opts_hpr); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid flags set")) + return -EINVAL; + + ret = bpf_tc_query(hook, &opts_hpf); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid prog_fd set")) + return -EINVAL; + + ret = bpf_tc_query(hook, &opts_hpi); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid prog_id set")) + return -EINVAL; + + ret = bpf_tc_query(hook, &opts_p); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid handle unset")) + return -EINVAL; + + ret = bpf_tc_query(hook, &opts_h); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid priority unset")) + return -EINVAL; + + ret = bpf_tc_query(hook, &opts_prio_max); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid priority > UINT16_MAX")) + return -EINVAL; + + /* when chain is not present, kernel returns -EINVAL */ + ret = bpf_tc_query(hook, &opts_hp); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query valid handle, priority set")) + return -EINVAL; + } + + /* attach */ + { + TEST_DECLARE_OPTS(fd); + + ret = bpf_tc_attach(NULL, &opts_hp); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid hook = NULL")) + return -EINVAL; + + ret = bpf_tc_attach(hook, NULL); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid opts = NULL")) + return -EINVAL; + + opts_hp.flags = 42; + ret = bpf_tc_attach(hook, &opts_hp); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid flags")) + return -EINVAL; + + ret = bpf_tc_attach(hook, NULL); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid prog_fd unset")) + return -EINVAL; + + ret = bpf_tc_attach(hook, &opts_hpi); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid prog_id set")) + return -EINVAL; + + ret = bpf_tc_attach(hook, &opts_pf); + if (!ASSERT_OK(ret, "bpf_tc_attach valid handle unset")) + return -EINVAL; + opts_pf.prog_fd = opts_pf.prog_id = 0; + ASSERT_OK(bpf_tc_detach(hook, &opts_pf), "bpf_tc_detach"); 
+ + ret = bpf_tc_attach(hook, &opts_hf); + if (!ASSERT_OK(ret, "bpf_tc_attach valid priority unset")) + return -EINVAL; + opts_hf.prog_fd = opts_hf.prog_id = 0; + ASSERT_OK(bpf_tc_detach(hook, &opts_hf), "bpf_tc_detach"); + + ret = bpf_tc_attach(hook, &opts_prio_max); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid priority > UINT16_MAX")) + return -EINVAL; + + ret = bpf_tc_attach(hook, &opts_f); + if (!ASSERT_OK(ret, "bpf_tc_attach valid both handle and priority unset")) + return -EINVAL; + opts_f.prog_fd = opts_f.prog_id = 0; + ASSERT_OK(bpf_tc_detach(hook, &opts_f), "bpf_tc_detach"); + } + + return 0; +} + +void test_tc_bpf(void) +{ + DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = LO_IFINDEX, + .attach_point = BPF_TC_INGRESS); + struct test_tc_bpf *skel = NULL; + bool hook_created = false; + int cls_fd, ret; + + skel = test_tc_bpf__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_tc_bpf__open_and_load")) + return; + + cls_fd = bpf_program__fd(skel->progs.cls); + + ret = bpf_tc_hook_create(&hook); + if (ret == 0) + hook_created = true; + + ret = ret == -EEXIST ? 
0 : ret; + if (!ASSERT_OK(ret, "bpf_tc_hook_create(BPF_TC_INGRESS)")) + goto end; + + hook.attach_point = BPF_TC_CUSTOM; + hook.parent = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS); + ret = bpf_tc_hook_create(&hook); + if (!ASSERT_EQ(ret, -EOPNOTSUPP, "bpf_tc_hook_create invalid hook.attach_point")) + goto end; + + ret = test_tc_bpf_basic(&hook, cls_fd); + if (!ASSERT_OK(ret, "test_tc_internal ingress")) + goto end; + + ret = bpf_tc_hook_destroy(&hook); + if (!ASSERT_EQ(ret, -EOPNOTSUPP, "bpf_tc_hook_destroy invalid hook.attach_point")) + goto end; + + hook.attach_point = BPF_TC_INGRESS; + hook.parent = 0; + bpf_tc_hook_destroy(&hook); + + ret = test_tc_bpf_basic(&hook, cls_fd); + if (!ASSERT_OK(ret, "test_tc_internal ingress")) + goto end; + + bpf_tc_hook_destroy(&hook); + + hook.attach_point = BPF_TC_EGRESS; + ret = test_tc_bpf_basic(&hook, cls_fd); + if (!ASSERT_OK(ret, "test_tc_internal egress")) + goto end; + + bpf_tc_hook_destroy(&hook); + + ret = test_tc_bpf_api(&hook, cls_fd); + if (!ASSERT_OK(ret, "test_tc_bpf_api")) + goto end; + + bpf_tc_hook_destroy(&hook); + +end: + if (hook_created) { + hook.attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS; + bpf_tc_hook_destroy(&hook); + } + test_tc_bpf__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c index 08d19cafd5e8..1fa772079967 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c +++ b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c @@ -353,8 +353,7 @@ static void fastopen_estab(void) return; link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd); - if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n", - PTR_ERR(link))) + if (!ASSERT_OK_PTR(link, "attach_cgroup(estab)")) return; if (sk_fds_connect(&sk_fds, true)) { @@ -398,8 +397,7 @@ static void syncookie_estab(void) return; link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd); - if (CHECK(IS_ERR(link), 
"attach_cgroup(estab)", "err: %ld\n", - PTR_ERR(link))) + if (!ASSERT_OK_PTR(link, "attach_cgroup(estab)")) return; if (sk_fds_connect(&sk_fds, false)) { @@ -431,8 +429,7 @@ static void fin(void) return; link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd); - if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n", - PTR_ERR(link))) + if (!ASSERT_OK_PTR(link, "attach_cgroup(estab)")) return; if (sk_fds_connect(&sk_fds, false)) { @@ -471,8 +468,7 @@ static void __simple_estab(bool exprm) return; link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd); - if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n", - PTR_ERR(link))) + if (!ASSERT_OK_PTR(link, "attach_cgroup(estab)")) return; if (sk_fds_connect(&sk_fds, false)) { @@ -509,8 +505,7 @@ static void misc(void) return; link = bpf_program__attach_cgroup(misc_skel->progs.misc_estab, cg_fd); - if (CHECK(IS_ERR(link), "attach_cgroup(misc_estab)", "err: %ld\n", - PTR_ERR(link))) + if (!ASSERT_OK_PTR(link, "attach_cgroup(misc_estab)")) return; if (sk_fds_connect(&sk_fds, false)) { diff --git a/tools/testing/selftests/bpf/prog_tests/test_overhead.c b/tools/testing/selftests/bpf/prog_tests/test_overhead.c index 9966685866fd..123c68c1917d 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_overhead.c +++ b/tools/testing/selftests/bpf/prog_tests/test_overhead.c @@ -73,7 +73,7 @@ void test_test_overhead(void) return; obj = bpf_object__open_file("./test_overhead.o", NULL); - if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) + if (!ASSERT_OK_PTR(obj, "obj_open_file")) return; kprobe_prog = bpf_object__find_program_by_title(obj, kprobe_name); @@ -108,7 +108,7 @@ void test_test_overhead(void) /* attach kprobe */ link = bpf_program__attach_kprobe(kprobe_prog, false /* retprobe */, kprobe_func); - if (CHECK(IS_ERR(link), "attach_kprobe", "err %ld\n", PTR_ERR(link))) + if (!ASSERT_OK_PTR(link, "attach_kprobe")) goto cleanup; test_run("kprobe"); bpf_link__destroy(link); @@ -116,28 
+116,28 @@ void test_test_overhead(void) /* attach kretprobe */ link = bpf_program__attach_kprobe(kretprobe_prog, true /* retprobe */, kprobe_func); - if (CHECK(IS_ERR(link), "attach kretprobe", "err %ld\n", PTR_ERR(link))) + if (!ASSERT_OK_PTR(link, "attach_kretprobe")) goto cleanup; test_run("kretprobe"); bpf_link__destroy(link); /* attach raw_tp */ link = bpf_program__attach_raw_tracepoint(raw_tp_prog, "task_rename"); - if (CHECK(IS_ERR(link), "attach fentry", "err %ld\n", PTR_ERR(link))) + if (!ASSERT_OK_PTR(link, "attach_raw_tp")) goto cleanup; test_run("raw_tp"); bpf_link__destroy(link); /* attach fentry */ link = bpf_program__attach_trace(fentry_prog); - if (CHECK(IS_ERR(link), "attach fentry", "err %ld\n", PTR_ERR(link))) + if (!ASSERT_OK_PTR(link, "attach_fentry")) goto cleanup; test_run("fentry"); bpf_link__destroy(link); /* attach fexit */ link = bpf_program__attach_trace(fexit_prog); - if (CHECK(IS_ERR(link), "attach fexit", "err %ld\n", PTR_ERR(link))) + if (!ASSERT_OK_PTR(link, "attach_fexit")) goto cleanup; test_run("fexit"); bpf_link__destroy(link); diff --git a/tools/testing/selftests/bpf/prog_tests/trace_printk.c b/tools/testing/selftests/bpf/prog_tests/trace_printk.c index 39b0decb1bb2..d39bc00feb45 100644 --- a/tools/testing/selftests/bpf/prog_tests/trace_printk.c +++ b/tools/testing/selftests/bpf/prog_tests/trace_printk.c @@ -3,7 +3,7 @@ #include <test_progs.h> -#include "trace_printk.skel.h" +#include "trace_printk.lskel.h" #define TRACEBUF "/sys/kernel/debug/tracing/trace_pipe" #define SEARCHMSG "testing,testing" @@ -21,6 +21,9 @@ void test_trace_printk(void) if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) return; + ASSERT_EQ(skel->rodata->fmt[0], 'T', "invalid printk fmt string"); + skel->rodata->fmt[0] = 't'; + err = trace_printk__load(skel); if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err)) goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c 
b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c index f3022d934e2d..d7f5a931d7f3 100644 --- a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c +++ b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c @@ -55,7 +55,7 @@ void test_trampoline_count(void) /* attach 'allowed' trampoline programs */ for (i = 0; i < MAX_TRAMP_PROGS; i++) { obj = bpf_object__open_file(object, NULL); - if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) { + if (!ASSERT_OK_PTR(obj, "obj_open_file")) { obj = NULL; goto cleanup; } @@ -68,14 +68,14 @@ void test_trampoline_count(void) if (rand() % 2) { link = load(inst[i].obj, fentry_name); - if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link))) { + if (!ASSERT_OK_PTR(link, "attach_prog")) { link = NULL; goto cleanup; } inst[i].link_fentry = link; } else { link = load(inst[i].obj, fexit_name); - if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link))) { + if (!ASSERT_OK_PTR(link, "attach_prog")) { link = NULL; goto cleanup; } @@ -85,7 +85,7 @@ void test_trampoline_count(void) /* and try 1 extra.. 
*/ obj = bpf_object__open_file(object, NULL); - if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) { + if (!ASSERT_OK_PTR(obj, "obj_open_file")) { obj = NULL; goto cleanup; } @@ -96,13 +96,15 @@ void test_trampoline_count(void) /* ..that needs to fail */ link = load(obj, fentry_name); - if (CHECK(!IS_ERR(link), "cannot attach over the limit", "err %ld\n", PTR_ERR(link))) { + err = libbpf_get_error(link); + if (!ASSERT_ERR_PTR(link, "cannot attach over the limit")) { bpf_link__destroy(link); goto cleanup_extra; } /* with E2BIG error */ - CHECK(PTR_ERR(link) != -E2BIG, "proper error check", "err %ld\n", PTR_ERR(link)); + ASSERT_EQ(err, -E2BIG, "proper error check"); + ASSERT_EQ(link, NULL, "ptr_is_null"); /* and finaly execute the probe */ if (CHECK_FAIL(prctl(PR_GET_NAME, comm, 0L, 0L, 0L))) diff --git a/tools/testing/selftests/bpf/prog_tests/udp_limit.c b/tools/testing/selftests/bpf/prog_tests/udp_limit.c index 2aba09d4d01b..56c9d6bd38a3 100644 --- a/tools/testing/selftests/bpf/prog_tests/udp_limit.c +++ b/tools/testing/selftests/bpf/prog_tests/udp_limit.c @@ -22,11 +22,10 @@ void test_udp_limit(void) goto close_cgroup_fd; skel->links.sock = bpf_program__attach_cgroup(skel->progs.sock, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.sock, "cg_attach_sock")) + goto close_skeleton; skel->links.sock_release = bpf_program__attach_cgroup(skel->progs.sock_release, cgroup_fd); - if (CHECK(IS_ERR(skel->links.sock) || IS_ERR(skel->links.sock_release), - "cg-attach", "sock %ld sock_release %ld", - PTR_ERR(skel->links.sock), - PTR_ERR(skel->links.sock_release))) + if (!ASSERT_OK_PTR(skel->links.sock_release, "cg_attach_sock_release")) goto close_skeleton; /* BPF program enforces a single UDP socket per cgroup, diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c index 2c6c570b21f8..3bd5904b4db5 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c +++ 
b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c @@ -90,7 +90,7 @@ void test_xdp_bpf2bpf(void) pb_opts.ctx = &passed; pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map), 1, &pb_opts); - if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb))) + if (!ASSERT_OK_PTR(pb, "perf_buf__new")) goto out; /* Run test program */ diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_link.c b/tools/testing/selftests/bpf/prog_tests/xdp_link.c index 6f814999b395..46eed0a33c23 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_link.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_link.c @@ -51,7 +51,7 @@ void test_xdp_link(void) /* BPF link is not allowed to replace prog attachment */ link = bpf_program__attach_xdp(skel1->progs.xdp_handler, IFINDEX_LO); - if (CHECK(!IS_ERR(link), "link_attach_fail", "unexpected success\n")) { + if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) { bpf_link__destroy(link); /* best-effort detach prog */ opts.old_fd = prog_fd1; @@ -67,7 +67,7 @@ void test_xdp_link(void) /* now BPF link should attach successfully */ link = bpf_program__attach_xdp(skel1->progs.xdp_handler, IFINDEX_LO); - if (CHECK(IS_ERR(link), "link_attach", "failed: %ld\n", PTR_ERR(link))) + if (!ASSERT_OK_PTR(link, "link_attach")) goto cleanup; skel1->links.xdp_handler = link; @@ -95,7 +95,7 @@ void test_xdp_link(void) /* BPF link is not allowed to replace another BPF link */ link = bpf_program__attach_xdp(skel2->progs.xdp_handler, IFINDEX_LO); - if (CHECK(!IS_ERR(link), "link_attach_fail", "unexpected success\n")) { + if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) { bpf_link__destroy(link); goto cleanup; } @@ -105,7 +105,7 @@ void test_xdp_link(void) /* new link attach should succeed */ link = bpf_program__attach_xdp(skel2->progs.xdp_handler, IFINDEX_LO); - if (CHECK(IS_ERR(link), "link_attach", "failed: %ld\n", PTR_ERR(link))) + if (!ASSERT_OK_PTR(link, "link_attach")) goto cleanup; skel2->links.xdp_handler = link; diff --git 
a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c index 6dfce3fd68bc..0aa3cd34cbe3 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c @@ -2,7 +2,6 @@ /* Copyright (c) 2020 Facebook */ #include "bpf_iter.h" #include <bpf/bpf_helpers.h> -#include <bpf/bpf_tracing.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c index b83b5d2e17dc..6c39e86b666f 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c @@ -2,7 +2,6 @@ /* Copyright (c) 2020 Facebook */ #include "bpf_iter.h" #include <bpf/bpf_helpers.h> -#include <bpf/bpf_tracing.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c index d58d9f1642b5..784a610ce039 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c @@ -3,7 +3,6 @@ #include "bpf_iter.h" #include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> -#include <bpf/bpf_tracing.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c index 95989f4c99b5..a28e51e2dcee 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c @@ -3,7 +3,6 @@ #include "bpf_iter.h" #include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> -#include <bpf/bpf_tracing.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task.c b/tools/testing/selftests/bpf/progs/bpf_iter_task.c index b7f32c160f4e..c86b93f33b32 100644 --- 
a/tools/testing/selftests/bpf/progs/bpf_iter_task.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task.c @@ -2,7 +2,6 @@ /* Copyright (c) 2020 Facebook */ #include "bpf_iter.h" #include <bpf/bpf_helpers.h> -#include <bpf/bpf_tracing.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c index a1ddc36f13ec..bca8b889cb10 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c @@ -2,7 +2,6 @@ /* Copyright (c) 2020, Oracle and/or its affiliates. */ #include "bpf_iter.h" #include <bpf/bpf_helpers.h> -#include <bpf/bpf_tracing.h> #include <bpf/bpf_core_read.h> #include <errno.h> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c index b2f7c7c5f952..6e7b400888fe 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c @@ -2,7 +2,6 @@ /* Copyright (c) 2020 Facebook */ #include "bpf_iter.h" #include <bpf/bpf_helpers.h> -#include <bpf/bpf_tracing.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c index 43c36f5f7649..f2b8167b72a8 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c @@ -2,7 +2,6 @@ /* Copyright (c) 2020 Facebook */ #include "bpf_iter.h" #include <bpf/bpf_helpers.h> -#include <bpf/bpf_tracing.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c index 11d1aa37cf11..4ea6a37d1345 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c @@ -2,7 +2,6 @@ /* Copyright 
(c) 2020 Facebook */ #include "bpf_iter.h" #include <bpf/bpf_helpers.h> -#include <bpf/bpf_tracing.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c index 54380c5e1069..2e4775c35414 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c @@ -3,7 +3,6 @@ #include "bpf_iter.h" #include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> -#include <bpf/bpf_tracing.h> #include <bpf/bpf_endian.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c index b4fbddfa4e10..943f7bba180e 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c @@ -3,7 +3,6 @@ #include "bpf_iter.h" #include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> -#include <bpf/bpf_tracing.h> #include <bpf/bpf_endian.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c index ee49493dc125..400fdf8d6233 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c @@ -9,8 +9,8 @@ __u32 map1_id = 0, map2_id = 0; __u32 map1_accessed = 0, map2_accessed = 0; __u64 map1_seqnum = 0, map2_seqnum1 = 0, map2_seqnum2 = 0; -static volatile const __u32 print_len; -static volatile const __u32 ret1; +volatile const __u32 print_len; +volatile const __u32 ret1; SEC("iter/bpf_map") int dump_bpf_map(struct bpf_iter__bpf_map *ctx) diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c index f258583afbbd..cf0c485b1ed7 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c @@ -3,7 +3,6 @@ 
#include "bpf_iter.h" #include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> -#include <bpf/bpf_tracing.h> #include <bpf/bpf_endian.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c index 65f93bb03f0f..5031e21c433f 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c @@ -3,7 +3,6 @@ #include "bpf_iter.h" #include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> -#include <bpf/bpf_tracing.h> #include <bpf/bpf_endian.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/kfree_skb.c b/tools/testing/selftests/bpf/progs/kfree_skb.c index a46a264ce24e..55e283050cab 100644 --- a/tools/testing/selftests/bpf/progs/kfree_skb.c +++ b/tools/testing/selftests/bpf/progs/kfree_skb.c @@ -109,10 +109,10 @@ int BPF_PROG(trace_kfree_skb, struct sk_buff *skb, void *location) return 0; } -static volatile struct { +struct { bool fentry_test_ok; bool fexit_test_ok; -} result; +} result = {}; SEC("fentry/eth_type_trans") int BPF_PROG(fentry_eth_type_trans, struct sk_buff *skb, struct net_device *dev, diff --git a/tools/testing/selftests/bpf/progs/linked_maps1.c b/tools/testing/selftests/bpf/progs/linked_maps1.c index 52291515cc72..00bf1ca95986 100644 --- a/tools/testing/selftests/bpf/progs/linked_maps1.c +++ b/tools/testing/selftests/bpf/progs/linked_maps1.c @@ -75,7 +75,7 @@ int BPF_PROG(handler_exit1) val = bpf_map_lookup_elem(&map_weak, &key); if (val) output_weak1 = *val; - + return 0; } diff --git a/tools/testing/selftests/bpf/progs/syscall.c b/tools/testing/selftests/bpf/progs/syscall.c new file mode 100644 index 000000000000..e550f728962d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/syscall.c @@ -0,0 +1,121 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ +#include <linux/stddef.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> 
+#include <bpf/bpf_tracing.h> +#include <../../../tools/include/linux/filter.h> +#include <linux/btf.h> + +char _license[] SEC("license") = "GPL"; + +struct args { + __u64 log_buf; + __u32 log_size; + int max_entries; + int map_fd; + int prog_fd; + int btf_fd; +}; + +#define BTF_INFO_ENC(kind, kind_flag, vlen) \ + ((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN)) +#define BTF_TYPE_ENC(name, info, size_or_type) (name), (info), (size_or_type) +#define BTF_INT_ENC(encoding, bits_offset, nr_bits) \ + ((encoding) << 24 | (bits_offset) << 16 | (nr_bits)) +#define BTF_TYPE_INT_ENC(name, encoding, bits_offset, bits, sz) \ + BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz), \ + BTF_INT_ENC(encoding, bits_offset, bits) + +static int btf_load(void) +{ + struct btf_blob { + struct btf_header btf_hdr; + __u32 types[8]; + __u32 str; + } raw_btf = { + .btf_hdr = { + .magic = BTF_MAGIC, + .version = BTF_VERSION, + .hdr_len = sizeof(struct btf_header), + .type_len = sizeof(__u32) * 8, + .str_off = sizeof(__u32) * 8, + .str_len = sizeof(__u32), + }, + .types = { + /* long */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 64, 8), /* [1] */ + /* unsigned long */ + BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */ + }, + }; + static union bpf_attr btf_load_attr = { + .btf_size = sizeof(raw_btf), + }; + + btf_load_attr.btf = (long)&raw_btf; + return bpf_sys_bpf(BPF_BTF_LOAD, &btf_load_attr, sizeof(btf_load_attr)); +} + +SEC("syscall") +int bpf_prog(struct args *ctx) +{ + static char license[] = "GPL"; + static struct bpf_insn insns[] = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + static union bpf_attr map_create_attr = { + .map_type = BPF_MAP_TYPE_HASH, + .key_size = 8, + .value_size = 8, + .btf_key_type_id = 1, + .btf_value_type_id = 2, + 
}; + static union bpf_attr map_update_attr = { .map_fd = 1, }; + static __u64 key = 12; + static __u64 value = 34; + static union bpf_attr prog_load_attr = { + .prog_type = BPF_PROG_TYPE_XDP, + .insn_cnt = sizeof(insns) / sizeof(insns[0]), + }; + int ret; + + ret = btf_load(); + if (ret <= 0) + return ret; + + ctx->btf_fd = ret; + map_create_attr.max_entries = ctx->max_entries; + map_create_attr.btf_fd = ret; + + prog_load_attr.license = (long) license; + prog_load_attr.insns = (long) insns; + prog_load_attr.log_buf = ctx->log_buf; + prog_load_attr.log_size = ctx->log_size; + prog_load_attr.log_level = 1; + + ret = bpf_sys_bpf(BPF_MAP_CREATE, &map_create_attr, sizeof(map_create_attr)); + if (ret <= 0) + return ret; + ctx->map_fd = ret; + insns[3].imm = ret; + + map_update_attr.map_fd = ret; + map_update_attr.key = (long) &key; + map_update_attr.value = (long) &value; + ret = bpf_sys_bpf(BPF_MAP_UPDATE_ELEM, &map_update_attr, sizeof(map_update_attr)); + if (ret < 0) + return ret; + + ret = bpf_sys_bpf(BPF_PROG_LOAD, &prog_load_attr, sizeof(prog_load_attr)); + if (ret <= 0) + return ret; + ctx->prog_fd = ret; + return 1; +} diff --git a/tools/testing/selftests/bpf/progs/tailcall3.c b/tools/testing/selftests/bpf/progs/tailcall3.c index 739dc2a51e74..910858fe078a 100644 --- a/tools/testing/selftests/bpf/progs/tailcall3.c +++ b/tools/testing/selftests/bpf/progs/tailcall3.c @@ -10,7 +10,7 @@ struct { __uint(value_size, sizeof(__u32)); } jmp_table SEC(".maps"); -static volatile int count; +int count = 0; SEC("classifier/0") int bpf_func_0(struct __sk_buff *skb) diff --git a/tools/testing/selftests/bpf/progs/tailcall4.c b/tools/testing/selftests/bpf/progs/tailcall4.c index f82075b47d7d..bd4be135c39d 100644 --- a/tools/testing/selftests/bpf/progs/tailcall4.c +++ b/tools/testing/selftests/bpf/progs/tailcall4.c @@ -10,7 +10,7 @@ struct { __uint(value_size, sizeof(__u32)); } jmp_table SEC(".maps"); -static volatile int selector; +int selector = 0; #define TAIL_FUNC(x) \ 
SEC("classifier/" #x) \ diff --git a/tools/testing/selftests/bpf/progs/tailcall5.c b/tools/testing/selftests/bpf/progs/tailcall5.c index ce5450744fd4..adf30a33064e 100644 --- a/tools/testing/selftests/bpf/progs/tailcall5.c +++ b/tools/testing/selftests/bpf/progs/tailcall5.c @@ -10,7 +10,7 @@ struct { __uint(value_size, sizeof(__u32)); } jmp_table SEC(".maps"); -static volatile int selector; +int selector = 0; #define TAIL_FUNC(x) \ SEC("classifier/" #x) \ diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c index 7b1c04183824..3cc4c12817b5 100644 --- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c @@ -20,7 +20,7 @@ int subprog_tail(struct __sk_buff *skb) return 1; } -static volatile int count; +int count = 0; SEC("classifier/0") int bpf_func_0(struct __sk_buff *skb) diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c index 9a1b166b7fbe..77df6d4db895 100644 --- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c @@ -9,7 +9,7 @@ struct { __uint(value_size, sizeof(__u32)); } jmp_table SEC(".maps"); -static volatile int count; +int count = 0; __noinline int subprog_tail_2(struct __sk_buff *skb) diff --git a/tools/testing/selftests/bpf/progs/test_check_mtu.c b/tools/testing/selftests/bpf/progs/test_check_mtu.c index c4a9bae96e75..71184af57749 100644 --- a/tools/testing/selftests/bpf/progs/test_check_mtu.c +++ b/tools/testing/selftests/bpf/progs/test_check_mtu.c @@ -11,8 +11,8 @@ char _license[] SEC("license") = "GPL"; /* Userspace will update with MTU it can see on device */ -static volatile const int GLOBAL_USER_MTU; -static volatile const __u32 GLOBAL_USER_IFINDEX; +volatile const int GLOBAL_USER_MTU; +volatile const __u32 GLOBAL_USER_IFINDEX; /* BPF-prog will update these with MTU values it can see 
*/ __u32 global_bpf_mtu_xdp = 0; diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.c b/tools/testing/selftests/bpf/progs/test_cls_redirect.c index 3c1e042962e6..e2a5acc4785c 100644 --- a/tools/testing/selftests/bpf/progs/test_cls_redirect.c +++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.c @@ -39,8 +39,8 @@ char _license[] SEC("license") = "Dual BSD/GPL"; /** * Destination port and IP used for UDP encapsulation. */ -static volatile const __be16 ENCAPSULATION_PORT; -static volatile const __be32 ENCAPSULATION_IP; +volatile const __be16 ENCAPSULATION_PORT; +volatile const __be32 ENCAPSULATION_IP; typedef struct { uint64_t processed_packets_total; diff --git a/tools/testing/selftests/bpf/progs/test_global_func_args.c b/tools/testing/selftests/bpf/progs/test_global_func_args.c index cae309538a9e..e712bf77daae 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func_args.c +++ b/tools/testing/selftests/bpf/progs/test_global_func_args.c @@ -8,7 +8,7 @@ struct S { int v; }; -static volatile struct S global_variable; +struct S global_variable = {}; struct { __uint(type, BPF_MAP_TYPE_ARRAY); diff --git a/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c b/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c new file mode 100644 index 000000000000..3a193f42c7e7 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> + +__u32 set_pid = 0; +__u64 set_key = 0; +__u64 set_value = 0; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 2); + __type(key, __u64); + __type(value, __u64); +} hash_map SEC(".maps"); + +SEC("tp/syscalls/sys_enter_getpgid") +int bpf_lookup_and_delete_test(const void *ctx) +{ + if (set_pid == bpf_get_current_pid_tgid() >> 32) + bpf_map_update_elem(&hash_map, &set_key, &set_value, BPF_NOEXIST); + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git 
a/tools/testing/selftests/bpf/progs/test_migrate_reuseport.c b/tools/testing/selftests/bpf/progs/test_migrate_reuseport.c new file mode 100644 index 000000000000..27df571abf5b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_migrate_reuseport.c @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Check if we can migrate child sockets. + * + * 1. If reuse_md->migrating_sk is NULL (SYN packet), + * return SK_PASS without selecting a listener. + * 2. If reuse_md->migrating_sk is not NULL (socket migration), + * select a listener (reuseport_map[migrate_map[cookie]]) + * + * Author: Kuniyuki Iwashima <kuniyu@amazon.co.jp> + */ + +#include <stddef.h> +#include <string.h> +#include <linux/bpf.h> +#include <linux/if_ether.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/tcp.h> +#include <linux/in.h> +#include <bpf/bpf_endian.h> +#include <bpf/bpf_helpers.h> + +struct { + __uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY); + __uint(max_entries, 256); + __type(key, int); + __type(value, __u64); +} reuseport_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 256); + __type(key, __u64); + __type(value, int); +} migrate_map SEC(".maps"); + +int migrated_at_close = 0; +int migrated_at_close_fastopen = 0; +int migrated_at_send_synack = 0; +int migrated_at_recv_ack = 0; +__be16 server_port; + +SEC("xdp") +int drop_ack(struct xdp_md *xdp) +{ + void *data_end = (void *)(long)xdp->data_end; + void *data = (void *)(long)xdp->data; + struct ethhdr *eth = data; + struct tcphdr *tcp = NULL; + + if (eth + 1 > data_end) + goto pass; + + switch (bpf_ntohs(eth->h_proto)) { + case ETH_P_IP: { + struct iphdr *ip = (struct iphdr *)(eth + 1); + + if (ip + 1 > data_end) + goto pass; + + if (ip->protocol != IPPROTO_TCP) + goto pass; + + tcp = (struct tcphdr *)((void *)ip + ip->ihl * 4); + break; + } + case ETH_P_IPV6: { + struct ipv6hdr *ipv6 = (struct ipv6hdr *)(eth + 1); + + if (ipv6 + 1 > data_end) + goto pass; + + if 
(ipv6->nexthdr != IPPROTO_TCP) + goto pass; + + tcp = (struct tcphdr *)(ipv6 + 1); + break; + } + default: + goto pass; + } + + if (tcp + 1 > data_end) + goto pass; + + if (tcp->dest != server_port) + goto pass; + + if (!tcp->syn && tcp->ack) + return XDP_DROP; + +pass: + return XDP_PASS; +} + +SEC("sk_reuseport/migrate") +int migrate_reuseport(struct sk_reuseport_md *reuse_md) +{ + int *key, flags = 0, state, err; + __u64 cookie; + + if (!reuse_md->migrating_sk) + return SK_PASS; + + state = reuse_md->migrating_sk->state; + cookie = bpf_get_socket_cookie(reuse_md->sk); + + key = bpf_map_lookup_elem(&migrate_map, &cookie); + if (!key) + return SK_DROP; + + err = bpf_sk_select_reuseport(reuse_md, &reuseport_map, key, flags); + if (err) + return SK_PASS; + + switch (state) { + case BPF_TCP_ESTABLISHED: + __sync_fetch_and_add(&migrated_at_close, 1); + break; + case BPF_TCP_SYN_RECV: + __sync_fetch_and_add(&migrated_at_close_fastopen, 1); + break; + case BPF_TCP_NEW_SYN_RECV: + if (!reuse_md->len) + __sync_fetch_and_add(&migrated_at_send_synack, 1); + else + __sync_fetch_and_add(&migrated_at_recv_ack, 1); + break; + } + + return SK_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_rdonly_maps.c b/tools/testing/selftests/bpf/progs/test_rdonly_maps.c index ecbeea2df259..fc8e8a34a3db 100644 --- a/tools/testing/selftests/bpf/progs/test_rdonly_maps.c +++ b/tools/testing/selftests/bpf/progs/test_rdonly_maps.c @@ -5,7 +5,7 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> -static volatile const struct { +const struct { unsigned a[4]; /* * if the struct's size is multiple of 16, compiler will put it into @@ -15,11 +15,11 @@ static volatile const struct { char _y; } rdonly_values = { .a = {2, 3, 4, 5} }; -static volatile struct { +struct { unsigned did_run; unsigned iters; unsigned sum; -} res; +} res = {}; SEC("raw_tracepoint/sys_enter:skip_loop") int skip_loop(struct pt_regs *ctx) diff --git 
a/tools/testing/selftests/bpf/progs/test_ringbuf.c b/tools/testing/selftests/bpf/progs/test_ringbuf.c index 6b3f288b7c63..eaa7d9dba0be 100644 --- a/tools/testing/selftests/bpf/progs/test_ringbuf.c +++ b/tools/testing/selftests/bpf/progs/test_ringbuf.c @@ -35,7 +35,7 @@ long prod_pos = 0; /* inner state */ long seq = 0; -SEC("tp/syscalls/sys_enter_getpgid") +SEC("fentry/__x64_sys_getpgid") int test_ringbuf(void *ctx) { int cur_pid = bpf_get_current_pid_tgid() >> 32; @@ -48,7 +48,7 @@ int test_ringbuf(void *ctx) sample = bpf_ringbuf_reserve(&ringbuf, sizeof(*sample), 0); if (!sample) { __sync_fetch_and_add(&dropped, 1); - return 1; + return 0; } sample->pid = pid; diff --git a/tools/testing/selftests/bpf/progs/test_skeleton.c b/tools/testing/selftests/bpf/progs/test_skeleton.c index 374ccef704e1..441fa1c552c8 100644 --- a/tools/testing/selftests/bpf/progs/test_skeleton.c +++ b/tools/testing/selftests/bpf/progs/test_skeleton.c @@ -38,11 +38,11 @@ extern int LINUX_KERNEL_VERSION __kconfig; bool bpf_syscall = 0; int kern_ver = 0; +struct s out5 = {}; + SEC("raw_tp/sys_enter") int handler(const void *ctx) { - static volatile struct s out5; - out1 = in1; out2 = in2; out3 = in3; diff --git a/tools/testing/selftests/bpf/progs/test_snprintf.c b/tools/testing/selftests/bpf/progs/test_snprintf.c index e35129bea0a0..e2ad26150f9b 100644 --- a/tools/testing/selftests/bpf/progs/test_snprintf.c +++ b/tools/testing/selftests/bpf/progs/test_snprintf.c @@ -3,7 +3,6 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> -#include <bpf/bpf_tracing.h> __u32 pid = 0; diff --git a/tools/testing/selftests/bpf/progs/test_snprintf_single.c b/tools/testing/selftests/bpf/progs/test_snprintf_single.c index 402adaf344f9..3095837334d3 100644 --- a/tools/testing/selftests/bpf/progs/test_snprintf_single.c +++ b/tools/testing/selftests/bpf/progs/test_snprintf_single.c @@ -5,7 +5,7 @@ #include <bpf/bpf_helpers.h> /* The format string is filled from the userspace such that loading fails */ -static 
const char fmt[10]; +const char fmt[10]; SEC("raw_tp/sys_enter") int handler(const void *ctx) diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c index a39eba9f5201..a1cc58b10c7c 100644 --- a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c +++ b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c @@ -28,8 +28,8 @@ struct { __type(value, unsigned int); } verdict_map SEC(".maps"); -static volatile bool test_sockmap; /* toggled by user-space */ -static volatile bool test_ingress; /* toggled by user-space */ +bool test_sockmap = false; /* toggled by user-space */ +bool test_ingress = false; /* toggled by user-space */ SEC("sk_skb/stream_parser") int prog_stream_parser(struct __sk_buff *skb) diff --git a/tools/testing/selftests/bpf/progs/test_static_linked1.c b/tools/testing/selftests/bpf/progs/test_static_linked1.c index ea1a6c4c7172..4f0b612e1661 100644 --- a/tools/testing/selftests/bpf/progs/test_static_linked1.c +++ b/tools/testing/selftests/bpf/progs/test_static_linked1.c @@ -4,10 +4,10 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> -/* 8-byte aligned .bss */ -static volatile long static_var1; -static volatile int static_var11; -int var1 = 0; +/* 8-byte aligned .data */ +static volatile long static_var1 = 2; +static volatile int static_var2 = 3; +int var1 = -1; /* 4-byte aligned .rodata */ const volatile int rovar1; @@ -21,7 +21,7 @@ static __noinline int subprog(int x) SEC("raw_tp/sys_enter") int handler1(const void *ctx) { - var1 = subprog(rovar1) + static_var1 + static_var11; + var1 = subprog(rovar1) + static_var1 + static_var2; return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_static_linked2.c b/tools/testing/selftests/bpf/progs/test_static_linked2.c index 54d8d1ab577c..766ebd502a60 100644 --- a/tools/testing/selftests/bpf/progs/test_static_linked2.c +++ b/tools/testing/selftests/bpf/progs/test_static_linked2.c @@ -4,10 +4,10 @@ #include <linux/bpf.h> 
#include <bpf/bpf_helpers.h> -/* 4-byte aligned .bss */ -static volatile int static_var2; -static volatile int static_var22; -int var2 = 0; +/* 4-byte aligned .data */ +static volatile int static_var1 = 5; +static volatile int static_var2 = 6; +int var2 = -1; /* 8-byte aligned .rodata */ const volatile long rovar2; @@ -21,7 +21,7 @@ static __noinline int subprog(int x) SEC("raw_tp/sys_enter") int handler2(const void *ctx) { - var2 = subprog(rovar2) + static_var2 + static_var22; + var2 = subprog(rovar2) + static_var1 + static_var2; return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_subprogs.c b/tools/testing/selftests/bpf/progs/test_subprogs.c index d3c5673c0218..b7c37ca09544 100644 --- a/tools/testing/selftests/bpf/progs/test_subprogs.c +++ b/tools/testing/selftests/bpf/progs/test_subprogs.c @@ -4,8 +4,18 @@ const char LICENSE[] SEC("license") = "GPL"; +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} array SEC(".maps"); + __noinline int sub1(int x) { + int key = 0; + + bpf_map_lookup_elem(&array, &key); return x + 1; } @@ -23,6 +33,9 @@ static __noinline int sub3(int z) static __noinline int sub4(int w) { + int key = 0; + + bpf_map_lookup_elem(&array, &key); return w + sub3(5) + sub1(6); } diff --git a/tools/testing/selftests/bpf/progs/test_tc_bpf.c b/tools/testing/selftests/bpf/progs/test_tc_bpf.c new file mode 100644 index 000000000000..18a3a7ed924a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_tc_bpf.c @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +/* Dummy prog to test TC-BPF API */ + +SEC("classifier") +int cls(struct __sk_buff *skb) +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/trace_printk.c b/tools/testing/selftests/bpf/progs/trace_printk.c index 8ca7f399b670..119582aa105a 100644 --- a/tools/testing/selftests/bpf/progs/trace_printk.c +++ 
b/tools/testing/selftests/bpf/progs/trace_printk.c @@ -10,11 +10,11 @@ char _license[] SEC("license") = "GPL"; int trace_printk_ret = 0; int trace_printk_ran = 0; -SEC("tp/raw_syscalls/sys_enter") +const char fmt[] = "Testing,testing %d\n"; + +SEC("fentry/__x64_sys_nanosleep") int sys_enter(void *ctx) { - static const char fmt[] = "testing,testing %d\n"; - trace_printk_ret = bpf_trace_printk(fmt, sizeof(fmt), ++trace_printk_ran); return 0; diff --git a/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c b/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c new file mode 100644 index 000000000000..880debcbcd65 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: GPL-2.0 +#define KBUILD_MODNAME "foo" +#include <string.h> +#include <linux/in.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/ip.h> +#include <linux/ipv6.h> + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +/* One map use devmap, another one use devmap_hash for testing */ +struct { + __uint(type, BPF_MAP_TYPE_DEVMAP); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); + __uint(max_entries, 1024); +} map_all SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_DEVMAP_HASH); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(struct bpf_devmap_val)); + __uint(max_entries, 128); +} map_egress SEC(".maps"); + +/* map to store egress interfaces mac addresses */ +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, __u32); + __type(value, __be64); + __uint(max_entries, 128); +} mac_map SEC(".maps"); + +SEC("xdp_redirect_map_multi") +int xdp_redirect_map_multi_prog(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + int if_index = ctx->ingress_ifindex; + struct ethhdr *eth = data; + __u16 h_proto; + __u64 nh_off; + + nh_off = sizeof(*eth); + if (data + nh_off > 
data_end) + return XDP_DROP; + + h_proto = eth->h_proto; + + /* Using IPv4 for (BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS) testing */ + if (h_proto == bpf_htons(ETH_P_IP)) + return bpf_redirect_map(&map_all, 0, + BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS); + /* Using IPv6 for none flag testing */ + else if (h_proto == bpf_htons(ETH_P_IPV6)) + return bpf_redirect_map(&map_all, if_index, 0); + /* All others for BPF_F_BROADCAST testing */ + else + return bpf_redirect_map(&map_all, 0, BPF_F_BROADCAST); +} + +/* The following 2 progs are for 2nd devmap prog testing */ +SEC("xdp_redirect_map_ingress") +int xdp_redirect_map_all_prog(struct xdp_md *ctx) +{ + return bpf_redirect_map(&map_egress, 0, + BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS); +} + +SEC("xdp_devmap/map_prog") +int xdp_devmap_prog(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + __u32 key = ctx->egress_ifindex; + struct ethhdr *eth = data; + __u64 nh_off; + __be64 *mac; + + nh_off = sizeof(*eth); + if (data + nh_off > data_end) + return XDP_DROP; + + mac = bpf_map_lookup_elem(&mac_map, &key); + if (mac) + __builtin_memcpy(eth->h_source, mac, ETH_ALEN); + + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_doc_build.sh b/tools/testing/selftests/bpf/test_doc_build.sh index 7eb940a7b2eb..ed12111cd2f0 100755 --- a/tools/testing/selftests/bpf/test_doc_build.sh +++ b/tools/testing/selftests/bpf/test_doc_build.sh @@ -1,5 +1,6 @@ #!/bin/bash # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +set -e # Assume script is located under tools/testing/selftests/bpf/. We want to start # build attempts from the top of kernel repository. 
diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c index 6a5349f9eb14..7e9049fa3edf 100644 --- a/tools/testing/selftests/bpf/test_lru_map.c +++ b/tools/testing/selftests/bpf/test_lru_map.c @@ -231,6 +231,14 @@ static void test_lru_sanity0(int map_type, int map_flags) assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 && errno == ENOENT); + /* lookup elem key=1 and delete it, then check it doesn't exist */ + key = 1; + assert(!bpf_map_lookup_and_delete_elem(lru_map_fd, &key, &value)); + assert(value[0] == 1234); + + /* remove the same element from the expected map */ + assert(!bpf_map_delete_elem(expected_map_fd, &key)); + assert(map_equal(lru_map_fd, expected_map_fd)); close(expected_map_fd); diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 51adc42b2b40..30cbf5d98f7d 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -53,23 +53,30 @@ static void test_hashmap(unsigned int task, void *data) value = 0; /* BPF_NOEXIST means add new element if it doesn't exist. */ - assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 && + assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 && /* key=1 already exists. */ errno == EEXIST); /* -1 is an invalid flag. */ - assert(bpf_map_update_elem(fd, &key, &value, -1) == -1 && + assert(bpf_map_update_elem(fd, &key, &value, -1) < 0 && errno == EINVAL); /* Check that key=1 can be found. */ assert(bpf_map_lookup_elem(fd, &key, &value) == 0 && value == 1234); key = 2; + value = 1234; + /* Insert key=2 element. */ + assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0); + + /* Check that key=2 matches the value and delete it */ + assert(bpf_map_lookup_and_delete_elem(fd, &key, &value) == 0 && value == 1234); + /* Check that key=2 is not found. 
*/ - assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT); + assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == ENOENT); /* BPF_EXIST means update existing element. */ - assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == -1 && + assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) < 0 && /* key=2 is not there. */ errno == ENOENT); @@ -80,7 +87,7 @@ static void test_hashmap(unsigned int task, void *data) * inserted due to max_entries limit. */ key = 0; - assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 && + assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 && errno == E2BIG); /* Update existing element, though the map is full. */ @@ -89,12 +96,12 @@ static void test_hashmap(unsigned int task, void *data) key = 2; assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0); key = 3; - assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 && + assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 && errno == E2BIG); /* Check that key = 0 doesn't exist. */ key = 0; - assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT); + assert(bpf_map_delete_elem(fd, &key) < 0 && errno == ENOENT); /* Iterate over two elements. */ assert(bpf_map_get_next_key(fd, NULL, &first_key) == 0 && @@ -104,7 +111,7 @@ static void test_hashmap(unsigned int task, void *data) assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 && (next_key == 1 || next_key == 2) && (next_key != first_key)); - assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 && + assert(bpf_map_get_next_key(fd, &next_key, &next_key) < 0 && errno == ENOENT); /* Delete both elements. */ @@ -112,13 +119,13 @@ static void test_hashmap(unsigned int task, void *data) assert(bpf_map_delete_elem(fd, &key) == 0); key = 2; assert(bpf_map_delete_elem(fd, &key) == 0); - assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT); + assert(bpf_map_delete_elem(fd, &key) < 0 && errno == ENOENT); key = 0; /* Check that map is empty. 
*/ - assert(bpf_map_get_next_key(fd, NULL, &next_key) == -1 && + assert(bpf_map_get_next_key(fd, NULL, &next_key) < 0 && errno == ENOENT); - assert(bpf_map_get_next_key(fd, &key, &next_key) == -1 && + assert(bpf_map_get_next_key(fd, &key, &next_key) < 0 && errno == ENOENT); close(fd); @@ -166,15 +173,25 @@ static void test_hashmap_percpu(unsigned int task, void *data) /* Insert key=1 element. */ assert(!(expected_key_mask & key)); assert(bpf_map_update_elem(fd, &key, value, BPF_ANY) == 0); + + /* Lookup and delete elem key=1 and check value. */ + assert(bpf_map_lookup_and_delete_elem(fd, &key, value) == 0 && + bpf_percpu(value,0) == 100); + + for (i = 0; i < nr_cpus; i++) + bpf_percpu(value,i) = i + 100; + + /* Insert key=1 element which should not exist. */ + assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == 0); expected_key_mask |= key; /* BPF_NOEXIST means add new element if it doesn't exist. */ - assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == -1 && + assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) < 0 && /* key=1 already exists. */ errno == EEXIST); /* -1 is an invalid flag. */ - assert(bpf_map_update_elem(fd, &key, value, -1) == -1 && + assert(bpf_map_update_elem(fd, &key, value, -1) < 0 && errno == EINVAL); /* Check that key=1 can be found. Value could be 0 if the lookup @@ -186,10 +203,10 @@ static void test_hashmap_percpu(unsigned int task, void *data) key = 2; /* Check that key=2 is not found. */ - assert(bpf_map_lookup_elem(fd, &key, value) == -1 && errno == ENOENT); + assert(bpf_map_lookup_elem(fd, &key, value) < 0 && errno == ENOENT); /* BPF_EXIST means update existing element. */ - assert(bpf_map_update_elem(fd, &key, value, BPF_EXIST) == -1 && + assert(bpf_map_update_elem(fd, &key, value, BPF_EXIST) < 0 && /* key=2 is not there. */ errno == ENOENT); @@ -202,11 +219,11 @@ static void test_hashmap_percpu(unsigned int task, void *data) * inserted due to max_entries limit. 
*/ key = 0; - assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == -1 && + assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) < 0 && errno == E2BIG); /* Check that key = 0 doesn't exist. */ - assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT); + assert(bpf_map_delete_elem(fd, &key) < 0 && errno == ENOENT); /* Iterate over two elements. */ assert(bpf_map_get_next_key(fd, NULL, &first_key) == 0 && @@ -237,13 +254,13 @@ static void test_hashmap_percpu(unsigned int task, void *data) assert(bpf_map_delete_elem(fd, &key) == 0); key = 2; assert(bpf_map_delete_elem(fd, &key) == 0); - assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT); + assert(bpf_map_delete_elem(fd, &key) < 0 && errno == ENOENT); key = 0; /* Check that map is empty. */ - assert(bpf_map_get_next_key(fd, NULL, &next_key) == -1 && + assert(bpf_map_get_next_key(fd, NULL, &next_key) < 0 && errno == ENOENT); - assert(bpf_map_get_next_key(fd, &key, &next_key) == -1 && + assert(bpf_map_get_next_key(fd, &key, &next_key) < 0 && errno == ENOENT); close(fd); @@ -360,7 +377,7 @@ static void test_arraymap(unsigned int task, void *data) assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0); value = 0; - assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 && + assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 && errno == EEXIST); /* Check that key=1 can be found. */ @@ -374,11 +391,11 @@ static void test_arraymap(unsigned int task, void *data) * due to max_entries limit. */ key = 2; - assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == -1 && + assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) < 0 && errno == E2BIG); /* Check that key = 2 doesn't exist. */ - assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT); + assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == ENOENT); /* Iterate over two elements. 
*/ assert(bpf_map_get_next_key(fd, NULL, &next_key) == 0 && @@ -387,12 +404,12 @@ static void test_arraymap(unsigned int task, void *data) next_key == 0); assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 && next_key == 1); - assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 && + assert(bpf_map_get_next_key(fd, &next_key, &next_key) < 0 && errno == ENOENT); /* Delete shouldn't succeed. */ key = 1; - assert(bpf_map_delete_elem(fd, &key) == -1 && errno == EINVAL); + assert(bpf_map_delete_elem(fd, &key) < 0 && errno == EINVAL); close(fd); } @@ -418,7 +435,7 @@ static void test_arraymap_percpu(unsigned int task, void *data) assert(bpf_map_update_elem(fd, &key, values, BPF_ANY) == 0); bpf_percpu(values, 0) = 0; - assert(bpf_map_update_elem(fd, &key, values, BPF_NOEXIST) == -1 && + assert(bpf_map_update_elem(fd, &key, values, BPF_NOEXIST) < 0 && errno == EEXIST); /* Check that key=1 can be found. */ @@ -433,11 +450,11 @@ static void test_arraymap_percpu(unsigned int task, void *data) /* Check that key=2 cannot be inserted due to max_entries limit. */ key = 2; - assert(bpf_map_update_elem(fd, &key, values, BPF_EXIST) == -1 && + assert(bpf_map_update_elem(fd, &key, values, BPF_EXIST) < 0 && errno == E2BIG); /* Check that key = 2 doesn't exist. */ - assert(bpf_map_lookup_elem(fd, &key, values) == -1 && errno == ENOENT); + assert(bpf_map_lookup_elem(fd, &key, values) < 0 && errno == ENOENT); /* Iterate over two elements. */ assert(bpf_map_get_next_key(fd, NULL, &next_key) == 0 && @@ -446,12 +463,12 @@ static void test_arraymap_percpu(unsigned int task, void *data) next_key == 0); assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 && next_key == 1); - assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 && + assert(bpf_map_get_next_key(fd, &next_key, &next_key) < 0 && errno == ENOENT); /* Delete shouldn't succeed. 
*/ key = 1; - assert(bpf_map_delete_elem(fd, &key) == -1 && errno == EINVAL); + assert(bpf_map_delete_elem(fd, &key) < 0 && errno == EINVAL); close(fd); } @@ -555,7 +572,7 @@ static void test_queuemap(unsigned int task, void *data) assert(bpf_map_update_elem(fd, NULL, &vals[i], 0) == 0); /* Check that element cannot be pushed due to max_entries limit */ - assert(bpf_map_update_elem(fd, NULL, &val, 0) == -1 && + assert(bpf_map_update_elem(fd, NULL, &val, 0) < 0 && errno == E2BIG); /* Peek element */ @@ -571,12 +588,12 @@ static void test_queuemap(unsigned int task, void *data) val == vals[i]); /* Check that there are not elements left */ - assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == -1 && + assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) < 0 && errno == ENOENT); /* Check that non supported functions set errno to EINVAL */ - assert(bpf_map_delete_elem(fd, NULL) == -1 && errno == EINVAL); - assert(bpf_map_get_next_key(fd, NULL, NULL) == -1 && errno == EINVAL); + assert(bpf_map_delete_elem(fd, NULL) < 0 && errno == EINVAL); + assert(bpf_map_get_next_key(fd, NULL, NULL) < 0 && errno == EINVAL); close(fd); } @@ -613,7 +630,7 @@ static void test_stackmap(unsigned int task, void *data) assert(bpf_map_update_elem(fd, NULL, &vals[i], 0) == 0); /* Check that element cannot be pushed due to max_entries limit */ - assert(bpf_map_update_elem(fd, NULL, &val, 0) == -1 && + assert(bpf_map_update_elem(fd, NULL, &val, 0) < 0 && errno == E2BIG); /* Peek element */ @@ -629,12 +646,12 @@ static void test_stackmap(unsigned int task, void *data) val == vals[i]); /* Check that there are not elements left */ - assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == -1 && + assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) < 0 && errno == ENOENT); /* Check that non supported functions set errno to EINVAL */ - assert(bpf_map_delete_elem(fd, NULL) == -1 && errno == EINVAL); - assert(bpf_map_get_next_key(fd, NULL, NULL) == -1 && errno == EINVAL); + 
assert(bpf_map_delete_elem(fd, NULL) < 0 && errno == EINVAL); + assert(bpf_map_get_next_key(fd, NULL, NULL) < 0 && errno == EINVAL); close(fd); } @@ -835,7 +852,7 @@ static void test_sockmap(unsigned int tasks, void *data) } bpf_map_rx = bpf_object__find_map_by_name(obj, "sock_map_rx"); - if (IS_ERR(bpf_map_rx)) { + if (!bpf_map_rx) { printf("Failed to load map rx from verdict prog\n"); goto out_sockmap; } @@ -847,7 +864,7 @@ static void test_sockmap(unsigned int tasks, void *data) } bpf_map_tx = bpf_object__find_map_by_name(obj, "sock_map_tx"); - if (IS_ERR(bpf_map_tx)) { + if (!bpf_map_tx) { printf("Failed to load map tx from verdict prog\n"); goto out_sockmap; } @@ -859,7 +876,7 @@ static void test_sockmap(unsigned int tasks, void *data) } bpf_map_msg = bpf_object__find_map_by_name(obj, "sock_map_msg"); - if (IS_ERR(bpf_map_msg)) { + if (!bpf_map_msg) { printf("Failed to load map msg from msg_verdict prog\n"); goto out_sockmap; } @@ -871,7 +888,7 @@ static void test_sockmap(unsigned int tasks, void *data) } bpf_map_break = bpf_object__find_map_by_name(obj, "sock_map_break"); - if (IS_ERR(bpf_map_break)) { + if (!bpf_map_break) { printf("Failed to load map tx from verdict prog\n"); goto out_sockmap; } @@ -1153,7 +1170,7 @@ static void test_map_in_map(void) } map = bpf_object__find_map_by_name(obj, "mim_array"); - if (IS_ERR(map)) { + if (!map) { printf("Failed to load array of maps from test prog\n"); goto out_map_in_map; } @@ -1164,7 +1181,7 @@ static void test_map_in_map(void) } map = bpf_object__find_map_by_name(obj, "mim_hash"); - if (IS_ERR(map)) { + if (!map) { printf("Failed to load hash of maps from test prog\n"); goto out_map_in_map; } @@ -1177,7 +1194,7 @@ static void test_map_in_map(void) bpf_object__load(obj); map = bpf_object__find_map_by_name(obj, "mim_array"); - if (IS_ERR(map)) { + if (!map) { printf("Failed to load array of maps from test prog\n"); goto out_map_in_map; } @@ -1194,7 +1211,7 @@ static void test_map_in_map(void) } map = 
bpf_object__find_map_by_name(obj, "mim_hash"); - if (IS_ERR(map)) { + if (!map) { printf("Failed to load hash of maps from test prog\n"); goto out_map_in_map; } @@ -1246,7 +1263,7 @@ static void test_map_large(void) } key.c = -1; - assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 && + assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 && errno == E2BIG); /* Iterate through all elements. */ @@ -1254,12 +1271,12 @@ static void test_map_large(void) key.c = -1; for (i = 0; i < MAP_SIZE; i++) assert(bpf_map_get_next_key(fd, &key, &key) == 0); - assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT); + assert(bpf_map_get_next_key(fd, &key, &key) < 0 && errno == ENOENT); key.c = 0; assert(bpf_map_lookup_elem(fd, &key, &value) == 0 && value == 0); key.a = 1; - assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT); + assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == ENOENT); close(fd); } @@ -1391,7 +1408,7 @@ static void test_map_parallel(void) run_parallel(TASKS, test_update_delete, data); /* Check that key=0 is already there. */ - assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 && + assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 && errno == EEXIST); /* Check that all elements were inserted. */ @@ -1399,7 +1416,7 @@ static void test_map_parallel(void) key = -1; for (i = 0; i < MAP_SIZE; i++) assert(bpf_map_get_next_key(fd, &key, &key) == 0); - assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT); + assert(bpf_map_get_next_key(fd, &key, &key) < 0 && errno == ENOENT); /* Another check for all elements */ for (i = 0; i < MAP_SIZE; i++) { @@ -1415,8 +1432,8 @@ static void test_map_parallel(void) /* Nothing should be left. 
*/ key = -1; - assert(bpf_map_get_next_key(fd, NULL, &key) == -1 && errno == ENOENT); - assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT); + assert(bpf_map_get_next_key(fd, NULL, &key) < 0 && errno == ENOENT); + assert(bpf_map_get_next_key(fd, &key, &key) < 0 && errno == ENOENT); } static void test_map_rdonly(void) @@ -1434,12 +1451,12 @@ static void test_map_rdonly(void) key = 1; value = 1234; /* Try to insert key=1 element. */ - assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == -1 && + assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) < 0 && errno == EPERM); /* Check that key=1 is not found. */ - assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT); - assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == ENOENT); + assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == ENOENT); + assert(bpf_map_get_next_key(fd, &key, &value) < 0 && errno == ENOENT); close(fd); } @@ -1462,8 +1479,8 @@ static void test_map_wronly_hash(void) assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0); /* Check that reading elements and keys from the map is not allowed. 
*/ - assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == EPERM); - assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == EPERM); + assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == EPERM); + assert(bpf_map_get_next_key(fd, &key, &value) < 0 && errno == EPERM); close(fd); } @@ -1490,10 +1507,10 @@ static void test_map_wronly_stack_or_queue(enum bpf_map_type map_type) assert(bpf_map_update_elem(fd, NULL, &value, BPF_ANY) == 0); /* Peek element should fail */ - assert(bpf_map_lookup_elem(fd, NULL, &value) == -1 && errno == EPERM); + assert(bpf_map_lookup_elem(fd, NULL, &value) < 0 && errno == EPERM); /* Pop element should fail */ - assert(bpf_map_lookup_and_delete_elem(fd, NULL, &value) == -1 && + assert(bpf_map_lookup_and_delete_elem(fd, NULL, &value) < 0 && errno == EPERM); close(fd); @@ -1547,7 +1564,7 @@ static void prepare_reuseport_grp(int type, int map_fd, size_t map_elem_size, value = &fd32; } err = bpf_map_update_elem(map_fd, &index0, value, BPF_ANY); - CHECK(err != -1 || errno != EINVAL, + CHECK(err >= 0 || errno != EINVAL, "reuseport array update unbound sk", "sock_type:%d err:%d errno:%d\n", type, err, errno); @@ -1576,7 +1593,7 @@ static void prepare_reuseport_grp(int type, int map_fd, size_t map_elem_size, */ err = bpf_map_update_elem(map_fd, &index0, value, BPF_ANY); - CHECK(err != -1 || errno != EINVAL, + CHECK(err >= 0 || errno != EINVAL, "reuseport array update non-listening sk", "sock_type:%d err:%d errno:%d\n", type, err, errno); @@ -1606,31 +1623,31 @@ static void test_reuseport_array(void) map_fd = bpf_create_map(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, sizeof(__u32), sizeof(__u64), array_size, 0); - CHECK(map_fd == -1, "reuseport array create", + CHECK(map_fd < 0, "reuseport array create", "map_fd:%d, errno:%d\n", map_fd, errno); /* Test lookup/update/delete with invalid index */ err = bpf_map_delete_elem(map_fd, &bad_index); - CHECK(err != -1 || errno != E2BIG, "reuseport array del >=max_entries", + CHECK(err >= 0 || 
errno != E2BIG, "reuseport array del >=max_entries", "err:%d errno:%d\n", err, errno); err = bpf_map_update_elem(map_fd, &bad_index, &fd64, BPF_ANY); - CHECK(err != -1 || errno != E2BIG, + CHECK(err >= 0 || errno != E2BIG, "reuseport array update >=max_entries", "err:%d errno:%d\n", err, errno); err = bpf_map_lookup_elem(map_fd, &bad_index, &map_cookie); - CHECK(err != -1 || errno != ENOENT, + CHECK(err >= 0 || errno != ENOENT, "reuseport array update >=max_entries", "err:%d errno:%d\n", err, errno); /* Test lookup/delete non existence elem */ err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie); - CHECK(err != -1 || errno != ENOENT, + CHECK(err >= 0 || errno != ENOENT, "reuseport array lookup not-exist elem", "err:%d errno:%d\n", err, errno); err = bpf_map_delete_elem(map_fd, &index3); - CHECK(err != -1 || errno != ENOENT, + CHECK(err >= 0 || errno != ENOENT, "reuseport array del not-exist elem", "err:%d errno:%d\n", err, errno); @@ -1644,7 +1661,7 @@ static void test_reuseport_array(void) /* BPF_EXIST failure case */ err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx], BPF_EXIST); - CHECK(err != -1 || errno != ENOENT, + CHECK(err >= 0 || errno != ENOENT, "reuseport array update empty elem BPF_EXIST", "sock_type:%d err:%d errno:%d\n", type, err, errno); @@ -1653,7 +1670,7 @@ static void test_reuseport_array(void) /* BPF_NOEXIST success case */ err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx], BPF_NOEXIST); - CHECK(err == -1, + CHECK(err < 0, "reuseport array update empty elem BPF_NOEXIST", "sock_type:%d err:%d errno:%d\n", type, err, errno); @@ -1662,7 +1679,7 @@ static void test_reuseport_array(void) /* BPF_EXIST success case. 
*/ err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx], BPF_EXIST); - CHECK(err == -1, + CHECK(err < 0, "reuseport array update same elem BPF_EXIST", "sock_type:%d err:%d errno:%d\n", type, err, errno); fds_idx = REUSEPORT_FD_IDX(err, fds_idx); @@ -1670,7 +1687,7 @@ static void test_reuseport_array(void) /* BPF_NOEXIST failure case */ err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx], BPF_NOEXIST); - CHECK(err != -1 || errno != EEXIST, + CHECK(err >= 0 || errno != EEXIST, "reuseport array update non-empty elem BPF_NOEXIST", "sock_type:%d err:%d errno:%d\n", type, err, errno); @@ -1679,7 +1696,7 @@ static void test_reuseport_array(void) /* BPF_ANY case (always succeed) */ err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx], BPF_ANY); - CHECK(err == -1, + CHECK(err < 0, "reuseport array update same sk with BPF_ANY", "sock_type:%d err:%d errno:%d\n", type, err, errno); @@ -1688,32 +1705,32 @@ static void test_reuseport_array(void) /* The same sk cannot be added to reuseport_array twice */ err = bpf_map_update_elem(map_fd, &index3, &fd64, BPF_ANY); - CHECK(err != -1 || errno != EBUSY, + CHECK(err >= 0 || errno != EBUSY, "reuseport array update same sk with same index", "sock_type:%d err:%d errno:%d\n", type, err, errno); err = bpf_map_update_elem(map_fd, &index0, &fd64, BPF_ANY); - CHECK(err != -1 || errno != EBUSY, + CHECK(err >= 0 || errno != EBUSY, "reuseport array update same sk with different index", "sock_type:%d err:%d errno:%d\n", type, err, errno); /* Test delete elem */ err = bpf_map_delete_elem(map_fd, &index3); - CHECK(err == -1, "reuseport array delete sk", + CHECK(err < 0, "reuseport array delete sk", "sock_type:%d err:%d errno:%d\n", type, err, errno); /* Add it back with BPF_NOEXIST */ err = bpf_map_update_elem(map_fd, &index3, &fd64, BPF_NOEXIST); - CHECK(err == -1, + CHECK(err < 0, "reuseport array re-add with BPF_NOEXIST after del", "sock_type:%d err:%d errno:%d\n", type, err, errno); /* Test cookie */ err = 
bpf_map_lookup_elem(map_fd, &index3, &map_cookie); - CHECK(err == -1 || sk_cookie != map_cookie, + CHECK(err < 0 || sk_cookie != map_cookie, "reuseport array lookup re-added sk", "sock_type:%d err:%d errno:%d sk_cookie:0x%llx map_cookie:0x%llxn", type, err, errno, sk_cookie, map_cookie); @@ -1722,7 +1739,7 @@ static void test_reuseport_array(void) for (f = 0; f < ARRAY_SIZE(grpa_fds64); f++) close(grpa_fds64[f]); err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie); - CHECK(err != -1 || errno != ENOENT, + CHECK(err >= 0 || errno != ENOENT, "reuseport array lookup after close()", "sock_type:%d err:%d errno:%d\n", type, err, errno); @@ -1733,7 +1750,7 @@ static void test_reuseport_array(void) CHECK(fd64 == -1, "socket(SOCK_RAW)", "err:%d errno:%d\n", err, errno); err = bpf_map_update_elem(map_fd, &index3, &fd64, BPF_NOEXIST); - CHECK(err != -1 || errno != ENOTSUPP, "reuseport array update SOCK_RAW", + CHECK(err >= 0 || errno != ENOTSUPP, "reuseport array update SOCK_RAW", "err:%d errno:%d\n", err, errno); close(fd64); @@ -1743,16 +1760,16 @@ static void test_reuseport_array(void) /* Test 32 bit fd */ map_fd = bpf_create_map(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, sizeof(__u32), sizeof(__u32), array_size, 0); - CHECK(map_fd == -1, "reuseport array create", + CHECK(map_fd < 0, "reuseport array create", "map_fd:%d, errno:%d\n", map_fd, errno); prepare_reuseport_grp(SOCK_STREAM, map_fd, sizeof(__u32), &fd64, &sk_cookie, 1); fd = fd64; err = bpf_map_update_elem(map_fd, &index3, &fd, BPF_NOEXIST); - CHECK(err == -1, "reuseport array update 32 bit fd", + CHECK(err < 0, "reuseport array update 32 bit fd", "err:%d errno:%d\n", err, errno); err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie); - CHECK(err != -1 || errno != ENOSPC, + CHECK(err >= 0 || errno != ENOSPC, "reuseport array lookup 32 bit fd", "err:%d errno:%d\n", err, errno); close(fd); @@ -1798,6 +1815,8 @@ int main(void) { srand(time(NULL)); + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + map_flags = 0; 
run_all_tests(); diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 6396932b97e2..6f103106a39b 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -737,6 +737,9 @@ int main(int argc, char **argv) if (err) return err; + /* Use libbpf 1.0 API mode */ + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + libbpf_set_print(libbpf_print_fn); srand(time(NULL)); diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index dda52cb649dc..8ef7f334e715 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -249,16 +249,17 @@ extern int test__join_cgroup(const char *path); #define ASSERT_OK_PTR(ptr, name) ({ \ static int duration = 0; \ const void *___res = (ptr); \ - bool ___ok = !IS_ERR_OR_NULL(___res); \ - CHECK(!___ok, (name), \ - "unexpected error: %ld\n", PTR_ERR(___res)); \ + int ___err = libbpf_get_error(___res); \ + bool ___ok = ___err == 0; \ + CHECK(!___ok, (name), "unexpected error: %d\n", ___err); \ ___ok; \ }) #define ASSERT_ERR_PTR(ptr, name) ({ \ static int duration = 0; \ const void *___res = (ptr); \ - bool ___ok = IS_ERR(___res); \ + int ___err = libbpf_get_error(___res); \ + bool ___ok = ___err != 0; \ CHECK(!___ok, (name), "unexpected pointer: %p\n", ___res); \ ___ok; \ }) diff --git a/tools/testing/selftests/bpf/test_tcpnotify_user.c b/tools/testing/selftests/bpf/test_tcpnotify_user.c index 73da7fe8c152..4a39304cc5a6 100644 --- a/tools/testing/selftests/bpf/test_tcpnotify_user.c +++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c @@ -82,6 +82,8 @@ int main(int argc, char **argv) cpu_set_t cpuset; __u32 key = 0; + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + CPU_ZERO(&cpuset); CPU_SET(0, &cpuset); pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset); @@ -116,7 +118,7 @@ int main(int argc, char **argv) pb_opts.sample_cb = dummyfn; pb = 
perf_buffer__new(bpf_map__fd(perf_map), 8, &pb_opts); - if (IS_ERR(pb)) + if (!pb) goto err; pthread_create(&tid, NULL, poller_thread, pb); @@ -163,7 +165,6 @@ err: bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS); close(cg_fd); cleanup_cgroup_environment(); - if (!IS_ERR_OR_NULL(pb)) - perf_buffer__free(pb); + perf_buffer__free(pb); return error; } diff --git a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh new file mode 100755 index 000000000000..1538373157e3 --- /dev/null +++ b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh @@ -0,0 +1,204 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test topology: +# - - - - - - - - - - - - - - - - - - - - - - - - - +# | veth1 veth2 veth3 | ... init net +# - -| - - - - - - | - - - - - - | - - +# --------- --------- --------- +# | veth0 | | veth0 | | veth0 | ... +# --------- --------- --------- +# ns1 ns2 ns3 +# +# Test modules: +# XDP modes: generic, native, native + egress_prog +# +# Test cases: +# ARP: Testing BPF_F_BROADCAST, the ingress interface also should receive +# the redirects. +# ns1 -> gw: ns1, ns2, ns3, should receive the arp request +# IPv4: Testing BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS, the ingress +# interface should not receive the redirects. +# ns1 -> gw: ns1 should not receive, ns2, ns3 should receive redirects. +# IPv6: Testing none flag, all the pkts should be redirected back +# ping test: ns1 -> ns2 (block), echo requests will be redirect back +# egress_prog: +# all src mac should be egress interface's mac + +# netns numbers +NUM=3 +IFACES="" +DRV_MODE="xdpgeneric xdpdrv xdpegress" +PASS=0 +FAIL=0 + +test_pass() +{ + echo "Pass: $@" + PASS=$((PASS + 1)) +} + +test_fail() +{ + echo "fail: $@" + FAIL=$((FAIL + 1)) +} + +clean_up() +{ + for i in $(seq $NUM); do + ip link del veth$i 2> /dev/null + ip netns del ns$i 2> /dev/null + done +} + +# Kselftest framework requirement - SKIP code is 4. 
+check_env() +{ + ip link set dev lo xdpgeneric off &>/dev/null + if [ $? -ne 0 ];then + echo "selftests: [SKIP] Could not run test without the ip xdpgeneric support" + exit 4 + fi + + which tcpdump &>/dev/null + if [ $? -ne 0 ];then + echo "selftests: [SKIP] Could not run test without tcpdump" + exit 4 + fi +} + +setup_ns() +{ + local mode=$1 + IFACES="" + + if [ "$mode" = "xdpegress" ]; then + mode="xdpdrv" + fi + + for i in $(seq $NUM); do + ip netns add ns$i + ip link add veth$i type veth peer name veth0 netns ns$i + ip link set veth$i up + ip -n ns$i link set veth0 up + + ip -n ns$i addr add 192.0.2.$i/24 dev veth0 + ip -n ns$i addr add 2001:db8::$i/64 dev veth0 + # Add a neigh entry for IPv4 ping test + ip -n ns$i neigh add 192.0.2.253 lladdr 00:00:00:00:00:01 dev veth0 + ip -n ns$i link set veth0 $mode obj \ + xdp_dummy.o sec xdp_dummy &> /dev/null || \ + { test_fail "Unable to load dummy xdp" && exit 1; } + IFACES="$IFACES veth$i" + veth_mac[$i]=$(ip link show veth$i | awk '/link\/ether/ {print $2}') + done +} + +do_egress_tests() +{ + local mode=$1 + + # mac test + ip netns exec ns2 tcpdump -e -i veth0 -nn -l -e &> mac_ns1-2_${mode}.log & + ip netns exec ns3 tcpdump -e -i veth0 -nn -l -e &> mac_ns1-3_${mode}.log & + sleep 0.5 + ip netns exec ns1 ping 192.0.2.254 -i 0.1 -c 4 &> /dev/null + sleep 0.5 + pkill -9 tcpdump + + # mac check + grep -q "${veth_mac[2]} > ff:ff:ff:ff:ff:ff" mac_ns1-2_${mode}.log && \ + test_pass "$mode mac ns1-2" || test_fail "$mode mac ns1-2" + grep -q "${veth_mac[3]} > ff:ff:ff:ff:ff:ff" mac_ns1-3_${mode}.log && \ + test_pass "$mode mac ns1-3" || test_fail "$mode mac ns1-3" +} + +do_ping_tests() +{ + local mode=$1 + + # ping6 test: echo request should be redirect back to itself, not others + ip netns exec ns1 ip neigh add 2001:db8::2 dev veth0 lladdr 00:00:00:00:00:02 + + ip netns exec ns1 tcpdump -i veth0 -nn -l -e &> ns1-1_${mode}.log & + ip netns exec ns2 tcpdump -i veth0 -nn -l -e &> ns1-2_${mode}.log & + ip netns exec ns3 
tcpdump -i veth0 -nn -l -e &> ns1-3_${mode}.log & + sleep 0.5 + # ARP test + ip netns exec ns1 ping 192.0.2.254 -i 0.1 -c 4 &> /dev/null + # IPv4 test + ip netns exec ns1 ping 192.0.2.253 -i 0.1 -c 4 &> /dev/null + # IPv6 test + ip netns exec ns1 ping6 2001:db8::2 -i 0.1 -c 2 &> /dev/null + sleep 0.5 + pkill -9 tcpdump + + # All netns should receive the redirect arp requests + [ $(grep -c "who-has 192.0.2.254" ns1-1_${mode}.log) -gt 4 ] && \ + test_pass "$mode arp(F_BROADCAST) ns1-1" || \ + test_fail "$mode arp(F_BROADCAST) ns1-1" + [ $(grep -c "who-has 192.0.2.254" ns1-2_${mode}.log) -le 4 ] && \ + test_pass "$mode arp(F_BROADCAST) ns1-2" || \ + test_fail "$mode arp(F_BROADCAST) ns1-2" + [ $(grep -c "who-has 192.0.2.254" ns1-3_${mode}.log) -le 4 ] && \ + test_pass "$mode arp(F_BROADCAST) ns1-3" || \ + test_fail "$mode arp(F_BROADCAST) ns1-3" + + # ns1 should not receive the redirect echo request, others should + [ $(grep -c "ICMP echo request" ns1-1_${mode}.log) -eq 4 ] && \ + test_pass "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-1" || \ + test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-1" + [ $(grep -c "ICMP echo request" ns1-2_${mode}.log) -eq 4 ] && \ + test_pass "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-2" || \ + test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-2" + [ $(grep -c "ICMP echo request" ns1-3_${mode}.log) -eq 4 ] && \ + test_pass "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-3" || \ + test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-3" + + # ns1 should receive the echo request, ns2 should not + [ $(grep -c "ICMP6, echo request" ns1-1_${mode}.log) -eq 4 ] && \ + test_pass "$mode IPv6 (no flags) ns1-1" || \ + test_fail "$mode IPv6 (no flags) ns1-1" + [ $(grep -c "ICMP6, echo request" ns1-2_${mode}.log) -eq 0 ] && \ + test_pass "$mode IPv6 (no flags) ns1-2" || \ + test_fail "$mode IPv6 (no flags) ns1-2" +} + +do_tests() +{ + local mode=$1 + local drv_p + + case ${mode} in + xdpdrv) drv_p="-N";; + xdpegress) 
drv_p="-X";; + xdpgeneric) drv_p="-S";; + esac + + ./xdp_redirect_multi $drv_p $IFACES &> xdp_redirect_${mode}.log & + xdp_pid=$! + sleep 1 + + if [ "$mode" = "xdpegress" ]; then + do_egress_tests $mode + else + do_ping_tests $mode + fi + + kill $xdp_pid +} + +trap clean_up 0 2 3 6 9 + +check_env +rm -f xdp_redirect_*.log ns*.log mac_ns*.log + +for mode in ${DRV_MODE}; do + setup_ns $mode + do_tests $mode + clean_up +done + +echo "Summary: PASS $PASS, FAIL $FAIL" +[ $FAIL -eq 0 ] && exit 0 || exit 1 diff --git a/tools/testing/selftests/bpf/xdp_redirect_multi.c b/tools/testing/selftests/bpf/xdp_redirect_multi.c new file mode 100644 index 000000000000..3696a8f32c23 --- /dev/null +++ b/tools/testing/selftests/bpf/xdp_redirect_multi.c @@ -0,0 +1,226 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <linux/if_link.h> +#include <assert.h> +#include <errno.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <net/if.h> +#include <unistd.h> +#include <libgen.h> +#include <sys/resource.h> +#include <sys/ioctl.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <netinet/in.h> + +#include "bpf_util.h" +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + +#define MAX_IFACE_NUM 32 +#define MAX_INDEX_NUM 1024 + +static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; +static int ifaces[MAX_IFACE_NUM] = {}; + +static void int_exit(int sig) +{ + __u32 prog_id = 0; + int i; + + for (i = 0; ifaces[i] > 0; i++) { + if (bpf_get_link_xdp_id(ifaces[i], &prog_id, xdp_flags)) { + printf("bpf_get_link_xdp_id failed\n"); + exit(1); + } + if (prog_id) + bpf_set_link_xdp_fd(ifaces[i], -1, xdp_flags); + } + + exit(0); +} + +static int get_mac_addr(unsigned int ifindex, void *mac_addr) +{ + char ifname[IF_NAMESIZE]; + struct ifreq ifr; + int fd, ret = -1; + + fd = socket(AF_INET, SOCK_DGRAM, 0); + if (fd < 0) + return ret; + + if (!if_indextoname(ifindex, ifname)) + goto err_out; + + strcpy(ifr.ifr_name, ifname); + + if 
(ioctl(fd, SIOCGIFHWADDR, &ifr) != 0) + goto err_out; + + memcpy(mac_addr, ifr.ifr_hwaddr.sa_data, 6 * sizeof(char)); + ret = 0; + +err_out: + close(fd); + return ret; +} + +static void usage(const char *prog) +{ + fprintf(stderr, + "usage: %s [OPTS] <IFNAME|IFINDEX> <IFNAME|IFINDEX> ...\n" + "OPTS:\n" + " -S use skb-mode\n" + " -N enforce native mode\n" + " -F force loading prog\n" + " -X load xdp program on egress\n", + prog); +} + +int main(int argc, char **argv) +{ + int prog_fd, group_all, mac_map; + struct bpf_program *ingress_prog, *egress_prog; + struct bpf_prog_load_attr prog_load_attr = { + .prog_type = BPF_PROG_TYPE_UNSPEC, + }; + int i, ret, opt, egress_prog_fd = 0; + struct bpf_devmap_val devmap_val; + bool attach_egress_prog = false; + unsigned char mac_addr[6]; + char ifname[IF_NAMESIZE]; + struct bpf_object *obj; + unsigned int ifindex; + char filename[256]; + + while ((opt = getopt(argc, argv, "SNFX")) != -1) { + switch (opt) { + case 'S': + xdp_flags |= XDP_FLAGS_SKB_MODE; + break; + case 'N': + /* default, set below */ + break; + case 'F': + xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; + break; + case 'X': + attach_egress_prog = true; + break; + default: + usage(basename(argv[0])); + return 1; + } + } + + if (!(xdp_flags & XDP_FLAGS_SKB_MODE)) { + xdp_flags |= XDP_FLAGS_DRV_MODE; + } else if (attach_egress_prog) { + printf("Load xdp program on egress with SKB mode not supported yet\n"); + goto err_out; + } + + if (optind == argc) { + printf("usage: %s <IFNAME|IFINDEX> <IFNAME|IFINDEX> ...\n", argv[0]); + goto err_out; + } + + printf("Get interfaces"); + for (i = 0; i < MAX_IFACE_NUM && argv[optind + i]; i++) { + ifaces[i] = if_nametoindex(argv[optind + i]); + if (!ifaces[i]) + ifaces[i] = strtoul(argv[optind + i], NULL, 0); + if (!if_indextoname(ifaces[i], ifname)) { + perror("Invalid interface name or i"); + goto err_out; + } + if (ifaces[i] > MAX_INDEX_NUM) { + printf("Interface index to large\n"); + goto err_out; + } + printf(" %d", ifaces[i]); 
+ } + printf("\n"); + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + prog_load_attr.file = filename; + + if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) + goto err_out; + + if (attach_egress_prog) + group_all = bpf_object__find_map_fd_by_name(obj, "map_egress"); + else + group_all = bpf_object__find_map_fd_by_name(obj, "map_all"); + mac_map = bpf_object__find_map_fd_by_name(obj, "mac_map"); + + if (group_all < 0 || mac_map < 0) { + printf("bpf_object__find_map_fd_by_name failed\n"); + goto err_out; + } + + if (attach_egress_prog) { + /* Find ingress/egress prog for 2nd xdp prog */ + ingress_prog = bpf_object__find_program_by_name(obj, "xdp_redirect_map_all_prog"); + egress_prog = bpf_object__find_program_by_name(obj, "xdp_devmap_prog"); + if (!ingress_prog || !egress_prog) { + printf("finding ingress/egress_prog in obj file failed\n"); + goto err_out; + } + prog_fd = bpf_program__fd(ingress_prog); + egress_prog_fd = bpf_program__fd(egress_prog); + if (prog_fd < 0 || egress_prog_fd < 0) { + printf("find egress_prog fd failed\n"); + goto err_out; + } + } + + signal(SIGINT, int_exit); + signal(SIGTERM, int_exit); + + /* Init forward multicast groups and exclude group */ + for (i = 0; ifaces[i] > 0; i++) { + ifindex = ifaces[i]; + + if (attach_egress_prog) { + ret = get_mac_addr(ifindex, mac_addr); + if (ret < 0) { + printf("get interface %d mac failed\n", ifindex); + goto err_out; + } + ret = bpf_map_update_elem(mac_map, &ifindex, mac_addr, 0); + if (ret) { + perror("bpf_update_elem mac_map failed\n"); + goto err_out; + } + } + + /* Add all the interfaces to group all */ + devmap_val.ifindex = ifindex; + devmap_val.bpf_prog.fd = egress_prog_fd; + ret = bpf_map_update_elem(group_all, &ifindex, &devmap_val, 0); + if (ret) { + perror("bpf_map_update_elem"); + goto err_out; + } + + /* bind prog_fd to each interface */ + ret = bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags); + if (ret) { + printf("Set xdp fd failed on %d\n", ifindex); + goto 
err_out; + } + } + + /* sleep some time for testing */ + sleep(999); + + return 0; + +err_out: + return 1; +} diff --git a/tools/testing/selftests/cgroup/.gitignore b/tools/testing/selftests/cgroup/.gitignore index 84cfcabea838..be9643ef6285 100644 --- a/tools/testing/selftests/cgroup/.gitignore +++ b/tools/testing/selftests/cgroup/.gitignore @@ -2,4 +2,5 @@ test_memcontrol test_core test_freezer -test_kmem
\ No newline at end of file +test_kmem +test_kill diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile index f027d933595b..59e222460581 100644 --- a/tools/testing/selftests/cgroup/Makefile +++ b/tools/testing/selftests/cgroup/Makefile @@ -9,6 +9,7 @@ TEST_GEN_PROGS = test_memcontrol TEST_GEN_PROGS += test_kmem TEST_GEN_PROGS += test_core TEST_GEN_PROGS += test_freezer +TEST_GEN_PROGS += test_kill include ../lib.mk @@ -16,3 +17,4 @@ $(OUTPUT)/test_memcontrol: cgroup_util.c ../clone3/clone3_selftests.h $(OUTPUT)/test_kmem: cgroup_util.c ../clone3/clone3_selftests.h $(OUTPUT)/test_core: cgroup_util.c ../clone3/clone3_selftests.h $(OUTPUT)/test_freezer: cgroup_util.c ../clone3/clone3_selftests.h +$(OUTPUT)/test_kill: cgroup_util.c ../clone3/clone3_selftests.h ../pidfd/pidfd.h diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c index 027014662fb2..623cec04ad42 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.c +++ b/tools/testing/selftests/cgroup/cgroup_util.c @@ -5,10 +5,12 @@ #include <errno.h> #include <fcntl.h> #include <linux/limits.h> +#include <poll.h> #include <signal.h> #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <sys/inotify.h> #include <sys/stat.h> #include <sys/types.h> #include <sys/wait.h> @@ -252,6 +254,10 @@ int cg_killall(const char *cgroup) char buf[PAGE_SIZE]; char *ptr = buf; + /* If cgroup.kill exists use it. 
*/ + if (!cg_write(cgroup, "cgroup.kill", "1")) + return 0; + if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf))) return -1; @@ -576,3 +582,48 @@ int clone_into_cgroup_run_wait(const char *cgroup) (void)clone_reap(pid, WEXITED); return 0; } + +int cg_prepare_for_wait(const char *cgroup) +{ + int fd, ret = -1; + + fd = inotify_init1(0); + if (fd == -1) + return fd; + + ret = inotify_add_watch(fd, cg_control(cgroup, "cgroup.events"), + IN_MODIFY); + if (ret == -1) { + close(fd); + fd = -1; + } + + return fd; +} + +int cg_wait_for(int fd) +{ + int ret = -1; + struct pollfd fds = { + .fd = fd, + .events = POLLIN, + }; + + while (true) { + ret = poll(&fds, 1, 10000); + + if (ret == -1) { + if (errno == EINTR) + continue; + + break; + } + + if (ret > 0 && fds.revents & POLLIN) { + ret = 0; + break; + } + } + + return ret; +} diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h index 5a1305dd1f0b..82e59cdf16e7 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.h +++ b/tools/testing/selftests/cgroup/cgroup_util.h @@ -54,3 +54,5 @@ extern pid_t clone_into_cgroup(int cgroup_fd); extern int clone_reap(pid_t pid, int options); extern int clone_into_cgroup_run_wait(const char *cgroup); extern int dirfd_open_opath(const char *dir); +extern int cg_prepare_for_wait(const char *cgroup); +extern int cg_wait_for(int fd); diff --git a/tools/testing/selftests/cgroup/test_freezer.c b/tools/testing/selftests/cgroup/test_freezer.c index 23d8fa4a3e4e..ff519029f6f4 100644 --- a/tools/testing/selftests/cgroup/test_freezer.c +++ b/tools/testing/selftests/cgroup/test_freezer.c @@ -7,9 +7,7 @@ #include <unistd.h> #include <stdio.h> #include <errno.h> -#include <poll.h> #include <stdlib.h> -#include <sys/inotify.h> #include <string.h> #include <sys/wait.h> @@ -55,61 +53,6 @@ static int cg_freeze_nowait(const char *cgroup, bool freeze) } /* - * Prepare for waiting on cgroup.events file. 
- */ -static int cg_prepare_for_wait(const char *cgroup) -{ - int fd, ret = -1; - - fd = inotify_init1(0); - if (fd == -1) { - debug("Error: inotify_init1() failed\n"); - return fd; - } - - ret = inotify_add_watch(fd, cg_control(cgroup, "cgroup.events"), - IN_MODIFY); - if (ret == -1) { - debug("Error: inotify_add_watch() failed\n"); - close(fd); - fd = -1; - } - - return fd; -} - -/* - * Wait for an event. If there are no events for 10 seconds, - * treat this an error. - */ -static int cg_wait_for(int fd) -{ - int ret = -1; - struct pollfd fds = { - .fd = fd, - .events = POLLIN, - }; - - while (true) { - ret = poll(&fds, 1, 10000); - - if (ret == -1) { - if (errno == EINTR) - continue; - debug("Error: poll() failed\n"); - break; - } - - if (ret > 0 && fds.revents & POLLIN) { - ret = 0; - break; - } - } - - return ret; -} - -/* * Attach a task to the given cgroup and wait for a cgroup frozen event. * All transient events (e.g. populated) are ignored. */ diff --git a/tools/testing/selftests/cgroup/test_kill.c b/tools/testing/selftests/cgroup/test_kill.c new file mode 100644 index 000000000000..6153690319c9 --- /dev/null +++ b/tools/testing/selftests/cgroup/test_kill.c @@ -0,0 +1,297 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include <errno.h> +#include <linux/limits.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <unistd.h> + +#include "../kselftest.h" +#include "../pidfd/pidfd.h" +#include "cgroup_util.h" + +/* + * Kill the given cgroup and wait for the inotify signal. + * If there are no events in 10 seconds, treat this as an error. + * Then check that the cgroup is in the desired state. 
+ */ +static int cg_kill_wait(const char *cgroup) +{ + int fd, ret = -1; + + fd = cg_prepare_for_wait(cgroup); + if (fd < 0) + return fd; + + ret = cg_write(cgroup, "cgroup.kill", "1"); + if (ret) + goto out; + + ret = cg_wait_for(fd); + if (ret) + goto out; + +out: + close(fd); + return ret; +} + +/* + * A simple process running in a sleep loop until being + * re-parented. + */ +static int child_fn(const char *cgroup, void *arg) +{ + int ppid = getppid(); + + while (getppid() == ppid) + usleep(1000); + + return getppid() == ppid; +} + +static int test_cgkill_simple(const char *root) +{ + pid_t pids[100]; + int ret = KSFT_FAIL; + char *cgroup = NULL; + int i; + + cgroup = cg_name(root, "cg_test_simple"); + if (!cgroup) + goto cleanup; + + if (cg_create(cgroup)) + goto cleanup; + + for (i = 0; i < 100; i++) + pids[i] = cg_run_nowait(cgroup, child_fn, NULL); + + if (cg_wait_for_proc_count(cgroup, 100)) + goto cleanup; + + if (cg_read_strcmp(cgroup, "cgroup.events", "populated 1\n")) + goto cleanup; + + if (cg_kill_wait(cgroup)) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + for (i = 0; i < 100; i++) + wait_for_pid(pids[i]); + + if (ret == KSFT_PASS && + cg_read_strcmp(cgroup, "cgroup.events", "populated 0\n")) + ret = KSFT_FAIL; + + if (cgroup) + cg_destroy(cgroup); + free(cgroup); + return ret; +} + +/* + * The test creates the following hierarchy: + * A + * / / \ \ + * B E I K + * /\ | + * C D F + * | + * G + * | + * H + * + * with a process in C, H and 3 processes in K. + * Then it tries to kill the whole tree. 
+ */ +static int test_cgkill_tree(const char *root) +{ + pid_t pids[5]; + char *cgroup[10] = {0}; + int ret = KSFT_FAIL; + int i; + + cgroup[0] = cg_name(root, "cg_test_tree_A"); + if (!cgroup[0]) + goto cleanup; + + cgroup[1] = cg_name(cgroup[0], "B"); + if (!cgroup[1]) + goto cleanup; + + cgroup[2] = cg_name(cgroup[1], "C"); + if (!cgroup[2]) + goto cleanup; + + cgroup[3] = cg_name(cgroup[1], "D"); + if (!cgroup[3]) + goto cleanup; + + cgroup[4] = cg_name(cgroup[0], "E"); + if (!cgroup[4]) + goto cleanup; + + cgroup[5] = cg_name(cgroup[4], "F"); + if (!cgroup[5]) + goto cleanup; + + cgroup[6] = cg_name(cgroup[5], "G"); + if (!cgroup[6]) + goto cleanup; + + cgroup[7] = cg_name(cgroup[6], "H"); + if (!cgroup[7]) + goto cleanup; + + cgroup[8] = cg_name(cgroup[0], "I"); + if (!cgroup[8]) + goto cleanup; + + cgroup[9] = cg_name(cgroup[0], "K"); + if (!cgroup[9]) + goto cleanup; + + for (i = 0; i < 10; i++) + if (cg_create(cgroup[i])) + goto cleanup; + + pids[0] = cg_run_nowait(cgroup[2], child_fn, NULL); + pids[1] = cg_run_nowait(cgroup[7], child_fn, NULL); + pids[2] = cg_run_nowait(cgroup[9], child_fn, NULL); + pids[3] = cg_run_nowait(cgroup[9], child_fn, NULL); + pids[4] = cg_run_nowait(cgroup[9], child_fn, NULL); + + /* + * Wait until all child processes will enter + * corresponding cgroups. + */ + + if (cg_wait_for_proc_count(cgroup[2], 1) || + cg_wait_for_proc_count(cgroup[7], 1) || + cg_wait_for_proc_count(cgroup[9], 3)) + goto cleanup; + + /* + * Kill A and check that we get an empty notification. 
+ */ + if (cg_kill_wait(cgroup[0])) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + for (i = 0; i < 5; i++) + wait_for_pid(pids[i]); + + if (ret == KSFT_PASS && + cg_read_strcmp(cgroup[0], "cgroup.events", "populated 0\n")) + ret = KSFT_FAIL; + + for (i = 9; i >= 0 && cgroup[i]; i--) { + cg_destroy(cgroup[i]); + free(cgroup[i]); + } + + return ret; +} + +static int forkbomb_fn(const char *cgroup, void *arg) +{ + int ppid; + + fork(); + fork(); + + ppid = getppid(); + + while (getppid() == ppid) + usleep(1000); + + return getppid() == ppid; +} + +/* + * The test runs a fork bomb in a cgroup and tries to kill it. + */ +static int test_cgkill_forkbomb(const char *root) +{ + int ret = KSFT_FAIL; + char *cgroup = NULL; + pid_t pid = -ESRCH; + + cgroup = cg_name(root, "cg_forkbomb_test"); + if (!cgroup) + goto cleanup; + + if (cg_create(cgroup)) + goto cleanup; + + pid = cg_run_nowait(cgroup, forkbomb_fn, NULL); + if (pid < 0) + goto cleanup; + + usleep(100000); + + if (cg_kill_wait(cgroup)) + goto cleanup; + + if (cg_wait_for_proc_count(cgroup, 0)) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + if (pid > 0) + wait_for_pid(pid); + + if (ret == KSFT_PASS && + cg_read_strcmp(cgroup, "cgroup.events", "populated 0\n")) + ret = KSFT_FAIL; + + if (cgroup) + cg_destroy(cgroup); + free(cgroup); + return ret; +} + +#define T(x) { x, #x } +struct cgkill_test { + int (*fn)(const char *root); + const char *name; +} tests[] = { + T(test_cgkill_simple), + T(test_cgkill_tree), + T(test_cgkill_forkbomb), +}; +#undef T + +int main(int argc, char *argv[]) +{ + char root[PATH_MAX]; + int i, ret = EXIT_SUCCESS; + + if (cg_find_unified_root(root, sizeof(root))) + ksft_exit_skip("cgroup v2 isn't mounted\n"); + for (i = 0; i < ARRAY_SIZE(tests); i++) { + switch (tests[i].fn(root)) { + case KSFT_PASS: + ksft_test_result_pass("%s\n", tests[i].name); + break; + case KSFT_SKIP: + ksft_test_result_skip("%s\n", tests[i].name); + break; + default: + ret = EXIT_FAILURE; + 
ksft_test_result_fail("%s\n", tests[i].name); + break; + } + } + + return ret; +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh index 4029833f7e27..160891dcb4bc 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh @@ -109,6 +109,9 @@ router_destroy() __addr_add_del $rp1 del 192.0.2.2/24 2001:db8:1::2/64 tc qdisc del dev $rp2 clsact + + ip link set dev $rp2 down + ip link set dev $rp1 down } setup_prepare() diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh index 42d44e27802c..190c1b6b5365 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh @@ -111,6 +111,9 @@ router_destroy() __addr_add_del $rp1 del 192.0.2.2/24 2001:db8:1::2/64 tc qdisc del dev $rp2 clsact + + ip link set dev $rp2 down + ip link set dev $rp1 down } setup_prepare() diff --git a/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh index 65f43a7ce9c9..1e9a4aff76a2 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh @@ -7,6 +7,8 @@ PORT_NUM_NETIFS=0 +declare -a unsplit + port_setup_prepare() { : @@ -20,12 +22,12 @@ port_cleanup() devlink port unsplit $port check_err $? "Did not unsplit $netdev" done + unsplit=() } split_all_ports() { local should_fail=$1; shift - local -a unsplit # Loop over the splittable netdevs and create tuples of netdev along # with its width. 
For example: diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh index 5cbff8038f84..28a570006d4d 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh @@ -93,7 +93,9 @@ switch_destroy() lldptool -T -i $swp1 -V APP -d $(dscp_map 10) >/dev/null lldpad_app_wait_del + ip link set dev $swp2 down ip link set dev $swp2 nomaster + ip link set dev $swp1 down ip link set dev $swp1 nomaster ip link del dev br1 } diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh index 27de3d9ed08e..f4493ef9cca1 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh @@ -29,37 +29,38 @@ cleanup() get_prio_pg() { - __mlnx_qos -i $swp | sed -n '/^PFC/,/^[^[:space:]]/p' | - grep buffer | sed 's/ \+/ /g' | cut -d' ' -f 2- + # Produces a string of numbers "<B0> <B1> ... <B7> ", where BX is number + # of buffer that priority X is mapped to. + dcb -j buffer show dev $swp | + jq -r '[.prio_buffer | .[] | tostring + " "] | add' } get_prio_pfc() { - __mlnx_qos -i $swp | sed -n '/^PFC/,/^[^[:space:]]/p' | - grep enabled | sed 's/ \+/ /g' | cut -d' ' -f 2- + # Produces a string of numbers "<P0> <P1> ... <P7> ", where PX denotes + # whether priority X has PFC enabled (the value is 1) or disabled (0). + dcb -j pfc show dev $swp | + jq -r '[.prio_pfc | .[] | if . then "1 " else "0 " end] | add' } get_prio_tc() { - __mlnx_qos -i $swp | sed -n '/^tc/,$p' | - awk '/^tc/ { TC = $2 } - /priority:/ { PRIO[$2]=TC } - END { - for (i in PRIO) - printf("%d ", PRIO[i]) - }' + # Produces a string of numbers "<T0> <T1> ... <T7> ", where TC is number + # of TC that priority X is mapped to. 
+ dcb -j ets show dev $swp | + jq -r '[.prio_tc | .[] | tostring + " "] | add' } get_buf_size() { local idx=$1; shift - __mlnx_qos -i $swp | grep Receive | sed 's/.*: //' | cut -d, -f $((idx + 1)) + dcb -j buffer show dev $swp | jq ".buffer_size[$idx]" } get_tot_size() { - __mlnx_qos -i $swp | grep Receive | sed 's/.*total_size=//' + dcb -j buffer show dev $swp | jq '.total_size' } check_prio_pg() @@ -121,18 +122,18 @@ test_dcb_ets() { RET=0 - __mlnx_qos -i $swp --prio_tc=0,2,4,6,1,3,5,7 > /dev/null + dcb ets set dev $swp prio-tc 0:0 1:2 2:4 3:6 4:1 5:3 6:5 7:7 check_prio_pg "0 2 4 6 1 3 5 7 " check_prio_tc "0 2 4 6 1 3 5 7 " check_prio_pfc "0 0 0 0 0 0 0 0 " - __mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null + dcb ets set dev $swp prio-tc all:0 check_prio_pg "0 0 0 0 0 0 0 0 " check_prio_tc "0 0 0 0 0 0 0 0 " - __mlnx_qos -i $swp --prio2buffer=1,3,5,7,0,2,4,6 &> /dev/null + dcb buffer set dev $swp prio-buffer 0:1 1:3 2:5 3:7 4:0 5:2 6:4 7:6 2>/dev/null check_fail $? "prio2buffer accepted in DCB mode" log_test "Configuring headroom through ETS" @@ -174,7 +175,7 @@ test_pfc() { RET=0 - __mlnx_qos -i $swp --prio_tc=0,0,0,0,0,1,2,3 > /dev/null + dcb ets set dev $swp prio-tc all:0 5:1 6:2 7:3 local buf0size=$(get_buf_size 0) local buf1size=$(get_buf_size 1) @@ -193,7 +194,7 @@ test_pfc() RET=0 - __mlnx_qos -i $swp --pfc=0,0,0,0,0,1,1,1 --cable_len=0 > /dev/null + dcb pfc set dev $swp prio-pfc all:off 5:on 6:on 7:on delay 0 check_prio_pg "0 0 0 0 0 1 2 3 " check_prio_pfc "0 0 0 0 0 1 1 1 " @@ -210,7 +211,7 @@ test_pfc() RET=0 - __mlnx_qos -i $swp --pfc=0,0,0,0,0,1,1,1 --cable_len=1000 > /dev/null + dcb pfc set dev $swp delay 1000 check_buf_size 0 "== $buf0size" check_buf_size 1 "> $buf1size" @@ -221,8 +222,8 @@ test_pfc() RET=0 - __mlnx_qos -i $swp --pfc=0,0,0,0,0,0,0,0 --cable_len=0 > /dev/null - __mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null + dcb pfc set dev $swp prio-pfc all:off delay 0 + dcb ets set dev $swp prio-tc all:0 check_prio_pg "0 0 0 0 0 
0 0 0 " check_prio_tc "0 0 0 0 0 0 0 0 " @@ -242,13 +243,13 @@ test_tc_priomap() { RET=0 - __mlnx_qos -i $swp --prio_tc=0,1,2,3,4,5,6,7 > /dev/null + dcb ets set dev $swp prio-tc 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 check_prio_pg "0 1 2 3 4 5 6 7 " tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M check_prio_pg "0 0 0 0 0 0 0 0 " - __mlnx_qos -i $swp --prio2buffer=1,3,5,7,0,2,4,6 > /dev/null + dcb buffer set dev $swp prio-buffer 0:1 1:3 2:5 3:7 4:0 5:2 6:4 7:6 check_prio_pg "1 3 5 7 0 2 4 6 " tc qdisc delete dev $swp root @@ -256,9 +257,9 @@ test_tc_priomap() # Clean up. tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M - __mlnx_qos -i $swp --prio2buffer=0,0,0,0,0,0,0,0 > /dev/null + dcb buffer set dev $swp prio-buffer all:0 tc qdisc delete dev $swp root - __mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null + dcb ets set dev $swp prio-tc all:0 log_test "TC: priomap" } @@ -270,12 +271,12 @@ test_tc_sizes() RET=0 - __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 &> /dev/null + dcb buffer set dev $swp buffer-size all:0 0:$size 2>/dev/null check_fail $? "buffer_size should fail before qdisc is added" tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M - __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null + dcb buffer set dev $swp buffer-size all:0 0:$size check_err $? "buffer_size should pass after qdisc is added" check_buf_size 0 "== $size" "set size: " @@ -283,26 +284,26 @@ test_tc_sizes() check_buf_size 0 "== $size" "set MTU: " mtu_restore $swp - __mlnx_qos -i $swp --buffer_size=0,0,0,0,0,0,0,0 > /dev/null + dcb buffer set dev $swp buffer-size all:0 # After replacing the qdisc for the same kind, buffer_size still has to # work. 
tc qdisc replace dev $swp root handle 1: bfifo limit 1M - __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null + dcb buffer set dev $swp buffer-size all:0 0:$size check_buf_size 0 "== $size" "post replace, set size: " - __mlnx_qos -i $swp --buffer_size=0,0,0,0,0,0,0,0 > /dev/null + dcb buffer set dev $swp buffer-size all:0 # Likewise after replacing for a different kind. tc qdisc replace dev $swp root handle 2: prio bands 8 - __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null + dcb buffer set dev $swp buffer-size all:0 0:$size check_buf_size 0 "== $size" "post replace different kind, set size: " tc qdisc delete dev $swp root - __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 &> /dev/null + dcb buffer set dev $swp buffer-size all:0 0:$size 2>/dev/null check_fail $? "buffer_size should fail after qdisc is deleted" log_test "TC: buffer size" @@ -363,10 +364,10 @@ test_tc_int_buf() tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M test_int_buf "TC: " - __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null + dcb buffer set dev $swp buffer-size all:0 0:$size test_int_buf "TC+buffsize: " - __mlnx_qos -i $swp --buffer_size=0,0,0,0,0,0,0,0 > /dev/null + dcb buffer set dev $swp buffer-size all:0 tc qdisc delete dev $swp root } diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh index 0bf76f13c030..faa51012cdac 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh @@ -82,17 +82,3 @@ bail_on_lldpad() fi fi } - -__mlnx_qos() -{ - local err - - mlnx_qos "$@" 2>/dev/null - err=$? 
- - if ((err)); then - echo "Error ($err) in mlnx_qos $@" >/dev/stderr - fi - - return $err -} diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh index 5c7700212f75..5d5622fc2758 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh @@ -171,7 +171,7 @@ switch_create() # assignment. tc qdisc replace dev $swp1 root handle 1: \ ets bands 8 strict 8 priomap 7 6 - __mlnx_qos -i $swp1 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null + dcb buffer set dev $swp1 prio-buffer all:0 1:1 # $swp2 # ----- @@ -209,8 +209,8 @@ switch_create() # the lossless prio into a buffer of its own. Don't bother with buffer # sizes though, there is not going to be any pressure in the "backward" # direction. - __mlnx_qos -i $swp3 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null - __mlnx_qos -i $swp3 --pfc=0,1,0,0,0,0,0,0 >/dev/null + dcb buffer set dev $swp3 prio-buffer all:0 1:1 + dcb pfc set dev $swp3 prio-pfc all:off 1:on # $swp4 # ----- @@ -226,11 +226,11 @@ switch_create() # Configure qdisc so that we can hand-tune headroom. tc qdisc replace dev $swp4 root handle 1: \ ets bands 8 strict 8 priomap 7 6 - __mlnx_qos -i $swp4 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null - __mlnx_qos -i $swp4 --pfc=0,1,0,0,0,0,0,0 >/dev/null + dcb buffer set dev $swp4 prio-buffer all:0 1:1 + dcb pfc set dev $swp4 prio-pfc all:off 1:on # PG0 will get autoconfigured to Xoff, give PG1 arbitrarily 100K, which # is (-2*MTU) about 80K of delay provision. 
- __mlnx_qos -i $swp4 --buffer_size=0,$_100KB,0,0,0,0,0,0 >/dev/null + dcb buffer set dev $swp4 buffer-size all:0 1:$_100KB # bridges # ------- @@ -273,9 +273,9 @@ switch_destroy() # $swp4 # ----- - __mlnx_qos -i $swp4 --buffer_size=0,0,0,0,0,0,0,0 >/dev/null - __mlnx_qos -i $swp4 --pfc=0,0,0,0,0,0,0,0 >/dev/null - __mlnx_qos -i $swp4 --prio2buffer=0,0,0,0,0,0,0,0 >/dev/null + dcb buffer set dev $swp4 buffer-size all:0 + dcb pfc set dev $swp4 prio-pfc all:off + dcb buffer set dev $swp4 prio-buffer all:0 tc qdisc del dev $swp4 root devlink_tc_bind_pool_th_restore $swp4 1 ingress @@ -288,8 +288,8 @@ switch_destroy() # $swp3 # ----- - __mlnx_qos -i $swp3 --pfc=0,0,0,0,0,0,0,0 >/dev/null - __mlnx_qos -i $swp3 --prio2buffer=0,0,0,0,0,0,0,0 >/dev/null + dcb pfc set dev $swp3 prio-pfc all:off + dcb buffer set dev $swp3 prio-buffer all:0 tc qdisc del dev $swp3 root devlink_tc_bind_pool_th_restore $swp3 1 egress @@ -315,7 +315,7 @@ switch_destroy() # $swp1 # ----- - __mlnx_qos -i $swp1 --prio2buffer=0,0,0,0,0,0,0,0 >/dev/null + dcb buffer set dev $swp1 prio-buffer all:0 tc qdisc del dev $swp1 root devlink_tc_bind_pool_th_restore $swp1 1 ingress diff --git a/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh index e93878d42596..683759d29199 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh @@ -68,7 +68,7 @@ wait_for_routes() local t0=$1; shift local route_count=$1; shift - local t1=$(ip route | grep -o 'offload' | wc -l) + local t1=$(ip route | grep 'offload' | grep -v 'offload_failed' | wc -l) local delta=$((t1 - t0)) echo $delta [[ $delta -ge $route_count ]] diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh index 093bed088ad0..373d5f2a846e 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh +++ 
b/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh @@ -234,15 +234,15 @@ __tc_sample_rate_test() psample_capture_start - ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \ + ip vrf exec v$h1 $MZ $h1 -c 320000 -d 100usec -p 64 -A 192.0.2.1 \ -B $dip -t udp dp=52768,sp=42768 -q psample_capture_stop pkts=$(grep -e "group 1 " $CAPTURE_FILE | wc -l) - pct=$((100 * (pkts - 100) / 100)) + pct=$((100 * (pkts - 10000) / 10000)) (( -25 <= pct && pct <= 25)) - check_err $? "Expected 100 packets, got $pkts packets, which is $pct% off. Required accuracy is +-25%" + check_err $? "Expected 10000 packets, got $pkts packets, which is $pct% off. Required accuracy is +-25%" log_test "tc sample rate ($desc)" @@ -587,15 +587,15 @@ __tc_sample_acl_rate_test() psample_capture_start - ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \ + ip vrf exec v$h1 $MZ $h1 -c 320000 -d 100usec -p 64 -A 192.0.2.1 \ -B 198.51.100.1 -t udp dp=52768,sp=42768 -q psample_capture_stop pkts=$(grep -e "group 1 " $CAPTURE_FILE | wc -l) - pct=$((100 * (pkts - 100) / 100)) + pct=$((100 * (pkts - 10000) / 10000)) (( -25 <= pct && pct <= 25)) - check_err $? "Expected 100 packets, got $pkts packets, which is $pct% off. Required accuracy is +-25%" + check_err $? "Expected 10000 packets, got $pkts packets, which is $pct% off. Required accuracy is +-25%" # Setup a filter that should not match any packet and make sure packets # are not sampled. 
diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh index 40909c254365..9de1d123f4f5 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh @@ -5,12 +5,13 @@ lib_dir=$(dirname $0)/../../../net/forwarding ALL_TESTS="fw_flash_test params_test regions_test reload_test \ netns_reload_test resource_test dev_info_test \ - empty_reporter_test dummy_reporter_test" + empty_reporter_test dummy_reporter_test rate_test" NUM_NETIFS=0 source $lib_dir/lib.sh BUS_ADDR=10 PORT_COUNT=4 +VF_COUNT=4 DEV_NAME=netdevsim$BUS_ADDR SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV_NAME/net/ DEBUGFS_DIR=/sys/kernel/debug/netdevsim/$DEV_NAME/ @@ -507,6 +508,170 @@ dummy_reporter_test() log_test "dummy reporter test" } +rate_leafs_get() +{ + local handle=$1 + + cmd_jq "devlink port function rate show -j" \ + '.[] | to_entries | .[] | select(.value.type == "leaf") | .key | select(contains("'$handle'"))' +} + +rate_nodes_get() +{ + local handle=$1 + + cmd_jq "devlink port function rate show -j" \ + '.[] | to_entries | .[] | select(.value.type == "node") | .key | select(contains("'$handle'"))' +} + +rate_attr_set() +{ + local handle=$1 + local name=$2 + local value=$3 + local units=$4 + + devlink port function rate set $handle $name $value$units +} + +rate_attr_get() +{ + local handle=$1 + local name=$2 + + cmd_jq "devlink port function rate show $handle -j" '.[][].'$name +} + +rate_attr_tx_rate_check() +{ + local handle=$1 + local name=$2 + local rate=$3 + local debug_file=$4 + + rate_attr_set $handle $name $rate mbit + check_err $? "Failed to set $name value" + + local debug_value=$(cat $debug_file) + check_err $? "Failed to read $name value from debugfs" + [ "$debug_value" == "$rate" ] + check_err $? "Unexpected $name debug value $debug_value != $rate" + + local api_value=$(( $(rate_attr_get $handle $name) * 8 / 1000000 )) + check_err $? 
"Failed to get $name attr value" + [ "$api_value" == "$rate" ] + check_err $? "Unexpected $name attr value $api_value != $rate" +} + +rate_attr_parent_check() +{ + local handle=$1 + local parent=$2 + local debug_file=$3 + + rate_attr_set $handle parent $parent + check_err $? "Failed to set parent" + + debug_value=$(cat $debug_file) + check_err $? "Failed to get parent debugfs value" + [ "$debug_value" == "$parent" ] + check_err $? "Unexpected parent debug value $debug_value != $parent" + + api_value=$(rate_attr_get $r_obj parent) + check_err $? "Failed to get parent attr value" + [ "$api_value" == "$parent" ] + check_err $? "Unexpected parent attr value $api_value != $parent" +} + +rate_node_add() +{ + local handle=$1 + + devlink port function rate add $handle +} + +rate_node_del() +{ + local handle=$1 + + devlink port function rate del $handle +} + +rate_test() +{ + RET=0 + + echo $VF_COUNT > /sys/bus/netdevsim/devices/$DEV_NAME/sriov_numvfs + devlink dev eswitch set $DL_HANDLE mode switchdev + local leafs=`rate_leafs_get $DL_HANDLE` + local num_leafs=`echo $leafs | wc -w` + [ "$num_leafs" == "$VF_COUNT" ] + check_err $? "Expected $VF_COUNT rate leafs but got $num_leafs" + + rate=10 + for r_obj in $leafs + do + rate_attr_tx_rate_check $r_obj tx_share $rate \ + $DEBUGFS_DIR/ports/${r_obj##*/}/tx_share + rate=$(($rate+10)) + done + + rate=100 + for r_obj in $leafs + do + rate_attr_tx_rate_check $r_obj tx_max $rate \ + $DEBUGFS_DIR/ports/${r_obj##*/}/tx_max + rate=$(($rate+100)) + done + + local node1_name='group1' + local node1="$DL_HANDLE/$node1_name" + rate_node_add "$node1" + check_err $? "Failed to add node $node1" + + local num_nodes=`rate_nodes_get $DL_HANDLE | wc -w` + [ $num_nodes == 1 ] + check_err $? 
"Expected 1 rate node in output but got $num_nodes" + + local node_tx_share=10 + rate_attr_tx_rate_check $node1 tx_share $node_tx_share \ + $DEBUGFS_DIR/rate_nodes/${node1##*/}/tx_share + + local node_tx_max=100 + rate_attr_tx_rate_check $node1 tx_max $node_tx_max \ + $DEBUGFS_DIR/rate_nodes/${node1##*/}/tx_max + + rate_node_del "$node1" + check_err $? "Failed to delete node $node1" + local num_nodes=`rate_nodes_get $DL_HANDLE | wc -w` + [ $num_nodes == 0 ] + check_err $? "Expected 0 rate node but got $num_nodes" + + local node1_name='group1' + local node1="$DL_HANDLE/$node1_name" + rate_node_add "$node1" + check_err $? "Failed to add node $node1" + + rate_attr_parent_check $r_obj $node1_name \ + $DEBUGFS_DIR/ports/${r_obj##*/}/rate_parent + + local node2_name='group2' + local node2="$DL_HANDLE/$node2_name" + rate_node_add "$node2" + check_err $? "Failed to add node $node2" + + rate_attr_parent_check $node2 $node1_name \ + $DEBUGFS_DIR/rate_nodes/$node2_name/rate_parent + rate_node_del "$node2" + check_err $? "Failed to delete node $node2" + rate_attr_set "$r_obj" noparent + check_err $? "Failed to unset $r_obj parent node" + rate_node_del "$node1" + check_err $? 
"Failed to delete node $node1" + + log_test "rate test" +} + setup_prepare() { modprobe netdevsim diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh index da49ad2761b5..109900c817be 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh @@ -24,13 +24,15 @@ ALL_TESTS=" NETDEVSIM_PATH=/sys/bus/netdevsim/ DEV_ADDR=1337 DEV=netdevsim${DEV_ADDR} -DEVLINK_DEV=netdevsim/${DEV} DEBUGFS_DIR=/sys/kernel/debug/netdevsim/$DEV/ SLEEP_TIME=1 NETDEV="" NUM_NETIFS=0 source $lib_dir/lib.sh + +DEVLINK_DEV= source $lib_dir/devlink_lib.sh +DEVLINK_DEV=netdevsim/${DEV} require_command udevadm @@ -163,6 +165,16 @@ trap_stats_test() devlink_trap_action_set $trap_name "drop" devlink_trap_stats_idle_test $trap_name check_err $? "Stats of trap $trap_name not idle when action is drop" + + echo "y"> $DEBUGFS_DIR/fail_trap_drop_counter_get + devlink -s trap show $DEVLINK_DEV trap $trap_name &> /dev/null + check_fail $? "Managed to read trap (hard dropped) statistics when should not" + echo "n"> $DEBUGFS_DIR/fail_trap_drop_counter_get + devlink -s trap show $DEVLINK_DEV trap $trap_name &> /dev/null + check_err $? "Did not manage to read trap (hard dropped) statistics when should" + + devlink_trap_drop_stats_idle_test $trap_name + check_fail $? "Drop stats of trap $trap_name idle when should not" else devlink_trap_stats_idle_test $trap_name check_fail $? 
"Stats of non-drop trap $trap_name idle when should not" diff --git a/tools/testing/selftests/drivers/net/netdevsim/fib.sh b/tools/testing/selftests/drivers/net/netdevsim/fib.sh index 251f228ce63e..fc794cd30389 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/fib.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/fib.sh @@ -33,13 +33,15 @@ ALL_TESTS=" NETDEVSIM_PATH=/sys/bus/netdevsim/ DEV_ADDR=1337 DEV=netdevsim${DEV_ADDR} -DEVLINK_DEV=netdevsim/${DEV} SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/ NUM_NETIFS=0 source $lib_dir/lib.sh -source $lib_dir/devlink_lib.sh source $lib_dir/fib_offload_lib.sh +DEVLINK_DEV= +source $lib_dir/devlink_lib.sh +DEVLINK_DEV=netdevsim/${DEV} + ipv4_identical_routes() { fib_ipv4_identical_routes_test "testns1" diff --git a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh index ba75c81cda91..e8e0dc088d6a 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh @@ -44,12 +44,14 @@ ALL_TESTS=" NETDEVSIM_PATH=/sys/bus/netdevsim/ DEV_ADDR=1337 DEV=netdevsim${DEV_ADDR} -DEVLINK_DEV=netdevsim/${DEV} SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/ DEBUGFS_NET_DIR=/sys/kernel/debug/netdevsim/$DEV/ NUM_NETIFS=0 source $lib_dir/lib.sh + +DEVLINK_DEV= source $lib_dir/devlink_lib.sh +DEVLINK_DEV=netdevsim/${DEV} nexthop_check() { diff --git a/tools/testing/selftests/drivers/net/netdevsim/psample.sh b/tools/testing/selftests/drivers/net/netdevsim/psample.sh index ee10b1a8933c..e689ff7a0b12 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/psample.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/psample.sh @@ -14,13 +14,15 @@ ALL_TESTS=" NETDEVSIM_PATH=/sys/bus/netdevsim/ DEV_ADDR=1337 DEV=netdevsim${DEV_ADDR} -DEVLINK_DEV=netdevsim/${DEV} SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/ PSAMPLE_DIR=/sys/kernel/debug/netdevsim/$DEV/psample/ CAPTURE_FILE=$(mktemp) 
NUM_NETIFS=0 source $lib_dir/lib.sh + +DEVLINK_DEV= source $lib_dir/devlink_lib.sh +DEVLINK_DEV=netdevsim/${DEV} # Available at https://github.com/Mellanox/libpsample require_command psample diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore index 0efcd494daab..0e78b49d0f2f 100644 --- a/tools/testing/selftests/futex/functional/.gitignore +++ b/tools/testing/selftests/futex/functional/.gitignore @@ -6,3 +6,5 @@ futex_wait_private_mapped_file futex_wait_timeout futex_wait_uninitialized_heap futex_wait_wouldblock +futex_wait +futex_requeue diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile index 23207829ec75..bd1fec59e010 100644 --- a/tools/testing/selftests/futex/functional/Makefile +++ b/tools/testing/selftests/futex/functional/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -INCLUDES := -I../include -I../../ +INCLUDES := -I../include -I../../ -I../../../../../usr/include/ \ + -I$(KBUILD_OUTPUT)/kselftest/usr/include CFLAGS := $(CFLAGS) -g -O2 -Wall -D_GNU_SOURCE -pthread $(INCLUDES) LDLIBS := -lpthread -lrt @@ -14,7 +15,9 @@ TEST_GEN_FILES := \ futex_requeue_pi_signal_restart \ futex_requeue_pi_mismatched_ops \ futex_wait_uninitialized_heap \ - futex_wait_private_mapped_file + futex_wait_private_mapped_file \ + futex_wait \ + futex_requeue TEST_PROGS := run.sh diff --git a/tools/testing/selftests/futex/functional/futex_requeue.c b/tools/testing/selftests/futex/functional/futex_requeue.c new file mode 100644 index 000000000000..51485be6eb2f --- /dev/null +++ b/tools/testing/selftests/futex/functional/futex_requeue.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright Collabora Ltd., 2021 + * + * futex cmp requeue test by André Almeida <andrealmeid@collabora.com> + */ + +#include <pthread.h> +#include <limits.h> +#include "logging.h" +#include "futextest.h" + +#define TEST_NAME "futex-requeue" 
+#define timeout_ns 30000000 +#define WAKE_WAIT_US 10000 + +volatile futex_t *f1; + +void usage(char *prog) +{ + printf("Usage: %s\n", prog); + printf(" -c Use color\n"); + printf(" -h Display this help message\n"); + printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", + VQUIET, VCRITICAL, VINFO); +} + +void *waiterfn(void *arg) +{ + struct timespec to; + + to.tv_sec = 0; + to.tv_nsec = timeout_ns; + + if (futex_wait(f1, *f1, &to, 0)) + printf("waiter failed errno %d\n", errno); + + return NULL; +} + +int main(int argc, char *argv[]) +{ + pthread_t waiter[10]; + int res, ret = RET_PASS; + int c, i; + volatile futex_t _f1 = 0; + volatile futex_t f2 = 0; + + f1 = &_f1; + + while ((c = getopt(argc, argv, "cht:v:")) != -1) { + switch (c) { + case 'c': + log_color(1); + break; + case 'h': + usage(basename(argv[0])); + exit(0); + case 'v': + log_verbosity(atoi(optarg)); + break; + default: + usage(basename(argv[0])); + exit(1); + } + } + + ksft_print_header(); + ksft_set_plan(2); + ksft_print_msg("%s: Test futex_requeue\n", + basename(argv[0])); + + /* + * Requeue a waiter from f1 to f2, and wake f2. + */ + if (pthread_create(&waiter[0], NULL, waiterfn, NULL)) + error("pthread_create failed\n", errno); + + usleep(WAKE_WAIT_US); + + info("Requeuing 1 futex from f1 to f2\n"); + res = futex_cmp_requeue(f1, 0, &f2, 0, 1, 0); + if (res != 1) { + ksft_test_result_fail("futex_requeue simple returned: %d %s\n", + res ? errno : res, + res ? strerror(errno) : ""); + ret = RET_FAIL; + } + + + info("Waking 1 futex at f2\n"); + res = futex_wake(&f2, 1, 0); + if (res != 1) { + ksft_test_result_fail("futex_requeue simple returned: %d %s\n", + res ? errno : res, + res ? strerror(errno) : ""); + ret = RET_FAIL; + } else { + ksft_test_result_pass("futex_requeue simple succeeds\n"); + } + + + /* + * Create 10 waiters at f1. At futex_requeue, wake 3 and requeue 7. + * At futex_wake, wake INT_MAX (should be exactly 7). 
+ */ + for (i = 0; i < 10; i++) { + if (pthread_create(&waiter[i], NULL, waiterfn, NULL)) + error("pthread_create failed\n", errno); + } + + usleep(WAKE_WAIT_US); + + info("Waking 3 futexes at f1 and requeuing 7 futexes from f1 to f2\n"); + res = futex_cmp_requeue(f1, 0, &f2, 3, 7, 0); + if (res != 10) { + ksft_test_result_fail("futex_requeue many returned: %d %s\n", + res ? errno : res, + res ? strerror(errno) : ""); + ret = RET_FAIL; + } + + info("Waking INT_MAX futexes at f2\n"); + res = futex_wake(&f2, INT_MAX, 0); + if (res != 7) { + ksft_test_result_fail("futex_requeue many returned: %d %s\n", + res ? errno : res, + res ? strerror(errno) : ""); + ret = RET_FAIL; + } else { + ksft_test_result_pass("futex_requeue many succeeds\n"); + } + + ksft_print_cnts(); + return ret; +} diff --git a/tools/testing/selftests/futex/functional/futex_wait.c b/tools/testing/selftests/futex/functional/futex_wait.c new file mode 100644 index 000000000000..685140d9b93d --- /dev/null +++ b/tools/testing/selftests/futex/functional/futex_wait.c @@ -0,0 +1,171 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright Collabora Ltd., 2021 + * + * futex cmp requeue test by André Almeida <andrealmeid@collabora.com> + */ + +#include <pthread.h> +#include <sys/shm.h> +#include <sys/mman.h> +#include <fcntl.h> +#include "logging.h" +#include "futextest.h" + +#define TEST_NAME "futex-wait" +#define timeout_ns 30000000 +#define WAKE_WAIT_US 10000 +#define SHM_PATH "futex_shm_file" + +void *futex; + +void usage(char *prog) +{ + printf("Usage: %s\n", prog); + printf(" -c Use color\n"); + printf(" -h Display this help message\n"); + printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", + VQUIET, VCRITICAL, VINFO); +} + +static void *waiterfn(void *arg) +{ + struct timespec to; + unsigned int flags = 0; + + if (arg) + flags = *((unsigned int *) arg); + + to.tv_sec = 0; + to.tv_nsec = timeout_ns; + + if (futex_wait(futex, 0, &to, flags)) + printf("waiter failed errno %d\n", 
errno); + + return NULL; +} + +int main(int argc, char *argv[]) +{ + int res, ret = RET_PASS, fd, c, shm_id; + u_int32_t f_private = 0, *shared_data; + unsigned int flags = FUTEX_PRIVATE_FLAG; + pthread_t waiter; + void *shm; + + futex = &f_private; + + while ((c = getopt(argc, argv, "cht:v:")) != -1) { + switch (c) { + case 'c': + log_color(1); + break; + case 'h': + usage(basename(argv[0])); + exit(0); + case 'v': + log_verbosity(atoi(optarg)); + break; + default: + usage(basename(argv[0])); + exit(1); + } + } + + ksft_print_header(); + ksft_set_plan(3); + ksft_print_msg("%s: Test futex_wait\n", basename(argv[0])); + + /* Testing a private futex */ + info("Calling private futex_wait on futex: %p\n", futex); + if (pthread_create(&waiter, NULL, waiterfn, (void *) &flags)) + error("pthread_create failed\n", errno); + + usleep(WAKE_WAIT_US); + + info("Calling private futex_wake on futex: %p\n", futex); + res = futex_wake(futex, 1, FUTEX_PRIVATE_FLAG); + if (res != 1) { + ksft_test_result_fail("futex_wake private returned: %d %s\n", + errno, strerror(errno)); + ret = RET_FAIL; + } else { + ksft_test_result_pass("futex_wake private succeeds\n"); + } + + /* Testing an anon page shared memory */ + shm_id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0666); + if (shm_id < 0) { + perror("shmget"); + exit(1); + } + + shared_data = shmat(shm_id, NULL, 0); + + *shared_data = 0; + futex = shared_data; + + info("Calling shared (page anon) futex_wait on futex: %p\n", futex); + if (pthread_create(&waiter, NULL, waiterfn, NULL)) + error("pthread_create failed\n", errno); + + usleep(WAKE_WAIT_US); + + info("Calling shared (page anon) futex_wake on futex: %p\n", futex); + res = futex_wake(futex, 1, 0); + if (res != 1) { + ksft_test_result_fail("futex_wake shared (page anon) returned: %d %s\n", + errno, strerror(errno)); + ret = RET_FAIL; + } else { + ksft_test_result_pass("futex_wake shared (page anon) succeeds\n"); + } + + + /* Testing a file backed shared memory */ + fd = open(SHM_PATH, 
O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); + if (fd < 0) { + perror("open"); + exit(1); + } + + if (ftruncate(fd, sizeof(f_private))) { + perror("ftruncate"); + exit(1); + } + + shm = mmap(NULL, sizeof(f_private), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (shm == MAP_FAILED) { + perror("mmap"); + exit(1); + } + + memcpy(shm, &f_private, sizeof(f_private)); + + futex = shm; + + info("Calling shared (file backed) futex_wait on futex: %p\n", futex); + if (pthread_create(&waiter, NULL, waiterfn, NULL)) + error("pthread_create failed\n", errno); + + usleep(WAKE_WAIT_US); + + info("Calling shared (file backed) futex_wake on futex: %p\n", futex); + res = futex_wake(shm, 1, 0); + if (res != 1) { + ksft_test_result_fail("futex_wake shared (file backed) returned: %d %s\n", + errno, strerror(errno)); + ret = RET_FAIL; + } else { + ksft_test_result_pass("futex_wake shared (file backed) succeeds\n"); + } + + /* Freeing resources */ + shmdt(shared_data); + munmap(shm, sizeof(f_private)); + remove(SHM_PATH); + close(fd); + + ksft_print_cnts(); + return ret; +} diff --git a/tools/testing/selftests/futex/functional/futex_wait_timeout.c b/tools/testing/selftests/futex/functional/futex_wait_timeout.c index ee55e6d389a3..1f8f6daaf1e7 100644 --- a/tools/testing/selftests/futex/functional/futex_wait_timeout.c +++ b/tools/testing/selftests/futex/functional/futex_wait_timeout.c @@ -11,21 +11,18 @@ * * HISTORY * 2009-Nov-6: Initial version by Darren Hart <dvhart@linux.intel.com> + * 2021-Apr-26: More test cases by André Almeida <andrealmeid@collabora.com> * *****************************************************************************/ -#include <errno.h> -#include <getopt.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <time.h> +#include <pthread.h> #include "futextest.h" #include "logging.h" #define TEST_NAME "futex-wait-timeout" static long timeout_ns = 100000; /* 100us default timeout */ +static futex_t futex_pi; void usage(char *prog) { @@ -37,11 +34,67 @@ 
void usage(char *prog) VQUIET, VCRITICAL, VINFO); } +/* + * Get a PI lock and hold it forever, so the main thread lock_pi will block + * and we can test the timeout + */ +void *get_pi_lock(void *arg) +{ + int ret; + volatile futex_t lock = 0; + + ret = futex_lock_pi(&futex_pi, NULL, 0, 0); + if (ret != 0) + error("futex_lock_pi failed\n", ret); + + /* Blocks forever */ + ret = futex_wait(&lock, 0, NULL, 0); + error("futex_wait failed\n", ret); + + return NULL; +} + +/* + * Check if the function returned the expected error + */ +static void test_timeout(int res, int *ret, char *test_name, int err) +{ + if (!res || errno != err) { + ksft_test_result_fail("%s returned %d\n", test_name, + res < 0 ? errno : res); + *ret = RET_FAIL; + } else { + ksft_test_result_pass("%s succeeds\n", test_name); + } +} + +/* + * Calculate absolute timeout and correct overflow + */ +static int futex_get_abs_timeout(clockid_t clockid, struct timespec *to, + long timeout_ns) +{ + if (clock_gettime(clockid, to)) { + error("clock_gettime failed\n", errno); + return errno; + } + + to->tv_nsec += timeout_ns; + + if (to->tv_nsec >= 1000000000) { + to->tv_sec++; + to->tv_nsec -= 1000000000; + } + + return 0; +} + int main(int argc, char *argv[]) { futex_t f1 = FUTEX_INITIALIZER; - struct timespec to; int res, ret = RET_PASS; + struct timespec to; + pthread_t thread; int c; while ((c = getopt(argc, argv, "cht:v:")) != -1) { @@ -65,22 +118,63 @@ int main(int argc, char *argv[]) } ksft_print_header(); - ksft_set_plan(1); + ksft_set_plan(7); ksft_print_msg("%s: Block on a futex and wait for timeout\n", basename(argv[0])); ksft_print_msg("\tArguments: timeout=%ldns\n", timeout_ns); - /* initialize timeout */ + pthread_create(&thread, NULL, get_pi_lock, NULL); + + /* initialize relative timeout */ to.tv_sec = 0; to.tv_nsec = timeout_ns; - info("Calling futex_wait on f1: %u @ %p\n", f1, &f1); - res = futex_wait(&f1, f1, &to, FUTEX_PRIVATE_FLAG); - if (!res || errno != ETIMEDOUT) { - fail("futex_wait 
returned %d\n", ret < 0 ? errno : ret); - ret = RET_FAIL; - } + res = futex_wait(&f1, f1, &to, 0); + test_timeout(res, &ret, "futex_wait relative", ETIMEDOUT); + + /* FUTEX_WAIT_BITSET with CLOCK_REALTIME */ + if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns)) + return RET_FAIL; + res = futex_wait_bitset(&f1, f1, &to, 1, FUTEX_CLOCK_REALTIME); + test_timeout(res, &ret, "futex_wait_bitset realtime", ETIMEDOUT); + + /* FUTEX_WAIT_BITSET with CLOCK_MONOTONIC */ + if (futex_get_abs_timeout(CLOCK_MONOTONIC, &to, timeout_ns)) + return RET_FAIL; + res = futex_wait_bitset(&f1, f1, &to, 1, 0); + test_timeout(res, &ret, "futex_wait_bitset monotonic", ETIMEDOUT); + + /* FUTEX_WAIT_REQUEUE_PI with CLOCK_REALTIME */ + if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns)) + return RET_FAIL; + res = futex_wait_requeue_pi(&f1, f1, &futex_pi, &to, FUTEX_CLOCK_REALTIME); + test_timeout(res, &ret, "futex_wait_requeue_pi realtime", ETIMEDOUT); + + /* FUTEX_WAIT_REQUEUE_PI with CLOCK_MONOTONIC */ + if (futex_get_abs_timeout(CLOCK_MONOTONIC, &to, timeout_ns)) + return RET_FAIL; + res = futex_wait_requeue_pi(&f1, f1, &futex_pi, &to, 0); + test_timeout(res, &ret, "futex_wait_requeue_pi monotonic", ETIMEDOUT); + + /* + * FUTEX_LOCK_PI with CLOCK_REALTIME + * Due to historical reasons, FUTEX_LOCK_PI supports only realtime + * clock, but requires the caller to not set CLOCK_REALTIME flag. + * + * If you call FUTEX_LOCK_PI with a monotonic clock, it'll be + * interpreted as a realtime clock, and (unless you mess your machine's + * time or your time machine) the monotonic clock value is always + * smaller than realtime and the syscall will timeout immediately. 
+ */ + if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns)) + return RET_FAIL; + res = futex_lock_pi(&futex_pi, &to, 0, 0); + test_timeout(res, &ret, "futex_lock_pi realtime", ETIMEDOUT); + + /* Test operations that don't support FUTEX_CLOCK_REALTIME */ + res = futex_lock_pi(&futex_pi, NULL, 0, FUTEX_CLOCK_REALTIME); + test_timeout(res, &ret, "futex_lock_pi invalid timeout flag", ENOSYS); - print_result(TEST_NAME, ret); + ksft_print_cnts(); return ret; } diff --git a/tools/testing/selftests/futex/functional/run.sh b/tools/testing/selftests/futex/functional/run.sh index 1acb6ace1680..11a9d62290f5 100755 --- a/tools/testing/selftests/futex/functional/run.sh +++ b/tools/testing/selftests/futex/functional/run.sh @@ -73,3 +73,9 @@ echo echo ./futex_wait_uninitialized_heap $COLOR ./futex_wait_private_mapped_file $COLOR + +echo +./futex_wait $COLOR + +echo +./futex_requeue $COLOR diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 524c857a049c..06a351b4f93b 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only +/aarch64/debug-exceptions /aarch64/get-reg-list -/aarch64/get-reg-list-sve /aarch64/vgic_init /s390x/memop /s390x/resets @@ -8,12 +8,15 @@ /x86_64/cr4_cpuid_sync_test /x86_64/debug_regs /x86_64/evmcs_test +/x86_64/emulator_error_test /x86_64/get_cpuid_test /x86_64/get_msr_index_features /x86_64/kvm_pv_test /x86_64/hyperv_clock /x86_64/hyperv_cpuid +/x86_64/hyperv_features /x86_64/mmio_warning_test +/x86_64/mmu_role_test /x86_64/platform_info_test /x86_64/set_boot_cpu_id /x86_64/set_sregs_test @@ -29,6 +32,7 @@ /x86_64/vmx_preemption_timer_test /x86_64/vmx_set_nested_state_test /x86_64/vmx_tsc_adjust_test +/x86_64/vmx_nested_tsc_scaling_test /x86_64/xapic_ipi_test /x86_64/xen_shinfo_test /x86_64/xen_vmcall_test @@ -44,3 +48,4 @@ /memslot_perf_test /set_memory_region_test /steal_time +/kvm_binary_stats_test diff 
--git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index daaee1888b12..b853be2ae3c6 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -34,18 +34,21 @@ ifeq ($(ARCH),s390) endif LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/rbtree.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c lib/perf_test_util.c -LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S -LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c +LIBKVM_x86_64 = lib/x86_64/apic.c lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S +LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c lib/aarch64/handlers.S LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test +TEST_GEN_PROGS_x86_64 += x86_64/emulator_error_test TEST_GEN_PROGS_x86_64 += x86_64/get_cpuid_test TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid +TEST_GEN_PROGS_x86_64 += x86_64/hyperv_features TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test +TEST_GEN_PROGS_x86_64 += x86_64/mmu_role_test TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test @@ -60,6 +63,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test +TEST_GEN_PROGS_x86_64 += x86_64/vmx_nested_tsc_scaling_test TEST_GEN_PROGS_x86_64 += x86_64/xapic_ipi_test TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test TEST_GEN_PROGS_x86_64 += x86_64/debug_regs @@ -77,9 
+81,10 @@ TEST_GEN_PROGS_x86_64 += memslot_modification_stress_test TEST_GEN_PROGS_x86_64 += memslot_perf_test TEST_GEN_PROGS_x86_64 += set_memory_region_test TEST_GEN_PROGS_x86_64 += steal_time +TEST_GEN_PROGS_x86_64 += kvm_binary_stats_test +TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list -TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list-sve TEST_GEN_PROGS_aarch64 += aarch64/vgic_init TEST_GEN_PROGS_aarch64 += demand_paging_test TEST_GEN_PROGS_aarch64 += dirty_log_test @@ -88,6 +93,7 @@ TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus TEST_GEN_PROGS_aarch64 += kvm_page_table_test TEST_GEN_PROGS_aarch64 += set_memory_region_test TEST_GEN_PROGS_aarch64 += steal_time +TEST_GEN_PROGS_aarch64 += kvm_binary_stats_test TEST_GEN_PROGS_s390x = s390x/memop TEST_GEN_PROGS_s390x += s390x/resets @@ -97,6 +103,7 @@ TEST_GEN_PROGS_s390x += dirty_log_test TEST_GEN_PROGS_s390x += kvm_create_max_vcpus TEST_GEN_PROGS_s390x += kvm_page_table_test TEST_GEN_PROGS_s390x += set_memory_region_test +TEST_GEN_PROGS_s390x += kvm_binary_stats_test TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M)) LIBKVM += $(LIBKVM_$(UNAME_M)) diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c new file mode 100644 index 000000000000..e5e6c92b60da --- /dev/null +++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c @@ -0,0 +1,250 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_util.h> +#include <kvm_util.h> +#include <processor.h> + +#define VCPU_ID 0 + +#define MDSCR_KDE (1 << 13) +#define MDSCR_MDE (1 << 15) +#define MDSCR_SS (1 << 0) + +#define DBGBCR_LEN8 (0xff << 5) +#define DBGBCR_EXEC (0x0 << 3) +#define DBGBCR_EL1 (0x1 << 1) +#define DBGBCR_E (0x1 << 0) + +#define DBGWCR_LEN8 (0xff << 5) +#define DBGWCR_RD (0x1 << 3) +#define DBGWCR_WR (0x2 << 3) +#define DBGWCR_EL1 (0x1 << 1) +#define DBGWCR_E (0x1 << 0) + +#define SPSR_D (1 << 9) +#define SPSR_SS (1 << 21) + +extern 
unsigned char sw_bp, hw_bp, bp_svc, bp_brk, hw_wp, ss_start; +static volatile uint64_t sw_bp_addr, hw_bp_addr; +static volatile uint64_t wp_addr, wp_data_addr; +static volatile uint64_t svc_addr; +static volatile uint64_t ss_addr[4], ss_idx; +#define PC(v) ((uint64_t)&(v)) + +static void reset_debug_state(void) +{ + asm volatile("msr daifset, #8"); + + write_sysreg(osdlr_el1, 0); + write_sysreg(oslar_el1, 0); + isb(); + + write_sysreg(mdscr_el1, 0); + /* This test only uses the first bp and wp slot. */ + write_sysreg(dbgbvr0_el1, 0); + write_sysreg(dbgbcr0_el1, 0); + write_sysreg(dbgwcr0_el1, 0); + write_sysreg(dbgwvr0_el1, 0); + isb(); +} + +static void install_wp(uint64_t addr) +{ + uint32_t wcr; + uint32_t mdscr; + + wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E; + write_sysreg(dbgwcr0_el1, wcr); + write_sysreg(dbgwvr0_el1, addr); + isb(); + + asm volatile("msr daifclr, #8"); + + mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_MDE; + write_sysreg(mdscr_el1, mdscr); + isb(); +} + +static void install_hw_bp(uint64_t addr) +{ + uint32_t bcr; + uint32_t mdscr; + + bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E; + write_sysreg(dbgbcr0_el1, bcr); + write_sysreg(dbgbvr0_el1, addr); + isb(); + + asm volatile("msr daifclr, #8"); + + mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_MDE; + write_sysreg(mdscr_el1, mdscr); + isb(); +} + +static void install_ss(void) +{ + uint32_t mdscr; + + asm volatile("msr daifclr, #8"); + + mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_SS; + write_sysreg(mdscr_el1, mdscr); + isb(); +} + +static volatile char write_data; + +static void guest_code(void) +{ + GUEST_SYNC(0); + + /* Software-breakpoint */ + asm volatile("sw_bp: brk #0"); + GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp)); + + GUEST_SYNC(1); + + /* Hardware-breakpoint */ + reset_debug_state(); + install_hw_bp(PC(hw_bp)); + asm volatile("hw_bp: nop"); + GUEST_ASSERT_EQ(hw_bp_addr, PC(hw_bp)); + + GUEST_SYNC(2); + + /* Hardware-breakpoint + svc */ + 
reset_debug_state(); + install_hw_bp(PC(bp_svc)); + asm volatile("bp_svc: svc #0"); + GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_svc)); + GUEST_ASSERT_EQ(svc_addr, PC(bp_svc) + 4); + + GUEST_SYNC(3); + + /* Hardware-breakpoint + software-breakpoint */ + reset_debug_state(); + install_hw_bp(PC(bp_brk)); + asm volatile("bp_brk: brk #0"); + GUEST_ASSERT_EQ(sw_bp_addr, PC(bp_brk)); + GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_brk)); + + GUEST_SYNC(4); + + /* Watchpoint */ + reset_debug_state(); + install_wp(PC(write_data)); + write_data = 'x'; + GUEST_ASSERT_EQ(write_data, 'x'); + GUEST_ASSERT_EQ(wp_data_addr, PC(write_data)); + + GUEST_SYNC(5); + + /* Single-step */ + reset_debug_state(); + install_ss(); + ss_idx = 0; + asm volatile("ss_start:\n" + "mrs x0, esr_el1\n" + "add x0, x0, #1\n" + "msr daifset, #8\n" + : : : "x0"); + GUEST_ASSERT_EQ(ss_addr[0], PC(ss_start)); + GUEST_ASSERT_EQ(ss_addr[1], PC(ss_start) + 4); + GUEST_ASSERT_EQ(ss_addr[2], PC(ss_start) + 8); + + GUEST_DONE(); +} + +static void guest_sw_bp_handler(struct ex_regs *regs) +{ + sw_bp_addr = regs->pc; + regs->pc += 4; +} + +static void guest_hw_bp_handler(struct ex_regs *regs) +{ + hw_bp_addr = regs->pc; + regs->pstate |= SPSR_D; +} + +static void guest_wp_handler(struct ex_regs *regs) +{ + wp_data_addr = read_sysreg(far_el1); + wp_addr = regs->pc; + regs->pstate |= SPSR_D; +} + +static void guest_ss_handler(struct ex_regs *regs) +{ + GUEST_ASSERT_1(ss_idx < 4, ss_idx); + ss_addr[ss_idx++] = regs->pc; + regs->pstate |= SPSR_SS; +} + +static void guest_svc_handler(struct ex_regs *regs) +{ + svc_addr = regs->pc; +} + +static int debug_version(struct kvm_vm *vm) +{ + uint64_t id_aa64dfr0; + + get_reg(vm, VCPU_ID, ARM64_SYS_REG(ID_AA64DFR0_EL1), &id_aa64dfr0); + return id_aa64dfr0 & 0xf; +} + +int main(int argc, char *argv[]) +{ + struct kvm_vm *vm; + struct ucall uc; + int stage; + + vm = vm_create_default(VCPU_ID, 0, guest_code); + ucall_init(vm, NULL); + + vm_init_descriptor_tables(vm); + 
vcpu_init_descriptor_tables(vm, VCPU_ID); + + if (debug_version(vm) < 6) { + print_skip("Armv8 debug architecture not supported."); + kvm_vm_free(vm); + exit(KSFT_SKIP); + } + + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, + ESR_EC_BRK_INS, guest_sw_bp_handler); + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, + ESR_EC_HW_BP_CURRENT, guest_hw_bp_handler); + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, + ESR_EC_WP_CURRENT, guest_wp_handler); + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, + ESR_EC_SSTEP_CURRENT, guest_ss_handler); + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, + ESR_EC_SVC64, guest_svc_handler); + + for (stage = 0; stage < 7; stage++) { + vcpu_run(vm, VCPU_ID); + + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_SYNC: + TEST_ASSERT(uc.args[1] == stage, + "Stage %d: Unexpected sync ucall, got %lx", + stage, (ulong)uc.args[1]); + break; + case UCALL_ABORT: + TEST_FAIL("%s at %s:%ld\n\tvalues: %#lx, %#lx", + (const char *)uc.args[0], + __FILE__, uc.args[1], uc.args[2], uc.args[3]); + break; + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + } + +done: + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c b/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c deleted file mode 100644 index efba76682b4b..000000000000 --- a/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c +++ /dev/null @@ -1,3 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#define REG_LIST_SVE -#include "get-reg-list.c" diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c index 486932164cf2..a16c8f05366c 100644 --- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c @@ -27,17 +27,37 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/wait.h> #include "kvm_util.h" #include "test_util.h" 
#include "processor.h" -#ifdef REG_LIST_SVE -#define reg_list_sve() (true) -#else -#define reg_list_sve() (false) -#endif +static struct kvm_reg_list *reg_list; +static __u64 *blessed_reg, blessed_n; -#define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_COPROC_MASK) +struct reg_sublist { + const char *name; + long capability; + int feature; + bool finalize; + __u64 *regs; + __u64 regs_n; + __u64 *rejects_set; + __u64 rejects_set_n; +}; + +struct vcpu_config { + char *name; + struct reg_sublist sublists[]; +}; + +static struct vcpu_config *vcpu_configs[]; +static int vcpu_configs_n; + +#define for_each_sublist(c, s) \ + for ((s) = &(c)->sublists[0]; (s)->regs; ++(s)) #define for_each_reg(i) \ for ((i) = 0; (i) < reg_list->n; ++(i)) @@ -54,12 +74,41 @@ for_each_reg_filtered(i) \ if (!find_reg(blessed_reg, blessed_n, reg_list->reg[i])) +static const char *config_name(struct vcpu_config *c) +{ + struct reg_sublist *s; + int len = 0; -static struct kvm_reg_list *reg_list; + if (c->name) + return c->name; -static __u64 base_regs[], vregs[], sve_regs[], rejects_set[]; -static __u64 base_regs_n, vregs_n, sve_regs_n, rejects_set_n; -static __u64 *blessed_reg, blessed_n; + for_each_sublist(c, s) + len += strlen(s->name) + 1; + + c->name = malloc(len); + + len = 0; + for_each_sublist(c, s) { + if (!strcmp(s->name, "base")) + continue; + strcat(c->name + len, s->name); + len += strlen(s->name) + 1; + c->name[len - 1] = '+'; + } + c->name[len - 1] = '\0'; + + return c->name; +} + +static bool has_cap(struct vcpu_config *c, long capability) +{ + struct reg_sublist *s; + + for_each_sublist(c, s) + if (s->capability == capability) + return true; + return false; +} static bool filter_reg(__u64 reg) { @@ -96,11 +145,13 @@ static const char *str_with_index(const char *template, __u64 index) return (const char *)str; } +#define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_COPROC_MASK) + #define CORE_REGS_XX_NR_WORDS 2 #define CORE_SPSR_XX_NR_WORDS 2 
#define CORE_FPREGS_XX_NR_WORDS 4 -static const char *core_id_to_str(__u64 id) +static const char *core_id_to_str(struct vcpu_config *c, __u64 id) { __u64 core_off = id & ~REG_MASK, idx; @@ -111,7 +162,7 @@ static const char *core_id_to_str(__u64 id) case KVM_REG_ARM_CORE_REG(regs.regs[0]) ... KVM_REG_ARM_CORE_REG(regs.regs[30]): idx = (core_off - KVM_REG_ARM_CORE_REG(regs.regs[0])) / CORE_REGS_XX_NR_WORDS; - TEST_ASSERT(idx < 31, "Unexpected regs.regs index: %lld", idx); + TEST_ASSERT(idx < 31, "%s: Unexpected regs.regs index: %lld", config_name(c), idx); return str_with_index("KVM_REG_ARM_CORE_REG(regs.regs[##])", idx); case KVM_REG_ARM_CORE_REG(regs.sp): return "KVM_REG_ARM_CORE_REG(regs.sp)"; @@ -126,12 +177,12 @@ static const char *core_id_to_str(__u64 id) case KVM_REG_ARM_CORE_REG(spsr[0]) ... KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]): idx = (core_off - KVM_REG_ARM_CORE_REG(spsr[0])) / CORE_SPSR_XX_NR_WORDS; - TEST_ASSERT(idx < KVM_NR_SPSR, "Unexpected spsr index: %lld", idx); + TEST_ASSERT(idx < KVM_NR_SPSR, "%s: Unexpected spsr index: %lld", config_name(c), idx); return str_with_index("KVM_REG_ARM_CORE_REG(spsr[##])", idx); case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ... 
KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]): idx = (core_off - KVM_REG_ARM_CORE_REG(fp_regs.vregs[0])) / CORE_FPREGS_XX_NR_WORDS; - TEST_ASSERT(idx < 32, "Unexpected fp_regs.vregs index: %lld", idx); + TEST_ASSERT(idx < 32, "%s: Unexpected fp_regs.vregs index: %lld", config_name(c), idx); return str_with_index("KVM_REG_ARM_CORE_REG(fp_regs.vregs[##])", idx); case KVM_REG_ARM_CORE_REG(fp_regs.fpsr): return "KVM_REG_ARM_CORE_REG(fp_regs.fpsr)"; @@ -139,11 +190,11 @@ static const char *core_id_to_str(__u64 id) return "KVM_REG_ARM_CORE_REG(fp_regs.fpcr)"; } - TEST_FAIL("Unknown core reg id: 0x%llx", id); + TEST_FAIL("%s: Unknown core reg id: 0x%llx", config_name(c), id); return NULL; } -static const char *sve_id_to_str(__u64 id) +static const char *sve_id_to_str(struct vcpu_config *c, __u64 id) { __u64 sve_off, n, i; @@ -153,37 +204,37 @@ static const char *sve_id_to_str(__u64 id) sve_off = id & ~(REG_MASK | ((1ULL << 5) - 1)); i = id & (KVM_ARM64_SVE_MAX_SLICES - 1); - TEST_ASSERT(i == 0, "Currently we don't expect slice > 0, reg id 0x%llx", id); + TEST_ASSERT(i == 0, "%s: Currently we don't expect slice > 0, reg id 0x%llx", config_name(c), id); switch (sve_off) { case KVM_REG_ARM64_SVE_ZREG_BASE ... KVM_REG_ARM64_SVE_ZREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_ZREGS - 1: n = (id >> 5) & (KVM_ARM64_SVE_NUM_ZREGS - 1); TEST_ASSERT(id == KVM_REG_ARM64_SVE_ZREG(n, 0), - "Unexpected bits set in SVE ZREG id: 0x%llx", id); + "%s: Unexpected bits set in SVE ZREG id: 0x%llx", config_name(c), id); return str_with_index("KVM_REG_ARM64_SVE_ZREG(##, 0)", n); case KVM_REG_ARM64_SVE_PREG_BASE ... 
KVM_REG_ARM64_SVE_PREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_PREGS - 1: n = (id >> 5) & (KVM_ARM64_SVE_NUM_PREGS - 1); TEST_ASSERT(id == KVM_REG_ARM64_SVE_PREG(n, 0), - "Unexpected bits set in SVE PREG id: 0x%llx", id); + "%s: Unexpected bits set in SVE PREG id: 0x%llx", config_name(c), id); return str_with_index("KVM_REG_ARM64_SVE_PREG(##, 0)", n); case KVM_REG_ARM64_SVE_FFR_BASE: TEST_ASSERT(id == KVM_REG_ARM64_SVE_FFR(0), - "Unexpected bits set in SVE FFR id: 0x%llx", id); + "%s: Unexpected bits set in SVE FFR id: 0x%llx", config_name(c), id); return "KVM_REG_ARM64_SVE_FFR(0)"; } return NULL; } -static void print_reg(__u64 id) +static void print_reg(struct vcpu_config *c, __u64 id) { unsigned op0, op1, crn, crm, op2; const char *reg_size = NULL; TEST_ASSERT((id & KVM_REG_ARCH_MASK) == KVM_REG_ARM64, - "KVM_REG_ARM64 missing in reg id: 0x%llx", id); + "%s: KVM_REG_ARM64 missing in reg id: 0x%llx", config_name(c), id); switch (id & KVM_REG_SIZE_MASK) { case KVM_REG_SIZE_U8: @@ -214,17 +265,17 @@ static void print_reg(__u64 id) reg_size = "KVM_REG_SIZE_U2048"; break; default: - TEST_FAIL("Unexpected reg size: 0x%llx in reg id: 0x%llx", - (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id); + TEST_FAIL("%s: Unexpected reg size: 0x%llx in reg id: 0x%llx", + config_name(c), (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id); } switch (id & KVM_REG_ARM_COPROC_MASK) { case KVM_REG_ARM_CORE: - printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_CORE | %s,\n", reg_size, core_id_to_str(id)); + printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_CORE | %s,\n", reg_size, core_id_to_str(c, id)); break; case KVM_REG_ARM_DEMUX: TEST_ASSERT(!(id & ~(REG_MASK | KVM_REG_ARM_DEMUX_ID_MASK | KVM_REG_ARM_DEMUX_VAL_MASK)), - "Unexpected bits set in DEMUX reg id: 0x%llx", id); + "%s: Unexpected bits set in DEMUX reg id: 0x%llx", config_name(c), id); printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | %lld,\n", reg_size, id & KVM_REG_ARM_DEMUX_VAL_MASK); break; @@ -235,23 
+286,23 @@ static void print_reg(__u64 id) crm = (id & KVM_REG_ARM64_SYSREG_CRM_MASK) >> KVM_REG_ARM64_SYSREG_CRM_SHIFT; op2 = (id & KVM_REG_ARM64_SYSREG_OP2_MASK) >> KVM_REG_ARM64_SYSREG_OP2_SHIFT; TEST_ASSERT(id == ARM64_SYS_REG(op0, op1, crn, crm, op2), - "Unexpected bits set in SYSREG reg id: 0x%llx", id); + "%s: Unexpected bits set in SYSREG reg id: 0x%llx", config_name(c), id); printf("\tARM64_SYS_REG(%d, %d, %d, %d, %d),\n", op0, op1, crn, crm, op2); break; case KVM_REG_ARM_FW: TEST_ASSERT(id == KVM_REG_ARM_FW_REG(id & 0xffff), - "Unexpected bits set in FW reg id: 0x%llx", id); + "%s: Unexpected bits set in FW reg id: 0x%llx", config_name(c), id); printf("\tKVM_REG_ARM_FW_REG(%lld),\n", id & 0xffff); break; case KVM_REG_ARM64_SVE: - if (reg_list_sve()) - printf("\t%s,\n", sve_id_to_str(id)); + if (has_cap(c, KVM_CAP_ARM_SVE)) + printf("\t%s,\n", sve_id_to_str(c, id)); else - TEST_FAIL("KVM_REG_ARM64_SVE is an unexpected coproc type in reg id: 0x%llx", id); + TEST_FAIL("%s: KVM_REG_ARM64_SVE is an unexpected coproc type in reg id: 0x%llx", config_name(c), id); break; default: - TEST_FAIL("Unexpected coproc type: 0x%llx in reg id: 0x%llx", - (id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT, id); + TEST_FAIL("%s: Unexpected coproc type: 0x%llx in reg id: 0x%llx", + config_name(c), (id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT, id); } } @@ -312,56 +363,58 @@ static void core_reg_fixup(void) reg_list = tmp; } -static void prepare_vcpu_init(struct kvm_vcpu_init *init) +static void prepare_vcpu_init(struct vcpu_config *c, struct kvm_vcpu_init *init) { - if (reg_list_sve()) - init->features[0] |= 1 << KVM_ARM_VCPU_SVE; + struct reg_sublist *s; + + for_each_sublist(c, s) + if (s->capability) + init->features[s->feature / 32] |= 1 << (s->feature % 32); } -static void finalize_vcpu(struct kvm_vm *vm, uint32_t vcpuid) +static void finalize_vcpu(struct kvm_vm *vm, uint32_t vcpuid, struct vcpu_config *c) { + struct reg_sublist *s; int feature; - if 
(reg_list_sve()) { - feature = KVM_ARM_VCPU_SVE; - vcpu_ioctl(vm, vcpuid, KVM_ARM_VCPU_FINALIZE, &feature); + for_each_sublist(c, s) { + if (s->finalize) { + feature = s->feature; + vcpu_ioctl(vm, vcpuid, KVM_ARM_VCPU_FINALIZE, &feature); + } } } -static void check_supported(void) +static void check_supported(struct vcpu_config *c) { - if (reg_list_sve() && !kvm_check_cap(KVM_CAP_ARM_SVE)) { - fprintf(stderr, "SVE not available, skipping tests\n"); - exit(KSFT_SKIP); + struct reg_sublist *s; + + for_each_sublist(c, s) { + if (s->capability && !kvm_check_cap(s->capability)) { + fprintf(stderr, "%s: %s not available, skipping tests\n", config_name(c), s->name); + exit(KSFT_SKIP); + } } } -int main(int ac, char **av) +static bool print_list; +static bool print_filtered; +static bool fixup_core_regs; + +static void run_test(struct vcpu_config *c) { struct kvm_vcpu_init init = { .target = -1, }; - int new_regs = 0, missing_regs = 0, i; + int new_regs = 0, missing_regs = 0, i, n; int failed_get = 0, failed_set = 0, failed_reject = 0; - bool print_list = false, print_filtered = false, fixup_core_regs = false; struct kvm_vm *vm; - __u64 *vec_regs; + struct reg_sublist *s; - check_supported(); - - for (i = 1; i < ac; ++i) { - if (strcmp(av[i], "--core-reg-fixup") == 0) - fixup_core_regs = true; - else if (strcmp(av[i], "--list") == 0) - print_list = true; - else if (strcmp(av[i], "--list-filtered") == 0) - print_filtered = true; - else - TEST_FAIL("Unknown option: %s\n", av[i]); - } + check_supported(c); vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR); - prepare_vcpu_init(&init); + prepare_vcpu_init(c, &init); aarch64_vcpu_add_default(vm, 0, &init, NULL); - finalize_vcpu(vm, 0); + finalize_vcpu(vm, 0, c); reg_list = vcpu_get_reg_list(vm, 0); @@ -374,10 +427,10 @@ int main(int ac, char **av) __u64 id = reg_list->reg[i]; if ((print_list && !filter_reg(id)) || (print_filtered && filter_reg(id))) - print_reg(id); + print_reg(c, id); } putchar('\n'); - return 0; 
+ return; } /* @@ -396,50 +449,52 @@ int main(int ac, char **av) .id = reg_list->reg[i], .addr = (__u64)&addr, }; + bool reject_reg = false; int ret; ret = _vcpu_ioctl(vm, 0, KVM_GET_ONE_REG, &reg); if (ret) { - puts("Failed to get "); - print_reg(reg.id); + printf("%s: Failed to get ", config_name(c)); + print_reg(c, reg.id); putchar('\n'); ++failed_get; } /* rejects_set registers are rejected after KVM_ARM_VCPU_FINALIZE */ - if (find_reg(rejects_set, rejects_set_n, reg.id)) { - ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, &reg); - if (ret != -1 || errno != EPERM) { - printf("Failed to reject (ret=%d, errno=%d) ", ret, errno); - print_reg(reg.id); - putchar('\n'); - ++failed_reject; + for_each_sublist(c, s) { + if (s->rejects_set && find_reg(s->rejects_set, s->rejects_set_n, reg.id)) { + reject_reg = true; + ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, &reg); + if (ret != -1 || errno != EPERM) { + printf("%s: Failed to reject (ret=%d, errno=%d) ", config_name(c), ret, errno); + print_reg(c, reg.id); + putchar('\n'); + ++failed_reject; + } + break; } - continue; } - ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, &reg); - if (ret) { - puts("Failed to set "); - print_reg(reg.id); - putchar('\n'); - ++failed_set; + if (!reject_reg) { + ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, &reg); + if (ret) { + printf("%s: Failed to set ", config_name(c)); + print_reg(c, reg.id); + putchar('\n'); + ++failed_set; + } } } - if (reg_list_sve()) { - blessed_n = base_regs_n + sve_regs_n; - vec_regs = sve_regs; - } else { - blessed_n = base_regs_n + vregs_n; - vec_regs = vregs; - } - + for_each_sublist(c, s) + blessed_n += s->regs_n; blessed_reg = calloc(blessed_n, sizeof(__u64)); - for (i = 0; i < base_regs_n; ++i) - blessed_reg[i] = base_regs[i]; - for (i = 0; i < blessed_n - base_regs_n; ++i) - blessed_reg[base_regs_n + i] = vec_regs[i]; + + n = 0; + for_each_sublist(c, s) { + for (i = 0; i < s->regs_n; ++i) + blessed_reg[n++] = s->regs[i]; + } for_each_new_reg(i) ++new_regs; @@ -448,40 +503,141 @@ int
main(int ac, char **av) ++missing_regs; if (new_regs || missing_regs) { - printf("Number blessed registers: %5lld\n", blessed_n); - printf("Number registers: %5lld\n", reg_list->n); + printf("%s: Number blessed registers: %5lld\n", config_name(c), blessed_n); + printf("%s: Number registers: %5lld\n", config_name(c), reg_list->n); } if (new_regs) { - printf("\nThere are %d new registers.\n" + printf("\n%s: There are %d new registers.\n" "Consider adding them to the blessed reg " - "list with the following lines:\n\n", new_regs); + "list with the following lines:\n\n", config_name(c), new_regs); for_each_new_reg(i) - print_reg(reg_list->reg[i]); + print_reg(c, reg_list->reg[i]); putchar('\n'); } if (missing_regs) { - printf("\nThere are %d missing registers.\n" - "The following lines are missing registers:\n\n", missing_regs); + printf("\n%s: There are %d missing registers.\n" + "The following lines are missing registers:\n\n", config_name(c), missing_regs); for_each_missing_reg(i) - print_reg(blessed_reg[i]); + print_reg(c, blessed_reg[i]); putchar('\n'); } TEST_ASSERT(!missing_regs && !failed_get && !failed_set && !failed_reject, - "There are %d missing registers; " + "%s: There are %d missing registers; " "%d registers failed get; %d registers failed set; %d registers failed reject", - missing_regs, failed_get, failed_set, failed_reject); + config_name(c), missing_regs, failed_get, failed_set, failed_reject); - return 0; + pr_info("%s: PASS\n", config_name(c)); + blessed_n = 0; + free(blessed_reg); + free(reg_list); + kvm_vm_free(vm); +} + +static void help(void) +{ + struct vcpu_config *c; + int i; + + printf( + "\n" + "usage: get-reg-list [--config=<selection>] [--list] [--list-filtered] [--core-reg-fixup]\n\n" + " --config=<selection> Used to select a specific vcpu configuration for the test/listing\n" + " '<selection>' may be\n"); + + for (i = 0; i < vcpu_configs_n; ++i) { + c = vcpu_configs[i]; + printf( + " '%s'\n", config_name(c)); + } + + printf( + "\n" + 
" --list Print the register list rather than test it (requires --config)\n" + " --list-filtered Print registers that would normally be filtered out (requires --config)\n" + " --core-reg-fixup Needed when running on old kernels with broken core reg listings\n" + "\n" + ); +} + +static struct vcpu_config *parse_config(const char *config) +{ + struct vcpu_config *c; + int i; + + if (config[8] != '=') + help(), exit(1); + + for (i = 0; i < vcpu_configs_n; ++i) { + c = vcpu_configs[i]; + if (strcmp(config_name(c), &config[9]) == 0) + break; + } + + if (i == vcpu_configs_n) + help(), exit(1); + + return c; +} + +int main(int ac, char **av) +{ + struct vcpu_config *c, *sel = NULL; + int i, ret = 0; + pid_t pid; + + for (i = 1; i < ac; ++i) { + if (strcmp(av[i], "--core-reg-fixup") == 0) + fixup_core_regs = true; + else if (strncmp(av[i], "--config", 8) == 0) + sel = parse_config(av[i]); + else if (strcmp(av[i], "--list") == 0) + print_list = true; + else if (strcmp(av[i], "--list-filtered") == 0) + print_filtered = true; + else if (strcmp(av[i], "--help") == 0 || strcmp(av[1], "-h") == 0) + help(), exit(0); + else + help(), exit(1); + } + + if (print_list || print_filtered) { + /* + * We only want to print the register list of a single config. + */ + if (!sel) + help(), exit(1); + } + + for (i = 0; i < vcpu_configs_n; ++i) { + c = vcpu_configs[i]; + if (sel && c != sel) + continue; + + pid = fork(); + + if (!pid) { + run_test(c); + exit(0); + } else { + int wstatus; + pid_t wpid = wait(&wstatus); + TEST_ASSERT(wpid == pid && WIFEXITED(wstatus), "wait: Unexpected return"); + if (WEXITSTATUS(wstatus) && WEXITSTATUS(wstatus) != KSFT_SKIP) + ret = KSFT_FAIL; + } + } + + return ret; } /* * The current blessed list was primed with the output of kernel version * v4.15 with --core-reg-fixup and then later updated with new registers. 
* - * The blessed list is up to date with kernel version v5.10-rc5 + * The blessed list is up to date with kernel version v5.13-rc3 */ static __u64 base_regs[] = { KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[0]), @@ -673,8 +829,6 @@ static __u64 base_regs[] = { ARM64_SYS_REG(3, 0, 5, 2, 0), /* ESR_EL1 */ ARM64_SYS_REG(3, 0, 6, 0, 0), /* FAR_EL1 */ ARM64_SYS_REG(3, 0, 7, 4, 0), /* PAR_EL1 */ - ARM64_SYS_REG(3, 0, 9, 14, 1), /* PMINTENSET_EL1 */ - ARM64_SYS_REG(3, 0, 9, 14, 2), /* PMINTENCLR_EL1 */ ARM64_SYS_REG(3, 0, 10, 2, 0), /* MAIR_EL1 */ ARM64_SYS_REG(3, 0, 10, 3, 0), /* AMAIR_EL1 */ ARM64_SYS_REG(3, 0, 12, 0, 0), /* VBAR_EL1 */ @@ -683,6 +837,16 @@ static __u64 base_regs[] = { ARM64_SYS_REG(3, 0, 13, 0, 4), /* TPIDR_EL1 */ ARM64_SYS_REG(3, 0, 14, 1, 0), /* CNTKCTL_EL1 */ ARM64_SYS_REG(3, 2, 0, 0, 0), /* CSSELR_EL1 */ + ARM64_SYS_REG(3, 3, 13, 0, 2), /* TPIDR_EL0 */ + ARM64_SYS_REG(3, 3, 13, 0, 3), /* TPIDRRO_EL0 */ + ARM64_SYS_REG(3, 4, 3, 0, 0), /* DACR32_EL2 */ + ARM64_SYS_REG(3, 4, 5, 0, 1), /* IFSR32_EL2 */ + ARM64_SYS_REG(3, 4, 5, 3, 0), /* FPEXC32_EL2 */ +}; + +static __u64 pmu_regs[] = { + ARM64_SYS_REG(3, 0, 9, 14, 1), /* PMINTENSET_EL1 */ + ARM64_SYS_REG(3, 0, 9, 14, 2), /* PMINTENCLR_EL1 */ ARM64_SYS_REG(3, 3, 9, 12, 0), /* PMCR_EL0 */ ARM64_SYS_REG(3, 3, 9, 12, 1), /* PMCNTENSET_EL0 */ ARM64_SYS_REG(3, 3, 9, 12, 2), /* PMCNTENCLR_EL0 */ @@ -692,8 +856,6 @@ static __u64 base_regs[] = { ARM64_SYS_REG(3, 3, 9, 13, 0), /* PMCCNTR_EL0 */ ARM64_SYS_REG(3, 3, 9, 14, 0), /* PMUSERENR_EL0 */ ARM64_SYS_REG(3, 3, 9, 14, 3), /* PMOVSSET_EL0 */ - ARM64_SYS_REG(3, 3, 13, 0, 2), /* TPIDR_EL0 */ - ARM64_SYS_REG(3, 3, 13, 0, 3), /* TPIDRRO_EL0 */ ARM64_SYS_REG(3, 3, 14, 8, 0), ARM64_SYS_REG(3, 3, 14, 8, 1), ARM64_SYS_REG(3, 3, 14, 8, 2), @@ -757,11 +919,7 @@ static __u64 base_regs[] = { ARM64_SYS_REG(3, 3, 14, 15, 5), ARM64_SYS_REG(3, 3, 14, 15, 6), ARM64_SYS_REG(3, 3, 14, 15, 7), /* PMCCFILTR_EL0 */ - ARM64_SYS_REG(3, 4, 3, 
0, 0), /* DACR32_EL2 */ - ARM64_SYS_REG(3, 4, 5, 0, 1), /* IFSR32_EL2 */ - ARM64_SYS_REG(3, 4, 5, 3, 0), /* FPEXC32_EL2 */ }; -static __u64 base_regs_n = ARRAY_SIZE(base_regs); static __u64 vregs[] = { KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]), @@ -797,7 +955,6 @@ static __u64 vregs[] = { KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[30]), KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]), }; -static __u64 vregs_n = ARRAY_SIZE(vregs); static __u64 sve_regs[] = { KVM_REG_ARM64_SVE_VLS, @@ -852,11 +1009,57 @@ static __u64 sve_regs[] = { KVM_REG_ARM64_SVE_FFR(0), ARM64_SYS_REG(3, 0, 1, 2, 0), /* ZCR_EL1 */ }; -static __u64 sve_regs_n = ARRAY_SIZE(sve_regs); -static __u64 rejects_set[] = { -#ifdef REG_LIST_SVE +static __u64 sve_rejects_set[] = { KVM_REG_ARM64_SVE_VLS, -#endif }; -static __u64 rejects_set_n = ARRAY_SIZE(rejects_set); + +#define BASE_SUBLIST \ + { "base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), } +#define VREGS_SUBLIST \ + { "vregs", .regs = vregs, .regs_n = ARRAY_SIZE(vregs), } +#define PMU_SUBLIST \ + { "pmu", .regs = pmu_regs, .regs_n = ARRAY_SIZE(pmu_regs), } +#define SVE_SUBLIST \ + { "sve", .capability = KVM_CAP_ARM_SVE, .feature = KVM_ARM_VCPU_SVE, .finalize = true, \ + .regs = sve_regs, .regs_n = ARRAY_SIZE(sve_regs), \ + .rejects_set = sve_rejects_set, .rejects_set_n = ARRAY_SIZE(sve_rejects_set), } + +static struct vcpu_config vregs_config = { + .sublists = { + BASE_SUBLIST, + VREGS_SUBLIST, + {0}, + }, +}; +static struct vcpu_config vregs_pmu_config = { + .sublists = { + BASE_SUBLIST, + VREGS_SUBLIST, + PMU_SUBLIST, + {0}, + }, +}; +static struct vcpu_config sve_config = { + .sublists = { + BASE_SUBLIST, + SVE_SUBLIST, + {0}, + }, +}; +static struct vcpu_config sve_pmu_config = { + .sublists = { + BASE_SUBLIST, + SVE_SUBLIST, + PMU_SUBLIST, + {0}, + }, +}; + +static struct vcpu_config 
*vcpu_configs[] = { + &vregs_config, + &vregs_pmu_config, + &sve_config, + &sve_pmu_config, +}; +static int vcpu_configs_n = ARRAY_SIZE(vcpu_configs); diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 81edbd23d371..5fe0140e407e 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -16,7 +16,6 @@ #include <errno.h> #include <linux/bitmap.h> #include <linux/bitops.h> -#include <asm/barrier.h> #include <linux/atomic.h> #include "kvm_util.h" @@ -681,7 +680,7 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid, pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); vm = vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); - kvm_vm_elf_load(vm, program_invocation_name, 0, 0); + kvm_vm_elf_load(vm, program_invocation_name); #ifdef __x86_64__ vm_create_irqchip(vm); #endif @@ -761,7 +760,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) KVM_MEM_LOG_DIRTY_PAGES); /* Do mapping for the dirty track memory slot */ - virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0); + virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages); /* Cache the HVA pointer of the region */ host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem); diff --git a/tools/testing/selftests/kvm/hardware_disable_test.c b/tools/testing/selftests/kvm/hardware_disable_test.c index 4b8db3bce610..b21c69a56daa 100644 --- a/tools/testing/selftests/kvm/hardware_disable_test.c +++ b/tools/testing/selftests/kvm/hardware_disable_test.c @@ -105,7 +105,7 @@ static void run_test(uint32_t run) CPU_SET(i, &cpu_set); vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR); - kvm_vm_elf_load(vm, program_invocation_name, 0, 0); + kvm_vm_elf_load(vm, program_invocation_name); vm_create_irqchip(vm); pr_debug("%s: [%d] start vcpus\n", __func__, run); diff --git 
a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h index b7fa0c8551db..27dc5c2e56b9 100644 --- a/tools/testing/selftests/kvm/include/aarch64/processor.h +++ b/tools/testing/selftests/kvm/include/aarch64/processor.h @@ -8,16 +8,20 @@ #define SELFTEST_KVM_PROCESSOR_H #include "kvm_util.h" +#include <linux/stringify.h> #define ARM64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \ KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x)) -#define CPACR_EL1 3, 0, 1, 0, 2 -#define TCR_EL1 3, 0, 2, 0, 2 -#define MAIR_EL1 3, 0, 10, 2, 0 -#define TTBR0_EL1 3, 0, 2, 0, 0 -#define SCTLR_EL1 3, 0, 1, 0, 0 +#define CPACR_EL1 3, 0, 1, 0, 2 +#define TCR_EL1 3, 0, 2, 0, 2 +#define MAIR_EL1 3, 0, 10, 2, 0 +#define TTBR0_EL1 3, 0, 2, 0, 0 +#define SCTLR_EL1 3, 0, 1, 0, 0 +#define VBAR_EL1 3, 0, 12, 0, 0 + +#define ID_AA64DFR0_EL1 3, 0, 0, 5, 0 /* * Default MAIR @@ -56,4 +60,73 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *ini void aarch64_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_vcpu_init *init, void *guest_code); +struct ex_regs { + u64 regs[31]; + u64 sp; + u64 pc; + u64 pstate; +}; + +#define VECTOR_NUM 16 + +enum { + VECTOR_SYNC_CURRENT_SP0, + VECTOR_IRQ_CURRENT_SP0, + VECTOR_FIQ_CURRENT_SP0, + VECTOR_ERROR_CURRENT_SP0, + + VECTOR_SYNC_CURRENT, + VECTOR_IRQ_CURRENT, + VECTOR_FIQ_CURRENT, + VECTOR_ERROR_CURRENT, + + VECTOR_SYNC_LOWER_64, + VECTOR_IRQ_LOWER_64, + VECTOR_FIQ_LOWER_64, + VECTOR_ERROR_LOWER_64, + + VECTOR_SYNC_LOWER_32, + VECTOR_IRQ_LOWER_32, + VECTOR_FIQ_LOWER_32, + VECTOR_ERROR_LOWER_32, +}; + +#define VECTOR_IS_SYNC(v) ((v) == VECTOR_SYNC_CURRENT_SP0 || \ + (v) == VECTOR_SYNC_CURRENT || \ + (v) == VECTOR_SYNC_LOWER_64 || \ + (v) == VECTOR_SYNC_LOWER_32) + +#define ESR_EC_NUM 64 +#define ESR_EC_SHIFT 26 +#define ESR_EC_MASK (ESR_EC_NUM - 1) + +#define ESR_EC_SVC64 0x15 +#define ESR_EC_HW_BP_CURRENT 0x31 +#define ESR_EC_SSTEP_CURRENT 0x33 +#define 
ESR_EC_WP_CURRENT 0x35 +#define ESR_EC_BRK_INS 0x3c + +void vm_init_descriptor_tables(struct kvm_vm *vm); +void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid); + +typedef void(*handler_fn)(struct ex_regs *); +void vm_install_exception_handler(struct kvm_vm *vm, + int vector, handler_fn handler); +void vm_install_sync_handler(struct kvm_vm *vm, + int vector, int ec, handler_fn handler); + +#define write_sysreg(reg, val) \ +({ \ + u64 __val = (u64)(val); \ + asm volatile("msr " __stringify(reg) ", %x0" : : "rZ" (__val)); \ +}) + +#define read_sysreg(reg) \ +({ u64 val; \ + asm volatile("mrs %0, "__stringify(reg) : "=r"(val) : : "memory");\ + val; \ +}) + +#define isb() asm volatile("isb" : : : "memory") + #endif /* SELFTEST_KVM_PROCESSOR_H */ diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 35739567189e..615ab254899d 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -30,6 +30,7 @@ typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */ /* Minimum allocated guest virtual and physical addresses */ #define KVM_UTIL_MIN_VADDR 0x2000 +#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000 #define DEFAULT_GUEST_PHY_PAGES 512 #define DEFAULT_GUEST_STACK_VADDR_MIN 0xab6000 @@ -98,8 +99,7 @@ uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm); int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva, size_t len); -void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename, - uint32_t data_memslot, uint32_t pgd_memslot); +void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename); void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent); @@ -141,10 +141,12 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags); void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa); void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot); void 
vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid); -vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min, - uint32_t data_memslot, uint32_t pgd_memslot); +vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min); +vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages); +vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm); + void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, - unsigned int npages, uint32_t pgd_memslot); + unsigned int npages); void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa); void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva); vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva); @@ -237,7 +239,7 @@ int kvm_device_access(int dev_fd, uint32_t group, uint64_t attr, const char *exit_reason_str(unsigned int exit_reason); -void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot); +void virt_pgd_alloc(struct kvm_vm *vm); /* * VM Virtual Page Map @@ -255,13 +257,13 @@ void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot); * Within @vm, creates a virtual translation for the page starting * at @vaddr to the page starting at @paddr. 
*/ -void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, - uint32_t memslot); +void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr); vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min, uint32_t memslot); vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, vm_paddr_t paddr_min, uint32_t memslot); +vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm); /* * Create a VM with reasonable defaults @@ -351,6 +353,7 @@ enum { UCALL_SYNC, UCALL_ABORT, UCALL_DONE, + UCALL_UNHANDLED, }; #define UCALL_MAX_ARGS 6 @@ -369,26 +372,31 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc); ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4) #define GUEST_SYNC(stage) ucall(UCALL_SYNC, 2, "hello", stage) #define GUEST_DONE() ucall(UCALL_DONE, 0) -#define __GUEST_ASSERT(_condition, _nargs, _args...) do { \ - if (!(_condition)) \ - ucall(UCALL_ABORT, 2 + _nargs, \ - "Failed guest assert: " \ - #_condition, __LINE__, _args); \ +#define __GUEST_ASSERT(_condition, _condstr, _nargs, _args...) 
do { \ + if (!(_condition)) \ + ucall(UCALL_ABORT, 2 + _nargs, \ + "Failed guest assert: " \ + _condstr, __LINE__, _args); \ } while (0) #define GUEST_ASSERT(_condition) \ - __GUEST_ASSERT((_condition), 0, 0) + __GUEST_ASSERT(_condition, #_condition, 0, 0) #define GUEST_ASSERT_1(_condition, arg1) \ - __GUEST_ASSERT((_condition), 1, (arg1)) + __GUEST_ASSERT(_condition, #_condition, 1, (arg1)) #define GUEST_ASSERT_2(_condition, arg1, arg2) \ - __GUEST_ASSERT((_condition), 2, (arg1), (arg2)) + __GUEST_ASSERT(_condition, #_condition, 2, (arg1), (arg2)) #define GUEST_ASSERT_3(_condition, arg1, arg2, arg3) \ - __GUEST_ASSERT((_condition), 3, (arg1), (arg2), (arg3)) + __GUEST_ASSERT(_condition, #_condition, 3, (arg1), (arg2), (arg3)) #define GUEST_ASSERT_4(_condition, arg1, arg2, arg3, arg4) \ - __GUEST_ASSERT((_condition), 4, (arg1), (arg2), (arg3), (arg4)) + __GUEST_ASSERT(_condition, #_condition, 4, (arg1), (arg2), (arg3), (arg4)) + +#define GUEST_ASSERT_EQ(a, b) __GUEST_ASSERT((a) == (b), #a " == " #b, 2, a, b) + +int vm_get_stats_fd(struct kvm_vm *vm); +int vcpu_get_stats_fd(struct kvm_vm *vm, uint32_t vcpuid); #endif /* SELFTEST_KVM_UTIL_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/apic.h b/tools/testing/selftests/kvm/include/x86_64/apic.h new file mode 100644 index 000000000000..0be4757f1f20 --- /dev/null +++ b/tools/testing/selftests/kvm/include/x86_64/apic.h @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * tools/testing/selftests/kvm/include/x86_64/apic.h + * + * Copyright (C) 2021, Google LLC. 
+ */
+
+#ifndef SELFTEST_KVM_APIC_H
+#define SELFTEST_KVM_APIC_H
+
+#include <stdint.h>
+
+#include "processor.h"
+
+#define APIC_DEFAULT_GPA		0xfee00000ULL /* base used by xapic_{read,write}_reg() */
+
+/* APIC base address MSR and fields */
+#define MSR_IA32_APICBASE		0x0000001b
+#define MSR_IA32_APICBASE_BSP		(1<<8)
+#define MSR_IA32_APICBASE_EXTD		(1<<10)
+#define MSR_IA32_APICBASE_ENABLE	(1<<11)
+#define MSR_IA32_APICBASE_BASE		(0xfffff<<12)
+#define		GET_APIC_BASE(x)	(((x) >> 12) << 12)
+
+#define APIC_BASE_MSR	0x800 /* first MSR index used by x2apic_{read,write}_reg() */
+#define X2APIC_ENABLE	(1UL << 10)
+#define	APIC_ID		0x20
+#define	APIC_LVR	0x30
+#define		GET_APIC_ID_FIELD(x)	(((x) >> 24) & 0xFF)
+#define	APIC_TASKPRI	0x80
+#define	APIC_PROCPRI	0xA0
+#define	APIC_EOI	0xB0
+#define	APIC_SPIV	0xF0
+#define		APIC_SPIV_FOCUS_DISABLED	(1 << 9)
+#define		APIC_SPIV_APIC_ENABLED		(1 << 8)
+#define	APIC_ICR	0x300
+#define		APIC_DEST_SELF		0x40000
+#define		APIC_DEST_ALLINC	0x80000
+#define		APIC_DEST_ALLBUT	0xC0000
+#define		APIC_ICR_RR_MASK	0x30000
+#define		APIC_ICR_RR_INVALID	0x00000
+#define		APIC_ICR_RR_INPROG	0x10000
+#define		APIC_ICR_RR_VALID	0x20000
+#define		APIC_INT_LEVELTRIG	0x08000
+#define		APIC_INT_ASSERT		0x04000
+#define		APIC_ICR_BUSY		0x01000
+#define		APIC_DEST_LOGICAL	0x00800
+#define		APIC_DEST_PHYSICAL	0x00000
+#define		APIC_DM_FIXED		0x00000
+#define		APIC_DM_FIXED_MASK	0x00700
+#define		APIC_DM_LOWEST		0x00100
+#define		APIC_DM_SMI		0x00200
+#define		APIC_DM_REMRD		0x00300
+#define		APIC_DM_NMI		0x00400
+#define		APIC_DM_INIT		0x00500
+#define		APIC_DM_STARTUP		0x00600
+#define		APIC_DM_EXTINT		0x00700
+#define		APIC_VECTOR_MASK	0x000FF
+#define	APIC_ICR2	0x310
+#define		SET_APIC_DEST_FIELD(x)	((x) << 24)
+
+void apic_disable(void);
+void xapic_enable(void);
+void x2apic_enable(void);
+
+static inline uint32_t get_bsp_flag(void)
+{
+	return rdmsr(MSR_IA32_APICBASE) & MSR_IA32_APICBASE_BSP; /* non-zero iff the BSP bit (bit 8) is set */
+}
+
+static inline uint32_t xapic_read_reg(unsigned int reg)
+{
+	return ((volatile uint32_t *)APIC_DEFAULT_GPA)[reg >> 2]; /* reg is a byte offset; reg >> 2 indexes 32-bit words */
+}
+
+static inline void xapic_write_reg(unsigned int reg, uint32_t val)
+{
+	((volatile uint32_t *)APIC_DEFAULT_GPA)[reg >> 2] = val; /* same addressing as xapic_read_reg() */
+}
+
+static inline uint64_t x2apic_read_reg(unsigned int reg)
+{
+	return rdmsr(APIC_BASE_MSR + (reg >> 4)); /* MSR index = APIC_BASE_MSR + reg / 16 */
+}
+
+static inline void x2apic_write_reg(unsigned int reg, uint64_t value)
+{
+	wrmsr(APIC_BASE_MSR + (reg >> 4), value); /* same MSR index mapping as x2apic_read_reg() */
+}
+
+#endif /* SELFTEST_KVM_APIC_H */
diff --git a/tools/testing/selftests/kvm/include/evmcs.h b/tools/testing/selftests/kvm/include/x86_64/evmcs.h
index a034438b6266..c9af97abd622 100644
--- a/tools/testing/selftests/kvm/include/evmcs.h
+++ b/tools/testing/selftests/kvm/include/x86_64/evmcs.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /*
- * tools/testing/selftests/kvm/include/vmx.h
+ * tools/testing/selftests/kvm/include/x86_64/evmcs.h
  *
  * Copyright (C) 2018, Red Hat, Inc.
  *
diff --git a/tools/testing/selftests/kvm/include/x86_64/hyperv.h b/tools/testing/selftests/kvm/include/x86_64/hyperv.h
new file mode 100644
index 000000000000..412eaee7884a
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86_64/hyperv.h
@@ -0,0 +1,185 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * tools/testing/selftests/kvm/include/x86_64/hyperv.h
+ *
+ * Copyright (C) 2021, Red Hat, Inc.
+ * + */ + +#ifndef SELFTEST_KVM_HYPERV_H +#define SELFTEST_KVM_HYPERV_H + +#define HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS 0x40000000 +#define HYPERV_CPUID_INTERFACE 0x40000001 +#define HYPERV_CPUID_VERSION 0x40000002 +#define HYPERV_CPUID_FEATURES 0x40000003 +#define HYPERV_CPUID_ENLIGHTMENT_INFO 0x40000004 +#define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005 +#define HYPERV_CPUID_CPU_MANAGEMENT_FEATURES 0x40000007 +#define HYPERV_CPUID_NESTED_FEATURES 0x4000000A +#define HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS 0x40000080 +#define HYPERV_CPUID_SYNDBG_INTERFACE 0x40000081 +#define HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES 0x40000082 + +#define HV_X64_MSR_GUEST_OS_ID 0x40000000 +#define HV_X64_MSR_HYPERCALL 0x40000001 +#define HV_X64_MSR_VP_INDEX 0x40000002 +#define HV_X64_MSR_RESET 0x40000003 +#define HV_X64_MSR_VP_RUNTIME 0x40000010 +#define HV_X64_MSR_TIME_REF_COUNT 0x40000020 +#define HV_X64_MSR_REFERENCE_TSC 0x40000021 +#define HV_X64_MSR_TSC_FREQUENCY 0x40000022 +#define HV_X64_MSR_APIC_FREQUENCY 0x40000023 +#define HV_X64_MSR_EOI 0x40000070 +#define HV_X64_MSR_ICR 0x40000071 +#define HV_X64_MSR_TPR 0x40000072 +#define HV_X64_MSR_VP_ASSIST_PAGE 0x40000073 +#define HV_X64_MSR_SCONTROL 0x40000080 +#define HV_X64_MSR_SVERSION 0x40000081 +#define HV_X64_MSR_SIEFP 0x40000082 +#define HV_X64_MSR_SIMP 0x40000083 +#define HV_X64_MSR_EOM 0x40000084 +#define HV_X64_MSR_SINT0 0x40000090 +#define HV_X64_MSR_SINT1 0x40000091 +#define HV_X64_MSR_SINT2 0x40000092 +#define HV_X64_MSR_SINT3 0x40000093 +#define HV_X64_MSR_SINT4 0x40000094 +#define HV_X64_MSR_SINT5 0x40000095 +#define HV_X64_MSR_SINT6 0x40000096 +#define HV_X64_MSR_SINT7 0x40000097 +#define HV_X64_MSR_SINT8 0x40000098 +#define HV_X64_MSR_SINT9 0x40000099 +#define HV_X64_MSR_SINT10 0x4000009A +#define HV_X64_MSR_SINT11 0x4000009B +#define HV_X64_MSR_SINT12 0x4000009C +#define HV_X64_MSR_SINT13 0x4000009D +#define HV_X64_MSR_SINT14 0x4000009E +#define HV_X64_MSR_SINT15 0x4000009F +#define 
HV_X64_MSR_STIMER0_CONFIG 0x400000B0 +#define HV_X64_MSR_STIMER0_COUNT 0x400000B1 +#define HV_X64_MSR_STIMER1_CONFIG 0x400000B2 +#define HV_X64_MSR_STIMER1_COUNT 0x400000B3 +#define HV_X64_MSR_STIMER2_CONFIG 0x400000B4 +#define HV_X64_MSR_STIMER2_COUNT 0x400000B5 +#define HV_X64_MSR_STIMER3_CONFIG 0x400000B6 +#define HV_X64_MSR_STIMER3_COUNT 0x400000B7 +#define HV_X64_MSR_GUEST_IDLE 0x400000F0 +#define HV_X64_MSR_CRASH_P0 0x40000100 +#define HV_X64_MSR_CRASH_P1 0x40000101 +#define HV_X64_MSR_CRASH_P2 0x40000102 +#define HV_X64_MSR_CRASH_P3 0x40000103 +#define HV_X64_MSR_CRASH_P4 0x40000104 +#define HV_X64_MSR_CRASH_CTL 0x40000105 +#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106 +#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107 +#define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108 +#define HV_X64_MSR_TSC_INVARIANT_CONTROL 0x40000118 + +#define HV_X64_MSR_SYNDBG_CONTROL 0x400000F1 +#define HV_X64_MSR_SYNDBG_STATUS 0x400000F2 +#define HV_X64_MSR_SYNDBG_SEND_BUFFER 0x400000F3 +#define HV_X64_MSR_SYNDBG_RECV_BUFFER 0x400000F4 +#define HV_X64_MSR_SYNDBG_PENDING_BUFFER 0x400000F5 +#define HV_X64_MSR_SYNDBG_OPTIONS 0x400000FF + +/* HYPERV_CPUID_FEATURES.EAX */ +#define HV_MSR_VP_RUNTIME_AVAILABLE BIT(0) +#define HV_MSR_TIME_REF_COUNT_AVAILABLE BIT(1) +#define HV_MSR_SYNIC_AVAILABLE BIT(2) +#define HV_MSR_SYNTIMER_AVAILABLE BIT(3) +#define HV_MSR_APIC_ACCESS_AVAILABLE BIT(4) +#define HV_MSR_HYPERCALL_AVAILABLE BIT(5) +#define HV_MSR_VP_INDEX_AVAILABLE BIT(6) +#define HV_MSR_RESET_AVAILABLE BIT(7) +#define HV_MSR_STAT_PAGES_AVAILABLE BIT(8) +#define HV_MSR_REFERENCE_TSC_AVAILABLE BIT(9) +#define HV_MSR_GUEST_IDLE_AVAILABLE BIT(10) +#define HV_ACCESS_FREQUENCY_MSRS BIT(11) +#define HV_ACCESS_REENLIGHTENMENT BIT(13) +#define HV_ACCESS_TSC_INVARIANT BIT(15) + +/* HYPERV_CPUID_FEATURES.EBX */ +#define HV_CREATE_PARTITIONS BIT(0) +#define HV_ACCESS_PARTITION_ID BIT(1) +#define HV_ACCESS_MEMORY_POOL BIT(2) +#define HV_ADJUST_MESSAGE_BUFFERS BIT(3) +#define HV_POST_MESSAGES 
BIT(4) +#define HV_SIGNAL_EVENTS BIT(5) +#define HV_CREATE_PORT BIT(6) +#define HV_CONNECT_PORT BIT(7) +#define HV_ACCESS_STATS BIT(8) +#define HV_DEBUGGING BIT(11) +#define HV_CPU_MANAGEMENT BIT(12) +#define HV_ISOLATION BIT(22) + +/* HYPERV_CPUID_FEATURES.EDX */ +#define HV_X64_MWAIT_AVAILABLE BIT(0) +#define HV_X64_GUEST_DEBUGGING_AVAILABLE BIT(1) +#define HV_X64_PERF_MONITOR_AVAILABLE BIT(2) +#define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE BIT(3) +#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE BIT(4) +#define HV_X64_GUEST_IDLE_STATE_AVAILABLE BIT(5) +#define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE BIT(8) +#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE BIT(10) +#define HV_FEATURE_DEBUG_MSRS_AVAILABLE BIT(11) +#define HV_STIMER_DIRECT_MODE_AVAILABLE BIT(19) + +/* HYPERV_CPUID_ENLIGHTMENT_INFO.EAX */ +#define HV_X64_AS_SWITCH_RECOMMENDED BIT(0) +#define HV_X64_LOCAL_TLB_FLUSH_RECOMMENDED BIT(1) +#define HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED BIT(2) +#define HV_X64_APIC_ACCESS_RECOMMENDED BIT(3) +#define HV_X64_SYSTEM_RESET_RECOMMENDED BIT(4) +#define HV_X64_RELAXED_TIMING_RECOMMENDED BIT(5) +#define HV_DEPRECATING_AEOI_RECOMMENDED BIT(9) +#define HV_X64_CLUSTER_IPI_RECOMMENDED BIT(10) +#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED BIT(11) +#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED BIT(14) + +/* HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX */ +#define HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING BIT(1) + +/* Hypercalls */ +#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE 0x0002 +#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST 0x0003 +#define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008 +#define HVCALL_SEND_IPI 0x000b +#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013 +#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014 +#define HVCALL_SEND_IPI_EX 0x0015 +#define HVCALL_GET_PARTITION_ID 0x0046 +#define HVCALL_DEPOSIT_MEMORY 0x0048 +#define HVCALL_CREATE_VP 0x004e +#define HVCALL_GET_VP_REGISTERS 0x0050 +#define HVCALL_SET_VP_REGISTERS 0x0051 +#define HVCALL_POST_MESSAGE 0x005c +#define 
HVCALL_SIGNAL_EVENT 0x005d +#define HVCALL_POST_DEBUG_DATA 0x0069 +#define HVCALL_RETRIEVE_DEBUG_DATA 0x006a +#define HVCALL_RESET_DEBUG_SESSION 0x006b +#define HVCALL_ADD_LOGICAL_PROCESSOR 0x0076 +#define HVCALL_MAP_DEVICE_INTERRUPT 0x007c +#define HVCALL_UNMAP_DEVICE_INTERRUPT 0x007d +#define HVCALL_RETARGET_INTERRUPT 0x007e +#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af +#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0 + +#define HV_FLUSH_ALL_PROCESSORS BIT(0) +#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES BIT(1) +#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2) +#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3) + +/* hypercall status code */ +#define HV_STATUS_SUCCESS 0 +#define HV_STATUS_INVALID_HYPERCALL_CODE 2 +#define HV_STATUS_INVALID_HYPERCALL_INPUT 3 +#define HV_STATUS_INVALID_ALIGNMENT 4 +#define HV_STATUS_INVALID_PARAMETER 5 +#define HV_STATUS_ACCESS_DENIED 6 +#define HV_STATUS_OPERATION_DENIED 8 +#define HV_STATUS_INSUFFICIENT_MEMORY 11 +#define HV_STATUS_INVALID_PORT_ID 17 +#define HV_STATUS_INVALID_CONNECTION_ID 18 +#define HV_STATUS_INSUFFICIENT_BUFFERS 19 + +#endif /* !SELFTEST_KVM_HYPERV_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 0b30b4e15c38..242ae8e09a65 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -13,6 +13,8 @@ #include <asm/msr-index.h> +#include "../kvm_util.h" + #define X86_EFLAGS_FIXED (1u << 1) #define X86_CR4_VME (1ul << 0) @@ -53,7 +55,8 @@ #define CPUID_PKU (1ul << 3) #define CPUID_LA57 (1ul << 16) -#define UNEXPECTED_VECTOR_PORT 0xfff0u +/* CPUID.0x8000_0001.EDX */ +#define CPUID_GBPAGES (1ul << 26) /* General Registers in 64-Bit Mode */ struct gpr64_regs { @@ -391,9 +394,13 @@ struct ex_regs { void vm_init_descriptor_tables(struct kvm_vm *vm); void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid); -void 
vm_handle_exception(struct kvm_vm *vm, int vector, +void vm_install_exception_handler(struct kvm_vm *vm, int vector, void (*handler)(struct ex_regs *)); +uint64_t vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr); +void vm_set_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr, + uint64_t pte); + /* * set_cpuid() - overwrites a matching cpuid entry with the provided value. * matches based on ent->function && ent->index. returns true @@ -410,6 +417,14 @@ struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void); void vcpu_set_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid); struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid); +enum x86_page_size { + X86_PAGE_SIZE_4K = 0, + X86_PAGE_SIZE_2M, + X86_PAGE_SIZE_1G, +}; +void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, + enum x86_page_size page_size); + /* * Basic CPU control in CR0 */ @@ -425,53 +440,6 @@ struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpui #define X86_CR0_CD (1UL<<30) /* Cache Disable */ #define X86_CR0_PG (1UL<<31) /* Paging */ -#define APIC_DEFAULT_GPA 0xfee00000ULL - -/* APIC base address MSR and fields */ -#define MSR_IA32_APICBASE 0x0000001b -#define MSR_IA32_APICBASE_BSP (1<<8) -#define MSR_IA32_APICBASE_EXTD (1<<10) -#define MSR_IA32_APICBASE_ENABLE (1<<11) -#define MSR_IA32_APICBASE_BASE (0xfffff<<12) -#define GET_APIC_BASE(x) (((x) >> 12) << 12) - -#define APIC_BASE_MSR 0x800 -#define X2APIC_ENABLE (1UL << 10) -#define APIC_ID 0x20 -#define APIC_LVR 0x30 -#define GET_APIC_ID_FIELD(x) (((x) >> 24) & 0xFF) -#define APIC_TASKPRI 0x80 -#define APIC_PROCPRI 0xA0 -#define APIC_EOI 0xB0 -#define APIC_SPIV 0xF0 -#define APIC_SPIV_FOCUS_DISABLED (1 << 9) -#define APIC_SPIV_APIC_ENABLED (1 << 8) -#define APIC_ICR 0x300 -#define APIC_DEST_SELF 0x40000 -#define APIC_DEST_ALLINC 0x80000 -#define APIC_DEST_ALLBUT 0xC0000 -#define APIC_ICR_RR_MASK 0x30000 -#define APIC_ICR_RR_INVALID 0x00000 -#define 
APIC_ICR_RR_INPROG 0x10000 -#define APIC_ICR_RR_VALID 0x20000 -#define APIC_INT_LEVELTRIG 0x08000 -#define APIC_INT_ASSERT 0x04000 -#define APIC_ICR_BUSY 0x01000 -#define APIC_DEST_LOGICAL 0x00800 -#define APIC_DEST_PHYSICAL 0x00000 -#define APIC_DM_FIXED 0x00000 -#define APIC_DM_FIXED_MASK 0x00700 -#define APIC_DM_LOWEST 0x00100 -#define APIC_DM_SMI 0x00200 -#define APIC_DM_REMRD 0x00300 -#define APIC_DM_NMI 0x00400 -#define APIC_DM_INIT 0x00500 -#define APIC_DM_STARTUP 0x00600 -#define APIC_DM_EXTINT 0x00700 -#define APIC_VECTOR_MASK 0x000FF -#define APIC_ICR2 0x310 -#define SET_APIC_DEST_FIELD(x) ((x) << 24) - /* VMX_EPT_VPID_CAP bits */ #define VMX_EPT_VPID_CAP_AD_BITS (1ULL << 21) diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h index 65eb1079a161..583ceb0d1457 100644 --- a/tools/testing/selftests/kvm/include/x86_64/vmx.h +++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h @@ -10,6 +10,7 @@ #include <stdint.h> #include "processor.h" +#include "apic.h" /* * Definitions of Primary Processor-Based VM-Execution Controls. 
@@ -607,15 +608,13 @@ bool nested_vmx_supported(void);
 void nested_vmx_check_supported(void);
 
 void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
-		   uint64_t nested_paddr, uint64_t paddr, uint32_t eptp_memslot);
+		   uint64_t nested_paddr, uint64_t paddr);
 void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
-		 uint64_t nested_paddr, uint64_t paddr, uint64_t size,
-		 uint32_t eptp_memslot);
+		 uint64_t nested_paddr, uint64_t paddr, uint64_t size);
 void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
-			uint32_t memslot, uint32_t eptp_memslot);
+			uint32_t memslot);
 void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
 		  uint32_t eptp_memslot);
-void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm,
-				      uint32_t eptp_memslot);
+void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm);
 
 #endif /* SELFTEST_KVM_VMX_H */
diff --git a/tools/testing/selftests/kvm/kvm_binary_stats_test.c b/tools/testing/selftests/kvm/kvm_binary_stats_test.c
new file mode 100644
index 000000000000..5906bbc08483
--- /dev/null
+++ b/tools/testing/selftests/kvm/kvm_binary_stats_test.c
@@ -0,0 +1,254 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * kvm_binary_stats_test
+ *
+ * Copyright (C) 2021, Google LLC.
+ *
+ * Test the fd-based interface for KVM statistics.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "asm/kvm.h"
+#include "linux/kvm.h"
+
+/*
+ * Validate the binary stats exposed through @stats_fd: read the header, the
+ * id string, the descriptors and the data block, and assert that their sizes,
+ * offsets and flags are consistent with each other.
+ *
+ * Every buffer allocated here is freed before returning, including on the
+ * early return taken when the header reports no descriptors.
+ */
+static void stats_test(int stats_fd)
+{
+	ssize_t ret;
+	int i;
+	size_t size_desc;
+	size_t size_data = 0;
+	struct kvm_stats_header *header;
+	char *id;
+	struct kvm_stats_desc *stats_desc;
+	u64 *stats_data;
+	struct kvm_stats_desc *pdesc;
+
+	/* Read kvm stats header */
+	header = malloc(sizeof(*header));
+	TEST_ASSERT(header, "Allocate memory for stats header");
+
+	ret = read(stats_fd, header, sizeof(*header));
+	TEST_ASSERT(ret == sizeof(*header), "Read stats header");
+	size_desc = sizeof(*stats_desc) + header->name_size;
+
+	/* Read kvm stats id string */
+	id = malloc(header->name_size);
+	TEST_ASSERT(id, "Allocate memory for id string");
+	ret = read(stats_fd, id, header->name_size);
+	TEST_ASSERT(ret == header->name_size, "Read id string");
+
+	/* Check id string, that should start with "kvm" */
+	TEST_ASSERT(!strncmp(id, "kvm", 3) &&
+		    strnlen(id, header->name_size) < header->name_size,
+		    "Invalid KVM stats type, id: %.*s",
+		    (int)header->name_size, id);
+
+	/* Sanity check for other fields in header */
+	if (header->num_desc == 0) {
+		printf("No KVM stats defined!\n");
+		/* Nothing left to validate; release what was allocated so far */
+		free(id);
+		free(header);
+		return;
+	}
+	/* Check overlap */
+	TEST_ASSERT(header->desc_offset > 0 && header->data_offset > 0
+			&& header->desc_offset >= sizeof(*header)
+			&& header->data_offset >= sizeof(*header),
+			"Invalid offset fields in header");
+	TEST_ASSERT(header->desc_offset > header->data_offset ||
+			(header->desc_offset + size_desc * header->num_desc <=
+							header->data_offset),
+			"Descriptor block is overlapped with data block");
+
+	/* Allocate memory for stats descriptors */
+	stats_desc = calloc(header->num_desc, size_desc);
+	TEST_ASSERT(stats_desc, "Allocate memory for stats descriptors");
+	/* Read kvm stats descriptors */
+	ret = pread(stats_fd, stats_desc,
+			size_desc * header->num_desc, header->desc_offset);
+	TEST_ASSERT(ret == size_desc * header->num_desc,
+			"Read KVM stats descriptors");
+
+	/* Sanity check for fields in descriptors */
+	for (i = 0; i < header->num_desc; ++i) {
+		pdesc = (void *)stats_desc + i * size_desc;
+		/* Check type,unit,base boundaries */
+		TEST_ASSERT((pdesc->flags & KVM_STATS_TYPE_MASK)
+				<= KVM_STATS_TYPE_MAX, "Unknown KVM stats type");
+		TEST_ASSERT((pdesc->flags & KVM_STATS_UNIT_MASK)
+				<= KVM_STATS_UNIT_MAX, "Unknown KVM stats unit");
+		TEST_ASSERT((pdesc->flags & KVM_STATS_BASE_MASK)
+				<= KVM_STATS_BASE_MAX, "Unknown KVM stats base");
+		/* Check exponent for stats unit
+		 * Exponent for counter should be greater than or equal to 0
+		 * Exponent for unit bytes should be greater than or equal to 0
+		 * Exponent for unit seconds should be less than or equal to 0
+		 * Exponent for unit clock cycles should be greater than or
+		 * equal to 0
+		 */
+		switch (pdesc->flags & KVM_STATS_UNIT_MASK) {
+		case KVM_STATS_UNIT_NONE:
+		case KVM_STATS_UNIT_BYTES:
+		case KVM_STATS_UNIT_CYCLES:
+			TEST_ASSERT(pdesc->exponent >= 0,
+					"Unsupported KVM stats unit");
+			break;
+		case KVM_STATS_UNIT_SECONDS:
+			TEST_ASSERT(pdesc->exponent <= 0,
+					"Unsupported KVM stats unit");
+			break;
+		}
+		/* Check name string */
+		TEST_ASSERT(strnlen(pdesc->name, header->name_size) <
+			    header->name_size,
+			    "KVM stats name(%.*s) too long",
+			    (int)header->name_size, pdesc->name);
+		/* Check size field, which should not be zero */
+		TEST_ASSERT(pdesc->size, "KVM descriptor(%s) with size of 0",
+				pdesc->name);
+		size_data += pdesc->size * sizeof(*stats_data);
+	}
+	/* Check overlap */
+	TEST_ASSERT(header->data_offset >= header->desc_offset
+		|| header->data_offset + size_data <= header->desc_offset,
+		"Data block is overlapped with Descriptor block");
+	/* Check validity of all stats data size */
+	TEST_ASSERT(size_data >= header->num_desc * sizeof(*stats_data),
+			"Data size is not correct");
+	/* Check stats offset */
+	for (i = 0; i < header->num_desc; ++i) {
+		pdesc = (void *)stats_desc + i * size_desc;
+		TEST_ASSERT(pdesc->offset < size_data,
+			"Invalid offset (%u) for stats: %s",
+			pdesc->offset, pdesc->name);
+	}
+
+	/* Allocate memory for stats data */
+	stats_data = malloc(size_data);
+	TEST_ASSERT(stats_data, "Allocate memory for stats data");
+	/* Read kvm stats data as a bulk */
+	ret = pread(stats_fd, stats_data, size_data, header->data_offset);
+	TEST_ASSERT(ret == size_data, "Read KVM stats data");
+	/* Read kvm stats data one by one */
+	size_data = 0;
+	for (i = 0; i < header->num_desc; ++i) {
+		pdesc = (void *)stats_desc + i * size_desc;
+		ret = pread(stats_fd, stats_data,
+				pdesc->size * sizeof(*stats_data),
+				header->data_offset + size_data);
+		TEST_ASSERT(ret == pdesc->size * sizeof(*stats_data),
+				"Read data of KVM stats: %s", pdesc->name);
+		size_data += pdesc->size * sizeof(*stats_data);
+	}
+
+	free(stats_data);
+	free(stats_desc);
+	free(id);
+	free(header);
+}
+
+
+/* Run stats_test() on the VM's stats fd and check that close() releases it. */
+static void vm_stats_test(struct kvm_vm *vm)
+{
+	int stats_fd;
+
+	/* Get fd for VM stats */
+	stats_fd = vm_get_stats_fd(vm);
+	TEST_ASSERT(stats_fd >= 0, "Get VM stats fd");
+
+	stats_test(stats_fd);
+	close(stats_fd);
+	TEST_ASSERT(fcntl(stats_fd, F_GETFD) == -1, "Stats fd not freed");
+}
+
+/* Run stats_test() on one VCPU's stats fd and check that close() releases it. */
+static void vcpu_stats_test(struct kvm_vm *vm, int vcpu_id)
+{
+	int stats_fd;
+
+	/* Get fd for VCPU stats */
+	stats_fd = vcpu_get_stats_fd(vm, vcpu_id);
+	TEST_ASSERT(stats_fd >= 0, "Get VCPU stats fd");
+
+	stats_test(stats_fd);
+	close(stats_fd);
+	TEST_ASSERT(fcntl(stats_fd, F_GETFD) == -1, "Stats fd not freed");
+}
+
+#define DEFAULT_NUM_VM		4
+#define DEFAULT_NUM_VCPU	4
+
+/*
+ * Usage: kvm_binary_stats_test [#vm] [#vcpu]
+ * The first parameter #vm set the number of VMs being created.
+ * The second parameter #vcpu set the number of VCPUs being created.
+ * By default, DEFAULT_NUM_VM VM and DEFAULT_NUM_VCPU VCPU for the VM would be
+ * created for testing.
+ */
+
+int main(int argc, char *argv[])
+{
+	int i, j;
+	struct kvm_vm **vms;
+	int max_vm = DEFAULT_NUM_VM;
+	int max_vcpu = DEFAULT_NUM_VCPU;
+
+	/* Get the number of VMs and VCPUs that would be created for testing. */
+	if (argc > 1) {
+		max_vm = strtol(argv[1], NULL, 0);
+		if (max_vm <= 0)
+			max_vm = DEFAULT_NUM_VM;
+	}
+	if (argc > 2) {
+		max_vcpu = strtol(argv[2], NULL, 0);
+		if (max_vcpu <= 0)
+			max_vcpu = DEFAULT_NUM_VCPU;
+	}
+
+	/* Check the extension for binary stats */
+	if (kvm_check_cap(KVM_CAP_BINARY_STATS_FD) <= 0) {
+		print_skip("Binary form statistics interface is not supported");
+		exit(KSFT_SKIP);
+	}
+
+	/* Create VMs and VCPUs */
+	vms = malloc(sizeof(vms[0]) * max_vm);
+	TEST_ASSERT(vms, "Allocate memory for storing VM pointers");
+	for (i = 0; i < max_vm; ++i) {
+		vms[i] = vm_create(VM_MODE_DEFAULT,
+				DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+		for (j = 0; j < max_vcpu; ++j)
+			vm_vcpu_add(vms[i], j);
+	}
+
+	/* Check stats read for every VM and VCPU */
+	for (i = 0; i < max_vm; ++i) {
+		vm_stats_test(vms[i]);
+		for (j = 0; j < max_vcpu; ++j)
+			vcpu_stats_test(vms[i], j);
+	}
+
+	for (i = 0; i < max_vm; ++i)
+		kvm_vm_free(vms[i]);
+	free(vms);
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c
index 82171f17c1d7..0d04a7db7f24 100644
--- a/tools/testing/selftests/kvm/kvm_page_table_test.c
+++ b/tools/testing/selftests/kvm/kvm_page_table_test.c
@@ -303,7 +303,7 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg)
 				    TEST_MEM_SLOT_INDEX, guest_num_pages, 0);
 
 	/* Do mapping(GVA->GPA) for the testing memory slot */
-	virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0);
+	virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages);
 
 	/* Cache the HVA pointer of the region */
 	host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem);
diff --git a/tools/testing/selftests/kvm/lib/aarch64/handlers.S
b/tools/testing/selftests/kvm/lib/aarch64/handlers.S
new file mode 100644
index 000000000000..0e443eadfac6
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/aarch64/handlers.S
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+.macro save_registers
+	add	sp, sp, #-16 * 17 /* make room for 17 16-byte slots (indices 0..16 below) */
+
+	stp	x0, x1, [sp, #16 * 0]
+	stp	x2, x3, [sp, #16 * 1]
+	stp	x4, x5, [sp, #16 * 2]
+	stp	x6, x7, [sp, #16 * 3]
+	stp	x8, x9, [sp, #16 * 4]
+	stp	x10, x11, [sp, #16 * 5]
+	stp	x12, x13, [sp, #16 * 6]
+	stp	x14, x15, [sp, #16 * 7]
+	stp	x16, x17, [sp, #16 * 8]
+	stp	x18, x19, [sp, #16 * 9]
+	stp	x20, x21, [sp, #16 * 10]
+	stp	x22, x23, [sp, #16 * 11]
+	stp	x24, x25, [sp, #16 * 12]
+	stp	x26, x27, [sp, #16 * 13]
+	stp	x28, x29, [sp, #16 * 14]
+
+	/*
+	 * This stores sp_el1 into ex_regs.sp so exception handlers can "look"
+	 * at it. It will _not_ be used to restore the sp on return from the
+	 * exception so handlers can not update it.
+	 */
+	add	x1, sp, #16 * 17 /* value sp had before the frame was reserved */
+	stp	x30, x1, [sp, #16 * 15] /* x30, SP */
+
+	mrs	x1, elr_el1
+	mrs	x2, spsr_el1
+	stp	x1, x2, [sp, #16 * 16] /* PC, PSTATE */
+.endm
+
+.macro restore_registers
+	ldp	x1, x2, [sp, #16 * 16] /* PC, PSTATE */
+	msr	elr_el1, x1
+	msr	spsr_el1, x2
+
+	/* sp is not restored */
+	ldp	x30, xzr, [sp, #16 * 15] /* x30, SP */
+
+	ldp	x28, x29, [sp, #16 * 14]
+	ldp	x26, x27, [sp, #16 * 13]
+	ldp	x24, x25, [sp, #16 * 12]
+	ldp	x22, x23, [sp, #16 * 11]
+	ldp	x20, x21, [sp, #16 * 10]
+	ldp	x18, x19, [sp, #16 * 9]
+	ldp	x16, x17, [sp, #16 * 8]
+	ldp	x14, x15, [sp, #16 * 7]
+	ldp	x12, x13, [sp, #16 * 6]
+	ldp	x10, x11, [sp, #16 * 5]
+	ldp	x8, x9, [sp, #16 * 4]
+	ldp	x6, x7, [sp, #16 * 3]
+	ldp	x4, x5, [sp, #16 * 2]
+	ldp	x2, x3, [sp, #16 * 1]
+	ldp	x0, x1, [sp, #16 * 0] /* x1/x2 were scratch above; reloaded last */
+
+	add	sp, sp, #16 * 17 /* drop the frame reserved by save_registers */
+
+	eret
+.endm
+
+.pushsection ".entry.text", "ax"
+.balign 0x800
+.global vectors
+vectors:
+.popsection
+
+.set	vector, 0
+
+/*
+ * Build an exception handler for vector and append a jump to it into
+ * vectors (while making sure that it's 0x80 aligned).
+ */
+.macro HANDLER, label
+handler_\label:
+	save_registers
+	mov	x0, sp /* first argument to route_exception: the saved register frame */
+	mov	x1, #vector /* second argument to route_exception: the vector index */
+	bl	route_exception
+	restore_registers
+
+.pushsection ".entry.text", "ax"
+.balign 0x80
+	b	handler_\label
+.popsection
+
+.set	vector, vector + 1
+.endm
+
+.macro HANDLER_INVALID
+.pushsection ".entry.text", "ax"
+.balign 0x80
+/* This will abort so no need to save and restore registers. */
+	mov	x0, #vector
+	mov	x1, #0 /* ec */
+	mov	x2, #0 /* valid_ec */
+	b	kvm_exit_unexpected_exception
+.popsection
+
+.set	vector, vector + 1
+.endm
+
+/*
+ * Caution: be sure to not add anything between the declaration of vectors
+ * above and these macro calls that will build the vectors table below it.
+ */
+	HANDLER_INVALID				// Synchronous EL1t
+	HANDLER_INVALID				// IRQ EL1t
+	HANDLER_INVALID				// FIQ EL1t
+	HANDLER_INVALID				// Error EL1t
+
+	HANDLER	el1h_sync			// Synchronous EL1h
+	HANDLER	el1h_irq			// IRQ EL1h
+	HANDLER	el1h_fiq			// FIQ EL1h
+	HANDLER	el1h_error			// Error EL1h
+
+	HANDLER	el0_sync_64			// Synchronous 64-bit EL0
+	HANDLER	el0_irq_64			// IRQ 64-bit EL0
+	HANDLER	el0_fiq_64			// FIQ 64-bit EL0
+	HANDLER	el0_error_64			// Error 64-bit EL0
+
+	HANDLER	el0_sync_32			// Synchronous 32-bit EL0
+	HANDLER	el0_irq_32			// IRQ 32-bit EL0
+	HANDLER	el0_fiq_32			// FIQ 32-bit EL0
+	HANDLER	el0_error_32			// Error 32-bit EL0
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
index cee92d477dc0..9f49f6caafe5 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -6,14 +6,16 @@
  */
 
 #include <linux/compiler.h>
+#include <assert.h>
 
 #include "kvm_util.h"
 #include "../kvm_util_internal.h"
 #include "processor.h"
 
-#define KVM_GUEST_PAGE_TABLE_MIN_PADDR		0x180000
 #define DEFAULT_ARM64_GUEST_STACK_VADDR_MIN	0xac0000
 
+static vm_vaddr_t exception_handlers;
+
 static uint64_t page_align(struct kvm_vm *vm, uint64_t v)
 {
 	return (v + vm->page_size) & ~(vm->page_size - 1);
@@ -72,19 +74,19 @@ static uint64_t __maybe_unused ptrs_per_pte(struct kvm_vm *vm)
 	return 1 << (vm->page_shift - 3);
 }
 
-void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
+void virt_pgd_alloc(struct kvm_vm *vm)
 {
 	if (!vm->pgd_created) {
 		vm_paddr_t paddr = vm_phy_pages_alloc(vm,
 			page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size,
-			KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
+			KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
 		vm->pgd = paddr;
 		vm->pgd_created = true;
 	}
 }
 
-void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
-		  uint32_t pgd_memslot, uint64_t flags)
+static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+			 uint64_t flags)
 {
 	uint8_t attr_idx = flags & 7;
 	uint64_t *ptep;
@@ -104,25 +106,19 @@ void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
 		paddr, vm->max_gfn, vm->page_size);
 
 	ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8;
-	if (!*ptep) {
-		*ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
-		*ptep |= 3;
-	}
+	if (!*ptep)
+		*ptep = vm_alloc_page_table(vm) | 3;
 
 	switch (vm->pgtable_levels) {
 	case 4:
 		ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8;
-		if (!*ptep) {
-			*ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
-			*ptep |= 3;
-		}
+		if (!*ptep)
+			*ptep = vm_alloc_page_table(vm) | 3;
 		/* fall through */
 	case 3:
 		ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, vaddr) * 8;
-		if (!*ptep) {
-			*ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
-			*ptep |= 3;
-		}
+		if (!*ptep)
+			*ptep = vm_alloc_page_table(vm) | 3;
 		/* fall through */
 	case 2:
 		ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, vaddr) * 8;
@@ -135,12 +131,11 @@ void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
 	*ptep |= (attr_idx << 2) | (1 << 10) /* Access Flag */;
 }
 
-void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
-		 uint32_t pgd_memslot)
+void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
 {
 	uint64_t attr_idx = 4; /* NORMAL (See DEFAULT_MAIR_EL1) */
 
-	_virt_pg_map(vm, vaddr, paddr, pgd_memslot, attr_idx);
+	_virt_pg_map(vm, vaddr, paddr, attr_idx);
 }
 
 vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
@@ -302,7 +297,7 @@ void aarch64_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid,
 					DEFAULT_STACK_PGS * vm->page_size :
 					vm->page_size;
 	uint64_t stack_vaddr = vm_vaddr_alloc(vm, stack_size,
-					DEFAULT_ARM64_GUEST_STACK_VADDR_MIN, 0, 0);
+					DEFAULT_ARM64_GUEST_STACK_VADDR_MIN);
 
 	vm_vcpu_add(vm, vcpuid);
 	aarch64_vcpu_setup(vm, vcpuid, init);
@@ -334,6 +329,118 @@ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
 	va_end(ap);
 }
 
+/*
+ * Report an exception that has no handler to the host through
+ * UCALL_UNHANDLED, passing the vector, ec and valid_ec, then spin forever.
+ */
+void kvm_exit_unexpected_exception(int vector, uint64_t ec, bool valid_ec)
+{
+	ucall(UCALL_UNHANDLED, 3, vector, ec, valid_ec);
+	while (1)
+		;
+}
+
+/* Fail the test if the last ucall from @vcpuid was UCALL_UNHANDLED. */
 void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid)
 {
+	struct ucall uc;
+
+	if (get_ucall(vm, vcpuid, &uc) != UCALL_UNHANDLED)
+		return;
+
+	if (uc.args[2]) /* valid_ec */ {
+		assert(VECTOR_IS_SYNC(uc.args[0]));
+		TEST_FAIL("Unexpected exception (vector:0x%lx, ec:0x%lx)",
+			  uc.args[0], uc.args[1]);
+	} else {
+		assert(!VECTOR_IS_SYNC(uc.args[0]));
+		TEST_FAIL("Unexpected exception (vector:0x%lx)",
+			  uc.args[0]);
+	}
+}
+
+/* Handler table, indexed by vector and then by exception class (ec). */
+struct handlers {
+	handler_fn exception_handlers[VECTOR_NUM][ESR_EC_NUM];
+};
+
+/* Program the vCPU's VBAR_EL1 with the address of the "vectors" symbol. */
+void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid)
+{
+	extern char vectors;
+
+	set_reg(vm, vcpuid, ARM64_SYS_REG(VBAR_EL1), (uint64_t)&vectors);
+}
+
+/*
+ * Call the handler installed for (vector, ec), if any.  ec is taken from
+ * ESR_EL1 for the two synchronous vectors handled here and is 0 for the
+ * IRQ/FIQ/error vectors; any other vector, or a missing handler, ends in
+ * kvm_exit_unexpected_exception().
+ */
+void route_exception(struct ex_regs *regs, int vector)
+{
+	struct handlers *handlers = (struct handlers *)exception_handlers;
+	bool valid_ec;
+	int ec = 0;
+
+	switch (vector) {
+	case VECTOR_SYNC_CURRENT:
+	case VECTOR_SYNC_LOWER_64:
+		ec = (read_sysreg(esr_el1) >> ESR_EC_SHIFT) & ESR_EC_MASK;
+		valid_ec = true;
+		break;
+	case VECTOR_IRQ_CURRENT:
+	case VECTOR_IRQ_LOWER_64:
+	case VECTOR_FIQ_CURRENT:
+	case VECTOR_FIQ_LOWER_64:
+	case VECTOR_ERROR_CURRENT:
+	case VECTOR_ERROR_LOWER_64:
+		ec = 0;
+		valid_ec = false;
+		break;
+	default:
+		valid_ec = false;
+		goto unexpected_exception;
+	}
+
+	if (handlers && handlers->exception_handlers[vector][ec])
+		return handlers->exception_handlers[vector][ec](regs);
+
+unexpected_exception:
+	kvm_exit_unexpected_exception(vector, ec, valid_ec);
+}
+
+/*
+ * Allocate the handler table in guest memory and store its guest virtual
+ * address in the guest's copy of exception_handlers.
+ */
+void vm_init_descriptor_tables(struct kvm_vm *vm)
+{
+	vm->handlers = vm_vaddr_alloc(vm, sizeof(struct handlers),
+			vm->page_size);
+
+	*(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
+}
+
+/* Install @handler for synchronous @vector and exception class @ec. */
+void vm_install_sync_handler(struct kvm_vm *vm, int vector, int ec,
+			 void (*handler)(struct ex_regs *))
+{
+	struct handlers *handlers = addr_gva2hva(vm, vm->handlers);
+
+	assert(VECTOR_IS_SYNC(vector));
+	assert(vector < VECTOR_NUM);
+	assert(ec < ESR_EC_NUM);
+	handlers->exception_handlers[vector][ec] = handler;
+}
+
+/* Install @handler for non-synchronous @vector (stored at ec index 0). */
+void vm_install_exception_handler(struct kvm_vm *vm, int vector,
+			 void (*handler)(struct ex_regs *))
+{
+	struct handlers *handlers = addr_gva2hva(vm, vm->handlers);
+
+	assert(!VECTOR_IS_SYNC(vector));
+	assert(vector < VECTOR_NUM);
+	handlers->exception_handlers[vector][0] = handler;
 }
diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
index 2f37b90ee1a9..e0b0164e9af8 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
@@ -14,7 +14,7 @@ static bool ucall_mmio_init(struct kvm_vm *vm, vm_paddr_t gpa)
 	if (kvm_userspace_memory_region_find(vm, gpa, gpa + 1))
 		return false;
 
-	virt_pg_map(vm, gpa, gpa, 0);
+	virt_pg_map(vm, gpa, gpa);
 
 	ucall_exit_mmio_addr = (vm_vaddr_t *)gpa;
 	sync_global_to_guest(vm, ucall_exit_mmio_addr);
diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c
index bc75a91e00a6..eac44f5d0db0 100644
--- a/tools/testing/selftests/kvm/lib/elf.c
+++ b/tools/testing/selftests/kvm/lib/elf.c
@@ -111,8
+111,7 @@ static void elfhdr_get(const char *filename, Elf64_Ehdr *hdrp)
  * by the image and it needs to have sufficient available physical pages, to
  * back the virtual pages used to load the image.
  */
-void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
-	uint32_t data_memslot, uint32_t pgd_memslot)
+void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename)
 {
 	off_t offset, offset_rv;
 	Elf64_Ehdr hdr;
@@ -164,8 +163,7 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
 		seg_vend |= vm->page_size - 1;
 		size_t seg_size = seg_vend - seg_vstart + 1;
 
-		vm_vaddr_t vaddr = vm_vaddr_alloc(vm, seg_size, seg_vstart,
-			data_memslot, pgd_memslot);
+		vm_vaddr_t vaddr = vm_vaddr_alloc(vm, seg_size, seg_vstart);
 		TEST_ASSERT(vaddr == seg_vstart, "Unable to allocate "
 			"virtual memory for segment at requested min addr,\n"
 			" segment idx: %u\n"
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index a2b732cf96ea..5b56b57b3c20 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -365,7 +365,7 @@ struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
 	pages = vm_adjust_num_guest_pages(mode, pages);
 	vm = vm_create(mode, pages, O_RDWR);
 
-	kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+	kvm_vm_elf_load(vm, program_invocation_name);
 
 #ifdef __x86_64__
 	vm_create_irqchip(vm);
@@ -375,10 +375,6 @@ struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
 		uint32_t vcpuid = vcpuids ? vcpuids[i] : i;
 
 		vm_vcpu_add_default(vm, vcpuid, guest_code);
-
-#ifdef __x86_64__
-		vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid());
-#endif
 	}
 
 	return vm;
@@ -1251,15 +1247,13 @@ va_found:
  * a unique set of pages, with the minimum real allocation being at least
  * a page.
  */
-vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
-			  uint32_t data_memslot, uint32_t pgd_memslot)
+vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min)
 {
 	uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);
 
-	virt_pgd_alloc(vm, pgd_memslot);
+	virt_pgd_alloc(vm);
 	vm_paddr_t paddr = vm_phy_pages_alloc(vm, pages,
-					      KVM_UTIL_MIN_PFN * vm->page_size,
-					      data_memslot);
+					      KVM_UTIL_MIN_PFN * vm->page_size, 0);
 
 	/*
 	 * Find an unused range of virtual page addresses of at least
@@ -1271,7 +1265,7 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
 	for (vm_vaddr_t vaddr = vaddr_start; pages > 0;
 		pages--, vaddr += vm->page_size, paddr += vm->page_size) {
 
-		virt_pg_map(vm, vaddr, paddr, pgd_memslot);
+		virt_pg_map(vm, vaddr, paddr);
 
 		sparsebit_set(vm->vpages_mapped,
 			vaddr >> vm->page_shift);
@@ -1281,6 +1275,44 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
 }
 
 /*
+ * VM Virtual Address Allocate Pages
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   nr_pages - Number of system pages (getpagesize() bytes each) to allocate
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Starting guest virtual address
+ *
+ * Allocates at least @nr_pages system pages in the vm's virtual address space.
+ */
+vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages)
+{
+	return vm_vaddr_alloc(vm, (size_t)nr_pages * getpagesize(), KVM_UTIL_MIN_VADDR);
+}
+
+/*
+ * VM Virtual Address Allocate Page
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Starting guest virtual address
+ *
+ * Allocates at least one system page worth of bytes within the virtual address
+ * space of the vm.
+ */
+vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm)
+{
+	return vm_vaddr_alloc_pages(vm, 1);
+}
+
+/*
  * Map a range of VM virtual address to the VM's physical address
  *
  * Input Args:
@@ -1298,7 +1330,7 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
  * @npages starting at @vaddr to the page range starting at @paddr.
  */
 void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
-	      unsigned int npages, uint32_t pgd_memslot)
+	      unsigned int npages)
 {
 	size_t page_size = vm->page_size;
 	size_t size = npages * page_size;
@@ -1307,7 +1339,7 @@ void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
 	TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
 
 	while (npages--) {
-		virt_pg_map(vm, vaddr, paddr, pgd_memslot);
+		virt_pg_map(vm, vaddr, paddr);
 		vaddr += page_size;
 		paddr += page_size;
 	}
@@ -2177,6 +2209,14 @@ vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
 	return vm_phy_pages_alloc(vm, 1, paddr_min, memslot);
 }
 
+/* Arbitrary minimum physical address used for virtual translation tables.
*/ +#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000 + +vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm) +{ + return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0); +} + /* * Address Guest Virtual to Host Virtual * @@ -2286,3 +2326,15 @@ unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size) n = DIV_ROUND_UP(size, vm_guest_mode_params[mode].page_size); return vm_adjust_num_guest_pages(mode, n); } + +int vm_get_stats_fd(struct kvm_vm *vm) +{ + return ioctl(vm->fd, KVM_GET_STATS_FD, NULL); +} + +int vcpu_get_stats_fd(struct kvm_vm *vm, uint32_t vcpuid) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + + return ioctl(vcpu->fd, KVM_GET_STATS_FD, NULL); +} diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c index 7397ca299835..b488f4aefea8 100644 --- a/tools/testing/selftests/kvm/lib/perf_test_util.c +++ b/tools/testing/selftests/kvm/lib/perf_test_util.c @@ -101,7 +101,7 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus, guest_num_pages, 0); /* Do mapping for the demand paging memory slot */ - virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0); + virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages); ucall_init(vm, NULL); diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c index 0152f356c099..f87c7137598e 100644 --- a/tools/testing/selftests/kvm/lib/s390x/processor.c +++ b/tools/testing/selftests/kvm/lib/s390x/processor.c @@ -9,11 +9,9 @@ #include "kvm_util.h" #include "../kvm_util_internal.h" -#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000 - #define PAGES_PER_REGION 4 -void virt_pgd_alloc(struct kvm_vm *vm, uint32_t memslot) +void virt_pgd_alloc(struct kvm_vm *vm) { vm_paddr_t paddr; @@ -24,7 +22,7 @@ void virt_pgd_alloc(struct kvm_vm *vm, uint32_t memslot) return; paddr = vm_phy_pages_alloc(vm, PAGES_PER_REGION, - KVM_GUEST_PAGE_TABLE_MIN_PADDR, 
memslot); + KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0); memset(addr_gpa2hva(vm, paddr), 0xff, PAGES_PER_REGION * vm->page_size); vm->pgd = paddr; @@ -36,12 +34,12 @@ void virt_pgd_alloc(struct kvm_vm *vm, uint32_t memslot) * a page table (ri == 4). Returns a suitable region/segment table entry * which points to the freshly allocated pages. */ -static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri, uint32_t memslot) +static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri) { uint64_t taddr; taddr = vm_phy_pages_alloc(vm, ri < 4 ? PAGES_PER_REGION : 1, - KVM_GUEST_PAGE_TABLE_MIN_PADDR, memslot); + KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0); memset(addr_gpa2hva(vm, taddr), 0xff, PAGES_PER_REGION * vm->page_size); return (taddr & REGION_ENTRY_ORIGIN) @@ -49,8 +47,7 @@ static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri, uint32_t memslot) | ((ri < 4 ? (PAGES_PER_REGION - 1) : 0) & REGION_ENTRY_LENGTH); } -void virt_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa, - uint32_t memslot) +void virt_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa) { int ri, idx; uint64_t *entry; @@ -77,7 +74,7 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa, for (ri = 1; ri <= 4; ri++) { idx = (gva >> (64 - 11 * ri)) & 0x7ffu; if (entry[idx] & REGION_ENTRY_INVALID) - entry[idx] = virt_alloc_region(vm, ri, memslot); + entry[idx] = virt_alloc_region(vm, ri); entry = addr_gpa2hva(vm, entry[idx] & REGION_ENTRY_ORIGIN); } @@ -170,7 +167,7 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code) vm->page_size); stack_vaddr = vm_vaddr_alloc(vm, stack_size, - DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0); + DEFAULT_GUEST_STACK_VADDR_MIN); vm_vcpu_add(vm, vcpuid); diff --git a/tools/testing/selftests/kvm/lib/x86_64/apic.c b/tools/testing/selftests/kvm/lib/x86_64/apic.c new file mode 100644 index 000000000000..7168e25c194e --- /dev/null +++ b/tools/testing/selftests/kvm/lib/x86_64/apic.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * 
tools/testing/selftests/kvm/lib/x86_64/processor.c + * + * Copyright (C) 2021, Google LLC. + */ + +#include "apic.h" + +void apic_disable(void) +{ + wrmsr(MSR_IA32_APICBASE, + rdmsr(MSR_IA32_APICBASE) & + ~(MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD)); +} + +void xapic_enable(void) +{ + uint64_t val = rdmsr(MSR_IA32_APICBASE); + + /* Per SDM: to enable xAPIC when in x2APIC must first disable APIC */ + if (val & MSR_IA32_APICBASE_EXTD) { + apic_disable(); + wrmsr(MSR_IA32_APICBASE, + rdmsr(MSR_IA32_APICBASE) | MSR_IA32_APICBASE_ENABLE); + } else if (!(val & MSR_IA32_APICBASE_ENABLE)) { + wrmsr(MSR_IA32_APICBASE, val | MSR_IA32_APICBASE_ENABLE); + } + + /* + * Per SDM: reset value of spurious interrupt vector register has the + * APIC software enabled bit=0. It must be enabled in addition to the + * enable bit in the MSR. + */ + val = xapic_read_reg(APIC_SPIV) | APIC_SPIV_APIC_ENABLED; + xapic_write_reg(APIC_SPIV, val); +} + +void x2apic_enable(void) +{ + wrmsr(MSR_IA32_APICBASE, rdmsr(MSR_IA32_APICBASE) | + MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD); + x2apic_write_reg(APIC_SPIV, + x2apic_read_reg(APIC_SPIV) | APIC_SPIV_APIC_ENABLED); +} diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index efe235044421..28cb881f440d 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -17,13 +17,10 @@ #define DEFAULT_CODE_SELECTOR 0x8 #define DEFAULT_DATA_SELECTOR 0x10 -/* Minimum physical address used for virtual translation tables. 
*/ -#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000 - vm_vaddr_t exception_handlers; /* Virtual translation table structure declarations */ -struct pageMapL4Entry { +struct pageUpperEntry { uint64_t present:1; uint64_t writable:1; uint64_t user:1; @@ -33,37 +30,7 @@ struct pageMapL4Entry { uint64_t ignored_06:1; uint64_t page_size:1; uint64_t ignored_11_08:4; - uint64_t address:40; - uint64_t ignored_62_52:11; - uint64_t execute_disable:1; -}; - -struct pageDirectoryPointerEntry { - uint64_t present:1; - uint64_t writable:1; - uint64_t user:1; - uint64_t write_through:1; - uint64_t cache_disable:1; - uint64_t accessed:1; - uint64_t ignored_06:1; - uint64_t page_size:1; - uint64_t ignored_11_08:4; - uint64_t address:40; - uint64_t ignored_62_52:11; - uint64_t execute_disable:1; -}; - -struct pageDirectoryEntry { - uint64_t present:1; - uint64_t writable:1; - uint64_t user:1; - uint64_t write_through:1; - uint64_t cache_disable:1; - uint64_t accessed:1; - uint64_t ignored_06:1; - uint64_t page_size:1; - uint64_t ignored_11_08:4; - uint64_t address:40; + uint64_t pfn:40; uint64_t ignored_62_52:11; uint64_t execute_disable:1; }; @@ -79,7 +46,7 @@ struct pageTableEntry { uint64_t reserved_07:1; uint64_t global:1; uint64_t ignored_11_09:3; - uint64_t address:40; + uint64_t pfn:40; uint64_t ignored_62_52:11; uint64_t execute_disable:1; }; @@ -207,96 +174,211 @@ void sregs_dump(FILE *stream, struct kvm_sregs *sregs, } } -void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot) +void virt_pgd_alloc(struct kvm_vm *vm) { TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " "unknown or unsupported guest mode, mode: 0x%x", vm->mode); /* If needed, create page map l4 table. 
*/ if (!vm->pgd_created) { - vm_paddr_t paddr = vm_phy_page_alloc(vm, - KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot); - vm->pgd = paddr; + vm->pgd = vm_alloc_page_table(vm); vm->pgd_created = true; } } -void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, - uint32_t pgd_memslot) +static void *virt_get_pte(struct kvm_vm *vm, uint64_t pt_pfn, uint64_t vaddr, + int level) +{ + uint64_t *page_table = addr_gpa2hva(vm, pt_pfn << vm->page_shift); + int index = vaddr >> (vm->page_shift + level * 9) & 0x1ffu; + + return &page_table[index]; +} + +static struct pageUpperEntry *virt_create_upper_pte(struct kvm_vm *vm, + uint64_t pt_pfn, + uint64_t vaddr, + uint64_t paddr, + int level, + enum x86_page_size page_size) +{ + struct pageUpperEntry *pte = virt_get_pte(vm, pt_pfn, vaddr, level); + + if (!pte->present) { + pte->writable = true; + pte->present = true; + pte->page_size = (level == page_size); + if (pte->page_size) + pte->pfn = paddr >> vm->page_shift; + else + pte->pfn = vm_alloc_page_table(vm) >> vm->page_shift; + } else { + /* + * Entry already present. Assert that the caller doesn't want + * a hugepage at this level, and that there isn't a hugepage at + * this level. 
+ */ + TEST_ASSERT(level != page_size, + "Cannot create hugepage at level: %u, vaddr: 0x%lx\n", + page_size, vaddr); + TEST_ASSERT(!pte->page_size, + "Cannot create page table at level: %u, vaddr: 0x%lx\n", + level, vaddr); + } + return pte; +} + +void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, + enum x86_page_size page_size) +{ + const uint64_t pg_size = 1ull << ((page_size * 9) + 12); + struct pageUpperEntry *pml4e, *pdpe, *pde; + struct pageTableEntry *pte; + + TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, + "Unknown or unsupported guest mode, mode: 0x%x", vm->mode); + + TEST_ASSERT((vaddr % pg_size) == 0, + "Virtual address not aligned,\n" + "vaddr: 0x%lx page size: 0x%lx", vaddr, pg_size); + TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (vaddr >> vm->page_shift)), + "Invalid virtual address, vaddr: 0x%lx", vaddr); + TEST_ASSERT((paddr % pg_size) == 0, + "Physical address not aligned,\n" + " paddr: 0x%lx page size: 0x%lx", paddr, pg_size); + TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, + "Physical address beyond maximum supported,\n" + " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", + paddr, vm->max_gfn, vm->page_size); + + /* + * Allocate upper level page tables, if not already present. Return + * early if a hugepage was created. + */ + pml4e = virt_create_upper_pte(vm, vm->pgd >> vm->page_shift, + vaddr, paddr, 3, page_size); + if (pml4e->page_size) + return; + + pdpe = virt_create_upper_pte(vm, pml4e->pfn, vaddr, paddr, 2, page_size); + if (pdpe->page_size) + return; + + pde = virt_create_upper_pte(vm, pdpe->pfn, vaddr, paddr, 1, page_size); + if (pde->page_size) + return; + + /* Fill in page table entry. 
*/ + pte = virt_get_pte(vm, pde->pfn, vaddr, 0); + TEST_ASSERT(!pte->present, + "PTE already present for 4k page at vaddr: 0x%lx\n", vaddr); + pte->pfn = paddr >> vm->page_shift; + pte->writable = true; + pte->present = 1; +} + +void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) +{ + __virt_pg_map(vm, vaddr, paddr, X86_PAGE_SIZE_4K); +} + +static struct pageTableEntry *_vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid, + uint64_t vaddr) { uint16_t index[4]; - struct pageMapL4Entry *pml4e; + struct pageUpperEntry *pml4e, *pdpe, *pde; + struct pageTableEntry *pte; + struct kvm_cpuid_entry2 *entry; + struct kvm_sregs sregs; + int max_phy_addr; + /* Set the bottom 52 bits. */ + uint64_t rsvd_mask = 0x000fffffffffffff; + + entry = kvm_get_supported_cpuid_index(0x80000008, 0); + max_phy_addr = entry->eax & 0x000000ff; + /* Clear the bottom bits of the reserved mask. */ + rsvd_mask = (rsvd_mask >> max_phy_addr) << max_phy_addr; + + /* + * SDM vol 3, fig 4-11 "Formats of CR3 and Paging-Structure Entries + * with 4-Level Paging and 5-Level Paging". + * If IA32_EFER.NXE = 0 and the P flag of a paging-structure entry is 1, + * the XD flag (bit 63) is reserved. 
+ */ + vcpu_sregs_get(vm, vcpuid, &sregs); + if ((sregs.efer & EFER_NX) == 0) { + rsvd_mask |= (1ull << 63); + } TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " "unknown or unsupported guest mode, mode: 0x%x", vm->mode); - - TEST_ASSERT((vaddr % vm->page_size) == 0, - "Virtual address not on page boundary,\n" - " vaddr: 0x%lx vm->page_size: 0x%x", - vaddr, vm->page_size); TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (vaddr >> vm->page_shift)), "Invalid virtual address, vaddr: 0x%lx", vaddr); - TEST_ASSERT((paddr % vm->page_size) == 0, - "Physical address not on page boundary,\n" - " paddr: 0x%lx vm->page_size: 0x%x", - paddr, vm->page_size); - TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, - "Physical address beyond beyond maximum supported,\n" - " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", - paddr, vm->max_gfn, vm->page_size); + /* + * Based on the mode check above there are 48 bits in the vaddr, so + * shift 16 to sign extend the last bit (bit-47), + */ + TEST_ASSERT(vaddr == (((int64_t)vaddr << 16) >> 16), + "Canonical check failed. The virtual address is invalid."); index[0] = (vaddr >> 12) & 0x1ffu; index[1] = (vaddr >> 21) & 0x1ffu; index[2] = (vaddr >> 30) & 0x1ffu; index[3] = (vaddr >> 39) & 0x1ffu; - /* Allocate page directory pointer table if not present. 
*/ pml4e = addr_gpa2hva(vm, vm->pgd); - if (!pml4e[index[3]].present) { - pml4e[index[3]].address = vm_phy_page_alloc(vm, - KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot) - >> vm->page_shift; - pml4e[index[3]].writable = true; - pml4e[index[3]].present = true; - } + TEST_ASSERT(pml4e[index[3]].present, + "Expected pml4e to be present for gva: 0x%08lx", vaddr); + TEST_ASSERT((*(uint64_t*)(&pml4e[index[3]]) & + (rsvd_mask | (1ull << 7))) == 0, + "Unexpected reserved bits set."); + + pdpe = addr_gpa2hva(vm, pml4e[index[3]].pfn * vm->page_size); + TEST_ASSERT(pdpe[index[2]].present, + "Expected pdpe to be present for gva: 0x%08lx", vaddr); + TEST_ASSERT(pdpe[index[2]].page_size == 0, + "Expected pdpe to map a pde not a 1-GByte page."); + TEST_ASSERT((*(uint64_t*)(&pdpe[index[2]]) & rsvd_mask) == 0, + "Unexpected reserved bits set."); + + pde = addr_gpa2hva(vm, pdpe[index[2]].pfn * vm->page_size); + TEST_ASSERT(pde[index[1]].present, + "Expected pde to be present for gva: 0x%08lx", vaddr); + TEST_ASSERT(pde[index[1]].page_size == 0, + "Expected pde to map a pte not a 2-MByte page."); + TEST_ASSERT((*(uint64_t*)(&pde[index[1]]) & rsvd_mask) == 0, + "Unexpected reserved bits set."); + + pte = addr_gpa2hva(vm, pde[index[1]].pfn * vm->page_size); + TEST_ASSERT(pte[index[0]].present, + "Expected pte to be present for gva: 0x%08lx", vaddr); + + return &pte[index[0]]; +} - /* Allocate page directory table if not present. */ - struct pageDirectoryPointerEntry *pdpe; - pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size); - if (!pdpe[index[2]].present) { - pdpe[index[2]].address = vm_phy_page_alloc(vm, - KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot) - >> vm->page_shift; - pdpe[index[2]].writable = true; - pdpe[index[2]].present = true; - } +uint64_t vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr) +{ + struct pageTableEntry *pte = _vm_get_page_table_entry(vm, vcpuid, vaddr); - /* Allocate page table if not present. 
*/ - struct pageDirectoryEntry *pde; - pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size); - if (!pde[index[1]].present) { - pde[index[1]].address = vm_phy_page_alloc(vm, - KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot) - >> vm->page_shift; - pde[index[1]].writable = true; - pde[index[1]].present = true; - } + return *(uint64_t *)pte; +} - /* Fill in page table entry. */ - struct pageTableEntry *pte; - pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size); - pte[index[0]].address = paddr >> vm->page_shift; - pte[index[0]].writable = true; - pte[index[0]].present = 1; +void vm_set_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr, + uint64_t pte) +{ + struct pageTableEntry *new_pte = _vm_get_page_table_entry(vm, vcpuid, + vaddr); + + *(uint64_t *)new_pte = pte; } void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) { - struct pageMapL4Entry *pml4e, *pml4e_start; - struct pageDirectoryPointerEntry *pdpe, *pdpe_start; - struct pageDirectoryEntry *pde, *pde_start; + struct pageUpperEntry *pml4e, *pml4e_start; + struct pageUpperEntry *pdpe, *pdpe_start; + struct pageUpperEntry *pde, *pde_start; struct pageTableEntry *pte, *pte_start; if (!vm->pgd_created) @@ -307,8 +389,7 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) fprintf(stream, "%*s index hvaddr gpaddr " "addr w exec dirty\n", indent, ""); - pml4e_start = (struct pageMapL4Entry *) addr_gpa2hva(vm, - vm->pgd); + pml4e_start = (struct pageUpperEntry *) addr_gpa2hva(vm, vm->pgd); for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) { pml4e = &pml4e_start[n1]; if (!pml4e->present) @@ -317,11 +398,10 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) " %u\n", indent, "", pml4e - pml4e_start, pml4e, - addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->address, + addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->pfn, pml4e->writable, pml4e->execute_disable); - pdpe_start = addr_gpa2hva(vm, pml4e->address - * vm->page_size); + pdpe_start = addr_gpa2hva(vm, pml4e->pfn * 
vm->page_size); for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) { pdpe = &pdpe_start[n2]; if (!pdpe->present) @@ -331,11 +411,10 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) indent, "", pdpe - pdpe_start, pdpe, addr_hva2gpa(vm, pdpe), - (uint64_t) pdpe->address, pdpe->writable, + (uint64_t) pdpe->pfn, pdpe->writable, pdpe->execute_disable); - pde_start = addr_gpa2hva(vm, - pdpe->address * vm->page_size); + pde_start = addr_gpa2hva(vm, pdpe->pfn * vm->page_size); for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) { pde = &pde_start[n3]; if (!pde->present) @@ -344,11 +423,10 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) "0x%-12lx 0x%-10lx %u %u\n", indent, "", pde - pde_start, pde, addr_hva2gpa(vm, pde), - (uint64_t) pde->address, pde->writable, + (uint64_t) pde->pfn, pde->writable, pde->execute_disable); - pte_start = addr_gpa2hva(vm, - pde->address * vm->page_size); + pte_start = addr_gpa2hva(vm, pde->pfn * vm->page_size); for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) { pte = &pte_start[n4]; if (!pte->present) @@ -359,7 +437,7 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) indent, "", pte - pte_start, pte, addr_hva2gpa(vm, pte), - (uint64_t) pte->address, + (uint64_t) pte->pfn, pte->writable, pte->execute_disable, pte->dirty, @@ -480,9 +558,7 @@ static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector, vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) { uint16_t index[4]; - struct pageMapL4Entry *pml4e; - struct pageDirectoryPointerEntry *pdpe; - struct pageDirectoryEntry *pde; + struct pageUpperEntry *pml4e, *pdpe, *pde; struct pageTableEntry *pte; TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " @@ -499,43 +575,39 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) if (!pml4e[index[3]].present) goto unmapped_gva; - pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size); + pdpe = addr_gpa2hva(vm, pml4e[index[3]].pfn * vm->page_size); if 
(!pdpe[index[2]].present) goto unmapped_gva; - pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size); + pde = addr_gpa2hva(vm, pdpe[index[2]].pfn * vm->page_size); if (!pde[index[1]].present) goto unmapped_gva; - pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size); + pte = addr_gpa2hva(vm, pde[index[1]].pfn * vm->page_size); if (!pte[index[0]].present) goto unmapped_gva; - return (pte[index[0]].address * vm->page_size) + (gva & 0xfffu); + return (pte[index[0]].pfn * vm->page_size) + (gva & 0xfffu); unmapped_gva: TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva); exit(EXIT_FAILURE); } -static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt, int gdt_memslot, - int pgd_memslot) +static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt) { if (!vm->gdt) - vm->gdt = vm_vaddr_alloc(vm, getpagesize(), - KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot); + vm->gdt = vm_vaddr_alloc_page(vm); dt->base = vm->gdt; dt->limit = getpagesize(); } static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp, - int selector, int gdt_memslot, - int pgd_memslot) + int selector) { if (!vm->tss) - vm->tss = vm_vaddr_alloc(vm, getpagesize(), - KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot); + vm->tss = vm_vaddr_alloc_page(vm); memset(segp, 0, sizeof(*segp)); segp->base = vm->tss; @@ -546,7 +618,7 @@ static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp, kvm_seg_fill_gdt_64bit(vm, segp); } -static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot) +static void vcpu_setup(struct kvm_vm *vm, int vcpuid) { struct kvm_sregs sregs; @@ -555,7 +627,7 @@ static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_m sregs.idt.limit = 0; - kvm_setup_gdt(vm, &sregs.gdt, gdt_memslot, pgd_memslot); + kvm_setup_gdt(vm, &sregs.gdt); switch (vm->mode) { case VM_MODE_PXXV48_4K: @@ -567,7 +639,7 @@ static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int 
gdt_m kvm_seg_set_kernel_code_64bit(vm, DEFAULT_CODE_SELECTOR, &sregs.cs); kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.ds); kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.es); - kvm_setup_tss_64bit(vm, &sregs.tr, 0x18, gdt_memslot, pgd_memslot); + kvm_setup_tss_64bit(vm, &sregs.tr, 0x18); break; default: @@ -584,11 +656,11 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code) struct kvm_regs regs; vm_vaddr_t stack_vaddr; stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(), - DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0); + DEFAULT_GUEST_STACK_VADDR_MIN); /* Create VCPU */ vm_vcpu_add(vm, vcpuid); - vcpu_setup(vm, vcpuid, 0, 0); + vcpu_setup(vm, vcpuid); /* Setup guest general purpose registers */ vcpu_regs_get(vm, vcpuid, ®s); @@ -600,6 +672,9 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code) /* Setup the MP state */ mp_state.mp_state = 0; vcpu_set_mp_state(vm, vcpuid, &mp_state); + + /* Setup supported CPUIDs */ + vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid()); } /* @@ -1201,7 +1276,7 @@ static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr, void kvm_exit_unexpected_vector(uint32_t value) { - outl(UNEXPECTED_VECTOR_PORT, value); + ucall(UCALL_UNHANDLED, 1, value); } void route_exception(struct ex_regs *regs) @@ -1222,8 +1297,8 @@ void vm_init_descriptor_tables(struct kvm_vm *vm) extern void *idt_handlers; int i; - vm->idt = vm_vaddr_alloc(vm, getpagesize(), 0x2000, 0, 0); - vm->handlers = vm_vaddr_alloc(vm, 256 * sizeof(void *), 0x2000, 0, 0); + vm->idt = vm_vaddr_alloc_page(vm); + vm->handlers = vm_vaddr_alloc_page(vm); /* Handlers have the same address in both address spaces.*/ for (i = 0; i < NUM_INTERRUPTS; i++) set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0, @@ -1244,8 +1319,8 @@ void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid) *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = 
vm->handlers; } -void vm_handle_exception(struct kvm_vm *vm, int vector, - void (*handler)(struct ex_regs *)) +void vm_install_exception_handler(struct kvm_vm *vm, int vector, + void (*handler)(struct ex_regs *)) { vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers); @@ -1254,16 +1329,13 @@ void vm_handle_exception(struct kvm_vm *vm, int vector, void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid) { - if (vcpu_state(vm, vcpuid)->exit_reason == KVM_EXIT_IO - && vcpu_state(vm, vcpuid)->io.port == UNEXPECTED_VECTOR_PORT - && vcpu_state(vm, vcpuid)->io.size == 4) { - /* Grab pointer to io data */ - uint32_t *data = (void *)vcpu_state(vm, vcpuid) - + vcpu_state(vm, vcpuid)->io.data_offset; - - TEST_ASSERT(false, - "Unexpected vectored event in guest (vector:0x%x)", - *data); + struct ucall uc; + + if (get_ucall(vm, vcpuid, &uc) == UCALL_UNHANDLED) { + uint64_t vector = uc.args[0]; + + TEST_FAIL("Unexpected vectored event in guest (vector:0x%lx)", + vector); } } diff --git a/tools/testing/selftests/kvm/lib/x86_64/svm.c b/tools/testing/selftests/kvm/lib/x86_64/svm.c index 827fe6028dd4..2ac98d70d02b 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/svm.c +++ b/tools/testing/selftests/kvm/lib/x86_64/svm.c @@ -30,17 +30,14 @@ u64 rflags; struct svm_test_data * vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva) { - vm_vaddr_t svm_gva = vm_vaddr_alloc(vm, getpagesize(), - 0x10000, 0, 0); + vm_vaddr_t svm_gva = vm_vaddr_alloc_page(vm); struct svm_test_data *svm = addr_gva2hva(vm, svm_gva); - svm->vmcb = (void *)vm_vaddr_alloc(vm, getpagesize(), - 0x10000, 0, 0); + svm->vmcb = (void *)vm_vaddr_alloc_page(vm); svm->vmcb_hva = addr_gva2hva(vm, (uintptr_t)svm->vmcb); svm->vmcb_gpa = addr_gva2gpa(vm, (uintptr_t)svm->vmcb); - svm->save_area = (void *)vm_vaddr_alloc(vm, getpagesize(), - 0x10000, 0, 0); + svm->save_area = (void *)vm_vaddr_alloc_page(vm); svm->save_area_hva = addr_gva2hva(vm, (uintptr_t)svm->save_area); svm->save_area_gpa = 
addr_gva2gpa(vm, (uintptr_t)svm->save_area); diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c index 2448b30e8efa..d089d8b850b5 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c @@ -77,50 +77,48 @@ int vcpu_enable_evmcs(struct kvm_vm *vm, int vcpu_id) struct vmx_pages * vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva) { - vm_vaddr_t vmx_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0); + vm_vaddr_t vmx_gva = vm_vaddr_alloc_page(vm); struct vmx_pages *vmx = addr_gva2hva(vm, vmx_gva); /* Setup of a region of guest memory for the vmxon region. */ - vmx->vmxon = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0); + vmx->vmxon = (void *)vm_vaddr_alloc_page(vm); vmx->vmxon_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmxon); vmx->vmxon_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmxon); /* Setup of a region of guest memory for a vmcs. */ - vmx->vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0); + vmx->vmcs = (void *)vm_vaddr_alloc_page(vm); vmx->vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmcs); vmx->vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmcs); /* Setup of a region of guest memory for the MSR bitmap. */ - vmx->msr = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0); + vmx->msr = (void *)vm_vaddr_alloc_page(vm); vmx->msr_hva = addr_gva2hva(vm, (uintptr_t)vmx->msr); vmx->msr_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->msr); memset(vmx->msr_hva, 0, getpagesize()); /* Setup of a region of guest memory for the shadow VMCS. */ - vmx->shadow_vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0); + vmx->shadow_vmcs = (void *)vm_vaddr_alloc_page(vm); vmx->shadow_vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->shadow_vmcs); vmx->shadow_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->shadow_vmcs); /* Setup of a region of guest memory for the VMREAD and VMWRITE bitmaps. 
*/ - vmx->vmread = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0); + vmx->vmread = (void *)vm_vaddr_alloc_page(vm); vmx->vmread_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmread); vmx->vmread_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmread); memset(vmx->vmread_hva, 0, getpagesize()); - vmx->vmwrite = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0); + vmx->vmwrite = (void *)vm_vaddr_alloc_page(vm); vmx->vmwrite_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmwrite); vmx->vmwrite_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmwrite); memset(vmx->vmwrite_hva, 0, getpagesize()); /* Setup of a region of guest memory for the VP Assist page. */ - vmx->vp_assist = (void *)vm_vaddr_alloc(vm, getpagesize(), - 0x10000, 0, 0); + vmx->vp_assist = (void *)vm_vaddr_alloc_page(vm); vmx->vp_assist_hva = addr_gva2hva(vm, (uintptr_t)vmx->vp_assist); vmx->vp_assist_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vp_assist); /* Setup of a region of guest memory for the enlightened VMCS. */ - vmx->enlightened_vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(), - 0x10000, 0, 0); + vmx->enlightened_vmcs = (void *)vm_vaddr_alloc_page(vm); vmx->enlightened_vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->enlightened_vmcs); vmx->enlightened_vmcs_gpa = @@ -395,7 +393,7 @@ void nested_vmx_check_supported(void) } void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, - uint64_t nested_paddr, uint64_t paddr, uint32_t eptp_memslot) + uint64_t nested_paddr, uint64_t paddr) { uint16_t index[4]; struct eptPageTableEntry *pml4e; @@ -428,9 +426,7 @@ void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, /* Allocate page directory pointer table if not present. 
*/ pml4e = vmx->eptp_hva; if (!pml4e[index[3]].readable) { - pml4e[index[3]].address = vm_phy_page_alloc(vm, - KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot) - >> vm->page_shift; + pml4e[index[3]].address = vm_alloc_page_table(vm) >> vm->page_shift; pml4e[index[3]].writable = true; pml4e[index[3]].readable = true; pml4e[index[3]].executable = true; @@ -440,9 +436,7 @@ void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, struct eptPageTableEntry *pdpe; pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size); if (!pdpe[index[2]].readable) { - pdpe[index[2]].address = vm_phy_page_alloc(vm, - KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot) - >> vm->page_shift; + pdpe[index[2]].address = vm_alloc_page_table(vm) >> vm->page_shift; pdpe[index[2]].writable = true; pdpe[index[2]].readable = true; pdpe[index[2]].executable = true; @@ -452,9 +446,7 @@ void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, struct eptPageTableEntry *pde; pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size); if (!pde[index[1]].readable) { - pde[index[1]].address = vm_phy_page_alloc(vm, - KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot) - >> vm->page_shift; + pde[index[1]].address = vm_alloc_page_table(vm) >> vm->page_shift; pde[index[1]].writable = true; pde[index[1]].readable = true; pde[index[1]].executable = true; @@ -494,8 +486,7 @@ void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, * page range starting at nested_paddr to the page range starting at paddr. 
*/ void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm, - uint64_t nested_paddr, uint64_t paddr, uint64_t size, - uint32_t eptp_memslot) + uint64_t nested_paddr, uint64_t paddr, uint64_t size) { size_t page_size = vm->page_size; size_t npages = size / page_size; @@ -504,7 +495,7 @@ void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm, TEST_ASSERT(paddr + size > paddr, "Paddr overflow"); while (npages--) { - nested_pg_map(vmx, vm, nested_paddr, paddr, eptp_memslot); + nested_pg_map(vmx, vm, nested_paddr, paddr); nested_paddr += page_size; paddr += page_size; } @@ -514,7 +505,7 @@ void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm, * physical pages in VM. */ void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm, - uint32_t memslot, uint32_t eptp_memslot) + uint32_t memslot) { sparsebit_idx_t i, last; struct userspace_mem_region *region = @@ -530,24 +521,21 @@ void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm, nested_map(vmx, vm, (uint64_t)i << vm->page_shift, (uint64_t)i << vm->page_shift, - 1 << vm->page_shift, - eptp_memslot); + 1 << vm->page_shift); } } void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, uint32_t eptp_memslot) { - vmx->eptp = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0); + vmx->eptp = (void *)vm_vaddr_alloc_page(vm); vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp); vmx->eptp_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->eptp); } -void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm, - uint32_t eptp_memslot) +void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm) { - vmx->apic_access = (void *)vm_vaddr_alloc(vm, getpagesize(), - 0x10000, 0, 0); + vmx->apic_access = (void *)vm_vaddr_alloc_page(vm); vmx->apic_access_hva = addr_gva2hva(vm, (uintptr_t)vmx->apic_access); vmx->apic_access_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->apic_access); } diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c 
b/tools/testing/selftests/kvm/memslot_perf_test.c index 11239652d805..d6e381e01db7 100644 --- a/tools/testing/selftests/kvm/memslot_perf_test.c +++ b/tools/testing/selftests/kvm/memslot_perf_test.c @@ -306,7 +306,7 @@ static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots, guest_addr += npages * 4096; } - virt_map(data->vm, MEM_GPA, MEM_GPA, mempages, 0); + virt_map(data->vm, MEM_GPA, MEM_GPA, mempages); sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL); atomic_init(&sync->start_flag, false); diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index d8812f27648c..85b18bb8f762 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -132,7 +132,7 @@ static struct kvm_vm *spawn_vm(pthread_t *vcpu_thread, void *guest_code) gpa = vm_phy_pages_alloc(vm, 2, MEM_REGION_GPA, MEM_REGION_SLOT); TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc\n"); - virt_map(vm, MEM_REGION_GPA, MEM_REGION_GPA, 2, 0); + virt_map(vm, MEM_REGION_GPA, MEM_REGION_GPA, 2); /* Ditto for the host mapping so that both pages can be zeroed. 
*/ hva = addr_gpa2hva(vm, MEM_REGION_GPA); diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c index fcc840088c91..b0031f2d38fd 100644 --- a/tools/testing/selftests/kvm/steal_time.c +++ b/tools/testing/selftests/kvm/steal_time.c @@ -73,8 +73,6 @@ static void steal_time_init(struct kvm_vm *vm) for (i = 0; i < NR_VCPUS; ++i) { int ret; - vcpu_set_cpuid(vm, i, kvm_get_supported_cpuid()); - /* ST_GPA_BASE is identity mapped */ st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE); sync_global_to_guest(vm, st_gva[i]); @@ -295,7 +293,7 @@ int main(int ac, char **av) vm = vm_create_default(0, 0, guest_code); gpages = vm_calc_num_guest_pages(VM_MODE_DEFAULT, STEAL_TIME_SIZE * NR_VCPUS); vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, ST_GPA_BASE, 1, gpages, 0); - virt_map(vm, ST_GPA_BASE, ST_GPA_BASE, gpages, 0); + virt_map(vm, ST_GPA_BASE, ST_GPA_BASE, gpages); ucall_init(vm, NULL); /* Add the rest of the VCPUs */ diff --git a/tools/testing/selftests/kvm/x86_64/emulator_error_test.c b/tools/testing/selftests/kvm/x86_64/emulator_error_test.c new file mode 100644 index 000000000000..f070ff0224fa --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/emulator_error_test.c @@ -0,0 +1,219 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020, Google LLC. + * + * Tests for KVM_CAP_EXIT_ON_EMULATION_FAILURE capability. 
+ */ + +#define _GNU_SOURCE /* for program_invocation_short_name */ + +#include "test_util.h" +#include "kvm_util.h" +#include "vmx.h" + +#define VCPU_ID 1 +#define PAGE_SIZE 4096 +#define MAXPHYADDR 36 + +#define MEM_REGION_GVA 0x0000123456789000 +#define MEM_REGION_GPA 0x0000000700000000 +#define MEM_REGION_SLOT 10 +#define MEM_REGION_SIZE PAGE_SIZE + +static void guest_code(void) +{ + __asm__ __volatile__("flds (%[addr])" + :: [addr]"r"(MEM_REGION_GVA)); + + GUEST_DONE(); +} + +static void run_guest(struct kvm_vm *vm) +{ + int rc; + + rc = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc); +} + +/* + * Accessors to get R/M, REG, and Mod bits described in the SDM vol 2, + * figure 2-2 "Table Interpretation of ModR/M Byte (C8H)". + */ +#define GET_RM(insn_byte) (insn_byte & 0x7) +#define GET_REG(insn_byte) ((insn_byte & 0x38) >> 3) +#define GET_MOD(insn_byte) ((insn_byte & 0xc) >> 6) + +/* Ensure we are dealing with a simple 2-byte flds instruction. */ +static bool is_flds(uint8_t *insn_bytes, uint8_t insn_size) +{ + return insn_size >= 2 && + insn_bytes[0] == 0xd9 && + GET_REG(insn_bytes[1]) == 0x0 && + GET_MOD(insn_bytes[1]) == 0x0 && + /* Ensure there is no SIB byte. */ + GET_RM(insn_bytes[1]) != 0x4 && + /* Ensure there is no displacement byte. 
*/ + GET_RM(insn_bytes[1]) != 0x5; +} + +static void process_exit_on_emulation_error(struct kvm_vm *vm) +{ + struct kvm_run *run = vcpu_state(vm, VCPU_ID); + struct kvm_regs regs; + uint8_t *insn_bytes; + uint8_t insn_size; + uint64_t flags; + + TEST_ASSERT(run->exit_reason == KVM_EXIT_INTERNAL_ERROR, + "Unexpected exit reason: %u (%s)", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION, + "Unexpected suberror: %u", + run->emulation_failure.suberror); + + if (run->emulation_failure.ndata >= 1) { + flags = run->emulation_failure.flags; + if ((flags & KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES) && + run->emulation_failure.ndata >= 3) { + insn_size = run->emulation_failure.insn_size; + insn_bytes = run->emulation_failure.insn_bytes; + + TEST_ASSERT(insn_size <= 15 && insn_size > 0, + "Unexpected instruction size: %u", + insn_size); + + TEST_ASSERT(is_flds(insn_bytes, insn_size), + "Unexpected instruction. Expected 'flds' (0xd9 /0)"); + + /* + * If is_flds() succeeded then the instruction bytes + * contained an flds instruction that is 2-bytes in + * length (ie: no prefix, no SIB, no displacement). 
+ */ + vcpu_regs_get(vm, VCPU_ID, &regs); + regs.rip += 2; + vcpu_regs_set(vm, VCPU_ID, &regs); + } + } +} + +static void do_guest_assert(struct kvm_vm *vm, struct ucall *uc) +{ + TEST_FAIL("%s at %s:%ld", (const char *)uc->args[0], __FILE__, + uc->args[1]); +} + +static void check_for_guest_assert(struct kvm_vm *vm) +{ + struct kvm_run *run = vcpu_state(vm, VCPU_ID); + struct ucall uc; + + if (run->exit_reason == KVM_EXIT_IO && + get_ucall(vm, VCPU_ID, &uc) == UCALL_ABORT) { + do_guest_assert(vm, &uc); + } +} + +static void process_ucall_done(struct kvm_vm *vm) +{ + struct kvm_run *run = vcpu_state(vm, VCPU_ID); + struct ucall uc; + + check_for_guest_assert(vm); + + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Unexpected exit reason: %u (%s)", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + TEST_ASSERT(get_ucall(vm, VCPU_ID, &uc) == UCALL_DONE, + "Unexpected ucall command: %lu, expected UCALL_DONE (%d)", + uc.cmd, UCALL_DONE); +} + +static uint64_t process_ucall(struct kvm_vm *vm) +{ + struct kvm_run *run = vcpu_state(vm, VCPU_ID); + struct ucall uc; + + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Unexpected exit reason: %u (%s)", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_SYNC: + break; + case UCALL_ABORT: + do_guest_assert(vm, &uc); + break; + case UCALL_DONE: + process_ucall_done(vm); + break; + default: + TEST_ASSERT(false, "Unexpected ucall"); + } + + return uc.cmd; +} + +int main(int argc, char *argv[]) +{ + struct kvm_enable_cap emul_failure_cap = { + .cap = KVM_CAP_EXIT_ON_EMULATION_FAILURE, + .args[0] = 1, + }; + struct kvm_cpuid_entry2 *entry; + struct kvm_cpuid2 *cpuid; + struct kvm_vm *vm; + uint64_t gpa, pte; + uint64_t *hva; + int rc; + + /* Tell stdout not to buffer its content */ + setbuf(stdout, NULL); + + vm = vm_create_default(VCPU_ID, 0, guest_code); + + if (!kvm_check_cap(KVM_CAP_SMALLER_MAXPHYADDR)) { + printf("module parameter 'allow_smaller_maxphyaddr' is 
not set. Skipping test.\n"); + return 0; + } + + cpuid = kvm_get_supported_cpuid(); + + entry = kvm_get_supported_cpuid_index(0x80000008, 0); + entry->eax = (entry->eax & 0xffffff00) | MAXPHYADDR; + set_cpuid(cpuid, entry); + + vcpu_set_cpuid(vm, VCPU_ID, cpuid); + + rc = kvm_check_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE); + TEST_ASSERT(rc, "KVM_CAP_EXIT_ON_EMULATION_FAILURE is unavailable"); + vm_enable_cap(vm, &emul_failure_cap); + + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, + MEM_REGION_GPA, MEM_REGION_SLOT, + MEM_REGION_SIZE / PAGE_SIZE, 0); + gpa = vm_phy_pages_alloc(vm, MEM_REGION_SIZE / PAGE_SIZE, + MEM_REGION_GPA, MEM_REGION_SLOT); + TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc\n"); + virt_map(vm, MEM_REGION_GVA, MEM_REGION_GPA, 1); + hva = addr_gpa2hva(vm, MEM_REGION_GPA); + memset(hva, 0, PAGE_SIZE); + pte = vm_get_page_table_entry(vm, VCPU_ID, MEM_REGION_GVA); + vm_set_page_table_entry(vm, VCPU_ID, MEM_REGION_GVA, pte | (1ull << 36)); + + run_guest(vm); + process_exit_on_emulation_error(vm); + run_guest(vm); + + TEST_ASSERT(process_ucall(vm) == UCALL_DONE, "Expected UCALL_DONE"); + + kvm_vm_free(vm); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c index 63096cea26c6..2b46dcca86a8 100644 --- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c +++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c @@ -22,15 +22,6 @@ static int ud_count; -void enable_x2apic(void) -{ - uint32_t spiv_reg = APIC_BASE_MSR + (APIC_SPIV >> 4); - - wrmsr(MSR_IA32_APICBASE, rdmsr(MSR_IA32_APICBASE) | - MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD); - wrmsr(spiv_reg, rdmsr(spiv_reg) | APIC_SPIV_APIC_ENABLED); -} - static void guest_ud_handler(struct ex_regs *regs) { ud_count++; @@ -59,7 +50,7 @@ void guest_code(struct vmx_pages *vmx_pages) #define L2_GUEST_STACK_SIZE 64 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; - enable_x2apic(); + x2apic_enable(); GUEST_SYNC(1); 
GUEST_SYNC(2); @@ -121,14 +112,38 @@ void inject_nmi(struct kvm_vm *vm) vcpu_events_set(vm, VCPU_ID, &events); } +static void save_restore_vm(struct kvm_vm *vm) +{ + struct kvm_regs regs1, regs2; + struct kvm_x86_state *state; + + state = vcpu_save_state(vm, VCPU_ID); + memset(&regs1, 0, sizeof(regs1)); + vcpu_regs_get(vm, VCPU_ID, &regs1); + + kvm_vm_release(vm); + + /* Restore state in a new VM. */ + kvm_vm_restart(vm, O_RDWR); + vm_vcpu_add(vm, VCPU_ID); + vcpu_set_hv_cpuid(vm, VCPU_ID); + vcpu_enable_evmcs(vm, VCPU_ID); + vcpu_load_state(vm, VCPU_ID, state); + free(state); + + memset(&regs2, 0, sizeof(regs2)); + vcpu_regs_get(vm, VCPU_ID, &regs2); + TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)), + "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx", + (ulong) regs2.rdi, (ulong) regs2.rsi); +} + int main(int argc, char *argv[]) { vm_vaddr_t vmx_pages_gva = 0; - struct kvm_regs regs1, regs2; struct kvm_vm *vm; struct kvm_run *run; - struct kvm_x86_state *state; struct ucall uc; int stage; @@ -145,21 +160,18 @@ int main(int argc, char *argv[]) vcpu_set_hv_cpuid(vm, VCPU_ID); vcpu_enable_evmcs(vm, VCPU_ID); - run = vcpu_state(vm, VCPU_ID); - - vcpu_regs_get(vm, VCPU_ID, &regs1); - vcpu_alloc_vmx(vm, &vmx_pages_gva); vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); vm_init_descriptor_tables(vm); vcpu_init_descriptor_tables(vm, VCPU_ID); - vm_handle_exception(vm, UD_VECTOR, guest_ud_handler); - vm_handle_exception(vm, NMI_VECTOR, guest_nmi_handler); + vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler); + vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler); pr_info("Running L1 which uses EVMCS to run L2\n"); for (stage = 1;; stage++) { + run = vcpu_state(vm, VCPU_ID); _vcpu_run(vm, VCPU_ID); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "Stage %d: unexpected exit reason: %u (%s),\n", @@ -184,32 +196,23 @@ int main(int argc, char *argv[]) uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx", stage, (ulong)uc.args[1]); - state 
= vcpu_save_state(vm, VCPU_ID); - memset(&regs1, 0, sizeof(regs1)); - vcpu_regs_get(vm, VCPU_ID, &regs1); - - kvm_vm_release(vm); - - /* Restore state in a new VM. */ - kvm_vm_restart(vm, O_RDWR); - vm_vcpu_add(vm, VCPU_ID); - vcpu_set_hv_cpuid(vm, VCPU_ID); - vcpu_enable_evmcs(vm, VCPU_ID); - vcpu_load_state(vm, VCPU_ID, state); - run = vcpu_state(vm, VCPU_ID); - free(state); - - memset(&regs2, 0, sizeof(regs2)); - vcpu_regs_get(vm, VCPU_ID, &regs2); - TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)), - "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx", - (ulong) regs2.rdi, (ulong) regs2.rsi); + save_restore_vm(vm); /* Force immediate L2->L1 exit before resuming */ if (stage == 8) { pr_info("Injecting NMI into L1 before L2 had a chance to run after restore\n"); inject_nmi(vm); } + + /* + * Do KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE for a freshly + * restored VM (before the first KVM_RUN) to check that + * KVM_STATE_NESTED_EVMCS is not lost. + */ + if (stage == 9) { + pr_info("Trying extra KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE cycle\n"); + save_restore_vm(vm); + } } done: diff --git a/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c b/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c index 8c77537af5a1..a711f83749ea 100644 --- a/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c +++ b/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c @@ -145,8 +145,7 @@ static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid, int stage) struct kvm_cpuid2 *vcpu_alloc_cpuid(struct kvm_vm *vm, vm_vaddr_t *p_gva, struct kvm_cpuid2 *cpuid) { int size = sizeof(*cpuid) + cpuid->nent * sizeof(cpuid->entries[0]); - vm_vaddr_t gva = vm_vaddr_alloc(vm, size, - getpagesize(), 0, 0); + vm_vaddr_t gva = vm_vaddr_alloc(vm, size, KVM_UTIL_MIN_VADDR); struct kvm_cpuid2 *guest_cpuids = addr_gva2hva(vm, gva); memcpy(guest_cpuids, cpuid, size); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c index 
7f1d2765572c..bab10ae787b6 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c @@ -7,6 +7,7 @@ #include "test_util.h" #include "kvm_util.h" #include "processor.h" +#include "hyperv.h" struct ms_hyperv_tsc_page { volatile u32 tsc_sequence; @@ -15,13 +16,6 @@ struct ms_hyperv_tsc_page { volatile s64 tsc_offset; } __packed; -#define HV_X64_MSR_GUEST_OS_ID 0x40000000 -#define HV_X64_MSR_TIME_REF_COUNT 0x40000020 -#define HV_X64_MSR_REFERENCE_TSC 0x40000021 -#define HV_X64_MSR_TSC_FREQUENCY 0x40000022 -#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106 -#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107 - /* Simplified mul_u64_u64_shr() */ static inline u64 mul_u64_u64_shr64(u64 a, u64 b) { @@ -220,7 +214,7 @@ int main(void) vcpu_set_hv_cpuid(vm, VCPU_ID); - tsc_page_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0); + tsc_page_gva = vm_vaddr_alloc_page(vm); memset(addr_gpa2hva(vm, tsc_page_gva), 0x0, getpagesize()); TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0, "TSC page has to be page aligned\n"); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c new file mode 100644 index 000000000000..42bd658f52a8 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c @@ -0,0 +1,649 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2021, Red Hat, Inc. 
+ * + * Tests for Hyper-V features enablement + */ +#include <asm/kvm_para.h> +#include <linux/kvm_para.h> +#include <stdint.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "hyperv.h" + +#define VCPU_ID 0 +#define LINUX_OS_ID ((u64)0x8100 << 48) + +extern unsigned char rdmsr_start; +extern unsigned char rdmsr_end; + +static u64 do_rdmsr(u32 idx) +{ + u32 lo, hi; + + asm volatile("rdmsr_start: rdmsr;" + "rdmsr_end:" + : "=a"(lo), "=c"(hi) + : "c"(idx)); + + return (((u64) hi) << 32) | lo; +} + +extern unsigned char wrmsr_start; +extern unsigned char wrmsr_end; + +static void do_wrmsr(u32 idx, u64 val) +{ + u32 lo, hi; + + lo = val; + hi = val >> 32; + + asm volatile("wrmsr_start: wrmsr;" + "wrmsr_end:" + : : "a"(lo), "c"(idx), "d"(hi)); +} + +static int nr_gp; + +static inline u64 hypercall(u64 control, vm_vaddr_t input_address, + vm_vaddr_t output_address) +{ + u64 hv_status; + + asm volatile("mov %3, %%r8\n" + "vmcall" + : "=a" (hv_status), + "+c" (control), "+d" (input_address) + : "r" (output_address) + : "cc", "memory", "r8", "r9", "r10", "r11"); + + return hv_status; +} + +static void guest_gp_handler(struct ex_regs *regs) +{ + unsigned char *rip = (unsigned char *)regs->rip; + bool r, w; + + r = rip == &rdmsr_start; + w = rip == &wrmsr_start; + GUEST_ASSERT(r || w); + + nr_gp++; + + if (r) + regs->rip = (uint64_t)&rdmsr_end; + else + regs->rip = (uint64_t)&wrmsr_end; +} + +struct msr_data { + uint32_t idx; + bool available; + bool write; + u64 write_val; +}; + +struct hcall_data { + uint64_t control; + uint64_t expect; +}; + +static void guest_msr(struct msr_data *msr) +{ + int i = 0; + + while (msr->idx) { + WRITE_ONCE(nr_gp, 0); + if (!msr->write) + do_rdmsr(msr->idx); + else + do_wrmsr(msr->idx, msr->write_val); + + if (msr->available) + GUEST_ASSERT(READ_ONCE(nr_gp) == 0); + else + GUEST_ASSERT(READ_ONCE(nr_gp) == 1); + + GUEST_SYNC(i++); + } + + GUEST_DONE(); +} + +static void guest_hcall(vm_vaddr_t pgs_gpa, struct 
hcall_data *hcall) +{ + int i = 0; + + wrmsr(HV_X64_MSR_GUEST_OS_ID, LINUX_OS_ID); + wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa); + + while (hcall->control) { + GUEST_ASSERT(hypercall(hcall->control, pgs_gpa, + pgs_gpa + 4096) == hcall->expect); + GUEST_SYNC(i++); + } + + GUEST_DONE(); +} + +static void hv_set_cpuid(struct kvm_vm *vm, struct kvm_cpuid2 *cpuid, + struct kvm_cpuid_entry2 *feat, + struct kvm_cpuid_entry2 *recomm, + struct kvm_cpuid_entry2 *dbg) +{ + TEST_ASSERT(set_cpuid(cpuid, feat), + "failed to set KVM_CPUID_FEATURES leaf"); + TEST_ASSERT(set_cpuid(cpuid, recomm), + "failed to set HYPERV_CPUID_ENLIGHTMENT_INFO leaf"); + TEST_ASSERT(set_cpuid(cpuid, dbg), + "failed to set HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES leaf"); + vcpu_set_cpuid(vm, VCPU_ID, cpuid); +} + +static void guest_test_msrs_access(struct kvm_vm *vm, struct msr_data *msr, + struct kvm_cpuid2 *best) +{ + struct kvm_run *run; + struct ucall uc; + int stage = 0, r; + struct kvm_cpuid_entry2 feat = { + .function = HYPERV_CPUID_FEATURES + }; + struct kvm_cpuid_entry2 recomm = { + .function = HYPERV_CPUID_ENLIGHTMENT_INFO + }; + struct kvm_cpuid_entry2 dbg = { + .function = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES + }; + struct kvm_enable_cap cap = {0}; + + run = vcpu_state(vm, VCPU_ID); + + while (true) { + switch (stage) { + case 0: + /* + * Only available when Hyper-V identification is set + */ + msr->idx = HV_X64_MSR_GUEST_OS_ID; + msr->write = 0; + msr->available = 0; + break; + case 1: + msr->idx = HV_X64_MSR_HYPERCALL; + msr->write = 0; + msr->available = 0; + break; + case 2: + feat.eax |= HV_MSR_HYPERCALL_AVAILABLE; + /* + * HV_X64_MSR_GUEST_OS_ID has to be written first to make + * HV_X64_MSR_HYPERCALL available. 
+ */ + msr->idx = HV_X64_MSR_GUEST_OS_ID; + msr->write = 1; + msr->write_val = LINUX_OS_ID; + msr->available = 1; + break; + case 3: + msr->idx = HV_X64_MSR_GUEST_OS_ID; + msr->write = 0; + msr->available = 1; + break; + case 4: + msr->idx = HV_X64_MSR_HYPERCALL; + msr->write = 0; + msr->available = 1; + break; + + case 5: + msr->idx = HV_X64_MSR_VP_RUNTIME; + msr->write = 0; + msr->available = 0; + break; + case 6: + feat.eax |= HV_MSR_VP_RUNTIME_AVAILABLE; + msr->write = 0; + msr->available = 1; + break; + case 7: + /* Read only */ + msr->write = 1; + msr->write_val = 1; + msr->available = 0; + break; + + case 8: + msr->idx = HV_X64_MSR_TIME_REF_COUNT; + msr->write = 0; + msr->available = 0; + break; + case 9: + feat.eax |= HV_MSR_TIME_REF_COUNT_AVAILABLE; + msr->write = 0; + msr->available = 1; + break; + case 10: + /* Read only */ + msr->write = 1; + msr->write_val = 1; + msr->available = 0; + break; + + case 11: + msr->idx = HV_X64_MSR_VP_INDEX; + msr->write = 0; + msr->available = 0; + break; + case 12: + feat.eax |= HV_MSR_VP_INDEX_AVAILABLE; + msr->write = 0; + msr->available = 1; + break; + case 13: + /* Read only */ + msr->write = 1; + msr->write_val = 1; + msr->available = 0; + break; + + case 14: + msr->idx = HV_X64_MSR_RESET; + msr->write = 0; + msr->available = 0; + break; + case 15: + feat.eax |= HV_MSR_RESET_AVAILABLE; + msr->write = 0; + msr->available = 1; + break; + case 16: + msr->write = 1; + msr->write_val = 0; + msr->available = 1; + break; + + case 17: + msr->idx = HV_X64_MSR_REFERENCE_TSC; + msr->write = 0; + msr->available = 0; + break; + case 18: + feat.eax |= HV_MSR_REFERENCE_TSC_AVAILABLE; + msr->write = 0; + msr->available = 1; + break; + case 19: + msr->write = 1; + msr->write_val = 0; + msr->available = 1; + break; + + case 20: + msr->idx = HV_X64_MSR_EOM; + msr->write = 0; + msr->available = 0; + break; + case 21: + /* + * Remains unavailable even with KVM_CAP_HYPERV_SYNIC2 + * capability enabled and guest visible CPUID bit unset. 
+ */ + cap.cap = KVM_CAP_HYPERV_SYNIC2; + vcpu_enable_cap(vm, VCPU_ID, &cap); + break; + case 22: + feat.eax |= HV_MSR_SYNIC_AVAILABLE; + msr->write = 0; + msr->available = 1; + break; + case 23: + msr->write = 1; + msr->write_val = 0; + msr->available = 1; + break; + + case 24: + msr->idx = HV_X64_MSR_STIMER0_CONFIG; + msr->write = 0; + msr->available = 0; + break; + case 25: + feat.eax |= HV_MSR_SYNTIMER_AVAILABLE; + msr->write = 0; + msr->available = 1; + break; + case 26: + msr->write = 1; + msr->write_val = 0; + msr->available = 1; + break; + case 27: + /* Direct mode test */ + msr->write = 1; + msr->write_val = 1 << 12; + msr->available = 0; + break; + case 28: + feat.edx |= HV_STIMER_DIRECT_MODE_AVAILABLE; + msr->available = 1; + break; + + case 29: + msr->idx = HV_X64_MSR_EOI; + msr->write = 0; + msr->available = 0; + break; + case 30: + feat.eax |= HV_MSR_APIC_ACCESS_AVAILABLE; + msr->write = 1; + msr->write_val = 1; + msr->available = 1; + break; + + case 31: + msr->idx = HV_X64_MSR_TSC_FREQUENCY; + msr->write = 0; + msr->available = 0; + break; + case 32: + feat.eax |= HV_ACCESS_FREQUENCY_MSRS; + msr->write = 0; + msr->available = 1; + break; + case 33: + /* Read only */ + msr->write = 1; + msr->write_val = 1; + msr->available = 0; + break; + + case 34: + msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL; + msr->write = 0; + msr->available = 0; + break; + case 35: + feat.eax |= HV_ACCESS_REENLIGHTENMENT; + msr->write = 0; + msr->available = 1; + break; + case 36: + msr->write = 1; + msr->write_val = 1; + msr->available = 1; + break; + case 37: + /* Can only write '0' */ + msr->idx = HV_X64_MSR_TSC_EMULATION_STATUS; + msr->write = 1; + msr->write_val = 1; + msr->available = 0; + break; + + case 38: + msr->idx = HV_X64_MSR_CRASH_P0; + msr->write = 0; + msr->available = 0; + break; + case 39: + feat.edx |= HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE; + msr->write = 0; + msr->available = 1; + break; + case 40: + msr->write = 1; + msr->write_val = 1; + msr->available = 
1; + break; + + case 41: + msr->idx = HV_X64_MSR_SYNDBG_STATUS; + msr->write = 0; + msr->available = 0; + break; + case 42: + feat.edx |= HV_FEATURE_DEBUG_MSRS_AVAILABLE; + dbg.eax |= HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING; + msr->write = 0; + msr->available = 1; + break; + case 43: + msr->write = 1; + msr->write_val = 0; + msr->available = 1; + break; + + case 44: + /* END */ + msr->idx = 0; + break; + } + + hv_set_cpuid(vm, best, &feat, &recomm, &dbg); + + if (msr->idx) + pr_debug("Stage %d: testing msr: 0x%x for %s\n", stage, + msr->idx, msr->write ? "write" : "read"); + else + pr_debug("Stage %d: finish\n", stage); + + r = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(!r, "vcpu_run failed: %d\n", r); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "unexpected exit reason: %u (%s)", + run->exit_reason, exit_reason_str(run->exit_reason)); + + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_SYNC: + TEST_ASSERT(uc.args[1] == stage, + "Unexpected stage: %ld (%d expected)\n", + uc.args[1], stage); + break; + case UCALL_ABORT: + TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], + __FILE__, uc.args[1]); + return; + case UCALL_DONE: + return; + } + + stage++; + } +} + +static void guest_test_hcalls_access(struct kvm_vm *vm, struct hcall_data *hcall, + void *input, void *output, struct kvm_cpuid2 *best) +{ + struct kvm_run *run; + struct ucall uc; + int stage = 0, r; + struct kvm_cpuid_entry2 feat = { + .function = HYPERV_CPUID_FEATURES, + .eax = HV_MSR_HYPERCALL_AVAILABLE + }; + struct kvm_cpuid_entry2 recomm = { + .function = HYPERV_CPUID_ENLIGHTMENT_INFO + }; + struct kvm_cpuid_entry2 dbg = { + .function = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES + }; + + run = vcpu_state(vm, VCPU_ID); + + while (true) { + switch (stage) { + case 0: + hcall->control = 0xdeadbeef; + hcall->expect = HV_STATUS_INVALID_HYPERCALL_CODE; + break; + + case 1: + hcall->control = HVCALL_POST_MESSAGE; + hcall->expect = HV_STATUS_ACCESS_DENIED; + break; + case 2: + feat.ebx |= HV_POST_MESSAGES; 
+ hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT; + break; + + case 3: + hcall->control = HVCALL_SIGNAL_EVENT; + hcall->expect = HV_STATUS_ACCESS_DENIED; + break; + case 4: + feat.ebx |= HV_SIGNAL_EVENTS; + hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT; + break; + + case 5: + hcall->control = HVCALL_RESET_DEBUG_SESSION; + hcall->expect = HV_STATUS_INVALID_HYPERCALL_CODE; + break; + case 6: + dbg.eax |= HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING; + hcall->expect = HV_STATUS_ACCESS_DENIED; + break; + case 7: + feat.ebx |= HV_DEBUGGING; + hcall->expect = HV_STATUS_OPERATION_DENIED; + break; + + case 8: + hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE; + hcall->expect = HV_STATUS_ACCESS_DENIED; + break; + case 9: + recomm.eax |= HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED; + hcall->expect = HV_STATUS_SUCCESS; + break; + case 10: + hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX; + hcall->expect = HV_STATUS_ACCESS_DENIED; + break; + case 11: + recomm.eax |= HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED; + hcall->expect = HV_STATUS_SUCCESS; + break; + + case 12: + hcall->control = HVCALL_SEND_IPI; + hcall->expect = HV_STATUS_ACCESS_DENIED; + break; + case 13: + recomm.eax |= HV_X64_CLUSTER_IPI_RECOMMENDED; + hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT; + break; + case 14: + /* Nothing in 'sparse banks' -> success */ + hcall->control = HVCALL_SEND_IPI_EX; + hcall->expect = HV_STATUS_SUCCESS; + break; + + case 15: + hcall->control = HVCALL_NOTIFY_LONG_SPIN_WAIT; + hcall->expect = HV_STATUS_ACCESS_DENIED; + break; + case 16: + recomm.ebx = 0xfff; + hcall->expect = HV_STATUS_SUCCESS; + break; + + case 17: + /* END */ + hcall->control = 0; + break; + } + + hv_set_cpuid(vm, best, &feat, &recomm, &dbg); + + if (hcall->control) + pr_debug("Stage %d: testing hcall: 0x%lx\n", stage, + hcall->control); + else + pr_debug("Stage %d: finish\n", stage); + + r = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(!r, "vcpu_run failed: %d\n", r); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + 
"unexpected exit reason: %u (%s)", + run->exit_reason, exit_reason_str(run->exit_reason)); + + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_SYNC: + TEST_ASSERT(uc.args[1] == stage, + "Unexpected stage: %ld (%d expected)\n", + uc.args[1], stage); + break; + case UCALL_ABORT: + TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], + __FILE__, uc.args[1]); + return; + case UCALL_DONE: + return; + } + + stage++; + } +} + +int main(void) +{ + struct kvm_cpuid2 *best; + struct kvm_vm *vm; + vm_vaddr_t msr_gva, hcall_page, hcall_params; + struct kvm_enable_cap cap = { + .cap = KVM_CAP_HYPERV_ENFORCE_CPUID, + .args = {1} + }; + + /* Test MSRs */ + vm = vm_create_default(VCPU_ID, 0, guest_msr); + + msr_gva = vm_vaddr_alloc_page(vm); + memset(addr_gva2hva(vm, msr_gva), 0x0, getpagesize()); + vcpu_args_set(vm, VCPU_ID, 1, msr_gva); + vcpu_enable_cap(vm, VCPU_ID, &cap); + + vcpu_set_hv_cpuid(vm, VCPU_ID); + + best = kvm_get_supported_hv_cpuid(); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vm, VCPU_ID); + vm_handle_exception(vm, GP_VECTOR, guest_gp_handler); + + pr_info("Testing access to Hyper-V specific MSRs\n"); + guest_test_msrs_access(vm, addr_gva2hva(vm, msr_gva), + best); + kvm_vm_free(vm); + + /* Test hypercalls */ + vm = vm_create_default(VCPU_ID, 0, guest_hcall); + + /* Hypercall input/output */ + hcall_page = vm_vaddr_alloc_pages(vm, 2); + memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize()); + + hcall_params = vm_vaddr_alloc_page(vm); + memset(addr_gva2hva(vm, hcall_params), 0x0, getpagesize()); + + vcpu_args_set(vm, VCPU_ID, 2, addr_gva2gpa(vm, hcall_page), hcall_params); + vcpu_enable_cap(vm, VCPU_ID, &cap); + + vcpu_set_hv_cpuid(vm, VCPU_ID); + + best = kvm_get_supported_hv_cpuid(); + + pr_info("Testing access to Hyper-V hypercalls\n"); + guest_test_hcalls_access(vm, addr_gva2hva(vm, hcall_params), + addr_gva2hva(vm, hcall_page), + addr_gva2hva(vm, hcall_page) + getpagesize(), + best); + + kvm_vm_free(vm); +} diff --git 
a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c index 732b244d6956..04ed975662c9 100644 --- a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c +++ b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c @@ -227,7 +227,7 @@ int main(void) vm_init_descriptor_tables(vm); vcpu_init_descriptor_tables(vm, VCPU_ID); - vm_handle_exception(vm, GP_VECTOR, guest_gp_handler); + vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler); enter_guest(vm); kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/x86_64/mmu_role_test.c b/tools/testing/selftests/kvm/x86_64/mmu_role_test.c new file mode 100644 index 000000000000..523371cf8e8f --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/mmu_role_test.c @@ -0,0 +1,147 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "kvm_util.h" +#include "processor.h" + +#define VCPU_ID 1 + +#define MMIO_GPA 0x100000000ull + +static void guest_code(void) +{ + (void)READ_ONCE(*((uint64_t *)MMIO_GPA)); + (void)READ_ONCE(*((uint64_t *)MMIO_GPA)); + + GUEST_ASSERT(0); +} + +static void guest_pf_handler(struct ex_regs *regs) +{ + /* PFEC == RSVD | PRESENT (read, kernel). */ + GUEST_ASSERT(regs->error_code == 0x9); + GUEST_DONE(); +} + +static void mmu_role_test(u32 *cpuid_reg, u32 evil_cpuid_val) +{ + u32 good_cpuid_val = *cpuid_reg; + struct kvm_run *run; + struct kvm_vm *vm; + uint64_t cmd; + int r; + + /* Create VM */ + vm = vm_create_default(VCPU_ID, 0, guest_code); + run = vcpu_state(vm, VCPU_ID); + + /* Map 1gb page without a backing memlot. */ + __virt_pg_map(vm, MMIO_GPA, MMIO_GPA, X86_PAGE_SIZE_1G); + + r = _vcpu_run(vm, VCPU_ID); + + /* Guest access to the 1gb page should trigger MMIO. 
*/ + TEST_ASSERT(r == 0, "vcpu_run failed: %d\n", r); + TEST_ASSERT(run->exit_reason == KVM_EXIT_MMIO, + "Unexpected exit reason: %u (%s), expected MMIO exit (1gb page w/o memslot)\n", + run->exit_reason, exit_reason_str(run->exit_reason)); + + TEST_ASSERT(run->mmio.len == 8, "Unexpected exit mmio size = %u", run->mmio.len); + + TEST_ASSERT(run->mmio.phys_addr == MMIO_GPA, + "Unexpected exit mmio address = 0x%llx", run->mmio.phys_addr); + + /* + * Effect the CPUID change for the guest and re-enter the guest. Its + * access should now #PF due to the PAGE_SIZE bit being reserved or + * the resulting GPA being invalid. Note, kvm_get_supported_cpuid() + * returns the struct that contains the entry being modified. Eww. + */ + *cpuid_reg = evil_cpuid_val; + vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + + /* + * Add a dummy memslot to coerce KVM into bumping the MMIO generation. + * KVM does not "officially" support mucking with CPUID after KVM_RUN, + * and will incorrectly reuse MMIO SPTEs. Don't delete the memslot! + * KVM x86 zaps all shadow pages on memslot deletion. + */ + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, + MMIO_GPA << 1, 10, 1, 0); + + /* Set up a #PF handler to eat the RSVD #PF and signal all done! */ + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vm, VCPU_ID); + vm_handle_exception(vm, PF_VECTOR, guest_pf_handler); + + r = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(r == 0, "vcpu_run failed: %d\n", r); + + cmd = get_ucall(vm, VCPU_ID, NULL); + TEST_ASSERT(cmd == UCALL_DONE, + "Unexpected guest exit, exit_reason=%s, ucall.cmd = %lu\n", + exit_reason_str(run->exit_reason), cmd); + + /* + * Restore the happy CPUID value for the next test. Yes, changes are + * indeed persistent across VM destruction. 
+ */ + *cpuid_reg = good_cpuid_val; + + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + struct kvm_cpuid_entry2 *entry; + int opt; + + /* + * All tests are opt-in because TDP doesn't play nice with reserved #PF + * in the GVA->GPA translation. The hardware page walker doesn't let + * software change GBPAGES or MAXPHYADDR, and KVM doesn't manually walk + * the GVA on fault for performance reasons. + */ + bool do_gbpages = false; + bool do_maxphyaddr = false; + + setbuf(stdout, NULL); + + while ((opt = getopt(argc, argv, "gm")) != -1) { + switch (opt) { + case 'g': + do_gbpages = true; + break; + case 'm': + do_maxphyaddr = true; + break; + case 'h': + default: + printf("usage: %s [-g (GBPAGES)] [-m (MAXPHYADDR)]\n", argv[0]); + break; + } + } + + if (!do_gbpages && !do_maxphyaddr) { + print_skip("No sub-tests selected"); + return 0; + } + + entry = kvm_get_supported_cpuid_entry(0x80000001); + if (!(entry->edx & CPUID_GBPAGES)) { + print_skip("1gb hugepages not supported"); + return 0; + } + + if (do_gbpages) { + pr_info("Test MMIO after toggling CPUID.GBPAGES\n\n"); + mmu_role_test(&entry->edx, entry->edx & ~CPUID_GBPAGES); + } + + if (do_maxphyaddr) { + pr_info("Test MMIO after changing CPUID.MAXPHYADDR\n\n"); + entry = kvm_get_supported_cpuid_entry(0x80000008); + mmu_role_test(&entry->eax, (entry->eax & ~0xff) | 0x20); + } + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c index 12c558fc8074..ae76436af0cc 100644 --- a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c +++ b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c @@ -14,16 +14,12 @@ #include "test_util.h" #include "kvm_util.h" #include "processor.h" +#include "apic.h" #define N_VCPU 2 #define VCPU_ID0 0 #define VCPU_ID1 1 -static uint32_t get_bsp_flag(void) -{ - return rdmsr(MSR_IA32_APICBASE) & MSR_IA32_APICBASE_BSP; -} - static void guest_bsp_vcpu(void *arg) { GUEST_SYNC(1); @@ -94,7 +90,7 @@ static 
struct kvm_vm *create_vm(void) pages = vm_adjust_num_guest_pages(VM_MODE_DEFAULT, pages); vm = vm_create(VM_MODE_DEFAULT, pages, O_RDWR); - kvm_vm_elf_load(vm, program_invocation_name, 0, 0); + kvm_vm_elf_load(vm, program_invocation_name); vm_create_irqchip(vm); return vm; @@ -106,8 +102,6 @@ static void add_x86_vcpu(struct kvm_vm *vm, uint32_t vcpuid, bool bsp_code) vm_vcpu_add_default(vm, vcpuid, guest_bsp_vcpu); else vm_vcpu_add_default(vm, vcpuid, guest_not_bsp_vcpu); - - vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid()); } static void run_vm_bsp(uint32_t bsp_vcpu) diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c index 613c42c5a9b8..c1f831803ad2 100644 --- a/tools/testing/selftests/kvm/x86_64/smm_test.c +++ b/tools/testing/selftests/kvm/x86_64/smm_test.c @@ -55,8 +55,8 @@ static inline void sync_with_host(uint64_t phase) void self_smi(void) { - wrmsr(APIC_BASE_MSR + (APIC_ICR >> 4), - APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_SMI); + x2apic_write_reg(APIC_ICR, + APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_SMI); } void guest_code(void *arg) diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c index d672f0a473f8..fc03a150278d 100644 --- a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c +++ b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c @@ -24,6 +24,10 @@ #define UCALL_PIO_PORT ((uint16_t)0x1000) +struct ucall uc_none = { + .cmd = UCALL_NONE, +}; + /* * ucall is embedded here to protect against compiler reshuffling registers * before calling a function. 
In this test we only need to get KVM_EXIT_IO @@ -34,7 +38,8 @@ void guest_code(void) asm volatile("1: in %[port], %%al\n" "add $0x1, %%rbx\n" "jmp 1b" - : : [port] "d" (UCALL_PIO_PORT) : "rax", "rbx"); + : : [port] "d" (UCALL_PIO_PORT), "D" (&uc_none) + : "rax", "rbx"); } static void compare_regs(struct kvm_regs *left, struct kvm_regs *right) diff --git a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c index e357d8e222d4..5a6a662f2e59 100644 --- a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c +++ b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c @@ -18,15 +18,6 @@ #define rounded_rdmsr(x) ROUND(rdmsr(x)) #define rounded_host_rdmsr(x) ROUND(vcpu_get_msr(vm, 0, x)) -#define GUEST_ASSERT_EQ(a, b) do { \ - __typeof(a) _a = (a); \ - __typeof(b) _b = (b); \ - if (_a != _b) \ - ucall(UCALL_ABORT, 4, \ - "Failed guest assert: " \ - #a " == " #b, __LINE__, _a, _b); \ - } while(0) - static void guest_code(void) { u64 val = 0; diff --git a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c index 72c0d0797522..e3e20e8848d0 100644 --- a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c +++ b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c @@ -574,7 +574,7 @@ static void test_msr_filter_allow(void) { vm_init_descriptor_tables(vm); vcpu_init_descriptor_tables(vm, VCPU_ID); - vm_handle_exception(vm, GP_VECTOR, guest_gp_handler); + vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler); /* Process guest code userspace exits. 
*/ run_guest_then_process_rdmsr(vm, MSR_IA32_XSS); @@ -588,12 +588,12 @@ static void test_msr_filter_allow(void) { run_guest_then_process_wrmsr(vm, MSR_NON_EXISTENT); run_guest_then_process_rdmsr(vm, MSR_NON_EXISTENT); - vm_handle_exception(vm, UD_VECTOR, guest_ud_handler); + vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler); run_guest(vm); - vm_handle_exception(vm, UD_VECTOR, NULL); + vm_install_exception_handler(vm, UD_VECTOR, NULL); if (process_ucall(vm) != UCALL_DONE) { - vm_handle_exception(vm, GP_VECTOR, guest_fep_gp_handler); + vm_install_exception_handler(vm, GP_VECTOR, guest_fep_gp_handler); /* Process emulated rdmsr and wrmsr instructions. */ run_guest_then_process_rdmsr(vm, MSR_IA32_XSS); diff --git a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c index d14888b34adb..d438c4d3228a 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c @@ -96,7 +96,7 @@ int main(int argc, char *argv[]) } vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva); - prepare_virtualize_apic_accesses(vmx, vm, 0); + prepare_virtualize_apic_accesses(vmx, vm); vcpu_args_set(vm, VCPU_ID, 2, vmx_pages_gva, high_gpa); while (!done) { diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c index 537de1068554..06a64980a5d2 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c @@ -97,7 +97,7 @@ int main(int argc, char *argv[]) * Add an identity map for GVA range [0xc0000000, 0xc0002000). This * affects both L1 and L2. However... */ - virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM, TEST_MEM_PAGES, 0); + virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM, TEST_MEM_PAGES); /* * ... 
pages in the L2 GPA range [0xc0001000, 0xc0003000) will map to @@ -107,9 +107,9 @@ int main(int argc, char *argv[]) * meaning after the last call to virt_map. */ prepare_eptp(vmx, vm, 0); - nested_map_memslot(vmx, vm, 0, 0); - nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096, 0); - nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096, 0); + nested_map_memslot(vmx, vm, 0); + nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096); + nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096); bmap = bitmap_alloc(TEST_MEM_PAGES); host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM); diff --git a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c new file mode 100644 index 000000000000..280c01fd2412 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c @@ -0,0 +1,242 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * vmx_nested_tsc_scaling_test + * + * Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * This test case verifies that nested TSC scaling behaves as expected when + * both L1 and L2 are scaled using different ratios. For this test we scale + * L1 down and scale L2 up. + */ + +#include <time.h> + +#include "kvm_util.h" +#include "vmx.h" +#include "kselftest.h" + + +#define VCPU_ID 0 + +/* L2 is scaled up (from L1's perspective) by this factor */ +#define L2_SCALE_FACTOR 4ULL + +#define TSC_OFFSET_L2 ((uint64_t) -33125236320908) +#define TSC_MULTIPLIER_L2 (L2_SCALE_FACTOR << 48) + +#define L2_GUEST_STACK_SIZE 64 + +enum { USLEEP, UCHECK_L1, UCHECK_L2 }; +#define GUEST_SLEEP(sec) ucall(UCALL_SYNC, 2, USLEEP, sec) +#define GUEST_CHECK(level, freq) ucall(UCALL_SYNC, 2, level, freq) + + +/* + * This function checks whether the "actual" TSC frequency of a guest matches + * its expected frequency. 
In order to account for delays in taking the TSC + * measurements, a difference of 1% between the actual and the expected value + * is tolerated. + */ +static void compare_tsc_freq(uint64_t actual, uint64_t expected) +{ + uint64_t tolerance, thresh_low, thresh_high; + + tolerance = expected / 100; + thresh_low = expected - tolerance; + thresh_high = expected + tolerance; + + TEST_ASSERT(thresh_low < actual, + "TSC freq is expected to be between %"PRIu64" and %"PRIu64 + " but it actually is %"PRIu64, + thresh_low, thresh_high, actual); + TEST_ASSERT(thresh_high > actual, + "TSC freq is expected to be between %"PRIu64" and %"PRIu64 + " but it actually is %"PRIu64, + thresh_low, thresh_high, actual); +} + +static void check_tsc_freq(int level) +{ + uint64_t tsc_start, tsc_end, tsc_freq; + + /* + * Reading the TSC twice with about a second's difference should give + * us an approximation of the TSC frequency from the guest's + * perspective. Now, this won't be completely accurate, but it should + * be good enough for the purposes of this test. 
+ */ + tsc_start = rdmsr(MSR_IA32_TSC); + GUEST_SLEEP(1); + tsc_end = rdmsr(MSR_IA32_TSC); + + tsc_freq = tsc_end - tsc_start; + + GUEST_CHECK(level, tsc_freq); +} + +static void l2_guest_code(void) +{ + check_tsc_freq(UCHECK_L2); + + /* exit to L1 */ + __asm__ __volatile__("vmcall"); +} + +static void l1_guest_code(struct vmx_pages *vmx_pages) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + uint32_t control; + + /* check that L1's frequency looks alright before launching L2 */ + check_tsc_freq(UCHECK_L1); + + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + GUEST_ASSERT(load_vmcs(vmx_pages)); + + /* prepare the VMCS for L2 execution */ + prepare_vmcs(vmx_pages, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + /* enable TSC offsetting and TSC scaling for L2 */ + control = vmreadz(CPU_BASED_VM_EXEC_CONTROL); + control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING; + vmwrite(CPU_BASED_VM_EXEC_CONTROL, control); + + control = vmreadz(SECONDARY_VM_EXEC_CONTROL); + control |= SECONDARY_EXEC_TSC_SCALING; + vmwrite(SECONDARY_VM_EXEC_CONTROL, control); + + vmwrite(TSC_OFFSET, TSC_OFFSET_L2); + vmwrite(TSC_MULTIPLIER, TSC_MULTIPLIER_L2); + vmwrite(TSC_MULTIPLIER_HIGH, TSC_MULTIPLIER_L2 >> 32); + + /* launch L2 */ + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + + /* check that L1's frequency still looks good */ + check_tsc_freq(UCHECK_L1); + + GUEST_DONE(); +} + +static void tsc_scaling_check_supported(void) +{ + if (!kvm_check_cap(KVM_CAP_TSC_CONTROL)) { + print_skip("TSC scaling not supported by the HW"); + exit(KSFT_SKIP); + } +} + +static void stable_tsc_check_supported(void) +{ + FILE *fp; + char buf[4]; + + fp = fopen("/sys/devices/system/clocksource/clocksource0/current_clocksource", "r"); + if (fp == NULL) + goto skip_test; + + if (fgets(buf, sizeof(buf), fp) == NULL) + goto skip_test; + + if (strncmp(buf, "tsc", sizeof(buf))) + goto skip_test; + + return; +skip_test: + 
print_skip("Kernel does not use TSC clocksource - assuming that host TSC is not stable"); + exit(KSFT_SKIP); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vm *vm; + vm_vaddr_t vmx_pages_gva; + + uint64_t tsc_start, tsc_end; + uint64_t tsc_khz; + uint64_t l1_scale_factor; + uint64_t l0_tsc_freq = 0; + uint64_t l1_tsc_freq = 0; + uint64_t l2_tsc_freq = 0; + + nested_vmx_check_supported(); + tsc_scaling_check_supported(); + stable_tsc_check_supported(); + + /* + * We set L1's scale factor to be a random number from 2 to 10. + * Ideally we would do the same for L2's factor but that one is + * referenced by both main() and l1_guest_code() and using a global + * variable does not work. + */ + srand(time(NULL)); + l1_scale_factor = (rand() % 9) + 2; + printf("L1's scale down factor is: %"PRIu64"\n", l1_scale_factor); + printf("L2's scale up factor is: %llu\n", L2_SCALE_FACTOR); + + tsc_start = rdtsc(); + sleep(1); + tsc_end = rdtsc(); + + l0_tsc_freq = tsc_end - tsc_start; + printf("real TSC frequency is around: %"PRIu64"\n", l0_tsc_freq); + + vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code); + vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); + + tsc_khz = _vcpu_ioctl(vm, VCPU_ID, KVM_GET_TSC_KHZ, NULL); + TEST_ASSERT(tsc_khz != -1, "vcpu ioctl KVM_GET_TSC_KHZ failed"); + + /* scale down L1's TSC frequency */ + vcpu_ioctl(vm, VCPU_ID, KVM_SET_TSC_KHZ, + (void *) (tsc_khz / l1_scale_factor)); + + for (;;) { + volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID); + struct ucall uc; + + vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_ABORT: + TEST_FAIL("%s", (const char *) uc.args[0]); + case UCALL_SYNC: + switch (uc.args[0]) { + case USLEEP: + sleep(uc.args[1]); + break; + case UCHECK_L1: + l1_tsc_freq = uc.args[1]; + printf("L1's 
TSC frequency is around: %"PRIu64 + "\n", l1_tsc_freq); + + compare_tsc_freq(l1_tsc_freq, + l0_tsc_freq / l1_scale_factor); + break; + case UCHECK_L2: + l2_tsc_freq = uc.args[1]; + printf("L2's TSC frequency is around: %"PRIu64 + "\n", l2_tsc_freq); + + compare_tsc_freq(l2_tsc_freq, + l1_tsc_freq * L2_SCALE_FACTOR); + break; + } + break; + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + } + +done: + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c index 2f964cdc273c..afbbc40df884 100644 --- a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c +++ b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c @@ -42,8 +42,6 @@ #define HALTER_VCPU_ID 0 #define SENDER_VCPU_ID 1 -volatile uint32_t *apic_base = (volatile uint32_t *)APIC_DEFAULT_GPA; - /* * Vector for IPI from sender vCPU to halting vCPU. * Value is arbitrary and was chosen for the alternating bit pattern. Any @@ -86,45 +84,6 @@ struct thread_params { uint64_t *pipis_rcvd; /* host address of ipis_rcvd global */ }; -uint32_t read_apic_reg(uint reg) -{ - return apic_base[reg >> 2]; -} - -void write_apic_reg(uint reg, uint32_t val) -{ - apic_base[reg >> 2] = val; -} - -void disable_apic(void) -{ - wrmsr(MSR_IA32_APICBASE, - rdmsr(MSR_IA32_APICBASE) & - ~(MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD)); -} - -void enable_xapic(void) -{ - uint64_t val = rdmsr(MSR_IA32_APICBASE); - - /* Per SDM: to enable xAPIC when in x2APIC must first disable APIC */ - if (val & MSR_IA32_APICBASE_EXTD) { - disable_apic(); - wrmsr(MSR_IA32_APICBASE, - rdmsr(MSR_IA32_APICBASE) | MSR_IA32_APICBASE_ENABLE); - } else if (!(val & MSR_IA32_APICBASE_ENABLE)) { - wrmsr(MSR_IA32_APICBASE, val | MSR_IA32_APICBASE_ENABLE); - } - - /* - * Per SDM: reset value of spurious interrupt vector register has the - * APIC software enabled bit=0. It must be enabled in addition to the - * enable bit in the MSR. 
- */ - val = read_apic_reg(APIC_SPIV) | APIC_SPIV_APIC_ENABLED; - write_apic_reg(APIC_SPIV, val); -} - void verify_apic_base_addr(void) { uint64_t msr = rdmsr(MSR_IA32_APICBASE); @@ -136,10 +95,10 @@ void verify_apic_base_addr(void) static void halter_guest_code(struct test_data_page *data) { verify_apic_base_addr(); - enable_xapic(); + xapic_enable(); - data->halter_apic_id = GET_APIC_ID_FIELD(read_apic_reg(APIC_ID)); - data->halter_lvr = read_apic_reg(APIC_LVR); + data->halter_apic_id = GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID)); + data->halter_lvr = xapic_read_reg(APIC_LVR); /* * Loop forever HLTing and recording halts & wakes. Disable interrupts @@ -150,8 +109,8 @@ static void halter_guest_code(struct test_data_page *data) * TPR and PPR for diagnostic purposes in case the test fails. */ for (;;) { - data->halter_tpr = read_apic_reg(APIC_TASKPRI); - data->halter_ppr = read_apic_reg(APIC_PROCPRI); + data->halter_tpr = xapic_read_reg(APIC_TASKPRI); + data->halter_ppr = xapic_read_reg(APIC_PROCPRI); data->hlt_count++; asm volatile("sti; hlt; cli"); data->wake_count++; @@ -166,7 +125,7 @@ static void halter_guest_code(struct test_data_page *data) static void guest_ipi_handler(struct ex_regs *regs) { ipis_rcvd++; - write_apic_reg(APIC_EOI, 77); + xapic_write_reg(APIC_EOI, 77); } static void sender_guest_code(struct test_data_page *data) @@ -179,7 +138,7 @@ static void sender_guest_code(struct test_data_page *data) uint64_t tsc_start; verify_apic_base_addr(); - enable_xapic(); + xapic_enable(); /* * Init interrupt command register for sending IPIs @@ -206,8 +165,8 @@ static void sender_guest_code(struct test_data_page *data) * First IPI can be sent unconditionally because halter vCPU * starts earlier. 
*/ - write_apic_reg(APIC_ICR2, icr2_val); - write_apic_reg(APIC_ICR, icr_val); + xapic_write_reg(APIC_ICR2, icr2_val); + xapic_write_reg(APIC_ICR, icr_val); data->ipis_sent++; /* @@ -462,13 +421,13 @@ int main(int argc, char *argv[]) vm_init_descriptor_tables(vm); vcpu_init_descriptor_tables(vm, HALTER_VCPU_ID); - vm_handle_exception(vm, IPI_VECTOR, guest_ipi_handler); + vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler); - virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA, 0); + virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA); vm_vcpu_add_default(vm, SENDER_VCPU_ID, sender_guest_code); - test_data_page_vaddr = vm_vaddr_alloc(vm, 0x1000, 0x1000, 0, 0); + test_data_page_vaddr = vm_vaddr_alloc_page(vm); data = (struct test_data_page *)addr_gva2hva(vm, test_data_page_vaddr); memset(data, 0, sizeof(*data)); diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c index 1f4a0599683c..117bf49a3d79 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c +++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c @@ -146,7 +146,7 @@ int main(int argc, char *argv[]) /* Map a region for the shared_info page */ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 2, 0); - virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 2, 0); + virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 2); struct kvm_xen_hvm_config hvmc = { .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL, diff --git a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c index 8389e0bfd711..adc94452b57c 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c +++ b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c @@ -103,7 +103,7 @@ int main(int argc, char *argv[]) /* Map a region for the hypercall pages */ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, HCALL_REGION_GPA, HCALL_REGION_SLOT, 2, 0); - virt_map(vm, 
HCALL_REGION_GPA, HCALL_REGION_GPA, 2, 0); + virt_map(vm, HCALL_REGION_GPA, HCALL_REGION_GPA, 2); for (;;) { volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID); diff --git a/tools/testing/selftests/lib/Makefile b/tools/testing/selftests/lib/Makefile index a105f094676e..ee71fc99d5b5 100644 --- a/tools/testing/selftests/lib/Makefile +++ b/tools/testing/selftests/lib/Makefile @@ -4,6 +4,6 @@ # No binaries, but make sure arg-less "make" doesn't trigger "run_tests" all: -TEST_PROGS := printf.sh bitmap.sh prime_numbers.sh strscpy.sh +TEST_PROGS := printf.sh bitmap.sh prime_numbers.sh scanf.sh strscpy.sh include ../lib.mk diff --git a/tools/testing/selftests/lib/config b/tools/testing/selftests/lib/config index b80ee3f6e265..645839b50b0a 100644 --- a/tools/testing/selftests/lib/config +++ b/tools/testing/selftests/lib/config @@ -1,4 +1,5 @@ CONFIG_TEST_PRINTF=m +CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_PRIME_NUMBERS=m CONFIG_TEST_STRSCPY=m diff --git a/tools/testing/selftests/lib/scanf.sh b/tools/testing/selftests/lib/scanf.sh new file mode 100755 index 000000000000..b59b8ba561c3 --- /dev/null +++ b/tools/testing/selftests/lib/scanf.sh @@ -0,0 +1,4 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# Tests the scanf infrastructure using test_scanf kernel module. 
+$(dirname $0)/../kselftest/module.sh "scanf" test_scanf diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c index 4e94e566e040..f31205f04ee0 100644 --- a/tools/testing/selftests/mount_setattr/mount_setattr_test.c +++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c @@ -136,6 +136,10 @@ struct mount_attr { #define MOUNT_ATTR_IDMAP 0x00100000 #endif +#ifndef MOUNT_ATTR_NOSYMFOLLOW +#define MOUNT_ATTR_NOSYMFOLLOW 0x00200000 +#endif + static inline int sys_mount_setattr(int dfd, const char *path, unsigned int flags, struct mount_attr *attr, size_t size) { @@ -235,6 +239,10 @@ static int prepare_unpriv_mountns(void) return 0; } +#ifndef ST_NOSYMFOLLOW +#define ST_NOSYMFOLLOW 0x2000 /* do not follow symlinks */ +#endif + static int read_mnt_flags(const char *path) { int ret; @@ -245,9 +253,9 @@ static int read_mnt_flags(const char *path) if (ret != 0) return -EINVAL; - if (stat.f_flag & - ~(ST_RDONLY | ST_NOSUID | ST_NODEV | ST_NOEXEC | ST_NOATIME | - ST_NODIRATIME | ST_RELATIME | ST_SYNCHRONOUS | ST_MANDLOCK)) + if (stat.f_flag & ~(ST_RDONLY | ST_NOSUID | ST_NODEV | ST_NOEXEC | + ST_NOATIME | ST_NODIRATIME | ST_RELATIME | + ST_SYNCHRONOUS | ST_MANDLOCK | ST_NOSYMFOLLOW)) return -EINVAL; mnt_flags = 0; @@ -269,6 +277,8 @@ static int read_mnt_flags(const char *path) mnt_flags |= MS_SYNCHRONOUS; if (stat.f_flag & ST_MANDLOCK) mnt_flags |= ST_MANDLOCK; + if (stat.f_flag & ST_NOSYMFOLLOW) + mnt_flags |= ST_NOSYMFOLLOW; return mnt_flags; } @@ -368,8 +378,13 @@ static bool mount_setattr_supported(void) FIXTURE(mount_setattr) { }; +#define NOSYMFOLLOW_TARGET "/mnt/A/AA/data" +#define NOSYMFOLLOW_SYMLINK "/mnt/A/AA/symlink" + FIXTURE_SETUP(mount_setattr) { + int fd = -EBADF; + if (!mount_setattr_supported()) SKIP(return, "mount_setattr syscall not supported"); @@ -412,6 +427,11 @@ FIXTURE_SETUP(mount_setattr) ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts", MS_RELATIME | MS_NOEXEC | 
MS_RDONLY, 0), 0); + + fd = creat(NOSYMFOLLOW_TARGET, O_RDWR | O_CLOEXEC); + ASSERT_GT(fd, 0); + ASSERT_EQ(symlink(NOSYMFOLLOW_TARGET, NOSYMFOLLOW_SYMLINK), 0); + ASSERT_EQ(close(fd), 0); } FIXTURE_TEARDOWN(mount_setattr) @@ -1421,4 +1441,66 @@ TEST_F(mount_setattr_idmapped, idmap_mount_tree_invalid) ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/BB/b", 0, 0, 0), 0); } +TEST_F(mount_setattr, mount_attr_nosymfollow) +{ + int fd; + unsigned int old_flags = 0, new_flags = 0, expected_flags = 0; + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_NOSYMFOLLOW, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC); + ASSERT_GT(fd, 0); + ASSERT_EQ(close(fd), 0); + + old_flags = read_mnt_flags("/mnt/A"); + ASSERT_GT(old_flags, 0); + + ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + expected_flags = old_flags; + expected_flags |= ST_NOSYMFOLLOW; + + new_flags = read_mnt_flags("/mnt/A"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); + ASSERT_EQ(new_flags, expected_flags); + + fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC); + ASSERT_LT(fd, 0); + ASSERT_EQ(errno, ELOOP); + + attr.attr_set &= ~MOUNT_ATTR_NOSYMFOLLOW; + attr.attr_clr |= MOUNT_ATTR_NOSYMFOLLOW; + + ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + expected_flags &= ~ST_NOSYMFOLLOW; + new_flags = read_mnt_flags("/mnt/A"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); + ASSERT_EQ(new_flags, expected_flags); + + fd = 
open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC); + ASSERT_GT(fd, 0); + ASSERT_EQ(close(fd), 0); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 61ae899cfc17..19deb9cdf72f 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -30,3 +30,4 @@ hwtstamp_config rxtimestamp timestamping txtimestamp +so_netns_cookie diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 3915bb7bfc39..79c9eb0034d5 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -30,7 +30,7 @@ TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag -TEST_GEN_FILES += so_txtime ipv6_flowlabel ipv6_flowlabel_mgr +TEST_GEN_FILES += so_txtime ipv6_flowlabel ipv6_flowlabel_mgr so_netns_cookie TEST_GEN_FILES += tcp_fastopen_backup_key TEST_GEN_FILES += fin_ack_lat TEST_GEN_FILES += reuseaddr_ports_exhausted diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 614d5477365a..6f905b53904f 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -1,4 +1,5 @@ CONFIG_USER_NS=y +CONFIG_NET_NS=y CONFIG_BPF_SYSCALL=y CONFIG_TEST_BPF=m CONFIG_NUMA=y diff --git a/tools/testing/selftests/net/devlink_port_split.py b/tools/testing/selftests/net/devlink_port_split.py index 834066d465fc..2b5d6ff87373 100755 --- a/tools/testing/selftests/net/devlink_port_split.py +++ b/tools/testing/selftests/net/devlink_port_split.py @@ -18,6 +18,8 @@ import sys # +# Kselftest framework requirement - SKIP code is 4 +KSFT_SKIP=4 Port = collections.namedtuple('Port', 'bus_info name') @@ -239,7 +241,11 @@ def main(cmdline=None): assert stderr == "" devs = json.loads(stdout)['dev'] - dev = 
list(devs.keys())[0] + if devs: + dev = list(devs.keys())[0] + else: + print("no devlink device was found, test skipped") + sys.exit(KSFT_SKIP) cmd = "devlink dev show %s" % dev stdout, stderr = run_command(cmd) diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index 49774a8a7736..0d293391e9a4 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -925,6 +925,14 @@ ipv6_fcnal_runtime() run_cmd "$IP nexthop add id 86 via 2001:db8:91::2 dev veth1" run_cmd "$IP ro add 2001:db8:101::1/128 nhid 81" + # route can not use prefsrc with nexthops + run_cmd "$IP ro add 2001:db8:101::2/128 nhid 86 from 2001:db8:91::1" + log_test $? 2 "IPv6 route can not use src routing with external nexthop" + + # check cleanup path on invalid metric + run_cmd "$IP ro add 2001:db8:101::2/128 nhid 86 congctl lock foo" + log_test $? 2 "IPv6 route with invalid metric" + # rpfilter and default route $IP nexthop flush >/dev/null 2>&1 run_cmd "ip netns exec me ip6tables -t mangle -I PREROUTING 1 -m rpfilter --invert -j DROP" @@ -1366,6 +1374,10 @@ ipv4_fcnal_runtime() run_cmd "$IP nexthop replace id 22 via 172.16.2.2 dev veth3" log_test $? 2 "Nexthop replace with invalid scope for existing route" + # check cleanup path on invalid metric + run_cmd "$IP ro add 172.16.101.2/32 nhid 22 congctl lock foo" + log_test $? 2 "IPv4 route with invalid metric" + # # add route with nexthop and check traffic # diff --git a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh new file mode 100755 index 000000000000..a15d21dc035a --- /dev/null +++ b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh @@ -0,0 +1,364 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test traffic distribution between two paths when using custom hash policy. 
+# +# +--------------------------------+ +# | H1 | +# | $h1 + | +# | 198.51.100.{2-253}/24 | | +# | 2001:db8:1::{2-fd}/64 | | +# +-------------------------|------+ +# | +# +-------------------------|-------------------------+ +# | SW1 | | +# | $rp1 + | +# | 198.51.100.1/24 | +# | 2001:db8:1::1/64 | +# | | +# | | +# | $rp11 + + $rp12 | +# | 192.0.2.1/28 | | 192.0.2.17/28 | +# | 2001:db8:2::1/64 | | 2001:db8:3::1/64 | +# +------------------|-------------|------------------+ +# | | +# +------------------|-------------|------------------+ +# | SW2 | | | +# | | | | +# | $rp21 + + $rp22 | +# | 192.0.2.2/28 192.0.2.18/28 | +# | 2001:db8:2::2/64 2001:db8:3::2/64 | +# | | +# | | +# | $rp2 + | +# | 203.0.113.1/24 | | +# | 2001:db8:4::1/64 | | +# +-------------------------|-------------------------+ +# | +# +-------------------------|------+ +# | H2 | | +# | $h2 + | +# | 203.0.113.{2-253}/24 | +# | 2001:db8:4::{2-fd}/64 | +# +--------------------------------+ + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 + custom_hash +" + +NUM_NETIFS=8 +source lib.sh + +h1_create() +{ + simple_if_init $h1 198.51.100.2/24 2001:db8:1::2/64 + ip route add vrf v$h1 default via 198.51.100.1 dev $h1 + ip -6 route add vrf v$h1 default via 2001:db8:1::1 dev $h1 +} + +h1_destroy() +{ + ip -6 route del vrf v$h1 default + ip route del vrf v$h1 default + simple_if_fini $h1 198.51.100.2/24 2001:db8:1::2/64 +} + +sw1_create() +{ + simple_if_init $rp1 198.51.100.1/24 2001:db8:1::1/64 + __simple_if_init $rp11 v$rp1 192.0.2.1/28 2001:db8:2::1/64 + __simple_if_init $rp12 v$rp1 192.0.2.17/28 2001:db8:3::1/64 + + ip route add vrf v$rp1 203.0.113.0/24 \ + nexthop via 192.0.2.2 dev $rp11 \ + nexthop via 192.0.2.18 dev $rp12 + + ip -6 route add vrf v$rp1 2001:db8:4::/64 \ + nexthop via 2001:db8:2::2 dev $rp11 \ + nexthop via 2001:db8:3::2 dev $rp12 +} + +sw1_destroy() +{ + ip -6 route del vrf v$rp1 2001:db8:4::/64 + + ip route del vrf v$rp1 203.0.113.0/24 + + __simple_if_fini $rp12 192.0.2.17/28 2001:db8:3::1/64 + 
__simple_if_fini $rp11 192.0.2.1/28 2001:db8:2::1/64 + simple_if_fini $rp1 198.51.100.1/24 2001:db8:1::1/64 +} + +sw2_create() +{ + simple_if_init $rp2 203.0.113.1/24 2001:db8:4::1/64 + __simple_if_init $rp21 v$rp2 192.0.2.2/28 2001:db8:2::2/64 + __simple_if_init $rp22 v$rp2 192.0.2.18/28 2001:db8:3::2/64 + + ip route add vrf v$rp2 198.51.100.0/24 \ + nexthop via 192.0.2.1 dev $rp21 \ + nexthop via 192.0.2.17 dev $rp22 + + ip -6 route add vrf v$rp2 2001:db8:1::/64 \ + nexthop via 2001:db8:2::1 dev $rp21 \ + nexthop via 2001:db8:3::1 dev $rp22 +} + +sw2_destroy() +{ + ip -6 route del vrf v$rp2 2001:db8:1::/64 + + ip route del vrf v$rp2 198.51.100.0/24 + + __simple_if_fini $rp22 192.0.2.18/28 2001:db8:3::2/64 + __simple_if_fini $rp21 192.0.2.2/28 2001:db8:2::2/64 + simple_if_fini $rp2 203.0.113.1/24 2001:db8:4::1/64 +} + +h2_create() +{ + simple_if_init $h2 203.0.113.2/24 2001:db8:4::2/64 + ip route add vrf v$h2 default via 203.0.113.1 dev $h2 + ip -6 route add vrf v$h2 default via 2001:db8:4::1 dev $h2 +} + +h2_destroy() +{ + ip -6 route del vrf v$h2 default + ip route del vrf v$h2 default + simple_if_fini $h2 203.0.113.2/24 2001:db8:4::2/64 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + + rp1=${NETIFS[p2]} + + rp11=${NETIFS[p3]} + rp21=${NETIFS[p4]} + + rp12=${NETIFS[p5]} + rp22=${NETIFS[p6]} + + rp2=${NETIFS[p7]} + + h2=${NETIFS[p8]} + + vrf_prepare + h1_create + sw1_create + sw2_create + h2_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + h2_destroy + sw2_destroy + sw1_destroy + h1_destroy + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 203.0.113.2 +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:4::2 +} + +send_src_ipv4() +{ + $MZ $h1 -q -p 64 -A "198.51.100.2-198.51.100.253" -B 203.0.113.2 \ + -d 1msec -c 50 -t udp "sp=20000,dp=30000" +} + +send_dst_ipv4() +{ + $MZ $h1 -q -p 64 -A 198.51.100.2 -B "203.0.113.2-203.0.113.253" \ + -d 1msec -c 50 -t udp "sp=20000,dp=30000" +} + +send_src_udp4() +{ + $MZ $h1 -q -p 64 -A 
198.51.100.2 -B 203.0.113.2 \ + -d 1msec -t udp "sp=0-32768,dp=30000" +} + +send_dst_udp4() +{ + $MZ $h1 -q -p 64 -A 198.51.100.2 -B 203.0.113.2 \ + -d 1msec -t udp "sp=20000,dp=0-32768" +} + +send_src_ipv6() +{ + $MZ -6 $h1 -q -p 64 -A "2001:db8:1::2-2001:db8:1::fd" -B 2001:db8:4::2 \ + -d 1msec -c 50 -t udp "sp=20000,dp=30000" +} + +send_dst_ipv6() +{ + $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B "2001:db8:4::2-2001:db8:4::fd" \ + -d 1msec -c 50 -t udp "sp=20000,dp=30000" +} + +send_flowlabel() +{ + # Generate 16384 echo requests, each with a random flow label. + for _ in $(seq 1 16384); do + ip vrf exec v$h1 \ + $PING6 2001:db8:4::2 -F 0 -c 1 -q >/dev/null 2>&1 + done +} + +send_src_udp6() +{ + $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:4::2 \ + -d 1msec -t udp "sp=0-32768,dp=30000" +} + +send_dst_udp6() +{ + $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:4::2 \ + -d 1msec -t udp "sp=20000,dp=0-32768" +} + +custom_hash_test() +{ + local field="$1"; shift + local balanced="$1"; shift + local send_flows="$@" + + RET=0 + + local t0_rp11=$(link_stats_tx_packets_get $rp11) + local t0_rp12=$(link_stats_tx_packets_get $rp12) + + $send_flows + + local t1_rp11=$(link_stats_tx_packets_get $rp11) + local t1_rp12=$(link_stats_tx_packets_get $rp12) + + local d_rp11=$((t1_rp11 - t0_rp11)) + local d_rp12=$((t1_rp12 - t0_rp12)) + + local diff=$((d_rp12 - d_rp11)) + local sum=$((d_rp11 + d_rp12)) + + local pct=$(echo "$diff / $sum * 100" | bc -l) + local is_balanced=$(echo "-20 <= $pct && $pct <= 20" | bc) + + [[ ( $is_balanced -eq 1 && $balanced == "balanced" ) || + ( $is_balanced -eq 0 && $balanced == "unbalanced" ) ]] + check_err $? 
"Expected traffic to be $balanced, but it is not" + + log_test "Multipath hash field: $field ($balanced)" + log_info "Packets sent on path1 / path2: $d_rp11 / $d_rp12" +} + +custom_hash_v4() +{ + log_info "Running IPv4 custom multipath hash tests" + + sysctl_set net.ipv4.fib_multipath_hash_policy 3 + + # Prevent the neighbour table from overflowing, as different neighbour + # entries will be created on $ol4 when using different destination IPs. + sysctl_set net.ipv4.neigh.default.gc_thresh1 1024 + sysctl_set net.ipv4.neigh.default.gc_thresh2 1024 + sysctl_set net.ipv4.neigh.default.gc_thresh3 1024 + + sysctl_set net.ipv4.fib_multipath_hash_fields 0x0001 + custom_hash_test "Source IP" "balanced" send_src_ipv4 + custom_hash_test "Source IP" "unbalanced" send_dst_ipv4 + + sysctl_set net.ipv4.fib_multipath_hash_fields 0x0002 + custom_hash_test "Destination IP" "balanced" send_dst_ipv4 + custom_hash_test "Destination IP" "unbalanced" send_src_ipv4 + + sysctl_set net.ipv4.fib_multipath_hash_fields 0x0010 + custom_hash_test "Source port" "balanced" send_src_udp4 + custom_hash_test "Source port" "unbalanced" send_dst_udp4 + + sysctl_set net.ipv4.fib_multipath_hash_fields 0x0020 + custom_hash_test "Destination port" "balanced" send_dst_udp4 + custom_hash_test "Destination port" "unbalanced" send_src_udp4 + + sysctl_restore net.ipv4.neigh.default.gc_thresh3 + sysctl_restore net.ipv4.neigh.default.gc_thresh2 + sysctl_restore net.ipv4.neigh.default.gc_thresh1 + + sysctl_restore net.ipv4.fib_multipath_hash_policy +} + +custom_hash_v6() +{ + log_info "Running IPv6 custom multipath hash tests" + + sysctl_set net.ipv6.fib_multipath_hash_policy 3 + + # Prevent the neighbour table from overflowing, as different neighbour + # entries will be created on $ol4 when using different destination IPs. 
+ sysctl_set net.ipv6.neigh.default.gc_thresh1 1024 + sysctl_set net.ipv6.neigh.default.gc_thresh2 1024 + sysctl_set net.ipv6.neigh.default.gc_thresh3 1024 + + sysctl_set net.ipv6.fib_multipath_hash_fields 0x0001 + custom_hash_test "Source IP" "balanced" send_src_ipv6 + custom_hash_test "Source IP" "unbalanced" send_dst_ipv6 + + sysctl_set net.ipv6.fib_multipath_hash_fields 0x0002 + custom_hash_test "Destination IP" "balanced" send_dst_ipv6 + custom_hash_test "Destination IP" "unbalanced" send_src_ipv6 + + sysctl_set net.ipv6.fib_multipath_hash_fields 0x0008 + custom_hash_test "Flowlabel" "balanced" send_flowlabel + custom_hash_test "Flowlabel" "unbalanced" send_src_ipv6 + + sysctl_set net.ipv6.fib_multipath_hash_fields 0x0010 + custom_hash_test "Source port" "balanced" send_src_udp6 + custom_hash_test "Source port" "unbalanced" send_dst_udp6 + + sysctl_set net.ipv6.fib_multipath_hash_fields 0x0020 + custom_hash_test "Destination port" "balanced" send_dst_udp6 + custom_hash_test "Destination port" "unbalanced" send_src_udp6 + + sysctl_restore net.ipv6.neigh.default.gc_thresh3 + sysctl_restore net.ipv6.neigh.default.gc_thresh2 + sysctl_restore net.ipv6.neigh.default.gc_thresh1 + + sysctl_restore net.ipv6.fib_multipath_hash_policy +} + +custom_hash() +{ + # Test that when the hash policy is set to custom, traffic is + # distributed only according to the fields set in the + # fib_multipath_hash_fields sysctl. + # + # Each time set a different field and make sure traffic is only + # distributed when the field is changed in the packet stream. + custom_hash_v4 + custom_hash_v6 +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh index 9c12c4fd3afc..13d3d4428a32 100644 --- a/tools/testing/selftests/net/forwarding/devlink_lib.sh +++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh @@ -18,6 +18,12 @@ if [[ ! 
-v DEVLINK_DEV ]]; then DEVLINK_VIDDID=$(lspci -s $(echo $DEVLINK_DEV | cut -d"/" -f2) \ -n | cut -d" " -f3) +elif [[ ! -z "$DEVLINK_DEV" ]]; then + devlink dev show $DEVLINK_DEV &> /dev/null + if [ $? -ne 0 ]; then + echo "SKIP: devlink device \"$DEVLINK_DEV\" not found" + exit 1 + fi fi ############################################################################## @@ -318,6 +324,14 @@ devlink_trap_rx_bytes_get() | jq '.[][][]["stats"]["rx"]["bytes"]' } +devlink_trap_drop_packets_get() +{ + local trap_name=$1; shift + + devlink -js trap show $DEVLINK_DEV trap $trap_name \ + | jq '.[][][]["stats"]["rx"]["dropped"]' +} + devlink_trap_stats_idle_test() { local trap_name=$1; shift @@ -339,6 +353,24 @@ devlink_trap_stats_idle_test() fi } +devlink_trap_drop_stats_idle_test() +{ + local trap_name=$1; shift + local t0_packets t0_bytes + + t0_packets=$(devlink_trap_drop_packets_get $trap_name) + + sleep 1 + + t1_packets=$(devlink_trap_drop_packets_get $trap_name) + + if [[ $t0_packets -eq $t1_packets ]]; then + return 0 + else + return 1 + fi +} + devlink_traps_enable_all() { local trap_name diff --git a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh new file mode 100755 index 000000000000..a73f52efcb6c --- /dev/null +++ b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh @@ -0,0 +1,456 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test traffic distribution when there are multiple paths between an IPv4 GRE +# tunnel. The tunnel carries IPv4 and IPv6 traffic between multiple hosts. +# Multiple routes are in the underlay network. With the default multipath +# policy, SW2 will only look at the outer IP addresses, hence only a single +# route would be used. 
+# +# +--------------------------------+ +# | H1 | +# | $h1 + | +# | 198.51.100.{2-253}/24 | | +# | 2001:db8:1::{2-fd}/64 | | +# +-------------------------|------+ +# | +# +-------------------------|------------------+ +# | SW1 | | +# | $ol1 + | +# | 198.51.100.1/24 | +# | 2001:db8:1::1/64 | +# | | +# | + g1 (gre) | +# | loc=192.0.2.1 | +# | rem=192.0.2.2 --. | +# | tos=inherit | | +# | v | +# | + $ul1 | +# | | 192.0.2.17/28 | +# +---------------------|----------------------+ +# | +# +---------------------|----------------------+ +# | SW2 | | +# | $ul21 + | +# | 192.0.2.18/28 | | +# | | | +# ! __________________+___ | +# | / \ | +# | | | | +# | + $ul22.111 (vlan) + $ul22.222 (vlan) | +# | | 192.0.2.33/28 | 192.0.2.49/28 | +# | | | | +# +--|----------------------|------------------+ +# | | +# +--|----------------------|------------------+ +# | | | | +# | + $ul32.111 (vlan) + $ul32.222 (vlan) | +# | | 192.0.2.34/28 | 192.0.2.50/28 | +# | | | | +# | \__________________+___/ | +# | | | +# | | | +# | $ul31 + | +# | 192.0.2.65/28 | SW3 | +# +---------------------|----------------------+ +# | +# +---------------------|----------------------+ +# | + $ul4 | +# | ^ 192.0.2.66/28 | +# | | | +# | + g2 (gre) | | +# | loc=192.0.2.2 | | +# | rem=192.0.2.1 --' | +# | tos=inherit | +# | | +# | $ol4 + | +# | 203.0.113.1/24 | | +# | 2001:db8:2::1/64 | SW4 | +# +-------------------------|------------------+ +# | +# +-------------------------|------+ +# | | | +# | $h2 + | +# | 203.0.113.{2-253}/24 | +# | 2001:db8:2::{2-fd}/64 H2 | +# +--------------------------------+ + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 + custom_hash +" + +NUM_NETIFS=10 +source lib.sh + +h1_create() +{ + simple_if_init $h1 198.51.100.2/24 2001:db8:1::2/64 + ip route add vrf v$h1 default via 198.51.100.1 dev $h1 + ip -6 route add vrf v$h1 default via 2001:db8:1::1 dev $h1 +} + +h1_destroy() +{ + ip -6 route del vrf v$h1 default + ip route del vrf v$h1 default + simple_if_fini $h1 198.51.100.2/24 2001:db8:1::2/64 +} + 
+sw1_create() +{ + simple_if_init $ol1 198.51.100.1/24 2001:db8:1::1/64 + __simple_if_init $ul1 v$ol1 192.0.2.17/28 + + tunnel_create g1 gre 192.0.2.1 192.0.2.2 tos inherit dev v$ol1 + __simple_if_init g1 v$ol1 192.0.2.1/32 + ip route add vrf v$ol1 192.0.2.2/32 via 192.0.2.18 + + ip route add vrf v$ol1 203.0.113.0/24 dev g1 + ip -6 route add vrf v$ol1 2001:db8:2::/64 dev g1 +} + +sw1_destroy() +{ + ip -6 route del vrf v$ol1 2001:db8:2::/64 + ip route del vrf v$ol1 203.0.113.0/24 + + ip route del vrf v$ol1 192.0.2.2/32 + __simple_if_fini g1 192.0.2.1/32 + tunnel_destroy g1 + + __simple_if_fini $ul1 192.0.2.17/28 + simple_if_fini $ol1 198.51.100.1/24 2001:db8:1::1/64 +} + +sw2_create() +{ + simple_if_init $ul21 192.0.2.18/28 + __simple_if_init $ul22 v$ul21 + vlan_create $ul22 111 v$ul21 192.0.2.33/28 + vlan_create $ul22 222 v$ul21 192.0.2.49/28 + + ip route add vrf v$ul21 192.0.2.1/32 via 192.0.2.17 + ip route add vrf v$ul21 192.0.2.2/32 \ + nexthop via 192.0.2.34 \ + nexthop via 192.0.2.50 +} + +sw2_destroy() +{ + ip route del vrf v$ul21 192.0.2.2/32 + ip route del vrf v$ul21 192.0.2.1/32 + + vlan_destroy $ul22 222 + vlan_destroy $ul22 111 + __simple_if_fini $ul22 + simple_if_fini $ul21 192.0.2.18/28 +} + +sw3_create() +{ + simple_if_init $ul31 192.0.2.65/28 + __simple_if_init $ul32 v$ul31 + vlan_create $ul32 111 v$ul31 192.0.2.34/28 + vlan_create $ul32 222 v$ul31 192.0.2.50/28 + + ip route add vrf v$ul31 192.0.2.2/32 via 192.0.2.66 + ip route add vrf v$ul31 192.0.2.1/32 \ + nexthop via 192.0.2.33 \ + nexthop via 192.0.2.49 + + tc qdisc add dev $ul32 clsact + tc filter add dev $ul32 ingress pref 111 prot 802.1Q \ + flower vlan_id 111 action pass + tc filter add dev $ul32 ingress pref 222 prot 802.1Q \ + flower vlan_id 222 action pass +} + +sw3_destroy() +{ + tc qdisc del dev $ul32 clsact + + ip route del vrf v$ul31 192.0.2.1/32 + ip route del vrf v$ul31 192.0.2.2/32 + + vlan_destroy $ul32 222 + vlan_destroy $ul32 111 + __simple_if_fini $ul32 + simple_if_fini $ul31 
192.0.2.65/28 +} + +sw4_create() +{ + simple_if_init $ol4 203.0.113.1/24 2001:db8:2::1/64 + __simple_if_init $ul4 v$ol4 192.0.2.66/28 + + tunnel_create g2 gre 192.0.2.2 192.0.2.1 tos inherit dev v$ol4 + __simple_if_init g2 v$ol4 192.0.2.2/32 + ip route add vrf v$ol4 192.0.2.1/32 via 192.0.2.65 + + ip route add vrf v$ol4 198.51.100.0/24 dev g2 + ip -6 route add vrf v$ol4 2001:db8:1::/64 dev g2 +} + +sw4_destroy() +{ + ip -6 route del vrf v$ol4 2001:db8:1::/64 + ip route del vrf v$ol4 198.51.100.0/24 + + ip route del vrf v$ol4 192.0.2.1/32 + __simple_if_fini g2 192.0.2.2/32 + tunnel_destroy g2 + + __simple_if_fini $ul4 192.0.2.66/28 + simple_if_fini $ol4 203.0.113.1/24 2001:db8:2::1/64 +} + +h2_create() +{ + simple_if_init $h2 203.0.113.2/24 2001:db8:2::2/64 + ip route add vrf v$h2 default via 203.0.113.1 dev $h2 + ip -6 route add vrf v$h2 default via 2001:db8:2::1 dev $h2 +} + +h2_destroy() +{ + ip -6 route del vrf v$h2 default + ip route del vrf v$h2 default + simple_if_fini $h2 203.0.113.2/24 2001:db8:2::2/64 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + + ol1=${NETIFS[p2]} + ul1=${NETIFS[p3]} + + ul21=${NETIFS[p4]} + ul22=${NETIFS[p5]} + + ul32=${NETIFS[p6]} + ul31=${NETIFS[p7]} + + ul4=${NETIFS[p8]} + ol4=${NETIFS[p9]} + + h2=${NETIFS[p10]} + + vrf_prepare + h1_create + sw1_create + sw2_create + sw3_create + sw4_create + h2_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + h2_destroy + sw4_destroy + sw3_destroy + sw2_destroy + sw1_destroy + h1_destroy + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 203.0.113.2 +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:2::2 +} + +send_src_ipv4() +{ + $MZ $h1 -q -p 64 -A "198.51.100.2-198.51.100.253" -B 203.0.113.2 \ + -d 1msec -c 50 -t udp "sp=20000,dp=30000" +} + +send_dst_ipv4() +{ + $MZ $h1 -q -p 64 -A 198.51.100.2 -B "203.0.113.2-203.0.113.253" \ + -d 1msec -c 50 -t udp "sp=20000,dp=30000" +} + +send_src_udp4() +{ + $MZ $h1 -q -p 64 -A 198.51.100.2 -B 203.0.113.2 \ + -d 1msec -t 
udp "sp=0-32768,dp=30000" +} + +send_dst_udp4() +{ + $MZ $h1 -q -p 64 -A 198.51.100.2 -B 203.0.113.2 \ + -d 1msec -t udp "sp=20000,dp=0-32768" +} + +send_src_ipv6() +{ + $MZ -6 $h1 -q -p 64 -A "2001:db8:1::2-2001:db8:1::fd" -B 2001:db8:2::2 \ + -d 1msec -c 50 -t udp "sp=20000,dp=30000" +} + +send_dst_ipv6() +{ + $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B "2001:db8:2::2-2001:db8:2::fd" \ + -d 1msec -c 50 -t udp "sp=20000,dp=30000" +} + +send_flowlabel() +{ + # Generate 16384 echo requests, each with a random flow label. + for _ in $(seq 1 16384); do + ip vrf exec v$h1 \ + $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1 + done +} + +send_src_udp6() +{ + $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \ + -d 1msec -t udp "sp=0-32768,dp=30000" +} + +send_dst_udp6() +{ + $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \ + -d 1msec -t udp "sp=20000,dp=0-32768" +} + +custom_hash_test() +{ + local field="$1"; shift + local balanced="$1"; shift + local send_flows="$@" + + RET=0 + + local t0_111=$(tc_rule_stats_get $ul32 111 ingress) + local t0_222=$(tc_rule_stats_get $ul32 222 ingress) + + $send_flows + + local t1_111=$(tc_rule_stats_get $ul32 111 ingress) + local t1_222=$(tc_rule_stats_get $ul32 222 ingress) + + local d111=$((t1_111 - t0_111)) + local d222=$((t1_222 - t0_222)) + + local diff=$((d222 - d111)) + local sum=$((d111 + d222)) + + local pct=$(echo "$diff / $sum * 100" | bc -l) + local is_balanced=$(echo "-20 <= $pct && $pct <= 20" | bc) + + [[ ( $is_balanced -eq 1 && $balanced == "balanced" ) || + ( $is_balanced -eq 0 && $balanced == "unbalanced" ) ]] + check_err $? 
"Expected traffic to be $balanced, but it is not" + + log_test "Multipath hash field: $field ($balanced)" + log_info "Packets sent on path1 / path2: $d111 / $d222" +} + +custom_hash_v4() +{ + log_info "Running IPv4 overlay custom multipath hash tests" + + # Prevent the neighbour table from overflowing, as different neighbour + # entries will be created on $ol4 when using different destination IPs. + sysctl_set net.ipv4.neigh.default.gc_thresh1 1024 + sysctl_set net.ipv4.neigh.default.gc_thresh2 1024 + sysctl_set net.ipv4.neigh.default.gc_thresh3 1024 + + sysctl_set net.ipv4.fib_multipath_hash_fields 0x0040 + custom_hash_test "Inner source IP" "balanced" send_src_ipv4 + custom_hash_test "Inner source IP" "unbalanced" send_dst_ipv4 + + sysctl_set net.ipv4.fib_multipath_hash_fields 0x0080 + custom_hash_test "Inner destination IP" "balanced" send_dst_ipv4 + custom_hash_test "Inner destination IP" "unbalanced" send_src_ipv4 + + sysctl_set net.ipv4.fib_multipath_hash_fields 0x0400 + custom_hash_test "Inner source port" "balanced" send_src_udp4 + custom_hash_test "Inner source port" "unbalanced" send_dst_udp4 + + sysctl_set net.ipv4.fib_multipath_hash_fields 0x0800 + custom_hash_test "Inner destination port" "balanced" send_dst_udp4 + custom_hash_test "Inner destination port" "unbalanced" send_src_udp4 + + sysctl_restore net.ipv4.neigh.default.gc_thresh3 + sysctl_restore net.ipv4.neigh.default.gc_thresh2 + sysctl_restore net.ipv4.neigh.default.gc_thresh1 +} + +custom_hash_v6() +{ + log_info "Running IPv6 overlay custom multipath hash tests" + + # Prevent the neighbour table from overflowing, as different neighbour + # entries will be created on $ol4 when using different destination IPs. 
+ sysctl_set net.ipv6.neigh.default.gc_thresh1 1024 + sysctl_set net.ipv6.neigh.default.gc_thresh2 1024 + sysctl_set net.ipv6.neigh.default.gc_thresh3 1024 + + sysctl_set net.ipv4.fib_multipath_hash_fields 0x0040 + custom_hash_test "Inner source IP" "balanced" send_src_ipv6 + custom_hash_test "Inner source IP" "unbalanced" send_dst_ipv6 + + sysctl_set net.ipv4.fib_multipath_hash_fields 0x0080 + custom_hash_test "Inner destination IP" "balanced" send_dst_ipv6 + custom_hash_test "Inner destination IP" "unbalanced" send_src_ipv6 + + sysctl_set net.ipv4.fib_multipath_hash_fields 0x0200 + custom_hash_test "Inner flowlabel" "balanced" send_flowlabel + custom_hash_test "Inner flowlabel" "unbalanced" send_src_ipv6 + + sysctl_set net.ipv4.fib_multipath_hash_fields 0x0400 + custom_hash_test "Inner source port" "balanced" send_src_udp6 + custom_hash_test "Inner source port" "unbalanced" send_dst_udp6 + + sysctl_set net.ipv4.fib_multipath_hash_fields 0x0800 + custom_hash_test "Inner destination port" "balanced" send_dst_udp6 + custom_hash_test "Inner destination port" "unbalanced" send_src_udp6 + + sysctl_restore net.ipv6.neigh.default.gc_thresh3 + sysctl_restore net.ipv6.neigh.default.gc_thresh2 + sysctl_restore net.ipv6.neigh.default.gc_thresh1 +} + +custom_hash() +{ + # Test that when the hash policy is set to custom, traffic is + # distributed only according to the fields set in the + # fib_multipath_hash_fields sysctl. + # + # Each time set a different field and make sure traffic is only + # distributed when the field is changed in the packet stream. 
+ + sysctl_set net.ipv4.fib_multipath_hash_policy 3 + + custom_hash_v4 + custom_hash_v6 + + sysctl_restore net.ipv4.fib_multipath_hash_policy +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh new file mode 100755 index 000000000000..8fea2c2e0b25 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh @@ -0,0 +1,458 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test traffic distribution when there are multiple paths between an IPv6 GRE +# tunnel. The tunnel carries IPv4 and IPv6 traffic between multiple hosts. +# Multiple routes are in the underlay network. With the default multipath +# policy, SW2 will only look at the outer IP addresses, hence only a single +# route would be used. +# +# +--------------------------------+ +# | H1 | +# | $h1 + | +# | 198.51.100.{2-253}/24 | | +# | 2001:db8:1::{2-fd}/64 | | +# +-------------------------|------+ +# | +# +-------------------------|-------------------+ +# | SW1 | | +# | $ol1 + | +# | 198.51.100.1/24 | +# | 2001:db8:1::1/64 | +# | | +# |+ g1 (ip6gre) | +# | loc=2001:db8:3::1 | +# | rem=2001:db8:3::2 -. | +# | tos=inherit | | +# | v | +# | + $ul1 | +# | | 2001:db8:10::1/64 | +# +---------------------|-----------------------+ +# | +# +---------------------|-----------------------+ +# | SW2 | | +# | $ul21 + | +# | 2001:db8:10::2/64 | | +# | | | +# ! 
__________________+___ | +# | / \ | +# | | | | +# | + $ul22.111 (vlan) + $ul22.222 (vlan) | +# | | 2001:db8:11::1/64 | 2001:db8:12::1/64 | +# | | | | +# +--|----------------------|-------------------+ +# | | +# +--|----------------------|-------------------+ +# | | | | +# | + $ul32.111 (vlan) + $ul32.222 (vlan) | +# | | 2001:db8:11::2/64 | 2001:db8:12::2/64 | +# | | | | +# | \__________________+___/ | +# | | | +# | | | +# | $ul31 + | +# | 2001:db8:13::1/64 | SW3 | +# +---------------------|-----------------------+ +# | +# +---------------------|-----------------------+ +# | + $ul4 | +# | ^ 2001:db8:13::2/64 | +# | | | +# |+ g2 (ip6gre) | | +# | loc=2001:db8:3::2 | | +# | rem=2001:db8:3::1 -' | +# | tos=inherit | +# | | +# | $ol4 + | +# | 203.0.113.1/24 | | +# | 2001:db8:2::1/64 | SW4 | +# +-------------------------|-------------------+ +# | +# +-------------------------|------+ +# | | | +# | $h2 + | +# | 203.0.113.{2-253}/24 | +# | 2001:db8:2::{2-fd}/64 H2 | +# +--------------------------------+ + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 + custom_hash +" + +NUM_NETIFS=10 +source lib.sh + +h1_create() +{ + simple_if_init $h1 198.51.100.2/24 2001:db8:1::2/64 + ip route add vrf v$h1 default via 198.51.100.1 dev $h1 + ip -6 route add vrf v$h1 default via 2001:db8:1::1 dev $h1 +} + +h1_destroy() +{ + ip -6 route del vrf v$h1 default + ip route del vrf v$h1 default + simple_if_fini $h1 198.51.100.2/24 2001:db8:1::2/64 +} + +sw1_create() +{ + simple_if_init $ol1 198.51.100.1/24 2001:db8:1::1/64 + __simple_if_init $ul1 v$ol1 2001:db8:10::1/64 + + tunnel_create g1 ip6gre 2001:db8:3::1 2001:db8:3::2 tos inherit \ + dev v$ol1 + __simple_if_init g1 v$ol1 2001:db8:3::1/128 + ip route add vrf v$ol1 2001:db8:3::2/128 via 2001:db8:10::2 + + ip route add vrf v$ol1 203.0.113.0/24 dev g1 + ip -6 route add vrf v$ol1 2001:db8:2::/64 dev g1 +} + +sw1_destroy() +{ + ip -6 route del vrf v$ol1 2001:db8:2::/64 + ip route del vrf v$ol1 203.0.113.0/24 + + ip route del vrf v$ol1 2001:db8:3::2/128 
+ __simple_if_fini g1 2001:db8:3::1/128 + tunnel_destroy g1 + + __simple_if_fini $ul1 2001:db8:10::1/64 + simple_if_fini $ol1 198.51.100.1/24 2001:db8:1::1/64 +} + +sw2_create() +{ + simple_if_init $ul21 2001:db8:10::2/64 + __simple_if_init $ul22 v$ul21 + vlan_create $ul22 111 v$ul21 2001:db8:11::1/64 + vlan_create $ul22 222 v$ul21 2001:db8:12::1/64 + + ip -6 route add vrf v$ul21 2001:db8:3::1/128 via 2001:db8:10::1 + ip -6 route add vrf v$ul21 2001:db8:3::2/128 \ + nexthop via 2001:db8:11::2 \ + nexthop via 2001:db8:12::2 +} + +sw2_destroy() +{ + ip -6 route del vrf v$ul21 2001:db8:3::2/128 + ip -6 route del vrf v$ul21 2001:db8:3::1/128 + + vlan_destroy $ul22 222 + vlan_destroy $ul22 111 + __simple_if_fini $ul22 + simple_if_fini $ul21 2001:db8:10::2/64 +} + +sw3_create() +{ + simple_if_init $ul31 2001:db8:13::1/64 + __simple_if_init $ul32 v$ul31 + vlan_create $ul32 111 v$ul31 2001:db8:11::2/64 + vlan_create $ul32 222 v$ul31 2001:db8:12::2/64 + + ip -6 route add vrf v$ul31 2001:db8:3::2/128 via 2001:db8:13::2 + ip -6 route add vrf v$ul31 2001:db8:3::1/128 \ + nexthop via 2001:db8:11::1 \ + nexthop via 2001:db8:12::1 + + tc qdisc add dev $ul32 clsact + tc filter add dev $ul32 ingress pref 111 prot 802.1Q \ + flower vlan_id 111 action pass + tc filter add dev $ul32 ingress pref 222 prot 802.1Q \ + flower vlan_id 222 action pass +} + +sw3_destroy() +{ + tc qdisc del dev $ul32 clsact + + ip -6 route del vrf v$ul31 2001:db8:3::1/128 + ip -6 route del vrf v$ul31 2001:db8:3::2/128 + + vlan_destroy $ul32 222 + vlan_destroy $ul32 111 + __simple_if_fini $ul32 + simple_if_fini $ul31 2001:db8:13::1/64 +} + +sw4_create() +{ + simple_if_init $ol4 203.0.113.1/24 2001:db8:2::1/64 + __simple_if_init $ul4 v$ol4 2001:db8:13::2/64 + + tunnel_create g2 ip6gre 2001:db8:3::2 2001:db8:3::1 tos inherit \ + dev v$ol4 + __simple_if_init g2 v$ol4 2001:db8:3::2/128 + ip -6 route add vrf v$ol4 2001:db8:3::1/128 via 2001:db8:13::1 + + ip route add vrf v$ol4 198.51.100.0/24 dev g2 + ip -6 route 
add vrf v$ol4 2001:db8:1::/64 dev g2 +} + +sw4_destroy() +{ + ip -6 route del vrf v$ol4 2001:db8:1::/64 + ip route del vrf v$ol4 198.51.100.0/24 + + ip -6 route del vrf v$ol4 2001:db8:3::1/128 + __simple_if_fini g2 2001:db8:3::2/128 + tunnel_destroy g2 + + __simple_if_fini $ul4 2001:db8:13::2/64 + simple_if_fini $ol4 203.0.113.1/24 2001:db8:2::1/64 +} + +h2_create() +{ + simple_if_init $h2 203.0.113.2/24 2001:db8:2::2/64 + ip route add vrf v$h2 default via 203.0.113.1 dev $h2 + ip -6 route add vrf v$h2 default via 2001:db8:2::1 dev $h2 +} + +h2_destroy() +{ + ip -6 route del vrf v$h2 default + ip route del vrf v$h2 default + simple_if_fini $h2 203.0.113.2/24 2001:db8:2::2/64 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + + ol1=${NETIFS[p2]} + ul1=${NETIFS[p3]} + + ul21=${NETIFS[p4]} + ul22=${NETIFS[p5]} + + ul32=${NETIFS[p6]} + ul31=${NETIFS[p7]} + + ul4=${NETIFS[p8]} + ol4=${NETIFS[p9]} + + h2=${NETIFS[p10]} + + vrf_prepare + h1_create + sw1_create + sw2_create + sw3_create + sw4_create + h2_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + h2_destroy + sw4_destroy + sw3_destroy + sw2_destroy + sw1_destroy + h1_destroy + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 203.0.113.2 +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:2::2 +} + +send_src_ipv4() +{ + $MZ $h1 -q -p 64 -A "198.51.100.2-198.51.100.253" -B 203.0.113.2 \ + -d 1msec -c 50 -t udp "sp=20000,dp=30000" +} + +send_dst_ipv4() +{ + $MZ $h1 -q -p 64 -A 198.51.100.2 -B "203.0.113.2-203.0.113.253" \ + -d 1msec -c 50 -t udp "sp=20000,dp=30000" +} + +send_src_udp4() +{ + $MZ $h1 -q -p 64 -A 198.51.100.2 -B 203.0.113.2 \ + -d 1msec -t udp "sp=0-32768,dp=30000" +} + +send_dst_udp4() +{ + $MZ $h1 -q -p 64 -A 198.51.100.2 -B 203.0.113.2 \ + -d 1msec -t udp "sp=20000,dp=0-32768" +} + +send_src_ipv6() +{ + $MZ -6 $h1 -q -p 64 -A "2001:db8:1::2-2001:db8:1::fd" -B 2001:db8:2::2 \ + -d 1msec -c 50 -t udp "sp=20000,dp=30000" +} + +send_dst_ipv6() +{ + $MZ -6 $h1 -q -p 64 -A 
2001:db8:1::2 -B "2001:db8:2::2-2001:db8:2::fd" \ + -d 1msec -c 50 -t udp "sp=20000,dp=30000" +} + +send_flowlabel() +{ + # Generate 16384 echo requests, each with a random flow label. + for _ in $(seq 1 16384); do + ip vrf exec v$h1 \ + $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1 + done +} + +send_src_udp6() +{ + $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \ + -d 1msec -t udp "sp=0-32768,dp=30000" +} + +send_dst_udp6() +{ + $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \ + -d 1msec -t udp "sp=20000,dp=0-32768" +} + +custom_hash_test() +{ + local field="$1"; shift + local balanced="$1"; shift + local send_flows="$@" + + RET=0 + + local t0_111=$(tc_rule_stats_get $ul32 111 ingress) + local t0_222=$(tc_rule_stats_get $ul32 222 ingress) + + $send_flows + + local t1_111=$(tc_rule_stats_get $ul32 111 ingress) + local t1_222=$(tc_rule_stats_get $ul32 222 ingress) + + local d111=$((t1_111 - t0_111)) + local d222=$((t1_222 - t0_222)) + + local diff=$((d222 - d111)) + local sum=$((d111 + d222)) + + local pct=$(echo "$diff / $sum * 100" | bc -l) + local is_balanced=$(echo "-20 <= $pct && $pct <= 20" | bc) + + [[ ( $is_balanced -eq 1 && $balanced == "balanced" ) || + ( $is_balanced -eq 0 && $balanced == "unbalanced" ) ]] + check_err $? "Expected traffic to be $balanced, but it is not" + + log_test "Multipath hash field: $field ($balanced)" + log_info "Packets sent on path1 / path2: $d111 / $d222" +} + +custom_hash_v4() +{ + log_info "Running IPv4 overlay custom multipath hash tests" + + # Prevent the neighbour table from overflowing, as different neighbour + # entries will be created on $ol4 when using different destination IPs. 
+ sysctl_set net.ipv4.neigh.default.gc_thresh1 1024 + sysctl_set net.ipv4.neigh.default.gc_thresh2 1024 + sysctl_set net.ipv4.neigh.default.gc_thresh3 1024 + + sysctl_set net.ipv6.fib_multipath_hash_fields 0x0040 + custom_hash_test "Inner source IP" "balanced" send_src_ipv4 + custom_hash_test "Inner source IP" "unbalanced" send_dst_ipv4 + + sysctl_set net.ipv6.fib_multipath_hash_fields 0x0080 + custom_hash_test "Inner destination IP" "balanced" send_dst_ipv4 + custom_hash_test "Inner destination IP" "unbalanced" send_src_ipv4 + + sysctl_set net.ipv6.fib_multipath_hash_fields 0x0400 + custom_hash_test "Inner source port" "balanced" send_src_udp4 + custom_hash_test "Inner source port" "unbalanced" send_dst_udp4 + + sysctl_set net.ipv6.fib_multipath_hash_fields 0x0800 + custom_hash_test "Inner destination port" "balanced" send_dst_udp4 + custom_hash_test "Inner destination port" "unbalanced" send_src_udp4 + + sysctl_restore net.ipv4.neigh.default.gc_thresh3 + sysctl_restore net.ipv4.neigh.default.gc_thresh2 + sysctl_restore net.ipv4.neigh.default.gc_thresh1 +} + +custom_hash_v6() +{ + log_info "Running IPv6 overlay custom multipath hash tests" + + # Prevent the neighbour table from overflowing, as different neighbour + # entries will be created on $ol4 when using different destination IPs. 
+ sysctl_set net.ipv6.neigh.default.gc_thresh1 1024 + sysctl_set net.ipv6.neigh.default.gc_thresh2 1024 + sysctl_set net.ipv6.neigh.default.gc_thresh3 1024 + + sysctl_set net.ipv6.fib_multipath_hash_fields 0x0040 + custom_hash_test "Inner source IP" "balanced" send_src_ipv6 + custom_hash_test "Inner source IP" "unbalanced" send_dst_ipv6 + + sysctl_set net.ipv6.fib_multipath_hash_fields 0x0080 + custom_hash_test "Inner destination IP" "balanced" send_dst_ipv6 + custom_hash_test "Inner destination IP" "unbalanced" send_src_ipv6 + + sysctl_set net.ipv6.fib_multipath_hash_fields 0x0200 + custom_hash_test "Inner flowlabel" "balanced" send_flowlabel + custom_hash_test "Inner flowlabel" "unbalanced" send_src_ipv6 + + sysctl_set net.ipv6.fib_multipath_hash_fields 0x0400 + custom_hash_test "Inner source port" "balanced" send_src_udp6 + custom_hash_test "Inner source port" "unbalanced" send_dst_udp6 + + sysctl_set net.ipv6.fib_multipath_hash_fields 0x0800 + custom_hash_test "Inner destination port" "balanced" send_dst_udp6 + custom_hash_test "Inner destination port" "unbalanced" send_src_udp6 + + sysctl_restore net.ipv6.neigh.default.gc_thresh3 + sysctl_restore net.ipv6.neigh.default.gc_thresh2 + sysctl_restore net.ipv6.neigh.default.gc_thresh1 +} + +custom_hash() +{ + # Test that when the hash policy is set to custom, traffic is + # distributed only according to the fields set in the + # fib_multipath_hash_fields sysctl. + # + # Each time set a different field and make sure traffic is only + # distributed when the field is changed in the packet stream. 
+ + sysctl_set net.ipv6.fib_multipath_hash_policy 3 + + custom_hash_v4 + custom_hash_v6 + + sysctl_restore net.ipv6.fib_multipath_hash_policy +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/pedit_dsfield.sh b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh index 55eeacf59241..64fbd211d907 100755 --- a/tools/testing/selftests/net/forwarding/pedit_dsfield.sh +++ b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh @@ -75,7 +75,9 @@ switch_destroy() tc qdisc del dev $swp2 clsact tc qdisc del dev $swp1 clsact + ip link set dev $swp2 down ip link set dev $swp2 nomaster + ip link set dev $swp1 down ip link set dev $swp1 nomaster ip link del dev br1 } diff --git a/tools/testing/selftests/net/forwarding/pedit_l4port.sh b/tools/testing/selftests/net/forwarding/pedit_l4port.sh index 5f20d289ee43..10e594c55117 100755 --- a/tools/testing/selftests/net/forwarding/pedit_l4port.sh +++ b/tools/testing/selftests/net/forwarding/pedit_l4port.sh @@ -71,7 +71,9 @@ switch_destroy() tc qdisc del dev $swp2 clsact tc qdisc del dev $swp1 clsact + ip link set dev $swp2 down ip link set dev $swp2 nomaster + ip link set dev $swp1 down ip link set dev $swp1 nomaster ip link del dev br1 } diff --git a/tools/testing/selftests/net/forwarding/skbedit_priority.sh b/tools/testing/selftests/net/forwarding/skbedit_priority.sh index e3bd8a6bb8b4..bde11dc27873 100755 --- a/tools/testing/selftests/net/forwarding/skbedit_priority.sh +++ b/tools/testing/selftests/net/forwarding/skbedit_priority.sh @@ -72,7 +72,9 @@ switch_destroy() tc qdisc del dev $swp2 clsact tc qdisc del dev $swp1 clsact + ip link set dev $swp2 down ip link set dev $swp2 nomaster + ip link set dev $swp1 down ip link set dev $swp1 nomaster ip link del dev br1 } diff --git a/tools/testing/selftests/net/icmp_redirect.sh b/tools/testing/selftests/net/icmp_redirect.sh index bf361f30d6ef..c19ecc6a8614 100755 --- 
a/tools/testing/selftests/net/icmp_redirect.sh +++ b/tools/testing/selftests/net/icmp_redirect.sh @@ -63,10 +63,14 @@ log_test() local rc=$1 local expected=$2 local msg="$3" + local xfail=$4 if [ ${rc} -eq ${expected} ]; then printf "TEST: %-60s [ OK ]\n" "${msg}" nsuccess=$((nsuccess+1)) + elif [ ${rc} -eq ${xfail} ]; then + printf "TEST: %-60s [XFAIL]\n" "${msg}" + nxfail=$((nxfail+1)) else ret=1 nfail=$((nfail+1)) @@ -322,7 +326,7 @@ check_exception() ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \ grep -v "mtu" | grep -q "${R1_LLADDR}" fi - log_test $? 0 "IPv6: ${desc}" + log_test $? 0 "IPv6: ${desc}" 1 } run_ping() @@ -488,6 +492,7 @@ which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping) ret=0 nsuccess=0 nfail=0 +nxfail=0 while getopts :pv o do @@ -532,5 +537,6 @@ fi printf "\nTests passed: %3d\n" ${nsuccess} printf "Tests failed: %3d\n" ${nfail} +printf "Tests xfailed: %3d\n" ${nxfail} exit $ret diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index d88e1fdfb147..89c4753c2760 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -6,6 +6,7 @@ #include <limits.h> #include <fcntl.h> #include <string.h> +#include <stdarg.h> #include <stdbool.h> #include <stdint.h> #include <stdio.h> @@ -25,6 +26,7 @@ #include <netinet/in.h> #include <linux/tcp.h> +#include <linux/time_types.h> extern int optind; @@ -66,6 +68,13 @@ static unsigned int cfg_do_w; static int cfg_wait; static uint32_t cfg_mark; +struct cfg_cmsg_types { + unsigned int cmsg_enabled:1; + unsigned int timestampns:1; +}; + +static struct cfg_cmsg_types cfg_cmsg_types; + static void die_usage(void) { fprintf(stderr, "Usage: mptcp_connect [-6] [-u] [-s MPTCP|TCP] [-p port] [-m mode]" @@ -80,11 +89,22 @@ static void die_usage(void) fprintf(stderr, "\t-M mark -- set socket packet mark\n"); fprintf(stderr, "\t-u -- check mptcp ulp\n"); fprintf(stderr, 
"\t-w num -- wait num sec before closing the socket\n"); + fprintf(stderr, "\t-c cmsg -- test cmsg type <cmsg>\n"); fprintf(stderr, "\t-P [saveWithPeek|saveAfterPeek] -- save data with/after MSG_PEEK form tcp socket\n"); exit(1); } +static void xerror(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + exit(1); +} + static void handle_signal(int nr) { quit = true; @@ -338,6 +358,58 @@ static size_t do_write(const int fd, char *buf, const size_t len) return offset; } +static void process_cmsg(struct msghdr *msgh) +{ + struct __kernel_timespec ts; + bool ts_found = false; + struct cmsghdr *cmsg; + + for (cmsg = CMSG_FIRSTHDR(msgh); cmsg ; cmsg = CMSG_NXTHDR(msgh, cmsg)) { + if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SO_TIMESTAMPNS_NEW) { + memcpy(&ts, CMSG_DATA(cmsg), sizeof(ts)); + ts_found = true; + continue; + } + } + + if (cfg_cmsg_types.timestampns) { + if (!ts_found) + xerror("TIMESTAMPNS not present\n"); + } +} + +static ssize_t do_recvmsg_cmsg(const int fd, char *buf, const size_t len) +{ + char msg_buf[8192]; + struct iovec iov = { + .iov_base = buf, + .iov_len = len, + }; + struct msghdr msg = { + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_control = msg_buf, + .msg_controllen = sizeof(msg_buf), + }; + int flags = 0; + int ret = recvmsg(fd, &msg, flags); + + if (ret <= 0) + return ret; + + if (msg.msg_controllen && !cfg_cmsg_types.cmsg_enabled) + xerror("got %lu bytes of cmsg data, expected 0\n", + (unsigned long)msg.msg_controllen); + + if (msg.msg_controllen == 0 && cfg_cmsg_types.cmsg_enabled) + xerror("%s\n", "got no cmsg data"); + + if (msg.msg_controllen) + process_cmsg(&msg); + + return ret; +} + static ssize_t do_rnd_read(const int fd, char *buf, const size_t len) { int ret = 0; @@ -357,6 +429,8 @@ static ssize_t do_rnd_read(const int fd, char *buf, const size_t len) } else if (cfg_peek == CFG_AFTER_PEEK) { ret = recv(fd, buf, cap, MSG_PEEK); ret = (ret < 0) ? 
ret : read(fd, buf, cap); + } else if (cfg_cmsg_types.cmsg_enabled) { + ret = do_recvmsg_cmsg(fd, buf, cap); } else { ret = read(fd, buf, cap); } @@ -786,6 +860,48 @@ static void init_rng(void) srand(foo); } +static void xsetsockopt(int fd, int level, int optname, const void *optval, socklen_t optlen) +{ + int err; + + err = setsockopt(fd, level, optname, optval, optlen); + if (err) { + perror("setsockopt"); + exit(1); + } +} + +static void apply_cmsg_types(int fd, const struct cfg_cmsg_types *cmsg) +{ + static const unsigned int on = 1; + + if (cmsg->timestampns) + xsetsockopt(fd, SOL_SOCKET, SO_TIMESTAMPNS_NEW, &on, sizeof(on)); +} + +static void parse_cmsg_types(const char *type) +{ + char *next = strchr(type, ','); + unsigned int len = 0; + + cfg_cmsg_types.cmsg_enabled = 1; + + if (next) { + parse_cmsg_types(next + 1); + len = next - type; + } else { + len = strlen(type); + } + + if (strncmp(type, "TIMESTAMPNS", len) == 0) { + cfg_cmsg_types.timestampns = 1; + return; + } + + fprintf(stderr, "Unrecognized cmsg option %s\n", type); + exit(1); +} + int main_loop(void) { int fd; @@ -801,6 +917,8 @@ int main_loop(void) set_rcvbuf(fd, cfg_rcvbuf); if (cfg_sndbuf) set_sndbuf(fd, cfg_sndbuf); + if (cfg_cmsg_types.cmsg_enabled) + apply_cmsg_types(fd, &cfg_cmsg_types); return copyfd_io(0, fd, 1); } @@ -887,7 +1005,7 @@ static void parse_opts(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "6jr:lp:s:hut:m:S:R:w:M:P:")) != -1) { + while ((c = getopt(argc, argv, "6jr:lp:s:hut:m:S:R:w:M:P:c:")) != -1) { switch (c) { case 'j': cfg_join = true; @@ -943,6 +1061,9 @@ static void parse_opts(int argc, char **argv) case 'P': cfg_peek = parse_peek(optarg); break; + case 'c': + parse_cmsg_types(optarg); + break; } } @@ -976,6 +1097,8 @@ int main(int argc, char *argv[]) set_sndbuf(fd, cfg_sndbuf); if (cfg_mark) set_mark(fd, cfg_mark); + if (cfg_cmsg_types.cmsg_enabled) + apply_cmsg_types(fd, &cfg_cmsg_types); return main_loop_s(fd); } diff --git 
a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 2b495dc8d78e..559173a8e387 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -3,7 +3,7 @@ time_start=$(date +%s) -optstring="S:R:d:e:l:r:h4cm:f:t" +optstring="S:R:d:e:l:r:h4cm:f:tC" ret=0 sin="" sout="" @@ -22,6 +22,7 @@ sndbuf=0 rcvbuf=0 options_log=true do_tcp=0 +checksum=false filesize=0 if [ $tc_loss -eq 100 ];then @@ -47,6 +48,7 @@ usage() { echo -e "\t-R: set rcvbuf value (default: use kernel default)" echo -e "\t-m: test mode (poll, sendfile; default: poll)" echo -e "\t-t: also run tests with TCP (use twice to non-fallback tcp)" + echo -e "\t-C: enable the MPTCP data checksum" } while getopts "$optstring" option;do @@ -104,6 +106,9 @@ while getopts "$optstring" option;do "t") do_tcp=$((do_tcp+1)) ;; + "C") + checksum=true + ;; "?") usage $0 exit 1 @@ -197,6 +202,12 @@ ip -net "$ns4" link set ns4eth3 up ip -net "$ns4" route add default via 10.0.3.2 ip -net "$ns4" route add default via dead:beef:3::2 +if $checksum; then + for i in "$ns1" "$ns2" "$ns3" "$ns4";do + ip netns exec $i sysctl -q net.mptcp.checksum_enabled=1 + done +fi + set_ethtool_flags() { local ns="$1" local dev="$2" @@ -669,6 +680,25 @@ run_tests_peekmode() run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-P ${peekmode}" } +display_time() +{ + time_end=$(date +%s) + time_run=$((time_end-time_start)) + + echo "Time: ${time_run} seconds" +} + +stop_if_error() +{ + local msg="$1" + + if [ ${ret} -ne 0 ]; then + echo "FAIL: ${msg}" 1>&2 + display_time + exit ${ret} + fi +} + make_file "$cin" "client" make_file "$sin" "server" @@ -676,6 +706,8 @@ check_mptcp_disabled check_mptcp_ulp_setsockopt +stop_if_error "The kernel configuration is not valid for MPTCP" + echo "INFO: validating network environment with pings" for sender in "$ns1" "$ns2" "$ns3" "$ns4";do do_ping "$ns1" $sender 10.0.1.1 @@ -695,6 +727,8 @@ for sender in 
"$ns1" "$ns2" "$ns3" "$ns4";do do_ping "$ns4" $sender dead:beef:3::1 done +stop_if_error "Could not even run ping tests" + [ -n "$tc_loss" ] && tc -net "$ns2" qdisc add dev ns2eth3 root netem loss random $tc_loss delay ${tc_delay}ms echo -n "INFO: Using loss of $tc_loss " test "$tc_delay" -gt 0 && echo -n "delay $tc_delay ms " @@ -722,18 +756,13 @@ echo "on ns3eth4" tc -net "$ns3" qdisc add dev ns3eth4 root netem delay ${reorder_delay}ms $tc_reorder -for sender in $ns1 $ns2 $ns3 $ns4;do - run_tests_lo "$ns1" "$sender" 10.0.1.1 1 - if [ $ret -ne 0 ] ;then - echo "FAIL: Could not even run loopback test" 1>&2 - exit $ret - fi - run_tests_lo "$ns1" $sender dead:beef:1::1 1 - if [ $ret -ne 0 ] ;then - echo "FAIL: Could not even run loopback v6 test" 2>&1 - exit $ret - fi +run_tests_lo "$ns1" "$ns1" 10.0.1.1 1 +stop_if_error "Could not even run loopback test" +run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 +stop_if_error "Could not even run loopback v6 test" + +for sender in $ns1 $ns2 $ns3 $ns4;do # ns1<->ns2 is not subject to reordering/tc delays. Use it to test # mptcp syncookie support. 
if [ $sender = $ns1 ]; then @@ -742,6 +771,9 @@ for sender in $ns1 $ns2 $ns3 $ns4;do ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=1 fi + run_tests "$ns1" $sender 10.0.1.1 + run_tests "$ns1" $sender dead:beef:1::1 + run_tests "$ns2" $sender 10.0.1.2 run_tests "$ns2" $sender dead:beef:1::2 run_tests "$ns2" $sender 10.0.2.1 @@ -754,14 +786,13 @@ for sender in $ns1 $ns2 $ns3 $ns4;do run_tests "$ns4" $sender 10.0.3.1 run_tests "$ns4" $sender dead:beef:3::1 + + stop_if_error "Tests with $sender as a sender have failed" done run_tests_peekmode "saveWithPeek" run_tests_peekmode "saveAfterPeek" +stop_if_error "Tests with peek mode have failed" -time_end=$(date +%s) -time_run=$((time_end-time_start)) - -echo "Time: ${time_run} seconds" - +display_time exit $ret diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index fd99485cf2a4..9a191c1a5de8 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -12,6 +12,7 @@ timeout_poll=30 timeout_test=$((timeout_poll * 2 + 1)) mptcp_connect="" capture=0 +checksum=0 do_all_tests=1 TEST_COUNT=0 @@ -49,6 +50,9 @@ init() ip netns exec $netns sysctl -q net.mptcp.enabled=1 ip netns exec $netns sysctl -q net.ipv4.conf.all.rp_filter=0 ip netns exec $netns sysctl -q net.ipv4.conf.default.rp_filter=0 + if [ $checksum -eq 1 ]; then + ip netns exec $netns sysctl -q net.mptcp.checksum_enabled=1 + fi done # ns1 ns2 @@ -124,6 +128,28 @@ reset_with_add_addr_timeout() -j DROP } +reset_with_checksum() +{ + local ns1_enable=$1 + local ns2_enable=$2 + + reset + + ip netns exec $ns1 sysctl -q net.mptcp.checksum_enabled=$ns1_enable + ip netns exec $ns2 sysctl -q net.mptcp.checksum_enabled=$ns2_enable +} + +reset_with_allow_join_id0() +{ + local ns1_enable=$1 + local ns2_enable=$2 + + reset + + ip netns exec $ns1 sysctl -q net.mptcp.allow_join_initial_addr_port=$ns1_enable + ip netns exec $ns2 sysctl -q 
net.mptcp.allow_join_initial_addr_port=$ns2_enable +} + ip -Version > /dev/null 2>&1 if [ $? -ne 0 ];then echo "SKIP: Could not run test without ip tool" @@ -476,6 +502,45 @@ run_tests() fi } +chk_csum_nr() +{ + local msg=${1:-""} + local count + local dump_stats + + if [ ! -z "$msg" ]; then + printf "%02u" "$TEST_COUNT" + else + echo -n " " + fi + printf " %-36s %s" "$msg" "sum" + count=`ip netns exec $ns1 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}'` + [ -z "$count" ] && count=0 + if [ "$count" != 0 ]; then + echo "[fail] got $count data checksum error[s] expected 0" + ret=1 + dump_stats=1 + else + echo -n "[ ok ]" + fi + echo -n " - csum " + count=`ip netns exec $ns2 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}'` + [ -z "$count" ] && count=0 + if [ "$count" != 0 ]; then + echo "[fail] got $count data checksum error[s] expected 0" + ret=1 + dump_stats=1 + else + echo "[ ok ]" + fi + if [ "${dump_stats}" = 1 ]; then + echo Server ns stats + ip netns exec $ns1 nstat -as | grep MPTcp + echo Client ns stats + ip netns exec $ns2 nstat -as | grep MPTcp + fi +} + chk_join_nr() { local msg="$1" @@ -523,6 +588,9 @@ chk_join_nr() echo Client ns stats ip netns exec $ns2 nstat -as | grep MPTcp fi + if [ $checksum -eq 1 ]; then + chk_csum_nr + fi } chk_add_nr() @@ -1374,6 +1442,94 @@ syncookies_tests() chk_add_nr 1 1 } +checksum_tests() +{ + # checksum test 0 0 + reset_with_checksum 0 0 + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + run_tests $ns1 $ns2 10.0.1.1 + chk_csum_nr "checksum test 0 0" + + # checksum test 1 1 + reset_with_checksum 1 1 + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + run_tests $ns1 $ns2 10.0.1.1 + chk_csum_nr "checksum test 1 1" + + # checksum test 0 1 + reset_with_checksum 0 1 + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + run_tests $ns1 $ns2 10.0.1.1 + chk_csum_nr "checksum test 0 1" + + # checksum test 
1 0 + reset_with_checksum 1 0 + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + run_tests $ns1 $ns2 10.0.1.1 + chk_csum_nr "checksum test 1 0" +} + +deny_join_id0_tests() +{ + # subflow allow join id0 ns1 + reset_with_allow_join_id0 1 0 + ip netns exec $ns1 ./pm_nl_ctl limits 1 1 + ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "single subflow allow join id0 ns1" 1 1 1 + + # subflow allow join id0 ns2 + reset_with_allow_join_id0 0 1 + ip netns exec $ns1 ./pm_nl_ctl limits 1 1 + ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "single subflow allow join id0 ns2" 0 0 0 + + # signal address allow join id0 ns1 + # ADD_ADDRs are not affected by allow_join_id0 value. + reset_with_allow_join_id0 1 0 + ip netns exec $ns1 ./pm_nl_ctl limits 1 1 + ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "signal address allow join id0 ns1" 1 1 1 + chk_add_nr 1 1 + + # signal address allow join id0 ns2 + # ADD_ADDRs are not affected by allow_join_id0 value. 
+ reset_with_allow_join_id0 0 1 + ip netns exec $ns1 ./pm_nl_ctl limits 1 1 + ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "signal address allow join id0 ns2" 1 1 1 + chk_add_nr 1 1 + + # subflow and address allow join id0 ns1 + reset_with_allow_join_id0 1 0 + ip netns exec $ns1 ./pm_nl_ctl limits 2 2 + ip netns exec $ns2 ./pm_nl_ctl limits 2 2 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "subflow and address allow join id0 1" 2 2 2 + + # subflow and address allow join id0 ns2 + reset_with_allow_join_id0 0 1 + ip netns exec $ns1 ./pm_nl_ctl limits 2 2 + ip netns exec $ns2 ./pm_nl_ctl limits 2 2 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "subflow and address allow join id0 2" 1 1 1 +} + all_tests() { subflows_tests @@ -1387,6 +1543,8 @@ all_tests() backup_tests add_addr_ports_tests syncookies_tests + checksum_tests + deny_join_id0_tests } usage() @@ -1403,7 +1561,10 @@ usage() echo " -b backup_tests" echo " -p add_addr_ports_tests" echo " -k syncookies_tests" + echo " -S checksum_tests" + echo " -d deny_join_id0_tests" echo " -c capture pcap files" + echo " -C enable data checksum" echo " -h help" } @@ -1418,13 +1579,16 @@ make_file "$sin" "server" 1 trap cleanup EXIT for arg in "$@"; do - # check for "capture" arg before launching tests + # check for "capture/checksum" args before launching tests if [[ "${arg}" =~ ^"-"[0-9a-zA-Z]*"c"[0-9a-zA-Z]*$ ]]; then capture=1 fi + if [[ "${arg}" =~ ^"-"[0-9a-zA-Z]*"C"[0-9a-zA-Z]*$ ]]; then + checksum=1 + fi - # exception for the capture option, the rest means: a part of the tests - if [ "${arg}" != "-c" ]; then + # exception for the capture/checksum options, the rest means: a part of the 
tests + if [ "${arg}" != "-c" ] && [ "${arg}" != "-C" ]; then do_all_tests=0 fi done @@ -1434,7 +1598,7 @@ if [ $do_all_tests -eq 1 ]; then exit $ret fi -while getopts 'fsltra64bpkch' opt; do +while getopts 'fsltra64bpkdchCS' opt; do case $opt in f) subflows_tests @@ -1469,8 +1633,16 @@ while getopts 'fsltra64bpkch' opt; do k) syncookies_tests ;; + S) + checksum_tests + ;; + d) + deny_join_id0_tests + ;; c) ;; + C) + ;; h | *) usage ;; diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index 2fa13946ac04..1579e471a5e7 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -178,7 +178,7 @@ do_transfer() timeout ${timeout_test} \ ip netns exec ${listener_ns} \ - $mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} \ + $mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} -c TIMESTAMPNS \ ${local_addr} < "$sin" > "$sout" & spid=$! @@ -186,7 +186,7 @@ do_transfer() timeout ${timeout_test} \ ip netns exec ${connector_ns} \ - $mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} \ + $mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} -c TIMESTAMPNS \ $connect_addr < "$cin" > "$cout" & cpid=$! 
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index 3aeef3bcb101..fd63ebfe9a2b 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -60,6 +60,8 @@ setup() for i in "$ns1" "$ns2" "$ns3";do ip netns add $i || exit $ksft_skip ip -net $i link set lo up + ip netns exec $i sysctl -q net.ipv4.conf.all.rp_filter=0 + ip netns exec $i sysctl -q net.ipv4.conf.default.rp_filter=0 done ip link add ns1eth1 netns "$ns1" type veth peer name ns2eth1 netns "$ns2" @@ -80,7 +82,6 @@ setup() ip netns exec "$ns1" ./pm_nl_ctl limits 1 1 ip netns exec "$ns1" ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags subflow - ip netns exec "$ns1" sysctl -q net.ipv4.conf.all.rp_filter=0 ip -net "$ns2" addr add 10.0.1.2/24 dev ns2eth1 ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad diff --git a/tools/testing/selftests/net/so_netns_cookie.c b/tools/testing/selftests/net/so_netns_cookie.c new file mode 100644 index 000000000000..b39e87e967cd --- /dev/null +++ b/tools/testing/selftests/net/so_netns_cookie.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <sched.h> +#include <unistd.h> +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <stdlib.h> +#include <stdint.h> +#include <sys/types.h> +#include <sys/socket.h> + +#ifndef SO_NETNS_COOKIE +#define SO_NETNS_COOKIE 71 +#endif + +#define pr_err(fmt, ...) 
\ + ({ \ + fprintf(stderr, "%s:%d:" fmt ": %m\n", \ + __func__, __LINE__, ##__VA_ARGS__); \ + 1; \ + }) + +int main(int argc, char *argvp[]) +{ + uint64_t cookie1, cookie2; + socklen_t vallen; + int sock1, sock2; + + sock1 = socket(AF_INET, SOCK_STREAM, 0); + if (sock1 < 0) + return pr_err("Unable to create TCP socket"); + + vallen = sizeof(cookie1); + if (getsockopt(sock1, SOL_SOCKET, SO_NETNS_COOKIE, &cookie1, &vallen) != 0) + return pr_err("getsockopt(SOL_SOCKET, SO_NETNS_COOKIE)"); + + if (!cookie1) + return pr_err("SO_NETNS_COOKIE returned zero cookie"); + + if (unshare(CLONE_NEWNET)) + return pr_err("unshare"); + + sock2 = socket(AF_INET, SOCK_STREAM, 0); + if (sock2 < 0) + return pr_err("Unable to create TCP socket"); + + vallen = sizeof(cookie2); + if (getsockopt(sock2, SOL_SOCKET, SO_NETNS_COOKIE, &cookie2, &vallen) != 0) + return pr_err("getsockopt(SOL_SOCKET, SO_NETNS_COOKIE)"); + + if (!cookie2) + return pr_err("SO_NETNS_COOKIE returned zero cookie"); + + if (cookie1 == cookie2) + return pr_err("SO_NETNS_COOKIE returned identical cookies for distinct ns"); + + close(sock1); + close(sock2); + return 0; +} diff --git a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh new file mode 100755 index 000000000000..75ada17ac061 --- /dev/null +++ b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh @@ -0,0 +1,573 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# author: Andrea Mayer <andrea.mayer@uniroma2.it> +# author: Paolo Lungaroni <paolo.lungaroni@uniroma2.it> + +# This test is designed for evaluating the new SRv6 End.DT46 Behavior used for +# implementing IPv4/IPv6 L3 VPN use cases. +# +# The current SRv6 code in the Linux kernel only implements SRv6 End.DT4 and +# End.DT6 Behaviors which can be used respectively to support IPv4-in-IPv6 and +# IPv6-in-IPv6 VPNs. With End.DT4 and End.DT6 it is not possible to create a +# single SRv6 VPN tunnel to carry both IPv4 and IPv6 traffic. 
+# The SRv6 End.DT46 Behavior implementation is meant to support the +# decapsulation of IPv4 and IPv6 traffic coming from a single SRv6 tunnel. +# Therefore, the SRv6 End.DT46 Behavior in the Linux kernel greatly simplifies +# the setup and operations of SRv6 VPNs. +# +# Hereafter a network diagram is shown, where two different tenants (named 100 +# and 200) offer IPv4/IPv6 L3 VPN services allowing hosts to communicate with +# each other across an IPv6 network. +# +# Only hosts belonging to the same tenant (and to the same VPN) can communicate +# with each other. Instead, the communication among hosts of different tenants +# is forbidden. +# In other words, hosts hs-t100-1 and hs-t100-2 are connected through the +# IPv4/IPv6 L3 VPN of tenant 100 while hs-t200-3 and hs-t200-4 are connected +# using the IPv4/IPv6 L3 VPN of tenant 200. Cross connection between tenant 100 +# and tenant 200 is forbidden and thus, for example, hs-t100-1 cannot reach +# hs-t200-3 and vice versa. +# +# Routers rt-1 and rt-2 implement IPv4/IPv6 L3 VPN services leveraging the SRv6 +# architecture. The key components for such VPNs are: a) SRv6 Encap behavior, +# b) SRv6 End.DT46 Behavior and c) VRF. +# +# To explain how an IPv4/IPv6 L3 VPN based on SRv6 works, let us briefly +# consider an example where, within the same domain of tenant 100, the host +# hs-t100-1 pings the host hs-t100-2. +# +# First of all, L2 reachability of the host hs-t100-2 is taken into account by +# the router rt-1 which acts as an arp/ndp proxy. +# +# When the host hs-t100-1 sends an IPv6 or IPv4 packet destined to hs-t100-2, +# the router rt-1 receives the packet on the internal veth-t100 interface. Such +# interface is enslaved to the VRF vrf-100 whose associated table contains the +# SRv6 Encap route for encapsulating any IPv6 or IPv4 packet in an IPv6 plus the +# Segment Routing Header (SRH) packet. This packet is sent through the (IPv6) +# core network up to the router rt-2 that receives it on veth0 interface.
+# +# The rt-2 router uses the 'localsid' routing table to process incoming +# IPv6+SRH packets which belong to the VPN of the tenant 100. For each of these +# packets, the SRv6 End.DT46 Behavior removes the outer IPv6+SRH headers and +# performs the lookup on the vrf-100 table using the destination address of +# the decapsulated IPv6 or IPv4 packet. Afterwards, the packet is sent to the +# host hs-t100-2 through the veth-t100 interface. +# +# The ping response follows the same processing but this time the roles of rt-1 +# and rt-2 are swapped. +# +# Of course, the IPv4/IPv6 L3 VPN for tenant 200 works exactly as the IPv4/IPv6 +# L3 VPN for tenant 100. In this case, only hosts hs-t200-3 and hs-t200-4 are +# able to connect with each other. +# +# +# +-------------------+ +-------------------+ +# | | | | +# | hs-t100-1 netns | | hs-t100-2 netns | +# | | | | +# | +-------------+ | | +-------------+ | +# | | veth0 | | | | veth0 | | +# | | cafe::1/64 | | | | cafe::2/64 | | +# | | 10.0.0.1/24 | | | | 10.0.0.2/24 | | +# | +-------------+ | | +-------------+ | +# | . | | . | +# +-------------------+ +-------------------+ +# . . +# . . +# . . +# +-----------------------------------+ +-----------------------------------+ +# | . | | . 
| +# | +---------------+ | | +---------------- | +# | | veth-t100 | | | | veth-t100 | | +# | | cafe::254/64 | | | | cafe::254/64 | | +# | | 10.0.0.254/24 | +----------+ | | +----------+ | 10.0.0.254/24 | | +# | +-------+-------+ | localsid | | | | localsid | +-------+-------- | +# | | | table | | | | table | | | +# | +----+----+ +----------+ | | +----------+ +----+----+ | +# | | vrf-100 | | | | vrf-100 | | +# | +---------+ +------------+ | | +------------+ +---------+ | +# | | veth0 | | | | veth0 | | +# | | fd00::1/64 |.|...|.| fd00::2/64 | | +# | +---------+ +------------+ | | +------------+ +---------+ | +# | | vrf-200 | | | | vrf-200 | | +# | +----+----+ | | +----+----+ | +# | | | | | | +# | +-------+-------+ | | +-------+-------- | +# | | veth-t200 | | | | veth-t200 | | +# | | cafe::254/64 | | | | cafe::254/64 | | +# | | 10.0.0.254/24 | | | | 10.0.0.254/24 | | +# | +---------------+ rt-1 netns | | rt-2 netns +---------------- | +# | . | | . | +# +-----------------------------------+ +-----------------------------------+ +# . . +# . . +# . . +# . . +# +-------------------+ +-------------------+ +# | . | | . 
| +# | +-------------+ | | +-------------+ | +# | | veth0 | | | | veth0 | | +# | | cafe::3/64 | | | | cafe::4/64 | | +# | | 10.0.0.3/24 | | | | 10.0.0.4/24 | | +# | +-------------+ | | +-------------+ | +# | | | | +# | hs-t200-3 netns | | hs-t200-4 netns | +# | | | | +# +-------------------+ +-------------------+ +# +# +# ~~~~~~~~~~~~~~~~~~~~~~~~~ +# | Network configuration | +# ~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# rt-1: localsid table (table 90) +# +--------------------------------------------------+ +# |SID |Action | +# +--------------------------------------------------+ +# |fc00:21:100::6046|apply SRv6 End.DT46 vrftable 100| +# +--------------------------------------------------+ +# |fc00:21:200::6046|apply SRv6 End.DT46 vrftable 200| +# +--------------------------------------------------+ +# +# rt-1: VRF tenant 100 (table 100) +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |cafe::2 |apply seg6 encap segs fc00:12:100::6046| +# +---------------------------------------------------+ +# |cafe::/64 |forward to dev veth-t100 | +# +---------------------------------------------------+ +# |10.0.0.2 |apply seg6 encap segs fc00:12:100::6046| +# +---------------------------------------------------+ +# |10.0.0.0/24|forward to dev veth-t100 | +# +---------------------------------------------------+ +# +# rt-1: VRF tenant 200 (table 200) +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |cafe::4 |apply seg6 encap segs fc00:12:200::6046| +# +---------------------------------------------------+ +# |cafe::/64 |forward to dev veth-t200 | +# +---------------------------------------------------+ +# |10.0.0.4 |apply seg6 encap segs fc00:12:200::6046| +# +---------------------------------------------------+ +# |10.0.0.0/24|forward to dev veth-t200 | +# +---------------------------------------------------+ +# +# +# 
rt-2: localsid table (table 90) +# +--------------------------------------------------+ +# |SID |Action | +# +--------------------------------------------------+ +# |fc00:12:100::6046|apply SRv6 End.DT46 vrftable 100| +# +--------------------------------------------------+ +# |fc00:12:200::6046|apply SRv6 End.DT46 vrftable 200| +# +--------------------------------------------------+ +# +# rt-2: VRF tenant 100 (table 100) +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |cafe::1 |apply seg6 encap segs fc00:21:100::6046| +# +---------------------------------------------------+ +# |cafe::/64 |forward to dev veth-t100 | +# +---------------------------------------------------+ +# |10.0.0.1 |apply seg6 encap segs fc00:21:100::6046| +# +---------------------------------------------------+ +# |10.0.0.0/24|forward to dev veth-t100 | +# +---------------------------------------------------+ +# +# rt-2: VRF tenant 200 (table 200) +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |cafe::3 |apply seg6 encap segs fc00:21:200::6046| +# +---------------------------------------------------+ +# |cafe::/64 |forward to dev veth-t200 | +# +---------------------------------------------------+ +# |10.0.0.3 |apply seg6 encap segs fc00:21:200::6046| +# +---------------------------------------------------+ +# |10.0.0.0/24|forward to dev veth-t200 | +# +---------------------------------------------------+ +# + +readonly LOCALSID_TABLE_ID=90 +readonly IPv6_RT_NETWORK=fd00 +readonly IPv6_HS_NETWORK=cafe +readonly IPv4_HS_NETWORK=10.0.0 +readonly VPN_LOCATOR_SERVICE=fc00 +PING_TIMEOUT_SEC=4 + +ret=0 + +PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + nsuccess=$((nsuccess+1)) + printf "\n TEST: %-60s [ OK ]\n" "${msg}" + else + ret=1 
+ nfail=$((nfail+1)) + printf "\n TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +print_log_test_results() +{ + if [ "$TESTS" != "none" ]; then + printf "\nTests passed: %3d\n" ${nsuccess} + printf "Tests failed: %3d\n" ${nfail} + fi +} + +log_section() +{ + echo + echo "################################################################################" + echo "TEST SECTION: $*" + echo "################################################################################" +} + +cleanup() +{ + ip link del veth-rt-1 2>/dev/null || true + ip link del veth-rt-2 2>/dev/null || true + + # destroy routers rt-* and hosts hs-* + for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do + ip netns del ${ns} || true + done +} + +# Setup the basic networking for the routers +setup_rt_networking() +{ + local rt=$1 + local nsname=rt-${rt} + + ip netns add ${nsname} + ip link set veth-rt-${rt} netns ${nsname} + ip -netns ${nsname} link set veth-rt-${rt} name veth0 + + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0 + + ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 nodad + ip -netns ${nsname} link set veth0 up + ip -netns ${nsname} link set lo up + + ip netns exec ${nsname} sysctl -wq net.ipv4.ip_forward=1 + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.forwarding=1 +} + +setup_hs() +{ + local hs=$1 + local rt=$2 + local tid=$3 + local hsname=hs-t${tid}-${hs} + local rtname=rt-${rt} + local rtveth=veth-t${tid} + + # set the networking for the host + ip netns add ${hsname} + + ip netns exec ${hsname} sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec ${hsname} sysctl -wq net.ipv6.conf.default.accept_dad=0 + + ip -netns ${hsname} link add veth0 type veth peer name ${rtveth} + ip -netns ${hsname} link set ${rtveth} netns ${rtname} + ip -netns 
${hsname} addr add ${IPv6_HS_NETWORK}::${hs}/64 dev veth0 nodad + ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hs}/24 dev veth0 + ip -netns ${hsname} link set veth0 up + ip -netns ${hsname} link set lo up + + # configure the VRF for the tenant X on the router which is directly + # connected to the source host. + ip -netns ${rtname} link add vrf-${tid} type vrf table ${tid} + ip -netns ${rtname} link set vrf-${tid} up + + ip netns exec ${rtname} sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec ${rtname} sysctl -wq net.ipv6.conf.default.accept_dad=0 + + # enslave the veth-tX interface to the vrf-X in the access router + ip -netns ${rtname} link set ${rtveth} master vrf-${tid} + ip -netns ${rtname} addr add ${IPv6_HS_NETWORK}::254/64 dev ${rtveth} nodad + ip -netns ${rtname} addr add ${IPv4_HS_NETWORK}.254/24 dev ${rtveth} + ip -netns ${rtname} link set ${rtveth} up + + ip netns exec ${rtname} sysctl -wq net.ipv6.conf.${rtveth}.proxy_ndp=1 + ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.proxy_arp=1 + + # disable the rp_filter otherwise the kernel gets confused about how + # to route decap ipv4 packets. + ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0 + ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.rp_filter=0 + + ip netns exec ${rtname} sh -c "echo 1 > /proc/sys/net/vrf/strict_mode" +} + +setup_vpn_config() +{ + local hssrc=$1 + local rtsrc=$2 + local hsdst=$3 + local rtdst=$4 + local tid=$5 + + local hssrc_name=hs-t${tid}-${hssrc} + local hsdst_name=hs-t${tid}-${hsdst} + local rtsrc_name=rt-${rtsrc} + local rtdst_name=rt-${rtdst} + local rtveth=veth-t${tid} + local vpn_sid=${VPN_LOCATOR_SERVICE}:${hssrc}${hsdst}:${tid}::6046 + + ip -netns ${rtsrc_name} -6 neigh add proxy ${IPv6_HS_NETWORK}::${hsdst} dev ${rtveth} + + # set the encap route for encapsulating packets which arrive from the + # host hssrc and destined to the access router rtsrc. 
+ ip -netns ${rtsrc_name} -6 route add ${IPv6_HS_NETWORK}::${hsdst}/128 vrf vrf-${tid} \ + encap seg6 mode encap segs ${vpn_sid} dev veth0 + ip -netns ${rtsrc_name} -4 route add ${IPv4_HS_NETWORK}.${hsdst}/32 vrf vrf-${tid} \ + encap seg6 mode encap segs ${vpn_sid} dev veth0 + ip -netns ${rtsrc_name} -6 route add ${vpn_sid}/128 vrf vrf-${tid} \ + via fd00::${rtdst} dev veth0 + + # set the decap route for decapsulating packets which arrive from + # the rtdst router and destined to the hsdst host. + ip -netns ${rtdst_name} -6 route add ${vpn_sid}/128 table ${LOCALSID_TABLE_ID} \ + encap seg6local action End.DT46 vrftable ${tid} dev vrf-${tid} + + # all sids for VPNs start with a common locator which is fc00::/16. + # Routes for handling the SRv6 End.DT46 behavior instances are grouped + # together in the 'localsid' table. + # + # NOTE: added only once + if [ -z "$(ip -netns ${rtdst_name} -6 rule show | \ + grep "to ${VPN_LOCATOR_SERVICE}::/16 lookup ${LOCALSID_TABLE_ID}")" ]; then + ip -netns ${rtdst_name} -6 rule add \ + to ${VPN_LOCATOR_SERVICE}::/16 \ + lookup ${LOCALSID_TABLE_ID} prio 999 + fi + + # set default routes to unreachable for both ipv4 and ipv6 + ip -netns ${rtsrc_name} -6 route add unreachable default metric 4278198272 \ + vrf vrf-${tid} + + ip -netns ${rtsrc_name} -4 route add unreachable default metric 4278198272 \ + vrf vrf-${tid} +} + +setup() +{ + ip link add veth-rt-1 type veth peer name veth-rt-2 + # setup the networking for router rt-1 and router rt-2 + setup_rt_networking 1 + setup_rt_networking 2 + + # setup two hosts for the tenant 100. + # - host hs-1 is directly connected to the router rt-1; + # - host hs-2 is directly connected to the router rt-2. + setup_hs 1 1 100 #args: host router tenant + setup_hs 2 2 100 + + # setup two hosts for the tenant 200 + # - host hs-3 is directly connected to the router rt-1; + # - host hs-4 is directly connected to the router rt-2. 
+ setup_hs 3 1 200 + setup_hs 4 2 200 + + # setup the IPv4/IPv6 L3 VPN which connects the host hs-t100-1 and host + # hs-t100-2 within the same tenant 100. + setup_vpn_config 1 1 2 2 100 #args: src_host src_router dst_host dst_router tenant + setup_vpn_config 2 2 1 1 100 + + # setup the IPv4/IPv6 L3 VPN which connects the host hs-t200-3 and host + # hs-t200-4 within the same tenant 200. + setup_vpn_config 3 1 4 2 200 + setup_vpn_config 4 2 3 1 200 +} + +check_rt_connectivity() +{ + local rtsrc=$1 + local rtdst=$2 + + ip netns exec rt-${rtsrc} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \ + >/dev/null 2>&1 +} + +check_and_log_rt_connectivity() +{ + local rtsrc=$1 + local rtdst=$2 + + check_rt_connectivity ${rtsrc} ${rtdst} + log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}" +} + +check_hs_ipv6_connectivity() +{ + local hssrc=$1 + local hsdst=$2 + local tid=$3 + + ip netns exec hs-t${tid}-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \ + ${IPv6_HS_NETWORK}::${hsdst} >/dev/null 2>&1 +} + +check_hs_ipv4_connectivity() +{ + local hssrc=$1 + local hsdst=$2 + local tid=$3 + + ip netns exec hs-t${tid}-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \ + ${IPv4_HS_NETWORK}.${hsdst} >/dev/null 2>&1 +} + +check_and_log_hs_connectivity() +{ + local hssrc=$1 + local hsdst=$2 + local tid=$3 + + check_hs_ipv6_connectivity ${hssrc} ${hsdst} ${tid} + log_test $? 0 "IPv6 Hosts connectivity: hs-t${tid}-${hssrc} -> hs-t${tid}-${hsdst} (tenant ${tid})" + + check_hs_ipv4_connectivity ${hssrc} ${hsdst} ${tid} + log_test $? 0 "IPv4 Hosts connectivity: hs-t${tid}-${hssrc} -> hs-t${tid}-${hsdst} (tenant ${tid})" + +} + +check_and_log_hs_isolation() +{ + local hssrc=$1 + local tidsrc=$2 + local hsdst=$3 + local tiddst=$4 + + check_hs_ipv6_connectivity ${hssrc} ${hsdst} ${tidsrc} + # NOTE: ping should fail + log_test $? 
1 "IPv6 Hosts isolation: hs-t${tidsrc}-${hssrc} -X-> hs-t${tiddst}-${hsdst}" + + check_hs_ipv4_connectivity ${hssrc} ${hsdst} ${tidsrc} + # NOTE: ping should fail + log_test $? 1 "IPv4 Hosts isolation: hs-t${tidsrc}-${hssrc} -X-> hs-t${tiddst}-${hsdst}" + +} + + +check_and_log_hs2gw_connectivity() +{ + local hssrc=$1 + local tid=$2 + + check_hs_ipv6_connectivity ${hssrc} 254 ${tid} + log_test $? 0 "IPv6 Hosts connectivity: hs-t${tid}-${hssrc} -> gw (tenant ${tid})" + + check_hs_ipv4_connectivity ${hssrc} 254 ${tid} + log_test $? 0 "IPv4 Hosts connectivity: hs-t${tid}-${hssrc} -> gw (tenant ${tid})" + +} + +router_tests() +{ + log_section "IPv6 routers connectivity test" + + check_and_log_rt_connectivity 1 2 + check_and_log_rt_connectivity 2 1 +} + +host2gateway_tests() +{ + log_section "IPv4/IPv6 connectivity test among hosts and gateway" + + check_and_log_hs2gw_connectivity 1 100 + check_and_log_hs2gw_connectivity 2 100 + + check_and_log_hs2gw_connectivity 3 200 + check_and_log_hs2gw_connectivity 4 200 +} + +host_vpn_tests() +{ + log_section "SRv6 VPN connectivity test among hosts in the same tenant" + + check_and_log_hs_connectivity 1 2 100 + check_and_log_hs_connectivity 2 1 100 + + check_and_log_hs_connectivity 3 4 200 + check_and_log_hs_connectivity 4 3 200 +} + +host_vpn_isolation_tests() +{ + local i + local j + local k + local tmp + local l1="1 2" + local l2="3 4" + local t1=100 + local t2=200 + + log_section "SRv6 VPN isolation test among hosts in different tenants" + + for k in 0 1; do + for i in ${l1}; do + for j in ${l2}; do + check_and_log_hs_isolation ${i} ${t1} ${j} ${t2} + done + done + + # let us test the reverse path + tmp="${l1}"; l1="${l2}"; l2="${tmp}" + tmp=${t1}; t1=${t2}; t2=${tmp} + done +} + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit 4 # kselftest SKIP code (exit 0 would be reported as a pass) +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit 4 # kselftest SKIP code +fi + +modprobe vrf &>/dev/null +if [ ! 
-e /proc/sys/net/vrf/strict_mode ]; then + echo "SKIP: vrf sysctl does not exist" + exit 4 # kselftest SKIP code +fi + +cleanup &>/dev/null + +setup + +router_tests +host2gateway_tests +host_vpn_tests +host_vpn_isolation_tests + +print_log_test_results + +cleanup &>/dev/null + +exit ${ret} diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 426d07875a48..112d41d01b12 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -25,6 +25,47 @@ #define TLS_PAYLOAD_MAX_LEN 16384 #define SOL_TLS 282 +struct tls_crypto_info_keys { + union { + struct tls12_crypto_info_aes_gcm_128 aes128; + struct tls12_crypto_info_chacha20_poly1305 chacha20; + }; + size_t len; +}; + +static void tls_crypto_info_init(uint16_t tls_version, uint16_t cipher_type, + struct tls_crypto_info_keys *tls12) +{ + memset(tls12, 0, sizeof(*tls12)); + + switch (cipher_type) { + case TLS_CIPHER_CHACHA20_POLY1305: + tls12->len = sizeof(struct tls12_crypto_info_chacha20_poly1305); + tls12->chacha20.info.version = tls_version; + tls12->chacha20.info.cipher_type = cipher_type; + break; + case TLS_CIPHER_AES_GCM_128: + tls12->len = sizeof(struct tls12_crypto_info_aes_gcm_128); + tls12->aes128.info.version = tls_version; + tls12->aes128.info.cipher_type = cipher_type; + break; + default: + break; + } +} + +static void memrnd(void *s, size_t n) +{ + int *dword = s; + char *byte; + + for (; n >= 4; n -= 4) + *dword++ = rand(); + byte = (void *)dword; + while (n--) + *byte++ = rand(); +} + FIXTURE(tls_basic) { int fd, cfd; @@ -133,33 +174,16 @@ FIXTURE_VARIANT_ADD(tls, 13_chacha) FIXTURE_SETUP(tls) { - union { - struct tls12_crypto_info_aes_gcm_128 aes128; - struct tls12_crypto_info_chacha20_poly1305 chacha20; - } tls12; + struct tls_crypto_info_keys tls12; struct sockaddr_in addr; socklen_t len; int sfd, ret; - size_t tls12_sz; self->notls = false; len = sizeof(addr); - memset(&tls12, 0, sizeof(tls12)); - switch (variant->cipher_type) { - case 
TLS_CIPHER_CHACHA20_POLY1305: - tls12_sz = sizeof(struct tls12_crypto_info_chacha20_poly1305); - tls12.chacha20.info.version = variant->tls_version; - tls12.chacha20.info.cipher_type = variant->cipher_type; - break; - case TLS_CIPHER_AES_GCM_128: - tls12_sz = sizeof(struct tls12_crypto_info_aes_gcm_128); - tls12.aes128.info.version = variant->tls_version; - tls12.aes128.info.cipher_type = variant->cipher_type; - break; - default: - tls12_sz = 0; - } + tls_crypto_info_init(variant->tls_version, variant->cipher_type, + &tls12); addr.sin_family = AF_INET; addr.sin_addr.s_addr = htonl(INADDR_ANY); @@ -187,7 +211,7 @@ FIXTURE_SETUP(tls) if (!self->notls) { ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, - tls12_sz); + tls12.len); ASSERT_EQ(ret, 0); } @@ -200,7 +224,7 @@ FIXTURE_SETUP(tls) ASSERT_EQ(ret, 0); ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, - tls12_sz); + tls12.len); ASSERT_EQ(ret, 0); } @@ -308,6 +332,8 @@ TEST_F(tls, recv_max) char recv_mem[TLS_PAYLOAD_MAX_LEN]; char buf[TLS_PAYLOAD_MAX_LEN]; + memrnd(buf, sizeof(buf)); + EXPECT_GE(send(self->fd, buf, send_len, 0), 0); EXPECT_NE(recv(self->cfd, recv_mem, send_len, 0), -1); EXPECT_EQ(memcmp(buf, recv_mem, send_len), 0); @@ -588,6 +614,8 @@ TEST_F(tls, recvmsg_single_max) struct iovec vec; struct msghdr hdr; + memrnd(send_mem, sizeof(send_mem)); + EXPECT_EQ(send(self->fd, send_mem, send_len, 0), send_len); vec.iov_base = (char *)recv_mem; vec.iov_len = TLS_PAYLOAD_MAX_LEN; @@ -610,6 +638,8 @@ TEST_F(tls, recvmsg_multiple) struct msghdr hdr; int i; + memrnd(buf, sizeof(buf)); + EXPECT_EQ(send(self->fd, buf, send_len, 0), send_len); for (i = 0; i < msg_iovlen; i++) { iov_base[i] = (char *)malloc(iov_len); @@ -634,6 +664,8 @@ TEST_F(tls, single_send_multiple_recv) char send_mem[TLS_PAYLOAD_MAX_LEN * 2]; char recv_mem[TLS_PAYLOAD_MAX_LEN * 2]; + memrnd(send_mem, sizeof(send_mem)); + EXPECT_GE(send(self->fd, send_mem, total_len, 0), 0); memset(recv_mem, 0, total_len); @@ -834,18 +866,17 @@ TEST_F(tls, 
bidir) int ret; if (!self->notls) { - struct tls12_crypto_info_aes_gcm_128 tls12; + struct tls_crypto_info_keys tls12; - memset(&tls12, 0, sizeof(tls12)); - tls12.info.version = variant->tls_version; - tls12.info.cipher_type = TLS_CIPHER_AES_GCM_128; + tls_crypto_info_init(variant->tls_version, variant->cipher_type, + &tls12); ret = setsockopt(self->fd, SOL_TLS, TLS_RX, &tls12, - sizeof(tls12)); + tls12.len); ASSERT_EQ(ret, 0); ret = setsockopt(self->cfd, SOL_TLS, TLS_TX, &tls12, - sizeof(tls12)); + tls12.len); ASSERT_EQ(ret, 0); } diff --git a/tools/testing/selftests/net/unicast_extensions.sh b/tools/testing/selftests/net/unicast_extensions.sh index dbf0421986df..66354cdd5ce4 100755 --- a/tools/testing/selftests/net/unicast_extensions.sh +++ b/tools/testing/selftests/net/unicast_extensions.sh @@ -189,6 +189,15 @@ segmenttest 255.255.255.1 255.255.255.254 24 "assign and ping inside 255.255.255 route_test 240.5.6.7 240.5.6.1 255.1.2.1 255.1.2.3 24 "route between 240.5.6/24 and 255.1.2/24 (is allowed)" route_test 0.200.6.7 0.200.38.1 245.99.101.1 245.99.200.111 16 "route between 0.200/16 and 245.99/16 (is allowed)" # +# Test support for lowest address ending in .0 +segmenttest 5.10.15.20 5.10.15.0 24 "assign and ping lowest address (/24)" +# +# Test support for lowest address not ending in .0 +segmenttest 192.168.101.192 192.168.101.193 26 "assign and ping lowest address (/26)" +# +# Routing using lowest address as a gateway/endpoint +route_test 192.168.42.1 192.168.42.0 9.8.7.6 9.8.7.0 24 "routing using lowest address" +# # ============================================== # ==== TESTS THAT CURRENTLY EXPECT FAILURE ===== # ============================================== @@ -202,14 +211,6 @@ segmenttest 255.255.255.1 255.255.255.255 16 "assigning 255.255.255.255 (is forb # Currently Linux does not allow this, so this should fail too segmenttest 127.99.4.5 127.99.4.6 16 "assign and ping inside 127/8 (is forbidden)" # -# Test support for lowest address -# Currently Linux 
does not allow this, so this should fail too -segmenttest 5.10.15.20 5.10.15.0 24 "assign and ping lowest address (is forbidden)" -# -# Routing using lowest address as a gateway/endpoint -# Currently Linux does not allow this, so this should fail too -route_test 192.168.42.1 192.168.42.0 9.8.7.6 9.8.7.0 24 "routing using lowest address (is forbidden)" -# # Test support for unicast use of class D # Currently Linux does not allow this, so this should fail too segmenttest 225.1.2.3 225.1.2.200 24 "assign and ping class D address (is forbidden)" diff --git a/tools/testing/selftests/openat2/openat2_test.c b/tools/testing/selftests/openat2/openat2_test.c index 381d874cce99..d7ec1e7da0d0 100644 --- a/tools/testing/selftests/openat2/openat2_test.c +++ b/tools/testing/selftests/openat2/openat2_test.c @@ -155,7 +155,7 @@ struct flag_test { int err; }; -#define NUM_OPENAT2_FLAG_TESTS 24 +#define NUM_OPENAT2_FLAG_TESTS 25 void test_openat2_flags(void) { @@ -229,6 +229,11 @@ void test_openat2_flags(void) { .name = "invalid how.resolve and O_PATH", .how.flags = O_PATH, .how.resolve = 0x1337, .err = -EINVAL }, + + /* currently unknown upper 32 bit rejected. 
*/ + { .name = "currently unknown bit (1 << 63)", + .how.flags = O_RDONLY | (1ULL << 63), + .how.resolve = 0, .err = -EINVAL }, }; BUILD_BUG_ON(ARRAY_LEN(tests) != NUM_OPENAT2_FLAG_TESTS); diff --git a/tools/testing/selftests/rlimits/.gitignore b/tools/testing/selftests/rlimits/.gitignore new file mode 100644 index 000000000000..091021f255b3 --- /dev/null +++ b/tools/testing/selftests/rlimits/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +rlimits-per-userns diff --git a/tools/testing/selftests/rlimits/Makefile b/tools/testing/selftests/rlimits/Makefile new file mode 100644 index 000000000000..03aadb406212 --- /dev/null +++ b/tools/testing/selftests/rlimits/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0-or-later + +CFLAGS += -Wall -O2 -g +TEST_GEN_PROGS := rlimits-per-userns + +include ../lib.mk diff --git a/tools/testing/selftests/rlimits/config b/tools/testing/selftests/rlimits/config new file mode 100644 index 000000000000..416bd53ce982 --- /dev/null +++ b/tools/testing/selftests/rlimits/config @@ -0,0 +1 @@ +CONFIG_USER_NS=y diff --git a/tools/testing/selftests/rlimits/rlimits-per-userns.c b/tools/testing/selftests/rlimits/rlimits-per-userns.c new file mode 100644 index 000000000000..26dc949e93ea --- /dev/null +++ b/tools/testing/selftests/rlimits/rlimits-per-userns.c @@ -0,0 +1,161 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Author: Alexey Gladkov <gladkov.alexey@gmail.com> + */ +#define _GNU_SOURCE +#include <sys/types.h> +#include <sys/wait.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <sys/prctl.h> +#include <sys/stat.h> + +#include <unistd.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <sched.h> +#include <signal.h> +#include <limits.h> +#include <fcntl.h> +#include <errno.h> +#include <err.h> + +#define NR_CHILDS 2 + +static char *service_prog; +static uid_t user = 60000; +static uid_t group = 60000; + +static void setrlimit_nproc(rlim_t n) +{ + pid_t pid = 
getpid(); + struct rlimit limit = { + .rlim_cur = n, + .rlim_max = n + }; + + warnx("(pid=%d): Setting RLIMIT_NPROC=%lu", pid, (unsigned long)n); /* rlim_t is unsigned and may be wider than long */ + + if (setrlimit(RLIMIT_NPROC, &limit) < 0) + err(EXIT_FAILURE, "(pid=%d): setrlimit(RLIMIT_NPROC)", pid); +} + +static pid_t fork_child(void) +{ + pid_t pid = fork(); + + if (pid < 0) + err(EXIT_FAILURE, "fork"); + + if (pid > 0) + return pid; + + pid = getpid(); + + warnx("(pid=%d): New process starting ...", pid); + + if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0) + err(EXIT_FAILURE, "(pid=%d): prctl(PR_SET_PDEATHSIG)", pid); + + signal(SIGUSR1, SIG_DFL); + + warnx("(pid=%d): Changing to uid=%u, gid=%u", pid, user, group); + + if (setgid(group) < 0) + err(EXIT_FAILURE, "(pid=%d): setgid(%u)", pid, group); + if (setuid(user) < 0) + err(EXIT_FAILURE, "(pid=%d): setuid(%u)", pid, user); + + warnx("(pid=%d): Service running ...", pid); + + warnx("(pid=%d): Unshare user namespace", pid); + if (unshare(CLONE_NEWUSER) < 0) + err(EXIT_FAILURE, "unshare(CLONE_NEWUSER)"); + + char *const argv[] = { "service", NULL }; + char *const envp[] = { "I_AM_SERVICE=1", NULL }; + + warnx("(pid=%d): Executing real service ...", pid); + + execve(service_prog, argv, envp); + err(EXIT_FAILURE, "(pid=%d): execve", pid); +} + +int main(int argc, char **argv) +{ + size_t i; + pid_t child[NR_CHILDS]; + int wstatus[NR_CHILDS]; + int childs = NR_CHILDS; + pid_t pid; + + if (getenv("I_AM_SERVICE")) { + pause(); + exit(EXIT_SUCCESS); + } + + service_prog = argv[0]; + pid = getpid(); + + warnx("(pid=%d) Starting testcase", pid); + + /* + * This rlimit is not a problem for root because it can be exceeded. 
+ */ + setrlimit_nproc(1); + + for (i = 0; i < NR_CHILDS; i++) { + child[i] = fork_child(); + wstatus[i] = 0; + usleep(250000); + } + + while (1) { + for (i = 0; i < NR_CHILDS; i++) { + if (child[i] <= 0) + continue; + + errno = 0; + pid_t ret = waitpid(child[i], &wstatus[i], WNOHANG); + + if (!ret || (!WIFEXITED(wstatus[i]) && !WIFSIGNALED(wstatus[i]))) + continue; + + if (ret < 0 && errno != ECHILD) + warn("(pid=%d): waitpid(%d)", pid, child[i]); + + child[i] *= -1; + childs -= 1; + } + + if (!childs) + break; + + usleep(250000); + + for (i = 0; i < NR_CHILDS; i++) { + if (child[i] <= 0) + continue; + kill(child[i], SIGUSR1); + } + } + + for (i = 0; i < NR_CHILDS; i++) { + if (WIFEXITED(wstatus[i])) + warnx("(pid=%d): pid %d exited, status=%d", + pid, -child[i], WEXITSTATUS(wstatus[i])); + else if (WIFSIGNALED(wstatus[i])) + warnx("(pid=%d): pid %d killed by signal %d", + pid, -child[i], WTERMSIG(wstatus[i])); + + if (WIFSIGNALED(wstatus[i]) && WTERMSIG(wstatus[i]) == SIGUSR1) + continue; + + warnx("(pid=%d): Test failed", pid); + exit(EXIT_FAILURE); + } + + warnx("(pid=%d): Test passed", pid); + exit(EXIT_SUCCESS); +} diff --git a/tools/testing/selftests/sched/.gitignore b/tools/testing/selftests/sched/.gitignore new file mode 100644 index 000000000000..6996d4654d92 --- /dev/null +++ b/tools/testing/selftests/sched/.gitignore @@ -0,0 +1 @@ +cs_prctl_test diff --git a/tools/testing/selftests/sched/Makefile b/tools/testing/selftests/sched/Makefile new file mode 100644 index 000000000000..10c72f14fea9 --- /dev/null +++ b/tools/testing/selftests/sched/Makefile @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: GPL-2.0+ + +ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep clang),) +CLANG_FLAGS += -no-integrated-as +endif + +CFLAGS += -O2 -Wall -g -I./ -I../../../../usr/include/ -Wl,-rpath=./ \ + $(CLANG_FLAGS) +LDLIBS += -lpthread + +TEST_GEN_FILES := cs_prctl_test +TEST_PROGS := cs_prctl_test + +include ../lib.mk diff --git a/tools/testing/selftests/sched/config 
b/tools/testing/selftests/sched/config new file mode 100644 index 000000000000..e8b09aa7c0c4 --- /dev/null +++ b/tools/testing/selftests/sched/config @@ -0,0 +1 @@ +CONFIG_SCHED_DEBUG=y diff --git a/tools/testing/selftests/sched/cs_prctl_test.c b/tools/testing/selftests/sched/cs_prctl_test.c new file mode 100644 index 000000000000..63fe6521c56d --- /dev/null +++ b/tools/testing/selftests/sched/cs_prctl_test.c @@ -0,0 +1,338 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Use the core scheduling prctl() to test core scheduling cookies control. + * + * Copyright (c) 2021 Oracle and/or its affiliates. + * Author: Chris Hyser <chris.hyser@oracle.com> + * + * + * This library is free software; you can redistribute it and/or modify it + * under the terms of version 2.1 of the GNU Lesser General Public License as + * published by the Free Software Foundation. + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, see <http://www.gnu.org/licenses>. 
+ */ + +#define _GNU_SOURCE +#include <sys/eventfd.h> +#include <sys/wait.h> +#include <sys/types.h> +#include <sched.h> +#include <sys/prctl.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> +#include <time.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#if __GLIBC_PREREQ(2, 30) == 0 +#include <sys/syscall.h> +static pid_t gettid(void) +{ + return syscall(SYS_gettid); +} +#endif + +#ifndef PR_SCHED_CORE +#define PR_SCHED_CORE 62 +# define PR_SCHED_CORE_GET 0 +# define PR_SCHED_CORE_CREATE 1 /* create unique core_sched cookie */ +# define PR_SCHED_CORE_SHARE_TO 2 /* push core_sched cookie to pid */ +# define PR_SCHED_CORE_SHARE_FROM 3 /* pull core_sched cookie to pid */ +# define PR_SCHED_CORE_MAX 4 +#endif + +#define MAX_PROCESSES 128 +#define MAX_THREADS 128 + +static const char USAGE[] = "cs_prctl_test [options]\n" +" options:\n" +" -P : number of processes to create.\n" +" -T : number of threads per process to create.\n" +" -d : delay time to keep tasks alive.\n" +" -k : keep tasks alive until keypress.\n"; + +enum pid_type {PIDTYPE_PID = 0, PIDTYPE_TGID, PIDTYPE_PGID}; + +const int THREAD_CLONE_FLAGS = CLONE_THREAD | CLONE_SIGHAND | CLONE_FS | CLONE_VM | CLONE_FILES; + +static int _prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, + unsigned long arg5) +{ + int res; + + res = prctl(option, arg2, arg3, arg4, arg5); + printf("%d = prctl(%d, %ld, %ld, %ld, %lx)\n", res, option, (long)arg2, (long)arg3, + (long)arg4, arg5); + return res; +} + +#define STACK_SIZE (1024 * 1024) + +#define handle_error(msg) __handle_error(__FILE__, __LINE__, msg) +static void __handle_error(char *fn, int ln, char *msg) +{ + printf("(%s:%d) - ", fn, ln); + perror(msg); + exit(EXIT_FAILURE); +} + +static void handle_usage(int rc, char *msg) +{ + puts(USAGE); + puts(msg); + putchar('\n'); + exit(rc); +} + +static unsigned long get_cs_cookie(int pid) +{ + unsigned long long cookie; + int ret; + + ret = 
prctl(PR_SCHED_CORE, PR_SCHED_CORE_GET, pid, PIDTYPE_PID, + (unsigned long)&cookie); + if (ret) { + printf("Not a core sched system\n"); + return -1UL; + } + + return cookie; +} + +struct child_args { + int num_threads; + int pfd[2]; + int cpid; + int thr_tids[MAX_THREADS]; +}; + +static int child_func_thread(void __attribute__((unused))*arg) +{ + while (1) + usleep(20000); + return 0; +} + +static void create_threads(int num_threads, int thr_tids[]) +{ + void *child_stack; + pid_t tid; + int i; + + for (i = 0; i < num_threads; ++i) { + child_stack = malloc(STACK_SIZE); + if (!child_stack) + handle_error("child stack allocate"); + + tid = clone(child_func_thread, child_stack + STACK_SIZE, THREAD_CLONE_FLAGS, NULL); + if (tid == -1) + handle_error("clone thread"); + thr_tids[i] = tid; + } +} + +static int child_func_process(void *arg) +{ + struct child_args *ca = (struct child_args *)arg; + + close(ca->pfd[0]); + + create_threads(ca->num_threads, ca->thr_tids); + + write(ca->pfd[1], &ca->thr_tids, sizeof(int) * ca->num_threads); + close(ca->pfd[1]); + + while (1) + usleep(20000); + return 0; +} + +static unsigned char child_func_process_stack[STACK_SIZE]; + +void create_processes(int num_processes, int num_threads, struct child_args proc[]) +{ + pid_t cpid; + int i; + + for (i = 0; i < num_processes; ++i) { + proc[i].num_threads = num_threads; + + if (pipe(proc[i].pfd) == -1) + handle_error("pipe() failed"); + + cpid = clone(child_func_process, child_func_process_stack + STACK_SIZE, + SIGCHLD, &proc[i]); + proc[i].cpid = cpid; + close(proc[i].pfd[1]); + } + + for (i = 0; i < num_processes; ++i) { + read(proc[i].pfd[0], &proc[i].thr_tids, sizeof(int) * proc[i].num_threads); + close(proc[i].pfd[0]); + } +} + +void disp_processes(int num_processes, struct child_args proc[]) +{ + int i, j; + + printf("tid=%d, / tgid=%d / pgid=%d: %lx\n", gettid(), getpid(), getpgid(0), + get_cs_cookie(getpid())); + + for (i = 0; i < num_processes; ++i) { + printf(" tid=%d, / tgid=%d / 
pgid=%d: %lx\n", proc[i].cpid, proc[i].cpid, + getpgid(proc[i].cpid), get_cs_cookie(proc[i].cpid)); + for (j = 0; j < proc[i].num_threads; ++j) { + printf(" tid=%d, / tgid=%d / pgid=%d: %lx\n", proc[i].thr_tids[j], + proc[i].cpid, getpgid(0), get_cs_cookie(proc[i].thr_tids[j])); + } + } + puts("\n"); +} + +static int errors; + +#define validate(v) _validate(__LINE__, v, #v) +void _validate(int line, int val, char *msg) +{ + if (!val) { + ++errors; + printf("(%d) FAILED: %s\n", line, msg); + } else { + printf("(%d) PASSED: %s\n", line, msg); + } +} + +int main(int argc, char *argv[]) +{ + struct child_args procs[MAX_PROCESSES]; + + int keypress = 0; + int num_processes = 2; + int num_threads = 3; + int delay = 0; + int res = 0; + int pidx; + int pid; + int opt; + + while ((opt = getopt(argc, argv, ":hkT:P:d:")) != -1) { + switch (opt) { + case 'P': + num_processes = (int)strtol(optarg, NULL, 10); + break; + case 'T': + num_threads = (int)strtoul(optarg, NULL, 10); + break; + case 'd': + delay = (int)strtol(optarg, NULL, 10); + break; + case 'k': + keypress = 1; + break; + case 'h': + fputs(USAGE, stdout); /* USAGE is plain text, not a format string */ + exit(EXIT_SUCCESS); + default: + handle_usage(20, "unknown option"); + } + } + + if (num_processes < 1 || num_processes > MAX_PROCESSES) + handle_usage(1, "Bad processes value"); + + if (num_threads < 1 || num_threads > MAX_THREADS) + handle_usage(2, "Bad thread value"); + + if (keypress) + delay = -1; + + srand(time(NULL)); + + /* put into separate process group */ + if (setpgid(0, 0) != 0) + handle_error("process group"); + + printf("\n## Create a thread/process/process group hierarchy\n"); + create_processes(num_processes, num_threads, procs); + disp_processes(num_processes, procs); + validate(get_cs_cookie(0) == 0); + + printf("\n## Set a cookie on entire process group\n"); + if (_prctl(PR_SCHED_CORE, PR_SCHED_CORE_CREATE, 0, PIDTYPE_PGID, 0) < 0) + handle_error("core_sched create failed -- PGID"); + disp_processes(num_processes, procs); + + validate(get_cs_cookie(0) 
!= 0); + + /* get a random process pid */ + pidx = rand() % num_processes; + pid = procs[pidx].cpid; + + validate(get_cs_cookie(0) == get_cs_cookie(pid)); + validate(get_cs_cookie(0) == get_cs_cookie(procs[pidx].thr_tids[0])); + + printf("\n## Set a new cookie on entire process/TGID [%d]\n", pid); + if (_prctl(PR_SCHED_CORE, PR_SCHED_CORE_CREATE, pid, PIDTYPE_TGID, 0) < 0) + handle_error("core_sched create failed -- TGID"); + disp_processes(num_processes, procs); + + validate(get_cs_cookie(0) != get_cs_cookie(pid)); + validate(get_cs_cookie(pid) != 0); + validate(get_cs_cookie(pid) == get_cs_cookie(procs[pidx].thr_tids[0])); + + printf("\n## Copy the cookie of current/PGID[%d], to pid [%d] as PIDTYPE_PID\n", + getpid(), pid); + if (_prctl(PR_SCHED_CORE, PR_SCHED_CORE_SHARE_TO, pid, PIDTYPE_PID, 0) < 0) + handle_error("core_sched share to itself failed -- PID"); + disp_processes(num_processes, procs); + + validate(get_cs_cookie(0) == get_cs_cookie(pid)); + validate(get_cs_cookie(pid) != 0); + validate(get_cs_cookie(pid) != get_cs_cookie(procs[pidx].thr_tids[0])); + + printf("\n## Copy cookie from a thread [%d] to current/PGID [%d] as PIDTYPE_PID\n", + procs[pidx].thr_tids[0], getpid()); + if (_prctl(PR_SCHED_CORE, PR_SCHED_CORE_SHARE_FROM, procs[pidx].thr_tids[0], + PIDTYPE_PID, 0) < 0) + handle_error("core_sched share from thread failed -- PID"); + disp_processes(num_processes, procs); + + validate(get_cs_cookie(0) == get_cs_cookie(procs[pidx].thr_tids[0])); + validate(get_cs_cookie(pid) != get_cs_cookie(procs[pidx].thr_tids[0])); + + printf("\n## Copy cookie from current [%d] to current as pidtype PGID\n", getpid()); + if (_prctl(PR_SCHED_CORE, PR_SCHED_CORE_SHARE_TO, 0, PIDTYPE_PGID, 0) < 0) + handle_error("core_sched share to self failed -- PGID"); + disp_processes(num_processes, procs); + + validate(get_cs_cookie(0) == get_cs_cookie(pid)); + validate(get_cs_cookie(pid) != 0); + validate(get_cs_cookie(pid) == get_cs_cookie(procs[pidx].thr_tids[0])); + + if 
(errors) { + printf("TESTS FAILED. errors: %d\n", errors); + res = 10; + } else { + printf("SUCCESS !!!\n"); + } + + if (keypress) + getchar(); + else + sleep(delay); + + for (pidx = 0; pidx < num_processes; ++pidx) + if (procs[pidx].cpid > 0) kill(procs[pidx].cpid, 15); /* a failed clone() left -1: never kill(-1) */ + + return res; +} diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c index fcc806585266..6e5102a7d7c9 100644 --- a/tools/testing/selftests/seccomp/seccomp_benchmark.c +++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c @@ -143,9 +143,15 @@ int main(int argc, char *argv[]) unsigned long long native, filter1, filter2, bitmap1, bitmap2; unsigned long long entry, per_filter1, per_filter2; + setbuf(stdout, NULL); + + printf("Running on:\n"); + system("uname -a"); + printf("Current BPF sysctl settings:\n"); - system("sysctl net.core.bpf_jit_enable"); - system("sysctl net.core.bpf_jit_harden"); + /* Avoid using "sysctl" which may not be installed. */ + system("grep -H . /proc/sys/net/core/bpf_jit_enable"); + system("grep -H . 
/proc/sys/net/core/bpf_jit_harden"); if (argc > 1) samples = strtoull(argv[1], NULL, 0); diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index e3d5c77a8612..1d64891e6492 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -235,6 +235,10 @@ struct seccomp_notif_addfd { }; #endif +#ifndef SECCOMP_ADDFD_FLAG_SEND +#define SECCOMP_ADDFD_FLAG_SEND (1UL << 1) /* Addfd and return it, atomically */ +#endif + struct seccomp_notif_addfd_small { __u64 id; char weird[4]; @@ -3959,7 +3963,7 @@ TEST(user_notification_addfd) { pid_t pid; long ret; - int status, listener, memfd, fd; + int status, listener, memfd, fd, nextfd; struct seccomp_notif_addfd addfd = {}; struct seccomp_notif_addfd_small small = {}; struct seccomp_notif_addfd_big big = {}; @@ -3968,25 +3972,34 @@ TEST(user_notification_addfd) /* 100 ms */ struct timespec delay = { .tv_nsec = 100000000 }; + /* There may be arbitrary already-open fds at test start. */ memfd = memfd_create("test", 0); ASSERT_GE(memfd, 0); + nextfd = memfd + 1; ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); ASSERT_EQ(0, ret) { TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); } + /* fd: 4 */ /* Check that the basic notification machinery works */ listener = user_notif_syscall(__NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER); - ASSERT_GE(listener, 0); + ASSERT_EQ(listener, nextfd++); pid = fork(); ASSERT_GE(pid, 0); if (pid == 0) { + /* fds will be added and this value is expected */ if (syscall(__NR_getppid) != USER_NOTIF_MAGIC) exit(1); + + /* Atomic addfd+send is received here. 
Check it is a valid fd */ + if (fcntl(syscall(__NR_getppid), F_GETFD) == -1) + exit(1); + exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); } @@ -4028,14 +4041,14 @@ TEST(user_notification_addfd) /* Verify we can set an arbitrary remote fd */ fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd); - EXPECT_GE(fd, 0); + EXPECT_EQ(fd, nextfd++); EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0); /* Verify we can set an arbitrary remote fd with large size */ memset(&big, 0x0, sizeof(big)); big.addfd = addfd; fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big); - EXPECT_GE(fd, 0); + EXPECT_EQ(fd, nextfd++); /* Verify we can set a specific remote fd */ addfd.newfd = 42; @@ -4065,6 +4078,32 @@ TEST(user_notification_addfd) ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); ASSERT_EQ(addfd.id, req.id); + /* Verify we can do an atomic addfd and send */ + addfd.newfd = 0; + addfd.flags = SECCOMP_ADDFD_FLAG_SEND; + fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd); + /* + * Child has earlier "low" fds and now 42, so we expect the next + * lowest available fd to be assigned here. + */ + EXPECT_EQ(fd, nextfd++); + EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0); + + /* + * This sets the ID of the ADD FD to the last request plus 1. The + * notification ID increments 1 per notification. 
+ */ + addfd.id = req.id + 1; + + /* This spins until the underlying notification is generated */ + while (ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd) == -1 && + errno != EINPROGRESS) /* errno is positive; retry while the id is still unknown */ + nanosleep(&delay, NULL); + + memset(&req, 0, sizeof(req)); + ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); + ASSERT_EQ(addfd.id, req.id); + resp.id = req.id; resp.error = 0; resp.val = USER_NOTIF_MAGIC; @@ -4125,6 +4164,10 @@ TEST(user_notification_addfd_rlimit) EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); EXPECT_EQ(errno, EMFILE); + addfd.flags = SECCOMP_ADDFD_FLAG_SEND; + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); + EXPECT_EQ(errno, EMFILE); + addfd.newfd = 100; addfd.flags = SECCOMP_ADDFD_FLAG_SETFD; EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); diff --git a/tools/testing/selftests/tc-testing/plugin-lib/scapyPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/scapyPlugin.py index 229ee185b27e..254136e3da5a 100644 --- a/tools/testing/selftests/tc-testing/plugin-lib/scapyPlugin.py +++ b/tools/testing/selftests/tc-testing/plugin-lib/scapyPlugin.py @@ -29,22 +29,26 @@ class SubPlugin(TdcPlugin): return # Check for required fields - scapyinfo = self.args.caseinfo['scapy'] - scapy_keys = ['iface', 'count', 'packet'] - missing_keys = [] - keyfail = False - for k in scapy_keys: - if k not in scapyinfo: - keyfail = True - missing_keys.add(k) - if keyfail: - print('{}: Scapy block present in the test, but is missing info:' - .format(self.sub_class)) - print('{}'.format(missing_keys)) - - pkt = eval(scapyinfo['packet']) - if '$' in scapyinfo['iface']: - tpl = Template(scapyinfo['iface']) - scapyinfo['iface'] = tpl.safe_substitute(NAMES) - for count in range(scapyinfo['count']): - sendp(pkt, iface=scapyinfo['iface']) + lscapyinfo = self.args.caseinfo['scapy'] + if not isinstance(lscapyinfo, list): + lscapyinfo = [ lscapyinfo, ] + + for scapyinfo in lscapyinfo: + scapy_keys = ['iface', 'count', 'packet'] 
+ missing_keys = [] + keyfail = False + for k in scapy_keys: + if k not in scapyinfo: + keyfail = True + missing_keys.append(k) + if keyfail: + print('{}: Scapy block present in the test, but is missing info:' + .format(self.sub_class)) + print('{}'.format(missing_keys)) + + pkt = eval(scapyinfo['packet']) + if '$' in scapyinfo['iface']: + tpl = Template(scapyinfo['iface']) + scapyinfo['iface'] = tpl.safe_substitute(NAMES) + for count in range(scapyinfo['count']): + sendp(pkt, iface=scapyinfo['iface']) diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json b/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json index 4202e95e27b9..bd843ab00a58 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json @@ -406,5 +406,50 @@ "teardown": [ "$TC actions flush action ct" ] + }, + { + "id": "3992", + "name": "Add ct action triggering DNAT tuple conflict", + "category": [ + "actions", + "ct", + "scapy" + ], + "plugins": { + "requires": [ + "nsPlugin", + "scapyPlugin" + ] + }, + "setup": [ + [ + "$TC qdisc del dev $DEV1 ingress", + 0, + 1, + 2, + 255 + ], + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 ingress protocol ip prio 1 flower ct_state -trk action ct commit nat dst addr 20.0.0.1 port 10 pipe action drop", + "scapy": [ + { + "iface": "$DEV0", + "count": 1, + "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.10')/TCP(sport=5000,dport=10)" + }, + { + "iface": "$DEV0", + "count": 1, + "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)" + } + ], + "expExitCode": "0", + "verifyCmd": "cat /proc/net/nf_conntrack", + "matchPattern": "dst=10.0.0.20", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json index 
41d783254b08..2aad4caa8581 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json @@ -446,6 +446,30 @@ "teardown": [] }, { + "id": "ba5b", + "name": "Add vlan modify action for protocol 802.1Q setting priority 0", + "category": [ + "actions", + "vlan" + ], + "setup": [ + [ + "$TC actions flush action vlan", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action vlan modify protocol 802.1Q id 5 priority 0 index 100", + "expExitCode": "0", + "verifyCmd": "$TC actions get action vlan index 100", + "matchPattern": "action order [0-9]+: vlan.*modify id 100 priority 0 protocol 802.1Q pipe.*index 100 ref", + "matchCount": "0", + "teardown": [ + "$TC actions flush action vlan" + ] + }, + { "id": "6812", "name": "Add vlan modify action for protocol 802.1Q", "category": [ @@ -463,7 +487,7 @@ "cmdUnderTest": "$TC actions add action vlan modify protocol 802.1Q id 5 index 100", "expExitCode": "0", "verifyCmd": "$TC actions get action vlan index 100", - "matchPattern": "action order [0-9]+: vlan.*modify id 100 protocol 802.1Q priority 0 pipe.*index 100 ref", + "matchPattern": "action order [0-9]+: vlan.*modify id 100 protocol 802.1Q pipe.*index 100 ref", "matchCount": "0", "teardown": [ "$TC actions flush action vlan" @@ -487,7 +511,7 @@ "cmdUnderTest": "$TC actions add action vlan modify protocol 802.1ad id 500 reclassify index 12", "expExitCode": "0", "verifyCmd": "$TC actions get action vlan index 12", - "matchPattern": "action order [0-9]+: vlan.*modify id 500 protocol 802.1ad priority 0 reclassify.*index 12 ref", + "matchPattern": "action order [0-9]+: vlan.*modify id 500 protocol 802.1ad reclassify.*index 12 ref", "matchCount": "1", "teardown": [ "$TC actions flush action vlan" diff --git a/tools/testing/selftests/x86/syscall_numbering.c b/tools/testing/selftests/x86/syscall_numbering.c index d6b09cb1aa2c..991591718bb0 100644 --- 
a/tools/testing/selftests/x86/syscall_numbering.c +++ b/tools/testing/selftests/x86/syscall_numbering.c @@ -1,6 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * syscall_arg_fault.c - tests faults 32-bit fast syscall stack args + * syscall_numbering.c - test calling the x86-64 kernel with various + * valid and invalid system call numbers. + * * Copyright (c) 2018 Andrew Lutomirski */ @@ -11,79 +13,470 @@ #include <stdbool.h> #include <errno.h> #include <unistd.h> -#include <syscall.h> +#include <string.h> +#include <fcntl.h> +#include <limits.h> +#include <signal.h> +#include <sysexits.h> -static int nerrs; +#include <sys/ptrace.h> +#include <sys/user.h> +#include <sys/wait.h> +#include <sys/mman.h> -#define X32_BIT 0x40000000UL +#include <linux/ptrace.h> -static void check_enosys(unsigned long nr, bool *ok) +/* Common system call numbers */ +#define SYS_READ 0 +#define SYS_WRITE 1 +#define SYS_GETPID 39 +/* x64-only system call numbers */ +#define X64_IOCTL 16 +#define X64_READV 19 +#define X64_WRITEV 20 +/* x32-only system call numbers (without X32_BIT) */ +#define X32_IOCTL 514 +#define X32_READV 515 +#define X32_WRITEV 516 + +#define X32_BIT 0x40000000 + +static int nullfd = -1; /* File descriptor for /dev/null */ +static bool with_x32; /* x32 supported on this kernel? */ + +enum ptrace_pass { + PTP_NOTHING, + PTP_GETREGS, + PTP_WRITEBACK, + PTP_FUZZRET, + PTP_FUZZHIGH, + PTP_INTNUM, + PTP_DONE +}; + +static const char * const ptrace_pass_name[] = { - /* If this fails, a segfault is reasonably likely. 
*/ - fflush(stdout); + [PTP_NOTHING] = "just stop, no data read", + [PTP_GETREGS] = "only getregs", + [PTP_WRITEBACK] = "getregs, unmodified setregs", + [PTP_FUZZRET] = "modifying the default return", + [PTP_FUZZHIGH] = "clobbering the top 32 bits", + [PTP_INTNUM] = "sign-extending the syscall number", +}; - long ret = syscall(nr, 0, 0, 0, 0, 0, 0); - if (ret == 0) { - printf("[FAIL]\tsyscall %lu succeeded, but it should have failed\n", nr); - *ok = false; - } else if (errno != ENOSYS) { - printf("[FAIL]\tsyscall %lu had error code %d, but it should have reported ENOSYS\n", nr, errno); - *ok = false; - } +/* + * Shared memory block between tracer and test + */ +struct shared { + unsigned int nerr; /* Total error count */ + unsigned int indent; /* Message indentation level */ + enum ptrace_pass ptrace_pass; + bool probing_syscall; /* In probe_syscall() */ +}; +static volatile struct shared *sh; + +static inline unsigned int offset(void) +{ + unsigned int level = sh ? sh->indent : 0; + + return 8 + level * 4; } -static void test_x32_without_x32_bit(void) +#define msg(lvl, fmt, ...) printf("%-*s" fmt, offset(), "[" #lvl "]", \ + ## __VA_ARGS__) + +#define run(fmt, ...) msg(RUN, fmt, ## __VA_ARGS__) +#define info(fmt, ...) msg(INFO, fmt, ## __VA_ARGS__) +#define ok(fmt, ...) msg(OK, fmt, ## __VA_ARGS__) + +#define fail(fmt, ...) \ + do { \ + msg(FAIL, fmt, ## __VA_ARGS__); \ + sh->nerr++; \ + } while (0) + +#define crit(fmt, ...) \ + do { \ + sh->indent = 0; \ + msg(FAIL, fmt, ## __VA_ARGS__); \ + msg(SKIP, "Unable to run test\n"); \ + exit(EX_OSERR); \ + } while (0) + +/* Sentinel for ptrace-modified return value */ +#define MODIFIED_BY_PTRACE -9999 + +/* + * Directly invokes the given syscall with nullfd as the first argument + * and the rest zero. Avoids involving glibc wrappers in case they ever + * end up intercepting some system calls for some reason, or modify + * the system call number itself. 
+ */ +static long long probe_syscall(int msb, int lsb) { - bool ok = true; + register long long arg1 asm("rdi") = nullfd; + register long long arg2 asm("rsi") = 0; + register long long arg3 asm("rdx") = 0; + register long long arg4 asm("r10") = 0; + register long long arg5 asm("r8") = 0; + register long long arg6 asm("r9") = 0; + long long nr = ((long long)msb << 32) | (unsigned int)lsb; + long long ret; /* - * Syscalls 512-547 are "x32" syscalls. They are intended to be - * called with the x32 (0x40000000) bit set. Calling them without - * the x32 bit set is nonsense and should not work. + * We pass in an extra copy of the extended system call number + * in %rbx, so we can examine it from the ptrace handler without + * worrying about it being possibly modified. This is to test + * the validity of struct user regs.orig_rax a.k.a. + * struct pt_regs.orig_ax. */ - printf("[RUN]\tChecking syscalls 512-547\n"); - for (int i = 512; i <= 547; i++) - check_enosys(i, &ok); + sh->probing_syscall = true; + asm volatile("syscall" + : "=a" (ret) + : "a" (nr), "b" (nr), + "r" (arg1), "r" (arg2), "r" (arg3), + "r" (arg4), "r" (arg5), "r" (arg6) + : "rcx", "r11", "memory", "cc"); + sh->probing_syscall = false; + + return ret; +} + +static const char *syscall_str(int msb, int start, int end) +{ + static char buf[64]; + const char * const type = (start & X32_BIT) ? "x32" : "x64"; + int lsb = start; /* - * Check that a handful of 64-bit-only syscalls are rejected if the x32 - * bit is set. + * Improve readability by stripping the x32 bit, but round + * toward zero so we don't display -1 as -1073741825. 
*/ - printf("[RUN]\tChecking some 64-bit syscalls in x32 range\n"); - check_enosys(16 | X32_BIT, &ok); /* ioctl */ - check_enosys(19 | X32_BIT, &ok); /* readv */ - check_enosys(20 | X32_BIT, &ok); /* writev */ + if (lsb < 0) + lsb |= X32_BIT; + else + lsb &= ~X32_BIT; + + if (start == end) + snprintf(buf, sizeof buf, "%s syscall %d:%d", + type, msb, lsb); + else + snprintf(buf, sizeof buf, "%s syscalls %d:%d..%d", + type, msb, lsb, lsb + (end-start)); + + return buf; +} + +static unsigned int _check_for(int msb, int start, int end, long long expect, + const char *expect_str) +{ + unsigned int err = 0; + + sh->indent++; + if (start != end) + sh->indent++; + + for (int nr = start; nr <= end; nr++) { + long long ret = probe_syscall(msb, nr); + + if (ret != expect) { + fail("%s returned %lld, but it should have returned %s\n", + syscall_str(msb, nr, nr), + ret, expect_str); + err++; + } + } + + if (start != end) + sh->indent--; + + if (err) { + if (start != end) + fail("%s had %u failure%s\n", + syscall_str(msb, start, end), + err, err == 1 ? "s" : ""); + } else { + ok("%s returned %s as expected\n", + syscall_str(msb, start, end), expect_str); + } + + sh->indent--; + + return err; +} + +#define check_for(msb,start,end,expect) \ + _check_for(msb,start,end,expect,#expect) + +static bool check_zero(int msb, int nr) +{ + return check_for(msb, nr, nr, 0); +} + +static bool check_enosys(int msb, int nr) +{ + return check_for(msb, nr, nr, -ENOSYS); +} + +/* + * Anyone diagnosing a failure will want to know whether the kernel + * supports x32. Tell them. This can also be used to conditionalize + * tests based on existence or nonexistence of x32. 
+ */ +static bool test_x32(void) +{ + long long ret; + pid_t mypid = getpid(); + + run("Checking for x32 by calling x32 getpid()\n"); + ret = probe_syscall(0, SYS_GETPID | X32_BIT); + + sh->indent++; + if (ret == mypid) { + info("x32 is supported\n"); + with_x32 = true; + } else if (ret == -ENOSYS) { + info("x32 is not supported\n"); + with_x32 = false; + } else { + fail("x32 getpid() returned %lld, but it should have returned either %lld or -ENOSYS\n", ret, (long long)mypid); + with_x32 = false; + } + sh->indent--; + return with_x32; +} + +static void test_syscalls_common(int msb) +{ + enum ptrace_pass pass = sh->ptrace_pass; + + run("Checking some common syscalls as 64 bit\n"); + check_zero(msb, SYS_READ); + check_zero(msb, SYS_WRITE); + + run("Checking some 64-bit only syscalls as 64 bit\n"); + check_zero(msb, X64_READV); + check_zero(msb, X64_WRITEV); + + run("Checking out of range system calls\n"); + check_for(msb, -64, -2, -ENOSYS); + if (pass >= PTP_FUZZRET) + check_for(msb, -1, -1, MODIFIED_BY_PTRACE); + else + check_for(msb, -1, -1, -ENOSYS); + check_for(msb, X32_BIT-64, X32_BIT-1, -ENOSYS); + check_for(msb, -64-X32_BIT, -1-X32_BIT, -ENOSYS); + check_for(msb, INT_MAX-64, INT_MAX-1, -ENOSYS); +} +static void test_syscalls_with_x32(int msb) +{ /* - * Check some syscalls with high bits set. + * Syscalls 512-547 are "x32" syscalls. They are + * intended to be called with the x32 (0x40000000) bit + * set. Calling them without the x32 bit set is + * nonsense and should not work. 
*/ - printf("[RUN]\tChecking numbers above 2^32-1\n"); - check_enosys((1UL << 32), &ok); - check_enosys(X32_BIT | (1UL << 32), &ok); + run("Checking x32 syscalls as 64 bit\n"); + check_for(msb, 512, 547, -ENOSYS); - if (!ok) - nerrs++; - else - printf("[OK]\tThey all returned -ENOSYS\n"); + run("Checking some common syscalls as x32\n"); + check_zero(msb, SYS_READ | X32_BIT); + check_zero(msb, SYS_WRITE | X32_BIT); + + run("Checking some x32 syscalls as x32\n"); + check_zero(msb, X32_READV | X32_BIT); + check_zero(msb, X32_WRITEV | X32_BIT); + + run("Checking some 64-bit syscalls as x32\n"); + check_enosys(msb, X64_IOCTL | X32_BIT); + check_enosys(msb, X64_READV | X32_BIT); + check_enosys(msb, X64_WRITEV | X32_BIT); } -int main() +static void test_syscalls_without_x32(int msb) { + run("Checking for absence of x32 system calls\n"); + check_for(msb, 0 | X32_BIT, 999 | X32_BIT, -ENOSYS); +} + +static void test_syscall_numbering(void) +{ + static const int msbs[] = { + 0, 1, -1, X32_BIT-1, X32_BIT, X32_BIT-1, -X32_BIT, INT_MAX, + INT_MIN, INT_MIN+1 + }; + + sh->indent++; + /* - * Anyone diagnosing a failure will want to know whether the kernel - * supports x32. Tell them. + * The MSB is supposed to be ignored, so we loop over a few + * to test that out. 
+ */ - printf("\tChecking for x32..."); - fflush(stdout); - if (syscall(39 | X32_BIT, 0, 0, 0, 0, 0, 0) >= 0) { - printf(" supported\n"); - } else if (errno == ENOSYS) { - printf(" not supported\n"); + for (size_t i = 0; i < sizeof(msbs)/sizeof(msbs[0]); i++) { + int msb = msbs[i]; + run("Checking system calls with msb = %d (0x%x)\n", + msb, msb); + + sh->indent++; + + test_syscalls_common(msb); + if (with_x32) + test_syscalls_with_x32(msb); + else + test_syscalls_without_x32(msb); + + sh->indent--; + } + + sh->indent--; +} + +static void syscall_numbering_tracee(void) +{ + enum ptrace_pass pass; + + if (ptrace(PTRACE_TRACEME, 0, 0, 0)) { + crit("Failed to request tracing\n"); + return; + } + raise(SIGSTOP); + + for (sh->ptrace_pass = pass = PTP_NOTHING; pass < PTP_DONE; + sh->ptrace_pass = ++pass) { + run("Running tests under ptrace: %s\n", ptrace_pass_name[pass]); + test_syscall_numbering(); + } +} + +static void mess_with_syscall(pid_t testpid, enum ptrace_pass pass) +{ + struct user_regs_struct regs; + + sh->probing_syscall = false; /* Do this on entry only */ + + /* For these, don't even getregs */ + if (pass == PTP_NOTHING || pass == PTP_DONE) + return; + + ptrace(PTRACE_GETREGS, testpid, NULL, &regs); + + if (regs.orig_rax != regs.rbx) { + fail("orig_rax %#llx doesn't match syscall number %#llx\n", + (unsigned long long)regs.orig_rax, + (unsigned long long)regs.rbx); + } + + switch (pass) { + case PTP_GETREGS: + /* Just read, no writeback */ + return; + case PTP_WRITEBACK: + /* Write back the same register state verbatim */ + break; + case PTP_FUZZRET: + regs.rax = MODIFIED_BY_PTRACE; + break; + case PTP_FUZZHIGH: + regs.rax = MODIFIED_BY_PTRACE; + regs.orig_rax = regs.orig_rax | 0xffffffff00000000ULL; + break; + case PTP_INTNUM: + regs.rax = MODIFIED_BY_PTRACE; + regs.orig_rax = (int)regs.orig_rax; + break; + default: + crit("invalid ptrace_pass\n"); + break; + } + + ptrace(PTRACE_SETREGS, testpid, NULL, &regs); +} + +static void syscall_numbering_tracer(pid_t 
testpid) +{ + int wstatus; + + do { + pid_t wpid = waitpid(testpid, &wstatus, 0); + if (wpid < 0 && errno != EINTR) + break; + if (wpid != testpid) + continue; + if (!WIFSTOPPED(wstatus)) + break; /* Thread exited? */ + + if (sh->probing_syscall && WSTOPSIG(wstatus) == SIGTRAP) + mess_with_syscall(testpid, sh->ptrace_pass); + } while (sh->ptrace_pass != PTP_DONE && + !ptrace(PTRACE_SYSCALL, testpid, NULL, NULL)); + + ptrace(PTRACE_DETACH, testpid, NULL, NULL); + + /* Wait for the child process to terminate */ + while (waitpid(testpid, &wstatus, 0) != testpid || !WIFEXITED(wstatus)) + /* wait some more */; +} + +static void test_traced_syscall_numbering(void) +{ + pid_t testpid; + + /* Launch the test thread; this thread continues as the tracer thread */ + testpid = fork(); + + if (testpid < 0) { + crit("Unable to launch tracer process\n"); + } else if (testpid == 0) { + syscall_numbering_tracee(); + _exit(0); } else { - printf(" confused\n"); + syscall_numbering_tracer(testpid); } +} - test_x32_without_x32_bit(); +int main(void) +{ + unsigned int nerr; - return nerrs ? 1 : 0; + /* + * It is quite likely to get a segfault on a failure, so make + * sure the message gets out by setting stdout to nonbuffered. + */ + setvbuf(stdout, NULL, _IONBF, 0); + + /* + * Harmless file descriptor to work on... + */ + nullfd = open("/dev/null", O_RDWR); + if (nullfd < 0) { + crit("Unable to open /dev/null: %s\n", strerror(errno)); + } + + /* + * Set up a block of shared memory... 
+ */ + sh = mmap(NULL, sysconf(_SC_PAGE_SIZE), PROT_READ|PROT_WRITE, + MAP_ANONYMOUS|MAP_SHARED, 0, 0); + if (sh == MAP_FAILED) { + crit("Unable to allocated shared memory block: %s\n", + strerror(errno)); + } + + with_x32 = test_x32(); + + run("Running tests without ptrace...\n"); + test_syscall_numbering(); + + test_traced_syscall_numbering(); + + nerr = sh->nerr; + if (!nerr) { + ok("All system calls succeeded or failed as expected\n"); + return 0; + } else { + fail("A total of %u system call%s had incorrect behavior\n", + nerr, nerr != 1 ? "s" : ""); + return 1; + } } |