diff options
Diffstat (limited to 'tools/testing/selftests')
140 files changed, 10872 insertions, 1625 deletions
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index c30079c86998..1bb204cee853 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -30,8 +30,6 @@ test_tcpnotify_user test_libbpf test_tcp_check_syncookie_user test_sysctl -test_hashmap -test_btf_dump test_current_pid_tgid_new_ns xdping test_cpp @@ -39,4 +37,5 @@ test_cpp /no_alu32 /bpf_gcc /tools - +/runqslower +/bench diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 7729892e0b04..22aaec74ea0a 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -2,6 +2,8 @@ include ../../../../scripts/Kbuild.include include ../../../scripts/Makefile.arch +CXX ?= $(CROSS_COMPILE)g++ + CURDIR := $(abspath .) TOOLSDIR := $(abspath ../../..) LIBDIR := $(TOOLSDIR)/lib @@ -20,9 +22,10 @@ CLANG ?= clang LLC ?= llc LLVM_OBJCOPY ?= llvm-objcopy BPF_GCC ?= $(shell command -v bpf-gcc;) -CFLAGS += -g -rdynamic -Wall -O2 $(GENFLAGS) -I$(CURDIR) \ - -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) -I$(TOOLSINCDIR) \ - -I$(APIDIR) \ +SAN_CFLAGS ?= +CFLAGS += -g -rdynamic -Wall -O2 $(GENFLAGS) $(SAN_CFLAGS) \ + -I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \ + -I$(TOOLSINCDIR) -I$(APIDIR) \ -Dbpf_prog_load=bpf_prog_test_load \ -Dbpf_load_program=bpf_test_load_program LDLIBS += -lcap -lelf -lz -lrt -lpthread @@ -32,7 +35,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \ test_sock test_btf test_sockmap get_cgroup_id_user test_socket_cookie \ test_cgroup_storage \ - test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \ + test_netcnt test_tcpnotify_user test_sock_fields test_sysctl \ test_progs-no_alu32 \ test_current_pid_tgid_new_ns @@ -76,7 +79,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \ # Compile but not part of 'make run_tests' TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \ flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \ - test_lirc_mode2_user xdping test_cpp runqslower + test_lirc_mode2_user xdping test_cpp runqslower bench TEST_CUSTOM_PROGS = urandom_read @@ -141,7 +144,8 @@ VMLINUX_BTF := $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) $(OUTPUT)/runqslower: $(BPFOBJ) $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \ OUTPUT=$(SCRATCH_DIR)/ VMLINUX_BTF=$(VMLINUX_BTF) \ - BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) + BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) && \ + cp $(SCRATCH_DIR)/runqslower $@ $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/test_stub.o $(BPFOBJ) @@ -241,7 +245,7 @@ define GCC_BPF_BUILD_RULE $(BPF_GCC) $3 $4 -O2 -c $1 -o $2 endef -SKEL_BLACKLIST := btf__% test_pinning_invalid.c +SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c # Set up extra TRUNNER_XXX "temporary" variables in the environment (relies on # $eval()) and pass control to DEFINE_TEST_RUNNER_RULES. @@ -263,6 +267,7 @@ TRUNNER_BPF_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, $$(TRUNNER_BPF_SRCS) TRUNNER_BPF_SKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.skel.h, \ $$(filter-out $(SKEL_BLACKLIST), \ $$(TRUNNER_BPF_SRCS))) +TEST_GEN_FILES += $$(TRUNNER_BPF_OBJS) # Evaluate rules now with extra TRUNNER_XXX variables above already defined $$(eval $$(call DEFINE_TEST_RUNNER_RULES,$1,$2)) @@ -323,7 +328,7 @@ $(TRUNNER_TEST_OBJS): $(TRUNNER_OUTPUT)/%.test.o: \ $(TRUNNER_BPF_SKELS) \ $$(BPFOBJ) | $(TRUNNER_OUTPUT) $$(call msg,TEST-OBJ,$(TRUNNER_BINARY),$$@) - cd $$(@D) && $$(CC) $$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F) + cd $$(@D) && $$(CC) -I. $$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F) $(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \ %.c \ @@ -352,6 +357,7 @@ endef TRUNNER_TESTS_DIR := prog_tests TRUNNER_BPF_PROGS_DIR := progs TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \ + network_helpers.c testing_helpers.c \ flow_dissector_load.h TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read \ $(wildcard progs/btf_dump_test_case_*.c) @@ -403,6 +409,24 @@ $(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ) $(call msg,CXX,,$@) $(CXX) $(CFLAGS) $^ $(LDLIBS) -o $@ +# Benchmark runner +$(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h + $(call msg,CC,,$@) + $(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@ +$(OUTPUT)/bench_rename.o: $(OUTPUT)/test_overhead.skel.h +$(OUTPUT)/bench_trigger.o: $(OUTPUT)/trigger_bench.skel.h +$(OUTPUT)/bench_ringbufs.o: $(OUTPUT)/ringbuf_bench.skel.h \ + $(OUTPUT)/perfbuf_bench.skel.h +$(OUTPUT)/bench.o: bench.h testing_helpers.h +$(OUTPUT)/bench: LDLIBS += -lm +$(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \ + $(OUTPUT)/bench_count.o \ + $(OUTPUT)/bench_rename.o \ + $(OUTPUT)/bench_trigger.o \ + $(OUTPUT)/bench_ringbufs.o + $(call msg,BINARY,,$@) + $(CC) $(LDFLAGS) -o $@ $(filter %.a %.o,$^) $(LDLIBS) + EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) \ prog_tests/tests.h map_tests/tests.h verifier/tests.h \ feature \ diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst new file mode 100644 index 000000000000..e885d351595f --- /dev/null +++ b/tools/testing/selftests/bpf/README.rst @@ -0,0 +1,45 @@ +================== +BPF Selftest Notes +================== +General instructions on running selftests can be found in +`Documentation/bpf/bpf_devel_QA.rst`_. + +Additional information about selftest failures are +documented here. + +bpf_iter test failures with clang/llvm 10.0.0 +============================================= + +With clang/llvm 10.0.0, the following two bpf_iter tests failed: + * ``bpf_iter/ipv6_route`` + * ``bpf_iter/netlink`` + +The symptom for ``bpf_iter/ipv6_route`` looks like + +.. code-block:: c + + 2: (79) r8 = *(u64 *)(r1 +8) + ... + 14: (bf) r2 = r8 + 15: (0f) r2 += r1 + ; BPF_SEQ_PRINTF(seq, "%pi6 %02x ", &rt->fib6_dst.addr, rt->fib6_dst.plen); + 16: (7b) *(u64 *)(r8 +64) = r2 + only read is supported + +The symptom for ``bpf_iter/netlink`` looks like + +.. code-block:: c + + ; struct netlink_sock *nlk = ctx->sk; + 2: (79) r7 = *(u64 *)(r1 +8) + ... + 15: (bf) r2 = r7 + 16: (0f) r2 += r1 + ; BPF_SEQ_PRINTF(seq, "%pK %-3d ", s, s->sk_protocol); + 17: (7b) *(u64 *)(r7 +0) = r2 + only read is supported + +This is due to a llvm BPF backend bug. The fix + https://reviews.llvm.org/D78466 +has been pushed to llvm 10.x release branch and will be +available in 10.0.1. The fix is available in llvm 11.0.0 trunk. diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c new file mode 100644 index 000000000000..944ad4721c83 --- /dev/null +++ b/tools/testing/selftests/bpf/bench.c @@ -0,0 +1,465 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#define _GNU_SOURCE +#include <argp.h> +#include <linux/compiler.h> +#include <sys/time.h> +#include <sched.h> +#include <fcntl.h> +#include <pthread.h> +#include <sys/sysinfo.h> +#include <sys/resource.h> +#include <signal.h> +#include "bench.h" +#include "testing_helpers.h" + +struct env env = { + .warmup_sec = 1, + .duration_sec = 5, + .affinity = false, + .consumer_cnt = 1, + .producer_cnt = 1, +}; + +static int libbpf_print_fn(enum libbpf_print_level level, + const char *format, va_list args) +{ + if (level == LIBBPF_DEBUG && !env.verbose) + return 0; + return vfprintf(stderr, format, args); +} + +static int bump_memlock_rlimit(void) +{ + struct rlimit rlim_new = { + .rlim_cur = RLIM_INFINITY, + .rlim_max = RLIM_INFINITY, + }; + + return setrlimit(RLIMIT_MEMLOCK, &rlim_new); +} + +void setup_libbpf() +{ + int err; + + libbpf_set_print(libbpf_print_fn); + + err = bump_memlock_rlimit(); + if (err) + fprintf(stderr, "failed to increase RLIMIT_MEMLOCK: %d", err); +} + +void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns) +{ + double hits_per_sec, drops_per_sec; + double hits_per_prod; + + hits_per_sec = res->hits / 1000000.0 / (delta_ns / 1000000000.0); + hits_per_prod = hits_per_sec / env.producer_cnt; + drops_per_sec = res->drops / 1000000.0 / (delta_ns / 1000000000.0); + + printf("Iter %3d (%7.3lfus): ", + iter, (delta_ns - 1000000000) / 1000.0); + + printf("hits %8.3lfM/s (%7.3lfM/prod), drops %8.3lfM/s\n", + hits_per_sec, hits_per_prod, drops_per_sec); +} + +void hits_drops_report_final(struct bench_res res[], int res_cnt) +{ + int i; + double hits_mean = 0.0, drops_mean = 0.0; + double hits_stddev = 0.0, drops_stddev = 0.0; + + for (i = 0; i < res_cnt; i++) { + hits_mean += res[i].hits / 1000000.0 / (0.0 + res_cnt); + drops_mean += res[i].drops / 1000000.0 / (0.0 + res_cnt); + } + + if (res_cnt > 1) { + for (i = 0; i < res_cnt; i++) { + hits_stddev += (hits_mean - res[i].hits / 1000000.0) * + (hits_mean - res[i].hits / 1000000.0) / + (res_cnt - 1.0); + drops_stddev += (drops_mean - res[i].drops / 1000000.0) * + (drops_mean - res[i].drops / 1000000.0) / + (res_cnt - 1.0); + } + hits_stddev = sqrt(hits_stddev); + drops_stddev = sqrt(drops_stddev); + } + printf("Summary: hits %8.3lf \u00B1 %5.3lfM/s (%7.3lfM/prod), ", + hits_mean, hits_stddev, hits_mean / env.producer_cnt); + printf("drops %8.3lf \u00B1 %5.3lfM/s\n", + drops_mean, drops_stddev); +} + +const char *argp_program_version = "benchmark"; +const char *argp_program_bug_address = "<bpf@vger.kernel.org>"; +const char argp_program_doc[] = +"benchmark Generic benchmarking framework.\n" +"\n" +"This tool runs benchmarks.\n" +"\n" +"USAGE: benchmark <bench-name>\n" +"\n" +"EXAMPLES:\n" +" # run 'count-local' benchmark with 1 producer and 1 consumer\n" +" benchmark count-local\n" +" # run 'count-local' with 16 producer and 8 consumer thread, pinned to CPUs\n" +" benchmark -p16 -c8 -a count-local\n"; + +enum { + ARG_PROD_AFFINITY_SET = 1000, + ARG_CONS_AFFINITY_SET = 1001, +}; + +static const struct argp_option opts[] = { + { "list", 'l', NULL, 0, "List available benchmarks"}, + { "duration", 'd', "SEC", 0, "Duration of benchmark, seconds"}, + { "warmup", 'w', "SEC", 0, "Warm-up period, seconds"}, + { "producers", 'p', "NUM", 0, "Number of producer threads"}, + { "consumers", 'c', "NUM", 0, "Number of consumer threads"}, + { "verbose", 'v', NULL, 0, "Verbose debug output"}, + { "affinity", 'a', NULL, 0, "Set consumer/producer thread affinity"}, + { "prod-affinity", ARG_PROD_AFFINITY_SET, "CPUSET", 0, + "Set of CPUs for producer threads; implies --affinity"}, + { "cons-affinity", ARG_CONS_AFFINITY_SET, "CPUSET", 0, + "Set of CPUs for consumer threads; implies --affinity"}, + {}, +}; + +extern struct argp bench_ringbufs_argp; + +static const struct argp_child bench_parsers[] = { + { &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 }, + {}, +}; + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + static int pos_args; + + switch (key) { + case 'v': + env.verbose = true; + break; + case 'l': + env.list = true; + break; + case 'd': + env.duration_sec = strtol(arg, NULL, 10); + if (env.duration_sec <= 0) { + fprintf(stderr, "Invalid duration: %s\n", arg); + argp_usage(state); + } + break; + case 'w': + env.warmup_sec = strtol(arg, NULL, 10); + if (env.warmup_sec <= 0) { + fprintf(stderr, "Invalid warm-up duration: %s\n", arg); + argp_usage(state); + } + break; + case 'p': + env.producer_cnt = strtol(arg, NULL, 10); + if (env.producer_cnt <= 0) { + fprintf(stderr, "Invalid producer count: %s\n", arg); + argp_usage(state); + } + break; + case 'c': + env.consumer_cnt = strtol(arg, NULL, 10); + if (env.consumer_cnt <= 0) { + fprintf(stderr, "Invalid consumer count: %s\n", arg); + argp_usage(state); + } + break; + case 'a': + env.affinity = true; + break; + case ARG_PROD_AFFINITY_SET: + env.affinity = true; + if (parse_num_list(arg, &env.prod_cpus.cpus, + &env.prod_cpus.cpus_len)) { + fprintf(stderr, "Invalid format of CPU set for producers."); + argp_usage(state); + } + break; + case ARG_CONS_AFFINITY_SET: + env.affinity = true; + if (parse_num_list(arg, &env.cons_cpus.cpus, + &env.cons_cpus.cpus_len)) { + fprintf(stderr, "Invalid format of CPU set for consumers."); + argp_usage(state); + } + break; + case ARGP_KEY_ARG: + if (pos_args++) { + fprintf(stderr, + "Unrecognized positional argument: %s\n", arg); + argp_usage(state); + } + env.bench_name = strdup(arg); + break; + default: + return ARGP_ERR_UNKNOWN; + } + return 0; +} + +static void parse_cmdline_args(int argc, char **argv) +{ + static const struct argp argp = { + .options = opts, + .parser = parse_arg, + .doc = argp_program_doc, + .children = bench_parsers, + }; + if (argp_parse(&argp, argc, argv, 0, NULL, NULL)) + exit(1); + if (!env.list && !env.bench_name) { + argp_help(&argp, stderr, ARGP_HELP_DOC, "bench"); + exit(1); + } +} + +static void collect_measurements(long delta_ns); + +static __u64 last_time_ns; +static void sigalarm_handler(int signo) +{ + long new_time_ns = get_time_ns(); + long delta_ns = new_time_ns - last_time_ns; + + collect_measurements(delta_ns); + + last_time_ns = new_time_ns; +} + +/* set up periodic 1-second timer */ +static void setup_timer() +{ + static struct sigaction sigalarm_action = { + .sa_handler = sigalarm_handler, + }; + struct itimerval timer_settings = {}; + int err; + + last_time_ns = get_time_ns(); + err = sigaction(SIGALRM, &sigalarm_action, NULL); + if (err < 0) { + fprintf(stderr, "failed to install SIGALRM handler: %d\n", -errno); + exit(1); + } + timer_settings.it_interval.tv_sec = 1; + timer_settings.it_value.tv_sec = 1; + err = setitimer(ITIMER_REAL, &timer_settings, NULL); + if (err < 0) { + fprintf(stderr, "failed to arm interval timer: %d\n", -errno); + exit(1); + } +} + +static void set_thread_affinity(pthread_t thread, int cpu) +{ + cpu_set_t cpuset; + + CPU_ZERO(&cpuset); + CPU_SET(cpu, &cpuset); + if (pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset)) { + fprintf(stderr, "setting affinity to CPU #%d failed: %d\n", + cpu, errno); + exit(1); + } +} + +static int next_cpu(struct cpu_set *cpu_set) +{ + if (cpu_set->cpus) { + int i; + + /* find next available CPU */ + for (i = cpu_set->next_cpu; i < cpu_set->cpus_len; i++) { + if (cpu_set->cpus[i]) { + cpu_set->next_cpu = i + 1; + return i; + } + } + fprintf(stderr, "Not enough CPUs specified, need CPU #%d or higher.\n", i); + exit(1); + } + + return cpu_set->next_cpu++; +} + +static struct bench_state { + int res_cnt; + struct bench_res *results; + pthread_t *consumers; + pthread_t *producers; +} state; + +const struct bench *bench = NULL; + +extern const struct bench bench_count_global; +extern const struct bench bench_count_local; +extern const struct bench bench_rename_base; +extern const struct bench bench_rename_kprobe; +extern const struct bench bench_rename_kretprobe; +extern const struct bench bench_rename_rawtp; +extern const struct bench bench_rename_fentry; +extern const struct bench bench_rename_fexit; +extern const struct bench bench_rename_fmodret; +extern const struct bench bench_trig_base; +extern const struct bench bench_trig_tp; +extern const struct bench bench_trig_rawtp; +extern const struct bench bench_trig_kprobe; +extern const struct bench bench_trig_fentry; +extern const struct bench bench_trig_fmodret; +extern const struct bench bench_rb_libbpf; +extern const struct bench bench_rb_custom; +extern const struct bench bench_pb_libbpf; +extern const struct bench bench_pb_custom; + +static const struct bench *benchs[] = { + &bench_count_global, + &bench_count_local, + &bench_rename_base, + &bench_rename_kprobe, + &bench_rename_kretprobe, + &bench_rename_rawtp, + &bench_rename_fentry, + &bench_rename_fexit, + &bench_rename_fmodret, + &bench_trig_base, + &bench_trig_tp, + &bench_trig_rawtp, + &bench_trig_kprobe, + &bench_trig_fentry, + &bench_trig_fmodret, + &bench_rb_libbpf, + &bench_rb_custom, + &bench_pb_libbpf, + &bench_pb_custom, +}; + +static void setup_benchmark() +{ + int i, err; + + if (!env.bench_name) { + fprintf(stderr, "benchmark name is not specified\n"); + exit(1); + } + + for (i = 0; i < ARRAY_SIZE(benchs); i++) { + if (strcmp(benchs[i]->name, env.bench_name) == 0) { + bench = benchs[i]; + break; + } + } + if (!bench) { + fprintf(stderr, "benchmark '%s' not found\n", env.bench_name); + exit(1); + } + + printf("Setting up benchmark '%s'...\n", bench->name); + + state.producers = calloc(env.producer_cnt, sizeof(*state.producers)); + state.consumers = calloc(env.consumer_cnt, sizeof(*state.consumers)); + state.results = calloc(env.duration_sec + env.warmup_sec + 2, + sizeof(*state.results)); + if (!state.producers || !state.consumers || !state.results) + exit(1); + + if (bench->validate) + bench->validate(); + if (bench->setup) + bench->setup(); + + for (i = 0; i < env.consumer_cnt; i++) { + err = pthread_create(&state.consumers[i], NULL, + bench->consumer_thread, (void *)(long)i); + if (err) { + fprintf(stderr, "failed to create consumer thread #%d: %d\n", + i, -errno); + exit(1); + } + if (env.affinity) + set_thread_affinity(state.consumers[i], + next_cpu(&env.cons_cpus)); + } + + /* unless explicit producer CPU list is specified, continue after + * last consumer CPU + */ + if (!env.prod_cpus.cpus) + env.prod_cpus.next_cpu = env.cons_cpus.next_cpu; + + for (i = 0; i < env.producer_cnt; i++) { + err = pthread_create(&state.producers[i], NULL, + bench->producer_thread, (void *)(long)i); + if (err) { + fprintf(stderr, "failed to create producer thread #%d: %d\n", + i, -errno); + exit(1); + } + if (env.affinity) + set_thread_affinity(state.producers[i], + next_cpu(&env.prod_cpus)); + } + + printf("Benchmark '%s' started.\n", bench->name); +} + +static pthread_mutex_t bench_done_mtx = PTHREAD_MUTEX_INITIALIZER; +static pthread_cond_t bench_done = PTHREAD_COND_INITIALIZER; + +static void collect_measurements(long delta_ns) { + int iter = state.res_cnt++; + struct bench_res *res = &state.results[iter]; + + bench->measure(res); + + if (bench->report_progress) + bench->report_progress(iter, res, delta_ns); + + if (iter == env.duration_sec + env.warmup_sec) { + pthread_mutex_lock(&bench_done_mtx); + pthread_cond_signal(&bench_done); + pthread_mutex_unlock(&bench_done_mtx); + } +} + +int main(int argc, char **argv) +{ + parse_cmdline_args(argc, argv); + + if (env.list) { + int i; + + printf("Available benchmarks:\n"); + for (i = 0; i < ARRAY_SIZE(benchs); i++) { + printf("- %s\n", benchs[i]->name); + } + return 0; + } + + setup_benchmark(); + + setup_timer(); + + pthread_mutex_lock(&bench_done_mtx); + pthread_cond_wait(&bench_done, &bench_done_mtx); + pthread_mutex_unlock(&bench_done_mtx); + + if (bench->report_final) + /* skip first sample */ + bench->report_final(state.results + env.warmup_sec, + state.res_cnt - env.warmup_sec); + + return 0; +} + diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h new file mode 100644 index 000000000000..c1f48a473b02 --- /dev/null +++ b/tools/testing/selftests/bpf/bench.h @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#pragma once +#include <stdlib.h> +#include <stdbool.h> +#include <linux/err.h> +#include <errno.h> +#include <unistd.h> +#include <bpf/bpf.h> +#include <bpf/libbpf.h> +#include <math.h> +#include <time.h> +#include <sys/syscall.h> + +struct cpu_set { + bool *cpus; + int cpus_len; + int next_cpu; +}; + +struct env { + char *bench_name; + int duration_sec; + int warmup_sec; + bool verbose; + bool list; + bool affinity; + int consumer_cnt; + int producer_cnt; + struct cpu_set prod_cpus; + struct cpu_set cons_cpus; +}; + +struct bench_res { + long hits; + long drops; +}; + +struct bench { + const char *name; + void (*validate)(); + void (*setup)(); + void *(*producer_thread)(void *ctx); + void *(*consumer_thread)(void *ctx); + void (*measure)(struct bench_res* res); + void (*report_progress)(int iter, struct bench_res* res, long delta_ns); + void (*report_final)(struct bench_res res[], int res_cnt); +}; + +struct counter { + long value; +} __attribute__((aligned(128))); + +extern struct env env; +extern const struct bench *bench; + +void setup_libbpf(); +void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns); +void hits_drops_report_final(struct bench_res res[], int res_cnt); + +static inline __u64 get_time_ns() { + struct timespec t; + + clock_gettime(CLOCK_MONOTONIC, &t); + + return (u64)t.tv_sec * 1000000000 + t.tv_nsec; +} + +static inline void atomic_inc(long *value) +{ + (void)__atomic_add_fetch(value, 1, __ATOMIC_RELAXED); +} + +static inline void atomic_add(long *value, long n) +{ + (void)__atomic_add_fetch(value, n, __ATOMIC_RELAXED); +} + +static inline long atomic_swap(long *value, long n) +{ + return __atomic_exchange_n(value, n, __ATOMIC_RELAXED); +} diff --git a/tools/testing/selftests/bpf/benchs/bench_count.c b/tools/testing/selftests/bpf/benchs/bench_count.c new file mode 100644 index 000000000000..befba7a82643 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_count.c @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include "bench.h" + +/* COUNT-GLOBAL benchmark */ + +static struct count_global_ctx { + struct counter hits; +} count_global_ctx; + +static void *count_global_producer(void *input) +{ + struct count_global_ctx *ctx = &count_global_ctx; + + while (true) { + atomic_inc(&ctx->hits.value); + } + return NULL; +} + +static void *count_global_consumer(void *input) +{ + return NULL; +} + +static void count_global_measure(struct bench_res *res) +{ + struct count_global_ctx *ctx = &count_global_ctx; + + res->hits = atomic_swap(&ctx->hits.value, 0); +} + +/* COUNT-local benchmark */ + +static struct count_local_ctx { + struct counter *hits; +} count_local_ctx; + +static void count_local_setup() +{ + struct count_local_ctx *ctx = &count_local_ctx; + + ctx->hits = calloc(env.consumer_cnt, sizeof(*ctx->hits)); + if (!ctx->hits) + exit(1); +} + +static void *count_local_producer(void *input) +{ + struct count_local_ctx *ctx = &count_local_ctx; + int idx = (long)input; + + while (true) { + atomic_inc(&ctx->hits[idx].value); + } + return NULL; +} + +static void *count_local_consumer(void *input) +{ + return NULL; +} + +static void count_local_measure(struct bench_res *res) +{ + struct count_local_ctx *ctx = &count_local_ctx; + int i; + + for (i = 0; i < env.producer_cnt; i++) { + res->hits += atomic_swap(&ctx->hits[i].value, 0); + } +} + +const struct bench bench_count_global = { + .name = "count-global", + .producer_thread = count_global_producer, + .consumer_thread = count_global_consumer, + .measure = count_global_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_count_local = { + .name = "count-local", + .setup = count_local_setup, + .producer_thread = count_local_producer, + .consumer_thread = count_local_consumer, + .measure = count_local_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/bench_rename.c b/tools/testing/selftests/bpf/benchs/bench_rename.c new file mode 100644 index 000000000000..e74cff40f4fe --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_rename.c @@ -0,0 +1,195 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include <fcntl.h> +#include "bench.h" +#include "test_overhead.skel.h" + +/* BPF triggering benchmarks */ +static struct ctx { + struct test_overhead *skel; + struct counter hits; + int fd; +} ctx; + +static void validate() +{ + if (env.producer_cnt != 1) { + fprintf(stderr, "benchmark doesn't support multi-producer!\n"); + exit(1); + } + if (env.consumer_cnt != 1) { + fprintf(stderr, "benchmark doesn't support multi-consumer!\n"); + exit(1); + } +} + +static void *producer(void *input) +{ + char buf[] = "test_overhead"; + int err; + + while (true) { + err = write(ctx.fd, buf, sizeof(buf)); + if (err < 0) { + fprintf(stderr, "write failed\n"); + exit(1); + } + atomic_inc(&ctx.hits.value); + } +} + +static void measure(struct bench_res *res) +{ + res->hits = atomic_swap(&ctx.hits.value, 0); +} + +static void setup_ctx() +{ + setup_libbpf(); + + ctx.skel = test_overhead__open_and_load(); + if (!ctx.skel) { + fprintf(stderr, "failed to open skeleton\n"); + exit(1); + } + + ctx.fd = open("/proc/self/comm", O_WRONLY|O_TRUNC); + if (ctx.fd < 0) { + fprintf(stderr, "failed to open /proc/self/comm: %d\n", -errno); + exit(1); + } +} + +static void attach_bpf(struct bpf_program *prog) +{ + struct bpf_link *link; + + link = bpf_program__attach(prog); + if (IS_ERR(link)) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } +} + +static void setup_base() +{ + setup_ctx(); +} + +static void setup_kprobe() +{ + setup_ctx(); + attach_bpf(ctx.skel->progs.prog1); +} + +static void setup_kretprobe() +{ + setup_ctx(); + attach_bpf(ctx.skel->progs.prog2); +} + +static void setup_rawtp() +{ + setup_ctx(); + attach_bpf(ctx.skel->progs.prog3); +} + +static void setup_fentry() +{ + setup_ctx(); + attach_bpf(ctx.skel->progs.prog4); +} + +static void setup_fexit() +{ + setup_ctx(); + attach_bpf(ctx.skel->progs.prog5); +} + +static void setup_fmodret() +{ + setup_ctx(); + attach_bpf(ctx.skel->progs.prog6); +} + +static void *consumer(void *input) +{ + return NULL; +} + +const struct bench bench_rename_base = { + .name = "rename-base", + .validate = validate, + .setup = setup_base, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_rename_kprobe = { + .name = "rename-kprobe", + .validate = validate, + .setup = setup_kprobe, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_rename_kretprobe = { + .name = "rename-kretprobe", + .validate = validate, + .setup = setup_kretprobe, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_rename_rawtp = { + .name = "rename-rawtp", + .validate = validate, + .setup = setup_rawtp, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_rename_fentry = { + .name = "rename-fentry", + .validate = validate, + .setup = setup_fentry, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_rename_fexit = { + .name = "rename-fexit", + .validate = validate, + .setup = setup_fexit, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_rename_fmodret = { + .name = "rename-fmodret", + .validate = validate, + .setup = setup_fmodret, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c new file mode 100644 index 000000000000..da87c7f31891 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c @@ -0,0 +1,566 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include <asm/barrier.h> +#include <linux/perf_event.h> +#include <linux/ring_buffer.h> +#include <sys/epoll.h> +#include <sys/mman.h> +#include <argp.h> +#include <stdlib.h> +#include "bench.h" +#include "ringbuf_bench.skel.h" +#include "perfbuf_bench.skel.h" + +static struct { + bool back2back; + int batch_cnt; + bool sampled; + int sample_rate; + int ringbuf_sz; /* per-ringbuf, in bytes */ + bool ringbuf_use_output; /* use slower output API */ + int perfbuf_sz; /* per-CPU size, in pages */ +} args = { + .back2back = false, + .batch_cnt = 500, + .sampled = false, + .sample_rate = 500, + .ringbuf_sz = 512 * 1024, + .ringbuf_use_output = false, + .perfbuf_sz = 128, +}; + +enum { + ARG_RB_BACK2BACK = 2000, + ARG_RB_USE_OUTPUT = 2001, + ARG_RB_BATCH_CNT = 2002, + ARG_RB_SAMPLED = 2003, + ARG_RB_SAMPLE_RATE = 2004, +}; + +static const struct argp_option opts[] = { + { "rb-b2b", ARG_RB_BACK2BACK, NULL, 0, "Back-to-back mode"}, + { "rb-use-output", ARG_RB_USE_OUTPUT, NULL, 0, "Use bpf_ringbuf_output() instead of bpf_ringbuf_reserve()"}, + { "rb-batch-cnt", ARG_RB_BATCH_CNT, "CNT", 0, "Set BPF-side record batch count"}, + { "rb-sampled", ARG_RB_SAMPLED, NULL, 0, "Notification sampling"}, + { "rb-sample-rate", ARG_RB_SAMPLE_RATE, "RATE", 0, "Notification sample rate"}, + {}, +}; + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + switch (key) { + case ARG_RB_BACK2BACK: + args.back2back = true; + break; + case ARG_RB_USE_OUTPUT: + args.ringbuf_use_output = true; + break; + case ARG_RB_BATCH_CNT: + args.batch_cnt = strtol(arg, NULL, 10); + if (args.batch_cnt < 0) { + fprintf(stderr, "Invalid batch count."); + argp_usage(state); + } + break; + case ARG_RB_SAMPLED: + args.sampled = true; + break; + case ARG_RB_SAMPLE_RATE: + args.sample_rate = strtol(arg, NULL, 10); + if (args.sample_rate < 0) { + fprintf(stderr, "Invalid perfbuf sample rate."); + argp_usage(state); + } + break; + default: + return ARGP_ERR_UNKNOWN; + } + return 0; +} + +/* exported into benchmark runner */ +const struct argp bench_ringbufs_argp = { + .options = opts, + .parser = parse_arg, +}; + +/* RINGBUF-LIBBPF benchmark */ + +static struct counter buf_hits; + +static inline void bufs_trigger_batch() +{ + (void)syscall(__NR_getpgid); +} + +static void bufs_validate() +{ + if (env.consumer_cnt != 1) { + fprintf(stderr, "rb-libbpf benchmark doesn't support multi-consumer!\n"); + exit(1); + } + + if (args.back2back && env.producer_cnt > 1) { + fprintf(stderr, "back-to-back mode makes sense only for single-producer case!\n"); + exit(1); + } +} + +static void *bufs_sample_producer(void *input) +{ + if (args.back2back) { + /* initial batch to get everything started */ + bufs_trigger_batch(); + return NULL; + } + + while (true) + bufs_trigger_batch(); + return NULL; +} + +static struct ringbuf_libbpf_ctx { + struct ringbuf_bench *skel; + struct ring_buffer *ringbuf; +} ringbuf_libbpf_ctx; + +static void ringbuf_libbpf_measure(struct bench_res *res) +{ + struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx; + + res->hits = atomic_swap(&buf_hits.value, 0); + res->drops = atomic_swap(&ctx->skel->bss->dropped, 0); +} + +static struct ringbuf_bench *ringbuf_setup_skeleton() +{ + struct ringbuf_bench *skel; + + setup_libbpf(); + + skel = ringbuf_bench__open(); + if (!skel) { + fprintf(stderr, "failed to open skeleton\n"); + exit(1); + } + + skel->rodata->batch_cnt = args.batch_cnt; + skel->rodata->use_output = args.ringbuf_use_output ? 1 : 0; + + if (args.sampled) + /* record data + header take 16 bytes */ + skel->rodata->wakeup_data_size = args.sample_rate * 16; + + bpf_map__resize(skel->maps.ringbuf, args.ringbuf_sz); + + if (ringbuf_bench__load(skel)) { + fprintf(stderr, "failed to load skeleton\n"); + exit(1); + } + + return skel; +} + +static int buf_process_sample(void *ctx, void *data, size_t len) +{ + atomic_inc(&buf_hits.value); + return 0; +} + +static void ringbuf_libbpf_setup() +{ + struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx; + struct bpf_link *link; + + ctx->skel = ringbuf_setup_skeleton(); + ctx->ringbuf = ring_buffer__new(bpf_map__fd(ctx->skel->maps.ringbuf), + buf_process_sample, NULL, NULL); + if (!ctx->ringbuf) { + fprintf(stderr, "failed to create ringbuf\n"); + exit(1); + } + + link = bpf_program__attach(ctx->skel->progs.bench_ringbuf); + if (IS_ERR(link)) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } +} + +static void *ringbuf_libbpf_consumer(void *input) +{ + struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx; + + while (ring_buffer__poll(ctx->ringbuf, -1) >= 0) { + if (args.back2back) + bufs_trigger_batch(); + } + fprintf(stderr, "ringbuf polling failed!\n"); + return NULL; +} + +/* RINGBUF-CUSTOM benchmark */ +struct ringbuf_custom { + __u64 *consumer_pos; + __u64 *producer_pos; + __u64 mask; + void *data; + int map_fd; +}; + +static struct ringbuf_custom_ctx { + struct ringbuf_bench *skel; + struct ringbuf_custom ringbuf; + int epoll_fd; + struct epoll_event event; +} ringbuf_custom_ctx; + +static void ringbuf_custom_measure(struct bench_res *res) +{ + struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx; + + res->hits = atomic_swap(&buf_hits.value, 0); + res->drops = atomic_swap(&ctx->skel->bss->dropped, 0); +} + +static void ringbuf_custom_setup() +{ + struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx; + const size_t page_size = getpagesize(); + struct bpf_link *link; + struct ringbuf_custom *r; + void *tmp; + int err; + + ctx->skel = ringbuf_setup_skeleton(); + + ctx->epoll_fd = epoll_create1(EPOLL_CLOEXEC); + if (ctx->epoll_fd < 0) { + fprintf(stderr, "failed to create epoll fd: %d\n", -errno); + exit(1); + } + + r = &ctx->ringbuf; + r->map_fd = bpf_map__fd(ctx->skel->maps.ringbuf); + r->mask = args.ringbuf_sz - 1; + + /* Map writable consumer page */ + tmp = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, + r->map_fd, 0); + if (tmp == MAP_FAILED) { + fprintf(stderr, "failed to mmap consumer page: %d\n", -errno); + exit(1); + } + r->consumer_pos = tmp; + + /* Map read-only producer page and data pages. */ + tmp = mmap(NULL, page_size + 2 * args.ringbuf_sz, PROT_READ, MAP_SHARED, + r->map_fd, page_size); + if (tmp == MAP_FAILED) { + fprintf(stderr, "failed to mmap data pages: %d\n", -errno); + exit(1); + } + r->producer_pos = tmp; + r->data = tmp + page_size; + + ctx->event.events = EPOLLIN; + err = epoll_ctl(ctx->epoll_fd, EPOLL_CTL_ADD, r->map_fd, &ctx->event); + if (err < 0) { + fprintf(stderr, "failed to epoll add ringbuf: %d\n", -errno); + exit(1); + } + + link = bpf_program__attach(ctx->skel->progs.bench_ringbuf); + if (IS_ERR(link)) { + fprintf(stderr, "failed to attach program\n"); + exit(1); + } +} + +#define RINGBUF_BUSY_BIT (1 << 31) +#define RINGBUF_DISCARD_BIT (1 << 30) +#define RINGBUF_META_LEN 8 + +static inline int roundup_len(__u32 len) +{ + /* clear out top 2 bits */ + len <<= 2; + len >>= 2; + /* add length prefix */ + len += RINGBUF_META_LEN; + /* round up to 8 byte alignment */ + return (len + 7) / 8 * 8; +} + +static void ringbuf_custom_process_ring(struct ringbuf_custom *r) +{ + unsigned long cons_pos, prod_pos; + int *len_ptr, len; + bool got_new_data; + + cons_pos = smp_load_acquire(r->consumer_pos); + while (true) { + got_new_data = false; + prod_pos = smp_load_acquire(r->producer_pos); + while (cons_pos < prod_pos) { + len_ptr = r->data + (cons_pos & r->mask); + len = smp_load_acquire(len_ptr); + + /* sample not committed yet, bail out for now */ + if (len & RINGBUF_BUSY_BIT) + return; + + got_new_data = true; + cons_pos += roundup_len(len); + + atomic_inc(&buf_hits.value); + } + if (got_new_data) + smp_store_release(r->consumer_pos, cons_pos); + else + break; + }; +} + +static void *ringbuf_custom_consumer(void *input) +{ + struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx; + int cnt; + + do { + if (args.back2back) + bufs_trigger_batch(); + cnt = epoll_wait(ctx->epoll_fd, &ctx->event, 1, -1); + if (cnt > 0) + ringbuf_custom_process_ring(&ctx->ringbuf); + } while (cnt >= 0); + fprintf(stderr, "ringbuf polling failed!\n"); + return 0; +} + +/* PERFBUF-LIBBPF benchmark */ +static struct perfbuf_libbpf_ctx { + struct perfbuf_bench *skel; + struct perf_buffer *perfbuf; +} perfbuf_libbpf_ctx; + +static void perfbuf_measure(struct bench_res *res) +{ + struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx; + + res->hits = atomic_swap(&buf_hits.value, 0); + res->drops = atomic_swap(&ctx->skel->bss->dropped, 0); +} + +static struct perfbuf_bench *perfbuf_setup_skeleton() +{ + struct perfbuf_bench *skel; + + setup_libbpf(); + + skel = perfbuf_bench__open(); + if (!skel) { + fprintf(stderr, "failed to open skeleton\n"); + exit(1); + } + + skel->rodata->batch_cnt = args.batch_cnt; + + if (perfbuf_bench__load(skel)) { + fprintf(stderr, "failed to load skeleton\n"); + exit(1); + } + + return skel; +} + +static enum bpf_perf_event_ret +perfbuf_process_sample_raw(void *input_ctx, int cpu, + struct perf_event_header *e) +{ + switch (e->type) { + case PERF_RECORD_SAMPLE: + atomic_inc(&buf_hits.value); + break; + case PERF_RECORD_LOST: + break; + default: + return LIBBPF_PERF_EVENT_ERROR; + } + return LIBBPF_PERF_EVENT_CONT; +} + +static void perfbuf_libbpf_setup() +{ + struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx; + struct perf_event_attr attr; + struct perf_buffer_raw_opts pb_opts = { + .event_cb = perfbuf_process_sample_raw, + .ctx = (void *)(long)0, + .attr = &attr, + }; + struct bpf_link *link; + + ctx->skel = perfbuf_setup_skeleton(); + + memset(&attr, 0, sizeof(attr)); + attr.config = PERF_COUNT_SW_BPF_OUTPUT, + attr.type = PERF_TYPE_SOFTWARE; + attr.sample_type = PERF_SAMPLE_RAW; + /* notify only every Nth sample */ + if (args.sampled) { + attr.sample_period = args.sample_rate; + attr.wakeup_events = args.sample_rate; + } else { + attr.sample_period = 1; + attr.wakeup_events = 1; + } + + if (args.sample_rate > args.batch_cnt) { + fprintf(stderr, "sample rate %d is too high for given batch count %d\n", + args.sample_rate, args.batch_cnt); + exit(1); + } + + ctx->perfbuf = perf_buffer__new_raw(bpf_map__fd(ctx->skel->maps.perfbuf), + args.perfbuf_sz, &pb_opts); + if (!ctx->perfbuf) { + fprintf(stderr, "failed to create perfbuf\n"); + exit(1); + } + + link = bpf_program__attach(ctx->skel->progs.bench_perfbuf); + if (IS_ERR(link)) { + fprintf(stderr, "failed to attach program\n"); + exit(1); + } +} + +static void *perfbuf_libbpf_consumer(void *input) +{ + struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx; + + while (perf_buffer__poll(ctx->perfbuf, -1) >= 0) { + if (args.back2back) + bufs_trigger_batch(); + } + fprintf(stderr, "perfbuf polling failed!\n"); + return NULL; +} + +/* PERFBUF-CUSTOM benchmark */ + +/* copies of internal libbpf definitions */ +struct perf_cpu_buf { + struct perf_buffer *pb; + void *base; /* mmap()'ed memory */ + void *buf; /* for reconstructing segmented data */ + size_t buf_size; + int fd; + int cpu; + int map_key; +}; + +struct perf_buffer { + perf_buffer_event_fn event_cb; + perf_buffer_sample_fn sample_cb; + perf_buffer_lost_fn lost_cb; + void *ctx; /* passed into callbacks */ + + size_t page_size; + size_t mmap_size; + struct perf_cpu_buf **cpu_bufs; + struct epoll_event *events; + int cpu_cnt; /* number of allocated CPU buffers */ + int epoll_fd; /* perf event FD */ + int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */ +}; + +static void *perfbuf_custom_consumer(void *input) +{ + struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx; + struct perf_buffer *pb = ctx->perfbuf; + struct perf_cpu_buf *cpu_buf; + struct perf_event_mmap_page *header; + size_t mmap_mask = pb->mmap_size - 1; + struct perf_event_header *ehdr; + __u64 data_head, data_tail; + size_t ehdr_size; + void *base; + int i, cnt; + + while (true) { + if (args.back2back) + bufs_trigger_batch(); + cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, -1); + if (cnt <= 0) { + fprintf(stderr, "perf epoll failed: %d\n", -errno); + exit(1); + } + + for (i = 0; i < cnt; ++i) { + cpu_buf = pb->events[i].data.ptr; + header = cpu_buf->base; + base = ((void *)header) + pb->page_size; + + data_head = ring_buffer_read_head(header); + data_tail = header->data_tail; + while (data_head != data_tail) { + ehdr = base + (data_tail & mmap_mask); + ehdr_size = ehdr->size; + + if (ehdr->type == PERF_RECORD_SAMPLE) + atomic_inc(&buf_hits.value); + + data_tail += ehdr_size; + } + ring_buffer_write_tail(header, data_tail); + } + } + return NULL; +} + +const struct bench bench_rb_libbpf = { + .name = "rb-libbpf", + .validate = bufs_validate, + .setup = ringbuf_libbpf_setup, + .producer_thread = bufs_sample_producer, + .consumer_thread = ringbuf_libbpf_consumer, + .measure = ringbuf_libbpf_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_rb_custom = { + .name = "rb-custom", + .validate = bufs_validate, + .setup = ringbuf_custom_setup, + .producer_thread = bufs_sample_producer, + .consumer_thread = ringbuf_custom_consumer, + .measure = ringbuf_custom_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_pb_libbpf = { + .name = "pb-libbpf", + .validate = bufs_validate, + .setup = perfbuf_libbpf_setup, + .producer_thread = bufs_sample_producer, + .consumer_thread = perfbuf_libbpf_consumer, + .measure = perfbuf_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_pb_custom = { + .name = "pb-custom", + .validate = bufs_validate, + .setup = perfbuf_libbpf_setup, + .producer_thread = bufs_sample_producer, + .consumer_thread = perfbuf_custom_consumer, + .measure = perfbuf_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c new file mode 100644 index 000000000000..49c22832f216 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c @@ -0,0 +1,167 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include "bench.h" +#include "trigger_bench.skel.h" + +/* BPF triggering benchmarks */ +static struct trigger_ctx { + struct trigger_bench *skel; +} ctx; + +static struct counter base_hits; + +static void trigger_validate() +{ + if (env.consumer_cnt != 1) { + fprintf(stderr, "benchmark doesn't support multi-consumer!\n"); + exit(1); + } +} + +static void *trigger_base_producer(void *input) +{ + while (true) { + (void)syscall(__NR_getpgid); + atomic_inc(&base_hits.value); + } + return NULL; +} + +static void trigger_base_measure(struct bench_res *res) +{ + res->hits = atomic_swap(&base_hits.value, 0); +} + +static void *trigger_producer(void *input) +{ + while (true) + (void)syscall(__NR_getpgid); + return NULL; +} + +static void trigger_measure(struct bench_res *res) +{ + res->hits = atomic_swap(&ctx.skel->bss->hits, 0); +} + +static void setup_ctx() +{ + setup_libbpf(); + + ctx.skel = trigger_bench__open_and_load(); + if (!ctx.skel) { + fprintf(stderr, "failed to open skeleton\n"); + exit(1); + } +} + +static void attach_bpf(struct bpf_program *prog) +{ + struct bpf_link *link; + + link = bpf_program__attach(prog); + if (IS_ERR(link)) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } +} + +static void trigger_tp_setup() +{ + setup_ctx(); + attach_bpf(ctx.skel->progs.bench_trigger_tp); +} + +static void trigger_rawtp_setup() +{ + setup_ctx(); + attach_bpf(ctx.skel->progs.bench_trigger_raw_tp); +} + +static void trigger_kprobe_setup() +{ + setup_ctx(); + attach_bpf(ctx.skel->progs.bench_trigger_kprobe); +} + +static void trigger_fentry_setup() +{ + setup_ctx(); + attach_bpf(ctx.skel->progs.bench_trigger_fentry); +} + +static void trigger_fmodret_setup() +{ + setup_ctx(); + attach_bpf(ctx.skel->progs.bench_trigger_fmodret); +} + +static void *trigger_consumer(void *input) +{ + return NULL; +} + +const struct bench bench_trig_base = { + .name = "trig-base", + .validate = trigger_validate, + .producer_thread = trigger_base_producer, + .consumer_thread = trigger_consumer, + .measure = trigger_base_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_trig_tp = { + .name = "trig-tp", + .validate = trigger_validate, + .setup = trigger_tp_setup, + .producer_thread = trigger_producer, + .consumer_thread = trigger_consumer, + .measure = trigger_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_trig_rawtp = { + .name = "trig-rawtp", + .validate = trigger_validate, + .setup = trigger_rawtp_setup, + .producer_thread = trigger_producer, + .consumer_thread = trigger_consumer, + .measure = trigger_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_trig_kprobe = { + .name = "trig-kprobe", + .validate = trigger_validate, + .setup = trigger_kprobe_setup, + .producer_thread = trigger_producer, + .consumer_thread = trigger_consumer, + .measure = trigger_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_trig_fentry = { + .name = "trig-fentry", + .validate = trigger_validate, + .setup = trigger_fentry_setup, + .producer_thread = trigger_producer, + .consumer_thread = trigger_consumer, + .measure = trigger_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_trig_fmodret = { + .name = "trig-fmodret", + .validate = trigger_validate, + .setup = trigger_fmodret_setup, + .producer_thread = trigger_producer, + .consumer_thread = trigger_consumer, + .measure = trigger_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/run_bench_rename.sh b/tools/testing/selftests/bpf/benchs/run_bench_rename.sh new file mode 100755 index 000000000000..16f774b1cdbe --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_bench_rename.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +set -eufo pipefail + +for i in base kprobe kretprobe rawtp fentry fexit fmodret +do + summary=$(sudo ./bench -w2 -d5 -a rename-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-) + printf "%-10s: %s\n" $i "$summary" +done diff --git a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh new file mode 100755 index 000000000000..af4aa04caba6 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh @@ -0,0 +1,75 @@ +#!/bin/bash + +set -eufo pipefail + +RUN_BENCH="sudo ./bench -w3 -d10 -a" + +function hits() +{ + echo "$*" | sed -E "s/.*hits\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/" +} + +function drops() +{ + echo "$*" | sed -E "s/.*drops\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/" +} + +function header() +{ + local len=${#1} + + printf "\n%s\n" "$1" + for i in $(seq 1 $len); do printf '='; done + printf '\n' +} + +function summarize() +{ + bench="$1" + summary=$(echo $2 | tail -n1) + printf "%-20s %s (drops %s)\n" "$bench" "$(hits $summary)" "$(drops $summary)" +} + +header "Single-producer, parallel producer" +for b in rb-libbpf rb-custom pb-libbpf pb-custom; do + summarize $b "$($RUN_BENCH $b)" +done + +header "Single-producer, parallel producer, sampled notification" +for b in rb-libbpf rb-custom pb-libbpf pb-custom; do + summarize $b "$($RUN_BENCH --rb-sampled $b)" +done + +header "Single-producer, back-to-back mode" +for b in rb-libbpf rb-custom pb-libbpf pb-custom; do + summarize $b "$($RUN_BENCH --rb-b2b $b)" + summarize $b-sampled "$($RUN_BENCH --rb-sampled --rb-b2b $b)" +done + +header "Ringbuf back-to-back, effect of sample rate" +for b in 1 5 10 25 50 100 250 500 1000 2000 3000; do + summarize "rb-sampled-$b" "$($RUN_BENCH --rb-b2b --rb-batch-cnt $b --rb-sampled --rb-sample-rate $b rb-custom)" +done +header "Perfbuf back-to-back, effect of sample rate" +for b in 1 5 10 25 50 100 250 500 1000 2000 3000; do + summarize "pb-sampled-$b" "$($RUN_BENCH --rb-b2b --rb-batch-cnt $b --rb-sampled --rb-sample-rate $b pb-custom)" +done + +header "Ringbuf back-to-back, reserve+commit vs output" +summarize "reserve" "$($RUN_BENCH --rb-b2b rb-custom)" +summarize "output" "$($RUN_BENCH --rb-b2b --rb-use-output rb-custom)" + +header "Ringbuf sampled, reserve+commit vs output" +summarize "reserve-sampled" "$($RUN_BENCH --rb-sampled rb-custom)" +summarize "output-sampled" "$($RUN_BENCH --rb-sampled --rb-use-output rb-custom)" + +header "Single-producer, consumer/producer competing on the same CPU, low batch count" +for b in rb-libbpf rb-custom pb-libbpf pb-custom; do + summarize $b "$($RUN_BENCH --rb-batch-cnt 1 --rb-sample-rate 1 --prod-affinity 0 --cons-affinity 0 $b)" +done + +header "Ringbuf, multi-producer contention" +for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do + summarize "rb-libbpf nr_prod $b" "$($RUN_BENCH -p$b --rb-batch-cnt 50 rb-libbpf)" +done + diff --git a/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh new file mode 100755 index 000000000000..78e83f243294 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +set -eufo pipefail + +for i in base tp rawtp kprobe fentry fmodret +do + summary=$(sudo ./bench -w2 -d5 -a trig-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-) + printf "%-10s: %s\n" $i "$summary" +done diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 60e3ae5d4e48..2118e23ac07a 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -25,6 +25,7 @@ CONFIG_XDP_SOCKETS=y CONFIG_FTRACE_SYSCALLS=y CONFIG_IPV6_TUNNEL=y CONFIG_IPV6_GRE=y +CONFIG_IPV6_SEG6_BPF=y CONFIG_NET_FOU=m CONFIG_NET_FOU_IP_TUNNELS=y CONFIG_IPV6_FOU=m @@ -37,3 +38,4 @@ CONFIG_IPV6_SIT=m CONFIG_BPF_JIT=y CONFIG_BPF_LSM=y CONFIG_SECURITY=y +CONFIG_LIRC=y diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c new file mode 100644 index 000000000000..e36dd1a1780d --- /dev/null +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -0,0 +1,167 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <errno.h> +#include <stdbool.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> + +#include <arpa/inet.h> + +#include <sys/epoll.h> + +#include <linux/err.h> +#include <linux/in.h> +#include <linux/in6.h> + +#include "bpf_util.h" +#include "network_helpers.h" + +#define clean_errno() (errno == 0 ? "None" : strerror(errno)) +#define log_err(MSG, ...) fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \ + __FILE__, __LINE__, clean_errno(), ##__VA_ARGS__) + +struct ipv4_packet pkt_v4 = { + .eth.h_proto = __bpf_constant_htons(ETH_P_IP), + .iph.ihl = 5, + .iph.protocol = IPPROTO_TCP, + .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES), + .tcp.urg_ptr = 123, + .tcp.doff = 5, +}; + +struct ipv6_packet pkt_v6 = { + .eth.h_proto = __bpf_constant_htons(ETH_P_IPV6), + .iph.nexthdr = IPPROTO_TCP, + .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES), + .tcp.urg_ptr = 123, + .tcp.doff = 5, +}; + +int start_server_with_port(int family, int type, __u16 port) +{ + struct sockaddr_storage addr = {}; + socklen_t len; + int fd; + + if (family == AF_INET) { + struct sockaddr_in *sin = (void *)&addr; + + sin->sin_family = AF_INET; + sin->sin_port = htons(port); + len = sizeof(*sin); + } else { + struct sockaddr_in6 *sin6 = (void *)&addr; + + sin6->sin6_family = AF_INET6; + sin6->sin6_port = htons(port); + len = sizeof(*sin6); + } + + fd = socket(family, type | SOCK_NONBLOCK, 0); + if (fd < 0) { + log_err("Failed to create server socket"); + return -1; + } + + if (bind(fd, (const struct sockaddr *)&addr, len) < 0) { + log_err("Failed to bind socket"); + close(fd); + return -1; + } + + if (type == SOCK_STREAM) { + if (listen(fd, 1) < 0) { + log_err("Failed to listed on socket"); + close(fd); + return -1; + } + } + + return fd; +} + +int start_server(int family, int type) +{ + return start_server_with_port(family, type, 0); +} + +static const struct timeval timeo_sec = { .tv_sec = 3 }; +static const size_t timeo_optlen = sizeof(timeo_sec); + +int connect_to_fd(int family, int type, int server_fd) +{ + int fd, save_errno; + + fd = socket(family, type, 0); + if (fd < 0) { + log_err("Failed to create client socket"); + return -1; + } + + if (connect_fd_to_fd(fd, server_fd) < 0 && errno != EINPROGRESS) { + save_errno = errno; + close(fd); + errno = save_errno; + return -1; + } + + return fd; +} + +int connect_fd_to_fd(int client_fd, int server_fd) +{ + struct sockaddr_storage addr; + socklen_t len = sizeof(addr); + int save_errno; + + if (setsockopt(client_fd, SOL_SOCKET, SO_RCVTIMEO, &timeo_sec, + timeo_optlen)) { + log_err("Failed to set SO_RCVTIMEO"); + return -1; + } + + if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) { + log_err("Failed to get server addr"); + return -1; + } + + if (connect(client_fd, (const struct sockaddr *)&addr, len) < 0) { + if (errno != EINPROGRESS) { + save_errno = errno; + log_err("Failed to connect to server"); + errno = save_errno; + } + return -1; + } + + return 0; +} + +int connect_wait(int fd) +{ + struct epoll_event ev = {}, events[2]; + int timeout_ms = 1000; + int efd, nfd; + + efd = epoll_create1(EPOLL_CLOEXEC); + if (efd < 0) { + log_err("Failed to open epoll fd"); + return -1; + } + + ev.events = EPOLLRDHUP | EPOLLOUT; + ev.data.fd = fd; + + if (epoll_ctl(efd, EPOLL_CTL_ADD, fd, &ev) < 0) { + log_err("Failed to register fd=%d on epoll fd=%d", fd, efd); + close(efd); + return -1; + } + + nfd = epoll_wait(efd, events, ARRAY_SIZE(events), timeout_ms); + if (nfd < 0) + log_err("Failed to wait for I/O event on epoll fd=%d", efd); + + close(efd); + return nfd; +} diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h new file mode 100644 index 000000000000..6a8009605670 --- /dev/null +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __NETWORK_HELPERS_H +#define __NETWORK_HELPERS_H +#include <sys/socket.h> +#include <sys/types.h> +#include <linux/types.h> +typedef __u16 __sum16; +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <netinet/tcp.h> +#include <bpf/bpf_endian.h> + +#define MAGIC_VAL 0x1234 +#define NUM_ITER 100000 +#define VIP_NUM 5 +#define MAGIC_BYTES 123 + +/* ipv4 test vector */ +struct ipv4_packet { + struct ethhdr eth; + struct iphdr iph; + struct tcphdr tcp; +} __packed; +extern struct ipv4_packet pkt_v4; + +/* ipv6 test vector */ +struct ipv6_packet { + struct ethhdr eth; + struct ipv6hdr iph; + struct tcphdr tcp; +} __packed; +extern struct ipv6_packet pkt_v6; + +int start_server(int family, int type); +int start_server_with_port(int family, int type, __u16 port); +int connect_to_fd(int family, int type, int server_fd); +int connect_fd_to_fd(int client_fd, int server_fd); +int connect_wait(int client_fd); + +#endif diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/prog_tests/align.c index 0262f7b374f9..c548aded6585 100644 --- a/tools/testing/selftests/bpf/test_align.c +++ b/tools/testing/selftests/bpf/prog_tests/align.c @@ -1,24 +1,5 @@ -#include <asm/types.h> -#include <linux/types.h> -#include <stdint.h> -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <errno.h> -#include <string.h> -#include <stddef.h> -#include <stdbool.h> - -#include <linux/unistd.h> -#include <linux/filter.h> -#include <linux/bpf_perf_event.h> -#include <linux/bpf.h> - -#include <bpf/bpf.h> - -#include "../../../include/linux/filter.h" -#include "bpf_rlimit.h" -#include "bpf_util.h" +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> #define MAX_INSNS 512 #define MAX_MATCHES 16 @@ -359,15 +340,15 @@ static struct bpf_align_test tests[] = { * is still (4n), fixed offset is not changed. * Also, we create a new reg->id. */ - {29, "R5_w=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc))"}, + {29, "R5_w=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc)"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (18) * which is 20. Then the variable offset is (4n), so * the total offset is 4-byte aligned and meets the * load's requirements. */ - {33, "R4=pkt(id=4,off=22,r=22,umax_value=2040,var_off=(0x0; 0x7fc))"}, - {33, "R5=pkt(id=4,off=18,r=22,umax_value=2040,var_off=(0x0; 0x7fc))"}, + {33, "R4=pkt(id=4,off=22,r=22,umax_value=2040,var_off=(0x0; 0x7fc)"}, + {33, "R5=pkt(id=4,off=18,r=22,umax_value=2040,var_off=(0x0; 0x7fc)"}, }, }, { @@ -410,15 +391,15 @@ static struct bpf_align_test tests[] = { /* Adding 14 makes R6 be (4n+2) */ {9, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, /* Packet pointer has (4n+2) offset */ - {11, "R5_w=pkt(id=1,off=0,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, - {13, "R4=pkt(id=1,off=4,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, + {11, "R5_w=pkt(id=1,off=0,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc)"}, + {13, "R4=pkt(id=1,off=4,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc)"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) * which is 2. Then the variable offset is (4n+2), so * the total offset is 4-byte aligned and meets the * load's requirements. */ - {15, "R5=pkt(id=1,off=0,r=4,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, + {15, "R5=pkt(id=1,off=0,r=4,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc)"}, /* Newly read value in R6 was shifted left by 2, so has * known alignment of 4. */ @@ -426,15 +407,15 @@ static struct bpf_align_test tests[] = { /* Added (4n) to packet pointer's (4n+2) var_off, giving * another (4n+2). */ - {19, "R5_w=pkt(id=2,off=0,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"}, - {21, "R4=pkt(id=2,off=4,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"}, + {19, "R5_w=pkt(id=2,off=0,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc)"}, + {21, "R4=pkt(id=2,off=4,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc)"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) * which is 2. Then the variable offset is (4n+2), so * the total offset is 4-byte aligned and meets the * load's requirements. */ - {23, "R5=pkt(id=2,off=0,r=4,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"}, + {23, "R5=pkt(id=2,off=0,r=4,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc)"}, }, }, { @@ -469,16 +450,16 @@ static struct bpf_align_test tests[] = { .matches = { {4, "R5_w=pkt_end(id=0,off=0,imm=0)"}, /* (ptr - ptr) << 2 == unknown, (4n) */ - {6, "R5_w=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc))"}, + {6, "R5_w=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc)"}, /* (4n) + 14 == (4n+2). We blow our bounds, because * the add could overflow. */ - {7, "R5_w=inv(id=0,var_off=(0x2; 0xfffffffffffffffc))"}, + {7, "R5_w=inv(id=0,smin_value=-9223372036854775806,smax_value=9223372036854775806,umin_value=2,umax_value=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"}, /* Checked s>=0 */ - {9, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"}, + {9, "R5=inv(id=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"}, /* packet pointer + nonnegative (4n+2) */ - {11, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"}, - {13, "R4_w=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"}, + {11, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"}, + {13, "R4_w=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"}, /* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine. * We checked the bounds, but it might have been able * to overflow if the packet pointer started in the @@ -486,7 +467,7 @@ static struct bpf_align_test tests[] = { * So we did not get a 'range' on R6, and the access * attempt will fail. */ - {15, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"}, + {15, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"}, } }, { @@ -528,7 +509,7 @@ static struct bpf_align_test tests[] = { /* New unknown value in R7 is (4n) */ {11, "R7_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Subtracting it from R6 blows our unsigned bounds */ - {12, "R6=inv(id=0,smin_value=-1006,smax_value=1034,var_off=(0x2; 0xfffffffffffffffc))"}, + {12, "R6=inv(id=0,smin_value=-1006,smax_value=1034,umin_value=2,umax_value=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"}, /* Checked s>= 0 */ {14, "R6=inv(id=0,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc))"}, /* At the time the word size load is performed from R5, @@ -537,7 +518,8 @@ static struct bpf_align_test tests[] = { * the total offset is 4-byte aligned and meets the * load's requirements. */ - {20, "R5=pkt(id=1,off=0,r=4,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc))"}, + {20, "R5=pkt(id=1,off=0,r=4,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc)"}, + }, }, { @@ -579,18 +561,18 @@ static struct bpf_align_test tests[] = { /* Adding 14 makes R6 be (4n+2) */ {11, "R6_w=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"}, /* Subtracting from packet pointer overflows ubounds */ - {13, "R5_w=pkt(id=1,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c))"}, + {13, "R5_w=pkt(id=1,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c)"}, /* New unknown value in R7 is (4n), >= 76 */ {15, "R7_w=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"}, /* Adding it to packet pointer gives nice bounds again */ - {16, "R5_w=pkt(id=2,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0x7fc))"}, + {16, "R5_w=pkt(id=2,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) * which is 2. Then the variable offset is (4n+2), so * the total offset is 4-byte aligned and meets the * load's requirements. */ - {20, "R5=pkt(id=2,off=0,r=4,umin_value=2,umax_value=1082,var_off=(0x2; 0x7fc))"}, + {20, "R5=pkt(id=2,off=0,r=4,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"}, }, }, }; @@ -669,51 +651,16 @@ static int do_test_single(struct bpf_align_test *test) return ret; } -static int do_test(unsigned int from, unsigned int to) +void test_align(void) { - int all_pass = 0; - int all_fail = 0; unsigned int i; - for (i = from; i < to; i++) { + for (i = 0; i < ARRAY_SIZE(tests); i++) { struct bpf_align_test *test = &tests[i]; - int fail; - printf("Test %3d: %s ... ", - i, test->descr); - fail = do_test_single(test); - if (fail) { - all_fail++; - printf("FAIL\n"); - } else { - all_pass++; - printf("PASS\n"); - } - } - printf("Results: %d pass %d fail\n", - all_pass, all_fail); - return all_fail ? EXIT_FAILURE : EXIT_SUCCESS; -} + if (!test__start_subtest(test->descr)) + continue; -int main(int argc, char **argv) -{ - unsigned int from = 0, to = ARRAY_SIZE(tests); - - if (argc == 3) { - unsigned int l = atoi(argv[argc - 2]); - unsigned int u = atoi(argv[argc - 1]); - - if (l < to && u < to) { - from = l; - to = u + 1; - } - } else if (argc == 2) { - unsigned int t = atoi(argv[argc - 1]); - - if (t < to) { - from = t; - to = t + 1; - } + CHECK_FAIL(do_test_single(test)); } - return do_test(from, to); } diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c new file mode 100644 index 000000000000..87c29dde1cf9 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -0,0 +1,409 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include <test_progs.h> +#include "bpf_iter_ipv6_route.skel.h" +#include "bpf_iter_netlink.skel.h" +#include "bpf_iter_bpf_map.skel.h" +#include "bpf_iter_task.skel.h" +#include "bpf_iter_task_file.skel.h" +#include "bpf_iter_test_kern1.skel.h" +#include "bpf_iter_test_kern2.skel.h" +#include "bpf_iter_test_kern3.skel.h" +#include "bpf_iter_test_kern4.skel.h" + +static int duration; + +static void test_btf_id_or_null(void) +{ + struct bpf_iter_test_kern3 *skel; + + skel = bpf_iter_test_kern3__open_and_load(); + if (CHECK(skel, "bpf_iter_test_kern3__open_and_load", + "skeleton open_and_load unexpectedly succeeded\n")) { + bpf_iter_test_kern3__destroy(skel); + return; + } +} + +static void do_dummy_read(struct bpf_program *prog) +{ + struct bpf_link *link; + char buf[16] = {}; + int iter_fd, len; + + link = bpf_program__attach_iter(prog, NULL); + if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) + return; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + goto free_link; + + /* not check contents, but ensure read() ends without error */ + while ((len = read(iter_fd, buf, sizeof(buf))) > 0) + ; + CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)); + + close(iter_fd); + +free_link: + bpf_link__destroy(link); +} + +static void test_ipv6_route(void) +{ + struct bpf_iter_ipv6_route *skel; + + skel = bpf_iter_ipv6_route__open_and_load(); + if (CHECK(!skel, "bpf_iter_ipv6_route__open_and_load", + "skeleton open_and_load failed\n")) + return; + + do_dummy_read(skel->progs.dump_ipv6_route); + + bpf_iter_ipv6_route__destroy(skel); +} + +static void test_netlink(void) +{ + struct bpf_iter_netlink *skel; + + skel = bpf_iter_netlink__open_and_load(); + if (CHECK(!skel, "bpf_iter_netlink__open_and_load", + "skeleton open_and_load failed\n")) + return; + + do_dummy_read(skel->progs.dump_netlink); + + bpf_iter_netlink__destroy(skel); +} + +static void test_bpf_map(void) +{ + struct bpf_iter_bpf_map *skel; + + skel = bpf_iter_bpf_map__open_and_load(); + if (CHECK(!skel, "bpf_iter_bpf_map__open_and_load", + "skeleton open_and_load failed\n")) + return; + + do_dummy_read(skel->progs.dump_bpf_map); + + bpf_iter_bpf_map__destroy(skel); +} + +static void test_task(void) +{ + struct bpf_iter_task *skel; + + skel = bpf_iter_task__open_and_load(); + if (CHECK(!skel, "bpf_iter_task__open_and_load", + "skeleton open_and_load failed\n")) + return; + + do_dummy_read(skel->progs.dump_task); + + bpf_iter_task__destroy(skel); +} + +static void test_task_file(void) +{ + struct bpf_iter_task_file *skel; + + skel = bpf_iter_task_file__open_and_load(); + if (CHECK(!skel, "bpf_iter_task_file__open_and_load", + "skeleton open_and_load failed\n")) + return; + + do_dummy_read(skel->progs.dump_task_file); + + bpf_iter_task_file__destroy(skel); +} + +/* The expected string is less than 16 bytes */ +static int do_read_with_fd(int iter_fd, const char *expected, + bool read_one_char) +{ + int err = -1, len, read_buf_len, start; + char buf[16] = {}; + + read_buf_len = read_one_char ? 1 : 16; + start = 0; + while ((len = read(iter_fd, buf + start, read_buf_len)) > 0) { + start += len; + if (CHECK(start >= 16, "read", "read len %d\n", len)) + return -1; + read_buf_len = read_one_char ? 1 : 16 - start; + } + if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + return -1; + + err = strcmp(buf, expected); + if (CHECK(err, "read", "incorrect read result: buf %s, expected %s\n", + buf, expected)) + return -1; + + return 0; +} + +static void test_anon_iter(bool read_one_char) +{ + struct bpf_iter_test_kern1 *skel; + struct bpf_link *link; + int iter_fd, err; + + skel = bpf_iter_test_kern1__open_and_load(); + if (CHECK(!skel, "bpf_iter_test_kern1__open_and_load", + "skeleton open_and_load failed\n")) + return; + + err = bpf_iter_test_kern1__attach(skel); + if (CHECK(err, "bpf_iter_test_kern1__attach", + "skeleton attach failed\n")) { + goto out; + } + + link = skel->links.dump_task; + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + goto out; + + do_read_with_fd(iter_fd, "abcd", read_one_char); + close(iter_fd); + +out: + bpf_iter_test_kern1__destroy(skel); +} + +static int do_read(const char *path, const char *expected) +{ + int err, iter_fd; + + iter_fd = open(path, O_RDONLY); + if (CHECK(iter_fd < 0, "open", "open %s failed: %s\n", + path, strerror(errno))) + return -1; + + err = do_read_with_fd(iter_fd, expected, false); + close(iter_fd); + return err; +} + +static void test_file_iter(void) +{ + const char *path = "/sys/fs/bpf/bpf_iter_test1"; + struct bpf_iter_test_kern1 *skel1; + struct bpf_iter_test_kern2 *skel2; + struct bpf_link *link; + int err; + + skel1 = bpf_iter_test_kern1__open_and_load(); + if (CHECK(!skel1, "bpf_iter_test_kern1__open_and_load", + "skeleton open_and_load failed\n")) + return; + + link = bpf_program__attach_iter(skel1->progs.dump_task, NULL); + if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) + goto out; + + /* unlink this path if it exists. */ + unlink(path); + + err = bpf_link__pin(link, path); + if (CHECK(err, "pin_iter", "pin_iter to %s failed: %d\n", path, err)) + goto free_link; + + err = do_read(path, "abcd"); + if (err) + goto unlink_path; + + /* file based iterator seems working fine. Let us a link update + * of the underlying link and `cat` the iterator again, its content + * should change. + */ + skel2 = bpf_iter_test_kern2__open_and_load(); + if (CHECK(!skel2, "bpf_iter_test_kern2__open_and_load", + "skeleton open_and_load failed\n")) + goto unlink_path; + + err = bpf_link__update_program(link, skel2->progs.dump_task); + if (CHECK(err, "update_prog", "update_prog failed\n")) + goto destroy_skel2; + + do_read(path, "ABCD"); + +destroy_skel2: + bpf_iter_test_kern2__destroy(skel2); +unlink_path: + unlink(path); +free_link: + bpf_link__destroy(link); +out: + bpf_iter_test_kern1__destroy(skel1); +} + +static void test_overflow(bool test_e2big_overflow, bool ret1) +{ + __u32 map_info_len, total_read_len, expected_read_len; + int err, iter_fd, map1_fd, map2_fd, len; + struct bpf_map_info map_info = {}; + struct bpf_iter_test_kern4 *skel; + struct bpf_link *link; + __u32 page_size; + char *buf; + + skel = bpf_iter_test_kern4__open(); + if (CHECK(!skel, "bpf_iter_test_kern4__open", + "skeleton open failed\n")) + return; + + /* create two maps: bpf program will only do bpf_seq_write + * for these two maps. The goal is one map output almost + * fills seq_file buffer and then the other will trigger + * overflow and needs restart. + */ + map1_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0); + if (CHECK(map1_fd < 0, "bpf_create_map", + "map_creation failed: %s\n", strerror(errno))) + goto out; + map2_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0); + if (CHECK(map2_fd < 0, "bpf_create_map", + "map_creation failed: %s\n", strerror(errno))) + goto free_map1; + + /* bpf_seq_printf kernel buffer is one page, so one map + * bpf_seq_write will mostly fill it, and the other map + * will partially fill and then trigger overflow and need + * bpf_seq_read restart. + */ + page_size = sysconf(_SC_PAGE_SIZE); + + if (test_e2big_overflow) { + skel->rodata->print_len = (page_size + 8) / 8; + expected_read_len = 2 * (page_size + 8); + } else if (!ret1) { + skel->rodata->print_len = (page_size - 8) / 8; + expected_read_len = 2 * (page_size - 8); + } else { + skel->rodata->print_len = 1; + expected_read_len = 2 * 8; + } + skel->rodata->ret1 = ret1; + + if (CHECK(bpf_iter_test_kern4__load(skel), + "bpf_iter_test_kern4__load", "skeleton load failed\n")) + goto free_map2; + + /* setup filtering map_id in bpf program */ + map_info_len = sizeof(map_info); + err = bpf_obj_get_info_by_fd(map1_fd, &map_info, &map_info_len); + if (CHECK(err, "get_map_info", "get map info failed: %s\n", + strerror(errno))) + goto free_map2; + skel->bss->map1_id = map_info.id; + + err = bpf_obj_get_info_by_fd(map2_fd, &map_info, &map_info_len); + if (CHECK(err, "get_map_info", "get map info failed: %s\n", + strerror(errno))) + goto free_map2; + skel->bss->map2_id = map_info.id; + + link = bpf_program__attach_iter(skel->progs.dump_bpf_map, NULL); + if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) + goto free_map2; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + goto free_link; + + buf = malloc(expected_read_len); + if (!buf) + goto close_iter; + + /* do read */ + total_read_len = 0; + if (test_e2big_overflow) { + while ((len = read(iter_fd, buf, expected_read_len)) > 0) + total_read_len += len; + + CHECK(len != -1 || errno != E2BIG, "read", + "expected ret -1, errno E2BIG, but get ret %d, error %s\n", + len, strerror(errno)); + goto free_buf; + } else if (!ret1) { + while ((len = read(iter_fd, buf, expected_read_len)) > 0) + total_read_len += len; + + if (CHECK(len < 0, "read", "read failed: %s\n", + strerror(errno))) + goto free_buf; + } else { + do { + len = read(iter_fd, buf, expected_read_len); + if (len > 0) + total_read_len += len; + } while (len > 0 || len == -EAGAIN); + + if (CHECK(len < 0, "read", "read failed: %s\n", + strerror(errno))) + goto free_buf; + } + + if (CHECK(total_read_len != expected_read_len, "read", + "total len %u, expected len %u\n", total_read_len, + expected_read_len)) + goto free_buf; + + if (CHECK(skel->bss->map1_accessed != 1, "map1_accessed", + "expected 1 actual %d\n", skel->bss->map1_accessed)) + goto free_buf; + + if (CHECK(skel->bss->map2_accessed != 2, "map2_accessed", + "expected 2 actual %d\n", skel->bss->map2_accessed)) + goto free_buf; + + CHECK(skel->bss->map2_seqnum1 != skel->bss->map2_seqnum2, + "map2_seqnum", "two different seqnum %lld %lld\n", + skel->bss->map2_seqnum1, skel->bss->map2_seqnum2); + +free_buf: + free(buf); +close_iter: + close(iter_fd); +free_link: + bpf_link__destroy(link); +free_map2: + close(map2_fd); +free_map1: + close(map1_fd); +out: + bpf_iter_test_kern4__destroy(skel); +} + +void test_bpf_iter(void) +{ + if (test__start_subtest("btf_id_or_null")) + test_btf_id_or_null(); + if (test__start_subtest("ipv6_route")) + test_ipv6_route(); + if (test__start_subtest("netlink")) + test_netlink(); + if (test__start_subtest("bpf_map")) + test_bpf_map(); + if (test__start_subtest("task")) + test_task(); + if (test__start_subtest("task_file")) + test_task_file(); + if (test__start_subtest("anon")) + test_anon_iter(false); + if (test__start_subtest("anon-read-one-char")) + test_anon_iter(true); + if (test__start_subtest("file")) + test_file_iter(); + if (test__start_subtest("overflow")) + test_overflow(false, false); + if (test__start_subtest("overflow-e2big")) + test_overflow(true, false); + if (test__start_subtest("prog-ret-1")) + test_overflow(false, true); +} diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c index f10029821e16..7afa4160416f 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c @@ -1,26 +1,30 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> +#define nr_iters 2 + void test_bpf_obj_id(void) { const __u64 array_magic_value = 0xfaceb00c; const __u32 array_key = 0; - const int nr_iters = 2; const char *file = "./test_obj_id.o"; const char *expected_prog_name = "test_obj_id"; const char *expected_map_name = "test_map_id"; const __u64 nsec_per_sec = 1000000000; - struct bpf_object *objs[nr_iters]; + struct bpf_object *objs[nr_iters] = {}; + struct bpf_link *links[nr_iters] = {}; + struct bpf_program *prog; int prog_fds[nr_iters], map_fds[nr_iters]; /* +1 to test for the info_len returned by kernel */ struct bpf_prog_info prog_infos[nr_iters + 1]; struct bpf_map_info map_infos[nr_iters + 1]; + struct bpf_link_info link_infos[nr_iters + 1]; /* Each prog only uses one map. +1 to test nr_map_ids * returned by kernel. */ __u32 map_ids[nr_iters + 1]; - char jited_insns[128], xlated_insns[128], zeros[128]; + char jited_insns[128], xlated_insns[128], zeros[128], tp_name[128]; __u32 i, next_id, info_len, nr_id_found, duration = 0; struct timespec real_time_ts, boot_time_ts; int err = 0; @@ -36,14 +40,15 @@ void test_bpf_obj_id(void) CHECK(err >= 0 || errno != ENOENT, "get-fd-by-notexist-map-id", "err %d errno %d\n", err, errno); - for (i = 0; i < nr_iters; i++) - objs[i] = NULL; + err = bpf_link_get_fd_by_id(0); + CHECK(err >= 0 || errno != ENOENT, + "get-fd-by-notexist-link-id", "err %d errno %d\n", err, errno); /* Check bpf_obj_get_info_by_fd() */ bzero(zeros, sizeof(zeros)); for (i = 0; i < nr_iters; i++) { now = time(NULL); - err = bpf_prog_load(file, BPF_PROG_TYPE_SOCKET_FILTER, + err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &objs[i], &prog_fds[i]); /* test_obj_id.o is a dumb prog. It should never fail * to load. @@ -60,6 +65,17 @@ void test_bpf_obj_id(void) if (CHECK_FAIL(err)) goto done; + prog = bpf_object__find_program_by_title(objs[i], + "raw_tp/sys_enter"); + if (CHECK_FAIL(!prog)) + goto done; + links[i] = bpf_program__attach(prog); + err = libbpf_get_error(links[i]); + if (CHECK(err, "prog_attach", "prog #%d, err %d\n", i, err)) { + links[i] = NULL; + goto done; + } + /* Check getting map info */ info_len = sizeof(struct bpf_map_info) * 2; bzero(&map_infos[i], info_len); @@ -107,7 +123,7 @@ void test_bpf_obj_id(void) load_time = (real_time_ts.tv_sec - boot_time_ts.tv_sec) + (prog_infos[i].load_time / nsec_per_sec); if (CHECK(err || - prog_infos[i].type != BPF_PROG_TYPE_SOCKET_FILTER || + prog_infos[i].type != BPF_PROG_TYPE_RAW_TRACEPOINT || info_len != sizeof(struct bpf_prog_info) || (env.jit_enabled && !prog_infos[i].jited_prog_len) || (env.jit_enabled && @@ -120,7 +136,11 @@ void test_bpf_obj_id(void) *(int *)(long)prog_infos[i].map_ids != map_infos[i].id || strcmp((char *)prog_infos[i].name, expected_prog_name), "get-prog-info(fd)", - "err %d errno %d i %d type %d(%d) info_len %u(%zu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name %s(%s)\n", + "err %d errno %d i %d type %d(%d) info_len %u(%zu) " + "jit_enabled %d jited_prog_len %u xlated_prog_len %u " + "jited_prog %d xlated_prog %d load_time %lu(%lu) " + "uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) " + "name %s(%s)\n", err, errno, i, prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER, info_len, sizeof(struct bpf_prog_info), @@ -135,6 +155,33 @@ void test_bpf_obj_id(void) *(int *)(long)prog_infos[i].map_ids, map_infos[i].id, prog_infos[i].name, expected_prog_name)) goto done; + + /* Check getting link info */ + info_len = sizeof(struct bpf_link_info) * 2; + bzero(&link_infos[i], info_len); + link_infos[i].raw_tracepoint.tp_name = (__u64)&tp_name; + link_infos[i].raw_tracepoint.tp_name_len = sizeof(tp_name); + err = bpf_obj_get_info_by_fd(bpf_link__fd(links[i]), + &link_infos[i], &info_len); + if (CHECK(err || + link_infos[i].type != BPF_LINK_TYPE_RAW_TRACEPOINT || + link_infos[i].prog_id != prog_infos[i].id || + link_infos[i].raw_tracepoint.tp_name != (__u64)&tp_name || + strcmp((char *)link_infos[i].raw_tracepoint.tp_name, + "sys_enter") || + info_len != sizeof(struct bpf_link_info), + "get-link-info(fd)", + "err %d errno %d info_len %u(%zu) type %d(%d) id %d " + "prog_id %d (%d) tp_name %s(%s)\n", + err, errno, + info_len, sizeof(struct bpf_link_info), + link_infos[i].type, BPF_LINK_TYPE_RAW_TRACEPOINT, + link_infos[i].id, + link_infos[i].prog_id, prog_infos[i].id, + (char *)link_infos[i].raw_tracepoint.tp_name, + "sys_enter")) + goto done; + } /* Check bpf_prog_get_next_id() */ @@ -247,7 +294,52 @@ void test_bpf_obj_id(void) "nr_id_found %u(%u)\n", nr_id_found, nr_iters); + /* Check bpf_link_get_next_id() */ + nr_id_found = 0; + next_id = 0; + while (!bpf_link_get_next_id(next_id, &next_id)) { + struct bpf_link_info link_info; + int link_fd, cmp_res; + + info_len = sizeof(link_info); + memset(&link_info, 0, info_len); + + link_fd = bpf_link_get_fd_by_id(next_id); + if (link_fd < 0 && errno == ENOENT) + /* The bpf_link is in the dead row */ + continue; + if (CHECK(link_fd < 0, "get-link-fd(next_id)", + "link_fd %d next_id %u errno %d\n", + link_fd, next_id, errno)) + break; + + for (i = 0; i < nr_iters; i++) + if (link_infos[i].id == next_id) + break; + + if (i == nr_iters) + continue; + + nr_id_found++; + + err = bpf_obj_get_info_by_fd(link_fd, &link_info, &info_len); + cmp_res = memcmp(&link_info, &link_infos[i], + offsetof(struct bpf_link_info, raw_tracepoint)); + CHECK(err || info_len != sizeof(link_info) || cmp_res, + "check get-link-info(next_id->fd)", + "err %d errno %d info_len %u(%zu) memcmp %d\n", + err, errno, info_len, sizeof(struct bpf_link_info), + cmp_res); + + close(link_fd); + } + CHECK(nr_id_found != nr_iters, + "check total link id found by get_next_id", + "nr_id_found %u(%u)\n", nr_id_found, nr_iters); + done: - for (i = 0; i < nr_iters; i++) + for (i = 0; i < nr_iters; i++) { + bpf_link__destroy(links[i]); bpf_object__close(objs[i]); + } } diff --git a/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c new file mode 100644 index 000000000000..f7ee8fa377ad --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include <test_progs.h> + +#include "test_btf_map_in_map.skel.h" + +void test_btf_map_in_map(void) +{ + int duration = 0, err, key = 0, val; + struct test_btf_map_in_map* skel; + + skel = test_btf_map_in_map__open_and_load(); + if (CHECK(!skel, "skel_open", "failed to open&load skeleton\n")) + return; + + err = test_btf_map_in_map__attach(skel); + if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + goto cleanup; + + /* inner1 = input, inner2 = input + 1 */ + val = bpf_map__fd(skel->maps.inner_map1); + bpf_map_update_elem(bpf_map__fd(skel->maps.outer_arr), &key, &val, 0); + val = bpf_map__fd(skel->maps.inner_map2); + bpf_map_update_elem(bpf_map__fd(skel->maps.outer_hash), &key, &val, 0); + skel->bss->input = 1; + usleep(1); + + bpf_map_lookup_elem(bpf_map__fd(skel->maps.inner_map1), &key, &val); + CHECK(val != 1, "inner1", "got %d != exp %d\n", val, 1); + bpf_map_lookup_elem(bpf_map__fd(skel->maps.inner_map2), &key, &val); + CHECK(val != 2, "inner2", "got %d != exp %d\n", val, 2); + + /* inner1 = input + 1, inner2 = input */ + val = bpf_map__fd(skel->maps.inner_map2); + bpf_map_update_elem(bpf_map__fd(skel->maps.outer_arr), &key, &val, 0); + val = bpf_map__fd(skel->maps.inner_map1); + bpf_map_update_elem(bpf_map__fd(skel->maps.outer_hash), &key, &val, 0); + skel->bss->input = 3; + usleep(1); + + bpf_map_lookup_elem(bpf_map__fd(skel->maps.inner_map1), &key, &val); + CHECK(val != 4, "inner1", "got %d != exp %d\n", val, 4); + bpf_map_lookup_elem(bpf_map__fd(skel->maps.inner_map2), &key, &val); + CHECK(val != 3, "inner2", "got %d != exp %d\n", val, 3); + +cleanup: + test_btf_map_in_map__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c new file mode 100644 index 000000000000..059047af7df3 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +#include <test_progs.h> + +#include "network_helpers.h" +#include "cgroup_skb_sk_lookup_kern.skel.h" + +static void run_lookup_test(__u16 *g_serv_port, int out_sk) +{ + int serv_sk = -1, in_sk = -1, serv_in_sk = -1, err; + struct sockaddr_in6 addr = {}; + socklen_t addr_len = sizeof(addr); + __u32 duration = 0; + + serv_sk = start_server(AF_INET6, SOCK_STREAM); + if (CHECK(serv_sk < 0, "start_server", "failed to start server\n")) + return; + + err = getsockname(serv_sk, (struct sockaddr *)&addr, &addr_len); + if (CHECK(err, "getsockname", "errno %d\n", errno)) + goto cleanup; + + *g_serv_port = addr.sin6_port; + + /* Client outside of test cgroup should fail to connect by timeout. */ + err = connect_fd_to_fd(out_sk, serv_sk); + if (CHECK(!err || errno != EINPROGRESS, "connect_fd_to_fd", + "unexpected result err %d errno %d\n", err, errno)) + goto cleanup; + + err = connect_wait(out_sk); + if (CHECK(err, "connect_wait", "unexpected result %d\n", err)) + goto cleanup; + + /* Client inside test cgroup should connect just fine. */ + in_sk = connect_to_fd(AF_INET6, SOCK_STREAM, serv_sk); + if (CHECK(in_sk < 0, "connect_to_fd", "errno %d\n", errno)) + goto cleanup; + + serv_in_sk = accept(serv_sk, NULL, NULL); + if (CHECK(serv_in_sk < 0, "accept", "errno %d\n", errno)) + goto cleanup; + +cleanup: + close(serv_in_sk); + close(in_sk); + close(serv_sk); +} + +static void run_cgroup_bpf_test(const char *cg_path, int out_sk) +{ + struct cgroup_skb_sk_lookup_kern *skel; + struct bpf_link *link; + __u32 duration = 0; + int cgfd = -1; + + skel = cgroup_skb_sk_lookup_kern__open_and_load(); + if (CHECK(!skel, "skel_open_load", "open_load failed\n")) + return; + + cgfd = test__join_cgroup(cg_path); + if (CHECK(cgfd < 0, "cgroup_join", "cgroup setup failed\n")) + goto cleanup; + + link = bpf_program__attach_cgroup(skel->progs.ingress_lookup, cgfd); + if (CHECK(IS_ERR(link), "cgroup_attach", "err: %ld\n", PTR_ERR(link))) + goto cleanup; + + run_lookup_test(&skel->bss->g_serv_port, out_sk); + + bpf_link__destroy(link); + +cleanup: + close(cgfd); + cgroup_skb_sk_lookup_kern__destroy(skel); +} + +void test_cgroup_skb_sk_lookup(void) +{ + const char *cg_path = "/foo"; + int out_sk; + + /* Create a socket before joining testing cgroup so that its cgroup id + * differs from that of testing cgroup. Moving selftests process to + * testing cgroup won't change cgroup id of an already created socket. + */ + out_sk = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0); + if (CHECK_FAIL(out_sk < 0)) + return; + + run_cgroup_bpf_test(cg_path, out_sk); + + close(out_sk); +} diff --git a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c new file mode 100644 index 000000000000..f259085cca6a --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c @@ -0,0 +1,456 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +// Copyright (c) 2020 Cloudflare + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <string.h> + +#include <linux/pkt_cls.h> + +#include <test_progs.h> + +#include "progs/test_cls_redirect.h" +#include "test_cls_redirect.skel.h" + +#define ENCAP_IP INADDR_LOOPBACK +#define ENCAP_PORT (1234) + +struct addr_port { + in_port_t port; + union { + struct in_addr in_addr; + struct in6_addr in6_addr; + }; +}; + +struct tuple { + int family; + struct addr_port src; + struct addr_port dst; +}; + +static int start_server(const struct sockaddr *addr, socklen_t len, int type) +{ + int fd = socket(addr->sa_family, type, 0); + if (CHECK_FAIL(fd == -1)) + return -1; + if (CHECK_FAIL(bind(fd, addr, len) == -1)) + goto err; + if (type == SOCK_STREAM && CHECK_FAIL(listen(fd, 128) == -1)) + goto err; + + return fd; + +err: + close(fd); + return -1; +} + +static int connect_to_server(const struct sockaddr *addr, socklen_t len, + int type) +{ + int fd = socket(addr->sa_family, type, 0); + if (CHECK_FAIL(fd == -1)) + return -1; + if (CHECK_FAIL(connect(fd, addr, len))) + goto err; + + return fd; + +err: + close(fd); + return -1; +} + +static bool fill_addr_port(const struct sockaddr *sa, struct addr_port *ap) +{ + const struct sockaddr_in6 *in6; + const struct sockaddr_in *in; + + switch (sa->sa_family) { + case AF_INET: + in = (const struct sockaddr_in *)sa; + ap->in_addr = in->sin_addr; + ap->port = in->sin_port; + return true; + + case AF_INET6: + in6 = (const struct sockaddr_in6 *)sa; + ap->in6_addr = in6->sin6_addr; + ap->port = in6->sin6_port; + return true; + + default: + return false; + } +} + +static bool set_up_conn(const struct sockaddr *addr, socklen_t len, int type, + int *server, int *conn, struct tuple *tuple) +{ + struct sockaddr_storage ss; + socklen_t slen = sizeof(ss); + struct sockaddr *sa = (struct sockaddr *)&ss; + + *server = start_server(addr, len, type); + if (*server < 0) + return false; + + if (CHECK_FAIL(getsockname(*server, sa, &slen))) + goto close_server; + + *conn = connect_to_server(sa, slen, type); + if (*conn < 0) + goto close_server; + + /* We want to simulate packets arriving at conn, so we have to + * swap src and dst. + */ + slen = sizeof(ss); + if (CHECK_FAIL(getsockname(*conn, sa, &slen))) + goto close_conn; + + if (CHECK_FAIL(!fill_addr_port(sa, &tuple->dst))) + goto close_conn; + + slen = sizeof(ss); + if (CHECK_FAIL(getpeername(*conn, sa, &slen))) + goto close_conn; + + if (CHECK_FAIL(!fill_addr_port(sa, &tuple->src))) + goto close_conn; + + tuple->family = ss.ss_family; + return true; + +close_conn: + close(*conn); + *conn = -1; +close_server: + close(*server); + *server = -1; + return false; +} + +static socklen_t prepare_addr(struct sockaddr_storage *addr, int family) +{ + struct sockaddr_in *addr4; + struct sockaddr_in6 *addr6; + + switch (family) { + case AF_INET: + addr4 = (struct sockaddr_in *)addr; + memset(addr4, 0, sizeof(*addr4)); + addr4->sin_family = family; + addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK); + return sizeof(*addr4); + case AF_INET6: + addr6 = (struct sockaddr_in6 *)addr; + memset(addr6, 0, sizeof(*addr6)); + addr6->sin6_family = family; + addr6->sin6_addr = in6addr_loopback; + return sizeof(*addr6); + default: + fprintf(stderr, "Invalid family %d", family); + return 0; + } +} + +static bool was_decapsulated(struct bpf_prog_test_run_attr *tattr) +{ + return tattr->data_size_out < tattr->data_size_in; +} + +enum type { + UDP, + TCP, + __NR_KIND, +}; + +enum hops { + NO_HOPS, + ONE_HOP, +}; + +enum flags { + NONE, + SYN, + ACK, +}; + +enum conn { + KNOWN_CONN, + UNKNOWN_CONN, +}; + +enum result { + ACCEPT, + FORWARD, +}; + +struct test_cfg { + enum type type; + enum result result; + enum conn conn; + enum hops hops; + enum flags flags; +}; + +static int test_str(void *buf, size_t len, const struct test_cfg *test, + int family) +{ + const char *family_str, *type, *conn, *hops, *result, *flags; + + family_str = "IPv4"; + if (family == AF_INET6) + family_str = "IPv6"; + + type = "TCP"; + if (test->type == UDP) + type = "UDP"; + + conn = "known"; + if (test->conn == UNKNOWN_CONN) + conn = "unknown"; + + hops = "no hops"; + if (test->hops == ONE_HOP) + hops = "one hop"; + + result = "accept"; + if (test->result == FORWARD) + result = "forward"; + + flags = "none"; + if (test->flags == SYN) + flags = "SYN"; + else if (test->flags == ACK) + flags = "ACK"; + + return snprintf(buf, len, "%s %s %s %s (%s, flags: %s)", family_str, + type, result, conn, hops, flags); +} + +static struct test_cfg tests[] = { + { TCP, ACCEPT, UNKNOWN_CONN, NO_HOPS, SYN }, + { TCP, ACCEPT, UNKNOWN_CONN, NO_HOPS, ACK }, + { TCP, FORWARD, UNKNOWN_CONN, ONE_HOP, ACK }, + { TCP, ACCEPT, KNOWN_CONN, ONE_HOP, ACK }, + { UDP, ACCEPT, UNKNOWN_CONN, NO_HOPS, NONE }, + { UDP, FORWARD, UNKNOWN_CONN, ONE_HOP, NONE }, + { UDP, ACCEPT, KNOWN_CONN, ONE_HOP, NONE }, +}; + +static void encap_init(encap_headers_t *encap, uint8_t hop_count, uint8_t proto) +{ + const uint8_t hlen = + (sizeof(struct guehdr) / sizeof(uint32_t)) + hop_count; + *encap = (encap_headers_t){ + .eth = { .h_proto = htons(ETH_P_IP) }, + .ip = { + .ihl = 5, + .version = 4, + .ttl = IPDEFTTL, + .protocol = IPPROTO_UDP, + .daddr = htonl(ENCAP_IP) + }, + .udp = { + .dest = htons(ENCAP_PORT), + }, + .gue = { + .hlen = hlen, + .proto_ctype = proto + }, + .unigue = { + .hop_count = hop_count + }, + }; +} + +static size_t build_input(const struct test_cfg *test, void *const buf, + const struct tuple *tuple) +{ + in_port_t sport = tuple->src.port; + encap_headers_t encap; + struct iphdr ip; + struct ipv6hdr ipv6; + struct tcphdr tcp; + struct udphdr udp; + struct in_addr next_hop; + uint8_t *p = buf; + int proto; + + proto = IPPROTO_IPIP; + if (tuple->family == AF_INET6) + proto = IPPROTO_IPV6; + + encap_init(&encap, test->hops == ONE_HOP ? 1 : 0, proto); + p = mempcpy(p, &encap, sizeof(encap)); + + if (test->hops == ONE_HOP) { + next_hop = (struct in_addr){ .s_addr = htonl(0x7f000002) }; + p = mempcpy(p, &next_hop, sizeof(next_hop)); + } + + proto = IPPROTO_TCP; + if (test->type == UDP) + proto = IPPROTO_UDP; + + switch (tuple->family) { + case AF_INET: + ip = (struct iphdr){ + .ihl = 5, + .version = 4, + .ttl = IPDEFTTL, + .protocol = proto, + .saddr = tuple->src.in_addr.s_addr, + .daddr = tuple->dst.in_addr.s_addr, + }; + p = mempcpy(p, &ip, sizeof(ip)); + break; + case AF_INET6: + ipv6 = (struct ipv6hdr){ + .version = 6, + .hop_limit = IPDEFTTL, + .nexthdr = proto, + .saddr = tuple->src.in6_addr, + .daddr = tuple->dst.in6_addr, + }; + p = mempcpy(p, &ipv6, sizeof(ipv6)); + break; + default: + return 0; + } + + if (test->conn == UNKNOWN_CONN) + sport--; + + switch (test->type) { + case TCP: + tcp = (struct tcphdr){ + .source = sport, + .dest = tuple->dst.port, + }; + if (test->flags == SYN) + tcp.syn = true; + if (test->flags == ACK) + tcp.ack = true; + p = mempcpy(p, &tcp, sizeof(tcp)); + break; + case UDP: + udp = (struct udphdr){ + .source = sport, + .dest = tuple->dst.port, + }; + p = mempcpy(p, &udp, sizeof(udp)); + break; + default: + return 0; + } + + return (void *)p - buf; +} + +static void close_fds(int *fds, int n) +{ + int i; + + for (i = 0; i < n; i++) + if (fds[i] > 0) + close(fds[i]); +} + +void test_cls_redirect(void) +{ + struct test_cls_redirect *skel = NULL; + struct bpf_prog_test_run_attr tattr = {}; + int families[] = { AF_INET, AF_INET6 }; + struct sockaddr_storage ss; + struct sockaddr *addr; + socklen_t slen; + int i, j, err; + + int servers[__NR_KIND][ARRAY_SIZE(families)] = {}; + int conns[__NR_KIND][ARRAY_SIZE(families)] = {}; + struct tuple tuples[__NR_KIND][ARRAY_SIZE(families)]; + + skel = test_cls_redirect__open(); + if (CHECK_FAIL(!skel)) + return; + + skel->rodata->ENCAPSULATION_IP = htonl(ENCAP_IP); + skel->rodata->ENCAPSULATION_PORT = htons(ENCAP_PORT); + + if (CHECK_FAIL(test_cls_redirect__load(skel))) + goto cleanup; + + addr = (struct sockaddr *)&ss; + for (i = 0; i < ARRAY_SIZE(families); i++) { + slen = prepare_addr(&ss, families[i]); + if (CHECK_FAIL(!slen)) + goto cleanup; + + if (CHECK_FAIL(!set_up_conn(addr, slen, SOCK_DGRAM, + &servers[UDP][i], &conns[UDP][i], + &tuples[UDP][i]))) + goto cleanup; + + if (CHECK_FAIL(!set_up_conn(addr, slen, SOCK_STREAM, + &servers[TCP][i], &conns[TCP][i], + &tuples[TCP][i]))) + goto cleanup; + } + + tattr.prog_fd = bpf_program__fd(skel->progs.cls_redirect); + for (i = 0; i < ARRAY_SIZE(tests); i++) { + struct test_cfg *test = &tests[i]; + + for (j = 0; j < ARRAY_SIZE(families); j++) { + struct tuple *tuple = &tuples[test->type][j]; + char input[256]; + char tmp[256]; + + test_str(tmp, sizeof(tmp), test, tuple->family); + if (!test__start_subtest(tmp)) + continue; + + tattr.data_out = tmp; + tattr.data_size_out = sizeof(tmp); + + tattr.data_in = input; + tattr.data_size_in = build_input(test, input, tuple); + if (CHECK_FAIL(!tattr.data_size_in)) + continue; + + err = bpf_prog_test_run_xattr(&tattr); + if (CHECK_FAIL(err)) + continue; + + if (tattr.retval != TC_ACT_REDIRECT) { + PRINT_FAIL("expected TC_ACT_REDIRECT, got %d\n", + tattr.retval); + continue; + } + + switch (test->result) { + case ACCEPT: + if (CHECK_FAIL(!was_decapsulated(&tattr))) + continue; + break; + case FORWARD: + if (CHECK_FAIL(was_decapsulated(&tattr))) + continue; + break; + default: + PRINT_FAIL("unknown result %d\n", test->result); + continue; + } + } + } + +cleanup: + test_cls_redirect__destroy(skel); + close_fds((int *)servers, sizeof(servers) / sizeof(servers[0][0])); + close_fds((int *)conns, sizeof(conns) / sizeof(conns[0][0])); +} diff --git a/tools/testing/selftests/bpf/prog_tests/connect_force_port.c b/tools/testing/selftests/bpf/prog_tests/connect_force_port.c new file mode 100644 index 000000000000..17bbf76812ca --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/connect_force_port.c @@ -0,0 +1,166 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <test_progs.h> +#include "cgroup_helpers.h" +#include "network_helpers.h" + +static int verify_ports(int family, int fd, + __u16 expected_local, __u16 expected_peer) +{ + struct sockaddr_storage addr; + socklen_t len = sizeof(addr); + __u16 port; + + if (getsockname(fd, (struct sockaddr *)&addr, &len)) { + log_err("Failed to get server addr"); + return -1; + } + + if (family == AF_INET) + port = ((struct sockaddr_in *)&addr)->sin_port; + else + port = ((struct sockaddr_in6 *)&addr)->sin6_port; + + if (ntohs(port) != expected_local) { + log_err("Unexpected local port %d, expected %d", ntohs(port), + expected_local); + return -1; + } + + if (getpeername(fd, (struct sockaddr *)&addr, &len)) { + log_err("Failed to get peer addr"); + return -1; + } + + if (family == AF_INET) + port = ((struct sockaddr_in *)&addr)->sin_port; + else + port = ((struct sockaddr_in6 *)&addr)->sin6_port; + + if (ntohs(port) != expected_peer) { + log_err("Unexpected peer port %d, expected %d", ntohs(port), + expected_peer); + return -1; + } + + return 0; +} + +static int run_test(int cgroup_fd, int server_fd, int family, int type) +{ + bool v4 = family == AF_INET; + __u16 expected_local_port = v4 ? 22222 : 22223; + __u16 expected_peer_port = 60000; + struct bpf_prog_load_attr attr = { + .file = v4 ? "./connect_force_port4.o" : + "./connect_force_port6.o", + }; + struct bpf_program *prog; + struct bpf_object *obj; + int xlate_fd, fd, err; + __u32 duration = 0; + + err = bpf_prog_load_xattr(&attr, &obj, &xlate_fd); + if (err) { + log_err("Failed to load BPF object"); + return -1; + } + + prog = bpf_object__find_program_by_title(obj, v4 ? + "cgroup/connect4" : + "cgroup/connect6"); + if (CHECK(!prog, "find_prog", "connect prog not found\n")) { + err = -EIO; + goto close_bpf_object; + } + + err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd, v4 ? + BPF_CGROUP_INET4_CONNECT : + BPF_CGROUP_INET6_CONNECT, 0); + if (err) { + log_err("Failed to attach BPF program"); + goto close_bpf_object; + } + + prog = bpf_object__find_program_by_title(obj, v4 ? + "cgroup/getpeername4" : + "cgroup/getpeername6"); + if (CHECK(!prog, "find_prog", "getpeername prog not found\n")) { + err = -EIO; + goto close_bpf_object; + } + + err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd, v4 ? + BPF_CGROUP_INET4_GETPEERNAME : + BPF_CGROUP_INET6_GETPEERNAME, 0); + if (err) { + log_err("Failed to attach BPF program"); + goto close_bpf_object; + } + + prog = bpf_object__find_program_by_title(obj, v4 ? + "cgroup/getsockname4" : + "cgroup/getsockname6"); + if (CHECK(!prog, "find_prog", "getsockname prog not found\n")) { + err = -EIO; + goto close_bpf_object; + } + + err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd, v4 ? + BPF_CGROUP_INET4_GETSOCKNAME : + BPF_CGROUP_INET6_GETSOCKNAME, 0); + if (err) { + log_err("Failed to attach BPF program"); + goto close_bpf_object; + } + + fd = connect_to_fd(family, type, server_fd); + if (fd < 0) { + err = -1; + goto close_bpf_object; + } + + err = verify_ports(family, fd, expected_local_port, + expected_peer_port); + close(fd); + +close_bpf_object: + bpf_object__close(obj); + return err; +} + +void test_connect_force_port(void) +{ + int server_fd, cgroup_fd; + + cgroup_fd = test__join_cgroup("/connect_force_port"); + if (CHECK_FAIL(cgroup_fd < 0)) + return; + + server_fd = start_server_with_port(AF_INET, SOCK_STREAM, 60123); + if (CHECK_FAIL(server_fd < 0)) + goto close_cgroup_fd; + CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET, SOCK_STREAM)); + close(server_fd); + + server_fd = start_server_with_port(AF_INET6, SOCK_STREAM, 60124); + if (CHECK_FAIL(server_fd < 0)) + goto close_cgroup_fd; + CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET6, SOCK_STREAM)); + close(server_fd); + + server_fd = start_server_with_port(AF_INET, SOCK_DGRAM, 60123); + if (CHECK_FAIL(server_fd < 0)) + goto close_cgroup_fd; + CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET, SOCK_DGRAM)); + close(server_fd); + + server_fd = start_server_with_port(AF_INET6, SOCK_DGRAM, 60124); + if (CHECK_FAIL(server_fd < 0)) + goto close_cgroup_fd; + CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET6, SOCK_DGRAM)); + close(server_fd); + +close_cgroup_fd: + close(cgroup_fd); +} diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index 31e177adbdf1..084ed26a7d78 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -392,7 +392,7 @@ static struct core_reloc_test_case test_cases[] = { .input = STRUCT_TO_CHAR_PTR(core_reloc_existence___minimal) { .a = 42, }, - .input_len = sizeof(struct core_reloc_existence), + .input_len = sizeof(struct core_reloc_existence___minimal), .output = STRUCT_TO_CHAR_PTR(core_reloc_existence_output) { .a_exists = 1, .b_exists = 0, diff --git a/tools/testing/selftests/bpf/prog_tests/enable_stats.c b/tools/testing/selftests/bpf/prog_tests/enable_stats.c new file mode 100644 index 000000000000..2cb2085917e7 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/enable_stats.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include "test_enable_stats.skel.h" + +void test_enable_stats(void) +{ + struct test_enable_stats *skel; + int stats_fd, err, prog_fd; + struct bpf_prog_info info; + __u32 info_len = sizeof(info); + int duration = 0; + + skel = test_enable_stats__open_and_load(); + if (CHECK(!skel, "skel_open_and_load", "skeleton open/load failed\n")) + return; + + stats_fd = bpf_enable_stats(BPF_STATS_RUN_TIME); + if (CHECK(stats_fd < 0, "get_stats_fd", "failed %d\n", errno)) { + test_enable_stats__destroy(skel); + return; + } + + err = test_enable_stats__attach(skel); + if (CHECK(err, "attach_raw_tp", "err %d\n", err)) + goto cleanup; + + test_enable_stats__detach(skel); + + prog_fd = bpf_program__fd(skel->progs.test_enable_stats); + memset(&info, 0, info_len); + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (CHECK(err, "get_prog_info", + "failed to get bpf_prog_info for fd %d\n", prog_fd)) + goto cleanup; + if (CHECK(info.run_time_ns == 0, "check_stats_enabled", + "failed to enable run_time_ns stats\n")) + goto cleanup; + + CHECK(info.run_cnt != skel->bss->count, "check_run_cnt_valid", + "invalid run_cnt stats\n"); + +cleanup: + test_enable_stats__destroy(skel); + close(stats_fd); +} diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c index c2642517e1d8..a895bfed55db 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2019 Facebook */ #include <test_progs.h> +#include <network_helpers.h> static void test_fexit_bpf2bpf_common(const char *obj_file, const char *target_obj_file, diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c index 92563898867c..ea14e3ece812 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c @@ -1,10 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> +#include <network_helpers.h> #include <error.h> #include <linux/if.h> #include <linux/if_tun.h> #include <sys/uio.h> +#include "bpf_flow.skel.h" + #ifndef IP_MF #define IP_MF 0x2000 #endif @@ -100,6 +103,7 @@ struct test { #define VLAN_HLEN 4 +static __u32 duration; struct test tests[] = { { .name = "ipv4", @@ -443,17 +447,130 @@ static int ifup(const char *ifname) return 0; } +static int init_prog_array(struct bpf_object *obj, struct bpf_map *prog_array) +{ + int i, err, map_fd, prog_fd; + struct bpf_program *prog; + char prog_name[32]; + + map_fd = bpf_map__fd(prog_array); + if (map_fd < 0) + return -1; + + for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + snprintf(prog_name, sizeof(prog_name), "flow_dissector/%i", i); + + prog = bpf_object__find_program_by_title(obj, prog_name); + if (!prog) + return -1; + + prog_fd = bpf_program__fd(prog); + if (prog_fd < 0) + return -1; + + err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY); + if (err) + return -1; + } + return 0; +} + +static void run_tests_skb_less(int tap_fd, struct bpf_map *keys) +{ + int i, err, keys_fd; + + keys_fd = bpf_map__fd(keys); + if (CHECK(keys_fd < 0, "bpf_map__fd", "err %d\n", keys_fd)) + return; + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + /* Keep in sync with 'flags' from eth_get_headlen. */ + __u32 eth_get_headlen_flags = + BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG; + struct bpf_prog_test_run_attr tattr = {}; + struct bpf_flow_keys flow_keys = {}; + __u32 key = (__u32)(tests[i].keys.sport) << 16 | + tests[i].keys.dport; + + /* For skb-less case we can't pass input flags; run + * only the tests that have a matching set of flags. + */ + + if (tests[i].flags != eth_get_headlen_flags) + continue; + + err = tx_tap(tap_fd, &tests[i].pkt, sizeof(tests[i].pkt)); + CHECK(err < 0, "tx_tap", "err %d errno %d\n", err, errno); + + err = bpf_map_lookup_elem(keys_fd, &key, &flow_keys); + CHECK_ATTR(err, tests[i].name, "bpf_map_lookup_elem %d\n", err); + + CHECK_ATTR(err, tests[i].name, "skb-less err %d\n", err); + CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys); + + err = bpf_map_delete_elem(keys_fd, &key); + CHECK_ATTR(err, tests[i].name, "bpf_map_delete_elem %d\n", err); + } +} + +static void test_skb_less_prog_attach(struct bpf_flow *skel, int tap_fd) +{ + int err, prog_fd; + + prog_fd = bpf_program__fd(skel->progs._dissect); + if (CHECK(prog_fd < 0, "bpf_program__fd", "err %d\n", prog_fd)) + return; + + err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0); + if (CHECK(err, "bpf_prog_attach", "err %d errno %d\n", err, errno)) + return; + + run_tests_skb_less(tap_fd, skel->maps.last_dissection); + + err = bpf_prog_detach(prog_fd, BPF_FLOW_DISSECTOR); + CHECK(err, "bpf_prog_detach", "err %d errno %d\n", err, errno); +} + +static void test_skb_less_link_create(struct bpf_flow *skel, int tap_fd) +{ + struct bpf_link *link; + int err, net_fd; + + net_fd = open("/proc/self/ns/net", O_RDONLY); + if (CHECK(net_fd < 0, "open(/proc/self/ns/net)", "err %d\n", errno)) + return; + + link = bpf_program__attach_netns(skel->progs._dissect, net_fd); + if (CHECK(IS_ERR(link), "attach_netns", "err %ld\n", PTR_ERR(link))) + goto out_close; + + run_tests_skb_less(tap_fd, skel->maps.last_dissection); + + err = bpf_link__destroy(link); + CHECK(err, "bpf_link__destroy", "err %d\n", err); +out_close: + close(net_fd); +} + void test_flow_dissector(void) { int i, err, prog_fd, keys_fd = -1, tap_fd; - struct bpf_object *obj; - __u32 duration = 0; + struct bpf_flow *skel; - err = bpf_flow_load(&obj, "./bpf_flow.o", "flow_dissector", - "jmp_table", "last_dissection", &prog_fd, &keys_fd); - if (CHECK_FAIL(err)) + skel = bpf_flow__open_and_load(); + if (CHECK(!skel, "skel", "failed to open/load skeleton\n")) return; + prog_fd = bpf_program__fd(skel->progs._dissect); + if (CHECK(prog_fd < 0, "bpf_program__fd", "err %d\n", prog_fd)) + goto out_destroy_skel; + keys_fd = bpf_map__fd(skel->maps.last_dissection); + if (CHECK(keys_fd < 0, "bpf_map__fd", "err %d\n", keys_fd)) + goto out_destroy_skel; + err = init_prog_array(skel->obj, skel->maps.jmp_table); + if (CHECK(err, "init_prog_array", "err %d\n", err)) + goto out_destroy_skel; + for (i = 0; i < ARRAY_SIZE(tests); i++) { struct bpf_flow_keys flow_keys; struct bpf_prog_test_run_attr tattr = { @@ -486,43 +603,17 @@ void test_flow_dissector(void) * via BPF map in this case. */ - err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0); - CHECK(err, "bpf_prog_attach", "err %d errno %d\n", err, errno); - tap_fd = create_tap("tap0"); CHECK(tap_fd < 0, "create_tap", "tap_fd %d errno %d\n", tap_fd, errno); err = ifup("tap0"); CHECK(err, "ifup", "err %d errno %d\n", err, errno); - for (i = 0; i < ARRAY_SIZE(tests); i++) { - /* Keep in sync with 'flags' from eth_get_headlen. */ - __u32 eth_get_headlen_flags = - BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG; - struct bpf_prog_test_run_attr tattr = {}; - struct bpf_flow_keys flow_keys = {}; - __u32 key = (__u32)(tests[i].keys.sport) << 16 | - tests[i].keys.dport; - - /* For skb-less case we can't pass input flags; run - * only the tests that have a matching set of flags. - */ - - if (tests[i].flags != eth_get_headlen_flags) - continue; - - err = tx_tap(tap_fd, &tests[i].pkt, sizeof(tests[i].pkt)); - CHECK(err < 0, "tx_tap", "err %d errno %d\n", err, errno); - - err = bpf_map_lookup_elem(keys_fd, &key, &flow_keys); - CHECK_ATTR(err, tests[i].name, "bpf_map_lookup_elem %d\n", err); - - CHECK_ATTR(err, tests[i].name, "skb-less err %d\n", err); - CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys); - - err = bpf_map_delete_elem(keys_fd, &key); - CHECK_ATTR(err, tests[i].name, "bpf_map_delete_elem %d\n", err); - } + /* Test direct prog attachment */ + test_skb_less_prog_attach(skel, tap_fd); + /* Test indirect prog attachment via link */ + test_skb_less_link_create(skel, tap_fd); - bpf_prog_detach(prog_fd, BPF_FLOW_DISSECTOR); - bpf_object__close(obj); + close(tap_fd); +out_destroy_skel: + bpf_flow__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c index dc5ef155ec28..0e8a4d2f023d 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> +#include <network_helpers.h> void test_flow_dissector_load_bytes(void) { diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c index 1f51ba66b98b..15cb554a66d8 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c @@ -11,6 +11,7 @@ #include <fcntl.h> #include <sched.h> #include <stdbool.h> +#include <sys/stat.h> #include <unistd.h> #include <linux/bpf.h> @@ -18,21 +19,30 @@ #include "test_progs.h" -static bool is_attached(int netns) +static int init_net = -1; + +static __u32 query_attached_prog_id(int netns) { - __u32 cnt; + __u32 prog_ids[1] = {}; + __u32 prog_cnt = ARRAY_SIZE(prog_ids); int err; - err = bpf_prog_query(netns, BPF_FLOW_DISSECTOR, 0, NULL, NULL, &cnt); + err = bpf_prog_query(netns, BPF_FLOW_DISSECTOR, 0, NULL, + prog_ids, &prog_cnt); if (CHECK_FAIL(err)) { perror("bpf_prog_query"); - return true; /* fail-safe */ + return 0; } - return cnt > 0; + return prog_cnt == 1 ? prog_ids[0] : 0; +} + +static bool prog_is_attached(int netns) +{ + return query_attached_prog_id(netns) > 0; } -static int load_prog(void) +static int load_prog(enum bpf_prog_type type) { struct bpf_insn prog[] = { BPF_MOV64_IMM(BPF_REG_0, BPF_OK), @@ -40,61 +50,566 @@ static int load_prog(void) }; int fd; - fd = bpf_load_program(BPF_PROG_TYPE_FLOW_DISSECTOR, prog, - ARRAY_SIZE(prog), "GPL", 0, NULL, 0); + fd = bpf_load_program(type, prog, ARRAY_SIZE(prog), "GPL", 0, NULL, 0); if (CHECK_FAIL(fd < 0)) perror("bpf_load_program"); return fd; } -static void do_flow_dissector_reattach(void) +static __u32 query_prog_id(int prog) { - int prog_fd[2] = { -1, -1 }; + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); int err; - prog_fd[0] = load_prog(); - if (prog_fd[0] < 0) - return; + err = bpf_obj_get_info_by_fd(prog, &info, &info_len); + if (CHECK_FAIL(err || info_len != sizeof(info))) { + perror("bpf_obj_get_info_by_fd"); + return 0; + } - prog_fd[1] = load_prog(); - if (prog_fd[1] < 0) - goto out_close; + return info.id; +} + +static int unshare_net(int old_net) +{ + int err, new_net; - err = bpf_prog_attach(prog_fd[0], 0, BPF_FLOW_DISSECTOR, 0); + err = unshare(CLONE_NEWNET); if (CHECK_FAIL(err)) { - perror("bpf_prog_attach-0"); - goto out_close; + perror("unshare(CLONE_NEWNET)"); + return -1; + } + new_net = open("/proc/self/ns/net", O_RDONLY); + if (CHECK_FAIL(new_net < 0)) { + perror("open(/proc/self/ns/net)"); + setns(old_net, CLONE_NEWNET); + return -1; } + return new_net; +} + +static void test_prog_attach_prog_attach(int netns, int prog1, int prog2) +{ + int err; + + err = bpf_prog_attach(prog1, 0, BPF_FLOW_DISSECTOR, 0); + if (CHECK_FAIL(err)) { + perror("bpf_prog_attach(prog1)"); + return; + } + CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1)); /* Expect success when attaching a different program */ - err = bpf_prog_attach(prog_fd[1], 0, BPF_FLOW_DISSECTOR, 0); + err = bpf_prog_attach(prog2, 0, BPF_FLOW_DISSECTOR, 0); if (CHECK_FAIL(err)) { - perror("bpf_prog_attach-1"); + perror("bpf_prog_attach(prog2) #1"); goto out_detach; } + CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog2)); /* Expect failure when attaching the same program twice */ - err = bpf_prog_attach(prog_fd[1], 0, BPF_FLOW_DISSECTOR, 0); + err = bpf_prog_attach(prog2, 0, BPF_FLOW_DISSECTOR, 0); if (CHECK_FAIL(!err || errno != EINVAL)) - perror("bpf_prog_attach-2"); + perror("bpf_prog_attach(prog2) #2"); + CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog2)); out_detach: err = bpf_prog_detach(0, BPF_FLOW_DISSECTOR); if (CHECK_FAIL(err)) perror("bpf_prog_detach"); + CHECK_FAIL(prog_is_attached(netns)); +} + +static void test_link_create_link_create(int netns, int prog1, int prog2) +{ + DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts); + int link1, link2; + + link1 = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &opts); + if (CHECK_FAIL(link < 0)) { + perror("bpf_link_create(prog1)"); + return; + } + CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1)); + + /* Expect failure creating link when another link exists */ + errno = 0; + link2 = bpf_link_create(prog2, netns, BPF_FLOW_DISSECTOR, &opts); + if (CHECK_FAIL(link2 != -1 || errno != E2BIG)) + perror("bpf_prog_attach(prog2) expected E2BIG"); + if (link2 != -1) + close(link2); + CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1)); + + close(link1); + CHECK_FAIL(prog_is_attached(netns)); +} + +static void test_prog_attach_link_create(int netns, int prog1, int prog2) +{ + DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts); + int err, link; + + err = bpf_prog_attach(prog1, -1, BPF_FLOW_DISSECTOR, 0); + if (CHECK_FAIL(err)) { + perror("bpf_prog_attach(prog1)"); + return; + } + CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1)); + + /* Expect failure creating link when prog attached */ + errno = 0; + link = bpf_link_create(prog2, netns, BPF_FLOW_DISSECTOR, &opts); + if (CHECK_FAIL(link != -1 || errno != EEXIST)) + perror("bpf_link_create(prog2) expected EEXIST"); + if (link != -1) + close(link); + CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1)); + + err = bpf_prog_detach(-1, BPF_FLOW_DISSECTOR); + if (CHECK_FAIL(err)) + perror("bpf_prog_detach"); + CHECK_FAIL(prog_is_attached(netns)); +} + +static void test_link_create_prog_attach(int netns, int prog1, int prog2) +{ + DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts); + int err, link; + + link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &opts); + if (CHECK_FAIL(link < 0)) { + perror("bpf_link_create(prog1)"); + return; + } + CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1)); + + /* Expect failure attaching prog when link exists */ + errno = 0; + err = bpf_prog_attach(prog2, -1, BPF_FLOW_DISSECTOR, 0); + if (CHECK_FAIL(!err || errno != EEXIST)) + perror("bpf_prog_attach(prog2) expected EEXIST"); + CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1)); + + close(link); + CHECK_FAIL(prog_is_attached(netns)); +} + +static void test_link_create_prog_detach(int netns, int prog1, int prog2) +{ + DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts); + int err, link; + + link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &opts); + if (CHECK_FAIL(link < 0)) { + perror("bpf_link_create(prog1)"); + return; + } + CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1)); + + /* Expect failure detaching prog when link exists */ + errno = 0; + err = bpf_prog_detach(-1, BPF_FLOW_DISSECTOR); + if (CHECK_FAIL(!err || errno != EINVAL)) + perror("bpf_prog_detach expected EINVAL"); + CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1)); + + close(link); + CHECK_FAIL(prog_is_attached(netns)); +} + +static void test_prog_attach_detach_query(int netns, int prog1, int prog2) +{ + int err; + + err = bpf_prog_attach(prog1, 0, BPF_FLOW_DISSECTOR, 0); + if (CHECK_FAIL(err)) { + perror("bpf_prog_attach(prog1)"); + return; + } + CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1)); + + err = bpf_prog_detach(0, BPF_FLOW_DISSECTOR); + if (CHECK_FAIL(err)) { + perror("bpf_prog_detach"); + return; + } + + /* Expect no prog attached after successful detach */ + CHECK_FAIL(prog_is_attached(netns)); +} + +static void test_link_create_close_query(int netns, int prog1, int prog2) +{ + DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts); + int link; + + link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &opts); + if (CHECK_FAIL(link < 0)) { + perror("bpf_link_create(prog1)"); + return; + } + CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1)); + + close(link); + /* Expect no prog attached after closing last link FD */ + CHECK_FAIL(prog_is_attached(netns)); +} + +static void test_link_update_no_old_prog(int netns, int prog1, int prog2) +{ + DECLARE_LIBBPF_OPTS(bpf_link_create_opts, create_opts); + DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts); + int err, link; + + link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &create_opts); + if (CHECK_FAIL(link < 0)) { + perror("bpf_link_create(prog1)"); + return; + } + CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1)); + + /* Expect success replacing the prog when old prog not specified */ + update_opts.flags = 0; + update_opts.old_prog_fd = 0; + err = bpf_link_update(link, prog2, &update_opts); + if (CHECK_FAIL(err)) + perror("bpf_link_update"); + CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog2)); + + close(link); + CHECK_FAIL(prog_is_attached(netns)); +} + +static void test_link_update_replace_old_prog(int netns, int prog1, int prog2) +{ + DECLARE_LIBBPF_OPTS(bpf_link_create_opts, create_opts); + DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts); + int err, link; + link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &create_opts); + if (CHECK_FAIL(link < 0)) { + perror("bpf_link_create(prog1)"); + return; + } + CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1)); + + /* Expect success F_REPLACE and old prog specified to succeed */ + update_opts.flags = BPF_F_REPLACE; + update_opts.old_prog_fd = prog1; + err = bpf_link_update(link, prog2, &update_opts); + if (CHECK_FAIL(err)) + perror("bpf_link_update"); + CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog2)); + + close(link); + CHECK_FAIL(prog_is_attached(netns)); +} + +static void test_link_update_invalid_opts(int netns, int prog1, int prog2) +{ + DECLARE_LIBBPF_OPTS(bpf_link_create_opts, create_opts); + DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts); + int err, link; + + link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &create_opts); + if (CHECK_FAIL(link < 0)) { + perror("bpf_link_create(prog1)"); + return; + } + CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1)); + + /* Expect update to fail w/ old prog FD but w/o F_REPLACE*/ + errno = 0; + update_opts.flags = 0; + update_opts.old_prog_fd = prog1; + err = bpf_link_update(link, prog2, &update_opts); + if (CHECK_FAIL(!err || errno != EINVAL)) { + perror("bpf_link_update expected EINVAL"); + goto out_close; + } + CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1)); + + /* Expect update to fail on old prog FD mismatch */ + errno = 0; + update_opts.flags = BPF_F_REPLACE; + update_opts.old_prog_fd = prog2; + err = bpf_link_update(link, prog2, &update_opts); + if (CHECK_FAIL(!err || errno != EPERM)) { + perror("bpf_link_update expected EPERM"); + goto out_close; + } + CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1)); + + /* Expect update to fail for invalid old prog FD */ + errno = 0; + update_opts.flags = BPF_F_REPLACE; + update_opts.old_prog_fd = -1; + err = bpf_link_update(link, prog2, &update_opts); + if (CHECK_FAIL(!err || errno != EBADF)) { + perror("bpf_link_update expected EBADF"); + goto out_close; + } + CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1)); + + /* Expect update to fail with invalid flags */ + errno = 0; + update_opts.flags = BPF_F_ALLOW_MULTI; + update_opts.old_prog_fd = 0; + err = bpf_link_update(link, prog2, &update_opts); + if (CHECK_FAIL(!err || errno != EINVAL)) + perror("bpf_link_update expected EINVAL"); + CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1)); + +out_close: + close(link); + CHECK_FAIL(prog_is_attached(netns)); +} + +static void test_link_update_invalid_prog(int netns, int prog1, int prog2) +{ + DECLARE_LIBBPF_OPTS(bpf_link_create_opts, create_opts); + DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts); + int err, link, prog3; + + link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &create_opts); + if (CHECK_FAIL(link < 0)) { + perror("bpf_link_create(prog1)"); + return; + } + CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1)); + + /* Expect failure when new prog FD is not valid */ + errno = 0; + update_opts.flags = 0; + update_opts.old_prog_fd = 0; + err = bpf_link_update(link, -1, &update_opts); + if (CHECK_FAIL(!err || errno != EBADF)) { + perror("bpf_link_update expected EINVAL"); + goto out_close_link; + } + CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1)); + + prog3 = load_prog(BPF_PROG_TYPE_SOCKET_FILTER); + if (prog3 < 0) + goto out_close_link; + + /* Expect failure when new prog FD type doesn't match */ + errno = 0; + update_opts.flags = 0; + update_opts.old_prog_fd = 0; + err = bpf_link_update(link, prog3, &update_opts); + if (CHECK_FAIL(!err || errno != EINVAL)) + perror("bpf_link_update expected EINVAL"); + CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1)); + + close(prog3); +out_close_link: + close(link); + CHECK_FAIL(prog_is_attached(netns)); +} + +static void test_link_update_netns_gone(int netns, int prog1, int prog2) +{ + DECLARE_LIBBPF_OPTS(bpf_link_create_opts, create_opts); + DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts); + int err, link, old_net; + + old_net = netns; + netns = unshare_net(old_net); + if (netns < 0) + return; + + link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &create_opts); + if (CHECK_FAIL(link < 0)) { + perror("bpf_link_create(prog1)"); + return; + } + CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1)); + + close(netns); + err = setns(old_net, CLONE_NEWNET); + if (CHECK_FAIL(err)) { + perror("setns(CLONE_NEWNET)"); + close(link); + return; + } + + /* Expect failure when netns destroyed */ + errno = 0; + update_opts.flags = 0; + update_opts.old_prog_fd = 0; + err = bpf_link_update(link, prog2, &update_opts); + if (CHECK_FAIL(!err || errno != ENOLINK)) + perror("bpf_link_update"); + + close(link); +} + +static void test_link_get_info(int netns, int prog1, int prog2) +{ + DECLARE_LIBBPF_OPTS(bpf_link_create_opts, create_opts); + DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts); + struct bpf_link_info info = {}; + struct stat netns_stat = {}; + __u32 info_len, link_id; + int err, link, old_net; + + old_net = netns; + netns = unshare_net(old_net); + if (netns < 0) + return; + + err = fstat(netns, &netns_stat); + if (CHECK_FAIL(err)) { + perror("stat(netns)"); + goto out_resetns; + } + + link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &create_opts); + if (CHECK_FAIL(link < 0)) { + perror("bpf_link_create(prog1)"); + goto out_resetns; + } + + info_len = sizeof(info); + err = bpf_obj_get_info_by_fd(link, &info, &info_len); + if (CHECK_FAIL(err)) { + perror("bpf_obj_get_info"); + goto out_unlink; + } + CHECK_FAIL(info_len != sizeof(info)); + + /* Expect link info to be sane and match prog and netns details */ + CHECK_FAIL(info.type != BPF_LINK_TYPE_NETNS); + CHECK_FAIL(info.id == 0); + CHECK_FAIL(info.prog_id != query_prog_id(prog1)); + CHECK_FAIL(info.netns.netns_ino != netns_stat.st_ino); + CHECK_FAIL(info.netns.attach_type != BPF_FLOW_DISSECTOR); + + update_opts.flags = 0; + update_opts.old_prog_fd = 0; + err = bpf_link_update(link, prog2, &update_opts); + if (CHECK_FAIL(err)) { + perror("bpf_link_update(prog2)"); + goto out_unlink; + } + + link_id = info.id; + info_len = sizeof(info); + err = bpf_obj_get_info_by_fd(link, &info, &info_len); + if (CHECK_FAIL(err)) { + perror("bpf_obj_get_info"); + goto out_unlink; + } + CHECK_FAIL(info_len != sizeof(info)); + + /* Expect no info change after update except in prog id */ + CHECK_FAIL(info.type != BPF_LINK_TYPE_NETNS); + CHECK_FAIL(info.id != link_id); + CHECK_FAIL(info.prog_id != query_prog_id(prog2)); + CHECK_FAIL(info.netns.netns_ino != netns_stat.st_ino); + CHECK_FAIL(info.netns.attach_type != BPF_FLOW_DISSECTOR); + + /* Leave netns link is attached to and close last FD to it */ + err = setns(old_net, CLONE_NEWNET); + if (CHECK_FAIL(err)) { + perror("setns(NEWNET)"); + goto out_unlink; + } + close(netns); + old_net = -1; + netns = -1; + + info_len = sizeof(info); + err = bpf_obj_get_info_by_fd(link, &info, &info_len); + if (CHECK_FAIL(err)) { + perror("bpf_obj_get_info"); + goto out_unlink; + } + CHECK_FAIL(info_len != sizeof(info)); + + /* Expect netns_ino to change to 0 */ + CHECK_FAIL(info.type != BPF_LINK_TYPE_NETNS); + CHECK_FAIL(info.id != link_id); + CHECK_FAIL(info.prog_id != query_prog_id(prog2)); + CHECK_FAIL(info.netns.netns_ino != 0); + CHECK_FAIL(info.netns.attach_type != BPF_FLOW_DISSECTOR); + +out_unlink: + close(link); +out_resetns: + if (old_net != -1) + setns(old_net, CLONE_NEWNET); + if (netns != -1) + close(netns); +} + +static void run_tests(int netns) +{ + struct test { + const char *test_name; + void (*test_func)(int netns, int prog1, int prog2); + } tests[] = { + { "prog attach, prog attach", + test_prog_attach_prog_attach }, + { "link create, link create", + test_link_create_link_create }, + { "prog attach, link create", + test_prog_attach_link_create }, + { "link create, prog attach", + test_link_create_prog_attach }, + { "link create, prog detach", + test_link_create_prog_detach }, + { "prog attach, detach, query", + test_prog_attach_detach_query }, + { "link create, close, query", + test_link_create_close_query }, + { "link update no old prog", + test_link_update_no_old_prog }, + { "link update with replace old prog", + test_link_update_replace_old_prog }, + { "link update invalid opts", + test_link_update_invalid_opts }, + { "link update invalid prog", + test_link_update_invalid_prog }, + { "link update netns gone", + test_link_update_netns_gone }, + { "link get info", + test_link_get_info }, + }; + int i, progs[2] = { -1, -1 }; + char test_name[80]; + + for (i = 0; i < ARRAY_SIZE(progs); i++) { + progs[i] = load_prog(BPF_PROG_TYPE_FLOW_DISSECTOR); + if (progs[i] < 0) + goto out_close; + } + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + snprintf(test_name, sizeof(test_name), + "flow dissector %s%s", + tests[i].test_name, + netns == init_net ? " (init_net)" : ""); + if (test__start_subtest(test_name)) + tests[i].test_func(netns, progs[0], progs[1]); + } out_close: - close(prog_fd[1]); - close(prog_fd[0]); + for (i = 0; i < ARRAY_SIZE(progs); i++) { + if (progs[i] != -1) + CHECK_FAIL(close(progs[i])); + } } void test_flow_dissector_reattach(void) { - int init_net, self_net, err; + int err, new_net, saved_net; - self_net = open("/proc/self/ns/net", O_RDONLY); - if (CHECK_FAIL(self_net < 0)) { + saved_net = open("/proc/self/ns/net", O_RDONLY); + if (CHECK_FAIL(saved_net < 0)) { perror("open(/proc/self/ns/net"); return; } @@ -111,30 +626,29 @@ void test_flow_dissector_reattach(void) goto out_close; } - if (is_attached(init_net)) { + if (prog_is_attached(init_net)) { test__skip(); printf("Can't test with flow dissector attached to init_net\n"); goto out_setns; } /* First run tests in root network namespace */ - do_flow_dissector_reattach(); + run_tests(init_net); /* Then repeat tests in a non-root namespace */ - err = unshare(CLONE_NEWNET); - if (CHECK_FAIL(err)) { - perror("unshare(CLONE_NEWNET)"); + new_net = unshare_net(init_net); + if (new_net < 0) goto out_setns; - } - do_flow_dissector_reattach(); + run_tests(new_net); + close(new_net); out_setns: /* Move back to netns we started in. */ - err = setns(self_net, CLONE_NEWNET); + err = setns(saved_net, CLONE_NEWNET); if (CHECK_FAIL(err)) perror("setns(/proc/self/ns/net)"); out_close: close(init_net); - close(self_net); + close(saved_net); } diff --git a/tools/testing/selftests/bpf/prog_tests/global_data.c b/tools/testing/selftests/bpf/prog_tests/global_data.c index c680926fce73..e3cb62b0a110 100644 --- a/tools/testing/selftests/bpf/prog_tests/global_data.c +++ b/tools/testing/selftests/bpf/prog_tests/global_data.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> +#include <network_helpers.h> static void test_global_data_number(struct bpf_object *obj, __u32 duration) { diff --git a/tools/testing/selftests/bpf/test_hashmap.c b/tools/testing/selftests/bpf/prog_tests/hashmap.c index c490e012c23f..428d488830c6 100644 --- a/tools/testing/selftests/bpf/test_hashmap.c +++ b/tools/testing/selftests/bpf/prog_tests/hashmap.c @@ -5,26 +5,17 @@ * * Copyright (c) 2019 Facebook */ -#include <stdio.h> -#include <errno.h> -#include <linux/err.h> +#include "test_progs.h" #include "bpf/hashmap.h" -#define CHECK(condition, format...) ({ \ - int __ret = !!(condition); \ - if (__ret) { \ - fprintf(stderr, "%s:%d:FAIL ", __func__, __LINE__); \ - fprintf(stderr, format); \ - } \ - __ret; \ -}) +static int duration = 0; -size_t hash_fn(const void *k, void *ctx) +static size_t hash_fn(const void *k, void *ctx) { return (long)k; } -bool equal_fn(const void *a, const void *b, void *ctx) +static bool equal_fn(const void *a, const void *b, void *ctx) { return (long)a == (long)b; } @@ -49,53 +40,55 @@ static inline size_t exp_cap(size_t sz) #define ELEM_CNT 62 -int test_hashmap_generic(void) +static void test_hashmap_generic(void) { struct hashmap_entry *entry, *tmp; int err, bkt, found_cnt, i; long long found_msk; struct hashmap *map; - fprintf(stderr, "%s: ", __func__); - map = hashmap__new(hash_fn, equal_fn, NULL); - if (CHECK(IS_ERR(map), "failed to create map: %ld\n", PTR_ERR(map))) - return 1; + if (CHECK(IS_ERR(map), "hashmap__new", + "failed to create map: %ld\n", PTR_ERR(map))) + return; for (i = 0; i < ELEM_CNT; i++) { const void *oldk, *k = (const void *)(long)i; void *oldv, *v = (void *)(long)(1024 + i); err = hashmap__update(map, k, v, &oldk, &oldv); - if (CHECK(err != -ENOENT, "unexpected result: %d\n", err)) - return 1; + if (CHECK(err != -ENOENT, "hashmap__update", + "unexpected result: %d\n", err)) + goto cleanup; if (i % 2) { err = hashmap__add(map, k, v); } else { err = hashmap__set(map, k, v, &oldk, &oldv); - if (CHECK(oldk != NULL || oldv != NULL, + if (CHECK(oldk != NULL || oldv != NULL, "check_kv", "unexpected k/v: %p=%p\n", oldk, oldv)) - return 1; + goto cleanup; } - if (CHECK(err, "failed to add k/v %ld = %ld: %d\n", + if (CHECK(err, "elem_add", "failed to add k/v %ld = %ld: %d\n", (long)k, (long)v, err)) - return 1; + goto cleanup; - if (CHECK(!hashmap__find(map, k, &oldv), + if (CHECK(!hashmap__find(map, k, &oldv), "elem_find", "failed to find key %ld\n", (long)k)) - return 1; - if (CHECK(oldv != v, "found value is wrong: %ld\n", (long)oldv)) - return 1; + goto cleanup; + if (CHECK(oldv != v, "elem_val", + "found value is wrong: %ld\n", (long)oldv)) + goto cleanup; } - if (CHECK(hashmap__size(map) != ELEM_CNT, + if (CHECK(hashmap__size(map) != ELEM_CNT, "hashmap__size", "invalid map size: %zu\n", hashmap__size(map))) - return 1; + goto cleanup; if (CHECK(hashmap__capacity(map) != exp_cap(hashmap__size(map)), + "hashmap_cap", "unexpected map capacity: %zu\n", hashmap__capacity(map))) - return 1; + goto cleanup; found_msk = 0; hashmap__for_each_entry(map, entry, bkt) { @@ -103,42 +96,47 @@ int test_hashmap_generic(void) long v = (long)entry->value; found_msk |= 1ULL << k; - if (CHECK(v - k != 1024, "invalid k/v pair: %ld = %ld\n", k, v)) - return 1; + if (CHECK(v - k != 1024, "check_kv", + "invalid k/v pair: %ld = %ld\n", k, v)) + goto cleanup; } - if (CHECK(found_msk != (1ULL << ELEM_CNT) - 1, + if (CHECK(found_msk != (1ULL << ELEM_CNT) - 1, "elem_cnt", "not all keys iterated: %llx\n", found_msk)) - return 1; + goto cleanup; for (i = 0; i < ELEM_CNT; i++) { const void *oldk, *k = (const void *)(long)i; void *oldv, *v = (void *)(long)(256 + i); err = hashmap__add(map, k, v); - if (CHECK(err != -EEXIST, "unexpected add result: %d\n", err)) - return 1; + if (CHECK(err != -EEXIST, "hashmap__add", + "unexpected add result: %d\n", err)) + goto cleanup; if (i % 2) err = hashmap__update(map, k, v, &oldk, &oldv); else err = hashmap__set(map, k, v, &oldk, &oldv); - if (CHECK(err, "failed to update k/v %ld = %ld: %d\n", - (long)k, (long)v, err)) - return 1; - if (CHECK(!hashmap__find(map, k, &oldv), + if (CHECK(err, "elem_upd", + "failed to update k/v %ld = %ld: %d\n", + (long)k, (long)v, err)) + goto cleanup; + if (CHECK(!hashmap__find(map, k, &oldv), "elem_find", "failed to find key %ld\n", (long)k)) - return 1; - if (CHECK(oldv != v, "found value is wrong: %ld\n", (long)oldv)) - return 1; + goto cleanup; + if (CHECK(oldv != v, "elem_val", + "found value is wrong: %ld\n", (long)oldv)) + goto cleanup; } - if (CHECK(hashmap__size(map) != ELEM_CNT, + if (CHECK(hashmap__size(map) != ELEM_CNT, "hashmap__size", "invalid updated map size: %zu\n", hashmap__size(map))) - return 1; + goto cleanup; if (CHECK(hashmap__capacity(map) != exp_cap(hashmap__size(map)), + "hashmap__capacity", "unexpected map capacity: %zu\n", hashmap__capacity(map))) - return 1; + goto cleanup; found_msk = 0; hashmap__for_each_entry_safe(map, entry, tmp, bkt) { @@ -146,20 +144,21 @@ int test_hashmap_generic(void) long v = (long)entry->value; found_msk |= 1ULL << k; - if (CHECK(v - k != 256, + if (CHECK(v - k != 256, "elem_check", "invalid updated k/v pair: %ld = %ld\n", k, v)) - return 1; + goto cleanup; } - if (CHECK(found_msk != (1ULL << ELEM_CNT) - 1, + if (CHECK(found_msk != (1ULL << ELEM_CNT) - 1, "elem_cnt", "not all keys iterated after update: %llx\n", found_msk)) - return 1; + goto cleanup; found_cnt = 0; hashmap__for_each_key_entry(map, entry, (void *)0) { found_cnt++; } - if (CHECK(!found_cnt, "didn't find any entries for key 0\n")) - return 1; + if (CHECK(!found_cnt, "found_cnt", + "didn't find any entries for key 0\n")) + goto cleanup; found_msk = 0; found_cnt = 0; @@ -173,30 +172,31 @@ int test_hashmap_generic(void) found_cnt++; found_msk |= 1ULL << (long)k; - if (CHECK(!hashmap__delete(map, k, &oldk, &oldv), + if (CHECK(!hashmap__delete(map, k, &oldk, &oldv), "elem_del", "failed to delete k/v %ld = %ld\n", (long)k, (long)v)) - return 1; - if (CHECK(oldk != k || oldv != v, + goto cleanup; + if (CHECK(oldk != k || oldv != v, "check_old", "invalid deleted k/v: expected %ld = %ld, got %ld = %ld\n", (long)k, (long)v, (long)oldk, (long)oldv)) - return 1; - if (CHECK(hashmap__delete(map, k, &oldk, &oldv), + goto cleanup; + if (CHECK(hashmap__delete(map, k, &oldk, &oldv), "elem_del", "unexpectedly deleted k/v %ld = %ld\n", (long)oldk, (long)oldv)) - return 1; + goto cleanup; } - if (CHECK(!found_cnt || !found_msk, + if (CHECK(!found_cnt || !found_msk, "found_entries", "didn't delete any key entries\n")) - return 1; - if (CHECK(hashmap__size(map) != ELEM_CNT - found_cnt, + goto cleanup; + if (CHECK(hashmap__size(map) != ELEM_CNT - found_cnt, "elem_cnt", "invalid updated map size (already deleted: %d): %zu\n", found_cnt, hashmap__size(map))) - return 1; + goto cleanup; if (CHECK(hashmap__capacity(map) != exp_cap(hashmap__size(map)), + "hashmap__capacity", "unexpected map capacity: %zu\n", hashmap__capacity(map))) - return 1; + goto cleanup; hashmap__for_each_entry_safe(map, entry, tmp, bkt) { const void *oldk, *k; @@ -208,53 +208,56 @@ int test_hashmap_generic(void) found_cnt++; found_msk |= 1ULL << (long)k; - if (CHECK(!hashmap__delete(map, k, &oldk, &oldv), + if (CHECK(!hashmap__delete(map, k, &oldk, &oldv), "elem_del", "failed to delete k/v %ld = %ld\n", (long)k, (long)v)) - return 1; - if (CHECK(oldk != k || oldv != v, + goto cleanup; + if (CHECK(oldk != k || oldv != v, "elem_check", "invalid old k/v: expect %ld = %ld, got %ld = %ld\n", (long)k, (long)v, (long)oldk, (long)oldv)) - return 1; - if (CHECK(hashmap__delete(map, k, &oldk, &oldv), + goto cleanup; + if (CHECK(hashmap__delete(map, k, &oldk, &oldv), "elem_del", "unexpectedly deleted k/v %ld = %ld\n", (long)k, (long)v)) - return 1; + goto cleanup; } if (CHECK(found_cnt != ELEM_CNT || found_msk != (1ULL << ELEM_CNT) - 1, + "found_cnt", "not all keys were deleted: found_cnt:%d, found_msk:%llx\n", found_cnt, found_msk)) - return 1; - if (CHECK(hashmap__size(map) != 0, + goto cleanup; + if (CHECK(hashmap__size(map) != 0, "hashmap__size", "invalid updated map size (already deleted: %d): %zu\n", found_cnt, hashmap__size(map))) - return 1; + goto cleanup; found_cnt = 0; hashmap__for_each_entry(map, entry, bkt) { - CHECK(false, "unexpected map entries left: %ld = %ld\n", - (long)entry->key, (long)entry->value); - return 1; + CHECK(false, "elem_exists", + "unexpected map entries left: %ld = %ld\n", + (long)entry->key, (long)entry->value); + goto cleanup; } - hashmap__free(map); + hashmap__clear(map); hashmap__for_each_entry(map, entry, bkt) { - CHECK(false, "unexpected map entries left: %ld = %ld\n", - (long)entry->key, (long)entry->value); - return 1; + CHECK(false, "elem_exists", + "unexpected map entries left: %ld = %ld\n", + (long)entry->key, (long)entry->value); + goto cleanup; } - fprintf(stderr, "OK\n"); - return 0; +cleanup: + hashmap__free(map); } -size_t collision_hash_fn(const void *k, void *ctx) +static size_t collision_hash_fn(const void *k, void *ctx) { return 0; } -int test_hashmap_multimap(void) +static void test_hashmap_multimap(void) { void *k1 = (void *)0, *k2 = (void *)1; struct hashmap_entry *entry; @@ -262,121 +265,116 @@ int test_hashmap_multimap(void) long found_msk; int err, bkt; - fprintf(stderr, "%s: ", __func__); - /* force collisions */ map = hashmap__new(collision_hash_fn, equal_fn, NULL); - if (CHECK(IS_ERR(map), "failed to create map: %ld\n", PTR_ERR(map))) - return 1; - + if (CHECK(IS_ERR(map), "hashmap__new", + "failed to create map: %ld\n", PTR_ERR(map))) + return; /* set up multimap: * [0] -> 1, 2, 4; * [1] -> 8, 16, 32; */ err = hashmap__append(map, k1, (void *)1); - if (CHECK(err, "failed to add k/v: %d\n", err)) - return 1; + if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err)) + goto cleanup; err = hashmap__append(map, k1, (void *)2); - if (CHECK(err, "failed to add k/v: %d\n", err)) - return 1; + if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err)) + goto cleanup; err = hashmap__append(map, k1, (void *)4); - if (CHECK(err, "failed to add k/v: %d\n", err)) - return 1; + if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err)) + goto cleanup; err = hashmap__append(map, k2, (void *)8); - if (CHECK(err, "failed to add k/v: %d\n", err)) - return 1; + if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err)) + goto cleanup; err = hashmap__append(map, k2, (void *)16); - if (CHECK(err, "failed to add k/v: %d\n", err)) - return 1; + if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err)) + goto cleanup; err = hashmap__append(map, k2, (void *)32); - if (CHECK(err, "failed to add k/v: %d\n", err)) - return 1; + if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err)) + goto cleanup; - if (CHECK(hashmap__size(map) != 6, + if (CHECK(hashmap__size(map) != 6, "hashmap_size", "invalid map size: %zu\n", hashmap__size(map))) - return 1; + goto cleanup; /* verify global iteration still works and sees all values */ found_msk = 0; hashmap__for_each_entry(map, entry, bkt) { found_msk |= (long)entry->value; } - if (CHECK(found_msk != (1 << 6) - 1, + if (CHECK(found_msk != (1 << 6) - 1, "found_msk", "not all keys iterated: %lx\n", found_msk)) - return 1; + goto cleanup; /* iterate values for key 1 */ found_msk = 0; hashmap__for_each_key_entry(map, entry, k1) { found_msk |= (long)entry->value; } - if (CHECK(found_msk != (1 | 2 | 4), + if (CHECK(found_msk != (1 | 2 | 4), "found_msk", "invalid k1 values: %lx\n", found_msk)) - return 1; + goto cleanup; /* iterate values for key 2 */ found_msk = 0; hashmap__for_each_key_entry(map, entry, k2) { found_msk |= (long)entry->value; } - if (CHECK(found_msk != (8 | 16 | 32), + if (CHECK(found_msk != (8 | 16 | 32), "found_msk", "invalid k2 values: %lx\n", found_msk)) - return 1; + goto cleanup; - fprintf(stderr, "OK\n"); - return 0; +cleanup: + hashmap__free(map); } -int test_hashmap_empty() +static void test_hashmap_empty() { struct hashmap_entry *entry; int bkt; struct hashmap *map; void *k = (void *)0; - fprintf(stderr, "%s: ", __func__); - /* force collisions */ map = hashmap__new(hash_fn, equal_fn, NULL); - if (CHECK(IS_ERR(map), "failed to create map: %ld\n", PTR_ERR(map))) - return 1; + if (CHECK(IS_ERR(map), "hashmap__new", + "failed to create map: %ld\n", PTR_ERR(map))) + goto cleanup; - if (CHECK(hashmap__size(map) != 0, + if (CHECK(hashmap__size(map) != 0, "hashmap__size", "invalid map size: %zu\n", hashmap__size(map))) - return 1; - if (CHECK(hashmap__capacity(map) != 0, + goto cleanup; + if (CHECK(hashmap__capacity(map) != 0, "hashmap__capacity", "invalid map capacity: %zu\n", hashmap__capacity(map))) - return 1; - if (CHECK(hashmap__find(map, k, NULL), "unexpected find\n")) - return 1; - if (CHECK(hashmap__delete(map, k, NULL, NULL), "unexpected delete\n")) - return 1; + goto cleanup; + if (CHECK(hashmap__find(map, k, NULL), "elem_find", + "unexpected find\n")) + goto cleanup; + if (CHECK(hashmap__delete(map, k, NULL, NULL), "elem_del", + "unexpected delete\n")) + goto cleanup; hashmap__for_each_entry(map, entry, bkt) { - CHECK(false, "unexpected iterated entry\n"); - return 1; + CHECK(false, "elem_found", "unexpected iterated entry\n"); + goto cleanup; } hashmap__for_each_key_entry(map, entry, k) { - CHECK(false, "unexpected key entry\n"); - return 1; + CHECK(false, "key_found", "unexpected key entry\n"); + goto cleanup; } - fprintf(stderr, "OK\n"); - return 0; +cleanup: + hashmap__free(map); } -int main(int argc, char **argv) +void test_hashmap() { - bool failed = false; - - if (test_hashmap_generic()) - failed = true; - if (test_hashmap_multimap()) - failed = true; - if (test_hashmap_empty()) - failed = true; - - return failed; + if (test__start_subtest("generic")) + test_hashmap_generic(); + if (test__start_subtest("multimap")) + test_hashmap_multimap(); + if (test__start_subtest("empty")) + test_hashmap_empty(); } diff --git a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c index 7507c8f689bc..42c3a3103c26 100644 --- a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c +++ b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> +#include <network_helpers.h> struct meta { int ifindex; diff --git a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c index eaf64595be88..c2d373e294bb 100644 --- a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c +++ b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> +#include <network_helpers.h> static void test_l4lb(const char *file) { diff --git a/tools/testing/selftests/bpf/prog_tests/map_lock.c b/tools/testing/selftests/bpf/prog_tests/map_lock.c index 8f91f1881d11..ce17b1ed8709 100644 --- a/tools/testing/selftests/bpf/prog_tests/map_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/map_lock.c @@ -1,5 +1,19 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> +#include <network_helpers.h> + +static void *spin_lock_thread(void *arg) +{ + __u32 duration, retval; + int err, prog_fd = *(u32 *) arg; + + err = bpf_prog_test_run(prog_fd, 10000, &pkt_v4, sizeof(pkt_v4), + NULL, NULL, &retval, &duration); + CHECK(err || retval, "", + "err %d errno %d retval %d duration %d\n", + err, errno, retval, duration); + pthread_exit(arg); +} static void *parallel_map_access(void *arg) { diff --git a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c index 542240e16564..e74dc501b27f 100644 --- a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c +++ b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c @@ -80,9 +80,6 @@ void test_ns_current_pid_tgid(void) "User pid/tgid %llu BPF pid/tgid %llu\n", id, bss.pid_tgid)) goto cleanup; cleanup: - if (!link) { - bpf_link__destroy(link); - link = NULL; - } + bpf_link__destroy(link); bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c index 1450ea2dd4cc..a122ce3b360e 100644 --- a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c +++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c @@ -6,6 +6,11 @@ #include <test_progs.h> #include "bpf/libbpf_internal.h" +/* AddressSanitizer sometimes crashes due to data dereference below, due to + * this being mmap()'ed memory. Disable instrumentation with + * no_sanitize_address attribute + */ +__attribute__((no_sanitize_address)) static void on_sample(void *ctx, int cpu, void *data, __u32 size) { int cpu_data = *(int *)data, duration = 0; diff --git a/tools/testing/selftests/bpf/prog_tests/pkt_access.c b/tools/testing/selftests/bpf/prog_tests/pkt_access.c index a2537dfa899c..44b514fabccd 100644 --- a/tools/testing/selftests/bpf/prog_tests/pkt_access.c +++ b/tools/testing/selftests/bpf/prog_tests/pkt_access.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> +#include <network_helpers.h> void test_pkt_access(void) { diff --git a/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c b/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c index 5f7aea605019..939015cd6dba 100644 --- a/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c +++ b/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> +#include <network_helpers.h> void test_pkt_md_access(void) { diff --git a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c b/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c index 5dd89b941f53..dde2b7ae7bc9 100644 --- a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c +++ b/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> +#include <network_helpers.h> void test_prog_run_xattr(void) { diff --git a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c index faccc66f4e39..f47e7b1cb32c 100644 --- a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c +++ b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> +#include <network_helpers.h> enum { QUEUE, diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c new file mode 100644 index 000000000000..2bba908dfa63 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c @@ -0,0 +1,211 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <linux/compiler.h> +#include <asm/barrier.h> +#include <test_progs.h> +#include <sys/mman.h> +#include <sys/epoll.h> +#include <time.h> +#include <sched.h> +#include <signal.h> +#include <pthread.h> +#include <sys/sysinfo.h> +#include <linux/perf_event.h> +#include <linux/ring_buffer.h> +#include "test_ringbuf.skel.h" + +#define EDONE 7777 + +static int duration = 0; + +struct sample { + int pid; + int seq; + long value; + char comm[16]; +}; + +static volatile int sample_cnt; + +static int process_sample(void *ctx, void *data, size_t len) +{ + struct sample *s = data; + + sample_cnt++; + + switch (s->seq) { + case 0: + CHECK(s->value != 333, "sample1_value", "exp %ld, got %ld\n", + 333L, s->value); + return 0; + case 1: + CHECK(s->value != 777, "sample2_value", "exp %ld, got %ld\n", + 777L, s->value); + return -EDONE; + default: + /* we don't care about the rest */ + return 0; + } +} + +static struct test_ringbuf *skel; +static struct ring_buffer *ringbuf; + +static void trigger_samples() +{ + skel->bss->dropped = 0; + skel->bss->total = 0; + skel->bss->discarded = 0; + + /* trigger exactly two samples */ + skel->bss->value = 333; + syscall(__NR_getpgid); + skel->bss->value = 777; + syscall(__NR_getpgid); +} + +static void *poll_thread(void *input) +{ + long timeout = (long)input; + + return (void *)(long)ring_buffer__poll(ringbuf, timeout); +} + +void test_ringbuf(void) +{ + const size_t rec_sz = BPF_RINGBUF_HDR_SZ + sizeof(struct sample); + pthread_t thread; + long bg_ret = -1; + int err; + + skel = test_ringbuf__open_and_load(); + if (CHECK(!skel, "skel_open_load", "skeleton open&load failed\n")) + return; + + /* only trigger BPF program for current process */ + skel->bss->pid = getpid(); + + ringbuf = ring_buffer__new(bpf_map__fd(skel->maps.ringbuf), + process_sample, NULL, NULL); + if (CHECK(!ringbuf, "ringbuf_create", "failed to create ringbuf\n")) + goto cleanup; + + err = test_ringbuf__attach(skel); + if (CHECK(err, "skel_attach", "skeleton attachment failed: %d\n", err)) + goto cleanup; + + trigger_samples(); + + /* 2 submitted + 1 discarded records */ + CHECK(skel->bss->avail_data != 3 * rec_sz, + "err_avail_size", "exp %ld, got %ld\n", + 3L * rec_sz, skel->bss->avail_data); + CHECK(skel->bss->ring_size != 4096, + "err_ring_size", "exp %ld, got %ld\n", + 4096L, skel->bss->ring_size); + CHECK(skel->bss->cons_pos != 0, + "err_cons_pos", "exp %ld, got %ld\n", + 0L, skel->bss->cons_pos); + CHECK(skel->bss->prod_pos != 3 * rec_sz, + "err_prod_pos", "exp %ld, got %ld\n", + 3L * rec_sz, skel->bss->prod_pos); + + /* poll for samples */ + err = ring_buffer__poll(ringbuf, -1); + + /* -EDONE is used as an indicator that we are done */ + if (CHECK(err != -EDONE, "err_done", "done err: %d\n", err)) + goto cleanup; + + /* we expect extra polling to return nothing */ + err = ring_buffer__poll(ringbuf, 0); + if (CHECK(err != 0, "extra_samples", "poll result: %d\n", err)) + goto cleanup; + + CHECK(skel->bss->dropped != 0, "err_dropped", "exp %ld, got %ld\n", + 0L, skel->bss->dropped); + CHECK(skel->bss->total != 2, "err_total", "exp %ld, got %ld\n", + 2L, skel->bss->total); + CHECK(skel->bss->discarded != 1, "err_discarded", "exp %ld, got %ld\n", + 1L, skel->bss->discarded); + + /* now validate consumer position is updated and returned */ + trigger_samples(); + CHECK(skel->bss->cons_pos != 3 * rec_sz, + "err_cons_pos", "exp %ld, got %ld\n", + 3L * rec_sz, skel->bss->cons_pos); + err = ring_buffer__poll(ringbuf, -1); + CHECK(err <= 0, "poll_err", "err %d\n", err); + + /* start poll in background w/ long timeout */ + err = pthread_create(&thread, NULL, poll_thread, (void *)(long)10000); + if (CHECK(err, "bg_poll", "pthread_create failed: %d\n", err)) + goto cleanup; + + /* turn off notifications now */ + skel->bss->flags = BPF_RB_NO_WAKEUP; + + /* give background thread a bit of a time */ + usleep(50000); + trigger_samples(); + /* sleeping arbitrarily is bad, but no better way to know that + * epoll_wait() **DID NOT** unblock in background thread + */ + usleep(50000); + /* background poll should still be blocked */ + err = pthread_tryjoin_np(thread, (void **)&bg_ret); + if (CHECK(err != EBUSY, "try_join", "err %d\n", err)) + goto cleanup; + + /* BPF side did everything right */ + CHECK(skel->bss->dropped != 0, "err_dropped", "exp %ld, got %ld\n", + 0L, skel->bss->dropped); + CHECK(skel->bss->total != 2, "err_total", "exp %ld, got %ld\n", + 2L, skel->bss->total); + CHECK(skel->bss->discarded != 1, "err_discarded", "exp %ld, got %ld\n", + 1L, skel->bss->discarded); + + /* clear flags to return to "adaptive" notification mode */ + skel->bss->flags = 0; + + /* produce new samples, no notification should be triggered, because + * consumer is now behind + */ + trigger_samples(); + + /* background poll should still be blocked */ + err = pthread_tryjoin_np(thread, (void **)&bg_ret); + if (CHECK(err != EBUSY, "try_join", "err %d\n", err)) + goto cleanup; + + /* now force notifications */ + skel->bss->flags = BPF_RB_FORCE_WAKEUP; + sample_cnt = 0; + trigger_samples(); + + /* now we should get a pending notification */ + usleep(50000); + err = pthread_tryjoin_np(thread, (void **)&bg_ret); + if (CHECK(err, "join_bg", "err %d\n", err)) + goto cleanup; + + if (CHECK(bg_ret != 1, "bg_ret", "epoll_wait result: %ld", bg_ret)) + goto cleanup; + + /* 3 rounds, 2 samples each */ + CHECK(sample_cnt != 6, "wrong_sample_cnt", + "expected to see %d samples, got %d\n", 6, sample_cnt); + + /* BPF side did everything right */ + CHECK(skel->bss->dropped != 0, "err_dropped", "exp %ld, got %ld\n", + 0L, skel->bss->dropped); + CHECK(skel->bss->total != 2, "err_total", "exp %ld, got %ld\n", + 2L, skel->bss->total); + CHECK(skel->bss->discarded != 1, "err_discarded", "exp %ld, got %ld\n", + 1L, skel->bss->discarded); + + test_ringbuf__detach(skel); +cleanup: + ring_buffer__free(ringbuf); + test_ringbuf__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c new file mode 100644 index 000000000000..78e450609803 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c @@ -0,0 +1,102 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <test_progs.h> +#include <sys/epoll.h> +#include "test_ringbuf_multi.skel.h" + +static int duration = 0; + +struct sample { + int pid; + int seq; + long value; + char comm[16]; +}; + +static int process_sample(void *ctx, void *data, size_t len) +{ + int ring = (unsigned long)ctx; + struct sample *s = data; + + switch (s->seq) { + case 0: + CHECK(ring != 1, "sample1_ring", "exp %d, got %d\n", 1, ring); + CHECK(s->value != 333, "sample1_value", "exp %ld, got %ld\n", + 333L, s->value); + break; + case 1: + CHECK(ring != 2, "sample2_ring", "exp %d, got %d\n", 2, ring); + CHECK(s->value != 777, "sample2_value", "exp %ld, got %ld\n", + 777L, s->value); + break; + default: + CHECK(true, "extra_sample", "unexpected sample seq %d, val %ld\n", + s->seq, s->value); + return -1; + } + + return 0; +} + +void test_ringbuf_multi(void) +{ + struct test_ringbuf_multi *skel; + struct ring_buffer *ringbuf; + int err; + + skel = test_ringbuf_multi__open_and_load(); + if (CHECK(!skel, "skel_open_load", "skeleton open&load failed\n")) + return; + + /* only trigger BPF program for current process */ + skel->bss->pid = getpid(); + + ringbuf = ring_buffer__new(bpf_map__fd(skel->maps.ringbuf1), + process_sample, (void *)(long)1, NULL); + if (CHECK(!ringbuf, "ringbuf_create", "failed to create ringbuf\n")) + goto cleanup; + + err = ring_buffer__add(ringbuf, bpf_map__fd(skel->maps.ringbuf2), + process_sample, (void *)(long)2); + if (CHECK(err, "ringbuf_add", "failed to add another ring\n")) + goto cleanup; + + err = test_ringbuf_multi__attach(skel); + if (CHECK(err, "skel_attach", "skeleton attachment failed: %d\n", err)) + goto cleanup; + + /* trigger few samples, some will be skipped */ + skel->bss->target_ring = 0; + skel->bss->value = 333; + syscall(__NR_getpgid); + + /* skipped, no ringbuf in slot 1 */ + skel->bss->target_ring = 1; + skel->bss->value = 555; + syscall(__NR_getpgid); + + skel->bss->target_ring = 2; + skel->bss->value = 777; + syscall(__NR_getpgid); + + /* poll for samples, should get 2 ringbufs back */ + err = ring_buffer__poll(ringbuf, -1); + if (CHECK(err != 4, "poll_res", "expected 4 records, got %d\n", err)) + goto cleanup; + + /* expect extra polling to return nothing */ + err = ring_buffer__poll(ringbuf, 0); + if (CHECK(err < 0, "extra_samples", "poll result: %d\n", err)) + goto cleanup; + + CHECK(skel->bss->dropped != 0, "err_dropped", "exp %ld, got %ld\n", + 0L, skel->bss->dropped); + CHECK(skel->bss->skipped != 1, "err_skipped", "exp %ld, got %ld\n", + 1L, skel->bss->skipped); + CHECK(skel->bss->total != 2, "err_total", "exp %ld, got %ld\n", + 2L, skel->bss->total); + +cleanup: + ring_buffer__free(ringbuf); + test_ringbuf_multi__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/signal_pending.c b/tools/testing/selftests/bpf/prog_tests/signal_pending.c index 996e808f43a2..dfcbddcbe4d3 100644 --- a/tools/testing/selftests/bpf/prog_tests/signal_pending.c +++ b/tools/testing/selftests/bpf/prog_tests/signal_pending.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> +#include <network_helpers.h> static void sigalrm_handler(int s) {} static struct sigaction sigalrm_action = { diff --git a/tools/testing/selftests/bpf/prog_tests/sk_assign.c b/tools/testing/selftests/bpf/prog_tests/sk_assign.c index d572e1a2c297..47fa04adc147 100644 --- a/tools/testing/selftests/bpf/prog_tests/sk_assign.c +++ b/tools/testing/selftests/bpf/prog_tests/sk_assign.c @@ -20,6 +20,7 @@ #define CONNECT_PORT 4321 #define TEST_DADDR (0xC0A80203) #define NS_SELF "/proc/self/ns/net" +#define SERVER_MAP_PATH "/sys/fs/bpf/tc/globals/server_map" static const struct timeval timeo_sec = { .tv_sec = 3 }; static const size_t timeo_optlen = sizeof(timeo_sec); @@ -265,6 +266,7 @@ void test_sk_assign(void) TEST("ipv6 udp addr redir", AF_INET6, SOCK_DGRAM, true), }; int server = -1; + int server_map; int self_net; self_net = open(NS_SELF, O_RDONLY); @@ -278,9 +280,17 @@ void test_sk_assign(void) goto cleanup; } + server_map = bpf_obj_get(SERVER_MAP_PATH); + if (CHECK_FAIL(server_map < 0)) { + perror("Unable to open " SERVER_MAP_PATH); + goto cleanup; + } + for (int i = 0; i < ARRAY_SIZE(tests) && !READ_ONCE(stop); i++) { struct test_sk_cfg *test = &tests[i]; const struct sockaddr *addr; + const int zero = 0; + int err; if (!test__start_subtest(test->name)) continue; @@ -288,7 +298,13 @@ void test_sk_assign(void) addr = (const struct sockaddr *)test->addr; server = start_server(addr, test->len, test->type); if (server == -1) - goto cleanup; + goto close; + + err = bpf_map_update_elem(server_map, &zero, &server, BPF_ANY); + if (CHECK_FAIL(err)) { + perror("Unable to update server_map"); + goto close; + } /* connect to unbound ports */ prepare_addr(test->addr, test->family, CONNECT_PORT, @@ -302,7 +318,10 @@ void test_sk_assign(void) close: close(server); + close(server_map); cleanup: + if (CHECK_FAIL(unlink(SERVER_MAP_PATH))) + perror("Unable to unlink " SERVER_MAP_PATH); if (CHECK_FAIL(setns(self_net, CLONE_NEWNET))) perror("Failed to setns("NS_SELF")"); close(self_net); diff --git a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c index 4538bd08203f..7021b92af313 100644 --- a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c +++ b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> +#include <network_helpers.h> void test_skb_ctx(void) { diff --git a/tools/testing/selftests/bpf/prog_tests/skb_helpers.c b/tools/testing/selftests/bpf/prog_tests/skb_helpers.c new file mode 100644 index 000000000000..f302ad84a298 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/skb_helpers.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include <network_helpers.h> + +void test_skb_helpers(void) +{ + struct __sk_buff skb = { + .wire_len = 100, + .gso_segs = 8, + .gso_size = 10, + }; + struct bpf_prog_test_run_attr tattr = { + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .ctx_in = &skb, + .ctx_size_in = sizeof(skb), + .ctx_out = &skb, + .ctx_size_out = sizeof(skb), + }; + struct bpf_object *obj; + int err; + + err = bpf_prog_load("./test_skb_helpers.o", BPF_PROG_TYPE_SCHED_CLS, &obj, + &tattr.prog_fd); + if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno)) + return; + err = bpf_prog_test_run_xattr(&tattr); + CHECK_ATTR(err, "len", "err %d errno %d\n", err, errno); + bpf_object__close(obj); +} diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index aa43e0bd210c..96e7b7f84c65 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -1,7 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2020 Cloudflare +#include <error.h> #include "test_progs.h" +#include "test_skmsg_load_helpers.skel.h" #define TCP_REPAIR 19 /* TCP sock is under repair right now */ @@ -70,10 +72,43 @@ out: close(s); } +static void test_skmsg_helpers(enum bpf_map_type map_type) +{ + struct test_skmsg_load_helpers *skel; + int err, map, verdict; + + skel = test_skmsg_load_helpers__open_and_load(); + if (CHECK_FAIL(!skel)) { + perror("test_skmsg_load_helpers__open_and_load"); + return; + } + + verdict = bpf_program__fd(skel->progs.prog_msg_verdict); + map = bpf_map__fd(skel->maps.sock_map); + + err = bpf_prog_attach(verdict, map, BPF_SK_MSG_VERDICT, 0); + if (CHECK_FAIL(err)) { + perror("bpf_prog_attach"); + goto out; + } + + err = bpf_prog_detach2(verdict, map, BPF_SK_MSG_VERDICT); + if (CHECK_FAIL(err)) { + perror("bpf_prog_detach2"); + goto out; + } +out: + test_skmsg_load_helpers__destroy(skel); +} + void test_sockmap_basic(void) { if (test__start_subtest("sockmap create_update_free")) test_sockmap_create_update_free(BPF_MAP_TYPE_SOCKMAP); if (test__start_subtest("sockhash create_update_free")) test_sockmap_create_update_free(BPF_MAP_TYPE_SOCKHASH); + if (test__start_subtest("sockmap sk_msg load helpers")) + test_skmsg_helpers(BPF_MAP_TYPE_SOCKMAP); + if (test__start_subtest("sockhash sk_msg load helpers")) + test_skmsg_helpers(BPF_MAP_TYPE_SOCKHASH); } diff --git a/tools/testing/selftests/bpf/prog_tests/spinlock.c b/tools/testing/selftests/bpf/prog_tests/spinlock.c index 1ae00cd3174e..7577a77a4c4c 100644 --- a/tools/testing/selftests/bpf/prog_tests/spinlock.c +++ b/tools/testing/selftests/bpf/prog_tests/spinlock.c @@ -1,5 +1,19 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> +#include <network_helpers.h> + +static void *spin_lock_thread(void *arg) +{ + __u32 duration, retval; + int err, prog_fd = *(u32 *) arg; + + err = bpf_prog_test_run(prog_fd, 10000, &pkt_v4, sizeof(pkt_v4), + NULL, NULL, &retval, &duration); + CHECK(err || retval, "", + "err %d errno %d retval %d duration %d\n", + err, errno, retval, duration); + pthread_exit(arg); +} void test_spinlock(void) { diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c index e56b52ab41da..9013a0c01eed 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c +++ b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> #include "cgroup_helpers.h" +#include "network_helpers.h" struct tcp_rtt_storage { __u32 invoked; @@ -87,34 +88,6 @@ static int verify_sk(int map_fd, int client_fd, const char *msg, __u32 invoked, return err; } -static int connect_to_server(int server_fd) -{ - struct sockaddr_storage addr; - socklen_t len = sizeof(addr); - int fd; - - fd = socket(AF_INET, SOCK_STREAM, 0); - if (fd < 0) { - log_err("Failed to create client socket"); - return -1; - } - - if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) { - log_err("Failed to get server addr"); - goto out; - } - - if (connect(fd, (const struct sockaddr *)&addr, len) < 0) { - log_err("Fail to connect to server"); - goto out; - } - - return fd; - -out: - close(fd); - return -1; -} static int run_test(int cgroup_fd, int server_fd) { @@ -145,7 +118,7 @@ static int run_test(int cgroup_fd, int server_fd) goto close_bpf_object; } - client_fd = connect_to_server(server_fd); + client_fd = connect_to_fd(AF_INET, SOCK_STREAM, server_fd); if (client_fd < 0) { err = -1; goto close_bpf_object; @@ -180,103 +153,22 @@ close_bpf_object: return err; } -static int start_server(void) -{ - struct sockaddr_in addr = { - .sin_family = AF_INET, - .sin_addr.s_addr = htonl(INADDR_LOOPBACK), - }; - int fd; - - fd = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK, 0); - if (fd < 0) { - log_err("Failed to create server socket"); - return -1; - } - - if (bind(fd, (const struct sockaddr *)&addr, sizeof(addr)) < 0) { - log_err("Failed to bind socket"); - close(fd); - return -1; - } - - return fd; -} - -static pthread_mutex_t server_started_mtx = PTHREAD_MUTEX_INITIALIZER; -static pthread_cond_t server_started = PTHREAD_COND_INITIALIZER; -static volatile bool server_done = false; - -static void *server_thread(void *arg) -{ - struct sockaddr_storage addr; - socklen_t len = sizeof(addr); - int fd = *(int *)arg; - int client_fd; - int err; - - err = listen(fd, 1); - - pthread_mutex_lock(&server_started_mtx); - pthread_cond_signal(&server_started); - pthread_mutex_unlock(&server_started_mtx); - - if (CHECK_FAIL(err < 0)) { - perror("Failed to listed on socket"); - return ERR_PTR(err); - } - - while (true) { - client_fd = accept(fd, (struct sockaddr *)&addr, &len); - if (client_fd == -1 && errno == EAGAIN) { - usleep(50); - continue; - } - break; - } - if (CHECK_FAIL(client_fd < 0)) { - perror("Failed to accept client"); - return ERR_PTR(err); - } - - while (!server_done) - usleep(50); - - close(client_fd); - - return NULL; -} - void test_tcp_rtt(void) { int server_fd, cgroup_fd; - pthread_t tid; - void *server_res; cgroup_fd = test__join_cgroup("/tcp_rtt"); if (CHECK_FAIL(cgroup_fd < 0)) return; - server_fd = start_server(); + server_fd = start_server(AF_INET, SOCK_STREAM); if (CHECK_FAIL(server_fd < 0)) goto close_cgroup_fd; - if (CHECK_FAIL(pthread_create(&tid, NULL, server_thread, - (void *)&server_fd))) - goto close_server_fd; - - pthread_mutex_lock(&server_started_mtx); - pthread_cond_wait(&server_started, &server_started_mtx); - pthread_mutex_unlock(&server_started_mtx); - CHECK_FAIL(run_test(cgroup_fd, server_fd)); - server_done = true; - CHECK_FAIL(pthread_join(tid, &server_res)); - CHECK_FAIL(IS_ERR(server_res)); - -close_server_fd: close(server_fd); + close_cgroup_fd: close(cgroup_fd); } diff --git a/tools/testing/selftests/bpf/prog_tests/test_overhead.c b/tools/testing/selftests/bpf/prog_tests/test_overhead.c index 465b371a561d..2702df2b2343 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_overhead.c +++ b/tools/testing/selftests/bpf/prog_tests/test_overhead.c @@ -61,9 +61,10 @@ void test_test_overhead(void) const char *raw_tp_name = "raw_tp/task_rename"; const char *fentry_name = "fentry/__set_task_comm"; const char *fexit_name = "fexit/__set_task_comm"; + const char *fmodret_name = "fmod_ret/__set_task_comm"; const char *kprobe_func = "__set_task_comm"; struct bpf_program *kprobe_prog, *kretprobe_prog, *raw_tp_prog; - struct bpf_program *fentry_prog, *fexit_prog; + struct bpf_program *fentry_prog, *fexit_prog, *fmodret_prog; struct bpf_object *obj; struct bpf_link *link; int err, duration = 0; @@ -96,6 +97,10 @@ void test_test_overhead(void) if (CHECK(!fexit_prog, "find_probe", "prog '%s' not found\n", fexit_name)) goto cleanup; + fmodret_prog = bpf_object__find_program_by_title(obj, fmodret_name); + if (CHECK(!fmodret_prog, "find_probe", + "prog '%s' not found\n", fmodret_name)) + goto cleanup; err = bpf_object__load(obj); if (CHECK(err, "obj_load", "err %d\n", err)) @@ -142,6 +147,13 @@ void test_test_overhead(void) goto cleanup; test_run("fexit"); bpf_link__destroy(link); + + /* attach fmod_ret */ + link = bpf_program__attach_trace(fmodret_prog); + if (CHECK(IS_ERR(link), "attach fmod_ret", "err %ld\n", PTR_ERR(link))) + goto cleanup; + test_run("fmod_ret"); + bpf_link__destroy(link); cleanup: prctl(PR_SET_NAME, comm, 0L, 0L, 0L); bpf_object__close(obj); diff --git a/tools/testing/selftests/bpf/prog_tests/xdp.c b/tools/testing/selftests/bpf/prog_tests/xdp.c index dcb5ecac778e..48921ff74850 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> +#include <network_helpers.h> void test_xdp(void) { diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c index 3744196d7cba..d5c98f2cb12f 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c @@ -1,13 +1,14 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> +#include <network_helpers.h> -void test_xdp_adjust_tail(void) +void test_xdp_adjust_tail_shrink(void) { - const char *file = "./test_adjust_tail.o"; + const char *file = "./test_xdp_adjust_tail_shrink.o"; + __u32 duration, retval, size, expect_sz; struct bpf_object *obj; - char buf[128]; - __u32 duration, retval, size; int err, prog_fd; + char buf[128]; err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); if (CHECK_FAIL(err)) @@ -20,10 +21,121 @@ void test_xdp_adjust_tail(void) "ipv4", "err %d errno %d retval %d size %d\n", err, errno, retval, size); + expect_sz = sizeof(pkt_v6) - 20; /* Test shrink with 20 bytes */ err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6), buf, &size, &retval, &duration); - CHECK(err || retval != XDP_TX || size != 54, - "ipv6", "err %d errno %d retval %d size %d\n", + CHECK(err || retval != XDP_TX || size != expect_sz, + "ipv6", "err %d errno %d retval %d size %d expect-size %d\n", + err, errno, retval, size, expect_sz); + bpf_object__close(obj); +} + +void test_xdp_adjust_tail_grow(void) +{ + const char *file = "./test_xdp_adjust_tail_grow.o"; + struct bpf_object *obj; + char buf[4096]; /* avoid segfault: large buf to hold grow results */ + __u32 duration, retval, size, expect_sz; + int err, prog_fd; + + err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); + if (CHECK_FAIL(err)) + return; + + err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), + buf, &size, &retval, &duration); + CHECK(err || retval != XDP_DROP, + "ipv4", "err %d errno %d retval %d size %d\n", err, errno, retval, size); + + expect_sz = sizeof(pkt_v6) + 40; /* Test grow with 40 bytes */ + err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6) /* 74 */, + buf, &size, &retval, &duration); + CHECK(err || retval != XDP_TX || size != expect_sz, + "ipv6", "err %d errno %d retval %d size %d expect-size %d\n", + err, errno, retval, size, expect_sz); + + bpf_object__close(obj); +} + +void test_xdp_adjust_tail_grow2(void) +{ + const char *file = "./test_xdp_adjust_tail_grow.o"; + char buf[4096]; /* avoid segfault: large buf to hold grow results */ + int tailroom = 320; /* SKB_DATA_ALIGN(sizeof(struct skb_shared_info))*/; + struct bpf_object *obj; + int err, cnt, i; + int max_grow; + + struct bpf_prog_test_run_attr tattr = { + .repeat = 1, + .data_in = &buf, + .data_out = &buf, + .data_size_in = 0, /* Per test */ + .data_size_out = 0, /* Per test */ + }; + + err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &tattr.prog_fd); + if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno)) + return; + + /* Test case-64 */ + memset(buf, 1, sizeof(buf)); + tattr.data_size_in = 64; /* Determine test case via pkt size */ + tattr.data_size_out = 128; /* Limit copy_size */ + /* Kernel side alloc packet memory area that is zero init */ + err = bpf_prog_test_run_xattr(&tattr); + + CHECK_ATTR(errno != ENOSPC /* Due limit copy_size in bpf_test_finish */ + || tattr.retval != XDP_TX + || tattr.data_size_out != 192, /* Expected grow size */ + "case-64", + "err %d errno %d retval %d size %d\n", + err, errno, tattr.retval, tattr.data_size_out); + + /* Extra checks for data contents */ + CHECK_ATTR(tattr.data_size_out != 192 + || buf[0] != 1 || buf[63] != 1 /* 0-63 memset to 1 */ + || buf[64] != 0 || buf[127] != 0 /* 64-127 memset to 0 */ + || buf[128] != 1 || buf[191] != 1, /*128-191 memset to 1 */ + "case-64-data", + "err %d errno %d retval %d size %d\n", + err, errno, tattr.retval, tattr.data_size_out); + + /* Test case-128 */ + memset(buf, 2, sizeof(buf)); + tattr.data_size_in = 128; /* Determine test case via pkt size */ + tattr.data_size_out = sizeof(buf); /* Copy everything */ + err = bpf_prog_test_run_xattr(&tattr); + + max_grow = 4096 - XDP_PACKET_HEADROOM - tailroom; /* 3520 */ + CHECK_ATTR(err + || tattr.retval != XDP_TX + || tattr.data_size_out != max_grow,/* Expect max grow size */ + "case-128", + "err %d errno %d retval %d size %d expect-size %d\n", + err, errno, tattr.retval, tattr.data_size_out, max_grow); + + /* Extra checks for data content: Count grow size, will contain zeros */ + for (i = 0, cnt = 0; i < sizeof(buf); i++) { + if (buf[i] == 0) + cnt++; + } + CHECK_ATTR((cnt != (max_grow - tattr.data_size_in)) /* Grow increase */ + || tattr.data_size_out != max_grow, /* Total grow size */ + "case-128-data", + "err %d errno %d retval %d size %d grow-size %d\n", + err, errno, tattr.retval, tattr.data_size_out, cnt); + bpf_object__close(obj); } + +void test_xdp_adjust_tail(void) +{ + if (test__start_subtest("xdp_adjust_tail_shrink")) + test_xdp_adjust_tail_shrink(); + if (test__start_subtest("xdp_adjust_tail_grow")) + test_xdp_adjust_tail_grow(); + if (test__start_subtest("xdp_adjust_tail_grow2")) + test_xdp_adjust_tail_grow2(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c index a0f688c37023..2c6c570b21f8 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> +#include <network_helpers.h> #include <net/if.h> #include "test_xdp.skel.h" #include "test_xdp_bpf2bpf.skel.h" diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c new file mode 100644 index 000000000000..d19dbd668f6a --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c @@ -0,0 +1,97 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <uapi/linux/bpf.h> +#include <linux/if_link.h> +#include <test_progs.h> + +#include "test_xdp_devmap_helpers.skel.h" +#include "test_xdp_with_devmap_helpers.skel.h" + +#define IFINDEX_LO 1 + +struct bpf_devmap_val { + u32 ifindex; /* device index */ + union { + int fd; /* prog fd on map write */ + u32 id; /* prog id on map read */ + } bpf_prog; +}; + +void test_xdp_with_devmap_helpers(void) +{ + struct test_xdp_with_devmap_helpers *skel; + struct bpf_prog_info info = {}; + struct bpf_devmap_val val = { + .ifindex = IFINDEX_LO, + }; + __u32 len = sizeof(info); + __u32 duration = 0, idx = 0; + int err, dm_fd, map_fd; + + + skel = test_xdp_with_devmap_helpers__open_and_load(); + if (CHECK_FAIL(!skel)) { + perror("test_xdp_with_devmap_helpers__open_and_load"); + return; + } + + /* can not attach program with DEVMAPs that allow programs + * as xdp generic + */ + dm_fd = bpf_program__fd(skel->progs.xdp_redir_prog); + err = bpf_set_link_xdp_fd(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE); + CHECK(err == 0, "Generic attach of program with 8-byte devmap", + "should have failed\n"); + + dm_fd = bpf_program__fd(skel->progs.xdp_dummy_dm); + map_fd = bpf_map__fd(skel->maps.dm_ports); + err = bpf_obj_get_info_by_fd(dm_fd, &info, &len); + if (CHECK_FAIL(err)) + goto out_close; + + val.bpf_prog.fd = dm_fd; + err = bpf_map_update_elem(map_fd, &idx, &val, 0); + CHECK(err, "Add program to devmap entry", + "err %d errno %d\n", err, errno); + + err = bpf_map_lookup_elem(map_fd, &idx, &val); + CHECK(err, "Read devmap entry", "err %d errno %d\n", err, errno); + CHECK(info.id != val.bpf_prog.id, "Expected program id in devmap entry", + "expected %u read %u\n", info.id, val.bpf_prog.id); + + /* can not attach BPF_XDP_DEVMAP program to a device */ + err = bpf_set_link_xdp_fd(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE); + CHECK(err == 0, "Attach of BPF_XDP_DEVMAP program", + "should have failed\n"); + + val.ifindex = 1; + val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog); + err = bpf_map_update_elem(map_fd, &idx, &val, 0); + CHECK(err == 0, "Add non-BPF_XDP_DEVMAP program to devmap entry", + "should have failed\n"); + +out_close: + test_xdp_with_devmap_helpers__destroy(skel); +} + +void test_neg_xdp_devmap_helpers(void) +{ + struct test_xdp_devmap_helpers *skel; + __u32 duration = 0; + + skel = test_xdp_devmap_helpers__open_and_load(); + if (CHECK(skel, + "Load of XDP program accessing egress ifindex without attach type", + "should have failed\n")) { + test_xdp_devmap_helpers__destroy(skel); + } +} + + +void test_xdp_devmap_attach(void) +{ + if (test__start_subtest("DEVMAP with programs in entries")) + test_xdp_with_devmap_helpers(); + + if (test__start_subtest("Verifier check of DEVMAP programs")) + test_neg_xdp_devmap_helpers(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c index c9404e6b226e..f284f72158ef 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> +#include <network_helpers.h> void test_xdp_noinline(void) { diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c index 9941f0ba471e..de6de9221518 100644 --- a/tools/testing/selftests/bpf/progs/bpf_flow.c +++ b/tools/testing/selftests/bpf/progs/bpf_flow.c @@ -20,20 +20,20 @@ #include <bpf/bpf_endian.h> int _version SEC("version") = 1; -#define PROG(F) SEC(#F) int bpf_func_##F +#define PROG(F) PROG_(F, _##F) +#define PROG_(NUM, NAME) SEC("flow_dissector/"#NUM) int bpf_func##NAME /* These are the identifiers of the BPF programs that will be used in tail * calls. Name is limited to 16 characters, with the terminating character and * bpf_func_ above, we have only 6 to work with, anything after will be cropped. */ -enum { - IP, - IPV6, - IPV6OP, /* Destination/Hop-by-Hop Options IPv6 Extension header */ - IPV6FR, /* Fragmentation IPv6 Extension Header */ - MPLS, - VLAN, -}; +#define IP 0 +#define IPV6 1 +#define IPV6OP 2 /* Destination/Hop-by-Hop Options IPv6 Ext. Header */ +#define IPV6FR 3 /* Fragmentation IPv6 Extension Header */ +#define MPLS 4 +#define VLAN 5 +#define MAX_PROG 6 #define IP_MF 0x2000 #define IP_OFFSET 0x1FFF @@ -59,7 +59,7 @@ struct frag_hdr { struct { __uint(type, BPF_MAP_TYPE_PROG_ARRAY); - __uint(max_entries, 8); + __uint(max_entries, MAX_PROG); __uint(key_size, sizeof(__u32)); __uint(value_size, sizeof(__u32)); } jmp_table SEC(".maps"); diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c new file mode 100644 index 000000000000..b57bd6fef208 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +/* "undefine" structs in vmlinux.h, because we "override" them below */ +#define bpf_iter_meta bpf_iter_meta___not_used +#define bpf_iter__bpf_map bpf_iter__bpf_map___not_used +#include "vmlinux.h" +#undef bpf_iter_meta +#undef bpf_iter__bpf_map +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +struct bpf_iter_meta { + struct seq_file *seq; + __u64 session_id; + __u64 seq_num; +} __attribute__((preserve_access_index)); + +struct bpf_iter__bpf_map { + struct bpf_iter_meta *meta; + struct bpf_map *map; +} __attribute__((preserve_access_index)); + +SEC("iter/bpf_map") +int dump_bpf_map(struct bpf_iter__bpf_map *ctx) +{ + struct seq_file *seq = ctx->meta->seq; + __u64 seq_num = ctx->meta->seq_num; + struct bpf_map *map = ctx->map; + + if (map == (void *)0) { + BPF_SEQ_PRINTF(seq, " %%%%%% END %%%%%%\n"); + return 0; + } + + if (seq_num == 0) + BPF_SEQ_PRINTF(seq, " id refcnt usercnt locked_vm\n"); + + BPF_SEQ_PRINTF(seq, "%8u %8ld %8ld %10lu\n", map->id, map->refcnt.counter, + map->usercnt.counter, + map->memory.user->locked_vm.counter); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c new file mode 100644 index 000000000000..c8e9ca74c87b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +/* "undefine" structs in vmlinux.h, because we "override" them below */ +#define bpf_iter_meta bpf_iter_meta___not_used +#define bpf_iter__ipv6_route bpf_iter__ipv6_route___not_used +#include "vmlinux.h" +#undef bpf_iter_meta +#undef bpf_iter__ipv6_route +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +struct bpf_iter_meta { + struct seq_file *seq; + __u64 session_id; + __u64 seq_num; +} __attribute__((preserve_access_index)); + +struct bpf_iter__ipv6_route { + struct bpf_iter_meta *meta; + struct fib6_info *rt; +} __attribute__((preserve_access_index)); + +char _license[] SEC("license") = "GPL"; + +extern bool CONFIG_IPV6_SUBTREES __kconfig __weak; + +#define RTF_GATEWAY 0x0002 +#define IFNAMSIZ 16 +#define fib_nh_gw_family nh_common.nhc_gw_family +#define fib_nh_gw6 nh_common.nhc_gw.ipv6 +#define fib_nh_dev nh_common.nhc_dev + +SEC("iter/ipv6_route") +int dump_ipv6_route(struct bpf_iter__ipv6_route *ctx) +{ + struct seq_file *seq = ctx->meta->seq; + struct fib6_info *rt = ctx->rt; + const struct net_device *dev; + struct fib6_nh *fib6_nh; + unsigned int flags; + struct nexthop *nh; + + if (rt == (void *)0) + return 0; + + fib6_nh = &rt->fib6_nh[0]; + flags = rt->fib6_flags; + + /* FIXME: nexthop_is_multipath is not handled here. */ + nh = rt->nh; + if (rt->nh) + fib6_nh = &nh->nh_info->fib6_nh; + + BPF_SEQ_PRINTF(seq, "%pi6 %02x ", &rt->fib6_dst.addr, rt->fib6_dst.plen); + + if (CONFIG_IPV6_SUBTREES) + BPF_SEQ_PRINTF(seq, "%pi6 %02x ", &rt->fib6_src.addr, + rt->fib6_src.plen); + else + BPF_SEQ_PRINTF(seq, "00000000000000000000000000000000 00 "); + + if (fib6_nh->fib_nh_gw_family) { + flags |= RTF_GATEWAY; + BPF_SEQ_PRINTF(seq, "%pi6 ", &fib6_nh->fib_nh_gw6); + } else { + BPF_SEQ_PRINTF(seq, "00000000000000000000000000000000 "); + } + + dev = fib6_nh->fib_nh_dev; + if (dev) + BPF_SEQ_PRINTF(seq, "%08x %08x %08x %08x %8s\n", rt->fib6_metric, + rt->fib6_ref.refs.counter, 0, flags, dev->name); + else + BPF_SEQ_PRINTF(seq, "%08x %08x %08x %08x\n", rt->fib6_metric, + rt->fib6_ref.refs.counter, 0, flags); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c new file mode 100644 index 000000000000..e7b8753eac0b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +/* "undefine" structs in vmlinux.h, because we "override" them below */ +#define bpf_iter_meta bpf_iter_meta___not_used +#define bpf_iter__netlink bpf_iter__netlink___not_used +#include "vmlinux.h" +#undef bpf_iter_meta +#undef bpf_iter__netlink +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +#define sk_rmem_alloc sk_backlog.rmem_alloc +#define sk_refcnt __sk_common.skc_refcnt + +struct bpf_iter_meta { + struct seq_file *seq; + __u64 session_id; + __u64 seq_num; +} __attribute__((preserve_access_index)); + +struct bpf_iter__netlink { + struct bpf_iter_meta *meta; + struct netlink_sock *sk; +} __attribute__((preserve_access_index)); + +static inline struct inode *SOCK_INODE(struct socket *socket) +{ + return &container_of(socket, struct socket_alloc, socket)->vfs_inode; +} + +SEC("iter/netlink") +int dump_netlink(struct bpf_iter__netlink *ctx) +{ + struct seq_file *seq = ctx->meta->seq; + struct netlink_sock *nlk = ctx->sk; + unsigned long group, ino; + struct inode *inode; + struct socket *sk; + struct sock *s; + + if (nlk == (void *)0) + return 0; + + if (ctx->meta->seq_num == 0) + BPF_SEQ_PRINTF(seq, "sk Eth Pid Groups " + "Rmem Wmem Dump Locks Drops " + "Inode\n"); + + s = &nlk->sk; + BPF_SEQ_PRINTF(seq, "%pK %-3d ", s, s->sk_protocol); + + if (!nlk->groups) { + group = 0; + } else { + /* FIXME: temporary use bpf_probe_read here, needs + * verifier support to do direct access. + */ + bpf_probe_read(&group, sizeof(group), &nlk->groups[0]); + } + BPF_SEQ_PRINTF(seq, "%-10u %08x %-8d %-8d %-5d %-8d ", + nlk->portid, (u32)group, + s->sk_rmem_alloc.counter, + s->sk_wmem_alloc.refs.counter - 1, + nlk->cb_running, s->sk_refcnt.refs.counter); + + sk = s->sk_socket; + if (!sk) { + ino = 0; + } else { + /* FIXME: container_of inside SOCK_INODE has a forced + * type conversion, and direct access cannot be used + * with current verifier. + */ + inode = SOCK_INODE(sk); + bpf_probe_read(&ino, sizeof(ino), &inode->i_ino); + } + BPF_SEQ_PRINTF(seq, "%-8u %-8lu\n", s->sk_drops.counter, ino); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task.c b/tools/testing/selftests/bpf/progs/bpf_iter_task.c new file mode 100644 index 000000000000..ee754021f98e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +/* "undefine" structs in vmlinux.h, because we "override" them below */ +#define bpf_iter_meta bpf_iter_meta___not_used +#define bpf_iter__task bpf_iter__task___not_used +#include "vmlinux.h" +#undef bpf_iter_meta +#undef bpf_iter__task +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +struct bpf_iter_meta { + struct seq_file *seq; + __u64 session_id; + __u64 seq_num; +} __attribute__((preserve_access_index)); + +struct bpf_iter__task { + struct bpf_iter_meta *meta; + struct task_struct *task; +} __attribute__((preserve_access_index)); + +SEC("iter/task") +int dump_task(struct bpf_iter__task *ctx) +{ + struct seq_file *seq = ctx->meta->seq; + struct task_struct *task = ctx->task; + + if (task == (void *)0) { + BPF_SEQ_PRINTF(seq, " === END ===\n"); + return 0; + } + + if (ctx->meta->seq_num == 0) + BPF_SEQ_PRINTF(seq, " tgid gid\n"); + + BPF_SEQ_PRINTF(seq, "%8d %8d\n", task->tgid, task->pid); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c new file mode 100644 index 000000000000..0f0ec3db20ba --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +/* "undefine" structs in vmlinux.h, because we "override" them below */ +#define bpf_iter_meta bpf_iter_meta___not_used +#define bpf_iter__task_file bpf_iter__task_file___not_used +#include "vmlinux.h" +#undef bpf_iter_meta +#undef bpf_iter__task_file +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +struct bpf_iter_meta { + struct seq_file *seq; + __u64 session_id; + __u64 seq_num; +} __attribute__((preserve_access_index)); + +struct bpf_iter__task_file { + struct bpf_iter_meta *meta; + struct task_struct *task; + __u32 fd; + struct file *file; +} __attribute__((preserve_access_index)); + +SEC("iter/task_file") +int dump_task_file(struct bpf_iter__task_file *ctx) +{ + struct seq_file *seq = ctx->meta->seq; + struct task_struct *task = ctx->task; + __u32 fd = ctx->fd; + struct file *file = ctx->file; + + if (task == (void *)0 || file == (void *)0) + return 0; + + if (ctx->meta->seq_num == 0) + BPF_SEQ_PRINTF(seq, " tgid gid fd file\n"); + + BPF_SEQ_PRINTF(seq, "%8d %8d %8d %lx\n", task->tgid, task->pid, fd, + (long)file->f_op); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern1.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern1.c new file mode 100644 index 000000000000..c71a7c283108 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern1.c @@ -0,0 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#define START_CHAR 'a' +#include "bpf_iter_test_kern_common.h" diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern2.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern2.c new file mode 100644 index 000000000000..8bdc8dc07444 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern2.c @@ -0,0 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#define START_CHAR 'A' +#include "bpf_iter_test_kern_common.h" diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c new file mode 100644 index 000000000000..13c2c90c835f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#define bpf_iter_meta bpf_iter_meta___not_used +#define bpf_iter__task bpf_iter__task___not_used +#include "vmlinux.h" +#undef bpf_iter_meta +#undef bpf_iter__task +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +struct bpf_iter_meta { + struct seq_file *seq; + __u64 session_id; + __u64 seq_num; +} __attribute__((preserve_access_index)); + +struct bpf_iter__task { + struct bpf_iter_meta *meta; + struct task_struct *task; +} __attribute__((preserve_access_index)); + +SEC("iter/task") +int dump_task(struct bpf_iter__task *ctx) +{ + struct seq_file *seq = ctx->meta->seq; + struct task_struct *task = ctx->task; + int tgid; + + tgid = task->tgid; + bpf_seq_write(seq, &tgid, sizeof(tgid)); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c new file mode 100644 index 000000000000..0aa71b333cf3 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#define bpf_iter_meta bpf_iter_meta___not_used +#define bpf_iter__bpf_map bpf_iter__bpf_map___not_used +#include "vmlinux.h" +#undef bpf_iter_meta +#undef bpf_iter__bpf_map +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +struct bpf_iter_meta { + struct seq_file *seq; + __u64 session_id; + __u64 seq_num; +} __attribute__((preserve_access_index)); + +struct bpf_iter__bpf_map { + struct bpf_iter_meta *meta; + struct bpf_map *map; +} __attribute__((preserve_access_index)); + +__u32 map1_id = 0, map2_id = 0; +__u32 map1_accessed = 0, map2_accessed = 0; +__u64 map1_seqnum = 0, map2_seqnum1 = 0, map2_seqnum2 = 0; + +static volatile const __u32 print_len; +static volatile const __u32 ret1; + +SEC("iter/bpf_map") +int dump_bpf_map(struct bpf_iter__bpf_map *ctx) +{ + struct seq_file *seq = ctx->meta->seq; + struct bpf_map *map = ctx->map; + __u64 seq_num; + int i, ret = 0; + + if (map == (void *)0) + return 0; + + /* only dump map1_id and map2_id */ + if (map->id != map1_id && map->id != map2_id) + return 0; + + seq_num = ctx->meta->seq_num; + if (map->id == map1_id) { + map1_seqnum = seq_num; + map1_accessed++; + } + + if (map->id == map2_id) { + if (map2_accessed == 0) { + map2_seqnum1 = seq_num; + if (ret1) + ret = 1; + } else { + map2_seqnum2 = seq_num; + } + map2_accessed++; + } + + /* fill seq_file buffer */ + for (i = 0; i < print_len; i++) + bpf_seq_write(seq, &seq_num, sizeof(seq_num)); + + return ret; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h new file mode 100644 index 000000000000..dee1339e6905 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2020 Facebook */ +/* "undefine" structs in vmlinux.h, because we "override" them below */ +#define bpf_iter_meta bpf_iter_meta___not_used +#define bpf_iter__task bpf_iter__task___not_used +#include "vmlinux.h" +#undef bpf_iter_meta +#undef bpf_iter__task +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; +int count = 0; + +struct bpf_iter_meta { + struct seq_file *seq; + __u64 session_id; + __u64 seq_num; +} __attribute__((preserve_access_index)); + +struct bpf_iter__task { + struct bpf_iter_meta *meta; + struct task_struct *task; +} __attribute__((preserve_access_index)); + +SEC("iter/task") +int dump_task(struct bpf_iter__task *ctx) +{ + struct seq_file *seq = ctx->meta->seq; + char c; + + if (count < 4) { + c = START_CHAR + count; + bpf_seq_write(seq, &c, sizeof(c)); + count++; + } + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c new file mode 100644 index 000000000000..3f757e30d7a0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c @@ -0,0 +1,97 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +#include <linux/bpf.h> +#include <bpf/bpf_endian.h> +#include <bpf/bpf_helpers.h> + +#include <linux/if_ether.h> +#include <linux/in.h> +#include <linux/in6.h> +#include <linux/ipv6.h> +#include <linux/tcp.h> + +#include <sys/types.h> +#include <sys/socket.h> + +int _version SEC("version") = 1; +char _license[] SEC("license") = "GPL"; + +__u16 g_serv_port = 0; + +static inline void set_ip(__u32 *dst, const struct in6_addr *src) +{ + dst[0] = src->in6_u.u6_addr32[0]; + dst[1] = src->in6_u.u6_addr32[1]; + dst[2] = src->in6_u.u6_addr32[2]; + dst[3] = src->in6_u.u6_addr32[3]; +} + +static inline void set_tuple(struct bpf_sock_tuple *tuple, + const struct ipv6hdr *ip6h, + const struct tcphdr *tcph) +{ + set_ip(tuple->ipv6.saddr, &ip6h->daddr); + set_ip(tuple->ipv6.daddr, &ip6h->saddr); + tuple->ipv6.sport = tcph->dest; + tuple->ipv6.dport = tcph->source; +} + +static inline int is_allowed_peer_cg(struct __sk_buff *skb, + const struct ipv6hdr *ip6h, + const struct tcphdr *tcph) +{ + __u64 cgid, acgid, peer_cgid, peer_acgid; + struct bpf_sock_tuple tuple; + size_t tuple_len = sizeof(tuple.ipv6); + struct bpf_sock *peer_sk; + + set_tuple(&tuple, ip6h, tcph); + + peer_sk = bpf_sk_lookup_tcp(skb, &tuple, tuple_len, + BPF_F_CURRENT_NETNS, 0); + if (!peer_sk) + return 0; + + cgid = bpf_skb_cgroup_id(skb); + peer_cgid = bpf_sk_cgroup_id(peer_sk); + + acgid = bpf_skb_ancestor_cgroup_id(skb, 2); + peer_acgid = bpf_sk_ancestor_cgroup_id(peer_sk, 2); + + bpf_sk_release(peer_sk); + + return cgid && cgid == peer_cgid && acgid && acgid == peer_acgid; +} + +SEC("cgroup_skb/ingress") +int ingress_lookup(struct __sk_buff *skb) +{ + __u32 serv_port_key = 0; + struct ipv6hdr ip6h; + struct tcphdr tcph; + + if (skb->protocol != bpf_htons(ETH_P_IPV6)) + return 1; + + /* For SYN packets coming to listening socket skb->remote_port will be + * zero, so IPv6/TCP headers are loaded to identify remote peer + * instead. + */ + if (bpf_skb_load_bytes(skb, 0, &ip6h, sizeof(ip6h))) + return 1; + + if (ip6h.nexthdr != IPPROTO_TCP) + return 1; + + if (bpf_skb_load_bytes(skb, sizeof(ip6h), &tcph, sizeof(tcph))) + return 1; + + if (!g_serv_port) + return 0; + + if (tcph.dest != g_serv_port) + return 1; + + return is_allowed_peer_cg(skb, &ip6h, &tcph); +} diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c index ad3c498a8150..1ab2c5eba86c 100644 --- a/tools/testing/selftests/bpf/progs/connect4_prog.c +++ b/tools/testing/selftests/bpf/progs/connect4_prog.c @@ -8,6 +8,9 @@ #include <linux/in.h> #include <linux/in6.h> #include <sys/socket.h> +#include <netinet/tcp.h> +#include <linux/if.h> +#include <errno.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> @@ -16,6 +19,14 @@ #define DST_REWRITE_IP4 0x7f000001U #define DST_REWRITE_PORT4 4444 +#ifndef TCP_CA_NAME_MAX +#define TCP_CA_NAME_MAX 16 +#endif + +#ifndef IFNAMSIZ +#define IFNAMSIZ 16 +#endif + int _version SEC("version") = 1; __attribute__ ((noinline)) @@ -33,6 +44,66 @@ int do_bind(struct bpf_sock_addr *ctx) return 1; } +static __inline int verify_cc(struct bpf_sock_addr *ctx, + char expected[TCP_CA_NAME_MAX]) +{ + char buf[TCP_CA_NAME_MAX]; + int i; + + if (bpf_getsockopt(ctx, SOL_TCP, TCP_CONGESTION, &buf, sizeof(buf))) + return 1; + + for (i = 0; i < TCP_CA_NAME_MAX; i++) { + if (buf[i] != expected[i]) + return 1; + if (buf[i] == 0) + break; + } + + return 0; +} + +static __inline int set_cc(struct bpf_sock_addr *ctx) +{ + char reno[TCP_CA_NAME_MAX] = "reno"; + char cubic[TCP_CA_NAME_MAX] = "cubic"; + + if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, &reno, sizeof(reno))) + return 1; + if (verify_cc(ctx, reno)) + return 1; + + if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, &cubic, sizeof(cubic))) + return 1; + if (verify_cc(ctx, cubic)) + return 1; + + return 0; +} + +static __inline int bind_to_device(struct bpf_sock_addr *ctx) +{ + char veth1[IFNAMSIZ] = "test_sock_addr1"; + char veth2[IFNAMSIZ] = "test_sock_addr2"; + char missing[IFNAMSIZ] = "nonexistent_dev"; + char del_bind[IFNAMSIZ] = ""; + + if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE, + &veth1, sizeof(veth1))) + return 1; + if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE, + &veth2, sizeof(veth2))) + return 1; + if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE, + &missing, sizeof(missing)) != -ENODEV) + return 1; + if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE, + &del_bind, sizeof(del_bind))) + return 1; + + return 0; +} + SEC("cgroup/connect4") int connect_v4_prog(struct bpf_sock_addr *ctx) { @@ -46,6 +117,10 @@ int connect_v4_prog(struct bpf_sock_addr *ctx) tuple.ipv4.daddr = bpf_htonl(DST_REWRITE_IP4); tuple.ipv4.dport = bpf_htons(DST_REWRITE_PORT4); + /* Bind to device and unbind it. */ + if (bind_to_device(ctx)) + return 0; + if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM) return 0; else if (ctx->type == SOCK_STREAM) @@ -66,6 +141,10 @@ int connect_v4_prog(struct bpf_sock_addr *ctx) bpf_sk_release(sk); + /* Rewrite congestion control. */ + if (ctx->type == SOCK_STREAM && set_cc(ctx)) + return 0; + /* Rewrite destination. */ ctx->user_ip4 = bpf_htonl(DST_REWRITE_IP4); ctx->user_port = bpf_htons(DST_REWRITE_PORT4); diff --git a/tools/testing/selftests/bpf/progs/connect_force_port4.c b/tools/testing/selftests/bpf/progs/connect_force_port4.c new file mode 100644 index 000000000000..7396308677a3 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/connect_force_port4.c @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <string.h> +#include <stdbool.h> + +#include <linux/bpf.h> +#include <linux/in.h> +#include <linux/in6.h> +#include <sys/socket.h> + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +char _license[] SEC("license") = "GPL"; +int _version SEC("version") = 1; + +struct svc_addr { + __be32 addr; + __be16 port; +}; + +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct svc_addr); +} service_mapping SEC(".maps"); + +SEC("cgroup/connect4") +int connect4(struct bpf_sock_addr *ctx) +{ + struct sockaddr_in sa = {}; + struct svc_addr *orig; + + /* Force local address to 127.0.0.1:22222. */ + sa.sin_family = AF_INET; + sa.sin_port = bpf_htons(22222); + sa.sin_addr.s_addr = bpf_htonl(0x7f000001); + + if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0) + return 0; + + /* Rewire service 1.2.3.4:60000 to backend 127.0.0.1:60123. */ + if (ctx->user_port == bpf_htons(60000)) { + orig = bpf_sk_storage_get(&service_mapping, ctx->sk, 0, + BPF_SK_STORAGE_GET_F_CREATE); + if (!orig) + return 0; + + orig->addr = ctx->user_ip4; + orig->port = ctx->user_port; + + ctx->user_ip4 = bpf_htonl(0x7f000001); + ctx->user_port = bpf_htons(60123); + } + return 1; +} + +SEC("cgroup/getsockname4") +int getsockname4(struct bpf_sock_addr *ctx) +{ + /* Expose local server as 1.2.3.4:60000 to client. */ + if (ctx->user_port == bpf_htons(60123)) { + ctx->user_ip4 = bpf_htonl(0x01020304); + ctx->user_port = bpf_htons(60000); + } + return 1; +} + +SEC("cgroup/getpeername4") +int getpeername4(struct bpf_sock_addr *ctx) +{ + struct svc_addr *orig; + + /* Expose service 1.2.3.4:60000 as peer instead of backend. */ + if (ctx->user_port == bpf_htons(60123)) { + orig = bpf_sk_storage_get(&service_mapping, ctx->sk, 0, 0); + if (orig) { + ctx->user_ip4 = orig->addr; + ctx->user_port = orig->port; + } + } + return 1; +} diff --git a/tools/testing/selftests/bpf/progs/connect_force_port6.c b/tools/testing/selftests/bpf/progs/connect_force_port6.c new file mode 100644 index 000000000000..c1a2b555e9ad --- /dev/null +++ b/tools/testing/selftests/bpf/progs/connect_force_port6.c @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <string.h> + +#include <linux/bpf.h> +#include <linux/in.h> +#include <linux/in6.h> +#include <sys/socket.h> + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +char _license[] SEC("license") = "GPL"; +int _version SEC("version") = 1; + +struct svc_addr { + __be32 addr[4]; + __be16 port; +}; + +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct svc_addr); +} service_mapping SEC(".maps"); + +SEC("cgroup/connect6") +int connect6(struct bpf_sock_addr *ctx) +{ + struct sockaddr_in6 sa = {}; + struct svc_addr *orig; + + /* Force local address to [::1]:22223. */ + sa.sin6_family = AF_INET6; + sa.sin6_port = bpf_htons(22223); + sa.sin6_addr.s6_addr32[3] = bpf_htonl(1); + + if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0) + return 0; + + /* Rewire service [fc00::1]:60000 to backend [::1]:60124. */ + if (ctx->user_port == bpf_htons(60000)) { + orig = bpf_sk_storage_get(&service_mapping, ctx->sk, 0, + BPF_SK_STORAGE_GET_F_CREATE); + if (!orig) + return 0; + + orig->addr[0] = ctx->user_ip6[0]; + orig->addr[1] = ctx->user_ip6[1]; + orig->addr[2] = ctx->user_ip6[2]; + orig->addr[3] = ctx->user_ip6[3]; + orig->port = ctx->user_port; + + ctx->user_ip6[0] = 0; + ctx->user_ip6[1] = 0; + ctx->user_ip6[2] = 0; + ctx->user_ip6[3] = bpf_htonl(1); + ctx->user_port = bpf_htons(60124); + } + return 1; +} + +SEC("cgroup/getsockname6") +int getsockname6(struct bpf_sock_addr *ctx) +{ + /* Expose local server as [fc00::1]:60000 to client. */ + if (ctx->user_port == bpf_htons(60124)) { + ctx->user_ip6[0] = bpf_htonl(0xfc000000); + ctx->user_ip6[1] = 0; + ctx->user_ip6[2] = 0; + ctx->user_ip6[3] = bpf_htonl(1); + ctx->user_port = bpf_htons(60000); + } + return 1; +} + +SEC("cgroup/getpeername6") +int getpeername6(struct bpf_sock_addr *ctx) +{ + struct svc_addr *orig; + + /* Expose service [fc00::1]:60000 as peer instead of backend. */ + if (ctx->user_port == bpf_htons(60124)) { + orig = bpf_sk_storage_get(&service_mapping, ctx->sk, 0, 0); + if (orig) { + ctx->user_ip6[0] = orig->addr[0]; + ctx->user_ip6[1] = orig->addr[1]; + ctx->user_ip6[2] = orig->addr[2]; + ctx->user_ip6[3] = orig->addr[3]; + ctx->user_port = orig->port; + } + } + return 1; +} diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h index 6d598cfbdb3e..34d84717c946 100644 --- a/tools/testing/selftests/bpf/progs/core_reloc_types.h +++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h @@ -379,7 +379,7 @@ struct core_reloc_arrays___equiv_zero_sz_arr { struct core_reloc_arrays_substruct c[3]; struct core_reloc_arrays_substruct d[1][2]; /* equivalent to flexible array */ - struct core_reloc_arrays_substruct f[0][2]; + struct core_reloc_arrays_substruct f[][2]; }; struct core_reloc_arrays___fixed_arr { diff --git a/tools/testing/selftests/bpf/progs/perfbuf_bench.c b/tools/testing/selftests/bpf/progs/perfbuf_bench.c new file mode 100644 index 000000000000..e5ab4836a641 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/perfbuf_bench.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +#include <linux/bpf.h> +#include <stdint.h> +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(value_size, sizeof(int)); + __uint(key_size, sizeof(int)); +} perfbuf SEC(".maps"); + +const volatile int batch_cnt = 0; + +long sample_val = 42; +long dropped __attribute__((aligned(128))) = 0; + +SEC("fentry/__x64_sys_getpgid") +int bench_perfbuf(void *ctx) +{ + __u64 *sample; + int i; + + for (i = 0; i < batch_cnt; i++) { + if (bpf_perf_event_output(ctx, &perfbuf, BPF_F_CURRENT_CPU, + &sample_val, sizeof(sample_val))) + __sync_add_and_fetch(&dropped, 1); + } + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/ringbuf_bench.c b/tools/testing/selftests/bpf/progs/ringbuf_bench.c new file mode 100644 index 000000000000..123607d314d6 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/ringbuf_bench.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +#include <linux/bpf.h> +#include <stdint.h> +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); +} ringbuf SEC(".maps"); + +const volatile int batch_cnt = 0; +const volatile long use_output = 0; + +long sample_val = 42; +long dropped __attribute__((aligned(128))) = 0; + +const volatile long wakeup_data_size = 0; + +static __always_inline long get_flags() +{ + long sz; + + if (!wakeup_data_size) + return 0; + + sz = bpf_ringbuf_query(&ringbuf, BPF_RB_AVAIL_DATA); + return sz >= wakeup_data_size ? BPF_RB_FORCE_WAKEUP : BPF_RB_NO_WAKEUP; +} + +SEC("fentry/__x64_sys_getpgid") +int bench_ringbuf(void *ctx) +{ + long *sample, flags; + int i; + + if (!use_output) { + for (i = 0; i < batch_cnt; i++) { + sample = bpf_ringbuf_reserve(&ringbuf, + sizeof(sample_val), 0); + if (!sample) { + __sync_add_and_fetch(&dropped, 1); + } else { + *sample = sample_val; + flags = get_flags(); + bpf_ringbuf_submit(sample, flags); + } + } + } else { + for (i = 0; i < batch_cnt; i++) { + flags = get_flags(); + if (bpf_ringbuf_output(&ringbuf, &sample_val, + sizeof(sample_val), flags)) + __sync_add_and_fetch(&dropped, 1); + } + } + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c b/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c new file mode 100644 index 000000000000..e5093796be97 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2020 Facebook */ +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +struct inner_map { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, int); +} inner_map1 SEC(".maps"), + inner_map2 SEC(".maps"); + +struct outer_arr { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, 3); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); + /* it's possible to use anonymous struct as inner map definition here */ + __array(values, struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + /* changing max_entries to 2 will fail during load + * due to incompatibility with inner_map definition */ + __uint(max_entries, 1); + __type(key, int); + __type(value, int); + }); +} outer_arr SEC(".maps") = { + /* (void *) cast is necessary because we didn't use `struct inner_map` + * in __inner(values, ...) + * Actually, a conscious effort is required to screw up initialization + * of inner map slots, which is a great thing! + */ + .values = { (void *)&inner_map1, 0, (void *)&inner_map2 }, +}; + +struct outer_hash { + __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS); + __uint(max_entries, 5); + __uint(key_size, sizeof(int)); + /* Here everything works flawlessly due to reuse of struct inner_map + * and compiler will complain at the attempt to use non-inner_map + * references below. This is great experience. + */ + __array(values, struct inner_map); +} outer_hash SEC(".maps") = { + .values = { + [0] = &inner_map2, + [4] = &inner_map1, + }, +}; + +int input = 0; + +SEC("raw_tp/sys_enter") +int handle__sys_enter(void *ctx) +{ + struct inner_map *inner_map; + int key = 0, val; + + inner_map = bpf_map_lookup_elem(&outer_arr, &key); + if (!inner_map) + return 1; + val = input; + bpf_map_update_elem(inner_map, &key, &val, 0); + + inner_map = bpf_map_lookup_elem(&outer_hash, &key); + if (!inner_map) + return 1; + val = input + 1; + bpf_map_update_elem(inner_map, &key, &val, 0); + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.c b/tools/testing/selftests/bpf/progs/test_cls_redirect.c new file mode 100644 index 000000000000..f0b72e86bee5 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.c @@ -0,0 +1,1061 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +// Copyright (c) 2019, 2020 Cloudflare + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <string.h> + +#include <linux/bpf.h> +#include <linux/icmp.h> +#include <linux/icmpv6.h> +#include <linux/if_ether.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/pkt_cls.h> +#include <linux/tcp.h> +#include <linux/udp.h> + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +#include "test_cls_redirect.h" + +#define offsetofend(TYPE, MEMBER) \ + (offsetof(TYPE, MEMBER) + sizeof((((TYPE *)0)->MEMBER))) + +#define IP_OFFSET_MASK (0x1FFF) +#define IP_MF (0x2000) + +char _license[] SEC("license") = "Dual BSD/GPL"; + +/** + * Destination port and IP used for UDP encapsulation. + */ +static volatile const __be16 ENCAPSULATION_PORT; +static volatile const __be32 ENCAPSULATION_IP; + +typedef struct { + uint64_t processed_packets_total; + uint64_t l3_protocol_packets_total_ipv4; + uint64_t l3_protocol_packets_total_ipv6; + uint64_t l4_protocol_packets_total_tcp; + uint64_t l4_protocol_packets_total_udp; + uint64_t accepted_packets_total_syn; + uint64_t accepted_packets_total_syn_cookies; + uint64_t accepted_packets_total_last_hop; + uint64_t accepted_packets_total_icmp_echo_request; + uint64_t accepted_packets_total_established; + uint64_t forwarded_packets_total_gue; + uint64_t forwarded_packets_total_gre; + + uint64_t errors_total_unknown_l3_proto; + uint64_t errors_total_unknown_l4_proto; + uint64_t errors_total_malformed_ip; + uint64_t errors_total_fragmented_ip; + uint64_t errors_total_malformed_icmp; + uint64_t errors_total_unwanted_icmp; + uint64_t errors_total_malformed_icmp_pkt_too_big; + uint64_t errors_total_malformed_tcp; + uint64_t errors_total_malformed_udp; + uint64_t errors_total_icmp_echo_replies; + uint64_t errors_total_malformed_encapsulation; + uint64_t errors_total_encap_adjust_failed; + uint64_t errors_total_encap_buffer_too_small; + uint64_t errors_total_redirect_loop; +} metrics_t; + +typedef enum { + INVALID = 0, + UNKNOWN, + ECHO_REQUEST, + SYN, + SYN_COOKIE, + ESTABLISHED, +} verdict_t; + +typedef struct { + uint16_t src, dst; +} flow_ports_t; + +_Static_assert( + sizeof(flow_ports_t) != + offsetofend(struct bpf_sock_tuple, ipv4.dport) - + offsetof(struct bpf_sock_tuple, ipv4.sport) - 1, + "flow_ports_t must match sport and dport in struct bpf_sock_tuple"); +_Static_assert( + sizeof(flow_ports_t) != + offsetofend(struct bpf_sock_tuple, ipv6.dport) - + offsetof(struct bpf_sock_tuple, ipv6.sport) - 1, + "flow_ports_t must match sport and dport in struct bpf_sock_tuple"); + +typedef int ret_t; + +/* This is a bit of a hack. We need a return value which allows us to + * indicate that the regular flow of the program should continue, + * while allowing functions to use XDP_PASS and XDP_DROP, etc. + */ +static const ret_t CONTINUE_PROCESSING = -1; + +/* Convenience macro to call functions which return ret_t. + */ +#define MAYBE_RETURN(x) \ + do { \ + ret_t __ret = x; \ + if (__ret != CONTINUE_PROCESSING) \ + return __ret; \ + } while (0) + +/* Linux packet pointers are either aligned to NET_IP_ALIGN (aka 2 bytes), + * or not aligned if the arch supports efficient unaligned access. + * + * Since the verifier ensures that eBPF packet accesses follow these rules, + * we can tell LLVM to emit code as if we always had a larger alignment. + * It will yell at us if we end up on a platform where this is not valid. + */ +typedef uint8_t *net_ptr __attribute__((align_value(8))); + +typedef struct buf { + struct __sk_buff *skb; + net_ptr head; + /* NB: tail musn't have alignment other than 1, otherwise + * LLVM will go and eliminate code, e.g. when checking packet lengths. + */ + uint8_t *const tail; +} buf_t; + +static size_t buf_off(const buf_t *buf) +{ + /* Clang seems to optimize constructs like + * a - b + c + * if c is known: + * r? = c + * r? -= b + * r? += a + * + * This is a problem if a and b are packet pointers, + * since the verifier allows subtracting two pointers to + * get a scalar, but not a scalar and a pointer. + * + * Use inline asm to break this optimization. + */ + size_t off = (size_t)buf->head; + asm("%0 -= %1" : "+r"(off) : "r"(buf->skb->data)); + return off; +} + +static bool buf_copy(buf_t *buf, void *dst, size_t len) +{ + if (bpf_skb_load_bytes(buf->skb, buf_off(buf), dst, len)) { + return false; + } + + buf->head += len; + return true; +} + +static bool buf_skip(buf_t *buf, const size_t len) +{ + /* Check whether off + len is valid in the non-linear part. */ + if (buf_off(buf) + len > buf->skb->len) { + return false; + } + + buf->head += len; + return true; +} + +/* Returns a pointer to the start of buf, or NULL if len is + * larger than the remaining data. Consumes len bytes on a successful + * call. + * + * If scratch is not NULL, the function will attempt to load non-linear + * data via bpf_skb_load_bytes. On success, scratch is returned. + */ +static void *buf_assign(buf_t *buf, const size_t len, void *scratch) +{ + if (buf->head + len > buf->tail) { + if (scratch == NULL) { + return NULL; + } + + return buf_copy(buf, scratch, len) ? scratch : NULL; + } + + void *ptr = buf->head; + buf->head += len; + return ptr; +} + +static bool pkt_skip_ipv4_options(buf_t *buf, const struct iphdr *ipv4) +{ + if (ipv4->ihl <= 5) { + return true; + } + + return buf_skip(buf, (ipv4->ihl - 5) * 4); +} + +static bool ipv4_is_fragment(const struct iphdr *ip) +{ + uint16_t frag_off = ip->frag_off & bpf_htons(IP_OFFSET_MASK); + return (ip->frag_off & bpf_htons(IP_MF)) != 0 || frag_off > 0; +} + +static struct iphdr *pkt_parse_ipv4(buf_t *pkt, struct iphdr *scratch) +{ + struct iphdr *ipv4 = buf_assign(pkt, sizeof(*ipv4), scratch); + if (ipv4 == NULL) { + return NULL; + } + + if (ipv4->ihl < 5) { + return NULL; + } + + if (!pkt_skip_ipv4_options(pkt, ipv4)) { + return NULL; + } + + return ipv4; +} + +/* Parse the L4 ports from a packet, assuming a layout like TCP or UDP. */ +static bool pkt_parse_icmp_l4_ports(buf_t *pkt, flow_ports_t *ports) +{ + if (!buf_copy(pkt, ports, sizeof(*ports))) { + return false; + } + + /* Ports in the L4 headers are reversed, since we are parsing an ICMP + * payload which is going towards the eyeball. + */ + uint16_t dst = ports->src; + ports->src = ports->dst; + ports->dst = dst; + return true; +} + +static uint16_t pkt_checksum_fold(uint32_t csum) +{ + /* The highest reasonable value for an IPv4 header + * checksum requires two folds, so we just do that always. + */ + csum = (csum & 0xffff) + (csum >> 16); + csum = (csum & 0xffff) + (csum >> 16); + return (uint16_t)~csum; +} + +static void pkt_ipv4_checksum(struct iphdr *iph) +{ + iph->check = 0; + + /* An IP header without options is 20 bytes. Two of those + * are the checksum, which we always set to zero. Hence, + * the maximum accumulated value is 18 / 2 * 0xffff = 0x8fff7, + * which fits in 32 bit. + */ + _Static_assert(sizeof(struct iphdr) == 20, "iphdr must be 20 bytes"); + uint32_t acc = 0; + uint16_t *ipw = (uint16_t *)iph; + +#pragma clang loop unroll(full) + for (size_t i = 0; i < sizeof(struct iphdr) / 2; i++) { + acc += ipw[i]; + } + + iph->check = pkt_checksum_fold(acc); +} + +static bool pkt_skip_ipv6_extension_headers(buf_t *pkt, + const struct ipv6hdr *ipv6, + uint8_t *upper_proto, + bool *is_fragment) +{ + /* We understand five extension headers. + * https://tools.ietf.org/html/rfc8200#section-4.1 states that all + * headers should occur once, except Destination Options, which may + * occur twice. Hence we give up after 6 headers. + */ + struct { + uint8_t next; + uint8_t len; + } exthdr = { + .next = ipv6->nexthdr, + }; + *is_fragment = false; + +#pragma clang loop unroll(full) + for (int i = 0; i < 6; i++) { + switch (exthdr.next) { + case IPPROTO_FRAGMENT: + *is_fragment = true; + /* NB: We don't check that hdrlen == 0 as per spec. */ + /* fallthrough; */ + + case IPPROTO_HOPOPTS: + case IPPROTO_ROUTING: + case IPPROTO_DSTOPTS: + case IPPROTO_MH: + if (!buf_copy(pkt, &exthdr, sizeof(exthdr))) { + return false; + } + + /* hdrlen is in 8-octet units, and excludes the first 8 octets. */ + if (!buf_skip(pkt, + (exthdr.len + 1) * 8 - sizeof(exthdr))) { + return false; + } + + /* Decode next header */ + break; + + default: + /* The next header is not one of the known extension + * headers, treat it as the upper layer header. + * + * This handles IPPROTO_NONE. + * + * Encapsulating Security Payload (50) and Authentication + * Header (51) also end up here (and will trigger an + * unknown proto error later). They have a custom header + * format and seem too esoteric to care about. + */ + *upper_proto = exthdr.next; + return true; + } + } + + /* We never found an upper layer header. */ + return false; +} + +/* This function has to be inlined, because the verifier otherwise rejects it + * due to returning a pointer to the stack. This is technically correct, since + * scratch is allocated on the stack. However, this usage should be safe since + * it's the callers stack after all. + */ +static inline __attribute__((__always_inline__)) struct ipv6hdr * +pkt_parse_ipv6(buf_t *pkt, struct ipv6hdr *scratch, uint8_t *proto, + bool *is_fragment) +{ + struct ipv6hdr *ipv6 = buf_assign(pkt, sizeof(*ipv6), scratch); + if (ipv6 == NULL) { + return NULL; + } + + if (!pkt_skip_ipv6_extension_headers(pkt, ipv6, proto, is_fragment)) { + return NULL; + } + + return ipv6; +} + +/* Global metrics, per CPU + */ +struct bpf_map_def metrics_map SEC("maps") = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(unsigned int), + .value_size = sizeof(metrics_t), + .max_entries = 1, +}; + +static metrics_t *get_global_metrics(void) +{ + uint64_t key = 0; + return bpf_map_lookup_elem(&metrics_map, &key); +} + +static ret_t accept_locally(struct __sk_buff *skb, encap_headers_t *encap) +{ + const int payload_off = + sizeof(*encap) + + sizeof(struct in_addr) * encap->unigue.hop_count; + int32_t encap_overhead = payload_off - sizeof(struct ethhdr); + + // Changing the ethertype if the encapsulated packet is ipv6 + if (encap->gue.proto_ctype == IPPROTO_IPV6) { + encap->eth.h_proto = bpf_htons(ETH_P_IPV6); + } + + if (bpf_skb_adjust_room(skb, -encap_overhead, BPF_ADJ_ROOM_MAC, + BPF_F_ADJ_ROOM_FIXED_GSO | + BPF_F_ADJ_ROOM_NO_CSUM_RESET) || + bpf_csum_level(skb, BPF_CSUM_LEVEL_DEC)) + return TC_ACT_SHOT; + + return bpf_redirect(skb->ifindex, BPF_F_INGRESS); +} + +static ret_t forward_with_gre(struct __sk_buff *skb, encap_headers_t *encap, + struct in_addr *next_hop, metrics_t *metrics) +{ + metrics->forwarded_packets_total_gre++; + + const int payload_off = + sizeof(*encap) + + sizeof(struct in_addr) * encap->unigue.hop_count; + int32_t encap_overhead = + payload_off - sizeof(struct ethhdr) - sizeof(struct iphdr); + int32_t delta = sizeof(struct gre_base_hdr) - encap_overhead; + uint16_t proto = ETH_P_IP; + + /* Loop protection: the inner packet's TTL is decremented as a safeguard + * against any forwarding loop. As the only interesting field is the TTL + * hop limit for IPv6, it is easier to use bpf_skb_load_bytes/bpf_skb_store_bytes + * as they handle the split packets if needed (no need for the data to be + * in the linear section). + */ + if (encap->gue.proto_ctype == IPPROTO_IPV6) { + proto = ETH_P_IPV6; + uint8_t ttl; + int rc; + + rc = bpf_skb_load_bytes( + skb, payload_off + offsetof(struct ipv6hdr, hop_limit), + &ttl, 1); + if (rc != 0) { + metrics->errors_total_malformed_encapsulation++; + return TC_ACT_SHOT; + } + + if (ttl == 0) { + metrics->errors_total_redirect_loop++; + return TC_ACT_SHOT; + } + + ttl--; + rc = bpf_skb_store_bytes( + skb, payload_off + offsetof(struct ipv6hdr, hop_limit), + &ttl, 1, 0); + if (rc != 0) { + metrics->errors_total_malformed_encapsulation++; + return TC_ACT_SHOT; + } + } else { + uint8_t ttl; + int rc; + + rc = bpf_skb_load_bytes( + skb, payload_off + offsetof(struct iphdr, ttl), &ttl, + 1); + if (rc != 0) { + metrics->errors_total_malformed_encapsulation++; + return TC_ACT_SHOT; + } + + if (ttl == 0) { + metrics->errors_total_redirect_loop++; + return TC_ACT_SHOT; + } + + /* IPv4 also has a checksum to patch. While the TTL is only one byte, + * this function only works for 2 and 4 bytes arguments (the result is + * the same). + */ + rc = bpf_l3_csum_replace( + skb, payload_off + offsetof(struct iphdr, check), ttl, + ttl - 1, 2); + if (rc != 0) { + metrics->errors_total_malformed_encapsulation++; + return TC_ACT_SHOT; + } + + ttl--; + rc = bpf_skb_store_bytes( + skb, payload_off + offsetof(struct iphdr, ttl), &ttl, 1, + 0); + if (rc != 0) { + metrics->errors_total_malformed_encapsulation++; + return TC_ACT_SHOT; + } + } + + if (bpf_skb_adjust_room(skb, delta, BPF_ADJ_ROOM_NET, + BPF_F_ADJ_ROOM_FIXED_GSO | + BPF_F_ADJ_ROOM_NO_CSUM_RESET) || + bpf_csum_level(skb, BPF_CSUM_LEVEL_INC)) { + metrics->errors_total_encap_adjust_failed++; + return TC_ACT_SHOT; + } + + if (bpf_skb_pull_data(skb, sizeof(encap_gre_t))) { + metrics->errors_total_encap_buffer_too_small++; + return TC_ACT_SHOT; + } + + buf_t pkt = { + .skb = skb, + .head = (uint8_t *)(long)skb->data, + .tail = (uint8_t *)(long)skb->data_end, + }; + + encap_gre_t *encap_gre = buf_assign(&pkt, sizeof(encap_gre_t), NULL); + if (encap_gre == NULL) { + metrics->errors_total_encap_buffer_too_small++; + return TC_ACT_SHOT; + } + + encap_gre->ip.protocol = IPPROTO_GRE; + encap_gre->ip.daddr = next_hop->s_addr; + encap_gre->ip.saddr = ENCAPSULATION_IP; + encap_gre->ip.tot_len = + bpf_htons(bpf_ntohs(encap_gre->ip.tot_len) + delta); + encap_gre->gre.flags = 0; + encap_gre->gre.protocol = bpf_htons(proto); + pkt_ipv4_checksum((void *)&encap_gre->ip); + + return bpf_redirect(skb->ifindex, 0); +} + +static ret_t forward_to_next_hop(struct __sk_buff *skb, encap_headers_t *encap, + struct in_addr *next_hop, metrics_t *metrics) +{ + /* swap L2 addresses */ + /* This assumes that packets are received from a router. + * So just swapping the MAC addresses here will make the packet go back to + * the router, which will send it to the appropriate machine. + */ + unsigned char temp[ETH_ALEN]; + memcpy(temp, encap->eth.h_dest, sizeof(temp)); + memcpy(encap->eth.h_dest, encap->eth.h_source, + sizeof(encap->eth.h_dest)); + memcpy(encap->eth.h_source, temp, sizeof(encap->eth.h_source)); + + if (encap->unigue.next_hop == encap->unigue.hop_count - 1 && + encap->unigue.last_hop_gre) { + return forward_with_gre(skb, encap, next_hop, metrics); + } + + metrics->forwarded_packets_total_gue++; + uint32_t old_saddr = encap->ip.saddr; + encap->ip.saddr = encap->ip.daddr; + encap->ip.daddr = next_hop->s_addr; + if (encap->unigue.next_hop < encap->unigue.hop_count) { + encap->unigue.next_hop++; + } + + /* Remove ip->saddr, add next_hop->s_addr */ + const uint64_t off = offsetof(typeof(*encap), ip.check); + int ret = bpf_l3_csum_replace(skb, off, old_saddr, next_hop->s_addr, 4); + if (ret < 0) { + return TC_ACT_SHOT; + } + + return bpf_redirect(skb->ifindex, 0); +} + +static ret_t skip_next_hops(buf_t *pkt, int n) +{ + switch (n) { + case 1: + if (!buf_skip(pkt, sizeof(struct in_addr))) + return TC_ACT_SHOT; + case 0: + return CONTINUE_PROCESSING; + + default: + return TC_ACT_SHOT; + } +} + +/* Get the next hop from the GLB header. + * + * Sets next_hop->s_addr to 0 if there are no more hops left. + * pkt is positioned just after the variable length GLB header + * iff the call is successful. + */ +static ret_t get_next_hop(buf_t *pkt, encap_headers_t *encap, + struct in_addr *next_hop) +{ + if (encap->unigue.next_hop > encap->unigue.hop_count) { + return TC_ACT_SHOT; + } + + /* Skip "used" next hops. */ + MAYBE_RETURN(skip_next_hops(pkt, encap->unigue.next_hop)); + + if (encap->unigue.next_hop == encap->unigue.hop_count) { + /* No more next hops, we are at the end of the GLB header. */ + next_hop->s_addr = 0; + return CONTINUE_PROCESSING; + } + + if (!buf_copy(pkt, next_hop, sizeof(*next_hop))) { + return TC_ACT_SHOT; + } + + /* Skip the remainig next hops (may be zero). */ + return skip_next_hops(pkt, encap->unigue.hop_count - + encap->unigue.next_hop - 1); +} + +/* Fill a bpf_sock_tuple to be used with the socket lookup functions. + * This is a kludge that let's us work around verifier limitations: + * + * fill_tuple(&t, foo, sizeof(struct iphdr), 123, 321) + * + * clang will substitue a costant for sizeof, which allows the verifier + * to track it's value. Based on this, it can figure out the constant + * return value, and calling code works while still being "generic" to + * IPv4 and IPv6. + */ +static uint64_t fill_tuple(struct bpf_sock_tuple *tuple, void *iph, + uint64_t iphlen, uint16_t sport, uint16_t dport) +{ + switch (iphlen) { + case sizeof(struct iphdr): { + struct iphdr *ipv4 = (struct iphdr *)iph; + tuple->ipv4.daddr = ipv4->daddr; + tuple->ipv4.saddr = ipv4->saddr; + tuple->ipv4.sport = sport; + tuple->ipv4.dport = dport; + return sizeof(tuple->ipv4); + } + + case sizeof(struct ipv6hdr): { + struct ipv6hdr *ipv6 = (struct ipv6hdr *)iph; + memcpy(&tuple->ipv6.daddr, &ipv6->daddr, + sizeof(tuple->ipv6.daddr)); + memcpy(&tuple->ipv6.saddr, &ipv6->saddr, + sizeof(tuple->ipv6.saddr)); + tuple->ipv6.sport = sport; + tuple->ipv6.dport = dport; + return sizeof(tuple->ipv6); + } + + default: + return 0; + } +} + +static verdict_t classify_tcp(struct __sk_buff *skb, + struct bpf_sock_tuple *tuple, uint64_t tuplen, + void *iph, struct tcphdr *tcp) +{ + struct bpf_sock *sk = + bpf_skc_lookup_tcp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0); + if (sk == NULL) { + return UNKNOWN; + } + + if (sk->state != BPF_TCP_LISTEN) { + bpf_sk_release(sk); + return ESTABLISHED; + } + + if (iph != NULL && tcp != NULL) { + /* Kludge: we've run out of arguments, but need the length of the ip header. */ + uint64_t iphlen = sizeof(struct iphdr); + if (tuplen == sizeof(tuple->ipv6)) { + iphlen = sizeof(struct ipv6hdr); + } + + if (bpf_tcp_check_syncookie(sk, iph, iphlen, tcp, + sizeof(*tcp)) == 0) { + bpf_sk_release(sk); + return SYN_COOKIE; + } + } + + bpf_sk_release(sk); + return UNKNOWN; +} + +static verdict_t classify_udp(struct __sk_buff *skb, + struct bpf_sock_tuple *tuple, uint64_t tuplen) +{ + struct bpf_sock *sk = + bpf_sk_lookup_udp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0); + if (sk == NULL) { + return UNKNOWN; + } + + if (sk->state == BPF_TCP_ESTABLISHED) { + bpf_sk_release(sk); + return ESTABLISHED; + } + + bpf_sk_release(sk); + return UNKNOWN; +} + +static verdict_t classify_icmp(struct __sk_buff *skb, uint8_t proto, + struct bpf_sock_tuple *tuple, uint64_t tuplen, + metrics_t *metrics) +{ + switch (proto) { + case IPPROTO_TCP: + return classify_tcp(skb, tuple, tuplen, NULL, NULL); + + case IPPROTO_UDP: + return classify_udp(skb, tuple, tuplen); + + default: + metrics->errors_total_malformed_icmp++; + return INVALID; + } +} + +static verdict_t process_icmpv4(buf_t *pkt, metrics_t *metrics) +{ + struct icmphdr icmp; + if (!buf_copy(pkt, &icmp, sizeof(icmp))) { + metrics->errors_total_malformed_icmp++; + return INVALID; + } + + /* We should never receive encapsulated echo replies. */ + if (icmp.type == ICMP_ECHOREPLY) { + metrics->errors_total_icmp_echo_replies++; + return INVALID; + } + + if (icmp.type == ICMP_ECHO) { + return ECHO_REQUEST; + } + + if (icmp.type != ICMP_DEST_UNREACH || icmp.code != ICMP_FRAG_NEEDED) { + metrics->errors_total_unwanted_icmp++; + return INVALID; + } + + struct iphdr _ip4; + const struct iphdr *ipv4 = pkt_parse_ipv4(pkt, &_ip4); + if (ipv4 == NULL) { + metrics->errors_total_malformed_icmp_pkt_too_big++; + return INVALID; + } + + /* The source address in the outer IP header is from the entity that + * originated the ICMP message. Use the original IP header to restore + * the correct flow tuple. + */ + struct bpf_sock_tuple tuple; + tuple.ipv4.saddr = ipv4->daddr; + tuple.ipv4.daddr = ipv4->saddr; + + if (!pkt_parse_icmp_l4_ports(pkt, (flow_ports_t *)&tuple.ipv4.sport)) { + metrics->errors_total_malformed_icmp_pkt_too_big++; + return INVALID; + } + + return classify_icmp(pkt->skb, ipv4->protocol, &tuple, + sizeof(tuple.ipv4), metrics); +} + +static verdict_t process_icmpv6(buf_t *pkt, metrics_t *metrics) +{ + struct icmp6hdr icmp6; + if (!buf_copy(pkt, &icmp6, sizeof(icmp6))) { + metrics->errors_total_malformed_icmp++; + return INVALID; + } + + /* We should never receive encapsulated echo replies. */ + if (icmp6.icmp6_type == ICMPV6_ECHO_REPLY) { + metrics->errors_total_icmp_echo_replies++; + return INVALID; + } + + if (icmp6.icmp6_type == ICMPV6_ECHO_REQUEST) { + return ECHO_REQUEST; + } + + if (icmp6.icmp6_type != ICMPV6_PKT_TOOBIG) { + metrics->errors_total_unwanted_icmp++; + return INVALID; + } + + bool is_fragment; + uint8_t l4_proto; + struct ipv6hdr _ipv6; + const struct ipv6hdr *ipv6 = + pkt_parse_ipv6(pkt, &_ipv6, &l4_proto, &is_fragment); + if (ipv6 == NULL) { + metrics->errors_total_malformed_icmp_pkt_too_big++; + return INVALID; + } + + if (is_fragment) { + metrics->errors_total_fragmented_ip++; + return INVALID; + } + + /* Swap source and dest addresses. */ + struct bpf_sock_tuple tuple; + memcpy(&tuple.ipv6.saddr, &ipv6->daddr, sizeof(tuple.ipv6.saddr)); + memcpy(&tuple.ipv6.daddr, &ipv6->saddr, sizeof(tuple.ipv6.daddr)); + + if (!pkt_parse_icmp_l4_ports(pkt, (flow_ports_t *)&tuple.ipv6.sport)) { + metrics->errors_total_malformed_icmp_pkt_too_big++; + return INVALID; + } + + return classify_icmp(pkt->skb, l4_proto, &tuple, sizeof(tuple.ipv6), + metrics); +} + +static verdict_t process_tcp(buf_t *pkt, void *iph, uint64_t iphlen, + metrics_t *metrics) +{ + metrics->l4_protocol_packets_total_tcp++; + + struct tcphdr _tcp; + struct tcphdr *tcp = buf_assign(pkt, sizeof(_tcp), &_tcp); + if (tcp == NULL) { + metrics->errors_total_malformed_tcp++; + return INVALID; + } + + if (tcp->syn) { + return SYN; + } + + struct bpf_sock_tuple tuple; + uint64_t tuplen = + fill_tuple(&tuple, iph, iphlen, tcp->source, tcp->dest); + return classify_tcp(pkt->skb, &tuple, tuplen, iph, tcp); +} + +static verdict_t process_udp(buf_t *pkt, void *iph, uint64_t iphlen, + metrics_t *metrics) +{ + metrics->l4_protocol_packets_total_udp++; + + struct udphdr _udp; + struct udphdr *udph = buf_assign(pkt, sizeof(_udp), &_udp); + if (udph == NULL) { + metrics->errors_total_malformed_udp++; + return INVALID; + } + + struct bpf_sock_tuple tuple; + uint64_t tuplen = + fill_tuple(&tuple, iph, iphlen, udph->source, udph->dest); + return classify_udp(pkt->skb, &tuple, tuplen); +} + +static verdict_t process_ipv4(buf_t *pkt, metrics_t *metrics) +{ + metrics->l3_protocol_packets_total_ipv4++; + + struct iphdr _ip4; + struct iphdr *ipv4 = pkt_parse_ipv4(pkt, &_ip4); + if (ipv4 == NULL) { + metrics->errors_total_malformed_ip++; + return INVALID; + } + + if (ipv4->version != 4) { + metrics->errors_total_malformed_ip++; + return INVALID; + } + + if (ipv4_is_fragment(ipv4)) { + metrics->errors_total_fragmented_ip++; + return INVALID; + } + + switch (ipv4->protocol) { + case IPPROTO_ICMP: + return process_icmpv4(pkt, metrics); + + case IPPROTO_TCP: + return process_tcp(pkt, ipv4, sizeof(*ipv4), metrics); + + case IPPROTO_UDP: + return process_udp(pkt, ipv4, sizeof(*ipv4), metrics); + + default: + metrics->errors_total_unknown_l4_proto++; + return INVALID; + } +} + +static verdict_t process_ipv6(buf_t *pkt, metrics_t *metrics) +{ + metrics->l3_protocol_packets_total_ipv6++; + + uint8_t l4_proto; + bool is_fragment; + struct ipv6hdr _ipv6; + struct ipv6hdr *ipv6 = + pkt_parse_ipv6(pkt, &_ipv6, &l4_proto, &is_fragment); + if (ipv6 == NULL) { + metrics->errors_total_malformed_ip++; + return INVALID; + } + + if (ipv6->version != 6) { + metrics->errors_total_malformed_ip++; + return INVALID; + } + + if (is_fragment) { + metrics->errors_total_fragmented_ip++; + return INVALID; + } + + switch (l4_proto) { + case IPPROTO_ICMPV6: + return process_icmpv6(pkt, metrics); + + case IPPROTO_TCP: + return process_tcp(pkt, ipv6, sizeof(*ipv6), metrics); + + case IPPROTO_UDP: + return process_udp(pkt, ipv6, sizeof(*ipv6), metrics); + + default: + metrics->errors_total_unknown_l4_proto++; + return INVALID; + } +} + +SEC("classifier/cls_redirect") +int cls_redirect(struct __sk_buff *skb) +{ + metrics_t *metrics = get_global_metrics(); + if (metrics == NULL) { + return TC_ACT_SHOT; + } + + metrics->processed_packets_total++; + + /* Pass bogus packets as long as we're not sure they're + * destined for us. + */ + if (skb->protocol != bpf_htons(ETH_P_IP)) { + return TC_ACT_OK; + } + + encap_headers_t *encap; + + /* Make sure that all encapsulation headers are available in + * the linear portion of the skb. This makes it easy to manipulate them. + */ + if (bpf_skb_pull_data(skb, sizeof(*encap))) { + return TC_ACT_OK; + } + + buf_t pkt = { + .skb = skb, + .head = (uint8_t *)(long)skb->data, + .tail = (uint8_t *)(long)skb->data_end, + }; + + encap = buf_assign(&pkt, sizeof(*encap), NULL); + if (encap == NULL) { + return TC_ACT_OK; + } + + if (encap->ip.ihl != 5) { + /* We never have any options. */ + return TC_ACT_OK; + } + + if (encap->ip.daddr != ENCAPSULATION_IP || + encap->ip.protocol != IPPROTO_UDP) { + return TC_ACT_OK; + } + + /* TODO Check UDP length? */ + if (encap->udp.dest != ENCAPSULATION_PORT) { + return TC_ACT_OK; + } + + /* We now know that the packet is destined to us, we can + * drop bogus ones. + */ + if (ipv4_is_fragment((void *)&encap->ip)) { + metrics->errors_total_fragmented_ip++; + return TC_ACT_SHOT; + } + + if (encap->gue.variant != 0) { + metrics->errors_total_malformed_encapsulation++; + return TC_ACT_SHOT; + } + + if (encap->gue.control != 0) { + metrics->errors_total_malformed_encapsulation++; + return TC_ACT_SHOT; + } + + if (encap->gue.flags != 0) { + metrics->errors_total_malformed_encapsulation++; + return TC_ACT_SHOT; + } + + if (encap->gue.hlen != + sizeof(encap->unigue) / 4 + encap->unigue.hop_count) { + metrics->errors_total_malformed_encapsulation++; + return TC_ACT_SHOT; + } + + if (encap->unigue.version != 0) { + metrics->errors_total_malformed_encapsulation++; + return TC_ACT_SHOT; + } + + if (encap->unigue.reserved != 0) { + return TC_ACT_SHOT; + } + + struct in_addr next_hop; + MAYBE_RETURN(get_next_hop(&pkt, encap, &next_hop)); + + if (next_hop.s_addr == 0) { + metrics->accepted_packets_total_last_hop++; + return accept_locally(skb, encap); + } + + verdict_t verdict; + switch (encap->gue.proto_ctype) { + case IPPROTO_IPIP: + verdict = process_ipv4(&pkt, metrics); + break; + + case IPPROTO_IPV6: + verdict = process_ipv6(&pkt, metrics); + break; + + default: + metrics->errors_total_unknown_l3_proto++; + return TC_ACT_SHOT; + } + + switch (verdict) { + case INVALID: + /* metrics have already been bumped */ + return TC_ACT_SHOT; + + case UNKNOWN: + return forward_to_next_hop(skb, encap, &next_hop, metrics); + + case ECHO_REQUEST: + metrics->accepted_packets_total_icmp_echo_request++; + break; + + case SYN: + if (encap->unigue.forward_syn) { + return forward_to_next_hop(skb, encap, &next_hop, + metrics); + } + + metrics->accepted_packets_total_syn++; + break; + + case SYN_COOKIE: + metrics->accepted_packets_total_syn_cookies++; + break; + + case ESTABLISHED: + metrics->accepted_packets_total_established++; + break; + } + + return accept_locally(skb, encap); +} diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.h b/tools/testing/selftests/bpf/progs/test_cls_redirect.h new file mode 100644 index 000000000000..76eab0aacba0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ +/* Copyright 2019, 2020 Cloudflare */ + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <string.h> + +#include <linux/if_ether.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/udp.h> + +struct gre_base_hdr { + uint16_t flags; + uint16_t protocol; +} __attribute__((packed)); + +struct guehdr { +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + uint8_t hlen : 5, control : 1, variant : 2; +#else + uint8_t variant : 2, control : 1, hlen : 5; +#endif + uint8_t proto_ctype; + uint16_t flags; +}; + +struct unigue { +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + uint8_t _r : 2, last_hop_gre : 1, forward_syn : 1, version : 4; +#else + uint8_t version : 4, forward_syn : 1, last_hop_gre : 1, _r : 2; +#endif + uint8_t reserved; + uint8_t next_hop; + uint8_t hop_count; + // Next hops go here +} __attribute__((packed)); + +typedef struct { + struct ethhdr eth; + struct iphdr ip; + struct gre_base_hdr gre; +} __attribute__((packed)) encap_gre_t; + +typedef struct { + struct ethhdr eth; + struct iphdr ip; + struct udphdr udp; + struct guehdr gue; + struct unigue unigue; +} __attribute__((packed)) encap_headers_t; diff --git a/tools/testing/selftests/bpf/progs/test_enable_stats.c b/tools/testing/selftests/bpf/progs/test_enable_stats.c new file mode 100644 index 000000000000..01a002ade529 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_enable_stats.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +#include <linux/bpf.h> +#include <stdint.h> +#include <linux/types.h> +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +__u64 count = 0; + +SEC("raw_tracepoint/sys_enter") +int test_enable_stats(void *ctx) +{ + count += 1; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_obj_id.c b/tools/testing/selftests/bpf/progs/test_obj_id.c index 98b9de2fafd0..ded71b3ff6b4 100644 --- a/tools/testing/selftests/bpf/progs/test_obj_id.c +++ b/tools/testing/selftests/bpf/progs/test_obj_id.c @@ -3,16 +3,8 @@ */ #include <stddef.h> #include <linux/bpf.h> -#include <linux/pkt_cls.h> #include <bpf/bpf_helpers.h> -/* It is a dumb bpf program such that it must have no - * issue to be loaded since testing the verifier is - * not the focus here. - */ - -int _version SEC("version") = 1; - struct { __uint(type, BPF_MAP_TYPE_ARRAY); __uint(max_entries, 1); @@ -20,13 +12,13 @@ struct { __type(value, __u64); } test_map_id SEC(".maps"); -SEC("test_obj_id_dummy") -int test_obj_id(struct __sk_buff *skb) +SEC("raw_tp/sys_enter") +int test_obj_id(void *ctx) { __u32 key = 0; __u64 *value; value = bpf_map_lookup_elem(&test_map_id, &key); - return TC_ACT_OK; + return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_overhead.c b/tools/testing/selftests/bpf/progs/test_overhead.c index abb7344b531f..42403d088abc 100644 --- a/tools/testing/selftests/bpf/progs/test_overhead.c +++ b/tools/testing/selftests/bpf/progs/test_overhead.c @@ -39,4 +39,10 @@ int BPF_PROG(prog5, struct task_struct *tsk, const char *buf, bool exec) return 0; } +SEC("fmod_ret/__set_task_comm") +int BPF_PROG(prog6, struct task_struct *tsk, const char *buf, bool exec) +{ + return !tsk; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf.c b/tools/testing/selftests/bpf/progs/test_ringbuf.c new file mode 100644 index 000000000000..8ba9959b036b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_ringbuf.c @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +struct sample { + int pid; + int seq; + long value; + char comm[16]; +}; + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 1 << 12); +} ringbuf SEC(".maps"); + +/* inputs */ +int pid = 0; +long value = 0; +long flags = 0; + +/* outputs */ +long total = 0; +long discarded = 0; +long dropped = 0; + +long avail_data = 0; +long ring_size = 0; +long cons_pos = 0; +long prod_pos = 0; + +/* inner state */ +long seq = 0; + +SEC("tp/syscalls/sys_enter_getpgid") +int test_ringbuf(void *ctx) +{ + int cur_pid = bpf_get_current_pid_tgid() >> 32; + struct sample *sample; + int zero = 0; + + if (cur_pid != pid) + return 0; + + sample = bpf_ringbuf_reserve(&ringbuf, sizeof(*sample), 0); + if (!sample) { + __sync_fetch_and_add(&dropped, 1); + return 1; + } + + sample->pid = pid; + bpf_get_current_comm(sample->comm, sizeof(sample->comm)); + sample->value = value; + + sample->seq = seq++; + __sync_fetch_and_add(&total, 1); + + if (sample->seq & 1) { + /* copy from reserved sample to a new one... */ + bpf_ringbuf_output(&ringbuf, sample, sizeof(*sample), flags); + /* ...and then discard reserved sample */ + bpf_ringbuf_discard(sample, flags); + __sync_fetch_and_add(&discarded, 1); + } else { + bpf_ringbuf_submit(sample, flags); + } + + avail_data = bpf_ringbuf_query(&ringbuf, BPF_RB_AVAIL_DATA); + ring_size = bpf_ringbuf_query(&ringbuf, BPF_RB_RING_SIZE); + cons_pos = bpf_ringbuf_query(&ringbuf, BPF_RB_CONS_POS); + prod_pos = bpf_ringbuf_query(&ringbuf, BPF_RB_PROD_POS); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c b/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c new file mode 100644 index 000000000000..edf3b6953533 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +struct sample { + int pid; + int seq; + long value; + char comm[16]; +}; + +struct ringbuf_map { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 1 << 12); +} ringbuf1 SEC(".maps"), + ringbuf2 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, 4); + __type(key, int); + __array(values, struct ringbuf_map); +} ringbuf_arr SEC(".maps") = { + .values = { + [0] = &ringbuf1, + [2] = &ringbuf2, + }, +}; + +/* inputs */ +int pid = 0; +int target_ring = 0; +long value = 0; + +/* outputs */ +long total = 0; +long dropped = 0; +long skipped = 0; + +SEC("tp/syscalls/sys_enter_getpgid") +int test_ringbuf(void *ctx) +{ + int cur_pid = bpf_get_current_pid_tgid() >> 32; + struct sample *sample; + void *rb; + int zero = 0; + + if (cur_pid != pid) + return 0; + + rb = bpf_map_lookup_elem(&ringbuf_arr, &target_ring); + if (!rb) { + skipped += 1; + return 1; + } + + sample = bpf_ringbuf_reserve(rb, sizeof(*sample), 0); + if (!sample) { + dropped += 1; + return 1; + } + + sample->pid = pid; + bpf_get_current_comm(sample->comm, sizeof(sample->comm)); + sample->value = value; + + sample->seq = total; + total += 1; + + bpf_ringbuf_submit(sample, 0); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_sk_assign.c b/tools/testing/selftests/bpf/progs/test_sk_assign.c index 8f530843b4da..1ecd987005d2 100644 --- a/tools/testing/selftests/bpf/progs/test_sk_assign.c +++ b/tools/testing/selftests/bpf/progs/test_sk_assign.c @@ -16,6 +16,26 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> +/* Pin map under /sys/fs/bpf/tc/globals/<map name> */ +#define PIN_GLOBAL_NS 2 + +/* Must match struct bpf_elf_map layout from iproute2 */ +struct { + __u32 type; + __u32 size_key; + __u32 size_value; + __u32 max_elem; + __u32 flags; + __u32 id; + __u32 pinning; +} server_map SEC("maps") = { + .type = BPF_MAP_TYPE_SOCKMAP, + .size_key = sizeof(int), + .size_value = sizeof(__u64), + .max_elem = 1, + .pinning = PIN_GLOBAL_NS, +}; + int _version SEC("version") = 1; char _license[] SEC("license") = "GPL"; @@ -72,7 +92,9 @@ handle_udp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4) { struct bpf_sock_tuple ln = {0}; struct bpf_sock *sk; + const int zero = 0; size_t tuple_len; + __be16 dport; int ret; tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6); @@ -83,32 +105,11 @@ handle_udp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4) if (sk) goto assign; - if (ipv4) { - if (tuple->ipv4.dport != bpf_htons(4321)) - return TC_ACT_OK; - - ln.ipv4.daddr = bpf_htonl(0x7f000001); - ln.ipv4.dport = bpf_htons(1234); - - sk = bpf_sk_lookup_udp(skb, &ln, sizeof(ln.ipv4), - BPF_F_CURRENT_NETNS, 0); - } else { - if (tuple->ipv6.dport != bpf_htons(4321)) - return TC_ACT_OK; - - /* Upper parts of daddr are already zero. */ - ln.ipv6.daddr[3] = bpf_htonl(0x1); - ln.ipv6.dport = bpf_htons(1234); - - sk = bpf_sk_lookup_udp(skb, &ln, sizeof(ln.ipv6), - BPF_F_CURRENT_NETNS, 0); - } + dport = ipv4 ? tuple->ipv4.dport : tuple->ipv6.dport; + if (dport != bpf_htons(4321)) + return TC_ACT_OK; - /* workaround: We can't do a single socket lookup here, because then - * the compiler will likely spill tuple_len to the stack. This makes it - * lose all bounds information in the verifier, which then rejects the - * call as unsafe. - */ + sk = bpf_map_lookup_elem(&server_map, &zero); if (!sk) return TC_ACT_SHOT; @@ -123,7 +124,9 @@ handle_tcp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4) { struct bpf_sock_tuple ln = {0}; struct bpf_sock *sk; + const int zero = 0; size_t tuple_len; + __be16 dport; int ret; tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6); @@ -137,32 +140,11 @@ handle_tcp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4) bpf_sk_release(sk); } - if (ipv4) { - if (tuple->ipv4.dport != bpf_htons(4321)) - return TC_ACT_OK; + dport = ipv4 ? tuple->ipv4.dport : tuple->ipv6.dport; + if (dport != bpf_htons(4321)) + return TC_ACT_OK; - ln.ipv4.daddr = bpf_htonl(0x7f000001); - ln.ipv4.dport = bpf_htons(1234); - - sk = bpf_skc_lookup_tcp(skb, &ln, sizeof(ln.ipv4), - BPF_F_CURRENT_NETNS, 0); - } else { - if (tuple->ipv6.dport != bpf_htons(4321)) - return TC_ACT_OK; - - /* Upper parts of daddr are already zero. */ - ln.ipv6.daddr[3] = bpf_htonl(0x1); - ln.ipv6.dport = bpf_htons(1234); - - sk = bpf_skc_lookup_tcp(skb, &ln, sizeof(ln.ipv6), - BPF_F_CURRENT_NETNS, 0); - } - - /* workaround: We can't do a single socket lookup here, because then - * the compiler will likely spill tuple_len to the stack. This makes it - * lose all bounds information in the verifier, which then rejects the - * call as unsafe. - */ + sk = bpf_map_lookup_elem(&server_map, &zero); if (!sk) return TC_ACT_SHOT; diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c index d2b38fa6a5b0..e83d0b48d80c 100644 --- a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c +++ b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c @@ -73,6 +73,7 @@ int bpf_sk_lookup_test0(struct __sk_buff *skb) tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6); sk = bpf_sk_lookup_tcp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0); + bpf_printk("sk=%d\n", sk ? 1 : 0); if (sk) bpf_sk_release(sk); return sk ? TC_ACT_OK : TC_ACT_UNSPEC; diff --git a/tools/testing/selftests/bpf/progs/test_skb_helpers.c b/tools/testing/selftests/bpf/progs/test_skb_helpers.c new file mode 100644 index 000000000000..bb3fbf1a29e3 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_skb_helpers.c @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +#define TEST_COMM_LEN 16 + +struct { + __uint(type, BPF_MAP_TYPE_CGROUP_ARRAY); + __uint(max_entries, 1); + __type(key, u32); + __type(value, u32); +} cgroup_map SEC(".maps"); + +char _license[] SEC("license") = "GPL"; + +SEC("classifier/test_skb_helpers") +int test_skb_helpers(struct __sk_buff *skb) +{ + struct task_struct *task; + char comm[TEST_COMM_LEN]; + __u32 tpid; + + task = (struct task_struct *)bpf_get_current_task(); + bpf_probe_read_kernel(&tpid , sizeof(tpid), &task->tgid); + bpf_probe_read_kernel_str(&comm, sizeof(comm), &task->comm); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_skmsg_load_helpers.c b/tools/testing/selftests/bpf/progs/test_skmsg_load_helpers.c new file mode 100644 index 000000000000..45e8fc75a739 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_skmsg_load_helpers.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Isovalent, Inc. +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> + +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 2); + __type(key, __u32); + __type(value, __u64); +} sock_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_SOCKHASH); + __uint(max_entries, 2); + __type(key, __u32); + __type(value, __u64); +} sock_hash SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, __u32); + __type(value, __u64); +} socket_storage SEC(".maps"); + +SEC("sk_msg") +int prog_msg_verdict(struct sk_msg_md *msg) +{ + struct task_struct *task = (struct task_struct *)bpf_get_current_task(); + int verdict = SK_PASS; + __u32 pid, tpid; + __u64 *sk_stg; + + pid = bpf_get_current_pid_tgid() >> 32; + sk_stg = bpf_sk_storage_get(&socket_storage, msg->sk, 0, BPF_SK_STORAGE_GET_F_CREATE); + if (!sk_stg) + return SK_DROP; + *sk_stg = pid; + bpf_probe_read_kernel(&tpid , sizeof(tpid), &task->tgid); + if (pid != tpid) + verdict = SK_DROP; + bpf_sk_storage_delete(&socket_storage, (void *)msg->sk); + return verdict; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_sockmap_kern.h b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h index 9b4d3a68a91a..057036ca1111 100644 --- a/tools/testing/selftests/bpf/test_sockmap_kern.h +++ b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h @@ -79,11 +79,18 @@ struct { struct { __uint(type, BPF_MAP_TYPE_ARRAY); - __uint(max_entries, 1); + __uint(max_entries, 2); __type(key, int); __type(value, int); } sock_skb_opts SEC(".maps"); +struct { + __uint(type, TEST_MAP_TYPE); + __uint(max_entries, 20); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); +} tls_sock_map SEC(".maps"); + SEC("sk_skb1") int bpf_prog1(struct __sk_buff *skb) { @@ -110,8 +117,6 @@ int bpf_prog2(struct __sk_buff *skb) flags = *f; } - bpf_printk("sk_skb2: redirect(%iB) flags=%i\n", - len, flags); #ifdef SOCKMAP return bpf_sk_redirect_map(skb, &sock_map, ret, flags); #else @@ -120,6 +125,43 @@ int bpf_prog2(struct __sk_buff *skb) } +SEC("sk_skb3") +int bpf_prog3(struct __sk_buff *skb) +{ + const int one = 1; + int err, *f, ret = SK_PASS; + void *data_end; + char *c; + + err = bpf_skb_pull_data(skb, 19); + if (err) + goto tls_out; + + c = (char *)(long)skb->data; + data_end = (void *)(long)skb->data_end; + + if (c + 18 < data_end) + memcpy(&c[13], "PASS", 4); + f = bpf_map_lookup_elem(&sock_skb_opts, &one); + if (f && *f) { + __u64 flags = 0; + + ret = 0; + flags = *f; +#ifdef SOCKMAP + return bpf_sk_redirect_map(skb, &tls_sock_map, ret, flags); +#else + return bpf_sk_redirect_hash(skb, &tls_sock_map, &ret, flags); +#endif + } + + f = bpf_map_lookup_elem(&sock_skb_opts, &one); + if (f && *f) + ret = SK_DROP; +tls_out: + return ret; +} + SEC("sockops") int bpf_sockmap(struct bpf_sock_ops *skops) { @@ -143,8 +185,6 @@ int bpf_sockmap(struct bpf_sock_ops *skops) err = bpf_sock_hash_update(skops, &sock_map, &ret, BPF_NOEXIST); #endif - bpf_printk("passive(%i -> %i) map ctx update err: %d\n", - lport, bpf_ntohl(rport), err); } break; case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: @@ -160,8 +200,6 @@ int bpf_sockmap(struct bpf_sock_ops *skops) err = bpf_sock_hash_update(skops, &sock_map, &ret, BPF_NOEXIST); #endif - bpf_printk("active(%i -> %i) map ctx update err: %d\n", - lport, bpf_ntohl(rport), err); } break; default: @@ -199,72 +237,6 @@ int bpf_prog4(struct sk_msg_md *msg) } SEC("sk_msg2") -int bpf_prog5(struct sk_msg_md *msg) -{ - int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5; - int *start, *end, *start_push, *end_push, *start_pop, *pop; - int *bytes, len1, len2 = 0, len3, len4; - int err1 = -1, err2 = -1; - - bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); - if (bytes) - err1 = bpf_msg_apply_bytes(msg, *bytes); - bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero); - if (bytes) - err2 = bpf_msg_cork_bytes(msg, *bytes); - len1 = (__u64)msg->data_end - (__u64)msg->data; - start = bpf_map_lookup_elem(&sock_bytes, &zero); - end = bpf_map_lookup_elem(&sock_bytes, &one); - if (start && end) { - int err; - - bpf_printk("sk_msg2: pull(%i:%i)\n", - start ? *start : 0, end ? *end : 0); - err = bpf_msg_pull_data(msg, *start, *end, 0); - if (err) - bpf_printk("sk_msg2: pull_data err %i\n", - err); - len2 = (__u64)msg->data_end - (__u64)msg->data; - bpf_printk("sk_msg2: length update %i->%i\n", - len1, len2); - } - - start_push = bpf_map_lookup_elem(&sock_bytes, &two); - end_push = bpf_map_lookup_elem(&sock_bytes, &three); - if (start_push && end_push) { - int err; - - bpf_printk("sk_msg2: push(%i:%i)\n", - start_push ? *start_push : 0, - end_push ? *end_push : 0); - err = bpf_msg_push_data(msg, *start_push, *end_push, 0); - if (err) - bpf_printk("sk_msg2: push_data err %i\n", err); - len3 = (__u64)msg->data_end - (__u64)msg->data; - bpf_printk("sk_msg2: length push_update %i->%i\n", - len2 ? len2 : len1, len3); - } - start_pop = bpf_map_lookup_elem(&sock_bytes, &four); - pop = bpf_map_lookup_elem(&sock_bytes, &five); - if (start_pop && pop) { - int err; - - bpf_printk("sk_msg2: pop(%i@%i)\n", - start_pop, pop); - err = bpf_msg_pop_data(msg, *start_pop, *pop, 0); - if (err) - bpf_printk("sk_msg2: pop_data err %i\n", err); - len4 = (__u64)msg->data_end - (__u64)msg->data; - bpf_printk("sk_msg2: length pop_data %i->%i\n", - len1 ? len1 : 0, len4); - } - - bpf_printk("sk_msg2: data length %i err1 %i err2 %i\n", - len1, err1, err2); - return SK_PASS; -} - -SEC("sk_msg3") int bpf_prog6(struct sk_msg_md *msg) { int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5, key = 0; @@ -305,86 +277,7 @@ int bpf_prog6(struct sk_msg_md *msg) #endif } -SEC("sk_msg4") -int bpf_prog7(struct sk_msg_md *msg) -{ - int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop, *f; - int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5; - int len1, len2 = 0, len3, len4; - int err1 = 0, err2 = 0, key = 0; - __u64 flags = 0; - - int err; - bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); - if (bytes) - err1 = bpf_msg_apply_bytes(msg, *bytes); - bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero); - if (bytes) - err2 = bpf_msg_cork_bytes(msg, *bytes); - len1 = (__u64)msg->data_end - (__u64)msg->data; - - start = bpf_map_lookup_elem(&sock_bytes, &zero); - end = bpf_map_lookup_elem(&sock_bytes, &one); - if (start && end) { - bpf_printk("sk_msg2: pull(%i:%i)\n", - start ? *start : 0, end ? *end : 0); - err = bpf_msg_pull_data(msg, *start, *end, 0); - if (err) - bpf_printk("sk_msg2: pull_data err %i\n", - err); - len2 = (__u64)msg->data_end - (__u64)msg->data; - bpf_printk("sk_msg2: length update %i->%i\n", - len1, len2); - } - - start_push = bpf_map_lookup_elem(&sock_bytes, &two); - end_push = bpf_map_lookup_elem(&sock_bytes, &three); - if (start_push && end_push) { - bpf_printk("sk_msg4: push(%i:%i)\n", - start_push ? *start_push : 0, - end_push ? *end_push : 0); - err = bpf_msg_push_data(msg, *start_push, *end_push, 0); - if (err) - bpf_printk("sk_msg4: push_data err %i\n", - err); - len3 = (__u64)msg->data_end - (__u64)msg->data; - bpf_printk("sk_msg4: length push_update %i->%i\n", - len2 ? len2 : len1, len3); - } - - start_pop = bpf_map_lookup_elem(&sock_bytes, &four); - pop = bpf_map_lookup_elem(&sock_bytes, &five); - if (start_pop && pop) { - int err; - - bpf_printk("sk_msg4: pop(%i@%i)\n", - start_pop, pop); - err = bpf_msg_pop_data(msg, *start_pop, *pop, 0); - if (err) - bpf_printk("sk_msg4: pop_data err %i\n", err); - len4 = (__u64)msg->data_end - (__u64)msg->data; - bpf_printk("sk_msg4: length pop_data %i->%i\n", - len1 ? len1 : 0, len4); - } - - - f = bpf_map_lookup_elem(&sock_redir_flags, &zero); - if (f && *f) { - key = 2; - flags = *f; - } - bpf_printk("sk_msg3: redirect(%iB) flags=%i err=%i\n", - len1, flags, err1 ? err1 : err2); -#ifdef SOCKMAP - err = bpf_msg_redirect_map(msg, &sock_map_redir, key, flags); -#else - err = bpf_msg_redirect_hash(msg, &sock_map_redir, &key, flags); -#endif - bpf_printk("sk_msg3: err %i\n", err); - return err; -} - -SEC("sk_msg5") +SEC("sk_msg3") int bpf_prog8(struct sk_msg_md *msg) { void *data_end = (void *)(long) msg->data_end; @@ -401,7 +294,7 @@ int bpf_prog8(struct sk_msg_md *msg) } return SK_PASS; } -SEC("sk_msg6") +SEC("sk_msg4") int bpf_prog9(struct sk_msg_md *msg) { void *data_end = (void *)(long) msg->data_end; @@ -419,7 +312,7 @@ int bpf_prog9(struct sk_msg_md *msg) return SK_PASS; } -SEC("sk_msg7") +SEC("sk_msg5") int bpf_prog10(struct sk_msg_md *msg) { int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop; @@ -443,7 +336,6 @@ int bpf_prog10(struct sk_msg_md *msg) pop = bpf_map_lookup_elem(&sock_bytes, &five); if (start_pop && pop) bpf_msg_pop_data(msg, *start_pop, *pop, 0); - bpf_printk("return sk drop\n"); return SK_DROP; } diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c index 2d0b0b82a78a..50525235380e 100644 --- a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c +++ b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c @@ -45,7 +45,7 @@ int sysctl_tcp_mem(struct bpf_sysctl *ctx) unsigned long tcp_mem[3] = {0, 0, 0}; char value[MAX_VALUE_STR_LEN]; unsigned char i, off = 0; - int ret; + volatile int ret; if (ctx->write) return 0; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c new file mode 100644 index 000000000000..3d66599eee2e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +SEC("xdp_adjust_tail_grow") +int _xdp_adjust_tail_grow(struct xdp_md *xdp) +{ + void *data_end = (void *)(long)xdp->data_end; + void *data = (void *)(long)xdp->data; + unsigned int data_len; + int offset = 0; + + /* Data length determine test case */ + data_len = data_end - data; + + if (data_len == 54) { /* sizeof(pkt_v4) */ + offset = 4096; /* test too large offset */ + } else if (data_len == 74) { /* sizeof(pkt_v6) */ + offset = 40; + } else if (data_len == 64) { + offset = 128; + } else if (data_len == 128) { + offset = 4096 - 256 - 320 - data_len; /* Max tail grow 3520 */ + } else { + return XDP_ABORTED; /* No matching test */ + } + + if (bpf_xdp_adjust_tail(xdp, offset)) + return XDP_DROP; + return XDP_TX; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_adjust_tail.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c index b7fc85769bdc..22065a9cfb25 100644 --- a/tools/testing/selftests/bpf/progs/test_adjust_tail.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c @@ -1,5 +1,5 @@ -/* SPDX-License-Identifier: GPL-2.0 - * Copyright (c) 2018 Facebook +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2018 Facebook * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -11,15 +11,15 @@ int _version SEC("version") = 1; -SEC("xdp_adjust_tail") -int _xdp_adjust_tail(struct xdp_md *xdp) +SEC("xdp_adjust_tail_shrink") +int _xdp_adjust_tail_shrink(struct xdp_md *xdp) { void *data_end = (void *)(long)xdp->data_end; void *data = (void *)(long)xdp->data; int offset = 0; - if (data_end - data == 54) - offset = 256; + if (data_end - data == 54) /* sizeof(pkt_v4) */ + offset = 256; /* shrink too much */ else offset = 20; if (bpf_xdp_adjust_tail(xdp, 0 - offset)) diff --git a/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c new file mode 100644 index 000000000000..e5c0f131c8a7 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0 +/* fails to load without expected_attach_type = BPF_XDP_DEVMAP + * because of access to egress_ifindex + */ +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> + +SEC("xdp_dm_log") +int xdpdm_devlog(struct xdp_md *ctx) +{ + char fmt[] = "devmap redirect: dev %u -> dev %u len %u\n"; + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + unsigned int len = data_end - data; + + bpf_trace_printk(fmt, sizeof(fmt), + ctx->ingress_ifindex, ctx->egress_ifindex, len); + + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c new file mode 100644 index 000000000000..deef0e050863 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> + +struct { + __uint(type, BPF_MAP_TYPE_DEVMAP); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(struct bpf_devmap_val)); + __uint(max_entries, 4); +} dm_ports SEC(".maps"); + +SEC("xdp_redir") +int xdp_redir_prog(struct xdp_md *ctx) +{ + return bpf_redirect_map(&dm_ports, 1, 0); +} + +/* invalid program on DEVMAP entry; + * SEC name means expected attach type not set + */ +SEC("xdp_dummy") +int xdp_dummy_prog(struct xdp_md *ctx) +{ + return XDP_PASS; +} + +/* valid program on DEVMAP entry via SEC name; + * has access to egress and ingress ifindex + */ +SEC("xdp_devmap") +int xdp_dummy_dm(struct xdp_md *ctx) +{ + char fmt[] = "devmap redirect: dev %u -> dev %u len %u\n"; + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + unsigned int len = data_end - data; + + bpf_trace_printk(fmt, sizeof(fmt), + ctx->ingress_ifindex, ctx->egress_ifindex, len); + + return XDP_PASS; +} +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c new file mode 100644 index 000000000000..8b36b6640e7e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/trigger_bench.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +#include <linux/bpf.h> +#include <asm/unistd.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +long hits = 0; + +SEC("tp/syscalls/sys_enter_getpgid") +int bench_trigger_tp(void *ctx) +{ + __sync_add_and_fetch(&hits, 1); + return 0; +} + +SEC("raw_tp/sys_enter") +int BPF_PROG(bench_trigger_raw_tp, struct pt_regs *regs, long id) +{ + if (id == __NR_getpgid) + __sync_add_and_fetch(&hits, 1); + return 0; +} + +SEC("kprobe/__x64_sys_getpgid") +int bench_trigger_kprobe(void *ctx) +{ + __sync_add_and_fetch(&hits, 1); + return 0; +} + +SEC("fentry/__x64_sys_getpgid") +int bench_trigger_fentry(void *ctx) +{ + __sync_add_and_fetch(&hits, 1); + return 0; +} + +SEC("fmod_ret/__x64_sys_getpgid") +int bench_trigger_fmodret(void *ctx) +{ + __sync_add_and_fetch(&hits, 1); + return -22; +} diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index c6766b2cff85..6a12a0e01e07 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -1394,23 +1394,25 @@ static void test_map_rdonly(void) key = 1; value = 1234; - /* Insert key=1 element. */ + /* Try to insert key=1 element. */ assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == -1 && errno == EPERM); - /* Check that key=2 is not found. */ + /* Check that key=1 is not found. */ assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT); assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == ENOENT); + + close(fd); } -static void test_map_wronly(void) +static void test_map_wronly_hash(void) { int fd, key = 0, value = 0; fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), MAP_SIZE, map_flags | BPF_F_WRONLY); if (fd < 0) { - printf("Failed to create map for read only test '%s'!\n", + printf("Failed to create map for write only test '%s'!\n", strerror(errno)); exit(1); } @@ -1420,9 +1422,49 @@ static void test_map_wronly(void) /* Insert key=1 element. */ assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0); - /* Check that key=2 is not found. */ + /* Check that reading elements and keys from the map is not allowed. */ assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == EPERM); assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == EPERM); + + close(fd); +} + +static void test_map_wronly_stack_or_queue(enum bpf_map_type map_type) +{ + int fd, value = 0; + + assert(map_type == BPF_MAP_TYPE_QUEUE || + map_type == BPF_MAP_TYPE_STACK); + fd = bpf_create_map(map_type, 0, sizeof(value), MAP_SIZE, + map_flags | BPF_F_WRONLY); + /* Stack/Queue maps do not support BPF_F_NO_PREALLOC */ + if (map_flags & BPF_F_NO_PREALLOC) { + assert(fd < 0 && errno == EINVAL); + return; + } + if (fd < 0) { + printf("Failed to create map '%s'!\n", strerror(errno)); + exit(1); + } + + value = 1234; + assert(bpf_map_update_elem(fd, NULL, &value, BPF_ANY) == 0); + + /* Peek element should fail */ + assert(bpf_map_lookup_elem(fd, NULL, &value) == -1 && errno == EPERM); + + /* Pop element should fail */ + assert(bpf_map_lookup_and_delete_elem(fd, NULL, &value) == -1 && + errno == EPERM); + + close(fd); +} + +static void test_map_wronly(void) +{ + test_map_wronly_hash(); + test_map_wronly_stack_or_queue(BPF_MAP_TYPE_STACK); + test_map_wronly_stack_or_queue(BPF_MAP_TYPE_QUEUE); } static void prepare_reuseport_grp(int type, int map_fd, size_t map_elem_size, diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index b521e0a512b6..54fa5fa688ce 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -222,23 +222,6 @@ int test__join_cgroup(const char *path) return fd; } -struct ipv4_packet pkt_v4 = { - .eth.h_proto = __bpf_constant_htons(ETH_P_IP), - .iph.ihl = 5, - .iph.protocol = IPPROTO_TCP, - .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES), - .tcp.urg_ptr = 123, - .tcp.doff = 5, -}; - -struct ipv6_packet pkt_v6 = { - .eth.h_proto = __bpf_constant_htons(ETH_P_IPV6), - .iph.nexthdr = IPPROTO_TCP, - .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES), - .tcp.urg_ptr = 123, - .tcp.doff = 5, -}; - int bpf_find_map(const char *test, struct bpf_object *obj, const char *name) { struct bpf_map *map; @@ -351,25 +334,13 @@ int extract_build_id(char *build_id, size_t size) len = size; memcpy(build_id, line, len); build_id[len] = '\0'; + free(line); return 0; err: fclose(fp); return -1; } -void *spin_lock_thread(void *arg) -{ - __u32 duration, retval; - int err, prog_fd = *(u32 *) arg; - - err = bpf_prog_test_run(prog_fd, 10000, &pkt_v4, sizeof(pkt_v4), - NULL, NULL, &retval, &duration); - CHECK(err || retval, "", - "err %d errno %d retval %d duration %d\n", - err, errno, retval, duration); - pthread_exit(arg); -} - /* extern declarations for test funcs */ #define DEFINE_TEST(name) extern void test_##name(void); #include <prog_tests/tests.h> @@ -420,6 +391,18 @@ static int libbpf_print_fn(enum libbpf_print_level level, return 0; } +static void free_str_set(const struct str_set *set) +{ + int i; + + if (!set) + return; + + for (i = 0; i < set->cnt; i++) + free((void *)set->strs[i]); + free(set->strs); +} + static int parse_str_list(const char *s, struct str_set *set) { char *input, *state = NULL, *next, **tmp, **strs = NULL; @@ -455,67 +438,6 @@ err: return -ENOMEM; } -int parse_num_list(const char *s, struct test_selector *sel) -{ - int i, set_len = 0, new_len, num, start = 0, end = -1; - bool *set = NULL, *tmp, parsing_end = false; - char *next; - - while (s[0]) { - errno = 0; - num = strtol(s, &next, 10); - if (errno) - return -errno; - - if (parsing_end) - end = num; - else - start = num; - - if (!parsing_end && *next == '-') { - s = next + 1; - parsing_end = true; - continue; - } else if (*next == ',') { - parsing_end = false; - s = next + 1; - end = num; - } else if (*next == '\0') { - parsing_end = false; - s = next; - end = num; - } else { - return -EINVAL; - } - - if (start > end) - return -EINVAL; - - if (end + 1 > set_len) { - new_len = end + 1; - tmp = realloc(set, new_len); - if (!tmp) { - free(set); - return -ENOMEM; - } - for (i = set_len; i < start; i++) - tmp[i] = false; - set = tmp; - set_len = new_len; - } - for (i = start; i <= end; i++) - set[i] = true; - } - - if (!set) - return -EINVAL; - - sel->num_set = set; - sel->num_set_len = set_len; - - return 0; -} - extern int extra_prog_load_log_flags; static error_t parse_arg(int key, char *arg, struct argp_state *state) @@ -529,13 +451,15 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) if (subtest_str) { *subtest_str = '\0'; if (parse_num_list(subtest_str + 1, - &env->subtest_selector)) { + &env->subtest_selector.num_set, + &env->subtest_selector.num_set_len)) { fprintf(stderr, "Failed to parse subtest numbers.\n"); return -EINVAL; } } - if (parse_num_list(arg, &env->test_selector)) { + if (parse_num_list(arg, &env->test_selector.num_set, + &env->test_selector.num_set_len)) { fprintf(stderr, "Failed to parse test numbers.\n"); return -EINVAL; } @@ -756,11 +680,11 @@ int main(int argc, char **argv) fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n", env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt); - free(env.test_selector.blacklist.strs); - free(env.test_selector.whitelist.strs); + free_str_set(&env.test_selector.blacklist); + free_str_set(&env.test_selector.whitelist); free(env.test_selector.num_set); - free(env.subtest_selector.blacklist.strs); - free(env.subtest_selector.whitelist.strs); + free_str_set(&env.subtest_selector.blacklist); + free_str_set(&env.subtest_selector.whitelist); free(env.subtest_selector.num_set); return env.fail_cnt ? EXIT_FAILURE : EXIT_SUCCESS; diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index f4aff6b8284b..f4503c926aca 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -37,6 +37,7 @@ typedef __u16 __sum16; #include "bpf_util.h" #include <bpf/bpf_endian.h> #include "trace_helpers.h" +#include "testing_helpers.h" #include "flow_dissector_load.h" enum verbosity { @@ -87,23 +88,12 @@ extern void test__skip(void); extern void test__fail(void); extern int test__join_cgroup(const char *path); -#define MAGIC_BYTES 123 - -/* ipv4 test vector */ -struct ipv4_packet { - struct ethhdr eth; - struct iphdr iph; - struct tcphdr tcp; -} __packed; -extern struct ipv4_packet pkt_v4; - -/* ipv6 test vector */ -struct ipv6_packet { - struct ethhdr eth; - struct ipv6hdr iph; - struct tcphdr tcp; -} __packed; -extern struct ipv6_packet pkt_v6; +#define PRINT_FAIL(format...) \ + ({ \ + test__fail(); \ + fprintf(stdout, "%s:FAIL:%d ", __func__, __LINE__); \ + fprintf(stdout, ##format); \ + }) #define _CHECK(condition, tag, duration, format...) ({ \ int __ret = !!(condition); \ @@ -136,10 +126,6 @@ extern struct ipv6_packet pkt_v6; #define CHECK_ATTR(condition, tag, format...) \ _CHECK(condition, tag, tattr.duration, format) -#define MAGIC_VAL 0x1234 -#define NUM_ITER 100000 -#define VIP_NUM 5 - static inline __u64 ptr_to_u64(const void *ptr) { return (__u64) (unsigned long) ptr; @@ -149,7 +135,6 @@ int bpf_find_map(const char *test, struct bpf_object *obj, const char *name); int compare_map_keys(int map1_fd, int map2_fd); int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len); int extract_build_id(char *build_id, size_t size); -void *spin_lock_thread(void *arg); #ifdef __x86_64__ #define SYS_NANOSLEEP_KPROBE_NAME "__x64_sys_nanosleep" diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c index 61fd95b89af8..0358814c67dc 100644 --- a/tools/testing/selftests/bpf/test_sock_addr.c +++ b/tools/testing/selftests/bpf/test_sock_addr.c @@ -677,7 +677,7 @@ static int bind4_prog_load(const struct sock_addr_test *test) uint8_t u4_addr8[4]; uint16_t u4_addr16[2]; uint32_t u4_addr32; - } ip4; + } ip4, port; struct sockaddr_in addr4_rw; if (inet_pton(AF_INET, SERV4_IP, (void *)&ip4) != 1) { @@ -685,6 +685,8 @@ static int bind4_prog_load(const struct sock_addr_test *test) return -1; } + port.u4_addr32 = htons(SERV4_PORT); + if (mk_sockaddr(AF_INET, SERV4_REWRITE_IP, SERV4_REWRITE_PORT, (struct sockaddr *)&addr4_rw, sizeof(addr4_rw)) == -1) return -1; @@ -696,49 +698,65 @@ static int bind4_prog_load(const struct sock_addr_test *test) /* if (sk.family == AF_INET && */ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, offsetof(struct bpf_sock_addr, family)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET, 24), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET, 32), /* (sk.type == SOCK_DGRAM || sk.type == SOCK_STREAM) && */ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, offsetof(struct bpf_sock_addr, type)), BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_DGRAM, 1), BPF_JMP_A(1), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_STREAM, 20), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_STREAM, 28), /* 1st_byte_of_user_ip4 == expected && */ BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6, offsetof(struct bpf_sock_addr, user_ip4)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[0], 18), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[0], 26), /* 2nd_byte_of_user_ip4 == expected && */ BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6, offsetof(struct bpf_sock_addr, user_ip4) + 1), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[1], 16), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[1], 24), /* 3rd_byte_of_user_ip4 == expected && */ BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6, offsetof(struct bpf_sock_addr, user_ip4) + 2), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[2], 14), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[2], 22), /* 4th_byte_of_user_ip4 == expected && */ BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6, offsetof(struct bpf_sock_addr, user_ip4) + 3), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[3], 12), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[3], 20), /* 1st_half_of_user_ip4 == expected && */ BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6, offsetof(struct bpf_sock_addr, user_ip4)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[0], 10), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[0], 18), /* 2nd_half_of_user_ip4 == expected && */ BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6, offsetof(struct bpf_sock_addr, user_ip4) + 2), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[1], 8), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[1], 16), - /* whole_user_ip4 == expected) { */ + /* whole_user_ip4 == expected && */ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, offsetof(struct bpf_sock_addr, user_ip4)), BPF_LD_IMM64(BPF_REG_8, ip4.u4_addr32), /* See [2]. */ + BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_8, 12), + + /* 1st_byte_of_user_port == expected && */ + BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock_addr, user_port)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, port.u4_addr8[0], 10), + + /* 1st_half_of_user_port == expected && */ + BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock_addr, user_port)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, port.u4_addr16[0], 8), + + /* user_port == expected) { */ + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock_addr, user_port)), + BPF_LD_IMM64(BPF_REG_8, port.u4_addr32), /* See [2]. */ BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_8, 4), /* user_ip4 = addr4_rw.sin_addr */ diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index 779e11da979c..37695fc8096a 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -54,7 +54,7 @@ static void running_handler(int a); #define S1_PORT 10000 #define S2_PORT 10001 -#define BPF_SOCKMAP_FILENAME "test_sockmap_kern.o" +#define BPF_SOCKMAP_FILENAME "test_sockmap_kern.o" #define BPF_SOCKHASH_FILENAME "test_sockhash_kern.o" #define CG_PATH "/sockmap" @@ -63,14 +63,12 @@ int s1, s2, c1, c2, p1, p2; int test_cnt; int passed; int failed; -int map_fd[8]; -struct bpf_map *maps[8]; +int map_fd[9]; +struct bpf_map *maps[9]; int prog_fd[11]; int txmsg_pass; -int txmsg_noisy; int txmsg_redir; -int txmsg_redir_noisy; int txmsg_drop; int txmsg_apply; int txmsg_cork; @@ -81,7 +79,10 @@ int txmsg_end_push; int txmsg_start_pop; int txmsg_pop; int txmsg_ingress; -int txmsg_skb; +int txmsg_redir_skb; +int txmsg_ktls_skb; +int txmsg_ktls_skb_drop; +int txmsg_ktls_skb_redir; int ktls; int peek_flag; @@ -89,15 +90,13 @@ static const struct option long_options[] = { {"help", no_argument, NULL, 'h' }, {"cgroup", required_argument, NULL, 'c' }, {"rate", required_argument, NULL, 'r' }, - {"verbose", no_argument, NULL, 'v' }, + {"verbose", optional_argument, NULL, 'v' }, {"iov_count", required_argument, NULL, 'i' }, {"length", required_argument, NULL, 'l' }, {"test", required_argument, NULL, 't' }, {"data_test", no_argument, NULL, 'd' }, {"txmsg", no_argument, &txmsg_pass, 1 }, - {"txmsg_noisy", no_argument, &txmsg_noisy, 1 }, {"txmsg_redir", no_argument, &txmsg_redir, 1 }, - {"txmsg_redir_noisy", no_argument, &txmsg_redir_noisy, 1}, {"txmsg_drop", no_argument, &txmsg_drop, 1 }, {"txmsg_apply", required_argument, NULL, 'a'}, {"txmsg_cork", required_argument, NULL, 'k'}, @@ -108,12 +107,108 @@ static const struct option long_options[] = { {"txmsg_start_pop", required_argument, NULL, 'w'}, {"txmsg_pop", required_argument, NULL, 'x'}, {"txmsg_ingress", no_argument, &txmsg_ingress, 1 }, - {"txmsg_skb", no_argument, &txmsg_skb, 1 }, + {"txmsg_redir_skb", no_argument, &txmsg_redir_skb, 1 }, {"ktls", no_argument, &ktls, 1 }, {"peek", no_argument, &peek_flag, 1 }, + {"whitelist", required_argument, NULL, 'n' }, + {"blacklist", required_argument, NULL, 'b' }, {0, 0, NULL, 0 } }; +struct test_env { + const char *type; + const char *subtest; + const char *prepend; + + int test_num; + int subtest_num; + + int succ_cnt; + int fail_cnt; + int fail_last; +}; + +struct test_env env; + +struct sockmap_options { + int verbose; + bool base; + bool sendpage; + bool data_test; + bool drop_expected; + int iov_count; + int iov_length; + int rate; + char *map; + char *whitelist; + char *blacklist; + char *prepend; +}; + +struct _test { + char *title; + void (*tester)(int cg_fd, struct sockmap_options *opt); +}; + +static void test_start(void) +{ + env.subtest_num++; +} + +static void test_fail(void) +{ + env.fail_cnt++; +} + +static void test_pass(void) +{ + env.succ_cnt++; +} + +static void test_reset(void) +{ + txmsg_start = txmsg_end = 0; + txmsg_start_pop = txmsg_pop = 0; + txmsg_start_push = txmsg_end_push = 0; + txmsg_pass = txmsg_drop = txmsg_redir = 0; + txmsg_apply = txmsg_cork = 0; + txmsg_ingress = txmsg_redir_skb = 0; + txmsg_ktls_skb = txmsg_ktls_skb_drop = txmsg_ktls_skb_redir = 0; +} + +static int test_start_subtest(const struct _test *t, struct sockmap_options *o) +{ + env.type = o->map; + env.subtest = t->title; + env.prepend = o->prepend; + env.test_num++; + env.subtest_num = 0; + env.fail_last = env.fail_cnt; + test_reset(); + return 0; +} + +static void test_end_subtest(void) +{ + int error = env.fail_cnt - env.fail_last; + int type = strcmp(env.type, BPF_SOCKMAP_FILENAME); + + if (!error) + test_pass(); + + fprintf(stdout, "#%2d/%2d %8s:%s:%s:%s\n", + env.test_num, env.subtest_num, + !type ? "sockmap" : "sockhash", + env.prepend ? : "", + env.subtest, error ? "FAIL" : "OK"); +} + +static void test_print_results(void) +{ + fprintf(stdout, "Pass: %d Fail: %d\n", + env.succ_cnt, env.fail_cnt); +} + static void usage(char *argv[]) { int i; @@ -296,7 +391,7 @@ static int sockmap_init_sockets(int verbose) return errno; } - if (verbose) { + if (verbose > 1) { printf("connected sockets: c1 <-> p1, c2 <-> p2\n"); printf("cgroups binding: c1(%i) <-> s1(%i) - - - c2(%i) <-> s2(%i)\n", c1, s1, c2, s2); @@ -311,17 +406,6 @@ struct msg_stats { struct timespec end; }; -struct sockmap_options { - int verbose; - bool base; - bool sendpage; - bool data_test; - bool drop_expected; - int iov_count; - int iov_length; - int rate; -}; - static int msg_loop_sendpage(int fd, int iov_length, int cnt, struct msg_stats *s, struct sockmap_options *opt) @@ -345,14 +429,18 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt, clock_gettime(CLOCK_MONOTONIC, &s->start); for (i = 0; i < cnt; i++) { - int sent = sendfile(fd, fp, NULL, iov_length); + int sent; + + errno = 0; + sent = sendfile(fd, fp, NULL, iov_length); if (!drop && sent < 0) { - perror("send loop error"); + perror("sendpage loop error"); fclose(file); return sent; } else if (drop && sent >= 0) { - printf("sendpage loop error expected: %i\n", sent); + printf("sendpage loop error expected: %i errno %i\n", + sent, errno); fclose(file); return -EIO; } @@ -418,14 +506,41 @@ unwind_iov: static int msg_verify_data(struct msghdr *msg, int size, int chunk_sz) { - int i, j, bytes_cnt = 0; + int i, j = 0, bytes_cnt = 0; unsigned char k = 0; for (i = 0; i < msg->msg_iovlen; i++) { unsigned char *d = msg->msg_iov[i].iov_base; - for (j = 0; - j < msg->msg_iov[i].iov_len && size; j++) { + /* Special case test for skb ingress + ktls */ + if (i == 0 && txmsg_ktls_skb) { + if (msg->msg_iov[i].iov_len < 4) + return -EIO; + if (txmsg_ktls_skb_redir) { + if (memcmp(&d[13], "PASS", 4) != 0) { + fprintf(stderr, + "detected redirect ktls_skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n", i, 0, d[13], d[14], d[15], d[16]); + return -EIO; + } + d[13] = 0; + d[14] = 1; + d[15] = 2; + d[16] = 3; + j = 13; + } else if (txmsg_ktls_skb) { + if (memcmp(d, "PASS", 4) != 0) { + fprintf(stderr, + "detected ktls_skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n", i, 0, d[0], d[1], d[2], d[3]); + return -EIO; + } + d[0] = 0; + d[1] = 1; + d[2] = 2; + d[3] = 3; + } + } + + for (; j < msg->msg_iov[i].iov_len && size; j++) { if (d[j] != k++) { fprintf(stderr, "detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n", @@ -464,13 +579,18 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, if (tx) { clock_gettime(CLOCK_MONOTONIC, &s->start); for (i = 0; i < cnt; i++) { - int sent = sendmsg(fd, &msg, flags); + int sent; + + errno = 0; + sent = sendmsg(fd, &msg, flags); if (!drop && sent < 0) { - perror("send loop error"); + perror("sendmsg loop error"); goto out_errno; } else if (drop && sent >= 0) { - printf("send loop error expected: %i\n", sent); + fprintf(stderr, + "sendmsg loop error expected: %i errno %i\n", + sent, errno); errno = -EIO; goto out_errno; } @@ -497,9 +617,10 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, * paths. */ total_bytes = (float)iov_count * (float)iov_length * (float)cnt; - txmsg_pop_total = txmsg_pop; if (txmsg_apply) - txmsg_pop_total *= (total_bytes / txmsg_apply); + txmsg_pop_total = txmsg_pop * (total_bytes / txmsg_apply); + else + txmsg_pop_total = txmsg_pop * cnt; total_bytes -= txmsg_pop_total; err = clock_gettime(CLOCK_MONOTONIC, &s->start); if (err < 0) @@ -633,14 +754,18 @@ static int sendmsg_test(struct sockmap_options *opt) rxpid = fork(); if (rxpid == 0) { - if (opt->drop_expected) - exit(0); + iov_buf -= (txmsg_pop - txmsg_start_pop + 1); + if (opt->drop_expected || txmsg_ktls_skb_drop) + _exit(0); + + if (!iov_buf) /* zero bytes sent case */ + _exit(0); if (opt->sendpage) iov_count = 1; err = msg_loop(rx_fd, iov_count, iov_buf, cnt, &s, false, opt); - if (opt->verbose) + if (opt->verbose > 1) fprintf(stderr, "msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n", iov_count, iov_buf, cnt, err); @@ -648,7 +773,7 @@ static int sendmsg_test(struct sockmap_options *opt) sent_Bps = sentBps(s); recvd_Bps = recvdBps(s); } - if (opt->verbose) + if (opt->verbose > 1) fprintf(stdout, "rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s %s\n", s.bytes_sent, sent_Bps, sent_Bps/giga, @@ -678,7 +803,7 @@ static int sendmsg_test(struct sockmap_options *opt) sent_Bps = sentBps(s); recvd_Bps = recvdBps(s); } - if (opt->verbose) + if (opt->verbose > 1) fprintf(stdout, "tx_sendmsg: TX: %zuB %fB/s %f GB/s RX: %zuB %fB/s %fGB/s\n", s.bytes_sent, sent_Bps, sent_Bps/giga, @@ -694,14 +819,14 @@ static int sendmsg_test(struct sockmap_options *opt) if (WIFEXITED(rx_status)) { err = WEXITSTATUS(rx_status); if (err) { - fprintf(stderr, "rx thread exited with err %d. ", err); + fprintf(stderr, "rx thread exited with err %d.\n", err); goto out; } } if (WIFEXITED(tx_status)) { err = WEXITSTATUS(tx_status); if (err) - fprintf(stderr, "tx thread exited with err %d. ", err); + fprintf(stderr, "tx thread exited with err %d.\n", err); } out: return err; @@ -783,6 +908,7 @@ static int forever_ping_pong(int rate, struct sockmap_options *opt) } enum { + SELFTESTS, PING_PONG, SENDMSG, BASE, @@ -816,8 +942,28 @@ static int run_options(struct sockmap_options *options, int cg_fd, int test) return err; } + /* Attach programs to TLS sockmap */ + if (txmsg_ktls_skb) { + err = bpf_prog_attach(prog_fd[0], map_fd[8], + BPF_SK_SKB_STREAM_PARSER, 0); + if (err) { + fprintf(stderr, + "ERROR: bpf_prog_attach (TLS sockmap %i->%i): %d (%s)\n", + prog_fd[0], map_fd[8], err, strerror(errno)); + return err; + } + + err = bpf_prog_attach(prog_fd[2], map_fd[8], + BPF_SK_SKB_STREAM_VERDICT, 0); + if (err) { + fprintf(stderr, "ERROR: bpf_prog_attach (TLS sockmap): %d (%s)\n", + err, strerror(errno)); + return err; + } + } + /* Attach to cgroups */ - err = bpf_prog_attach(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS, 0); + err = bpf_prog_attach(prog_fd[3], cg_fd, BPF_CGROUP_SOCK_OPS, 0); if (err) { fprintf(stderr, "ERROR: bpf_prog_attach (groups): %d (%s)\n", err, strerror(errno)); @@ -833,19 +979,14 @@ run: /* Attach txmsg program to sockmap */ if (txmsg_pass) - tx_prog_fd = prog_fd[3]; - else if (txmsg_noisy) tx_prog_fd = prog_fd[4]; else if (txmsg_redir) tx_prog_fd = prog_fd[5]; - else if (txmsg_redir_noisy) - tx_prog_fd = prog_fd[6]; - else if (txmsg_drop) - tx_prog_fd = prog_fd[9]; - /* apply and cork must be last */ else if (txmsg_apply) - tx_prog_fd = prog_fd[7]; + tx_prog_fd = prog_fd[6]; else if (txmsg_cork) + tx_prog_fd = prog_fd[7]; + else if (txmsg_drop) tx_prog_fd = prog_fd[8]; else tx_prog_fd = 0; @@ -870,7 +1011,7 @@ run: goto out; } - if (txmsg_redir || txmsg_redir_noisy) + if (txmsg_redir) redir_fd = c2; else redir_fd = c1; @@ -1018,7 +1159,35 @@ run: } } - if (txmsg_skb) { + if (txmsg_ktls_skb) { + int ingress = BPF_F_INGRESS; + + i = 0; + err = bpf_map_update_elem(map_fd[8], &i, &p2, BPF_ANY); + if (err) { + fprintf(stderr, + "ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n", + err, strerror(errno)); + } + + if (txmsg_ktls_skb_redir) { + i = 1; + err = bpf_map_update_elem(map_fd[7], + &i, &ingress, BPF_ANY); + if (err) { + fprintf(stderr, + "ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n", + err, strerror(errno)); + } + } + + if (txmsg_ktls_skb_drop) { + i = 1; + err = bpf_map_update_elem(map_fd[7], &i, &i, BPF_ANY); + } + } + + if (txmsg_redir_skb) { int skb_fd = (test == SENDMSG || test == SENDPAGE) ? p2 : p1; int ingress = BPF_F_INGRESS; @@ -1033,8 +1202,7 @@ run: } i = 3; - err = bpf_map_update_elem(map_fd[0], - &i, &skb_fd, BPF_ANY); + err = bpf_map_update_elem(map_fd[0], &i, &skb_fd, BPF_ANY); if (err) { fprintf(stderr, "ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n", @@ -1068,9 +1236,12 @@ run: fprintf(stderr, "unknown test\n"); out: /* Detatch and zero all the maps */ - bpf_prog_detach2(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS); + bpf_prog_detach2(prog_fd[3], cg_fd, BPF_CGROUP_SOCK_OPS); bpf_prog_detach2(prog_fd[0], map_fd[0], BPF_SK_SKB_STREAM_PARSER); bpf_prog_detach2(prog_fd[1], map_fd[0], BPF_SK_SKB_STREAM_VERDICT); + bpf_prog_detach2(prog_fd[0], map_fd[8], BPF_SK_SKB_STREAM_PARSER); + bpf_prog_detach2(prog_fd[2], map_fd[8], BPF_SK_SKB_STREAM_VERDICT); + if (tx_prog_fd >= 0) bpf_prog_detach2(tx_prog_fd, map_fd[1], BPF_SK_MSG_VERDICT); @@ -1112,12 +1283,8 @@ static void test_options(char *options) if (txmsg_pass) strncat(options, "pass,", OPTSTRING); - if (txmsg_noisy) - strncat(options, "pass_noisy,", OPTSTRING); if (txmsg_redir) strncat(options, "redir,", OPTSTRING); - if (txmsg_redir_noisy) - strncat(options, "redir_noisy,", OPTSTRING); if (txmsg_drop) strncat(options, "drop,", OPTSTRING); if (txmsg_apply) { @@ -1143,8 +1310,10 @@ static void test_options(char *options) } if (txmsg_ingress) strncat(options, "ingress,", OPTSTRING); - if (txmsg_skb) - strncat(options, "skb,", OPTSTRING); + if (txmsg_redir_skb) + strncat(options, "redir_skb,", OPTSTRING); + if (txmsg_ktls_skb) + strncat(options, "ktls_skb,", OPTSTRING); if (ktls) strncat(options, "ktls,", OPTSTRING); if (peek_flag) @@ -1168,416 +1337,317 @@ static int __test_exec(int cgrp, int test, struct sockmap_options *opt) test_options(options); - fprintf(stdout, - "[TEST %i]: (%i, %i, %i, %s, %s): ", - test_cnt, opt->rate, opt->iov_count, opt->iov_length, - test_to_str(test), options); - fflush(stdout); + if (opt->verbose) { + fprintf(stdout, + " [TEST %i]: (%i, %i, %i, %s, %s): ", + test_cnt, opt->rate, opt->iov_count, opt->iov_length, + test_to_str(test), options); + fflush(stdout); + } err = run_options(opt, cgrp, test); - fprintf(stdout, "%s\n", !err ? "PASS" : "FAILED"); + if (opt->verbose) + fprintf(stdout, " %s\n", !err ? "PASS" : "FAILED"); test_cnt++; !err ? passed++ : failed++; free(options); return err; } -static int test_exec(int cgrp, struct sockmap_options *opt) -{ - int err = __test_exec(cgrp, SENDMSG, opt); - - if (err) - goto out; - - err = __test_exec(cgrp, SENDPAGE, opt); -out: - return err; -} - -static int test_loop(int cgrp) -{ - struct sockmap_options opt; - - int err, i, l, r; - - opt.verbose = 0; - opt.base = false; - opt.sendpage = false; - opt.data_test = false; - opt.drop_expected = false; - opt.iov_count = 0; - opt.iov_length = 0; - opt.rate = 0; - - r = 1; - for (i = 1; i < 100; i += 33) { - for (l = 1; l < 100; l += 33) { - opt.rate = r; - opt.iov_count = i; - opt.iov_length = l; - err = test_exec(cgrp, &opt); - if (err) - goto out; - } - } - sched_yield(); -out: - return err; -} - -static int test_txmsg(int cgrp) +static void test_exec(int cgrp, struct sockmap_options *opt) { + int type = strcmp(opt->map, BPF_SOCKMAP_FILENAME); int err; - txmsg_pass = txmsg_noisy = txmsg_redir_noisy = txmsg_drop = 0; - txmsg_apply = txmsg_cork = 0; - txmsg_ingress = txmsg_skb = 0; - - txmsg_pass = 1; - err = test_loop(cgrp); - txmsg_pass = 0; - if (err) - goto out; - - txmsg_redir = 1; - err = test_loop(cgrp); - txmsg_redir = 0; - if (err) - goto out; - - txmsg_drop = 1; - err = test_loop(cgrp); - txmsg_drop = 0; - if (err) - goto out; - - txmsg_redir = 1; - txmsg_ingress = 1; - err = test_loop(cgrp); - txmsg_redir = 0; - txmsg_ingress = 0; - if (err) - goto out; -out: - txmsg_pass = 0; - txmsg_redir = 0; - txmsg_drop = 0; - return err; + if (type == 0) { + test_start(); + err = __test_exec(cgrp, SENDMSG, opt); + if (err) + test_fail(); + } else { + test_start(); + err = __test_exec(cgrp, SENDPAGE, opt); + if (err) + test_fail(); + } } -static int test_send(struct sockmap_options *opt, int cgrp) +static void test_send_one(struct sockmap_options *opt, int cgrp) { - int err; - opt->iov_length = 1; opt->iov_count = 1; opt->rate = 1; - err = test_exec(cgrp, opt); - if (err) - goto out; + test_exec(cgrp, opt); opt->iov_length = 1; opt->iov_count = 1024; opt->rate = 1; - err = test_exec(cgrp, opt); - if (err) - goto out; + test_exec(cgrp, opt); opt->iov_length = 1024; opt->iov_count = 1; opt->rate = 1; - err = test_exec(cgrp, opt); - if (err) - goto out; + test_exec(cgrp, opt); - opt->iov_length = 1; +} + +static void test_send_many(struct sockmap_options *opt, int cgrp) +{ + opt->iov_length = 3; opt->iov_count = 1; opt->rate = 512; - err = test_exec(cgrp, opt); - if (err) - goto out; - - opt->iov_length = 256; - opt->iov_count = 1024; - opt->rate = 2; - err = test_exec(cgrp, opt); - if (err) - goto out; + test_exec(cgrp, opt); opt->rate = 100; opt->iov_count = 1; opt->iov_length = 5; - err = test_exec(cgrp, opt); - if (err) - goto out; -out: - sched_yield(); - return err; + test_exec(cgrp, opt); } -static int test_mixed(int cgrp) +static void test_send_large(struct sockmap_options *opt, int cgrp) { - struct sockmap_options opt = {0}; - int err; + opt->iov_length = 256; + opt->iov_count = 1024; + opt->rate = 2; + test_exec(cgrp, opt); +} - txmsg_pass = txmsg_noisy = txmsg_redir_noisy = txmsg_drop = 0; - txmsg_apply = txmsg_cork = 0; - txmsg_start = txmsg_end = 0; - txmsg_start_push = txmsg_end_push = 0; - txmsg_start_pop = txmsg_pop = 0; +static void test_send(struct sockmap_options *opt, int cgrp) +{ + test_send_one(opt, cgrp); + test_send_many(opt, cgrp); + test_send_large(opt, cgrp); + sched_yield(); +} +static void test_txmsg_pass(int cgrp, struct sockmap_options *opt) +{ /* Test small and large iov_count values with pass/redir/apply/cork */ txmsg_pass = 1; - txmsg_redir = 0; - txmsg_apply = 1; - txmsg_cork = 0; - err = test_send(&opt, cgrp); - if (err) - goto out; + test_send(opt, cgrp); +} - txmsg_pass = 1; - txmsg_redir = 0; - txmsg_apply = 0; - txmsg_cork = 1; - err = test_send(&opt, cgrp); - if (err) - goto out; +static void test_txmsg_redir(int cgrp, struct sockmap_options *opt) +{ + txmsg_redir = 1; + test_send(opt, cgrp); +} - txmsg_pass = 1; - txmsg_redir = 0; - txmsg_apply = 1; - txmsg_cork = 1; - err = test_send(&opt, cgrp); - if (err) - goto out; +static void test_txmsg_drop(int cgrp, struct sockmap_options *opt) +{ + txmsg_drop = 1; + test_send(opt, cgrp); +} - txmsg_pass = 1; - txmsg_redir = 0; - txmsg_apply = 1024; - txmsg_cork = 0; - err = test_send(&opt, cgrp); - if (err) - goto out; +static void test_txmsg_ingress_redir(int cgrp, struct sockmap_options *opt) +{ + txmsg_pass = txmsg_drop = 0; + txmsg_ingress = txmsg_redir = 1; + test_send(opt, cgrp); +} - txmsg_pass = 1; - txmsg_redir = 0; - txmsg_apply = 0; - txmsg_cork = 1024; - err = test_send(&opt, cgrp); - if (err) - goto out; +static void test_txmsg_skb(int cgrp, struct sockmap_options *opt) +{ + bool data = opt->data_test; + int k = ktls; - txmsg_pass = 1; - txmsg_redir = 0; - txmsg_apply = 1024; - txmsg_cork = 1024; - err = test_send(&opt, cgrp); - if (err) - goto out; + opt->data_test = true; + ktls = 1; + txmsg_pass = txmsg_drop = 0; + txmsg_ingress = txmsg_redir = 0; + txmsg_ktls_skb = 1; txmsg_pass = 1; - txmsg_redir = 0; - txmsg_cork = 4096; - txmsg_apply = 4096; - err = test_send(&opt, cgrp); - if (err) - goto out; - txmsg_pass = 0; - txmsg_redir = 1; - txmsg_apply = 1; - txmsg_cork = 0; - err = test_send(&opt, cgrp); - if (err) - goto out; + /* Using data verification so ensure iov layout is + * expected from test receiver side. e.g. has enough + * bytes to write test code. + */ + opt->iov_length = 100; + opt->iov_count = 1; + opt->rate = 1; + test_exec(cgrp, opt); - txmsg_pass = 0; - txmsg_redir = 1; - txmsg_apply = 0; - txmsg_cork = 1; - err = test_send(&opt, cgrp); - if (err) - goto out; + txmsg_ktls_skb_drop = 1; + test_exec(cgrp, opt); - txmsg_pass = 0; - txmsg_redir = 1; - txmsg_apply = 1024; - txmsg_cork = 0; - err = test_send(&opt, cgrp); - if (err) - goto out; + txmsg_ktls_skb_drop = 0; + txmsg_ktls_skb_redir = 1; + test_exec(cgrp, opt); - txmsg_pass = 0; - txmsg_redir = 1; - txmsg_apply = 0; - txmsg_cork = 1024; - err = test_send(&opt, cgrp); - if (err) - goto out; + opt->data_test = data; + ktls = k; +} + + +/* Test cork with hung data. This tests poor usage patterns where + * cork can leave data on the ring if user program is buggy and + * doesn't flush them somehow. They do take some time however + * because they wait for a timeout. Test pass, redir and cork with + * apply logic. Use cork size of 4097 with send_large to avoid + * aligning cork size with send size. + */ +static void test_txmsg_cork_hangs(int cgrp, struct sockmap_options *opt) +{ + txmsg_pass = 1; + txmsg_redir = 0; + txmsg_cork = 4097; + txmsg_apply = 4097; + test_send_large(opt, cgrp); txmsg_pass = 0; txmsg_redir = 1; - txmsg_apply = 1024; - txmsg_cork = 1024; - err = test_send(&opt, cgrp); - if (err) - goto out; + txmsg_apply = 0; + txmsg_cork = 4097; + test_send_large(opt, cgrp); txmsg_pass = 0; txmsg_redir = 1; - txmsg_cork = 4096; - txmsg_apply = 4096; - err = test_send(&opt, cgrp); - if (err) - goto out; -out: - return err; + txmsg_apply = 4097; + txmsg_cork = 4097; + test_send_large(opt, cgrp); } -static int test_start_end(int cgrp) +static void test_txmsg_pull(int cgrp, struct sockmap_options *opt) { - struct sockmap_options opt = {0}; - int err, i; + /* Test basic start/end */ + txmsg_start = 1; + txmsg_end = 2; + test_send(opt, cgrp); + + /* Test >4k pull */ + txmsg_start = 4096; + txmsg_end = 9182; + test_send_large(opt, cgrp); - /* Test basic start/end with lots of iov_count and iov_lengths */ + /* Test pull + redirect */ + txmsg_redir = 0; txmsg_start = 1; txmsg_end = 2; - txmsg_start_push = 1; - txmsg_end_push = 2; - txmsg_start_pop = 1; - txmsg_pop = 1; - err = test_txmsg(cgrp); - if (err) - goto out; + test_send(opt, cgrp); - /* Cut a byte of pushed data but leave reamining in place */ + /* Test pull + cork */ + txmsg_redir = 0; + txmsg_cork = 512; txmsg_start = 1; txmsg_end = 2; - txmsg_start_push = 1; - txmsg_end_push = 3; + test_send_many(opt, cgrp); + + /* Test pull + cork + redirect */ + txmsg_redir = 1; + txmsg_cork = 512; + txmsg_start = 1; + txmsg_end = 2; + test_send_many(opt, cgrp); +} + +static void test_txmsg_pop(int cgrp, struct sockmap_options *opt) +{ + /* Test basic pop */ txmsg_start_pop = 1; - txmsg_pop = 1; - err = test_txmsg(cgrp); - if (err) - goto out; + txmsg_pop = 2; + test_send_many(opt, cgrp); - /* Test start/end with cork */ - opt.rate = 16; - opt.iov_count = 1; - opt.iov_length = 100; - txmsg_cork = 1600; - - txmsg_start_pop = 0; - txmsg_pop = 0; - - for (i = 99; i <= 1600; i += 500) { - txmsg_start = 0; - txmsg_end = i; - txmsg_start_push = 0; - txmsg_end_push = i; - err = test_exec(cgrp, &opt); - if (err) - goto out; - } + /* Test pop with >4k */ + txmsg_start_pop = 4096; + txmsg_pop = 4096; + test_send_large(opt, cgrp); - /* Test pop data in middle of cork */ - for (i = 99; i <= 1600; i += 500) { - txmsg_start_pop = 10; - txmsg_pop = i; - err = test_exec(cgrp, &opt); - if (err) - goto out; - } - txmsg_start_pop = 0; - txmsg_pop = 0; - - /* Test start/end with cork but pull data in middle */ - for (i = 199; i <= 1600; i += 500) { - txmsg_start = 100; - txmsg_end = i; - txmsg_start_push = 100; - txmsg_end_push = i; - err = test_exec(cgrp, &opt); - if (err) - goto out; - } + /* Test pop + redirect */ + txmsg_redir = 1; + txmsg_start_pop = 1; + txmsg_pop = 2; + test_send_many(opt, cgrp); - /* Test start/end with cork pulling last sg entry */ - txmsg_start = 1500; - txmsg_end = 1600; - txmsg_start_push = 1500; - txmsg_end_push = 1600; - err = test_exec(cgrp, &opt); - if (err) - goto out; + /* Test pop + cork */ + txmsg_redir = 0; + txmsg_cork = 512; + txmsg_start_pop = 1; + txmsg_pop = 2; + test_send_many(opt, cgrp); - /* Test pop with cork pulling last sg entry */ - txmsg_start_pop = 1500; - txmsg_pop = 1600; - err = test_exec(cgrp, &opt); - if (err) - goto out; - txmsg_start_pop = 0; - txmsg_pop = 0; - - /* Test start/end pull of single byte in last page */ - txmsg_start = 1111; - txmsg_end = 1112; - txmsg_start_push = 1111; - txmsg_end_push = 1112; - err = test_exec(cgrp, &opt); - if (err) - goto out; + /* Test pop + redirect + cork */ + txmsg_redir = 1; + txmsg_cork = 4; + txmsg_start_pop = 1; + txmsg_pop = 2; + test_send_many(opt, cgrp); +} - /* Test pop of single byte in last page */ - txmsg_start_pop = 1111; - txmsg_pop = 1112; - err = test_exec(cgrp, &opt); - if (err) - goto out; +static void test_txmsg_push(int cgrp, struct sockmap_options *opt) +{ + /* Test basic push */ + txmsg_start_push = 1; + txmsg_end_push = 1; + test_send(opt, cgrp); - /* Test start/end with end < start */ - txmsg_start = 1111; - txmsg_end = 0; - txmsg_start_push = 1111; - txmsg_end_push = 0; - err = test_exec(cgrp, &opt); - if (err) - goto out; + /* Test push 4kB >4k */ + txmsg_start_push = 4096; + txmsg_end_push = 4096; + test_send_large(opt, cgrp); - /* Test start/end with end > data */ - txmsg_start = 0; - txmsg_end = 1601; - txmsg_start_push = 0; - txmsg_end_push = 1601; - err = test_exec(cgrp, &opt); - if (err) - goto out; + /* Test push + redirect */ + txmsg_redir = 1; + txmsg_start_push = 1; + txmsg_end_push = 2; + test_send_many(opt, cgrp); - /* Test start/end with start > data */ - txmsg_start = 1601; - txmsg_end = 1600; - txmsg_start_push = 1601; - txmsg_end_push = 1600; - err = test_exec(cgrp, &opt); - if (err) - goto out; + /* Test push + cork */ + txmsg_redir = 0; + txmsg_cork = 512; + txmsg_start_push = 1; + txmsg_end_push = 2; + test_send_many(opt, cgrp); +} - /* Test pop with start > data */ - txmsg_start_pop = 1601; - txmsg_pop = 1; - err = test_exec(cgrp, &opt); - if (err) - goto out; +static void test_txmsg_push_pop(int cgrp, struct sockmap_options *opt) +{ + txmsg_start_push = 1; + txmsg_end_push = 10; + txmsg_start_pop = 5; + txmsg_pop = 4; + test_send_large(opt, cgrp); +} - /* Test pop with pop range > data */ - txmsg_start_pop = 1599; - txmsg_pop = 10; - err = test_exec(cgrp, &opt); -out: - txmsg_start = 0; - txmsg_end = 0; - sched_yield(); - return err; +static void test_txmsg_apply(int cgrp, struct sockmap_options *opt) +{ + txmsg_pass = 1; + txmsg_redir = 0; + txmsg_apply = 1; + txmsg_cork = 0; + test_send_one(opt, cgrp); + + txmsg_pass = 0; + txmsg_redir = 1; + txmsg_apply = 1; + txmsg_cork = 0; + test_send_one(opt, cgrp); + + txmsg_pass = 1; + txmsg_redir = 0; + txmsg_apply = 1024; + txmsg_cork = 0; + test_send_large(opt, cgrp); + + txmsg_pass = 0; + txmsg_redir = 1; + txmsg_apply = 1024; + txmsg_cork = 0; + test_send_large(opt, cgrp); +} + +static void test_txmsg_cork(int cgrp, struct sockmap_options *opt) +{ + txmsg_pass = 1; + txmsg_redir = 0; + txmsg_apply = 0; + txmsg_cork = 1; + test_send(opt, cgrp); + + txmsg_pass = 1; + txmsg_redir = 0; + txmsg_apply = 1; + txmsg_cork = 1; + test_send(opt, cgrp); } char *map_names[] = { @@ -1589,11 +1659,13 @@ char *map_names[] = { "sock_bytes", "sock_redir_flags", "sock_skb_opts", + "tls_sock_map", }; int prog_attach_type[] = { BPF_SK_SKB_STREAM_PARSER, BPF_SK_SKB_STREAM_VERDICT, + BPF_SK_SKB_STREAM_VERDICT, BPF_CGROUP_SOCK_OPS, BPF_SK_MSG_VERDICT, BPF_SK_MSG_VERDICT, @@ -1607,6 +1679,7 @@ int prog_attach_type[] = { int prog_type[] = { BPF_PROG_TYPE_SK_SKB, BPF_PROG_TYPE_SK_SKB, + BPF_PROG_TYPE_SK_SKB, BPF_PROG_TYPE_SOCK_OPS, BPF_PROG_TYPE_SK_MSG, BPF_PROG_TYPE_SK_MSG, @@ -1662,73 +1735,117 @@ static int populate_progs(char *bpf_file) return 0; } -static int __test_suite(int cg_fd, char *bpf_file) +struct _test test[] = { + {"txmsg test passthrough", test_txmsg_pass}, + {"txmsg test redirect", test_txmsg_redir}, + {"txmsg test drop", test_txmsg_drop}, + {"txmsg test ingress redirect", test_txmsg_ingress_redir}, + {"txmsg test skb", test_txmsg_skb}, + {"txmsg test apply", test_txmsg_apply}, + {"txmsg test cork", test_txmsg_cork}, + {"txmsg test hanging corks", test_txmsg_cork_hangs}, + {"txmsg test push_data", test_txmsg_push}, + {"txmsg test pull-data", test_txmsg_pull}, + {"txmsg test pop-data", test_txmsg_pop}, + {"txmsg test push/pop data", test_txmsg_push_pop}, +}; + +static int check_whitelist(struct _test *t, struct sockmap_options *opt) { - int err, cleanup = cg_fd; + char *entry, *ptr; + + if (!opt->whitelist) + return 0; + ptr = strdup(opt->whitelist); + if (!ptr) + return -ENOMEM; + entry = strtok(ptr, ","); + while (entry) { + if ((opt->prepend && strstr(opt->prepend, entry) != 0) || + strstr(opt->map, entry) != 0 || + strstr(t->title, entry) != 0) + return 0; + entry = strtok(NULL, ","); + } + return -EINVAL; +} - err = populate_progs(bpf_file); +static int check_blacklist(struct _test *t, struct sockmap_options *opt) +{ + char *entry, *ptr; + + if (!opt->blacklist) + return -EINVAL; + ptr = strdup(opt->blacklist); + if (!ptr) + return -ENOMEM; + entry = strtok(ptr, ","); + while (entry) { + if ((opt->prepend && strstr(opt->prepend, entry) != 0) || + strstr(opt->map, entry) != 0 || + strstr(t->title, entry) != 0) + return 0; + entry = strtok(NULL, ","); + } + return -EINVAL; +} + +static int __test_selftests(int cg_fd, struct sockmap_options *opt) +{ + int i, err; + + err = populate_progs(opt->map); if (err < 0) { fprintf(stderr, "ERROR: (%i) load bpf failed\n", err); return err; } - if (cg_fd < 0) { - if (setup_cgroup_environment()) { - fprintf(stderr, "ERROR: cgroup env failed\n"); - return -EINVAL; - } + /* Tests basic commands and APIs */ + for (i = 0; i < sizeof(test)/sizeof(struct _test); i++) { + struct _test t = test[i]; - cg_fd = create_and_get_cgroup(CG_PATH); - if (cg_fd < 0) { - fprintf(stderr, - "ERROR: (%i) open cg path failed: %s\n", - cg_fd, optarg); - return cg_fd; - } + if (check_whitelist(&t, opt) != 0) + continue; + if (check_blacklist(&t, opt) == 0) + continue; - if (join_cgroup(CG_PATH)) { - fprintf(stderr, "ERROR: failed to join cgroup\n"); - return -EINVAL; - } + test_start_subtest(&t, opt); + t.tester(cg_fd, opt); + test_end_subtest(); } - /* Tests basic commands and APIs with range of iov values */ - txmsg_start = txmsg_end = txmsg_start_push = txmsg_end_push = 0; - err = test_txmsg(cg_fd); - if (err) - goto out; + return err; +} - /* Tests interesting combinations of APIs used together */ - err = test_mixed(cg_fd); - if (err) - goto out; +static void test_selftests_sockmap(int cg_fd, struct sockmap_options *opt) +{ + opt->map = BPF_SOCKMAP_FILENAME; + __test_selftests(cg_fd, opt); +} - /* Tests pull_data API using start/end API */ - err = test_start_end(cg_fd); - if (err) - goto out; +static void test_selftests_sockhash(int cg_fd, struct sockmap_options *opt) +{ + opt->map = BPF_SOCKHASH_FILENAME; + __test_selftests(cg_fd, opt); +} -out: - printf("Summary: %i PASSED %i FAILED\n", passed, failed); - if (cleanup < 0) { - cleanup_cgroup_environment(); - close(cg_fd); - } - return err; +static void test_selftests_ktls(int cg_fd, struct sockmap_options *opt) +{ + opt->map = BPF_SOCKHASH_FILENAME; + opt->prepend = "ktls"; + ktls = 1; + __test_selftests(cg_fd, opt); + ktls = 0; } -static int test_suite(int cg_fd) +static int test_selftest(int cg_fd, struct sockmap_options *opt) { - int err; - err = __test_suite(cg_fd, BPF_SOCKMAP_FILENAME); - if (err) - goto out; - err = __test_suite(cg_fd, BPF_SOCKHASH_FILENAME); -out: - if (cg_fd > -1) - close(cg_fd); - return err; + test_selftests_sockmap(cg_fd, opt); + test_selftests_sockhash(cg_fd, opt); + test_selftests_ktls(cg_fd, opt); + test_print_results(); + return 0; } int main(int argc, char **argv) @@ -1737,12 +1854,10 @@ int main(int argc, char **argv) struct sockmap_options options = {0}; int opt, longindex, err, cg_fd = 0; char *bpf_file = BPF_SOCKMAP_FILENAME; - int test = PING_PONG; - - if (argc < 2) - return test_suite(-1); + int test = SELFTESTS; + bool cg_created = 0; - while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:p:q:", + while ((opt = getopt_long(argc, argv, ":dhv:c:r:i:l:t:p:q:n:b:", long_options, &longindex)) != -1) { switch (opt) { case 's': @@ -1783,6 +1898,8 @@ int main(int argc, char **argv) break; case 'v': options.verbose = 1; + if (optarg) + options.verbose = atoi(optarg); break; case 'i': iov_count = atoi(optarg); @@ -1809,6 +1926,15 @@ int main(int argc, char **argv) return -1; } break; + case 'n': + options.whitelist = strdup(optarg); + if (!options.whitelist) + return -ENOMEM; + break; + case 'b': + options.blacklist = strdup(optarg); + if (!options.blacklist) + return -ENOMEM; case 0: break; case 'h': @@ -1818,13 +1944,30 @@ int main(int argc, char **argv) } } - if (argc <= 3 && cg_fd) - return test_suite(cg_fd); - if (!cg_fd) { - fprintf(stderr, "%s requires cgroup option: --cgroup <path>\n", - argv[0]); - return -1; + if (setup_cgroup_environment()) { + fprintf(stderr, "ERROR: cgroup env failed\n"); + return -EINVAL; + } + + cg_fd = create_and_get_cgroup(CG_PATH); + if (cg_fd < 0) { + fprintf(stderr, + "ERROR: (%i) open cg path failed: %s\n", + cg_fd, strerror(errno)); + return cg_fd; + } + + if (join_cgroup(CG_PATH)) { + fprintf(stderr, "ERROR: failed to join cgroup\n"); + return -EINVAL; + } + cg_created = 1; + } + + if (test == SELFTESTS) { + err = test_selftest(cg_fd, &options); + goto out; } err = populate_progs(bpf_file); @@ -1843,6 +1986,13 @@ int main(int argc, char **argv) options.rate = rate; err = run_options(&options, cg_fd, test); +out: + if (options.whitelist) + free(options.whitelist); + if (options.blacklist) + free(options.blacklist); + if (cg_created) + cleanup_cgroup_environment(); close(cg_fd); return err; } diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 87eaa49609a0..78a6bae56ea6 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -50,7 +50,7 @@ #define MAX_INSNS BPF_MAXINSNS #define MAX_TEST_INSNS 1000000 #define MAX_FIXUPS 8 -#define MAX_NR_MAPS 19 +#define MAX_NR_MAPS 20 #define MAX_TEST_RUNS 8 #define POINTER_VALUE 0xcafe4all #define TEST_DATA_LEN 64 @@ -86,6 +86,7 @@ struct bpf_test { int fixup_map_array_small[MAX_FIXUPS]; int fixup_sk_storage_map[MAX_FIXUPS]; int fixup_map_event_output[MAX_FIXUPS]; + int fixup_map_reuseport_array[MAX_FIXUPS]; const char *errstr; const char *errstr_unpriv; uint32_t insn_processed; @@ -637,6 +638,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, int *fixup_map_array_small = test->fixup_map_array_small; int *fixup_sk_storage_map = test->fixup_sk_storage_map; int *fixup_map_event_output = test->fixup_map_event_output; + int *fixup_map_reuseport_array = test->fixup_map_reuseport_array; if (test->fill_helper) { test->fill_insns = calloc(MAX_TEST_INSNS, sizeof(struct bpf_insn)); @@ -806,12 +808,28 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, fixup_map_event_output++; } while (*fixup_map_event_output); } + if (*fixup_map_reuseport_array) { + map_fds[19] = __create_map(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, + sizeof(u32), sizeof(u64), 1, 0); + do { + prog[*fixup_map_reuseport_array].imm = map_fds[19]; + fixup_map_reuseport_array++; + } while (*fixup_map_reuseport_array); + } } +struct libcap { + struct __user_cap_header_struct hdr; + struct __user_cap_data_struct data[2]; +}; + static int set_admin(bool admin) { cap_t caps; - const cap_value_t cap_val = CAP_SYS_ADMIN; + /* need CAP_BPF, CAP_NET_ADMIN, CAP_PERFMON to load progs */ + const cap_value_t cap_net_admin = CAP_NET_ADMIN; + const cap_value_t cap_sys_admin = CAP_SYS_ADMIN; + struct libcap *cap; int ret = -1; caps = cap_get_proc(); @@ -819,11 +837,26 @@ static int set_admin(bool admin) perror("cap_get_proc"); return -1; } - if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_val, + cap = (struct libcap *)caps; + if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_sys_admin, CAP_CLEAR)) { + perror("cap_set_flag clear admin"); + goto out; + } + if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_net_admin, admin ? CAP_SET : CAP_CLEAR)) { - perror("cap_set_flag"); + perror("cap_set_flag set_or_clear net"); goto out; } + /* libcap is likely old and simply ignores CAP_BPF and CAP_PERFMON, + * so update effective bits manually + */ + if (admin) { + cap->data[1].effective |= 1 << (38 /* CAP_PERFMON */ - 32); + cap->data[1].effective |= 1 << (39 /* CAP_BPF */ - 32); + } else { + cap->data[1].effective &= ~(1 << (38 - 32)); + cap->data[1].effective &= ~(1 << (39 - 32)); + } if (cap_set_proc(caps)) { perror("cap_set_proc"); goto out; @@ -943,7 +976,12 @@ static void do_test_single(struct bpf_test *test, bool unpriv, attr.insns = prog; attr.insns_cnt = prog_len; attr.license = "GPL"; - attr.log_level = verbose || expected_ret == VERBOSE_ACCEPT ? 1 : 4; + if (verbose) + attr.log_level = 1; + else if (expected_ret == VERBOSE_ACCEPT) + attr.log_level = 2; + else + attr.log_level = 4; attr.prog_flags = pflags; fd_prog = bpf_load_program_xattr(&attr, bpf_vlog, sizeof(bpf_vlog)); @@ -1052,9 +1090,11 @@ fail_log: static bool is_admin(void) { + cap_flag_value_t net_priv = CAP_CLEAR; + bool perfmon_priv = false; + bool bpf_priv = false; + struct libcap *cap; cap_t caps; - cap_flag_value_t sysadmin = CAP_CLEAR; - const cap_value_t cap_val = CAP_SYS_ADMIN; #ifdef CAP_IS_SUPPORTED if (!CAP_IS_SUPPORTED(CAP_SETFCAP)) { @@ -1067,11 +1107,14 @@ static bool is_admin(void) perror("cap_get_proc"); return false; } - if (cap_get_flag(caps, cap_val, CAP_EFFECTIVE, &sysadmin)) - perror("cap_get_flag"); + cap = (struct libcap *)caps; + bpf_priv = cap->data[1].effective & (1 << (39/* CAP_BPF */ - 32)); + perfmon_priv = cap->data[1].effective & (1 << (38/* CAP_PERFMON */ - 32)); + if (cap_get_flag(caps, CAP_NET_ADMIN, CAP_EFFECTIVE, &net_priv)) + perror("cap_get_flag NET"); if (cap_free(caps)) perror("cap_free"); - return (sysadmin == CAP_SET); + return bpf_priv && perfmon_priv && net_priv == CAP_SET; } static void get_unpriv_disabled() diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c new file mode 100644 index 000000000000..0af6337a8962 --- /dev/null +++ b/tools/testing/selftests/bpf/testing_helpers.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* Copyright (C) 2020 Facebook, Inc. */ +#include <stdlib.h> +#include <errno.h> +#include "testing_helpers.h" + +int parse_num_list(const char *s, bool **num_set, int *num_set_len) +{ + int i, set_len = 0, new_len, num, start = 0, end = -1; + bool *set = NULL, *tmp, parsing_end = false; + char *next; + + while (s[0]) { + errno = 0; + num = strtol(s, &next, 10); + if (errno) + return -errno; + + if (parsing_end) + end = num; + else + start = num; + + if (!parsing_end && *next == '-') { + s = next + 1; + parsing_end = true; + continue; + } else if (*next == ',') { + parsing_end = false; + s = next + 1; + end = num; + } else if (*next == '\0') { + parsing_end = false; + s = next; + end = num; + } else { + return -EINVAL; + } + + if (start > end) + return -EINVAL; + + if (end + 1 > set_len) { + new_len = end + 1; + tmp = realloc(set, new_len); + if (!tmp) { + free(set); + return -ENOMEM; + } + for (i = set_len; i < start; i++) + tmp[i] = false; + set = tmp; + set_len = new_len; + } + for (i = start; i <= end; i++) + set[i] = true; + } + + if (!set) + return -EINVAL; + + *num_set = set; + *num_set_len = set_len; + + return 0; +} diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h new file mode 100644 index 000000000000..923b51762759 --- /dev/null +++ b/tools/testing/selftests/bpf/testing_helpers.h @@ -0,0 +1,5 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +/* Copyright (C) 2020 Facebook, Inc. */ +#include <stdbool.h> + +int parse_num_list(const char *s, bool **set, int *set_len); diff --git a/tools/testing/selftests/bpf/verifier/and.c b/tools/testing/selftests/bpf/verifier/and.c index e0fad1548737..d781bc86e100 100644 --- a/tools/testing/selftests/bpf/verifier/and.c +++ b/tools/testing/selftests/bpf/verifier/and.c @@ -15,7 +15,7 @@ BPF_EXIT_INSN(), }, .fixup_map_hash_48b = { 3 }, - .errstr = "R0 max value is outside of the array range", + .errstr = "R0 max value is outside of the allowed memory range", .result = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, @@ -44,7 +44,7 @@ BPF_EXIT_INSN(), }, .fixup_map_hash_48b = { 3 }, - .errstr = "R0 max value is outside of the array range", + .errstr = "R0 max value is outside of the allowed memory range", .result = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, diff --git a/tools/testing/selftests/bpf/verifier/array_access.c b/tools/testing/selftests/bpf/verifier/array_access.c index f3c33e128709..1c4b1939f5a8 100644 --- a/tools/testing/selftests/bpf/verifier/array_access.c +++ b/tools/testing/selftests/bpf/verifier/array_access.c @@ -117,7 +117,7 @@ BPF_EXIT_INSN(), }, .fixup_map_hash_48b = { 3 }, - .errstr = "R0 min value is outside of the array range", + .errstr = "R0 min value is outside of the allowed memory range", .result = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, @@ -137,7 +137,7 @@ BPF_EXIT_INSN(), }, .fixup_map_hash_48b = { 3 }, - .errstr = "R0 unbounded memory access, make sure to bounds check any array access into a map", + .errstr = "R0 unbounded memory access, make sure to bounds check any such access", .result = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, diff --git a/tools/testing/selftests/bpf/verifier/bounds.c b/tools/testing/selftests/bpf/verifier/bounds.c index 58f4aa593b1b..4d6645f2874c 100644 --- a/tools/testing/selftests/bpf/verifier/bounds.c +++ b/tools/testing/selftests/bpf/verifier/bounds.c @@ -20,7 +20,7 @@ BPF_EXIT_INSN(), }, .fixup_map_hash_8b = { 3 }, - .errstr = "R0 max value is outside of the array range", + .errstr = "R0 max value is outside of the allowed memory range", .result = REJECT, }, { @@ -146,7 +146,7 @@ BPF_EXIT_INSN(), }, .fixup_map_hash_8b = { 3 }, - .errstr = "R0 min value is outside of the array range", + .errstr = "R0 min value is outside of the allowed memory range", .result = REJECT }, { @@ -354,7 +354,7 @@ BPF_EXIT_INSN(), }, .fixup_map_hash_8b = { 3 }, - .errstr = "R0 max value is outside of the array range", + .errstr = "R0 max value is outside of the allowed memory range", .result = REJECT }, { diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index 2d752c4f8d9d..94258c6b5235 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -19,7 +19,7 @@ BPF_MOV64_IMM(BPF_REG_0, 2), BPF_EXIT_INSN(), }, - .errstr_unpriv = "function calls to other bpf functions are allowed for root only", + .errstr_unpriv = "function calls to other bpf functions are allowed for", .result_unpriv = REJECT, .result = ACCEPT, .retval = 1, @@ -105,7 +105,7 @@ .prog_type = BPF_PROG_TYPE_SCHED_CLS, .fixup_map_hash_8b = { 16 }, .result = REJECT, - .errstr = "R0 min value is outside of the array range", + .errstr = "R0 min value is outside of the allowed memory range", }, { "calls: overlapping caller/callee", @@ -315,7 +315,7 @@ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), BPF_EXIT_INSN(), }, - .errstr_unpriv = "allowed for root only", + .errstr_unpriv = "allowed for", .result_unpriv = REJECT, .result = ACCEPT, .retval = POINTER_VALUE, @@ -346,7 +346,7 @@ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), BPF_EXIT_INSN(), }, - .errstr_unpriv = "allowed for root only", + .errstr_unpriv = "allowed for", .result_unpriv = REJECT, .result = ACCEPT, .retval = TEST_DATA_LEN + TEST_DATA_LEN - ETH_HLEN - ETH_HLEN, @@ -397,7 +397,7 @@ BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - .errstr_unpriv = "function calls to other bpf functions are allowed for root only", + .errstr_unpriv = "function calls to other bpf functions are allowed for", .fixup_map_hash_48b = { 3 }, .result_unpriv = REJECT, .result = ACCEPT, @@ -1064,7 +1064,7 @@ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "allowed for root only", + .errstr_unpriv = "allowed for", .result_unpriv = REJECT, .errstr = "R0 !read_ok", .result = REJECT, @@ -1977,7 +1977,7 @@ BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, - .errstr_unpriv = "function calls to other bpf functions are allowed for root only", + .errstr_unpriv = "function calls to other bpf functions are allowed for", .result_unpriv = REJECT, .result = ACCEPT, }, @@ -2003,7 +2003,7 @@ BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, - .errstr_unpriv = "function calls to other bpf functions are allowed for root only", + .errstr_unpriv = "function calls to other bpf functions are allowed for", .errstr = "!read_ok", .result = REJECT, }, @@ -2028,7 +2028,7 @@ BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, - .errstr_unpriv = "function calls to other bpf functions are allowed for root only", + .errstr_unpriv = "function calls to other bpf functions are allowed for", .errstr = "!read_ok", .result = REJECT, }, diff --git a/tools/testing/selftests/bpf/verifier/const_or.c b/tools/testing/selftests/bpf/verifier/const_or.c index 84446dfc7c1d..6c214c58e8d4 100644 --- a/tools/testing/selftests/bpf/verifier/const_or.c +++ b/tools/testing/selftests/bpf/verifier/const_or.c @@ -6,7 +6,7 @@ BPF_MOV64_IMM(BPF_REG_2, 34), BPF_ALU64_IMM(BPF_OR, BPF_REG_2, 13), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_EXIT_INSN(), }, .result = ACCEPT, @@ -20,7 +20,7 @@ BPF_MOV64_IMM(BPF_REG_2, 34), BPF_ALU64_IMM(BPF_OR, BPF_REG_2, 24), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_EXIT_INSN(), }, .errstr = "invalid stack type R1 off=-48 access_size=58", @@ -36,7 +36,7 @@ BPF_MOV64_IMM(BPF_REG_4, 13), BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_4), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_EXIT_INSN(), }, .result = ACCEPT, @@ -51,7 +51,7 @@ BPF_MOV64_IMM(BPF_REG_4, 24), BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_4), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_EXIT_INSN(), }, .errstr = "invalid stack type R1 off=-48 access_size=58", diff --git a/tools/testing/selftests/bpf/verifier/dead_code.c b/tools/testing/selftests/bpf/verifier/dead_code.c index 50a8a63be4ac..5cf361d8eb1c 100644 --- a/tools/testing/selftests/bpf/verifier/dead_code.c +++ b/tools/testing/selftests/bpf/verifier/dead_code.c @@ -85,7 +85,7 @@ BPF_MOV64_IMM(BPF_REG_0, 12), BPF_EXIT_INSN(), }, - .errstr_unpriv = "function calls to other bpf functions are allowed for root only", + .errstr_unpriv = "function calls to other bpf functions are allowed for", .result_unpriv = REJECT, .result = ACCEPT, .retval = 7, @@ -103,7 +103,7 @@ BPF_MOV64_IMM(BPF_REG_0, 12), BPF_EXIT_INSN(), }, - .errstr_unpriv = "function calls to other bpf functions are allowed for root only", + .errstr_unpriv = "function calls to other bpf functions are allowed for", .result_unpriv = REJECT, .result = ACCEPT, .retval = 7, @@ -121,7 +121,7 @@ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -5), BPF_EXIT_INSN(), }, - .errstr_unpriv = "function calls to other bpf functions are allowed for root only", + .errstr_unpriv = "function calls to other bpf functions are allowed for", .result_unpriv = REJECT, .result = ACCEPT, .retval = 7, @@ -137,7 +137,7 @@ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), BPF_EXIT_INSN(), }, - .errstr_unpriv = "function calls to other bpf functions are allowed for root only", + .errstr_unpriv = "function calls to other bpf functions are allowed for", .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, @@ -152,7 +152,7 @@ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), BPF_EXIT_INSN(), }, - .errstr_unpriv = "function calls to other bpf functions are allowed for root only", + .errstr_unpriv = "function calls to other bpf functions are allowed for", .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, diff --git a/tools/testing/selftests/bpf/verifier/direct_value_access.c b/tools/testing/selftests/bpf/verifier/direct_value_access.c index b9fb28e8e224..988f46a1a4c7 100644 --- a/tools/testing/selftests/bpf/verifier/direct_value_access.c +++ b/tools/testing/selftests/bpf/verifier/direct_value_access.c @@ -68,7 +68,7 @@ }, .fixup_map_array_48b = { 1 }, .result = REJECT, - .errstr = "R1 min value is outside of the array range", + .errstr = "R1 min value is outside of the allowed memory range", }, { "direct map access, write test 7", @@ -220,7 +220,7 @@ }, .fixup_map_array_small = { 1 }, .result = REJECT, - .errstr = "R1 min value is outside of the array range", + .errstr = "R1 min value is outside of the allowed memory range", }, { "direct map access, write test 19", diff --git a/tools/testing/selftests/bpf/verifier/event_output.c b/tools/testing/selftests/bpf/verifier/event_output.c index 130553e19eca..99f8f582c02b 100644 --- a/tools/testing/selftests/bpf/verifier/event_output.c +++ b/tools/testing/selftests/bpf/verifier/event_output.c @@ -92,3 +92,27 @@ .result = ACCEPT, .retval = 1, }, +{ + "perfevent for cgroup dev", + .insns = { __PERF_EVENT_INSNS__ }, + .prog_type = BPF_PROG_TYPE_CGROUP_DEVICE, + .fixup_map_event_output = { 4 }, + .result = ACCEPT, + .retval = 1, +}, +{ + "perfevent for cgroup sysctl", + .insns = { __PERF_EVENT_INSNS__ }, + .prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL, + .fixup_map_event_output = { 4 }, + .result = ACCEPT, + .retval = 1, +}, +{ + "perfevent for cgroup sockopt", + .insns = { __PERF_EVENT_INSNS__ }, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCKOPT, + .fixup_map_event_output = { 4 }, + .result = ACCEPT, + .retval = 1, +}, diff --git a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c index 67ab12410050..87c4e7900083 100644 --- a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c +++ b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c @@ -19,7 +19,7 @@ BPF_MOV64_IMM(BPF_REG_4, 0), BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -36,7 +36,7 @@ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_EXIT_INSN(), }, .errstr = "invalid indirect read from stack off -64+0 size 64", @@ -55,7 +55,7 @@ BPF_MOV64_IMM(BPF_REG_4, 0), BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -84,7 +84,7 @@ BPF_MOV64_IMM(BPF_REG_4, 0), BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -112,7 +112,7 @@ BPF_MOV64_IMM(BPF_REG_4, 0), BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -132,7 +132,7 @@ BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 3), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -152,7 +152,7 @@ BPF_MOV64_IMM(BPF_REG_4, 0), BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -171,7 +171,7 @@ BPF_MOV64_IMM(BPF_REG_4, 0), BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -190,7 +190,7 @@ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 3), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -208,7 +208,7 @@ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, 64, 3), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -233,7 +233,7 @@ BPF_MOV64_IMM(BPF_REG_4, 0), BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -259,7 +259,7 @@ BPF_MOV64_IMM(BPF_REG_4, 0), BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -286,7 +286,7 @@ BPF_MOV64_IMM(BPF_REG_4, 0), BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -313,12 +313,12 @@ BPF_MOV64_IMM(BPF_REG_4, 0), BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, .fixup_map_hash_48b = { 4 }, - .errstr = "R1 min value is outside of the array range", + .errstr = "R1 min value is outside of the allowed memory range", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -468,7 +468,7 @@ BPF_MOV64_IMM(BPF_REG_1, 0), BPF_MOV64_IMM(BPF_REG_2, 0), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_EXIT_INSN(), }, .errstr = "R1 type=inv expected=fp", @@ -481,7 +481,7 @@ BPF_MOV64_IMM(BPF_REG_1, 0), BPF_MOV64_IMM(BPF_REG_2, 1), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_EXIT_INSN(), }, .errstr = "R1 type=inv expected=fp", @@ -495,7 +495,7 @@ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), BPF_MOV64_IMM(BPF_REG_2, 0), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_EXIT_INSN(), }, .result = ACCEPT, @@ -513,7 +513,7 @@ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), BPF_MOV64_IMM(BPF_REG_2, 0), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_EXIT_INSN(), }, .fixup_map_hash_8b = { 3 }, @@ -534,7 +534,7 @@ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_EXIT_INSN(), }, .fixup_map_hash_8b = { 3 }, @@ -554,7 +554,7 @@ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 2), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_EXIT_INSN(), }, .fixup_map_hash_8b = { 3 }, @@ -580,7 +580,7 @@ BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 63), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), BPF_EXIT_INSN(), }, @@ -607,7 +607,7 @@ BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 32), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 32), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), BPF_EXIT_INSN(), }, diff --git a/tools/testing/selftests/bpf/verifier/helper_value_access.c b/tools/testing/selftests/bpf/verifier/helper_value_access.c index 7572e403ddb9..1c7882ddfa63 100644 --- a/tools/testing/selftests/bpf/verifier/helper_value_access.c +++ b/tools/testing/selftests/bpf/verifier/helper_value_access.c @@ -10,7 +10,7 @@ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_EXIT_INSN(), }, .fixup_map_hash_48b = { 3 }, @@ -29,7 +29,7 @@ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), BPF_MOV64_IMM(BPF_REG_2, 8), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_EXIT_INSN(), }, .fixup_map_hash_48b = { 3 }, @@ -67,7 +67,7 @@ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val) + 8), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_EXIT_INSN(), }, .fixup_map_hash_48b = { 3 }, @@ -87,7 +87,7 @@ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), BPF_MOV64_IMM(BPF_REG_2, -8), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_EXIT_INSN(), }, .fixup_map_hash_48b = { 3 }, @@ -109,7 +109,7 @@ BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val) - offsetof(struct test_val, foo)), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_EXIT_INSN(), }, .fixup_map_hash_48b = { 3 }, @@ -129,7 +129,7 @@ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct test_val, foo)), BPF_MOV64_IMM(BPF_REG_2, 8), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_EXIT_INSN(), }, .fixup_map_hash_48b = { 3 }, @@ -170,7 +170,7 @@ BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val) - offsetof(struct test_val, foo) + 8), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_EXIT_INSN(), }, .fixup_map_hash_48b = { 3 }, @@ -191,7 +191,7 @@ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct test_val, foo)), BPF_MOV64_IMM(BPF_REG_2, -8), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_EXIT_INSN(), }, .fixup_map_hash_48b = { 3 }, @@ -212,7 +212,7 @@ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct test_val, foo)), BPF_MOV64_IMM(BPF_REG_2, -1), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_EXIT_INSN(), }, .fixup_map_hash_48b = { 3 }, @@ -235,7 +235,7 @@ BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val) - offsetof(struct test_val, foo)), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_EXIT_INSN(), }, .fixup_map_hash_48b = { 3 }, @@ -256,7 +256,7 @@ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), BPF_MOV64_IMM(BPF_REG_2, 8), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_EXIT_INSN(), }, .fixup_map_hash_48b = { 3 }, @@ -280,7 +280,7 @@ BPF_EXIT_INSN(), }, .fixup_map_hash_48b = { 3 }, - .errstr = "R1 min value is outside of the array range", + .errstr = "R1 min value is outside of the allowed memory range", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -300,7 +300,7 @@ sizeof(struct test_val) - offsetof(struct test_val, foo) + 8), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_EXIT_INSN(), }, .fixup_map_hash_48b = { 3 }, @@ -322,7 +322,7 @@ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), BPF_MOV64_IMM(BPF_REG_2, -8), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_EXIT_INSN(), }, .fixup_map_hash_48b = { 3 }, @@ -344,7 +344,7 @@ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), BPF_MOV64_IMM(BPF_REG_2, -1), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_EXIT_INSN(), }, .fixup_map_hash_48b = { 3 }, @@ -368,7 +368,7 @@ BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val) - offsetof(struct test_val, foo)), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_EXIT_INSN(), }, .fixup_map_hash_48b = { 3 }, @@ -390,7 +390,7 @@ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), BPF_MOV64_IMM(BPF_REG_2, 8), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_EXIT_INSN(), }, .fixup_map_hash_48b = { 3 }, @@ -415,7 +415,7 @@ BPF_EXIT_INSN(), }, .fixup_map_hash_48b = { 3 }, - .errstr = "R1 min value is outside of the array range", + .errstr = "R1 min value is outside of the allowed memory range", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -433,7 +433,7 @@ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), BPF_MOV64_IMM(BPF_REG_2, 1), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_EXIT_INSN(), }, .fixup_map_hash_48b = { 3 }, @@ -458,7 +458,7 @@ sizeof(struct test_val) - offsetof(struct test_val, foo) + 1), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_EXIT_INSN(), }, .fixup_map_hash_48b = { 3 }, @@ -926,7 +926,7 @@ }, .fixup_map_hash_16b = { 3, 10 }, .result = REJECT, - .errstr = "R2 unbounded memory access, make sure to bounds check any array access into a map", + .errstr = "R2 unbounded memory access, make sure to bounds check any such access", .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, { diff --git a/tools/testing/selftests/bpf/verifier/precise.c b/tools/testing/selftests/bpf/verifier/precise.c index 02151f8c940f..6dc8003ffc70 100644 --- a/tools/testing/selftests/bpf/verifier/precise.c +++ b/tools/testing/selftests/bpf/verifier/precise.c @@ -31,14 +31,14 @@ BPF_MOV64_REG(BPF_REG_1, BPF_REG_FP), BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_TRACEPOINT, .fixup_map_array_48b = { 1 }, .result = VERBOSE_ACCEPT, .errstr = - "26: (85) call bpf_probe_read#4\ + "26: (85) call bpf_probe_read_kernel#113\ last_idx 26 first_idx 20\ regs=4 stack=0 before 25\ regs=4 stack=0 before 24\ @@ -91,7 +91,7 @@ BPF_MOV64_REG(BPF_REG_1, BPF_REG_FP), BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -99,7 +99,7 @@ .result = VERBOSE_ACCEPT, .flags = BPF_F_TEST_STATE_FREQ, .errstr = - "26: (85) call bpf_probe_read#4\ + "26: (85) call bpf_probe_read_kernel#113\ last_idx 26 first_idx 22\ regs=4 stack=0 before 25\ regs=4 stack=0 before 24\ diff --git a/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c b/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c index da7a4b37cb98..fc4e301260f6 100644 --- a/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c +++ b/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c @@ -1,34 +1,4 @@ { - "prevent map lookup in sockmap", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_EXIT_INSN(), - }, - .fixup_map_sockmap = { 3 }, - .result = REJECT, - .errstr = "cannot pass map_type 15 into func bpf_map_lookup_elem", - .prog_type = BPF_PROG_TYPE_SOCK_OPS, -}, -{ - "prevent map lookup in sockhash", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_EXIT_INSN(), - }, - .fixup_map_sockhash = { 3 }, - .result = REJECT, - .errstr = "cannot pass map_type 18 into func bpf_map_lookup_elem", - .prog_type = BPF_PROG_TYPE_SOCK_OPS, -}, -{ "prevent map lookup in stack trace", .insns = { BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c index 604b46151736..056e0273bf12 100644 --- a/tools/testing/selftests/bpf/verifier/ref_tracking.c +++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c @@ -821,3 +821,36 @@ .result = REJECT, .errstr = "invalid mem access", }, +{ + "reference tracking: branch tracking valid pointer null comparison", + .insns = { + BPF_SK_LOOKUP(sk_lookup_tcp), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 1), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 2), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, +}, +{ + "reference tracking: branch tracking valid pointer value comparison", + .insns = { + BPF_SK_LOOKUP(sk_lookup_tcp), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 4), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 1234, 2), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "Unreleased reference", + .result = REJECT, +}, diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c index 9ed192e14f5f..b1aac2641498 100644 --- a/tools/testing/selftests/bpf/verifier/sock.c +++ b/tools/testing/selftests/bpf/verifier/sock.c @@ -222,7 +222,7 @@ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetofend(struct bpf_sock, state)), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetofend(struct bpf_sock, rx_queue_mapping)), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -516,3 +516,118 @@ .prog_type = BPF_PROG_TYPE_XDP, .result = ACCEPT, }, +{ + "bpf_map_lookup_elem(sockmap, &key)", + .insns = { + BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_sockmap = { 3 }, + .prog_type = BPF_PROG_TYPE_SK_SKB, + .result = REJECT, + .errstr = "Unreleased reference id=2 alloc_insn=5", +}, +{ + "bpf_map_lookup_elem(sockhash, &key)", + .insns = { + BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_sockhash = { 3 }, + .prog_type = BPF_PROG_TYPE_SK_SKB, + .result = REJECT, + .errstr = "Unreleased reference id=2 alloc_insn=5", +}, +{ + "bpf_map_lookup_elem(sockmap, &key); sk->type [fullsock field]; bpf_sk_release(sk)", + .insns = { + BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, type)), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .fixup_map_sockmap = { 3 }, + .prog_type = BPF_PROG_TYPE_SK_SKB, + .result = ACCEPT, +}, +{ + "bpf_map_lookup_elem(sockhash, &key); sk->type [fullsock field]; bpf_sk_release(sk)", + .insns = { + BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, type)), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .fixup_map_sockhash = { 3 }, + .prog_type = BPF_PROG_TYPE_SK_SKB, + .result = ACCEPT, +}, +{ + "bpf_sk_select_reuseport(ctx, reuseport_array, &key, flags)", + .insns = { + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -4), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_EMIT_CALL(BPF_FUNC_sk_select_reuseport), + BPF_EXIT_INSN(), + }, + .fixup_map_reuseport_array = { 4 }, + .prog_type = BPF_PROG_TYPE_SK_REUSEPORT, + .result = ACCEPT, +}, +{ + "bpf_sk_select_reuseport(ctx, sockmap, &key, flags)", + .insns = { + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -4), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_EMIT_CALL(BPF_FUNC_sk_select_reuseport), + BPF_EXIT_INSN(), + }, + .fixup_map_sockmap = { 4 }, + .prog_type = BPF_PROG_TYPE_SK_REUSEPORT, + .result = ACCEPT, +}, +{ + "bpf_sk_select_reuseport(ctx, sockhash, &key, flags)", + .insns = { + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -4), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_EMIT_CALL(BPF_FUNC_sk_select_reuseport), + BPF_EXIT_INSN(), + }, + .fixup_map_sockmap = { 4 }, + .prog_type = BPF_PROG_TYPE_SK_REUSEPORT, + .result = ACCEPT, +}, diff --git a/tools/testing/selftests/bpf/verifier/value_or_null.c b/tools/testing/selftests/bpf/verifier/value_or_null.c index 860d4a71cd83..3ecb70a3d939 100644 --- a/tools/testing/selftests/bpf/verifier/value_or_null.c +++ b/tools/testing/selftests/bpf/verifier/value_or_null.c @@ -150,3 +150,22 @@ .result_unpriv = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "map lookup and null branch prediction", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 10), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 2), + BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_10, 10), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 4 }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, +}, diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c index a53d99cebd9f..97ee658e1242 100644 --- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c +++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c @@ -50,7 +50,7 @@ .fixup_map_array_48b = { 8 }, .result = ACCEPT, .result_unpriv = REJECT, - .errstr_unpriv = "R0 min value is outside of the array range", + .errstr_unpriv = "R0 min value is outside of the allowed memory range", .retval = 1, }, { @@ -325,7 +325,7 @@ }, .fixup_map_array_48b = { 3 }, .result = REJECT, - .errstr = "R0 min value is outside of the array range", + .errstr = "R0 min value is outside of the allowed memory range", .result_unpriv = REJECT, .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", }, @@ -601,7 +601,7 @@ }, .fixup_map_array_48b = { 3 }, .result = REJECT, - .errstr = "R1 max value is outside of the array range", + .errstr = "R1 max value is outside of the allowed memory range", .errstr_unpriv = "R1 pointer arithmetic of map value goes out of range", .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, @@ -726,7 +726,7 @@ }, .fixup_map_array_48b = { 3 }, .result = REJECT, - .errstr = "R0 min value is outside of the array range", + .errstr = "R0 min value is outside of the allowed memory range", }, { "map access: value_ptr -= known scalar, 2", diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_acl_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_acl_drops.sh index 26044e397157..b32ba5fec59d 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_acl_drops.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_acl_drops.sh @@ -107,7 +107,7 @@ ingress_flow_action_drop_test() RET=0 - devlink_trap_drop_test ingress_flow_action_drop acl_drops $swp2 101 + devlink_trap_drop_test ingress_flow_action_drop $swp2 101 log_test "ingress_flow_action_drop" @@ -132,7 +132,7 @@ egress_flow_action_drop_test() RET=0 - devlink_trap_drop_test egress_flow_action_drop acl_drops $swp2 102 + devlink_trap_drop_test egress_flow_action_drop $swp2 102 log_test "egress_flow_action_drop" diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh new file mode 100755 index 000000000000..a37273473c1b --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh @@ -0,0 +1,688 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test devlink-trap control trap functionality over mlxsw. Each registered +# control packet trap is tested to make sure it is triggered under the right +# conditions. +# +# +---------------------------------+ +# | H1 (vrf) | +# | + $h1 | +# | | 192.0.2.1/24 | +# | | 2001:db8:1::1/64 | +# | | | +# | | default via 192.0.2.2 | +# | | default via 2001:db8:1::2 | +# +----|----------------------------+ +# | +# +----|----------------------------------------------------------------------+ +# | SW | | +# | + $rp1 | +# | 192.0.2.2/24 | +# | 2001:db8:1::2/64 | +# | | +# | 2001:db8:2::2/64 | +# | 198.51.100.2/24 | +# | + $rp2 | +# | | | +# +----|----------------------------------------------------------------------+ +# | +# +----|----------------------------+ +# | | default via 198.51.100.2 | +# | | default via 2001:db8:2::2 | +# | | | +# | | 2001:db8:2::1/64 | +# | | 198.51.100.1/24 | +# | + $h2 | +# | H2 (vrf) | +# +---------------------------------+ + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + stp_test + lacp_test + lldp_test + igmp_query_test + igmp_v1_report_test + igmp_v2_report_test + igmp_v3_report_test + igmp_v2_leave_test + mld_query_test + mld_v1_report_test + mld_v2_report_test + mld_v1_done_test + ipv4_dhcp_test + ipv6_dhcp_test + arp_request_test + arp_response_test + ipv6_neigh_solicit_test + ipv6_neigh_advert_test + ipv4_bfd_test + ipv6_bfd_test + ipv4_ospf_test + ipv6_ospf_test + ipv4_bgp_test + ipv6_bgp_test + ipv4_vrrp_test + ipv6_vrrp_test + ipv4_pim_test + ipv6_pim_test + uc_loopback_test + local_route_test + external_route_test + ipv6_uc_dip_link_local_scope_test + ipv4_router_alert_test + ipv6_router_alert_test + ipv6_dip_all_nodes_test + ipv6_dip_all_routers_test + ipv6_router_solicit_test + ipv6_router_advert_test + ipv6_redirect_test + ptp_event_test + ptp_general_test + flow_action_sample_test + flow_action_trap_test +" +NUM_NETIFS=4 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64 + + ip -4 route add default vrf v$h1 nexthop via 192.0.2.2 + ip -6 route add default vrf v$h1 nexthop via 2001:db8:1::2 +} + +h1_destroy() +{ + ip -6 route del default vrf v$h1 nexthop via 2001:db8:1::2 + ip -4 route del default vrf v$h1 nexthop via 192.0.2.2 + + simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 198.51.100.1/24 2001:db8:2::1/64 + + ip -4 route add default vrf v$h2 nexthop via 198.51.100.2 + ip -6 route add default vrf v$h2 nexthop via 2001:db8:2::2 +} + +h2_destroy() +{ + ip -6 route del default vrf v$h2 nexthop via 2001:db8:2::2 + ip -4 route del default vrf v$h2 nexthop via 198.51.100.2 + + simple_if_fini $h2 198.51.100.1/24 2001:db8:2::1/64 +} + +router_create() +{ + ip link set dev $rp1 up + ip link set dev $rp2 up + + __addr_add_del $rp1 add 192.0.2.2/24 2001:db8:1::2/64 + __addr_add_del $rp2 add 198.51.100.2/24 2001:db8:2::2/64 +} + +router_destroy() +{ + __addr_add_del $rp2 del 198.51.100.2/24 2001:db8:2::2/64 + __addr_add_del $rp1 del 192.0.2.2/24 2001:db8:1::2/64 + + ip link set dev $rp2 down + ip link set dev $rp1 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + rp1=${NETIFS[p2]} + + rp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + forwarding_enable + + h1_create + h2_create + router_create +} + +cleanup() +{ + pre_cleanup + + router_destroy + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +stp_test() +{ + devlink_trap_stats_test "STP" "stp" $MZ $h1 -c 1 -t bpdu -q +} + +lacp_payload_get() +{ + local source_mac=$1; shift + local p + + p=$(: + )"01:80:C2:00:00:02:"$( : ETH daddr + )"$source_mac:"$( : ETH saddr + )"88:09:"$( : ETH type + ) + echo $p +} + +lacp_test() +{ + local h1mac=$(mac_get $h1) + + devlink_trap_stats_test "LACP" "lacp" $MZ $h1 -c 1 \ + $(lacp_payload_get $h1mac) -p 100 -q +} + +lldp_payload_get() +{ + local source_mac=$1; shift + local p + + p=$(: + )"01:80:C2:00:00:0E:"$( : ETH daddr + )"$source_mac:"$( : ETH saddr + )"88:CC:"$( : ETH type + ) + echo $p +} + +lldp_test() +{ + local h1mac=$(mac_get $h1) + + devlink_trap_stats_test "LLDP" "lldp" $MZ $h1 -c 1 \ + $(lldp_payload_get $h1mac) -p 100 -q +} + +igmp_query_test() +{ + # IGMP (IP Protocol 2) Membership Query (Type 0x11) + devlink_trap_stats_test "IGMP Membership Query" "igmp_query" \ + $MZ $h1 -c 1 -a own -b 01:00:5E:00:00:01 \ + -A 192.0.2.1 -B 224.0.0.1 -t ip proto=2,p=11 -p 100 -q +} + +igmp_v1_report_test() +{ + # IGMP (IP Protocol 2) Version 1 Membership Report (Type 0x12) + devlink_trap_stats_test "IGMP Version 1 Membership Report" \ + "igmp_v1_report" $MZ $h1 -c 1 -a own -b 01:00:5E:00:00:01 \ + -A 192.0.2.1 -B 244.0.0.1 -t ip proto=2,p=12 -p 100 -q +} + +igmp_v2_report_test() +{ + # IGMP (IP Protocol 2) Version 2 Membership Report (Type 0x16) + devlink_trap_stats_test "IGMP Version 2 Membership Report" \ + "igmp_v2_report" $MZ $h1 -c 1 -a own -b 01:00:5E:00:00:01 \ + -A 192.0.2.1 -B 244.0.0.1 -t ip proto=2,p=16 -p 100 -q +} + +igmp_v3_report_test() +{ + # IGMP (IP Protocol 2) Version 3 Membership Report (Type 0x22) + devlink_trap_stats_test "IGMP Version 3 Membership Report" \ + "igmp_v3_report" $MZ $h1 -c 1 -a own -b 01:00:5E:00:00:01 \ + -A 192.0.2.1 -B 244.0.0.1 -t ip proto=2,p=22 -p 100 -q +} + +igmp_v2_leave_test() +{ + # IGMP (IP Protocol 2) Version 2 Leave Group (Type 0x17) + devlink_trap_stats_test "IGMP Version 2 Leave Group" \ + "igmp_v2_leave" $MZ $h1 -c 1 -a own -b 01:00:5E:00:00:02 \ + -A 192.0.2.1 -B 224.0.0.2 -t ip proto=2,p=17 -p 100 -q +} + +mld_payload_get() +{ + local type=$1; shift + local p + + type=$(printf "%x" $type) + p=$(: + )"3A:"$( : Next Header - ICMPv6 + )"00:"$( : Hdr Ext Len + )"00:00:00:00:00:00:"$( : Options and Padding + )"$type:"$( : ICMPv6.type + )"00:"$( : ICMPv6.code + )"00:"$( : ICMPv6.checksum + ) + echo $p +} + +mld_query_test() +{ + # MLD Multicast Listener Query (Type 130) + devlink_trap_stats_test "MLD Multicast Listener Query" "mld_query" \ + $MZ $h1 -6 -c 1 -A fe80::1 -B ff02::1 \ + -t ip hop=1,next=0,payload=$(mld_payload_get 130) -p 100 -q +} + +mld_v1_report_test() +{ + # MLD Version 1 Multicast Listener Report (Type 131) + devlink_trap_stats_test "MLD Version 1 Multicast Listener Report" \ + "mld_v1_report" $MZ $h1 -6 -c 1 -A fe80::1 -B ff02::16 \ + -t ip hop=1,next=0,payload=$(mld_payload_get 131) -p 100 -q +} + +mld_v2_report_test() +{ + # MLD Version 2 Multicast Listener Report (Type 143) + devlink_trap_stats_test "MLD Version 2 Multicast Listener Report" \ + "mld_v2_report" $MZ $h1 -6 -c 1 -A fe80::1 -B ff02::16 \ + -t ip hop=1,next=0,payload=$(mld_payload_get 143) -p 100 -q +} + +mld_v1_done_test() +{ + # MLD Version 1 Multicast Listener Done (Type 132) + devlink_trap_stats_test "MLD Version 1 Multicast Listener Done" \ + "mld_v1_done" $MZ $h1 -6 -c 1 -A fe80::1 -B ff02::16 \ + -t ip hop=1,next=0,payload=$(mld_payload_get 132) -p 100 -q +} + +ipv4_dhcp_test() +{ + devlink_trap_stats_test "IPv4 DHCP Port 67" "ipv4_dhcp" \ + $MZ $h1 -c 1 -a own -b bcast -A 0.0.0.0 -B 255.255.255.255 \ + -t udp sp=68,dp=67 -p 100 -q + + devlink_trap_stats_test "IPv4 DHCP Port 68" "ipv4_dhcp" \ + $MZ $h1 -c 1 -a own -b $(mac_get $rp1) -A 192.0.2.1 \ + -B 255.255.255.255 -t udp sp=67,dp=68 -p 100 -q +} + +ipv6_dhcp_test() +{ + devlink_trap_stats_test "IPv6 DHCP Port 547" "ipv6_dhcp" \ + $MZ $h1 -6 -c 1 -A fe80::1 -B ff02::1:2 -t udp sp=546,dp=547 \ + -p 100 -q + + devlink_trap_stats_test "IPv6 DHCP Port 546" "ipv6_dhcp" \ + $MZ $h1 -6 -c 1 -A fe80::1 -B ff02::1:2 -t udp sp=547,dp=546 \ + -p 100 -q +} + +arp_request_test() +{ + devlink_trap_stats_test "ARP Request" "arp_request" \ + $MZ $h1 -c 1 -a own -b bcast -t arp request -p 100 -q +} + +arp_response_test() +{ + devlink_trap_stats_test "ARP Response" "arp_response" \ + $MZ $h1 -c 1 -a own -b $(mac_get $rp1) -t arp reply -p 100 -q +} + +icmpv6_header_get() +{ + local type=$1; shift + local p + + type=$(printf "%x" $type) + p=$(: + )"$type:"$( : ICMPv6.type + )"00:"$( : ICMPv6.code + )"00:"$( : ICMPv6.checksum + ) + echo $p +} + +ipv6_neigh_solicit_test() +{ + devlink_trap_stats_test "IPv6 Neighbour Solicitation" \ + "ipv6_neigh_solicit" $MZ $h1 -6 -c 1 \ + -A fe80::1 -B ff02::1:ff00:02 \ + -t ip hop=1,next=58,payload=$(icmpv6_header_get 135) -p 100 -q +} + +ipv6_neigh_advert_test() +{ + devlink_trap_stats_test "IPv6 Neighbour Advertisement" \ + "ipv6_neigh_advert" $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \ + -A fe80::1 -B 2001:db8:1::2 \ + -t ip hop=1,next=58,payload=$(icmpv6_header_get 136) -p 100 -q +} + +ipv4_bfd_test() +{ + devlink_trap_stats_test "IPv4 BFD Control - Port 3784" "ipv4_bfd" \ + $MZ $h1 -c 1 -a own -b $(mac_get $rp1) \ + -A 192.0.2.1 -B 192.0.2.2 -t udp sp=49153,dp=3784 -p 100 -q + + devlink_trap_stats_test "IPv4 BFD Echo - Port 3785" "ipv4_bfd" \ + $MZ $h1 -c 1 -a own -b $(mac_get $rp1) \ + -A 192.0.2.1 -B 192.0.2.2 -t udp sp=49153,dp=3785 -p 100 -q +} + +ipv6_bfd_test() +{ + devlink_trap_stats_test "IPv6 BFD Control - Port 3784" "ipv6_bfd" \ + $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \ + -A 2001:db8:1::1 -B 2001:db8:1::2 \ + -t udp sp=49153,dp=3784 -p 100 -q + + devlink_trap_stats_test "IPv6 BFD Echo - Port 3785" "ipv6_bfd" \ + $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \ + -A 2001:db8:1::1 -B 2001:db8:1::2 \ + -t udp sp=49153,dp=3785 -p 100 -q +} + +ipv4_ospf_test() +{ + devlink_trap_stats_test "IPv4 OSPF - Multicast" "ipv4_ospf" \ + $MZ $h1 -c 1 -a own -b 01:00:5e:00:00:05 \ + -A 192.0.2.1 -B 224.0.0.5 -t ip proto=89 -p 100 -q + + devlink_trap_stats_test "IPv4 OSPF - Unicast" "ipv4_ospf" \ + $MZ $h1 -c 1 -a own -b $(mac_get $rp1) \ + -A 192.0.2.1 -B 192.0.2.2 -t ip proto=89 -p 100 -q +} + +ipv6_ospf_test() +{ + devlink_trap_stats_test "IPv6 OSPF - Multicast" "ipv6_ospf" \ + $MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:05 \ + -A fe80::1 -B ff02::5 -t ip next=89 -p 100 -q + + devlink_trap_stats_test "IPv6 OSPF - Unicast" "ipv6_ospf" \ + $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \ + -A 2001:db8:1::1 -B 2001:db8:1::2 -t ip next=89 -p 100 -q +} + +ipv4_bgp_test() +{ + devlink_trap_stats_test "IPv4 BGP" "ipv4_bgp" \ + $MZ $h1 -c 1 -a own -b $(mac_get $rp1) \ + -A 192.0.2.1 -B 192.0.2.2 -t tcp sp=54321,dp=179,flags=rst \ + -p 100 -q +} + +ipv6_bgp_test() +{ + devlink_trap_stats_test "IPv6 BGP" "ipv6_bgp" \ + $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \ + -A 2001:db8:1::1 -B 2001:db8:1::2 \ + -t tcp sp=54321,dp=179,flags=rst -p 100 -q +} + +ipv4_vrrp_test() +{ + devlink_trap_stats_test "IPv4 VRRP" "ipv4_vrrp" \ + $MZ $h1 -c 1 -a own -b 01:00:5e:00:00:12 \ + -A 192.0.2.1 -B 224.0.0.18 -t ip proto=112 -p 100 -q +} + +ipv6_vrrp_test() +{ + devlink_trap_stats_test "IPv6 VRRP" "ipv6_vrrp" \ + $MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:12 \ + -A fe80::1 -B ff02::12 -t ip next=112 -p 100 -q +} + +ipv4_pim_test() +{ + devlink_trap_stats_test "IPv4 PIM - Multicast" "ipv4_pim" \ + $MZ $h1 -c 1 -a own -b 01:00:5e:00:00:0d \ + -A 192.0.2.1 -B 224.0.0.13 -t ip proto=103 -p 100 -q + + devlink_trap_stats_test "IPv4 PIM - Unicast" "ipv4_pim" \ + $MZ $h1 -c 1 -a own -b $(mac_get $rp1) \ + -A 192.0.2.1 -B 192.0.2.2 -t ip proto=103 -p 100 -q +} + +ipv6_pim_test() +{ + devlink_trap_stats_test "IPv6 PIM - Multicast" "ipv6_pim" \ + $MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:0d \ + -A fe80::1 -B ff02::d -t ip next=103 -p 100 -q + + devlink_trap_stats_test "IPv6 PIM - Unicast" "ipv6_pim" \ + $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \ + -A fe80::1 -B 2001:db8:1::2 -t ip next=103 -p 100 -q +} + +uc_loopback_test() +{ + # Add neighbours to the fake destination IPs, so that the packets are + # routed in the device and not trapped due to an unresolved neighbour + # exception. + ip -4 neigh add 192.0.2.3 lladdr 00:11:22:33:44:55 nud permanent \ + dev $rp1 + ip -6 neigh add 2001:db8:1::3 lladdr 00:11:22:33:44:55 nud permanent \ + dev $rp1 + + devlink_trap_stats_test "IPv4 Unicast Loopback" "uc_loopback" \ + $MZ $h1 -c 1 -a own -b $(mac_get $rp1) \ + -A 192.0.2.1 -B 192.0.2.3 -t udp sp=54321,dp=12345 -p 100 -q + + devlink_trap_stats_test "IPv6 Unicast Loopback" "uc_loopback" \ + $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \ + -A 2001:db8:1::1 -B 2001:db8:1::3 -t udp sp=54321,dp=12345 \ + -p 100 -q + + ip -6 neigh del 2001:db8:1::3 dev $rp1 + ip -4 neigh del 192.0.2.3 dev $rp1 +} + +local_route_test() +{ + # Use a fake source IP to prevent the trap from being triggered twice + # when the router sends back a port unreachable message. + devlink_trap_stats_test "IPv4 Local Route" "local_route" \ + $MZ $h1 -c 1 -a own -b $(mac_get $rp1) \ + -A 192.0.2.3 -B 192.0.2.2 -t udp sp=54321,dp=12345 -p 100 -q + + devlink_trap_stats_test "IPv6 Local Route" "local_route" \ + $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \ + -A 2001:db8:1::3 -B 2001:db8:1::2 -t udp sp=54321,sp=12345 \ + -p 100 -q +} + +external_route_test() +{ + # Add a dummy device through which the incoming packets should be + # routed. + ip link add name dummy10 up type dummy + ip address add 203.0.113.1/24 dev dummy10 + ip -6 address add 2001:db8:10::1/64 dev dummy10 + + devlink_trap_stats_test "IPv4 External Route" "external_route" \ + $MZ $h1 -c 1 -a own -b $(mac_get $rp1) \ + -A 192.0.2.1 -B 203.0.113.2 -t udp sp=54321,dp=12345 -p 100 -q + + devlink_trap_stats_test "IPv6 External Route" "external_route" \ + $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \ + -A 2001:db8:1::1 -B 2001:db8:10::2 -t udp sp=54321,sp=12345 \ + -p 100 -q + + ip -6 address del 2001:db8:10::1/64 dev dummy10 + ip address del 203.0.113.1/24 dev dummy10 + ip link del dev dummy10 +} + +ipv6_uc_dip_link_local_scope_test() +{ + # Add a dummy link-local prefix route to allow the packet to be routed. + ip -6 route add fe80:1::/64 dev $rp2 + + devlink_trap_stats_test \ + "IPv6 Unicast Destination IP With Link-Local Scope" \ + "ipv6_uc_dip_link_local_scope" \ + $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \ + -A fe80::1 -B fe80:1::2 -t udp sp=54321,sp=12345 \ + -p 100 -q + + ip -6 route del fe80:1::/64 dev $rp2 +} + +ipv4_router_alert_get() +{ + local p + + # https://en.wikipedia.org/wiki/IPv4#Options + p=$(: + )"94:"$( : Option Number + )"04:"$( : Option Length + )"00:00:"$( : Option Data + ) + echo $p +} + +ipv4_router_alert_test() +{ + devlink_trap_stats_test "IPv4 Router Alert" "ipv4_router_alert" \ + $MZ $h1 -c 1 -a own -b $(mac_get $rp1) \ + -A 192.0.2.1 -B 198.51.100.3 \ + -t ip option=$(ipv4_router_alert_get) -p 100 -q +} + +ipv6_router_alert_get() +{ + local p + + # https://en.wikipedia.org/wiki/IPv6_packet#Hop-by-hop_options_and_destination_options + # https://tools.ietf.org/html/rfc2711#section-2.1 + p=$(: + )"11:"$( : Next Header - UDP + )"00:"$( : Hdr Ext Len + )"05:02:00:00:00:00:"$( : Option Data + ) + echo $p +} + +ipv6_router_alert_test() +{ + devlink_trap_stats_test "IPv6 Router Alert" "ipv6_router_alert" \ + $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \ + -A 2001:db8:1::1 -B 2001:db8:1::3 \ + -t ip next=0,payload=$(ipv6_router_alert_get) -p 100 -q +} + +ipv6_dip_all_nodes_test() +{ + devlink_trap_stats_test "IPv6 Destination IP \"All Nodes Address\"" \ + "ipv6_dip_all_nodes" \ + $MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:01 \ + -A 2001:db8:1::1 -B ff02::1 -t udp sp=12345,dp=54321 -p 100 -q +} + +ipv6_dip_all_routers_test() +{ + devlink_trap_stats_test "IPv6 Destination IP \"All Routers Address\"" \ + "ipv6_dip_all_routers" \ + $MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:02 \ + -A 2001:db8:1::1 -B ff02::2 -t udp sp=12345,dp=54321 -p 100 -q +} + +ipv6_router_solicit_test() +{ + devlink_trap_stats_test "IPv6 Router Solicitation" \ + "ipv6_router_solicit" \ + $MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:02 \ + -A fe80::1 -B ff02::2 \ + -t ip hop=1,next=58,payload=$(icmpv6_header_get 133) -p 100 -q +} + +ipv6_router_advert_test() +{ + devlink_trap_stats_test "IPv6 Router Advertisement" \ + "ipv6_router_advert" \ + $MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:01 \ + -A fe80::1 -B ff02::1 \ + -t ip hop=1,next=58,payload=$(icmpv6_header_get 134) -p 100 -q +} + +ipv6_redirect_test() +{ + devlink_trap_stats_test "IPv6 Redirect Message" \ + "ipv6_redirect" \ + $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \ + -A fe80::1 -B 2001:db8:1::2 \ + -t ip hop=1,next=58,payload=$(icmpv6_header_get 137) -p 100 -q +} + +ptp_event_test() +{ + # PTP is only supported on Spectrum-1, for now. + [[ "$DEVLINK_VIDDID" != "15b3:cb84" ]] && return + + # PTP Sync (0) + devlink_trap_stats_test "PTP Time-Critical Event Message" "ptp_event" \ + $MZ $h1 -c 1 -a own -b 01:00:5e:00:01:81 \ + -A 192.0.2.1 -B 224.0.1.129 \ + -t udp sp=12345,dp=319,payload=10 -p 100 -q +} + +ptp_general_test() +{ + # PTP is only supported on Spectrum-1, for now. + [[ "$DEVLINK_VIDDID" != "15b3:cb84" ]] && return + + # PTP Announce (b) + devlink_trap_stats_test "PTP General Message" "ptp_general" \ + $MZ $h1 -c 1 -a own -b 01:00:5e:00:01:81 \ + -A 192.0.2.1 -B 224.0.1.129 \ + -t udp sp=12345,dp=320,payload=1b -p 100 -q +} + +flow_action_sample_test() +{ + # Install a filter that samples every incoming packet. + tc qdisc add dev $rp1 clsact + tc filter add dev $rp1 ingress proto all pref 1 handle 101 matchall \ + skip_sw action sample rate 1 group 1 + + devlink_trap_stats_test "Flow Sampling" "flow_action_sample" \ + $MZ $h1 -c 1 -a own -b $(mac_get $rp1) \ + -A 192.0.2.1 -B 198.51.100.1 -t udp sp=12345,dp=54321 -p 100 -q + + tc filter del dev $rp1 ingress proto all pref 1 handle 101 matchall + tc qdisc del dev $rp1 clsact +} + +flow_action_trap_test() +{ + # Install a filter that traps a specific flow. + tc qdisc add dev $rp1 clsact + tc filter add dev $rp1 ingress proto ip pref 1 handle 101 flower \ + skip_sw ip_proto udp src_port 12345 dst_port 54321 action trap + + devlink_trap_stats_test "Flow Trapping (Logging)" "flow_action_trap" \ + $MZ $h1 -c 1 -a own -b $(mac_get $rp1) \ + -A 192.0.2.1 -B 198.51.100.1 -t udp sp=12345,dp=54321 -p 100 -q + + tc filter del dev $rp1 ingress proto ip pref 1 handle 101 flower + tc qdisc del dev $rp1 clsact +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh index e7aecb065409..a4c2812e9807 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh @@ -96,7 +96,6 @@ source_mac_is_multicast_test() { local trap_name="source_mac_is_multicast" local smac=01:02:03:04:05:06 - local group_name="l2_drops" local mz_pid tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \ @@ -107,7 +106,7 @@ source_mac_is_multicast_test() RET=0 - devlink_trap_drop_test $trap_name $group_name $swp2 101 + devlink_trap_drop_test $trap_name $swp2 101 log_test "Source MAC is multicast" @@ -118,7 +117,6 @@ __vlan_tag_mismatch_test() { local trap_name="vlan_tag_mismatch" local dmac=de:ad:be:ef:13:37 - local group_name="l2_drops" local opt=$1; shift local mz_pid @@ -132,7 +130,7 @@ __vlan_tag_mismatch_test() $MZ $h1 "$opt" -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $swp2 101 + devlink_trap_drop_test $trap_name $swp2 101 # Add PVID and make sure packets are no longer dropped. bridge vlan add vid 1 dev $swp1 pvid untagged master @@ -140,7 +138,7 @@ __vlan_tag_mismatch_test() devlink_trap_stats_idle_test $trap_name check_err $? "Trap stats not idle when packets should not be dropped" - devlink_trap_group_stats_idle_test $group_name + devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name) check_err $? "Trap group stats not idle with when packets should not be dropped" tc_check_packets "dev $swp2 egress" 101 0 @@ -179,7 +177,6 @@ ingress_vlan_filter_test() { local trap_name="ingress_vlan_filter" local dmac=de:ad:be:ef:13:37 - local group_name="l2_drops" local mz_pid local vid=10 @@ -193,7 +190,7 @@ ingress_vlan_filter_test() $MZ $h1 -Q $vid -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $swp2 101 + devlink_trap_drop_test $trap_name $swp2 101 # Add the VLAN on the bridge port and make sure packets are no longer # dropped. @@ -202,7 +199,7 @@ ingress_vlan_filter_test() devlink_trap_stats_idle_test $trap_name check_err $? "Trap stats not idle when packets should not be dropped" - devlink_trap_group_stats_idle_test $group_name + devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name) check_err $? "Trap group stats not idle with when packets should not be dropped" tc_check_packets "dev $swp2 egress" 101 0 @@ -222,7 +219,6 @@ __ingress_stp_filter_test() { local trap_name="ingress_spanning_tree_filter" local dmac=de:ad:be:ef:13:37 - local group_name="l2_drops" local state=$1; shift local mz_pid local vid=20 @@ -237,7 +233,7 @@ __ingress_stp_filter_test() $MZ $h1 -Q $vid -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $swp2 101 + devlink_trap_drop_test $trap_name $swp2 101 # Change STP state to forwarding and make sure packets are no longer # dropped. @@ -246,7 +242,7 @@ __ingress_stp_filter_test() devlink_trap_stats_idle_test $trap_name check_err $? "Trap stats not idle when packets should not be dropped" - devlink_trap_group_stats_idle_test $group_name + devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name) check_err $? "Trap group stats not idle with when packets should not be dropped" tc_check_packets "dev $swp2 egress" 101 0 @@ -292,7 +288,6 @@ port_list_is_empty_uc_test() { local trap_name="port_list_is_empty" local dmac=de:ad:be:ef:13:37 - local group_name="l2_drops" local mz_pid # Disable unicast flooding on both ports, so that packets cannot egress @@ -308,7 +303,7 @@ port_list_is_empty_uc_test() $MZ $h1 -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $swp2 101 + devlink_trap_drop_test $trap_name $swp2 101 # Allow packets to be flooded to one port. ip link set dev $swp2 type bridge_slave flood on @@ -316,7 +311,7 @@ port_list_is_empty_uc_test() devlink_trap_stats_idle_test $trap_name check_err $? "Trap stats not idle when packets should not be dropped" - devlink_trap_group_stats_idle_test $group_name + devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name) check_err $? "Trap group stats not idle with when packets should not be dropped" tc_check_packets "dev $swp2 egress" 101 0 @@ -335,7 +330,6 @@ port_list_is_empty_mc_test() { local trap_name="port_list_is_empty" local dmac=01:00:5e:00:00:01 - local group_name="l2_drops" local dip=239.0.0.1 local mz_pid @@ -354,7 +348,7 @@ port_list_is_empty_mc_test() $MZ $h1 -c 0 -p 100 -a own -b $dmac -t ip -B $dip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $swp2 101 + devlink_trap_drop_test $trap_name $swp2 101 # Allow packets to be flooded to one port. ip link set dev $swp2 type bridge_slave mcast_flood on @@ -362,7 +356,7 @@ port_list_is_empty_mc_test() devlink_trap_stats_idle_test $trap_name check_err $? "Trap stats not idle when packets should not be dropped" - devlink_trap_group_stats_idle_test $group_name + devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name) check_err $? "Trap group stats not idle with when packets should not be dropped" tc_check_packets "dev $swp2 egress" 101 0 @@ -387,7 +381,6 @@ port_loopback_filter_uc_test() { local trap_name="port_loopback_filter" local dmac=de:ad:be:ef:13:37 - local group_name="l2_drops" local mz_pid # Make sure packets can only egress the input port. @@ -401,7 +394,7 @@ port_loopback_filter_uc_test() $MZ $h1 -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $swp2 101 + devlink_trap_drop_test $trap_name $swp2 101 # Allow packets to be flooded. ip link set dev $swp2 type bridge_slave flood on @@ -409,7 +402,7 @@ port_loopback_filter_uc_test() devlink_trap_stats_idle_test $trap_name check_err $? "Trap stats not idle when packets should not be dropped" - devlink_trap_group_stats_idle_test $group_name + devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name) check_err $? "Trap group stats not idle with when packets should not be dropped" tc_check_packets "dev $swp2 egress" 101 0 diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh index 616f47d86a61..f5abb1ebd392 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh @@ -161,7 +161,6 @@ ping_check() non_ip_test() { local trap_name="non_ip" - local group_name="l3_drops" local mz_pid RET=0 @@ -176,7 +175,7 @@ non_ip_test() 00:00 de:ad:be:ef" & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $rp2 101 + devlink_trap_drop_test $trap_name $rp2 101 log_test "Non IP" @@ -190,7 +189,6 @@ __uc_dip_over_mc_dmac_test() local dip=$1; shift local flags=${1:-""}; shift local trap_name="uc_dip_over_mc_dmac" - local group_name="l3_drops" local dmac=01:02:03:04:05:06 local mz_pid @@ -206,7 +204,7 @@ __uc_dip_over_mc_dmac_test() -B $dip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $rp2 101 + devlink_trap_drop_test $trap_name $rp2 101 log_test "Unicast destination IP over multicast destination MAC: $desc" @@ -227,7 +225,6 @@ __sip_is_loopback_test() local dip=$1; shift local flags=${1:-""}; shift local trap_name="sip_is_loopback_address" - local group_name="l3_drops" local mz_pid RET=0 @@ -242,7 +239,7 @@ __sip_is_loopback_test() -b $rp1mac -B $dip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $rp2 101 + devlink_trap_drop_test $trap_name $rp2 101 log_test "Source IP is loopback address: $desc" @@ -262,7 +259,6 @@ __dip_is_loopback_test() local dip=$1; shift local flags=${1:-""}; shift local trap_name="dip_is_loopback_address" - local group_name="l3_drops" local mz_pid RET=0 @@ -277,7 +273,7 @@ __dip_is_loopback_test() -B $dip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $rp2 101 + devlink_trap_drop_test $trap_name $rp2 101 log_test "Destination IP is loopback address: $desc" @@ -298,7 +294,6 @@ __sip_is_mc_test() local dip=$1; shift local flags=${1:-""}; shift local trap_name="sip_is_mc" - local group_name="l3_drops" local mz_pid RET=0 @@ -313,7 +308,7 @@ __sip_is_mc_test() -b $rp1mac -B $dip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $rp2 101 + devlink_trap_drop_test $trap_name $rp2 101 log_test "Source IP is multicast: $desc" @@ -329,7 +324,6 @@ sip_is_mc_test() ipv4_sip_is_limited_bc_test() { local trap_name="ipv4_sip_is_limited_bc" - local group_name="l3_drops" local sip=255.255.255.255 local mz_pid @@ -345,7 +339,7 @@ ipv4_sip_is_limited_bc_test() -B $h2_ipv4 -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $rp2 101 + devlink_trap_drop_test $trap_name $rp2 101 log_test "IPv4 source IP is limited broadcast" @@ -382,7 +376,6 @@ __ipv4_header_corrupted_test() local ihl=$1; shift local checksum=$1; shift local trap_name="ip_header_corrupted" - local group_name="l3_drops" local payload local mz_pid @@ -399,7 +392,7 @@ __ipv4_header_corrupted_test() $MZ $h1 -c 0 -d 1msec -a $h1mac -b $rp1mac -q p=$payload & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $rp2 101 + devlink_trap_drop_test $trap_name $rp2 101 log_test "IP header corrupted: $desc: IPv4" @@ -429,7 +422,6 @@ __ipv6_header_corrupted_test() local desc=$1; shift local ipver=$1; shift local trap_name="ip_header_corrupted" - local group_name="l3_drops" local payload local mz_pid @@ -446,7 +438,7 @@ __ipv6_header_corrupted_test() $MZ $h1 -c 0 -d 1msec -a $h1mac -b $rp1mac -q p=$payload & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $rp2 101 + devlink_trap_drop_test $trap_name $rp2 101 log_test "IP header corrupted: $desc: IPv6" @@ -469,7 +461,6 @@ ip_header_corrupted_test() ipv6_mc_dip_reserved_scope_test() { local trap_name="ipv6_mc_dip_reserved_scope" - local group_name="l3_drops" local dip=FF00:: local mz_pid @@ -485,7 +476,7 @@ ipv6_mc_dip_reserved_scope_test() "33:33:00:00:00:00" -B $dip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $rp2 101 + devlink_trap_drop_test $trap_name $rp2 101 log_test "IPv6 multicast destination IP reserved scope" @@ -495,7 +486,6 @@ ipv6_mc_dip_reserved_scope_test() ipv6_mc_dip_interface_local_scope_test() { local trap_name="ipv6_mc_dip_interface_local_scope" - local group_name="l3_drops" local dip=FF01:: local mz_pid @@ -511,7 +501,7 @@ ipv6_mc_dip_interface_local_scope_test() "33:33:00:00:00:00" -B $dip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $rp2 101 + devlink_trap_drop_test $trap_name $rp2 101 log_test "IPv6 multicast destination IP interface-local scope" @@ -526,7 +516,6 @@ __blackhole_route_test() local dip=$1; shift local ip_proto=${1:-"icmp"}; shift local trap_name="blackhole_route" - local group_name="l3_drops" local mz_pid RET=0 @@ -542,7 +531,7 @@ __blackhole_route_test() -B $dip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $rp2 101 + devlink_trap_drop_test $trap_name $rp2 101 log_test "Blackhole route: IPv$flags" devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101 @@ -558,7 +547,6 @@ blackhole_route_test() irif_disabled_test() { local trap_name="irif_disabled" - local group_name="l3_drops" local t0_packets t0_bytes local t1_packets t1_bytes local mz_pid @@ -613,7 +601,6 @@ irif_disabled_test() erif_disabled_test() { local trap_name="erif_disabled" - local group_name="l3_drops" local t0_packets t0_bytes local t1_packets t1_bytes local mz_pid diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh index 2bc6df42d597..1fedfc9da434 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh @@ -169,7 +169,6 @@ trap_action_check() mtu_value_is_too_small_test() { local trap_name="mtu_value_is_too_small" - local group_name="l3_drops" local expected_action="trap" local mz_pid @@ -191,7 +190,7 @@ mtu_value_is_too_small_test() -B 198.51.100.1 -q & mz_pid=$! - devlink_trap_exception_test $trap_name $group_name + devlink_trap_exception_test $trap_name tc_check_packets_hitting "dev $h1 ingress" 101 check_err $? "Packets were not received to h1" @@ -208,7 +207,6 @@ __ttl_value_is_too_small_test() { local ttl_val=$1; shift local trap_name="ttl_value_is_too_small" - local group_name="l3_drops" local expected_action="trap" local mz_pid @@ -227,7 +225,7 @@ __ttl_value_is_too_small_test() -b $rp1mac -B 198.51.100.1 -q & mz_pid=$! - devlink_trap_exception_test $trap_name $group_name + devlink_trap_exception_test $trap_name tc_check_packets_hitting "dev $h1 ingress" 101 check_err $? "Packets were not received to h1" @@ -271,7 +269,6 @@ __mc_reverse_path_forwarding_test() local proto=$1; shift local flags=${1:-""}; shift local trap_name="mc_reverse_path_forwarding" - local group_name="l3_drops" local expected_action="trap" local mz_pid @@ -292,7 +289,7 @@ __mc_reverse_path_forwarding_test() mz_pid=$! - devlink_trap_exception_test $trap_name $group_name + devlink_trap_exception_test $trap_name tc_check_packets "dev $rp2 egress" 101 0 check_err $? "Packets were not dropped" @@ -322,7 +319,6 @@ __reject_route_test() local unreachable=$1; shift local flags=${1:-""}; shift local trap_name="reject_route" - local group_name="l3_drops" local expected_action="trap" local mz_pid @@ -341,7 +337,7 @@ __reject_route_test() -B $dst_ip -q & mz_pid=$! - devlink_trap_exception_test $trap_name $group_name + devlink_trap_exception_test $trap_name tc_check_packets_hitting "dev $h1 ingress" 101 check_err $? "ICMP packet was not received to h1" @@ -370,7 +366,6 @@ __host_miss_test() local desc=$1; shift local dip=$1; shift local trap_name="unresolved_neigh" - local group_name="l3_drops" local expected_action="trap" local mz_pid @@ -405,7 +400,6 @@ __invalid_nexthop_test() local subnet=$1; shift local via_add=$1; shift local trap_name="unresolved_neigh" - local group_name="l3_drops" local expected_action="trap" local mz_pid @@ -494,7 +488,6 @@ vrf_without_routes_destroy() ipv4_lpm_miss_test() { local trap_name="ipv4_lpm_miss" - local group_name="l3_drops" local expected_action="trap" local mz_pid @@ -511,7 +504,7 @@ ipv4_lpm_miss_test() -B 203.0.113.1 -q & mz_pid=$! - devlink_trap_exception_test $trap_name $group_name + devlink_trap_exception_test $trap_name log_test "LPM miss: IPv4" @@ -522,7 +515,6 @@ ipv4_lpm_miss_test() ipv6_lpm_miss_test() { local trap_name="ipv6_lpm_miss" - local group_name="l3_drops" local expected_action="trap" local mz_pid @@ -539,7 +531,7 @@ ipv6_lpm_miss_test() -B 2001:db8::1 -q & mz_pid=$! - devlink_trap_exception_test $trap_name $group_name + devlink_trap_exception_test $trap_name log_test "LPM miss: IPv6" diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh index 039629bb92a3..8817851da7a9 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh @@ -140,7 +140,6 @@ ecn_payload_get() ecn_decap_test() { local trap_name="decap_error" - local group_name="tunnel_drops" local desc=$1; shift local ecn_desc=$1; shift local outer_tos=$1; shift @@ -161,7 +160,7 @@ ecn_decap_test() mz_pid=$! - devlink_trap_exception_test $trap_name $group_name + devlink_trap_exception_test $trap_name tc_check_packets "dev $swp1 egress" 101 0 check_err $? "Packets were not dropped" @@ -200,7 +199,6 @@ ipip_payload_get() no_matching_tunnel_test() { local trap_name="decap_error" - local group_name="tunnel_drops" local desc=$1; shift local sip=$1; shift local mz_pid @@ -218,7 +216,7 @@ no_matching_tunnel_test() -A $sip -B 192.0.2.65 -t ip len=48,proto=47,p=$payload -q & mz_pid=$! - devlink_trap_exception_test $trap_name $group_name + devlink_trap_exception_test $trap_name tc_check_packets "dev $swp1 egress" 101 0 check_err $? "Packets were not dropped" diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh index e11a416323cf..10e0f3dbc930 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh @@ -159,7 +159,6 @@ ecn_payload_get() ecn_decap_test() { local trap_name="decap_error" - local group_name="tunnel_drops" local desc=$1; shift local ecn_desc=$1; shift local outer_tos=$1; shift @@ -177,7 +176,7 @@ ecn_decap_test() -t udp sp=12345,dp=$VXPORT,tos=$outer_tos,p=$payload -q & mz_pid=$! - devlink_trap_exception_test $trap_name $group_name + devlink_trap_exception_test $trap_name tc_check_packets "dev $swp1 egress" 101 0 check_err $? "Packets were not dropped" @@ -228,7 +227,6 @@ short_payload_get() corrupted_packet_test() { local trap_name="decap_error" - local group_name="tunnel_drops" local desc=$1; shift local payload_get=$1; shift local mz_pid @@ -246,7 +244,7 @@ corrupted_packet_test() -B 192.0.2.17 -t udp sp=12345,dp=$VXPORT,p=$payload -q & mz_pid=$! - devlink_trap_exception_test $trap_name $group_name + devlink_trap_exception_test $trap_name tc_check_packets "dev $swp1 egress" 101 0 check_err $? "Packets were not dropped" @@ -297,7 +295,6 @@ mc_smac_payload_get() overlay_smac_is_mc_test() { local trap_name="overlay_smac_is_mc" - local group_name="tunnel_drops" local mz_pid RET=0 @@ -314,7 +311,7 @@ overlay_smac_is_mc_test() -B 192.0.2.17 -t udp sp=12345,dp=$VXPORT,p=$payload -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $swp1 101 + devlink_trap_drop_test $trap_name $swp1 101 log_test "Overlay source MAC is multicast" diff --git a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh index 58f3a05f08af..7d9e73a43a49 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh @@ -15,7 +15,7 @@ source mlxsw_lib.sh SB_POOL_ING=0 SB_POOL_EGR_CPU=10 -SB_ITC_CPU_IP=3 +SB_ITC_CPU_IP=2 SB_ITC_CPU_ARP=2 SB_ITC=0 diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_restrictions.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh index 68c80d0ec1ec..9241250c5921 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_restrictions.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh @@ -7,6 +7,10 @@ ALL_TESTS=" shared_block_drop_test egress_redirect_test multi_mirror_test + matchall_sample_egress_test + matchall_mirror_behind_flower_ingress_test + matchall_sample_behind_flower_ingress_test + matchall_mirror_behind_flower_egress_test " NUM_NETIFS=2 @@ -155,6 +159,134 @@ multi_mirror_test() log_test "multi mirror" } +matchall_sample_egress_test() +{ + RET=0 + + # It is forbidden in mlxsw driver to have matchall with sample action + # bound on egress + + tc qdisc add dev $swp1 clsact + + tc filter add dev $swp1 ingress protocol all pref 1 handle 101 \ + matchall skip_sw action sample rate 100 group 1 + check_err $? "Failed to add rule with sample action on ingress" + + tc filter del dev $swp1 ingress protocol all pref 1 handle 101 matchall + + tc filter add dev $swp1 egress protocol all pref 1 handle 101 \ + matchall skip_sw action sample rate 100 group 1 + check_fail $? "Incorrect success to add rule with sample action on egress" + + tc qdisc del dev $swp1 clsact + + log_test "matchall sample egress" +} + +matchall_behind_flower_ingress_test() +{ + local action=$1 + local action_args=$2 + + RET=0 + + # On ingress, all matchall-mirror and matchall-sample + # rules have to be in front of the flower rules + + tc qdisc add dev $swp1 clsact + + tc filter add dev $swp1 ingress protocol ip pref 10 handle 101 flower \ + skip_sw dst_ip 192.0.2.2 action drop + + tc filter add dev $swp1 ingress protocol all pref 9 handle 102 \ + matchall skip_sw action $action_args + check_err $? "Failed to add matchall rule in front of a flower rule" + + tc filter del dev $swp1 ingress protocol all pref 9 handle 102 matchall + + tc filter add dev $swp1 ingress protocol all pref 11 handle 102 \ + matchall skip_sw action $action_args + check_fail $? "Incorrect success to add matchall rule behind a flower rule" + + tc filter del dev $swp1 ingress protocol ip pref 10 handle 101 flower + + tc filter add dev $swp1 ingress protocol all pref 9 handle 102 \ + matchall skip_sw action $action_args + + tc filter add dev $swp1 ingress protocol ip pref 10 handle 101 flower \ + skip_sw dst_ip 192.0.2.2 action drop + check_err $? "Failed to add flower rule behind a matchall rule" + + tc filter del dev $swp1 ingress protocol ip pref 10 handle 101 flower + + tc filter add dev $swp1 ingress protocol ip pref 8 handle 101 flower \ + skip_sw dst_ip 192.0.2.2 action drop + check_fail $? "Incorrect success to add flower rule in front of a matchall rule" + + tc qdisc del dev $swp1 clsact + + log_test "matchall $action flower ingress" +} + +matchall_mirror_behind_flower_ingress_test() +{ + matchall_behind_flower_ingress_test "mirror" "mirred egress mirror dev $swp2" +} + +matchall_sample_behind_flower_ingress_test() +{ + matchall_behind_flower_ingress_test "sample" "sample rate 100 group 1" +} + +matchall_behind_flower_egress_test() +{ + local action=$1 + local action_args=$2 + + RET=0 + + # On egress, all matchall-mirror rules have to be behind the flower rules + + tc qdisc add dev $swp1 clsact + + tc filter add dev $swp1 egress protocol ip pref 10 handle 101 flower \ + skip_sw dst_ip 192.0.2.2 action drop + + tc filter add dev $swp1 egress protocol all pref 11 handle 102 \ + matchall skip_sw action $action_args + check_err $? "Failed to add matchall rule in front of a flower rule" + + tc filter del dev $swp1 egress protocol all pref 11 handle 102 matchall + + tc filter add dev $swp1 egress protocol all pref 9 handle 102 \ + matchall skip_sw action $action_args + check_fail $? "Incorrect success to add matchall rule behind a flower rule" + + tc filter del dev $swp1 egress protocol ip pref 10 handle 101 flower + + tc filter add dev $swp1 egress protocol all pref 11 handle 102 \ + matchall skip_sw action $action_args + + tc filter add dev $swp1 egress protocol ip pref 10 handle 101 flower \ + skip_sw dst_ip 192.0.2.2 action drop + check_err $? "Failed to add flower rule behind a matchall rule" + + tc filter del dev $swp1 egress protocol ip pref 10 handle 101 flower + + tc filter add dev $swp1 egress protocol ip pref 12 handle 101 flower \ + skip_sw dst_ip 192.0.2.2 action drop + check_fail $? "Incorrect success to add flower rule in front of a matchall rule" + + tc qdisc del dev $swp1 clsact + + log_test "matchall $action flower egress" +} + +matchall_mirror_behind_flower_egress_test() +{ + matchall_behind_flower_egress_test "mirror" "mirred egress mirror dev $swp2" +} + setup_prepare() { swp1=${NETIFS[p1]} diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh index 9f9741444549..de4b32fc4223 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh @@ -146,11 +146,39 @@ regions_test() check_region_snapshot_count dummy post-first-request 3 + devlink region dump $DL_HANDLE/dummy snapshot 25 >> /dev/null + check_err $? "Failed to dump snapshot with id 25" + + devlink region read $DL_HANDLE/dummy snapshot 25 addr 0 len 1 >> /dev/null + check_err $? "Failed to read snapshot with id 25 (1 byte)" + + devlink region read $DL_HANDLE/dummy snapshot 25 addr 128 len 128 >> /dev/null + check_err $? "Failed to read snapshot with id 25 (128 bytes)" + + devlink region read $DL_HANDLE/dummy snapshot 25 addr 128 len $((1<<32)) >> /dev/null + check_err $? "Failed to read snapshot with id 25 (oversized)" + + devlink region read $DL_HANDLE/dummy snapshot 25 addr $((1<<32)) len 128 >> /dev/null 2>&1 + check_fail $? "Bad read of snapshot with id 25 did not fail" + devlink region del $DL_HANDLE/dummy snapshot 25 check_err $? "Failed to delete snapshot with id 25" check_region_snapshot_count dummy post-second-delete 2 + sid=$(devlink -j region new $DL_HANDLE/dummy | jq '.[][][][]') + check_err $? "Failed to create a new snapshot with id allocated by the kernel" + + check_region_snapshot_count dummy post-first-request 3 + + devlink region dump $DL_HANDLE/dummy snapshot $sid >> /dev/null + check_err $? "Failed to dump a snapshot with id allocated by the kernel" + + devlink region del $DL_HANDLE/dummy snapshot $sid + check_err $? "Failed to delete snapshot with id allocated by the kernel" + + check_region_snapshot_count dummy post-first-request 2 + log_test "regions test" } diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index 2bb8c81fc0b4..c9f03ef93338 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -168,9 +168,17 @@ #define __TEST_IMPL(test_name, _signal) \ static void test_name(struct __test_metadata *_metadata); \ + static inline void wrapper_##test_name( \ + struct __test_metadata *_metadata, \ + struct __fixture_variant_metadata *variant) \ + { \ + test_name(_metadata); \ + } \ static struct __test_metadata _##test_name##_object = \ - { .name = "global." #test_name, \ - .fn = &test_name, .termsig = _signal, \ + { .name = #test_name, \ + .fn = &wrapper_##test_name, \ + .fixture = &_fixture_global, \ + .termsig = _signal, \ .timeout = TEST_TIMEOUT_DEFAULT, }; \ static void __attribute__((constructor)) _register_##test_name(void) \ { \ @@ -212,10 +220,13 @@ * populated and cleaned up using FIXTURE_SETUP() and FIXTURE_TEARDOWN(). */ #define FIXTURE(fixture_name) \ + FIXTURE_VARIANT(fixture_name); \ + static struct __fixture_metadata _##fixture_name##_fixture_object = \ + { .name = #fixture_name, }; \ static void __attribute__((constructor)) \ _register_##fixture_name##_data(void) \ { \ - __fixture_count++; \ + __register_fixture(&_##fixture_name##_fixture_object); \ } \ FIXTURE_DATA(fixture_name) @@ -241,7 +252,10 @@ #define FIXTURE_SETUP(fixture_name) \ void fixture_name##_setup( \ struct __test_metadata __attribute__((unused)) *_metadata, \ - FIXTURE_DATA(fixture_name) __attribute__((unused)) *self) + FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \ + const FIXTURE_VARIANT(fixture_name) \ + __attribute__((unused)) *variant) + /** * FIXTURE_TEARDOWN(fixture_name) * *_metadata* is included so that EXPECT_* and ASSERT_* work correctly. @@ -264,6 +278,59 @@ FIXTURE_DATA(fixture_name) __attribute__((unused)) *self) /** + * FIXTURE_VARIANT(fixture_name) - Optionally called once per fixture + * to declare fixture variant + * + * @fixture_name: fixture name + * + * .. code-block:: c + * + * FIXTURE_VARIANT(datatype name) { + * type property1; + * ... + * }; + * + * Defines type of constant parameters provided to FIXTURE_SETUP() and TEST_F() + * as *variant*. Variants allow the same tests to be run with different + * arguments. + */ +#define FIXTURE_VARIANT(fixture_name) struct _fixture_variant_##fixture_name + +/** + * FIXTURE_VARIANT_ADD(fixture_name, variant_name) - Called once per fixture + * variant to setup and register the data + * + * @fixture_name: fixture name + * @variant_name: name of the parameter set + * + * .. code-block:: c + * + * FIXTURE_ADD(datatype name) { + * .property1 = val1; + * ... + * }; + * + * Defines a variant of the test fixture, provided to FIXTURE_SETUP() and + * TEST_F() as *variant*. Tests of each fixture will be run once for each + * variant. + */ +#define FIXTURE_VARIANT_ADD(fixture_name, variant_name) \ + extern FIXTURE_VARIANT(fixture_name) \ + _##fixture_name##_##variant_name##_variant; \ + static struct __fixture_variant_metadata \ + _##fixture_name##_##variant_name##_object = \ + { .name = #variant_name, \ + .data = &_##fixture_name##_##variant_name##_variant}; \ + static void __attribute__((constructor)) \ + _register_##fixture_name##_##variant_name(void) \ + { \ + __register_fixture_variant(&_##fixture_name##_fixture_object, \ + &_##fixture_name##_##variant_name##_object); \ + } \ + FIXTURE_VARIANT(fixture_name) \ + _##fixture_name##_##variant_name##_variant = + +/** * TEST_F(fixture_name, test_name) - Emits test registration and helpers for * fixture-based test cases * @@ -293,24 +360,27 @@ #define __TEST_F_IMPL(fixture_name, test_name, signal, tmout) \ static void fixture_name##_##test_name( \ struct __test_metadata *_metadata, \ - FIXTURE_DATA(fixture_name) *self); \ + FIXTURE_DATA(fixture_name) *self, \ + const FIXTURE_VARIANT(fixture_name) *variant); \ static inline void wrapper_##fixture_name##_##test_name( \ - struct __test_metadata *_metadata) \ + struct __test_metadata *_metadata, \ + struct __fixture_variant_metadata *variant) \ { \ /* fixture data is alloced, setup, and torn down per call. */ \ FIXTURE_DATA(fixture_name) self; \ memset(&self, 0, sizeof(FIXTURE_DATA(fixture_name))); \ - fixture_name##_setup(_metadata, &self); \ + fixture_name##_setup(_metadata, &self, variant->data); \ /* Let setup failure terminate early. */ \ if (!_metadata->passed) \ return; \ - fixture_name##_##test_name(_metadata, &self); \ + fixture_name##_##test_name(_metadata, &self, variant->data); \ fixture_name##_teardown(_metadata, &self); \ } \ static struct __test_metadata \ _##fixture_name##_##test_name##_object = { \ - .name = #fixture_name "." #test_name, \ + .name = #test_name, \ .fn = &wrapper_##fixture_name##_##test_name, \ + .fixture = &_##fixture_name##_fixture_object, \ .termsig = signal, \ .timeout = tmout, \ }; \ @@ -321,7 +391,9 @@ } \ static void fixture_name##_##test_name( \ struct __test_metadata __attribute__((unused)) *_metadata, \ - FIXTURE_DATA(fixture_name) __attribute__((unused)) *self) + FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \ + const FIXTURE_VARIANT(fixture_name) \ + __attribute__((unused)) *variant) /** * TEST_HARNESS_MAIN - Simple wrapper to run the test harness @@ -631,11 +703,74 @@ } \ } while (0); OPTIONAL_HANDLER(_assert) +/* List helpers */ +#define __LIST_APPEND(head, item) \ +{ \ + /* Circular linked list where only prev is circular. */ \ + if (head == NULL) { \ + head = item; \ + item->next = NULL; \ + item->prev = item; \ + return; \ + } \ + if (__constructor_order == _CONSTRUCTOR_ORDER_FORWARD) { \ + item->next = NULL; \ + item->prev = head->prev; \ + item->prev->next = item; \ + head->prev = item; \ + } else { \ + item->next = head; \ + item->next->prev = item; \ + item->prev = item; \ + head = item; \ + } \ +} + +struct __test_metadata; +struct __fixture_variant_metadata; + +/* Contains all the information about a fixture. */ +struct __fixture_metadata { + const char *name; + struct __test_metadata *tests; + struct __fixture_variant_metadata *variant; + struct __fixture_metadata *prev, *next; +} _fixture_global __attribute__((unused)) = { + .name = "global", + .prev = &_fixture_global, +}; + +static struct __fixture_metadata *__fixture_list = &_fixture_global; +static int __constructor_order; + +#define _CONSTRUCTOR_ORDER_FORWARD 1 +#define _CONSTRUCTOR_ORDER_BACKWARD -1 + +static inline void __register_fixture(struct __fixture_metadata *f) +{ + __LIST_APPEND(__fixture_list, f); +} + +struct __fixture_variant_metadata { + const char *name; + const void *data; + struct __fixture_variant_metadata *prev, *next; +}; + +static inline void +__register_fixture_variant(struct __fixture_metadata *f, + struct __fixture_variant_metadata *variant) +{ + __LIST_APPEND(f->variant, variant); +} + /* Contains all the information for test execution and status checking. */ struct __test_metadata { const char *name; - void (*fn)(struct __test_metadata *); + void (*fn)(struct __test_metadata *, + struct __fixture_variant_metadata *); pid_t pid; /* pid of test when being run */ + struct __fixture_metadata *fixture; int termsig; int passed; int trigger; /* extra handler after the evaluation */ @@ -646,15 +781,6 @@ struct __test_metadata { struct __test_metadata *prev, *next; }; -/* Storage for the (global) tests to be run. */ -static struct __test_metadata *__test_list; -static unsigned int __test_count; -static unsigned int __fixture_count; -static int __constructor_order; - -#define _CONSTRUCTOR_ORDER_FORWARD 1 -#define _CONSTRUCTOR_ORDER_BACKWARD -1 - /* * Since constructors are called in reverse order, reverse the test * list so tests are run in source declaration order. @@ -666,25 +792,7 @@ static int __constructor_order; */ static inline void __register_test(struct __test_metadata *t) { - __test_count++; - /* Circular linked list where only prev is circular. */ - if (__test_list == NULL) { - __test_list = t; - t->next = NULL; - t->prev = t; - return; - } - if (__constructor_order == _CONSTRUCTOR_ORDER_FORWARD) { - t->next = NULL; - t->prev = __test_list->prev; - t->prev->next = t; - __test_list->prev = t; - } else { - t->next = __test_list; - t->next->prev = t; - t->prev = t; - __test_list = t; - } + __LIST_APPEND(t->fixture->tests, t); } static inline int __bail(int for_realz, bool no_print, __u8 step) @@ -790,43 +898,67 @@ void __wait_for_test(struct __test_metadata *t) } } -void __run_test(struct __test_metadata *t) +void __run_test(struct __fixture_metadata *f, + struct __fixture_variant_metadata *variant, + struct __test_metadata *t) { + /* reset test struct */ t->passed = 1; t->trigger = 0; - printf("[ RUN ] %s\n", t->name); + t->step = 0; + t->no_print = 0; + + printf("[ RUN ] %s%s%s.%s\n", + f->name, variant->name[0] ? "." : "", variant->name, t->name); t->pid = fork(); if (t->pid < 0) { printf("ERROR SPAWNING TEST CHILD\n"); t->passed = 0; } else if (t->pid == 0) { - t->fn(t); + t->fn(t, variant); /* return the step that failed or 0 */ _exit(t->passed ? 0 : t->step); } else { __wait_for_test(t); } - printf("[ %4s ] %s\n", (t->passed ? "OK" : "FAIL"), t->name); + printf("[ %4s ] %s%s%s.%s\n", (t->passed ? "OK" : "FAIL"), + f->name, variant->name[0] ? "." : "", variant->name, t->name); } static int test_harness_run(int __attribute__((unused)) argc, char __attribute__((unused)) **argv) { + struct __fixture_variant_metadata no_variant = { .name = "", }; + struct __fixture_variant_metadata *v; + struct __fixture_metadata *f; struct __test_metadata *t; int ret = 0; + unsigned int case_count = 0, test_count = 0; unsigned int count = 0; unsigned int pass_count = 0; + for (f = __fixture_list; f; f = f->next) { + for (v = f->variant ?: &no_variant; v; v = v->next) { + case_count++; + for (t = f->tests; t; t = t->next) + test_count++; + } + } + /* TODO(wad) add optional arguments similar to gtest. */ printf("[==========] Running %u tests from %u test cases.\n", - __test_count, __fixture_count + 1); - for (t = __test_list; t; t = t->next) { - count++; - __run_test(t); - if (t->passed) - pass_count++; - else - ret = 1; + test_count, case_count); + for (f = __fixture_list; f; f = f->next) { + for (v = f->variant ?: &no_variant; v; v = v->next) { + for (t = f->tests; t; t = t->next) { + count++; + __run_test(f, v, t); + if (t->passed) + pass_count++; + else + ret = 1; + } + } } printf("[==========] %u / %u tests passed.\n", pass_count, count); printf("[ %s ]\n", (ret ? "FAILED" : "PASSED")); diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 3f386eb9e7d7..895ec992b2f1 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -16,6 +16,7 @@ TEST_PROGS += altnames.sh icmp_redirect.sh ip6_gre_headroom.sh TEST_PROGS += route_localnet.sh TEST_PROGS += reuseaddr_ports_exhausted.sh TEST_PROGS += txtimestamp.sh +TEST_PROGS += vrf-xfrm-tests.sh TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index 6560ed796ac4..dee567f7576a 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -19,8 +19,8 @@ ret=0 ksft_skip=4 # all tests in this script. Can be overridden with -t option -IPV4_TESTS="ipv4_fcnal ipv4_grp_fcnal ipv4_withv6_fcnal ipv4_fcnal_runtime" -IPV6_TESTS="ipv6_fcnal ipv6_grp_fcnal ipv6_fcnal_runtime" +IPV4_TESTS="ipv4_fcnal ipv4_grp_fcnal ipv4_withv6_fcnal ipv4_fcnal_runtime ipv4_large_grp ipv4_compat_mode ipv4_fdb_grp_fcnal ipv4_torture" +IPV6_TESTS="ipv6_fcnal ipv6_grp_fcnal ipv6_fcnal_runtime ipv6_large_grp ipv6_compat_mode ipv6_fdb_grp_fcnal ipv6_torture" ALL_TESTS="basic ${IPV4_TESTS} ${IPV6_TESTS}" TESTS="${ALL_TESTS}" @@ -146,35 +146,36 @@ setup() create_ns remote IP="ip -netns me" + BRIDGE="bridge -netns me" set -e $IP li add veth1 type veth peer name veth2 $IP li set veth1 up $IP addr add 172.16.1.1/24 dev veth1 - $IP -6 addr add 2001:db8:91::1/64 dev veth1 + $IP -6 addr add 2001:db8:91::1/64 dev veth1 nodad $IP li add veth3 type veth peer name veth4 $IP li set veth3 up $IP addr add 172.16.2.1/24 dev veth3 - $IP -6 addr add 2001:db8:92::1/64 dev veth3 + $IP -6 addr add 2001:db8:92::1/64 dev veth3 nodad $IP li set veth2 netns peer up ip -netns peer addr add 172.16.1.2/24 dev veth2 - ip -netns peer -6 addr add 2001:db8:91::2/64 dev veth2 + ip -netns peer -6 addr add 2001:db8:91::2/64 dev veth2 nodad $IP li set veth4 netns peer up ip -netns peer addr add 172.16.2.2/24 dev veth4 - ip -netns peer -6 addr add 2001:db8:92::2/64 dev veth4 + ip -netns peer -6 addr add 2001:db8:92::2/64 dev veth4 nodad ip -netns remote li add veth5 type veth peer name veth6 ip -netns remote li set veth5 up ip -netns remote addr add dev veth5 172.16.101.1/24 - ip -netns remote addr add dev veth5 2001:db8:101::1/64 + ip -netns remote -6 addr add dev veth5 2001:db8:101::1/64 nodad ip -netns remote ro add 172.16.0.0/22 via 172.16.101.2 ip -netns remote -6 ro add 2001:db8:90::/40 via 2001:db8:101::2 ip -netns remote li set veth6 netns peer up ip -netns peer addr add dev veth6 172.16.101.2/24 - ip -netns peer addr add dev veth6 2001:db8:101::2/64 + ip -netns peer -6 addr add dev veth6 2001:db8:101::2/64 nodad set +e } @@ -248,11 +249,247 @@ check_route6() local expected="$2" local out - out=$($IP -6 route ls match ${pfx} 2>/dev/null) + out=$($IP -6 route ls match ${pfx} 2>/dev/null | sed -e 's/pref medium//') check_output "${out}" "${expected}" } +check_large_grp() +{ + local ipv=$1 + local ecmp=$2 + local grpnum=100 + local nhidstart=100 + local grpidstart=1000 + local iter=0 + local nhidstr="" + local grpidstr="" + local grpstr="" + local ipstr="" + + if [ $ipv -eq 4 ]; then + ipstr="172.16.1." + else + ipstr="2001:db8:91::" + fi + + # + # Create $grpnum groups with specified $ecmp and dump them + # + + # create nexthops with different gateways + iter=2 + while [ $iter -le $(($ecmp + 1)) ] + do + nhidstr="$(($nhidstart + $iter))" + run_cmd "$IP nexthop add id $nhidstr via $ipstr$iter dev veth1" + check_nexthop "id $nhidstr" "id $nhidstr via $ipstr$iter dev veth1 scope link" + + if [ $iter -le $ecmp ]; then + grpstr+="$nhidstr/" + else + grpstr+="$nhidstr" + fi + ((iter++)) + done + + # create duplicate large ecmp groups + iter=0 + while [ $iter -le $grpnum ] + do + grpidstr="$(($grpidstart + $iter))" + run_cmd "$IP nexthop add id $grpidstr group $grpstr" + check_nexthop "id $grpidstr" "id $grpidstr group $grpstr" + ((iter++)) + done + + # dump large groups + run_cmd "$IP nexthop list" + log_test $? 0 "Dump large (x$ecmp) ecmp groups" +} + +start_ip_monitor() +{ + local mtype=$1 + + # start the monitor in the background + tmpfile=`mktemp /var/run/nexthoptestXXX` + mpid=`($IP monitor $mtype > $tmpfile & echo $!) 2>/dev/null` + sleep 0.2 + echo "$mpid $tmpfile" +} + +stop_ip_monitor() +{ + local mpid=$1 + local tmpfile=$2 + local el=$3 + + # check the monitor results + kill $mpid + lines=`wc -l $tmpfile | cut "-d " -f1` + test $lines -eq $el + rc=$? + rm -rf $tmpfile + + return $rc +} + +check_nexthop_fdb_support() +{ + $IP nexthop help 2>&1 | grep -q fdb + if [ $? -ne 0 ]; then + echo "SKIP: iproute2 too old, missing fdb nexthop support" + return $ksft_skip + fi +} + +ipv6_fdb_grp_fcnal() +{ + local rc + + echo + echo "IPv6 fdb groups functional" + echo "--------------------------" + + check_nexthop_fdb_support + if [ $? -eq $ksft_skip ]; then + return $ksft_skip + fi + + # create group with multiple nexthops + run_cmd "$IP nexthop add id 61 via 2001:db8:91::2 fdb" + run_cmd "$IP nexthop add id 62 via 2001:db8:91::3 fdb" + run_cmd "$IP nexthop add id 102 group 61/62 fdb" + check_nexthop "id 102" "id 102 group 61/62 fdb" + log_test $? 0 "Fdb Nexthop group with multiple nexthops" + + ## get nexthop group + run_cmd "$IP nexthop get id 102" + check_nexthop "id 102" "id 102 group 61/62 fdb" + log_test $? 0 "Get Fdb nexthop group by id" + + # fdb nexthop group can only contain fdb nexthops + run_cmd "$IP nexthop add id 63 via 2001:db8:91::4" + run_cmd "$IP nexthop add id 64 via 2001:db8:91::5" + run_cmd "$IP nexthop add id 103 group 63/64 fdb" + log_test $? 2 "Fdb Nexthop group with non-fdb nexthops" + + # Non fdb nexthop group can not contain fdb nexthops + run_cmd "$IP nexthop add id 65 via 2001:db8:91::5 fdb" + run_cmd "$IP nexthop add id 66 via 2001:db8:91::6 fdb" + run_cmd "$IP nexthop add id 104 group 65/66" + log_test $? 2 "Non-Fdb Nexthop group with fdb nexthops" + + # fdb nexthop cannot have blackhole + run_cmd "$IP nexthop add id 67 blackhole fdb" + log_test $? 2 "Fdb Nexthop with blackhole" + + # fdb nexthop with oif + run_cmd "$IP nexthop add id 68 via 2001:db8:91::7 dev veth1 fdb" + log_test $? 2 "Fdb Nexthop with oif" + + # fdb nexthop with onlink + run_cmd "$IP nexthop add id 68 via 2001:db8:91::7 onlink fdb" + log_test $? 2 "Fdb Nexthop with onlink" + + # fdb nexthop with encap + run_cmd "$IP nexthop add id 69 encap mpls 101 via 2001:db8:91::8 dev veth1 fdb" + log_test $? 2 "Fdb Nexthop with encap" + + run_cmd "$IP link add name vx10 type vxlan id 1010 local 2001:db8:91::9 remote 2001:db8:91::10 dstport 4789 nolearning noudpcsum tos inherit ttl 100" + run_cmd "$BRIDGE fdb add 02:02:00:00:00:13 dev vx10 nhid 102 self" + log_test $? 0 "Fdb mac add with nexthop group" + + ## fdb nexthops can only reference nexthop groups and not nexthops + run_cmd "$BRIDGE fdb add 02:02:00:00:00:14 dev vx10 nhid 61 self" + log_test $? 255 "Fdb mac add with nexthop" + + run_cmd "$IP -6 ro add 2001:db8:101::1/128 nhid 66" + log_test $? 2 "Route add with fdb nexthop" + + run_cmd "$IP -6 ro add 2001:db8:101::1/128 nhid 103" + log_test $? 2 "Route add with fdb nexthop group" + + run_cmd "$IP nexthop del id 102" + log_test $? 0 "Fdb nexthop delete" + + $IP link del dev vx10 +} + +ipv4_fdb_grp_fcnal() +{ + local rc + + echo + echo "IPv4 fdb groups functional" + echo "--------------------------" + + check_nexthop_fdb_support + if [ $? -eq $ksft_skip ]; then + return $ksft_skip + fi + + # create group with multiple nexthops + run_cmd "$IP nexthop add id 12 via 172.16.1.2 fdb" + run_cmd "$IP nexthop add id 13 via 172.16.1.3 fdb" + run_cmd "$IP nexthop add id 102 group 12/13 fdb" + check_nexthop "id 102" "id 102 group 12/13 fdb" + log_test $? 0 "Fdb Nexthop group with multiple nexthops" + + # get nexthop group + run_cmd "$IP nexthop get id 102" + check_nexthop "id 102" "id 102 group 12/13 fdb" + log_test $? 0 "Get Fdb nexthop group by id" + + # fdb nexthop group can only contain fdb nexthops + run_cmd "$IP nexthop add id 14 via 172.16.1.2" + run_cmd "$IP nexthop add id 15 via 172.16.1.3" + run_cmd "$IP nexthop add id 103 group 14/15 fdb" + log_test $? 2 "Fdb Nexthop group with non-fdb nexthops" + + # Non fdb nexthop group can not contain fdb nexthops + run_cmd "$IP nexthop add id 16 via 172.16.1.2 fdb" + run_cmd "$IP nexthop add id 17 via 172.16.1.3 fdb" + run_cmd "$IP nexthop add id 104 group 14/15" + log_test $? 2 "Non-Fdb Nexthop group with fdb nexthops" + + # fdb nexthop cannot have blackhole + run_cmd "$IP nexthop add id 18 blackhole fdb" + log_test $? 2 "Fdb Nexthop with blackhole" + + # fdb nexthop with oif + run_cmd "$IP nexthop add id 16 via 172.16.1.2 dev veth1 fdb" + log_test $? 2 "Fdb Nexthop with oif" + + # fdb nexthop with onlink + run_cmd "$IP nexthop add id 16 via 172.16.1.2 onlink fdb" + log_test $? 2 "Fdb Nexthop with onlink" + + # fdb nexthop with encap + run_cmd "$IP nexthop add id 17 encap mpls 101 via 172.16.1.2 dev veth1 fdb" + log_test $? 2 "Fdb Nexthop with encap" + + run_cmd "$IP link add name vx10 type vxlan id 1010 local 10.0.0.1 remote 10.0.0.2 dstport 4789 nolearning noudpcsum tos inherit ttl 100" + run_cmd "$BRIDGE fdb add 02:02:00:00:00:13 dev vx10 nhid 102 self" + log_test $? 0 "Fdb mac add with nexthop group" + + # fdb nexthops can only reference nexthop groups and not nexthops + run_cmd "$BRIDGE fdb add 02:02:00:00:00:14 dev vx10 nhid 12 self" + log_test $? 255 "Fdb mac add with nexthop" + + run_cmd "$IP ro add 172.16.0.0/22 nhid 15" + log_test $? 2 "Route add with fdb nexthop" + + run_cmd "$IP ro add 172.16.0.0/22 nhid 103" + log_test $? 2 "Route add with fdb nexthop group" + + run_cmd "$IP nexthop del id 102" + log_test $? 0 "Fdb nexthop delete" + + $IP link del dev vx10 +} + ################################################################################ # basic operations (add, delete, replace) on nexthops and nexthop groups # @@ -423,8 +660,6 @@ ipv6_fcnal_runtime() echo "IPv6 functional runtime" echo "-----------------------" - sleep 5 - # # IPv6 - the basics # @@ -481,12 +716,12 @@ ipv6_fcnal_runtime() run_cmd "$IP -6 nexthop add id 85 dev veth1" run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 85" log_test $? 0 "IPv6 route with device only nexthop" - check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 85 dev veth1 metric 1024 pref medium" + check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 85 dev veth1 metric 1024" run_cmd "$IP nexthop add id 123 group 81/85" run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 123" log_test $? 0 "IPv6 multipath route with nexthop mix - dev only + gw" - check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 123 metric 1024 nexthop via 2001:db8:91::2 dev veth1 weight 1 nexthop dev veth1 weight 1 pref medium" + check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 123 metric 1024 nexthop via 2001:db8:91::2 dev veth1 weight 1 nexthop dev veth1 weight 1" # # IPv6 route with v4 nexthop - not allowed @@ -519,6 +754,75 @@ ipv6_fcnal_runtime() # route with src address and using nexthop - not allowed } +ipv6_large_grp() +{ + local ecmp=32 + + echo + echo "IPv6 large groups (x$ecmp)" + echo "---------------------" + + check_large_grp 6 $ecmp + + $IP nexthop flush >/dev/null 2>&1 +} + +ipv6_del_add_loop1() +{ + while :; do + $IP nexthop del id 100 + $IP nexthop add id 100 via 2001:db8:91::2 dev veth1 + done >/dev/null 2>&1 +} + +ipv6_grp_replace_loop() +{ + while :; do + $IP nexthop replace id 102 group 100/101 + done >/dev/null 2>&1 +} + +ipv6_torture() +{ + local pid1 + local pid2 + local pid3 + local pid4 + local pid5 + + echo + echo "IPv6 runtime torture" + echo "--------------------" + if [ ! -x "$(command -v mausezahn)" ]; then + echo "SKIP: Could not run test; need mausezahn tool" + return + fi + + run_cmd "$IP nexthop add id 100 via 2001:db8:91::2 dev veth1" + run_cmd "$IP nexthop add id 101 via 2001:db8:92::2 dev veth3" + run_cmd "$IP nexthop add id 102 group 100/101" + run_cmd "$IP route add 2001:db8:101::1 nhid 102" + run_cmd "$IP route add 2001:db8:101::2 nhid 102" + + ipv6_del_add_loop1 & + pid1=$! + ipv6_grp_replace_loop & + pid2=$! + ip netns exec me ping -f 2001:db8:101::1 >/dev/null 2>&1 & + pid3=$! + ip netns exec me ping -f 2001:db8:101::2 >/dev/null 2>&1 & + pid4=$! + ip netns exec me mausezahn veth1 -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 & + pid5=$! + + sleep 300 + kill -9 $pid1 $pid2 $pid3 $pid4 $pid5 + + # if we did not crash, success + log_test 0 0 "IPv6 torture test" +} + + ipv4_fcnal() { local rc @@ -866,6 +1170,11 @@ ipv4_fcnal_runtime() $IP neigh sh | grep 'dev veth1' fi + run_cmd "$IP ro del 172.16.101.1/32 via inet6 ${lladdr} dev veth1" + run_cmd "$IP -4 ro add default via inet6 ${lladdr} dev veth1" + run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1" + log_test $? 0 "IPv4 default route with IPv6 gateway" + # # MPLS as an example of LWT encap # @@ -880,6 +1189,241 @@ ipv4_fcnal_runtime() log_test $? 0 "IPv4 route with MPLS encap, v6 gw - check" } +ipv4_large_grp() +{ + local ecmp=32 + + echo + echo "IPv4 large groups (x$ecmp)" + echo "---------------------" + + check_large_grp 4 $ecmp + + $IP nexthop flush >/dev/null 2>&1 +} + +sysctl_nexthop_compat_mode_check() +{ + local sysctlname="net.ipv4.nexthop_compat_mode" + local lprefix=$1 + + IPE="ip netns exec me" + + $IPE sysctl -q $sysctlname 2>&1 >/dev/null + if [ $? -ne 0 ]; then + echo "SKIP: kernel lacks nexthop compat mode sysctl control" + return $ksft_skip + fi + + out=$($IPE sysctl $sysctlname 2>/dev/null) + log_test $? 0 "$lprefix default nexthop compat mode check" + check_output "${out}" "$sysctlname = 1" +} + +sysctl_nexthop_compat_mode_set() +{ + local sysctlname="net.ipv4.nexthop_compat_mode" + local mode=$1 + local lprefix=$2 + + IPE="ip netns exec me" + + out=$($IPE sysctl -w $sysctlname=$mode) + log_test $? 0 "$lprefix set compat mode - $mode" + check_output "${out}" "net.ipv4.nexthop_compat_mode = $mode" +} + +ipv6_compat_mode() +{ + local rc + + echo + echo "IPv6 nexthop api compat mode test" + echo "--------------------------------" + + sysctl_nexthop_compat_mode_check "IPv6" + if [ $? -eq $ksft_skip ]; then + return $ksft_skip + fi + + run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1" + run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1" + run_cmd "$IP nexthop add id 122 group 62/63" + ipmout=$(start_ip_monitor route) + + run_cmd "$IP -6 ro add 2001:db8:101::1/128 nhid 122" + # route add notification should contain expanded nexthops + stop_ip_monitor $ipmout 3 + log_test $? 0 "IPv6 compat mode on - route add notification" + + # route dump should contain expanded nexthops + check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 122 metric 1024 nexthop via 2001:db8:91::2 dev veth1 weight 1 nexthop via 2001:db8:91::3 dev veth1 weight 1" + log_test $? 0 "IPv6 compat mode on - route dump" + + # change in nexthop group should generate route notification + run_cmd "$IP nexthop add id 64 via 2001:db8:91::4 dev veth1" + ipmout=$(start_ip_monitor route) + run_cmd "$IP nexthop replace id 122 group 62/64" + stop_ip_monitor $ipmout 3 + + log_test $? 0 "IPv6 compat mode on - nexthop change" + + # set compat mode off + sysctl_nexthop_compat_mode_set 0 "IPv6" + + run_cmd "$IP -6 ro del 2001:db8:101::1/128 nhid 122" + + run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1" + run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1" + run_cmd "$IP nexthop add id 122 group 62/63" + ipmout=$(start_ip_monitor route) + + run_cmd "$IP -6 ro add 2001:db8:101::1/128 nhid 122" + # route add notification should not contain expanded nexthops + stop_ip_monitor $ipmout 1 + log_test $? 0 "IPv6 compat mode off - route add notification" + + # route dump should not contain expanded nexthops + check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 122 metric 1024" + log_test $? 0 "IPv6 compat mode off - route dump" + + # change in nexthop group should not generate route notification + run_cmd "$IP nexthop add id 64 via 2001:db8:91::4 dev veth1" + ipmout=$(start_ip_monitor route) + run_cmd "$IP nexthop replace id 122 group 62/64" + stop_ip_monitor $ipmout 0 + log_test $? 0 "IPv6 compat mode off - nexthop change" + + # nexthop delete should not generate route notification + ipmout=$(start_ip_monitor route) + run_cmd "$IP nexthop del id 122" + stop_ip_monitor $ipmout 0 + log_test $? 0 "IPv6 compat mode off - nexthop delete" + + # set compat mode back on + sysctl_nexthop_compat_mode_set 1 "IPv6" +} + +ipv4_compat_mode() +{ + local rc + + echo + echo "IPv4 nexthop api compat mode" + echo "----------------------------" + + sysctl_nexthop_compat_mode_check "IPv4" + if [ $? -eq $ksft_skip ]; then + return $ksft_skip + fi + + run_cmd "$IP nexthop add id 21 via 172.16.1.2 dev veth1" + run_cmd "$IP nexthop add id 22 via 172.16.1.2 dev veth1" + run_cmd "$IP nexthop add id 122 group 21/22" + ipmout=$(start_ip_monitor route) + + run_cmd "$IP ro add 172.16.101.1/32 nhid 122" + stop_ip_monitor $ipmout 3 + + # route add notification should contain expanded nexthops + log_test $? 0 "IPv4 compat mode on - route add notification" + + # route dump should contain expanded nexthops + check_route "172.16.101.1" "172.16.101.1 nhid 122 nexthop via 172.16.1.2 dev veth1 weight 1 nexthop via 172.16.1.2 dev veth1 weight 1" + log_test $? 0 "IPv4 compat mode on - route dump" + + # change in nexthop group should generate route notification + run_cmd "$IP nexthop add id 23 via 172.16.1.3 dev veth1" + ipmout=$(start_ip_monitor route) + run_cmd "$IP nexthop replace id 122 group 21/23" + stop_ip_monitor $ipmout 3 + log_test $? 0 "IPv4 compat mode on - nexthop change" + + sysctl_nexthop_compat_mode_set 0 "IPv4" + + # cleanup + run_cmd "$IP ro del 172.16.101.1/32 nhid 122" + + ipmout=$(start_ip_monitor route) + run_cmd "$IP ro add 172.16.101.1/32 nhid 122" + stop_ip_monitor $ipmout 1 + # route add notification should not contain expanded nexthops + log_test $? 0 "IPv4 compat mode off - route add notification" + + # route dump should not contain expanded nexthops + check_route "172.16.101.1" "172.16.101.1 nhid 122" + log_test $? 0 "IPv4 compat mode off - route dump" + + # change in nexthop group should not generate route notification + ipmout=$(start_ip_monitor route) + run_cmd "$IP nexthop replace id 122 group 21/22" + stop_ip_monitor $ipmout 0 + log_test $? 0 "IPv4 compat mode off - nexthop change" + + # nexthop delete should not generate route notification + ipmout=$(start_ip_monitor route) + run_cmd "$IP nexthop del id 122" + stop_ip_monitor $ipmout 0 + log_test $? 0 "IPv4 compat mode off - nexthop delete" + + sysctl_nexthop_compat_mode_set 1 "IPv4" +} + +ipv4_del_add_loop1() +{ + while :; do + $IP nexthop del id 100 + $IP nexthop add id 100 via 172.16.1.2 dev veth1 + done >/dev/null 2>&1 +} + +ipv4_grp_replace_loop() +{ + while :; do + $IP nexthop replace id 102 group 100/101 + done >/dev/null 2>&1 +} + +ipv4_torture() +{ + local pid1 + local pid2 + local pid3 + local pid4 + local pid5 + + echo + echo "IPv4 runtime torture" + echo "--------------------" + if [ ! -x "$(command -v mausezahn)" ]; then + echo "SKIP: Could not run test; need mausezahn tool" + return + fi + + run_cmd "$IP nexthop add id 100 via 172.16.1.2 dev veth1" + run_cmd "$IP nexthop add id 101 via 172.16.2.2 dev veth3" + run_cmd "$IP nexthop add id 102 group 100/101" + run_cmd "$IP route add 172.16.101.1 nhid 102" + run_cmd "$IP route add 172.16.101.2 nhid 102" + + ipv4_del_add_loop1 & + pid1=$! + ipv4_grp_replace_loop & + pid2=$! + ip netns exec me ping -f 172.16.101.1 >/dev/null 2>&1 & + pid3=$! + ip netns exec me ping -f 172.16.101.2 >/dev/null 2>&1 & + pid4=$! + ip netns exec me mausezahn veth1 -B 172.16.101.2 -A 172.16.1.1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 & + pid5=$! + + sleep 300 + kill -9 $pid1 $pid2 $pid3 $pid4 $pid5 + + # if we did not crash, success + log_test 0 0 "IPv4 torture test" +} + basic() { echo diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh index 155d48bd4d9e..f0e6be4c09e9 100644 --- a/tools/testing/selftests/net/forwarding/devlink_lib.sh +++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh @@ -365,7 +365,9 @@ devlink_trap_group_stats_idle_test() devlink_trap_exception_test() { local trap_name=$1; shift - local group_name=$1; shift + local group_name + + group_name=$(devlink_trap_group_get $trap_name) devlink_trap_stats_idle_test $trap_name check_fail $? "Trap stats idle when packets should have been trapped" @@ -377,9 +379,11 @@ devlink_trap_exception_test() devlink_trap_drop_test() { local trap_name=$1; shift - local group_name=$1; shift local dev=$1; shift local handle=$1; shift + local group_name + + group_name=$(devlink_trap_group_get $trap_name) # This is the common part of all the tests. It checks that stats are # initially idle, then non-idle after changing the trap action and @@ -390,7 +394,6 @@ devlink_trap_drop_test() devlink_trap_group_stats_idle_test $group_name check_err $? "Trap group stats not idle with initial drop action" - devlink_trap_action_set $trap_name "trap" devlink_trap_stats_idle_test $trap_name check_fail $? "Trap stats idle after setting action to trap" @@ -420,6 +423,29 @@ devlink_trap_drop_cleanup() tc filter del dev $dev egress protocol $proto pref $pref handle $handle flower } +devlink_trap_stats_test() +{ + local test_name=$1; shift + local trap_name=$1; shift + local send_one="$@" + local t0_packets + local t1_packets + + RET=0 + + t0_packets=$(devlink_trap_rx_packets_get $trap_name) + + $send_one && sleep 1 + + t1_packets=$(devlink_trap_rx_packets_get $trap_name) + + if [[ $t1_packets -eq $t0_packets ]]; then + check_err 1 "Trap stats did not increase" + fi + + log_test "$test_name" +} + devlink_trap_policers_num_get() { devlink -j -p trap policer show | jq '.[]["'$DEVLINK_DEV'"] | length' diff --git a/tools/testing/selftests/net/forwarding/mirror_lib.sh b/tools/testing/selftests/net/forwarding/mirror_lib.sh index 00797597fcf5..c33bfd7ba214 100644 --- a/tools/testing/selftests/net/forwarding/mirror_lib.sh +++ b/tools/testing/selftests/net/forwarding/mirror_lib.sh @@ -29,11 +29,9 @@ mirror_test() local pref=$1; shift local expect=$1; shift - local ping_timeout=$((PING_TIMEOUT * 5)) local t0=$(tc_rule_stats_get $dev $pref) - ip vrf exec $vrf_name \ - ${PING} ${sip:+-I $sip} $dip -c 10 -i 0.5 -w $ping_timeout \ - &> /dev/null + $MZ $vrf_name ${sip:+-A $sip} -B $dip -a own -b bc -q \ + -c 10 -d 100ms -t icmp type=8 sleep 0.5 local t1=$(tc_rule_stats_get $dev $pref) local delta=$((t1 - t0)) diff --git a/tools/testing/selftests/net/forwarding/pedit_dsfield.sh b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh index b50081855913..55eeacf59241 100755 --- a/tools/testing/selftests/net/forwarding/pedit_dsfield.sh +++ b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh @@ -20,10 +20,14 @@ ALL_TESTS=" ping_ipv4 + ping_ipv6 test_ip_dsfield test_ip_dscp test_ip_ecn test_ip_dscp_ecn + test_ip6_dsfield + test_ip6_dscp + test_ip6_ecn " NUM_NETIFS=4 @@ -107,6 +111,11 @@ ping_ipv4() ping_test $h1 192.0.2.2 } +ping_ipv6() +{ + ping6_test $h1 2001:db8:1::2 +} + do_test_pedit_dsfield_common() { local pedit_locus=$1; shift @@ -123,7 +132,12 @@ do_test_pedit_dsfield_common() local pkts pkts=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= 10" \ tc_rule_handle_stats_get "dev $h2 ingress" 101) - check_err $? "Expected to get 10 packets, but got $pkts." + check_err $? "Expected to get 10 packets on test probe, but got $pkts." + + pkts=$(tc_rule_handle_stats_get "$pedit_locus" 101) + ((pkts >= 10)) + check_err $? "Expected to get 10 packets on pedit rule, but got $pkts." + log_test "$pedit_locus pedit $pedit_action" } @@ -228,6 +242,63 @@ test_ip_dscp_ecn() do_test_ip_dscp_ecn "dev $swp2 egress" } +do_test_ip6_dsfield() +{ + local locus=$1; shift + local dsfield + + for dsfield in 0 1 2 3 128 252 253 254 255; do + do_test_pedit_dsfield "$locus" \ + "ip6 traffic_class set $dsfield" \ + ipv6 "ip_tos $dsfield" \ + "-6 -A 2001:db8:1::1 -B 2001:db8:1::2" + done +} + +test_ip6_dsfield() +{ + do_test_ip6_dsfield "dev $swp1 ingress" + do_test_ip6_dsfield "dev $swp2 egress" +} + +do_test_ip6_dscp() +{ + local locus=$1; shift + local dscp + + for dscp in 0 1 2 3 32 61 62 63; do + do_test_pedit_dsfield "$locus" \ + "ip6 traffic_class set $((dscp << 2)) retain 0xfc" \ + ipv6 "ip_tos $(((dscp << 2) | 1))" \ + "-6 -A 2001:db8:1::1 -B 2001:db8:1::2" + done +} + +test_ip6_dscp() +{ + do_test_ip6_dscp "dev $swp1 ingress" + do_test_ip6_dscp "dev $swp2 egress" +} + +do_test_ip6_ecn() +{ + local locus=$1; shift + local ecn + + for ecn in 0 1 2 3; do + do_test_pedit_dsfield "$locus" \ + "ip6 traffic_class set $ecn retain 0x3" \ + ipv6 "ip_tos $((124 | $ecn))" \ + "-6 -A 2001:db8:1::1 -B 2001:db8:1::2" + done +} + +test_ip6_ecn() +{ + do_test_ip6_ecn "dev $swp1 ingress" + do_test_ip6_ecn "dev $swp2 egress" +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh index 813d02d1939d..d9eca227136b 100755 --- a/tools/testing/selftests/net/forwarding/tc_actions.sh +++ b/tools/testing/selftests/net/forwarding/tc_actions.sh @@ -2,7 +2,8 @@ # SPDX-License-Identifier: GPL-2.0 ALL_TESTS="gact_drop_and_ok_test mirred_egress_redirect_test \ - mirred_egress_mirror_test gact_trap_test" + mirred_egress_mirror_test matchall_mirred_egress_mirror_test \ + gact_trap_test" NUM_NETIFS=4 source tc_common.sh source lib.sh @@ -50,6 +51,9 @@ switch_destroy() mirred_egress_test() { local action=$1 + local protocol=$2 + local classifier=$3 + local classifier_args=$4 RET=0 @@ -62,9 +66,9 @@ mirred_egress_test() tc_check_packets "dev $h2 ingress" 101 1 check_fail $? "Matched without redirect rule inserted" - tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \ - $tcflags dst_ip 192.0.2.2 action mirred egress $action \ - dev $swp2 + tc filter add dev $swp1 ingress protocol $protocol pref 1 handle 101 \ + $classifier $tcflags $classifier_args \ + action mirred egress $action dev $swp2 $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ -t ip -q @@ -72,10 +76,11 @@ mirred_egress_test() tc_check_packets "dev $h2 ingress" 101 1 check_err $? "Did not match incoming $action packet" - tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower + tc filter del dev $swp1 ingress protocol $protocol pref 1 handle 101 \ + $classifier tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower - log_test "mirred egress $action ($tcflags)" + log_test "mirred egress $classifier $action ($tcflags)" } gact_drop_and_ok_test() @@ -187,12 +192,17 @@ cleanup() mirred_egress_redirect_test() { - mirred_egress_test "redirect" + mirred_egress_test "redirect" "ip" "flower" "dst_ip 192.0.2.2" } mirred_egress_mirror_test() { - mirred_egress_test "mirror" + mirred_egress_test "mirror" "ip" "flower" "dst_ip 192.0.2.2" +} + +matchall_mirred_egress_mirror_test() +{ + mirred_egress_test "mirror" "all" "matchall" "" } trap cleanup EXIT diff --git a/tools/testing/selftests/net/ip_defrag.c b/tools/testing/selftests/net/ip_defrag.c index c0c9ecb891e1..f9ed749fd8c7 100644 --- a/tools/testing/selftests/net/ip_defrag.c +++ b/tools/testing/selftests/net/ip_defrag.c @@ -192,9 +192,9 @@ static void send_fragment(int fd_raw, struct sockaddr *addr, socklen_t alen, } res = sendto(fd_raw, ip_frame, frag_len, 0, addr, alen); - if (res < 0) + if (res < 0 && errno != EPERM) error(1, errno, "send_fragment"); - if (res != frag_len) + if (res >= 0 && res != frag_len) error(1, 0, "send_fragment: %d vs %d", res, frag_len); frag_counter++; @@ -313,9 +313,9 @@ static void send_udp_frags(int fd_raw, struct sockaddr *addr, iphdr->ip_len = htons(frag_len); } res = sendto(fd_raw, ip_frame, frag_len, 0, addr, alen); - if (res < 0) + if (res < 0 && errno != EPERM) error(1, errno, "sendto overlap: %d", frag_len); - if (res != frag_len) + if (res >= 0 && res != frag_len) error(1, 0, "sendto overlap: %d vs %d", (int)res, frag_len); frag_counter++; } diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 71a62e7e35b1..77c09cd339c3 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -67,6 +67,10 @@ # Same as pmtu_ipv4_vxlan4, but using a generic UDP IPv4/IPv6 # encapsulation (GUE) over IPv4/IPv6, instead of VXLAN # +# - pmtu_ipv{4,6}_ipv{4,6}_exception +# Same as pmtu_ipv4_vxlan4, but using a IPv4/IPv6 tunnel over IPv4/IPv6, +# instead of VXLAN +# # - pmtu_vti4_exception # Set up vti tunnel on top of veth, with xfrm states and policies, in two # namespaces with matching endpoints. Check that route exception is not @@ -151,6 +155,10 @@ tests=" pmtu_ipv6_gue4_exception IPv6 over gue4: PMTU exceptions 1 pmtu_ipv4_gue6_exception IPv4 over gue6: PMTU exceptions 1 pmtu_ipv6_gue6_exception IPv6 over gue6: PMTU exceptions 1 + pmtu_ipv4_ipv4_exception IPv4 over IPv4: PMTU exceptions 1 + pmtu_ipv6_ipv4_exception IPv6 over IPv4: PMTU exceptions 1 + pmtu_ipv4_ipv6_exception IPv4 over IPv6: PMTU exceptions 1 + pmtu_ipv6_ipv6_exception IPv6 over IPv6: PMTU exceptions 1 pmtu_vti6_exception vti6: PMTU exceptions 0 pmtu_vti4_exception vti4: PMTU exceptions 0 pmtu_vti4_default_mtu vti4: default MTU assignment 0 @@ -363,6 +371,62 @@ setup_gue66() { setup_fou_or_gue 6 6 gue } +setup_ipvX_over_ipvY() { + inner=${1} + outer=${2} + + if [ "${outer}" -eq 4 ]; then + a_addr="${prefix4}.${a_r1}.1" + b_addr="${prefix4}.${b_r1}.1" + if [ "${inner}" -eq 4 ]; then + type="ipip" + mode="ipip" + else + type="sit" + mode="ip6ip" + fi + else + a_addr="${prefix6}:${a_r1}::1" + b_addr="${prefix6}:${b_r1}::1" + type="ip6tnl" + if [ "${inner}" -eq 4 ]; then + mode="ipip6" + else + mode="ip6ip6" + fi + fi + + run_cmd ${ns_a} ip link add ip_a type ${type} local ${a_addr} remote ${b_addr} mode ${mode} || return 2 + run_cmd ${ns_b} ip link add ip_b type ${type} local ${b_addr} remote ${a_addr} mode ${mode} + + run_cmd ${ns_a} ip link set ip_a up + run_cmd ${ns_b} ip link set ip_b up + + if [ "${inner}" = "4" ]; then + run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ip_a + run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ip_b + else + run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ip_a + run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ip_b + fi +} + +setup_ip4ip4() { + setup_ipvX_over_ipvY 4 4 +} + +setup_ip6ip4() { + setup_ipvX_over_ipvY 6 4 +} + +setup_ip4ip6() { + setup_ipvX_over_ipvY 4 6 +} + +setup_ip6ip6() { + setup_ipvX_over_ipvY 6 6 +} + setup_namespaces() { for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do ip netns add ${n} || return 1 @@ -908,6 +972,64 @@ test_pmtu_ipv6_gue6_exception() { test_pmtu_ipvX_over_fouY_or_gueY 6 6 gue } +test_pmtu_ipvX_over_ipvY_exception() { + inner=${1} + outer=${2} + ll_mtu=4000 + + setup namespaces routing ip${inner}ip${outer} || return 2 + + trace "${ns_a}" ip_a "${ns_b}" ip_b \ + "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ + "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B + + if [ ${inner} -eq 4 ]; then + ping=ping + dst=${tunnel4_b_addr} + else + ping=${ping6} + dst=${tunnel6_b_addr} + fi + + if [ ${outer} -eq 4 ]; then + # IPv4 header + exp_mtu=$((${ll_mtu} - 20)) + else + # IPv6 header Option 4 + exp_mtu=$((${ll_mtu} - 40 - 8)) + fi + + # Create route exception by exceeding link layer MTU + mtu "${ns_a}" veth_A-R1 $((${ll_mtu} + 1000)) + mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000)) + mtu "${ns_b}" veth_B-R1 ${ll_mtu} + mtu "${ns_r1}" veth_R1-B ${ll_mtu} + + mtu "${ns_a}" ip_a $((${ll_mtu} + 1000)) || return + mtu "${ns_b}" ip_b $((${ll_mtu} + 1000)) || return + run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst} + + # Check that exception was created + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})" + check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on ip${inner}ip${outer} interface" +} + +test_pmtu_ipv4_ipv4_exception() { + test_pmtu_ipvX_over_ipvY_exception 4 4 +} + +test_pmtu_ipv6_ipv4_exception() { + test_pmtu_ipvX_over_ipvY_exception 6 4 +} + +test_pmtu_ipv4_ipv6_exception() { + test_pmtu_ipvX_over_ipvY_exception 4 6 +} + +test_pmtu_ipv6_ipv6_exception() { + test_pmtu_ipvX_over_ipvY_exception 6 6 +} + test_pmtu_vti4_exception() { setup namespaces veth vti4 xfrm4 || return 2 trace "${ns_a}" veth_a "${ns_b}" veth_b \ diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 0ea44d975b6c..c5282e62df75 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -101,6 +101,21 @@ FIXTURE(tls) bool notls; }; +FIXTURE_VARIANT(tls) +{ + unsigned int tls_version; +}; + +FIXTURE_VARIANT_ADD(tls, 12) +{ + .tls_version = TLS_1_2_VERSION, +}; + +FIXTURE_VARIANT_ADD(tls, 13) +{ + .tls_version = TLS_1_3_VERSION, +}; + FIXTURE_SETUP(tls) { struct tls12_crypto_info_aes_gcm_128 tls12; @@ -112,7 +127,7 @@ FIXTURE_SETUP(tls) len = sizeof(addr); memset(&tls12, 0, sizeof(tls12)); - tls12.info.version = TLS_1_3_VERSION; + tls12.info.version = variant->tls_version; tls12.info.cipher_type = TLS_CIPHER_AES_GCM_128; addr.sin_family = AF_INET; @@ -733,7 +748,7 @@ TEST_F(tls, bidir) struct tls12_crypto_info_aes_gcm_128 tls12; memset(&tls12, 0, sizeof(tls12)); - tls12.info.version = TLS_1_3_VERSION; + tls12.info.version = variant->tls_version; tls12.info.cipher_type = TLS_CIPHER_AES_GCM_128; ret = setsockopt(self->fd, SOL_TLS, TLS_RX, &tls12, @@ -1258,78 +1273,4 @@ TEST(keysizes) { close(cfd); } -TEST(tls12) { - int fd, cfd; - bool notls; - - struct tls12_crypto_info_aes_gcm_128 tls12; - struct sockaddr_in addr; - socklen_t len; - int sfd, ret; - - notls = false; - len = sizeof(addr); - - memset(&tls12, 0, sizeof(tls12)); - tls12.info.version = TLS_1_2_VERSION; - tls12.info.cipher_type = TLS_CIPHER_AES_GCM_128; - - addr.sin_family = AF_INET; - addr.sin_addr.s_addr = htonl(INADDR_ANY); - addr.sin_port = 0; - - fd = socket(AF_INET, SOCK_STREAM, 0); - sfd = socket(AF_INET, SOCK_STREAM, 0); - - ret = bind(sfd, &addr, sizeof(addr)); - ASSERT_EQ(ret, 0); - ret = listen(sfd, 10); - ASSERT_EQ(ret, 0); - - ret = getsockname(sfd, &addr, &len); - ASSERT_EQ(ret, 0); - - ret = connect(fd, &addr, sizeof(addr)); - ASSERT_EQ(ret, 0); - - ret = setsockopt(fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")); - if (ret != 0) { - notls = true; - printf("Failure setting TCP_ULP, testing without tls\n"); - } - - if (!notls) { - ret = setsockopt(fd, SOL_TLS, TLS_TX, &tls12, - sizeof(tls12)); - ASSERT_EQ(ret, 0); - } - - cfd = accept(sfd, &addr, &len); - ASSERT_GE(cfd, 0); - - if (!notls) { - ret = setsockopt(cfd, IPPROTO_TCP, TCP_ULP, "tls", - sizeof("tls")); - ASSERT_EQ(ret, 0); - - ret = setsockopt(cfd, SOL_TLS, TLS_RX, &tls12, - sizeof(tls12)); - ASSERT_EQ(ret, 0); - } - - close(sfd); - - char const *test_str = "test_read"; - int send_len = 10; - char buf[10]; - - send_len = strlen(test_str) + 1; - EXPECT_EQ(send(fd, test_str, send_len, 0), send_len); - EXPECT_NE(recv(cfd, buf, send_len, 0), -1); - EXPECT_EQ(memcmp(buf, test_str, send_len), 0); - - close(fd); - close(cfd); -} - TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/vrf-xfrm-tests.sh b/tools/testing/selftests/net/vrf-xfrm-tests.sh new file mode 100755 index 000000000000..184da81f554f --- /dev/null +++ b/tools/testing/selftests/net/vrf-xfrm-tests.sh @@ -0,0 +1,436 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Various combinations of VRF with xfrms and qdisc. + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +PAUSE_ON_FAIL=no +VERBOSE=0 +ret=0 + +HOST1_4=192.168.1.1 +HOST2_4=192.168.1.2 +HOST1_6=2001:db8:1::1 +HOST2_6=2001:db8:1::2 + +XFRM1_4=10.0.1.1 +XFRM2_4=10.0.1.2 +XFRM1_6=fc00:1000::1 +XFRM2_6=fc00:1000::2 +IF_ID=123 + +VRF=red +TABLE=300 + +AUTH_1=0xd94fcfea65fddf21dc6e0d24a0253508 +AUTH_2=0xdc6e0d24a0253508d94fcfea65fddf21 +ENC_1=0xfc46c20f8048be9725930ff3fb07ac2a91f0347dffeacf62 +ENC_2=0x3fb07ac2a91f0347dffeacf62fc46c20f8048be9725930ff +SPI_1=0x02122b77 +SPI_2=0x2b770212 + +which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping) + +################################################################################ +# +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + printf "TEST: %-60s [ OK ]\n" "${msg}" + nsuccess=$((nsuccess+1)) + else + ret=1 + nfail=$((nfail+1)) + printf "TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +run_cmd_host1() +{ + local cmd="$*" + local out + local rc + + if [ "$VERBOSE" = "1" ]; then + printf " COMMAND: $cmd\n" + fi + + out=$(eval ip netns exec host1 $cmd 2>&1) + rc=$? + if [ "$VERBOSE" = "1" ]; then + if [ -n "$out" ]; then + echo + echo " $out" + fi + echo + fi + + return $rc +} + +################################################################################ +# create namespaces for hosts and sws + +create_vrf() +{ + local ns=$1 + local vrf=$2 + local table=$3 + + if [ -n "${ns}" ]; then + ns="-netns ${ns}" + fi + + ip ${ns} link add ${vrf} type vrf table ${table} + ip ${ns} link set ${vrf} up + ip ${ns} route add vrf ${vrf} unreachable default metric 8192 + ip ${ns} -6 route add vrf ${vrf} unreachable default metric 8192 + + ip ${ns} addr add 127.0.0.1/8 dev ${vrf} + ip ${ns} -6 addr add ::1 dev ${vrf} nodad + + ip ${ns} ru del pref 0 + ip ${ns} ru add pref 32765 from all lookup local + ip ${ns} -6 ru del pref 0 + ip ${ns} -6 ru add pref 32765 from all lookup local +} + +create_ns() +{ + local ns=$1 + local addr=$2 + local addr6=$3 + + [ -z "${addr}" ] && addr="-" + [ -z "${addr6}" ] && addr6="-" + + ip netns add ${ns} + + ip -netns ${ns} link set lo up + if [ "${addr}" != "-" ]; then + ip -netns ${ns} addr add dev lo ${addr} + fi + if [ "${addr6}" != "-" ]; then + ip -netns ${ns} -6 addr add dev lo ${addr6} + fi + + ip -netns ${ns} ro add unreachable default metric 8192 + ip -netns ${ns} -6 ro add unreachable default metric 8192 + + ip netns exec ${ns} sysctl -qw net.ipv4.ip_forward=1 + ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1 + ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.forwarding=1 + ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.forwarding=1 + ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.accept_dad=0 +} + +# create veth pair to connect namespaces and apply addresses. +connect_ns() +{ + local ns1=$1 + local ns1_dev=$2 + local ns1_addr=$3 + local ns1_addr6=$4 + local ns2=$5 + local ns2_dev=$6 + local ns2_addr=$7 + local ns2_addr6=$8 + local ns1arg + local ns2arg + + if [ -n "${ns1}" ]; then + ns1arg="-netns ${ns1}" + fi + if [ -n "${ns2}" ]; then + ns2arg="-netns ${ns2}" + fi + + ip ${ns1arg} li add ${ns1_dev} type veth peer name tmp + ip ${ns1arg} li set ${ns1_dev} up + ip ${ns1arg} li set tmp netns ${ns2} name ${ns2_dev} + ip ${ns2arg} li set ${ns2_dev} up + + if [ "${ns1_addr}" != "-" ]; then + ip ${ns1arg} addr add dev ${ns1_dev} ${ns1_addr} + ip ${ns2arg} addr add dev ${ns2_dev} ${ns2_addr} + fi + + if [ "${ns1_addr6}" != "-" ]; then + ip ${ns1arg} addr add dev ${ns1_dev} ${ns1_addr6} nodad + ip ${ns2arg} addr add dev ${ns2_dev} ${ns2_addr6} nodad + fi +} + +################################################################################ + +cleanup() +{ + ip netns del host1 + ip netns del host2 +} + +setup() +{ + create_ns "host1" + create_ns "host2" + + connect_ns "host1" eth0 ${HOST1_4}/24 ${HOST1_6}/64 \ + "host2" eth0 ${HOST2_4}/24 ${HOST2_6}/64 + + create_vrf "host1" ${VRF} ${TABLE} + ip -netns host1 link set dev eth0 master ${VRF} +} + +cleanup_xfrm() +{ + for ns in host1 host2 + do + for x in state policy + do + ip -netns ${ns} xfrm ${x} flush + ip -6 -netns ${ns} xfrm ${x} flush + done + done +} + +setup_xfrm() +{ + local h1_4=$1 + local h2_4=$2 + local h1_6=$3 + local h2_6=$4 + local devarg="$5" + + # + # policy + # + + # host1 - IPv4 out + ip -netns host1 xfrm policy add \ + src ${h1_4} dst ${h2_4} ${devarg} dir out \ + tmpl src ${HOST1_4} dst ${HOST2_4} proto esp mode tunnel + + # host2 - IPv4 in + ip -netns host2 xfrm policy add \ + src ${h1_4} dst ${h2_4} dir in \ + tmpl src ${HOST1_4} dst ${HOST2_4} proto esp mode tunnel + + # host1 - IPv4 in + ip -netns host1 xfrm policy add \ + src ${h2_4} dst ${h1_4} ${devarg} dir in \ + tmpl src ${HOST2_4} dst ${HOST1_4} proto esp mode tunnel + + # host2 - IPv4 out + ip -netns host2 xfrm policy add \ + src ${h2_4} dst ${h1_4} dir out \ + tmpl src ${HOST2_4} dst ${HOST1_4} proto esp mode tunnel + + + # host1 - IPv6 out + ip -6 -netns host1 xfrm policy add \ + src ${h1_6} dst ${h2_6} ${devarg} dir out \ + tmpl src ${HOST1_6} dst ${HOST2_6} proto esp mode tunnel + + # host2 - IPv6 in + ip -6 -netns host2 xfrm policy add \ + src ${h1_6} dst ${h2_6} dir in \ + tmpl src ${HOST1_6} dst ${HOST2_6} proto esp mode tunnel + + # host1 - IPv6 in + ip -6 -netns host1 xfrm policy add \ + src ${h2_6} dst ${h1_6} ${devarg} dir in \ + tmpl src ${HOST2_6} dst ${HOST1_6} proto esp mode tunnel + + # host2 - IPv6 out + ip -6 -netns host2 xfrm policy add \ + src ${h2_6} dst ${h1_6} dir out \ + tmpl src ${HOST2_6} dst ${HOST1_6} proto esp mode tunnel + + # + # state + # + ip -netns host1 xfrm state add src ${HOST1_4} dst ${HOST2_4} \ + proto esp spi ${SPI_1} reqid 0 mode tunnel \ + replay-window 4 replay-oseq 0x4 \ + auth-trunc 'hmac(md5)' ${AUTH_1} 96 \ + enc 'cbc(des3_ede)' ${ENC_1} \ + sel src ${h1_4} dst ${h2_4} ${devarg} + + ip -netns host2 xfrm state add src ${HOST1_4} dst ${HOST2_4} \ + proto esp spi ${SPI_1} reqid 0 mode tunnel \ + replay-window 4 replay-oseq 0x4 \ + auth-trunc 'hmac(md5)' ${AUTH_1} 96 \ + enc 'cbc(des3_ede)' ${ENC_1} \ + sel src ${h1_4} dst ${h2_4} + + + ip -netns host1 xfrm state add src ${HOST2_4} dst ${HOST1_4} \ + proto esp spi ${SPI_2} reqid 0 mode tunnel \ + replay-window 4 replay-oseq 0x4 \ + auth-trunc 'hmac(md5)' ${AUTH_2} 96 \ + enc 'cbc(des3_ede)' ${ENC_2} \ + sel src ${h2_4} dst ${h1_4} ${devarg} + + ip -netns host2 xfrm state add src ${HOST2_4} dst ${HOST1_4} \ + proto esp spi ${SPI_2} reqid 0 mode tunnel \ + replay-window 4 replay-oseq 0x4 \ + auth-trunc 'hmac(md5)' ${AUTH_2} 96 \ + enc 'cbc(des3_ede)' ${ENC_2} \ + sel src ${h2_4} dst ${h1_4} + + + ip -6 -netns host1 xfrm state add src ${HOST1_6} dst ${HOST2_6} \ + proto esp spi ${SPI_1} reqid 0 mode tunnel \ + replay-window 4 replay-oseq 0x4 \ + auth-trunc 'hmac(md5)' ${AUTH_1} 96 \ + enc 'cbc(des3_ede)' ${ENC_1} \ + sel src ${h1_6} dst ${h2_6} ${devarg} + + ip -6 -netns host2 xfrm state add src ${HOST1_6} dst ${HOST2_6} \ + proto esp spi ${SPI_1} reqid 0 mode tunnel \ + replay-window 4 replay-oseq 0x4 \ + auth-trunc 'hmac(md5)' ${AUTH_1} 96 \ + enc 'cbc(des3_ede)' ${ENC_1} \ + sel src ${h1_6} dst ${h2_6} + + + ip -6 -netns host1 xfrm state add src ${HOST2_6} dst ${HOST1_6} \ + proto esp spi ${SPI_2} reqid 0 mode tunnel \ + replay-window 4 replay-oseq 0x4 \ + auth-trunc 'hmac(md5)' ${AUTH_2} 96 \ + enc 'cbc(des3_ede)' ${ENC_2} \ + sel src ${h2_6} dst ${h1_6} ${devarg} + + ip -6 -netns host2 xfrm state add src ${HOST2_6} dst ${HOST1_6} \ + proto esp spi ${SPI_2} reqid 0 mode tunnel \ + replay-window 4 replay-oseq 0x4 \ + auth-trunc 'hmac(md5)' ${AUTH_2} 96 \ + enc 'cbc(des3_ede)' ${ENC_2} \ + sel src ${h2_6} dst ${h1_6} +} + +cleanup_xfrm_dev() +{ + ip -netns host1 li del xfrm0 + ip -netns host2 addr del ${XFRM2_4}/24 dev eth0 + ip -netns host2 addr del ${XFRM2_6}/64 dev eth0 +} + +setup_xfrm_dev() +{ + local vrfarg="vrf ${VRF}" + + ip -netns host1 li add type xfrm dev eth0 if_id ${IF_ID} + ip -netns host1 li set xfrm0 ${vrfarg} up + ip -netns host1 addr add ${XFRM1_4}/24 dev xfrm0 + ip -netns host1 addr add ${XFRM1_6}/64 dev xfrm0 + + ip -netns host2 addr add ${XFRM2_4}/24 dev eth0 + ip -netns host2 addr add ${XFRM2_6}/64 dev eth0 + + setup_xfrm ${XFRM1_4} ${XFRM2_4} ${XFRM1_6} ${XFRM2_6} "if_id ${IF_ID}" +} + +run_tests() +{ + cleanup_xfrm + + # no IPsec + run_cmd_host1 ip vrf exec ${VRF} ping -c1 -w1 ${HOST2_4} + log_test $? 0 "IPv4 no xfrm policy" + run_cmd_host1 ip vrf exec ${VRF} ${ping6} -c1 -w1 ${HOST2_6} + log_test $? 0 "IPv6 no xfrm policy" + + # xfrm without VRF in sel + setup_xfrm ${HOST1_4} ${HOST2_4} ${HOST1_6} ${HOST2_6} + run_cmd_host1 ip vrf exec ${VRF} ping -c1 -w1 ${HOST2_4} + log_test $? 0 "IPv4 xfrm policy based on address" + run_cmd_host1 ip vrf exec ${VRF} ${ping6} -c1 -w1 ${HOST2_6} + log_test $? 0 "IPv6 xfrm policy based on address" + cleanup_xfrm + + # xfrm with VRF in sel + # Known failure: ipv4 resets the flow oif after the lookup. Fix is + # not straightforward. + # setup_xfrm ${HOST1_4} ${HOST2_4} ${HOST1_6} ${HOST2_6} "dev ${VRF}" + # run_cmd_host1 ip vrf exec ${VRF} ping -c1 -w1 ${HOST2_4} + # log_test $? 0 "IPv4 xfrm policy with VRF in selector" + run_cmd_host1 ip vrf exec ${VRF} ${ping6} -c1 -w1 ${HOST2_6} + log_test $? 0 "IPv6 xfrm policy with VRF in selector" + cleanup_xfrm + + # xfrm with enslaved device in sel + # Known failures: combined with the above, __xfrm{4,6}_selector_match + # needs to consider both l3mdev and enslaved device index. + # setup_xfrm ${HOST1_4} ${HOST2_4} ${HOST1_6} ${HOST2_6} "dev eth0" + # run_cmd_host1 ip vrf exec ${VRF} ping -c1 -w1 ${HOST2_4} + # log_test $? 0 "IPv4 xfrm policy with enslaved device in selector" + # run_cmd_host1 ip vrf exec ${VRF} ${ping6} -c1 -w1 ${HOST2_6} + # log_test $? 0 "IPv6 xfrm policy with enslaved device in selector" + # cleanup_xfrm + + # xfrm device + setup_xfrm_dev + run_cmd_host1 ip vrf exec ${VRF} ping -c1 -w1 ${XFRM2_4} + log_test $? 0 "IPv4 xfrm policy with xfrm device" + run_cmd_host1 ip vrf exec ${VRF} ${ping6} -c1 -w1 ${XFRM2_6} + log_test $? 0 "IPv6 xfrm policy with xfrm device" + cleanup_xfrm_dev +} + +################################################################################ +# usage + +usage() +{ + cat <<EOF +usage: ${0##*/} OPTS + + -p Pause on fail + -v verbose mode (show commands and output) + +done +EOF +} + +################################################################################ +# main + +while getopts :pv o +do + case $o in + p) PAUSE_ON_FAIL=yes;; + v) VERBOSE=$(($VERBOSE + 1));; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + +cleanup 2>/dev/null +setup + +echo +echo "No qdisc on VRF device" +run_tests + +run_cmd_host1 tc qdisc add dev ${VRF} root netem delay 100ms +echo +echo "netem qdisc on VRF device" +run_tests + +printf "\nTests passed: %3d\n" ${nsuccess} +printf "Tests failed: %3d\n" ${nfail} + +exit $ret diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c index c0dd10257df5..da7a9dda9490 100644 --- a/tools/testing/selftests/ptp/testptp.c +++ b/tools/testing/selftests/ptp/testptp.c @@ -269,14 +269,16 @@ int main(int argc, char *argv[]) " %d programmable periodic signals\n" " %d pulse per second\n" " %d programmable pins\n" - " %d cross timestamping\n", + " %d cross timestamping\n" + " %d adjust_phase\n", caps.max_adj, caps.n_alarm, caps.n_ext_ts, caps.n_per_out, caps.pps, caps.n_pins, - caps.cross_timestamping); + caps.cross_timestamping, + caps.adjust_phase); } } diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json b/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json index f8ea6f5fa8e9..72cdc3c800a5 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json @@ -1472,6 +1472,31 @@ ] }, { + "id": "94bb", + "name": "Add pedit action with LAYERED_OP ip6 traffic_class", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit ex munge ip6 traffic_class set 0x40 continue", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit", + "matchPattern": "ipv6\\+0: val 04000000 mask f00fffff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { "id": "6f5e", "name": "Add pedit action with LAYERED_OP ip6 flow_lbl", "category": [ diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json index 8877f7b2b809..bb543bf69d69 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json @@ -32,7 +32,7 @@ "setup": [ "$TC qdisc add dev $DEV2 ingress" ], - "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 parent ffff: handle 0xffffffff flower action ok", + "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress handle 0xffffffff flower action ok", "expExitCode": "0", "verifyCmd": "$TC filter show dev $DEV2 ingress", "matchPattern": "filter protocol ip pref 1 flower.*handle 0xffffffff", @@ -77,9 +77,9 @@ }, "setup": [ "$TC qdisc add dev $DEV2 ingress", - "$TC filter add dev $DEV2 protocol ip prio 1 parent ffff: flower dst_mac e4:11:22:11:4a:51 src_mac e4:11:22:11:4a:50 ip_proto tcp src_ip 1.1.1.1 dst_ip 2.2.2.2 action drop" + "$TC filter add dev $DEV2 protocol ip prio 1 ingress flower dst_mac e4:11:22:11:4a:51 src_mac e4:11:22:11:4a:50 ip_proto tcp src_ip 1.1.1.1 dst_ip 2.2.2.2 action drop" ], - "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip prio 1 parent ffff: flower dst_mac e4:11:22:11:4a:51 src_mac e4:11:22:11:4a:50 ip_proto tcp src_ip 1.1.1.1 dst_ip 2.2.2.2 action drop", + "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip prio 1 ingress flower dst_mac e4:11:22:11:4a:51 src_mac e4:11:22:11:4a:50 ip_proto tcp src_ip 1.1.1.1 dst_ip 2.2.2.2 action drop", "expExitCode": "2", "verifyCmd": "$TC -s filter show dev $DEV2 ingress", "matchPattern": "filter protocol ip pref 1 flower chain 0 handle", @@ -87,5 +87,43 @@ "teardown": [ "$TC qdisc del dev $DEV2 ingress" ] + }, + { + "id": "7c65", + "name": "Add flower filter and then terse dump it", + "category": [ + "filter", + "flower" + ], + "setup": [ + "$TC qdisc add dev $DEV2 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress flower dst_mac e4:11:22:11:4a:51 action drop", + "expExitCode": "0", + "verifyCmd": "$TC filter show terse dev $DEV2 ingress", + "matchPattern": "filter protocol ip pref 1 flower.*handle", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV2 ingress" + ] + }, + { + "id": "d45e", + "name": "Add flower filter and verify that terse dump doesn't output filter key", + "category": [ + "filter", + "flower" + ], + "setup": [ + "$TC qdisc add dev $DEV2 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress flower dst_mac e4:11:22:11:4a:51 action drop", + "expExitCode": "0", + "verifyCmd": "$TC filter show terse dev $DEV2 ingress", + "matchPattern": " dst_mac e4:11:22:11:4a:51", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV2 ingress" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tdc_batch.py b/tools/testing/selftests/tc-testing/tdc_batch.py index 6a2bd2cf528e..995f66ce43eb 100755 --- a/tools/testing/selftests/tc-testing/tdc_batch.py +++ b/tools/testing/selftests/tc-testing/tdc_batch.py @@ -72,21 +72,21 @@ mac_prefix = args.mac_prefix def format_add_filter(device, prio, handle, skip, src_mac, dst_mac, share_action): - return ("filter add dev {} {} protocol ip parent ffff: handle {} " + return ("filter add dev {} {} protocol ip ingress handle {} " " flower {} src_mac {} dst_mac {} action drop {}".format( device, prio, handle, skip, src_mac, dst_mac, share_action)) def format_rep_filter(device, prio, handle, skip, src_mac, dst_mac, share_action): - return ("filter replace dev {} {} protocol ip parent ffff: handle {} " + return ("filter replace dev {} {} protocol ip ingress handle {} " " flower {} src_mac {} dst_mac {} action drop {}".format( device, prio, handle, skip, src_mac, dst_mac, share_action)) def format_del_filter(device, prio, handle, skip, src_mac, dst_mac, share_action): - return ("filter del dev {} {} protocol ip parent ffff: handle {} " + return ("filter del dev {} {} protocol ip ingress handle {} " "flower".format(device, prio, handle)) |