Diffstat (limited to 'tools')
-rw-r--r--  tools/arch/parisc/include/uapi/asm/mman.h | 1
-rw-r--r--  tools/arch/x86/include/asm/mcsafe_test.h | 13
-rw-r--r--  tools/arch/x86/include/asm/orc_types.h | 34
-rw-r--r--  tools/arch/x86/lib/memcpy_64.S | 115
-rw-r--r--  tools/arch/x86/tools/gen-insn-attr-x86.awk | 50
-rw-r--r--  tools/bootconfig/main.c | 147
-rwxr-xr-x  tools/bootconfig/scripts/bconf2ftrace.sh | 199
-rw-r--r--  tools/bootconfig/scripts/ftrace.sh | 109
-rwxr-xr-x  tools/bootconfig/scripts/ftrace2bconf.sh | 244
-rw-r--r--  tools/bootconfig/scripts/xbc.sh | 56
-rw-r--r--  tools/bpf/bpftool/Documentation/Makefile | 15
-rw-r--r--  tools/bpf/bpftool/Documentation/bpftool-btf.rst | 37
-rw-r--r--  tools/bpf/bpftool/Documentation/bpftool-cgroup.rst | 33
-rw-r--r--  tools/bpf/bpftool/Documentation/bpftool-feature.rst | 33
-rw-r--r--  tools/bpf/bpftool/Documentation/bpftool-gen.rst | 37
-rw-r--r--  tools/bpf/bpftool/Documentation/bpftool-iter.rst | 27
-rw-r--r--  tools/bpf/bpftool/Documentation/bpftool-link.rst | 36
-rw-r--r--  tools/bpf/bpftool/Documentation/bpftool-map.rst | 48
-rw-r--r--  tools/bpf/bpftool/Documentation/bpftool-net.rst | 34
-rw-r--r--  tools/bpf/bpftool/Documentation/bpftool-perf.rst | 34
-rw-r--r--  tools/bpf/bpftool/Documentation/bpftool-prog.rst | 34
-rw-r--r--  tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst | 35
-rw-r--r--  tools/bpf/bpftool/Documentation/bpftool.rst | 34
-rw-r--r--  tools/bpf/bpftool/Documentation/common_options.rst | 22
-rw-r--r--  tools/bpf/bpftool/Makefile | 8
-rw-r--r--  tools/bpf/bpftool/bash-completion/bpftool | 25
-rw-r--r--  tools/bpf/bpftool/gen.c | 2
-rw-r--r--  tools/bpf/bpftool/json_writer.c | 6
-rw-r--r--  tools/bpf/bpftool/json_writer.h | 3
-rw-r--r--  tools/bpf/bpftool/link.c | 44
-rw-r--r--  tools/bpf/bpftool/main.c | 33
-rw-r--r--  tools/bpf/bpftool/map.c | 152
-rw-r--r--  tools/bpf/bpftool/net.c | 299
-rw-r--r--  tools/bpf/bpftool/prog.c | 203
-rw-r--r--  tools/bpf/resolve_btfids/Makefile | 2
-rw-r--r--  tools/bpf/resolve_btfids/main.c | 29
-rw-r--r--  tools/build/Makefile | 2
-rw-r--r--  tools/build/Makefile.feature | 1
-rw-r--r--  tools/build/feature/Makefile | 4
-rw-r--r--  tools/build/feature/test-all.c | 4
-rw-r--r--  tools/build/feature/test-libelf-mmap.c | 9
-rw-r--r--  tools/cgroup/iocost_monitor.py | 54
-rw-r--r--  tools/gpio/gpio-event-mon.c | 146
-rw-r--r--  tools/gpio/gpio-hammer.c | 56
-rw-r--r--  tools/gpio/gpio-utils.c | 176
-rw-r--r--  tools/gpio/gpio-utils.h | 48
-rw-r--r--  tools/gpio/gpio-watch.c | 16
-rw-r--r--  tools/gpio/lsgpio.c | 60
-rw-r--r--  tools/iio/iio_event_monitor.c | 2
-rw-r--r--  tools/include/linux/btf_ids.h | 59
-rw-r--r--  tools/include/linux/objtool.h | 129
-rw-r--r--  tools/include/linux/static_call_types.h | 35
-rw-r--r--  tools/include/uapi/asm-generic/unistd.h | 14
-rw-r--r--  tools/include/uapi/linux/bpf.h | 655
-rw-r--r--  tools/lib/bpf/Makefile | 28
-rw-r--r--  tools/lib/bpf/bpf.c | 70
-rw-r--r--  tools/lib/bpf/bpf.h | 39
-rw-r--r--  tools/lib/bpf/bpf_core_read.h | 120
-rw-r--r--  tools/lib/bpf/bpf_helpers.h | 49
-rw-r--r--  tools/lib/bpf/bpf_prog_linfo.c | 3
-rw-r--r--  tools/lib/bpf/bpf_tracing.h | 4
-rw-r--r--  tools/lib/bpf/btf.c | 1625
-rw-r--r--  tools/lib/bpf/btf.h | 103
-rw-r--r--  tools/lib/bpf/btf_dump.c | 87
-rw-r--r--  tools/lib/bpf/hashmap.c | 3
-rw-r--r--  tools/lib/bpf/hashmap.h | 12
-rw-r--r--  tools/lib/bpf/libbpf.c | 3409
-rw-r--r--  tools/lib/bpf/libbpf.h | 12
-rw-r--r--  tools/lib/bpf/libbpf.map | 38
-rw-r--r--  tools/lib/bpf/libbpf_common.h | 2
-rw-r--r--  tools/lib/bpf/libbpf_internal.h | 147
-rw-r--r--  tools/lib/bpf/libbpf_probes.c | 8
-rw-r--r--  tools/lib/bpf/netlink.c | 128
-rw-r--r--  tools/lib/bpf/nlattr.c | 9
-rw-r--r--  tools/lib/bpf/ringbuf.c | 8
-rw-r--r--  tools/lib/bpf/xsk.c | 383
-rw-r--r--  tools/lib/bpf/xsk.h | 9
-rw-r--r--  tools/lib/subcmd/help.c | 10
-rw-r--r--  tools/memory-model/Documentation/cheatsheet.txt | 33
-rw-r--r--  tools/memory-model/Documentation/litmus-tests.txt | 1074
-rw-r--r--  tools/memory-model/Documentation/recipes.txt | 4
-rw-r--r--  tools/memory-model/Documentation/references.txt | 2
-rw-r--r--  tools/memory-model/Documentation/simple.txt | 271
-rw-r--r--  tools/memory-model/README | 160
-rw-r--r--  tools/objtool/Makefile | 6
-rw-r--r--  tools/objtool/arch.h | 4
-rw-r--r--  tools/objtool/arch/x86/Build | 1
-rw-r--r--  tools/objtool/arch/x86/decode.c | 37
-rw-r--r--  tools/objtool/arch/x86/include/arch_special.h | 20
-rw-r--r--  tools/objtool/arch/x86/special.c | 145
-rw-r--r--  tools/objtool/builtin-check.c | 15
-rw-r--r--  tools/objtool/builtin-orc.c | 27
-rw-r--r--  tools/objtool/check.c | 422
-rw-r--r--  tools/objtool/check.h | 10
-rw-r--r--  tools/objtool/elf.c | 8
-rw-r--r--  tools/objtool/elf.h | 3
-rw-r--r--  tools/objtool/objtool.c | 30
-rw-r--r--  tools/objtool/objtool.h | 7
-rw-r--r--  tools/objtool/orc_dump.c | 9
-rw-r--r--  tools/objtool/orc_gen.c | 12
-rw-r--r--  tools/objtool/special.c | 48
-rw-r--r--  tools/objtool/special.h | 10
-rwxr-xr-x  tools/objtool/sync-check.sh | 33
-rw-r--r--  tools/objtool/weak.c | 6
-rw-r--r--  tools/perf/Makefile.config | 6
-rw-r--r--  tools/perf/arch/powerpc/entry/syscalls/syscall.tbl | 12
-rw-r--r--  tools/perf/arch/s390/entry/syscalls/syscall.tbl | 12
-rw-r--r--  tools/perf/arch/x86/entry/syscalls/syscall_64.tbl | 10
-rw-r--r--  tools/perf/bench/Build | 1
-rw-r--r--  tools/perf/bench/mem-memcpy-x86-64-lib.c | 24
-rw-r--r--  tools/perf/util/bpf-loader.c | 12
-rw-r--r--  tools/perf/util/symbol.h | 2
-rw-r--r--  tools/power/acpi/Makefile | 2
-rw-r--r--  tools/power/acpi/os_specific/service_layers/oslinuxtbl.c | 2
-rw-r--r--  tools/power/cpupower/utils/cpufreq-set.c | 14
-rwxr-xr-x  tools/power/pm-graph/sleepgraph.py | 2
-rw-r--r--  tools/power/x86/intel-speed-select/isst-config.c | 23
-rw-r--r--  tools/power/x86/intel-speed-select/isst-core.c | 8
-rw-r--r--  tools/power/x86/intel-speed-select/isst.h | 2
-rw-r--r--  tools/testing/kunit/configs/broken_on_uml.config | 1
-rwxr-xr-x  tools/testing/kunit/kunit.py | 58
-rw-r--r--  tools/testing/kunit/kunit_json.py | 63
-rw-r--r--  tools/testing/kunit/kunit_kernel.py | 27
-rwxr-xr-x  tools/testing/kunit/kunit_tool_test.py | 33
-rw-r--r--  tools/testing/nvdimm/dax-dev.c | 22
-rw-r--r--  tools/testing/nvdimm/test/iomap.c | 2
-rw-r--r--  tools/testing/nvdimm/test/nfit.c | 49
-rw-r--r--  tools/testing/selftests/Makefile | 34
-rw-r--r--  tools/testing/selftests/arm64/Makefile | 2
-rw-r--r--  tools/testing/selftests/arm64/fp/.gitignore | 5
-rw-r--r--  tools/testing/selftests/arm64/fp/Makefile | 17
-rw-r--r--  tools/testing/selftests/arm64/fp/README | 100
-rw-r--r--  tools/testing/selftests/arm64/fp/asm-offsets.h | 11
-rw-r--r--  tools/testing/selftests/arm64/fp/assembler.h | 57
-rwxr-xr-x  tools/testing/selftests/arm64/fp/fpsimd-stress | 60
-rw-r--r--  tools/testing/selftests/arm64/fp/fpsimd-test.S | 482
-rw-r--r--  tools/testing/selftests/arm64/fp/sve-probe-vls.c | 58
-rw-r--r--  tools/testing/selftests/arm64/fp/sve-ptrace-asm.S | 33
-rw-r--r--  tools/testing/selftests/arm64/fp/sve-ptrace.c | 336
-rwxr-xr-x  tools/testing/selftests/arm64/fp/sve-stress | 59
-rw-r--r--  tools/testing/selftests/arm64/fp/sve-test.S | 672
-rw-r--r--  tools/testing/selftests/arm64/fp/vlset.c | 155
-rw-r--r--  tools/testing/selftests/arm64/mte/.gitignore | 6
-rw-r--r--  tools/testing/selftests/arm64/mte/Makefile | 29
-rw-r--r--  tools/testing/selftests/arm64/mte/check_buffer_fill.c | 475
-rw-r--r--  tools/testing/selftests/arm64/mte/check_child_memory.c | 195
-rw-r--r--  tools/testing/selftests/arm64/mte/check_ksm_options.c | 159
-rw-r--r--  tools/testing/selftests/arm64/mte/check_mmap_options.c | 262
-rw-r--r--  tools/testing/selftests/arm64/mte/check_tags_inclusion.c | 185
-rw-r--r--  tools/testing/selftests/arm64/mte/check_user_mem.c | 111
-rw-r--r--  tools/testing/selftests/arm64/mte/mte_common_util.c | 341
-rw-r--r--  tools/testing/selftests/arm64/mte/mte_common_util.h | 118
-rw-r--r--  tools/testing/selftests/arm64/mte/mte_def.h | 60
-rw-r--r--  tools/testing/selftests/arm64/mte/mte_helper.S | 128
-rw-r--r--  tools/testing/selftests/arm64/pauth/.gitignore | 2
-rw-r--r--  tools/testing/selftests/arm64/pauth/Makefile | 39
-rw-r--r--  tools/testing/selftests/arm64/pauth/exec_target.c | 34
-rw-r--r--  tools/testing/selftests/arm64/pauth/helper.c | 39
-rw-r--r--  tools/testing/selftests/arm64/pauth/helper.h | 28
-rw-r--r--  tools/testing/selftests/arm64/pauth/pac.c | 368
-rw-r--r--  tools/testing/selftests/arm64/pauth/pac_corruptor.S | 19
-rw-r--r--  tools/testing/selftests/bpf/.gitignore | 2
-rw-r--r--  tools/testing/selftests/bpf/Makefile | 14
-rw-r--r--  tools/testing/selftests/bpf/README.rst | 59
-rw-r--r--  tools/testing/selftests/bpf/bench.c | 5
-rw-r--r--  tools/testing/selftests/bpf/benchs/bench_rename.c | 17
-rw-r--r--  tools/testing/selftests/bpf/benchs/bench_trigger.c | 17
-rw-r--r--  tools/testing/selftests/bpf/bpf_tcp_helpers.h | 13
-rw-r--r--  tools/testing/selftests/bpf/flow_dissector_load.h | 8
-rw-r--r--  tools/testing/selftests/bpf/network_helpers.c | 37
-rw-r--r--  tools/testing/selftests/bpf/network_helpers.h | 2
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/align.c | 16
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/bpf_iter.c | 115
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c | 4
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/btf.c (renamed from tools/testing/selftests/bpf/test_btf.c) | 410
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/btf_dump.c | 105
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/btf_endian.c | 101
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c | 74
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c | 234
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/btf_write.c | 244
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/cls_redirect.c | 72
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/core_autosize.c | 225
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/core_reloc.c | 350
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/d_path.c | 157
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c | 293
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/global_data_init.c | 3
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/ksyms.c | 42
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/ksyms_btf.c | 88
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/l4lb_all.c | 9
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/metadata.c | 141
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/pe_preserve_elems.c | 66
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/perf_buffer.c | 65
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/pinning.c | 49
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c | 96
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/reference_tracking.c | 2
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/resolve_btfids.c | 45
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/sk_assign.c | 7
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/snprintf_btf.c | 62
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/sock_fields.c | 382
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/sockmap_basic.c | 189
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/sockopt_sk.c | 4
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/subprogs.c | 31
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/tailcalls.c | 332
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c | 610
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/test_bpffs.c | 94
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/test_local_storage.c | 60
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/test_lsm.c | 9
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/test_overhead.c | 14
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/test_profiler.c | 72
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/trace_ext.c | 111
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/xdp_noinline.c | 51
-rw-r--r--  tools/testing/selftests/bpf/progs/bpf_cubic.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/bpf_dctcp.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/bpf_flow.c | 12
-rw-r--r--  tools/testing/selftests/bpf/progs/bpf_iter.h | 32
-rw-r--r--  tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c | 59
-rw-r--r--  tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c | 50
-rw-r--r--  tools/testing/selftests/bpf/progs/bpf_iter_task_file.c | 10
-rw-r--r--  tools/testing/selftests/bpf/progs/btf__core_reloc_enumval.c | 3
-rw-r--r--  tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___diff.c | 3
-rw-r--r--  tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___err_missing.c | 3
-rw-r--r--  tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___val3_missing.c | 3
-rw-r--r--  tools/testing/selftests/bpf/progs/btf__core_reloc_size___err_ambiguous.c | 4
-rw-r--r--  tools/testing/selftests/bpf/progs/btf__core_reloc_type_based.c | 3
-rw-r--r--  tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___all_missing.c | 3
-rw-r--r--  tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff_sz.c | 3
-rw-r--r--  tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___fn_wrong_args.c | 3
-rw-r--r--  tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___incompat.c | 3
-rw-r--r--  tools/testing/selftests/bpf/progs/btf__core_reloc_type_id.c | 3
-rw-r--r--  tools/testing/selftests/bpf/progs/btf__core_reloc_type_id___missing_targets.c | 3
-rw-r--r--  tools/testing/selftests/bpf/progs/btf_ptr.h | 27
-rw-r--r--  tools/testing/selftests/bpf/progs/connect4_prog.c | 19
-rw-r--r--  tools/testing/selftests/bpf/progs/core_reloc_types.h | 352
-rw-r--r--  tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c | 27
-rw-r--r--  tools/testing/selftests/bpf/progs/fmod_ret_freplace.c | 14
-rw-r--r--  tools/testing/selftests/bpf/progs/freplace_attach_probe.c | 40
-rw-r--r--  tools/testing/selftests/bpf/progs/freplace_cls_redirect.c | 34
-rw-r--r--  tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c | 19
-rw-r--r--  tools/testing/selftests/bpf/progs/freplace_get_constant.c | 15
-rw-r--r--  tools/testing/selftests/bpf/progs/local_storage.c | 140
-rw-r--r--  tools/testing/selftests/bpf/progs/lsm.c | 64
-rw-r--r--  tools/testing/selftests/bpf/progs/map_ptr_kern.c | 16
-rw-r--r--  tools/testing/selftests/bpf/progs/metadata_unused.c | 15
-rw-r--r--  tools/testing/selftests/bpf/progs/metadata_used.c | 15
-rw-r--r--  tools/testing/selftests/bpf/progs/netif_receive_skb.c | 249
-rw-r--r--  tools/testing/selftests/bpf/progs/profiler.h | 177
-rw-r--r--  tools/testing/selftests/bpf/progs/profiler.inc.h | 969
-rw-r--r--  tools/testing/selftests/bpf/progs/profiler1.c | 6
-rw-r--r--  tools/testing/selftests/bpf/progs/profiler2.c | 6
-rw-r--r--  tools/testing/selftests/bpf/progs/profiler3.c | 6
-rw-r--r--  tools/testing/selftests/bpf/progs/pyperf.h | 11
-rw-r--r--  tools/testing/selftests/bpf/progs/pyperf_subprogs.c | 5
-rw-r--r--  tools/testing/selftests/bpf/progs/strobemeta.h | 30
-rw-r--r--  tools/testing/selftests/bpf/progs/strobemeta_subprogs.c | 10
-rw-r--r--  tools/testing/selftests/bpf/progs/tailcall1.c | 28
-rw-r--r--  tools/testing/selftests/bpf/progs/tailcall2.c | 14
-rw-r--r--  tools/testing/selftests/bpf/progs/tailcall3.c | 4
-rw-r--r--  tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c | 38
-rw-r--r--  tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c | 41
-rw-r--r--  tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c | 61
-rw-r--r--  tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c | 61
-rw-r--r--  tools/testing/selftests/bpf/progs/test_btf_map_in_map.c | 74
-rw-r--r--  tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c | 174
-rw-r--r--  tools/testing/selftests/bpf/progs/test_cls_redirect.c | 105
-rw-r--r--  tools/testing/selftests/bpf/progs/test_cls_redirect_subprogs.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/test_core_autosize.c | 172
-rw-r--r--  tools/testing/selftests/bpf/progs/test_core_reloc_enumval.c | 72
-rw-r--r--  tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c | 110
-rw-r--r--  tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c | 115
-rw-r--r--  tools/testing/selftests/bpf/progs/test_d_path.c | 65
-rw-r--r--  tools/testing/selftests/bpf/progs/test_ksyms_btf.c | 55
-rw-r--r--  tools/testing/selftests/bpf/progs/test_l4lb_noinline.c | 41
-rw-r--r--  tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c | 325
-rw-r--r--  tools/testing/selftests/bpf/progs/test_overhead.c | 6
-rw-r--r--  tools/testing/selftests/bpf/progs/test_pe_preserve_elems.c | 38
-rw-r--r--  tools/testing/selftests/bpf/progs/test_pkt_access.c | 20
-rw-r--r--  tools/testing/selftests/bpf/progs/test_raw_tp_test_run.c | 24
-rw-r--r--  tools/testing/selftests/bpf/progs/test_sk_lookup.c | 216
-rw-r--r--  tools/testing/selftests/bpf/progs/test_sock_fields.c (renamed from tools/testing/selftests/bpf/progs/test_sock_fields_kern.c) | 176
-rw-r--r--  tools/testing/selftests/bpf/progs/test_sockmap_invalid_update.c | 23
-rw-r--r--  tools/testing/selftests/bpf/progs/test_sockmap_kern.h | 34
-rw-r--r--  tools/testing/selftests/bpf/progs/test_sockmap_update.c | 48
-rw-r--r--  tools/testing/selftests/bpf/progs/test_subprogs.c | 103
-rw-r--r--  tools/testing/selftests/bpf/progs/test_sysctl_loop1.c | 4
-rw-r--r--  tools/testing/selftests/bpf/progs/test_sysctl_loop2.c | 4
-rw-r--r--  tools/testing/selftests/bpf/progs/test_sysctl_prog.c | 4
-rw-r--r--  tools/testing/selftests/bpf/progs/test_tc_neigh.c | 148
-rw-r--r--  tools/testing/selftests/bpf/progs/test_tc_peer.c | 45
-rw-r--r--  tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c | 626
-rw-r--r--  tools/testing/selftests/bpf/progs/test_trace_ext.c | 18
-rw-r--r--  tools/testing/selftests/bpf/progs/test_trace_ext_tracing.c | 25
-rw-r--r--  tools/testing/selftests/bpf/progs/test_vmlinux.c | 12
-rw-r--r--  tools/testing/selftests/bpf/progs/test_xdp_noinline.c | 36
-rw-r--r--  tools/testing/selftests/bpf/progs/trigger_bench.c | 7
-rwxr-xr-x  tools/testing/selftests/bpf/test_bpftool_build.sh | 21
-rwxr-xr-x  tools/testing/selftests/bpf/test_bpftool_metadata.sh | 82
-rw-r--r--  tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c | 1
-rw-r--r--  tools/testing/selftests/bpf/test_progs.h | 63
-rw-r--r--  tools/testing/selftests/bpf/test_sock_fields.c | 482
-rw-r--r--  tools/testing/selftests/bpf/test_socket_cookie.c | 2
-rw-r--r--  tools/testing/selftests/bpf/test_sockmap.c | 81
-rwxr-xr-x  tools/testing/selftests/bpf/test_tc_redirect.sh | 204
-rw-r--r--  tools/testing/selftests/bpf/test_tcp_hdr_options.h | 152
-rw-r--r--  tools/testing/selftests/bpf/test_verifier.c | 19
-rw-r--r--  tools/testing/selftests/bpf/trace_helpers.c | 27
-rw-r--r--  tools/testing/selftests/bpf/trace_helpers.h | 4
-rw-r--r--  tools/testing/selftests/bpf/verifier/and.c | 16
-rw-r--r--  tools/testing/selftests/bpf/verifier/basic.c | 2
-rw-r--r--  tools/testing/selftests/bpf/verifier/bounds.c | 146
-rw-r--r--  tools/testing/selftests/bpf/verifier/calls.c | 6
-rw-r--r--  tools/testing/selftests/bpf/verifier/d_path.c | 37
-rw-r--r--  tools/testing/selftests/bpf/verifier/direct_packet_access.c | 2
-rw-r--r--  tools/testing/selftests/bpf/verifier/ld_imm64.c | 8
-rw-r--r--  tools/testing/selftests/bpf/verifier/map_ptr.c | 32
-rw-r--r--  tools/testing/selftests/bpf/verifier/ref_tracking.c | 47
-rw-r--r--  tools/testing/selftests/bpf/verifier/regalloc.c | 269
-rw-r--r--  tools/testing/selftests/clone3/clone3.c | 45
-rw-r--r--  tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c | 4
-rw-r--r--  tools/testing/selftests/clone3/clone3_clear_sighand.c | 2
-rw-r--r--  tools/testing/selftests/clone3/clone3_selftests.h | 24
-rw-r--r--  tools/testing/selftests/clone3/clone3_set_tid.c | 4
-rwxr-xr-x  tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh | 33
-rwxr-xr-x  tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh | 9
-rwxr-xr-x  tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh | 379
-rw-r--r--  tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh | 14
-rwxr-xr-x  tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh | 5
-rwxr-xr-x  tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh | 403
-rwxr-xr-x  tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh | 6
-rw-r--r--  tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh | 1
-rw-r--r--  tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh | 12
-rwxr-xr-x  tools/testing/selftests/drivers/net/netdevsim/devlink.sh | 21
-rwxr-xr-x  tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh | 108
-rwxr-xr-x [-rw-r--r--]  tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh | 167
-rwxr-xr-x  tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh | 316
-rw-r--r--  tools/testing/selftests/exec/.gitignore | 1
-rw-r--r--  tools/testing/selftests/exec/Makefile | 9
-rw-r--r--  tools/testing/selftests/exec/load_address.c | 68
-rw-r--r--  tools/testing/selftests/firmware/.gitignore | 2
-rwxr-xr-x  tools/testing/selftests/firmware/fw_filesystem.sh | 91
-rw-r--r--  tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc | 2
-rw-r--r--  tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc | 2
-rw-r--r--  tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc | 2
-rw-r--r--  tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc | 4
-rw-r--r--  tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc | 2
-rw-r--r--  tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc | 2
-rw-r--r--  tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc | 4
-rw-r--r--  tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc | 2
-rw-r--r--  tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc | 4
-rw-r--r--  tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc | 10
-rw-r--r--  tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc | 2
-rw-r--r--  tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc | 14
-rw-r--r--  tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc | 2
-rw-r--r--  tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc | 18
-rw-r--r--  tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc | 4
-rw-r--r--  tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_return_suffix.tc | 21
-rw-r--r--  tools/testing/selftests/ftrace/test.d/kprobe/profile.tc | 2
-rw-r--r--  tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc | 6
-rw-r--r--  tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc | 8
-rw-r--r--  tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc | 31
-rw-r--r--  tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc | 19
-rw-r--r--  tools/testing/selftests/kselftest_harness.h | 5
-rw-r--r--  tools/testing/selftests/lib.mk | 9
-rw-r--r--  tools/testing/selftests/livepatch/functions.sh | 2
-rwxr-xr-x  tools/testing/selftests/lkdtm/run.sh | 2
-rw-r--r--  tools/testing/selftests/net/.gitignore | 1
-rw-r--r--  tools/testing/selftests/net/Makefile | 3
-rw-r--r--  tools/testing/selftests/net/config | 6
-rwxr-xr-x  tools/testing/selftests/net/drop_monitor_tests.sh | 215
-rwxr-xr-x  tools/testing/selftests/net/fib_nexthops.sh | 44
-rw-r--r--  tools/testing/selftests/net/forwarding/devlink_lib.sh | 70
-rw-r--r--  tools/testing/selftests/net/forwarding/lib.sh | 43
-rw-r--r--  tools/testing/selftests/net/forwarding/mirror_lib.sh | 2
-rw-r--r--  tools/testing/selftests/net/ipsec.c | 2195
-rw-r--r--  tools/testing/selftests/net/mptcp/Makefile | 3
-rw-r--r--  tools/testing/selftests/net/mptcp/mptcp_connect.c | 22
-rwxr-xr-x  tools/testing/selftests/net/mptcp/mptcp_connect.sh | 21
-rwxr-xr-x  tools/testing/selftests/net/mptcp/mptcp_join.sh | 193
-rwxr-xr-x  tools/testing/selftests/net/mptcp/simult_flows.sh | 293
-rw-r--r--  tools/testing/selftests/net/nettest.c | 2
-rwxr-xr-x  tools/testing/selftests/net/psock_snd.sh | 16
-rw-r--r--  tools/testing/selftests/net/tcp_mmap.c | 42
-rwxr-xr-x  tools/testing/selftests/net/vrf_route_leaking.sh | 626
-rw-r--r--  tools/testing/selftests/netfilter/.gitignore | 2
-rw-r--r--  tools/testing/selftests/netfilter/nf-queue.c | 61
-rwxr-xr-x  tools/testing/selftests/netfilter/nft_meta.sh | 32
-rwxr-xr-x  tools/testing/selftests/netfilter/nft_queue.sh | 70
-rw-r--r--  tools/testing/selftests/pidfd/pidfd.h | 4
-rw-r--r--  tools/testing/selftests/pidfd/pidfd_setns_test.c | 2
-rw-r--r--  tools/testing/selftests/pidfd/pidfd_wait.c | 304
-rw-r--r--  tools/testing/selftests/powerpc/alignment/alignment_handler.c | 12
-rw-r--r--  tools/testing/selftests/powerpc/benchmarks/context_switch.c | 6
-rw-r--r--  tools/testing/selftests/powerpc/copyloops/.gitignore | 2
-rw-r--r--  tools/testing/selftests/powerpc/copyloops/Makefile | 6
l---------  tools/testing/selftests/powerpc/copyloops/copy_mc_64.S | 1
l---------  tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S | 1
-rw-r--r--  tools/testing/selftests/powerpc/dscr/Makefile | 2
-rw-r--r--  tools/testing/selftests/powerpc/dscr/dscr_default_test.c | 2
-rw-r--r--  tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c | 2
-rw-r--r--  tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c | 2
-rw-r--r--  tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c | 2
-rw-r--r--  tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c | 2
-rw-r--r--  tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c | 2
-rw-r--r--  tools/testing/selftests/powerpc/dscr/dscr_user_test.c | 2
-rwxr-xr-x  tools/testing/selftests/powerpc/eeh/eeh-basic.sh | 9
-rw-r--r--  tools/testing/selftests/powerpc/include/utils.h | 2
-rw-r--r--  tools/testing/selftests/powerpc/mm/bad_accesses.c | 1
-rw-r--r--  tools/testing/selftests/powerpc/pmu/count_stcx_fail.c | 1
-rw-r--r--  tools/testing/selftests/powerpc/pmu/l3_bank_test.c | 3
-rw-r--r--  tools/testing/selftests/powerpc/pmu/per_event_excludes.c | 2
-rw-r--r--  tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c | 48
-rw-r--r--  tools/testing/selftests/powerpc/security/rfi_flush.c | 38
-rw-r--r--  tools/testing/selftests/powerpc/security/spectre_v2.c | 3
-rw-r--r--  tools/testing/selftests/powerpc/stringloops/memcmp.c | 2
-rw-r--r--  tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S | 23
-rw-r--r--  tools/testing/selftests/powerpc/syscalls/Makefile | 2
-rw-r--r--  tools/testing/selftests/powerpc/syscalls/rtas_filter.c | 285
-rw-r--r--  tools/testing/selftests/powerpc/tm/tm-poison.c | 11
-rw-r--r--  tools/testing/selftests/powerpc/tm/tm-tmspr.c | 10
-rw-r--r--  tools/testing/selftests/powerpc/tm/tm-trap.c | 10
-rw-r--r--  tools/testing/selftests/powerpc/tm/tm-unavailable.c | 9
-rw-r--r--  tools/testing/selftests/powerpc/tm/tm.h | 3
-rw-r--r--  tools/testing/selftests/powerpc/utils.c | 39
-rw-r--r--  tools/testing/selftests/ptrace/.gitignore | 1
-rw-r--r--  tools/testing/selftests/rseq/param_test.c | 223
-rw-r--r--  tools/testing/selftests/rseq/rseq-x86.h | 57
-rwxr-xr-x  tools/testing/selftests/rseq/run_param_test.sh | 2
-rwxr-xr-x  tools/testing/selftests/run_kselftest.sh | 93
-rw-r--r--  tools/testing/selftests/seccomp/seccomp_bpf.c | 440
-rw-r--r--  tools/testing/selftests/vm/Makefile | 17
-rw-r--r--  tools/testing/selftests/vm/compaction_test.c | 11
-rw-r--r--  tools/testing/selftests/vm/config | 1
-rw-r--r--  tools/testing/selftests/vm/gup_benchmark.c | 32
-rw-r--r--  tools/testing/selftests/vm/hmm-tests.c | 4
-rw-r--r--  tools/testing/selftests/vm/userfaultfd.c | 296
-rw-r--r--  tools/testing/selftests/x86/fsgsbase.c | 68
-rw-r--r--  tools/vm/page-types.c | 2
437 files changed, 32730 insertions, 5266 deletions
diff --git a/tools/arch/parisc/include/uapi/asm/mman.h b/tools/arch/parisc/include/uapi/asm/mman.h
index f9fd1325f5bd..506c06a6536f 100644
--- a/tools/arch/parisc/include/uapi/asm/mman.h
+++ b/tools/arch/parisc/include/uapi/asm/mman.h
@@ -39,6 +39,5 @@
#define MADV_SOFT_OFFLINE 101
/* MAP_32BIT is undefined on parisc, fix it for perf */
#define MAP_32BIT 0
-/* MAP_UNINITIALIZED is undefined on parisc, fix it for perf */
#define MAP_UNINITIALIZED 0
#endif
diff --git a/tools/arch/x86/include/asm/mcsafe_test.h b/tools/arch/x86/include/asm/mcsafe_test.h
deleted file mode 100644
index 2ccd588fbad4..000000000000
--- a/tools/arch/x86/include/asm/mcsafe_test.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _MCSAFE_TEST_H_
-#define _MCSAFE_TEST_H_
-
-.macro MCSAFE_TEST_CTL
-.endm
-
-.macro MCSAFE_TEST_SRC reg count target
-.endm
-
-.macro MCSAFE_TEST_DST reg count target
-.endm
-#endif /* _MCSAFE_TEST_H_ */
diff --git a/tools/arch/x86/include/asm/orc_types.h b/tools/arch/x86/include/asm/orc_types.h
index d25534940bde..fdbffec4cfde 100644
--- a/tools/arch/x86/include/asm/orc_types.h
+++ b/tools/arch/x86/include/asm/orc_types.h
@@ -39,27 +39,6 @@
#define ORC_REG_SP_INDIRECT 9
#define ORC_REG_MAX 15
-/*
- * ORC_TYPE_CALL: Indicates that sp_reg+sp_offset resolves to PREV_SP (the
- * caller's SP right before it made the call). Used for all callable
- * functions, i.e. all C code and all callable asm functions.
- *
- * ORC_TYPE_REGS: Used in entry code to indicate that sp_reg+sp_offset points
- * to a fully populated pt_regs from a syscall, interrupt, or exception.
- *
- * ORC_TYPE_REGS_IRET: Used in entry code to indicate that sp_reg+sp_offset
- * points to the iret return frame.
- *
- * The UNWIND_HINT macros are used only for the unwind_hint struct. They
- * aren't used in struct orc_entry due to size and complexity constraints.
- * Objtool converts them to real types when it converts the hints to orc
- * entries.
- */
-#define ORC_TYPE_CALL 0
-#define ORC_TYPE_REGS 1
-#define ORC_TYPE_REGS_IRET 2
-#define UNWIND_HINT_TYPE_RET_OFFSET 3
-
#ifndef __ASSEMBLY__
/*
* This struct is more or less a vastly simplified version of the DWARF Call
@@ -78,19 +57,6 @@ struct orc_entry {
unsigned end:1;
} __packed;
-/*
- * This struct is used by asm and inline asm code to manually annotate the
- * location of registers on the stack for the ORC unwinder.
- *
- * Type can be either ORC_TYPE_* or UNWIND_HINT_TYPE_*.
- */
-struct unwind_hint {
- u32 ip;
- s16 sp_offset;
- u8 sp_reg;
- u8 type;
- u8 end;
-};
#endif /* __ASSEMBLY__ */
#endif /* _ORC_TYPES_H */
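The two hunks above drop the ORC_TYPE_* constants and struct unwind_hint from the tools copy of orc_types.h; per the diffstat, this series consolidates the unwind-hint types into the new tools/include/linux/objtool.h. As the removed comment explains, for ORC_TYPE_CALL entries sp_reg selects a base register and base + sp_offset recovers PREV_SP, the caller's stack pointer right before the call. A hedged, standalone C sketch of that lookup (struct layout and register constants are abbreviated stand-ins for the real header)::

	#include <stdint.h>

	#define ORC_REG_BP 4	/* stand-ins mirroring orc_types.h */
	#define ORC_REG_SP 5

	struct orc_entry_sketch {
		int16_t sp_offset;
		uint8_t sp_reg;
	};

	/* For an ORC_TYPE_CALL entry, recover the caller's SP. */
	unsigned long orc_prev_sp(const struct orc_entry_sketch *orc,
				  unsigned long sp, unsigned long bp)
	{
		switch (orc->sp_reg) {
		case ORC_REG_SP:	/* ordinary C functions */
			return sp + orc->sp_offset;
		case ORC_REG_BP:	/* frame-pointer frames */
			return bp + orc->sp_offset;
		default:		/* indirect cases elided */
			return 0;
		}
	}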
diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S
index 45f8e1b02241..0b5b8ae56bd9 100644
--- a/tools/arch/x86/lib/memcpy_64.S
+++ b/tools/arch/x86/lib/memcpy_64.S
@@ -4,7 +4,6 @@
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/cpufeatures.h>
-#include <asm/mcsafe_test.h>
#include <asm/alternative-asm.h>
#include <asm/export.h>
@@ -187,117 +186,3 @@ SYM_FUNC_START(memcpy_orig)
SYM_FUNC_END(memcpy_orig)
.popsection
-
-#ifndef CONFIG_UML
-
-MCSAFE_TEST_CTL
-
-/*
- * __memcpy_mcsafe - memory copy with machine check exception handling
- * Note that we only catch machine checks when reading the source addresses.
- * Writes to target are posted and don't generate machine checks.
- */
-SYM_FUNC_START(__memcpy_mcsafe)
- cmpl $8, %edx
- /* Less than 8 bytes? Go to byte copy loop */
- jb .L_no_whole_words
-
- /* Check for bad alignment of source */
- testl $7, %esi
- /* Already aligned */
- jz .L_8byte_aligned
-
- /* Copy one byte at a time until source is 8-byte aligned */
- movl %esi, %ecx
- andl $7, %ecx
- subl $8, %ecx
- negl %ecx
- subl %ecx, %edx
-.L_read_leading_bytes:
- movb (%rsi), %al
- MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes
- MCSAFE_TEST_DST %rdi 1 .E_leading_bytes
-.L_write_leading_bytes:
- movb %al, (%rdi)
- incq %rsi
- incq %rdi
- decl %ecx
- jnz .L_read_leading_bytes
-
-.L_8byte_aligned:
- movl %edx, %ecx
- andl $7, %edx
- shrl $3, %ecx
- jz .L_no_whole_words
-
-.L_read_words:
- movq (%rsi), %r8
- MCSAFE_TEST_SRC %rsi 8 .E_read_words
- MCSAFE_TEST_DST %rdi 8 .E_write_words
-.L_write_words:
- movq %r8, (%rdi)
- addq $8, %rsi
- addq $8, %rdi
- decl %ecx
- jnz .L_read_words
-
- /* Any trailing bytes? */
-.L_no_whole_words:
- andl %edx, %edx
- jz .L_done_memcpy_trap
-
- /* Copy trailing bytes */
- movl %edx, %ecx
-.L_read_trailing_bytes:
- movb (%rsi), %al
- MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes
- MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes
-.L_write_trailing_bytes:
- movb %al, (%rdi)
- incq %rsi
- incq %rdi
- decl %ecx
- jnz .L_read_trailing_bytes
-
- /* Copy successful. Return zero */
-.L_done_memcpy_trap:
- xorl %eax, %eax
-.L_done:
- ret
-SYM_FUNC_END(__memcpy_mcsafe)
-EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
-
- .section .fixup, "ax"
- /*
- * Return number of bytes not copied for any failure. Note that
- * there is no "tail" handling since the source buffer is 8-byte
- * aligned and poison is cacheline aligned.
- */
-.E_read_words:
- shll $3, %ecx
-.E_leading_bytes:
- addl %edx, %ecx
-.E_trailing_bytes:
- mov %ecx, %eax
- jmp .L_done
-
- /*
- * For write fault handling, given the destination is unaligned,
- * we handle faults on multi-byte writes with a byte-by-byte
- * copy up to the write-protected page.
- */
-.E_write_words:
- shll $3, %ecx
- addl %edx, %ecx
- movl %ecx, %edx
- jmp mcsafe_handle_tail
-
- .previous
-
- _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
- _ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
- _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
- _ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
- _ASM_EXTABLE(.L_write_words, .E_write_words)
- _ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
-#endif
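The whole __memcpy_mcsafe() implementation and its fixup table go away here; in this cycle the machine-check-safe copy was reworked into the copy_mc_*() family (note the copy_mc_64.S symlink replacing memcpy_mcsafe_64.S in the powerpc selftests diffstat above). The removed fixup code encodes the calling contract: return 0 on success, or the number of bytes left uncopied when a read from a poisoned source faults. A hedged sketch of that caller-side contract, with copy_mc() standing in for the real primitive::

	#include <string.h>

	/* Stand-in: the real copy stops at the faulting cacheline
	 * and returns the count of bytes it never wrote. */
	static unsigned long copy_mc(void *dst, const void *src,
				     unsigned int len)
	{
		memcpy(dst, src, len);
		return 0;
	}

	int read_pmem(void *dst, const void *src, unsigned int len)
	{
		unsigned long rem = copy_mc(dst, src, len);

		if (rem)		/* last 'rem' bytes missing */
			return -1;	/* callers map this to -EIO */
		return 0;
	}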
diff --git a/tools/arch/x86/tools/gen-insn-attr-x86.awk b/tools/arch/x86/tools/gen-insn-attr-x86.awk
index a42015b305f4..af38469afd14 100644
--- a/tools/arch/x86/tools/gen-insn-attr-x86.awk
+++ b/tools/arch/x86/tools/gen-insn-attr-x86.awk
@@ -362,6 +362,9 @@ function convert_operands(count,opnd, i,j,imm,mod)
END {
if (awkchecked != "")
exit 1
+
+ print "#ifndef __BOOT_COMPRESSED\n"
+
# print escape opcode map's array
print "/* Escape opcode map array */"
print "const insn_attr_t * const inat_escape_tables[INAT_ESC_MAX + 1]" \
@@ -388,6 +391,51 @@ END {
for (j = 0; j < max_lprefix; j++)
if (atable[i,j])
print " ["i"]["j"] = "atable[i,j]","
- print "};"
+ print "};\n"
+
+ print "#else /* !__BOOT_COMPRESSED */\n"
+
+ print "/* Escape opcode map array */"
+ print "static const insn_attr_t *inat_escape_tables[INAT_ESC_MAX + 1]" \
+ "[INAT_LSTPFX_MAX + 1];"
+ print ""
+
+ print "/* Group opcode map array */"
+ print "static const insn_attr_t *inat_group_tables[INAT_GRP_MAX + 1]"\
+ "[INAT_LSTPFX_MAX + 1];"
+ print ""
+
+ print "/* AVX opcode map array */"
+ print "static const insn_attr_t *inat_avx_tables[X86_VEX_M_MAX + 1]"\
+ "[INAT_LSTPFX_MAX + 1];"
+ print ""
+
+ print "static void inat_init_tables(void)"
+ print "{"
+
+ # print escape opcode map's array
+ print "\t/* Print Escape opcode map array */"
+ for (i = 0; i < geid; i++)
+ for (j = 0; j < max_lprefix; j++)
+ if (etable[i,j])
+ print "\tinat_escape_tables["i"]["j"] = "etable[i,j]";"
+ print ""
+
+ # print group opcode map's array
+ print "\t/* Print Group opcode map array */"
+ for (i = 0; i < ggid; i++)
+ for (j = 0; j < max_lprefix; j++)
+ if (gtable[i,j])
+ print "\tinat_group_tables["i"]["j"] = "gtable[i,j]";"
+ print ""
+ # print AVX opcode map's array
+ print "\t/* Print AVX opcode map array */"
+ for (i = 0; i < gaid; i++)
+ for (j = 0; j < max_lprefix; j++)
+ if (atable[i,j])
+ print "\tinat_avx_tables["i"]["j"] = "atable[i,j]";"
+
+ print "}"
+ print "#endif"
}
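The generator now emits the opcode attribute tables twice: outside __BOOT_COMPRESSED they stay const and compile-time initialized, while under __BOOT_COMPRESSED they become plain writable arrays filled in by a generated inat_init_tables(), presumably because the pre-decompression boot code cannot rely on static initializers that require relocations. A standalone sketch of the two shapes (table names echo the generated inat_* arrays; contents are stand-ins)::

	#include <stdio.h>

	typedef unsigned int insn_attr_t;

	static const insn_attr_t table_0f[256];	/* stand-in opcode map */

	/* Shape 1: const, initialized at compile time. */
	static const insn_attr_t * const escape_const[2] = { table_0f };

	/* Shape 2: writable, filled by the generated init function. */
	static const insn_attr_t *escape_runtime[2];

	static void inat_init_tables(void)
	{
		escape_runtime[0] = table_0f;
	}

	int main(void)
	{
		inat_init_tables();	/* must run before any decoding */
		printf("%d\n", escape_const[0] == escape_runtime[0]);
		return 0;
	}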
diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c
index e0878f5f74b1..eb92027817a7 100644
--- a/tools/bootconfig/main.c
+++ b/tools/bootconfig/main.c
@@ -14,18 +14,19 @@
#include <linux/kernel.h>
#include <linux/bootconfig.h>
-static int xbc_show_value(struct xbc_node *node)
+static int xbc_show_value(struct xbc_node *node, bool semicolon)
{
- const char *val;
+ const char *val, *eol;
char q;
int i = 0;
+ eol = semicolon ? ";\n" : "\n";
xbc_array_for_each_value(node, val) {
if (strchr(val, '"'))
q = '\'';
else
q = '"';
- printf("%c%s%c%s", q, val, q, node->next ? ", " : ";\n");
+ printf("%c%s%c%s", q, val, q, node->next ? ", " : eol);
i++;
}
return i;
@@ -53,7 +54,7 @@ static void xbc_show_compact_tree(void)
continue;
} else if (cnode && xbc_node_is_value(cnode)) {
printf("%s = ", xbc_node_get_data(node));
- xbc_show_value(cnode);
+ xbc_show_value(cnode, true);
} else {
printf("%s;\n", xbc_node_get_data(node));
}
@@ -77,8 +78,28 @@ static void xbc_show_compact_tree(void)
}
}
+static void xbc_show_list(void)
+{
+ char key[XBC_KEYLEN_MAX];
+ struct xbc_node *leaf;
+ const char *val;
+ int ret = 0;
+
+ xbc_for_each_key_value(leaf, val) {
+ ret = xbc_node_compose_key(leaf, key, XBC_KEYLEN_MAX);
+ if (ret < 0)
+ break;
+ printf("%s = ", key);
+ if (!val || val[0] == '\0') {
+ printf("\"\"\n");
+ continue;
+ }
+ xbc_show_value(xbc_node_get_child(leaf), false);
+ }
+}
+
/* Simple real checksum */
-int checksum(unsigned char *buf, int len)
+static int checksum(unsigned char *buf, int len)
{
int i, sum = 0;
@@ -90,7 +111,7 @@ int checksum(unsigned char *buf, int len)
#define PAGE_SIZE 4096
-int load_xbc_fd(int fd, char **buf, int size)
+static int load_xbc_fd(int fd, char **buf, int size)
{
int ret;
@@ -107,7 +128,7 @@ int load_xbc_fd(int fd, char **buf, int size)
}
/* Return the read size or -errno */
-int load_xbc_file(const char *path, char **buf)
+static int load_xbc_file(const char *path, char **buf)
{
struct stat stat;
int fd, ret;
@@ -126,7 +147,7 @@ int load_xbc_file(const char *path, char **buf)
return ret;
}
-int load_xbc_from_initrd(int fd, char **buf)
+static int load_xbc_from_initrd(int fd, char **buf)
{
struct stat stat;
int ret;
@@ -195,10 +216,55 @@ int load_xbc_from_initrd(int fd, char **buf)
return size;
}
-int show_xbc(const char *path)
+static void show_xbc_error(const char *data, const char *msg, int pos)
+{
+ int lin = 1, col, i;
+
+ if (pos < 0) {
+ pr_err("Error: %s.\n", msg);
+ return;
+ }
+
+ /* Note that pos starts from 0 but lin and col should start from 1. */
+ col = pos + 1;
+ for (i = 0; i < pos; i++) {
+ if (data[i] == '\n') {
+ lin++;
+ col = pos - i;
+ }
+ }
+ pr_err("Parse Error: %s at %d:%d\n", msg, lin, col);
+
+}
+
+static int init_xbc_with_error(char *buf, int len)
+{
+ char *copy = strdup(buf);
+ const char *msg;
+ int ret, pos;
+
+ if (!copy)
+ return -ENOMEM;
+
+ ret = xbc_init(buf, &msg, &pos);
+ if (ret < 0)
+ show_xbc_error(copy, msg, pos);
+ free(copy);
+
+ return ret;
+}
+
+static int show_xbc(const char *path, bool list)
{
int ret, fd;
char *buf = NULL;
+ struct stat st;
+
+ ret = stat(path, &st);
+ if (ret < 0) {
+ pr_err("Failed to stat %s: %d\n", path, -errno);
+ return -errno;
+ }
fd = open(path, O_RDONLY);
if (fd < 0) {
@@ -207,20 +273,33 @@ int show_xbc(const char *path)
}
ret = load_xbc_from_initrd(fd, &buf);
+ close(fd);
if (ret < 0) {
pr_err("Failed to load a boot config from initrd: %d\n", ret);
goto out;
}
- xbc_show_compact_tree();
+	/* Assume a bootconfig file if it is small enough */
+ if (ret == 0 && st.st_size <= XBC_DATA_MAX) {
+ ret = load_xbc_file(path, &buf);
+ if (ret < 0) {
+ pr_err("Failed to load a boot config: %d\n", ret);
+ goto out;
+ }
+ if (init_xbc_with_error(buf, ret) < 0)
+ goto out;
+ }
+ if (list)
+ xbc_show_list();
+ else
+ xbc_show_compact_tree();
ret = 0;
out:
- close(fd);
free(buf);
return ret;
}
-int delete_xbc(const char *path)
+static int delete_xbc(const char *path)
{
struct stat stat;
int ret = 0, fd, size;
@@ -251,28 +330,7 @@ int delete_xbc(const char *path)
return ret;
}
-static void show_xbc_error(const char *data, const char *msg, int pos)
-{
- int lin = 1, col, i;
-
- if (pos < 0) {
- pr_err("Error: %s.\n", msg);
- return;
- }
-
- /* Note that pos starts from 0 but lin and col should start from 1. */
- col = pos + 1;
- for (i = 0; i < pos; i++) {
- if (data[i] == '\n') {
- lin++;
- col = pos - i;
- }
- }
- pr_err("Parse Error: %s at %d:%d\n", msg, lin, col);
-
-}
-
-int apply_xbc(const char *path, const char *xbc_path)
+static int apply_xbc(const char *path, const char *xbc_path)
{
u32 size, csum;
char *buf, *data;
@@ -349,14 +407,16 @@ out:
return ret;
}
-int usage(void)
+static int usage(void)
{
printf("Usage: bootconfig [OPTIONS] <INITRD>\n"
+ "Or bootconfig <CONFIG>\n"
" Apply, delete or show boot config to initrd.\n"
" Options:\n"
" -a <config>: Apply boot config to initrd\n"
- " -d : Delete boot config file from initrd\n\n"
- " If no option is given, show current applied boot config.\n");
+ " -d : Delete boot config file from initrd\n"
+ " -l : List boot config in initrd or file\n\n"
+ " If no option is given, show the bootconfig in the given file.\n");
return -1;
}
@@ -364,10 +424,10 @@ int main(int argc, char **argv)
{
char *path = NULL;
char *apply = NULL;
- bool delete = false;
+ bool delete = false, list = false;
int opt;
- while ((opt = getopt(argc, argv, "hda:")) != -1) {
+ while ((opt = getopt(argc, argv, "hda:l")) != -1) {
switch (opt) {
case 'd':
delete = true;
@@ -375,14 +435,17 @@ int main(int argc, char **argv)
case 'a':
apply = optarg;
break;
+ case 'l':
+ list = true;
+ break;
case 'h':
default:
return usage();
}
}
- if (apply && delete) {
- pr_err("Error: You can not specify both -a and -d at once.\n");
+ if ((apply && delete) || (delete && list) || (apply && list)) {
+ pr_err("Error: Options -a, -d and -l are mutually exclusive.\n");
return usage();
}
@@ -398,5 +461,5 @@ int main(int argc, char **argv)
else if (delete)
return delete_xbc(path);
- return show_xbc(path);
+ return show_xbc(path, list);
}
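Two details of the rewritten tool are worth noting. show_xbc_error() converts a 0-based byte offset from the parser into a 1-based line:column pair, and init_xbc_with_error() strdup()s the buffer first, presumably because xbc_init() may modify it in place while parsing. A standalone version of the offset-to-line:column loop::

	#include <stdio.h>

	static void offset_to_linecol(const char *data, int pos,
				      int *lin, int *col)
	{
		int i;

		*lin = 1;
		*col = pos + 1;
		for (i = 0; i < pos; i++) {
			if (data[i] == '\n') {
				(*lin)++;
				*col = pos - i;	/* restart after newline */
			}
		}
	}

	int main(void)
	{
		int lin, col;

		offset_to_linecol("key = 1\nbad line", 10, &lin, &col);
		printf("%d:%d\n", lin, col);	/* prints 2:3 */
		return 0;
	}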
diff --git a/tools/bootconfig/scripts/bconf2ftrace.sh b/tools/bootconfig/scripts/bconf2ftrace.sh
new file mode 100755
index 000000000000..595e164dc352
--- /dev/null
+++ b/tools/bootconfig/scripts/bconf2ftrace.sh
@@ -0,0 +1,199 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-only
+
+usage() {
+ echo "Ftrace boottime trace test tool"
+ echo "Usage: $0 [--apply|--init] [--debug] BOOTCONFIG-FILE"
+ echo " --apply: Test actual apply to tracefs (need sudo)"
+ echo " --init: Initialize ftrace before applying (imply --apply)"
+ exit 1
+}
+
+[ $# -eq 0 ] && usage
+
+BCONF=
+DEBUG=
+APPLY=
+INIT=
+while [ x"$1" != x ]; do
+ case "$1" in
+ "--debug")
+ DEBUG=$1;;
+ "--apply")
+ APPLY=$1;;
+ "--init")
+ APPLY=$1
+ INIT=$1;;
+ *)
+ [ ! -f $1 ] && usage
+ BCONF=$1;;
+ esac
+ shift 1
+done
+
+if [ x"$APPLY" != x ]; then
+ if [ `id -u` -ne 0 ]; then
+ echo "This must be run by root user. Try sudo." 1>&2
+ exec sudo $0 $DEBUG $APPLY $BCONF
+ fi
+fi
+
+run_cmd() { # command
+ echo "$*"
+ if [ x"$APPLY" != x ]; then # apply command
+ eval $*
+ fi
+}
+
+if [ x"$DEBUG" != x ]; then
+ set -x
+fi
+
+TRACEFS=`grep -m 1 -w tracefs /proc/mounts | cut -f 2 -d " "`
+if [ -z "$TRACEFS" ]; then
+ if ! grep -wq debugfs /proc/mounts; then
+ echo "Error: No tracefs/debugfs was mounted." 1>&2
+ exit 1
+ fi
+ TRACEFS=`grep -m 1 -w debugfs /proc/mounts | cut -f 2 -d " "`/tracing
+ if [ ! -d $TRACEFS ]; then
+ echo "Error: ftrace is not enabled on this kernel." 1>&2
+ exit 1
+ fi
+fi
+
+if [ x"$INIT" != x ]; then
+ . `dirname $0`/ftrace.sh
+ (cd $TRACEFS; initialize_ftrace)
+fi
+
+. `dirname $0`/xbc.sh
+
+######## main #########
+set -e
+
+xbc_init $BCONF
+
+set_value_of() { # key file
+ if xbc_has_key $1; then
+ val=`xbc_get_val $1 1`
+ run_cmd "echo '$val' >> $2"
+ fi
+}
+
+set_array_of() { # key file
+ if xbc_has_key $1; then
+ xbc_get_val $1 | while read line; do
+ run_cmd "echo '$line' >> $2"
+ done
+ fi
+}
+
+compose_synth() { # event_name branch
+ echo -n "$1 "
+ xbc_get_val $2 | while read field; do echo -n "$field; "; done
+}
+
+setup_event() { # prefix group event [instance]
+ branch=$1.$2.$3
+ if [ "$4" ]; then
+ eventdir="$TRACEFS/instances/$4/events/$2/$3"
+ else
+ eventdir="$TRACEFS/events/$2/$3"
+ fi
+ case $2 in
+ kprobes)
+ xbc_get_val ${branch}.probes | while read line; do
+ run_cmd "echo 'p:kprobes/$3 $line' >> $TRACEFS/kprobe_events"
+ done
+ ;;
+ synthetic)
+ run_cmd "echo '`compose_synth $3 ${branch}.fields`' >> $TRACEFS/synthetic_events"
+ ;;
+ esac
+
+ set_value_of ${branch}.filter ${eventdir}/filter
+ set_array_of ${branch}.actions ${eventdir}/trigger
+
+ if xbc_has_key ${branch}.enable; then
+ run_cmd "echo 1 > ${eventdir}/enable"
+ fi
+}
+
+setup_events() { # prefix("ftrace" or "ftrace.instance.INSTANCE") [instance]
+ prefix="${1}.event"
+ if xbc_has_branch ${1}.event; then
+ for grpev in `xbc_subkeys ${1}.event 2`; do
+ setup_event $prefix ${grpev%.*} ${grpev#*.} $2
+ done
+ fi
+}
+
+size2kb() { # size[KB|MB]
+ case $1 in
+ *KB)
+ echo ${1%KB};;
+ *MB)
+ expr ${1%MB} \* 1024;;
+ *)
+ expr $1 / 1024 ;;
+ esac
+}
+
+setup_instance() { # [instance]
+ if [ "$1" ]; then
+ instance="ftrace.instance.${1}"
+ instancedir=$TRACEFS/instances/$1
+ else
+ instance="ftrace"
+ instancedir=$TRACEFS
+ fi
+
+ set_array_of ${instance}.options ${instancedir}/trace_options
+ set_value_of ${instance}.trace_clock ${instancedir}/trace_clock
+ set_value_of ${instance}.cpumask ${instancedir}/tracing_cpumask
+ set_value_of ${instance}.tracer ${instancedir}/current_tracer
+ set_array_of ${instance}.ftrace.filters \
+ ${instancedir}/set_ftrace_filter
+ set_array_of ${instance}.ftrace.notrace \
+ ${instancedir}/set_ftrace_notrace
+
+ if xbc_has_key ${instance}.alloc_snapshot; then
+ run_cmd "echo 1 > ${instancedir}/snapshot"
+ fi
+
+ if xbc_has_key ${instance}.buffer_size; then
+ size=`xbc_get_val ${instance}.buffer_size 1`
+ size=`eval size2kb $size`
+ run_cmd "echo $size >> ${instancedir}/buffer_size_kb"
+ fi
+
+ setup_events ${instance} $1
+ set_array_of ${instance}.events ${instancedir}/set_event
+}
+
+# ftrace global configs (kernel.*)
+if xbc_has_key "kernel.dump_on_oops"; then
+ dump_mode=`xbc_get_val "kernel.dump_on_oops" 1`
+ [ "$dump_mode" ] && dump_mode=`eval echo $dump_mode` || dump_mode=1
+ run_cmd "echo \"$dump_mode\" > /proc/sys/kernel/ftrace_dump_on_oops"
+fi
+
+set_value_of kernel.fgraph_max_depth $TRACEFS/max_graph_depth
+set_array_of kernel.fgraph_filters $TRACEFS/set_graph_function
+set_array_of kernel.fgraph_notraces $TRACEFS/set_graph_notrace
+
+# Per-instance/per-event configs
+if ! xbc_has_branch "ftrace" ; then
+ exit 0
+fi
+
+setup_instance # root instance
+
+if xbc_has_branch "ftrace.instance"; then
+ for i in `xbc_subkeys "ftrace.instance" 1`; do
+ run_cmd "mkdir -p $TRACEFS/instances/$i"
+ setup_instance $i
+ done
+fi
+
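A hypothetical input for the script above, using only keys it actually reads (the instance name and probe spec are made up)::

	ftrace.tracer = function_graph
	ftrace.buffer_size = 1MB
	ftrace.event.sched.sched_switch.enable
	ftrace.instance.foo.event.kprobes.myopen.probes = "do_sys_open $arg1"
	ftrace.instance.foo.event.kprobes.myopen.enable

Run as `bconf2ftrace.sh --apply file.bconf`, this would set the tracer and buffer size, enable sched_switch, create instances/foo, register the kprobe via kprobe_events, and enable it, with each write echoed by run_cmd() before being applied.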
diff --git a/tools/bootconfig/scripts/ftrace.sh b/tools/bootconfig/scripts/ftrace.sh
new file mode 100644
index 000000000000..186eed923041
--- /dev/null
+++ b/tools/bootconfig/scripts/ftrace.sh
@@ -0,0 +1,109 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+clear_trace() { # reset trace output
+ echo > trace
+}
+
+disable_tracing() { # stop trace recording
+ echo 0 > tracing_on
+}
+
+enable_tracing() { # start trace recording
+ echo 1 > tracing_on
+}
+
+reset_tracer() { # reset the current tracer
+ echo nop > current_tracer
+}
+
+reset_trigger_file() {
+ # remove action triggers first
+ grep -H ':on[^:]*(' $@ |
+ while read line; do
+ cmd=`echo $line | cut -f2- -d: | cut -f1 -d"["`
+ file=`echo $line | cut -f1 -d:`
+ echo "!$cmd" >> $file
+ done
+ grep -Hv ^# $@ |
+ while read line; do
+ cmd=`echo $line | cut -f2- -d: | cut -f1 -d"["`
+ file=`echo $line | cut -f1 -d:`
+ echo "!$cmd" > $file
+ done
+}
+
+reset_trigger() { # reset all current setting triggers
+ if [ -d events/synthetic ]; then
+ reset_trigger_file events/synthetic/*/trigger
+ fi
+ reset_trigger_file events/*/*/trigger
+}
+
+reset_events_filter() { # reset all current setting filters
+ grep -v ^none events/*/*/filter |
+ while read line; do
+ echo 0 > `echo $line | cut -f1 -d:`
+ done
+}
+
+reset_ftrace_filter() { # reset all triggers in set_ftrace_filter
+ if [ ! -f set_ftrace_filter ]; then
+ return 0
+ fi
+ echo > set_ftrace_filter
+ grep -v '^#' set_ftrace_filter | while read t; do
+ tr=`echo $t | cut -d: -f2`
+ if [ "$tr" = "" ]; then
+ continue
+ fi
+ if ! grep -q "$t" set_ftrace_filter; then
+ continue;
+ fi
+ name=`echo $t | cut -d: -f1 | cut -d' ' -f1`
+ if [ $tr = "enable_event" -o $tr = "disable_event" ]; then
+ tr=`echo $t | cut -d: -f2-4`
+ limit=`echo $t | cut -d: -f5`
+ else
+ tr=`echo $t | cut -d: -f2`
+ limit=`echo $t | cut -d: -f3`
+ fi
+ if [ "$limit" != "unlimited" ]; then
+ tr="$tr:$limit"
+ fi
+ echo "!$name:$tr" > set_ftrace_filter
+ done
+}
+
+disable_events() {
+ echo 0 > events/enable
+}
+
+clear_synthetic_events() { # reset all current synthetic events
+ grep -v ^# synthetic_events |
+ while read line; do
+ echo "!$line" >> synthetic_events
+ done
+}
+
+initialize_ftrace() { # Reset ftrace to initial-state
+# As the initial state, ftrace will be set to nop tracer,
+# no events, no triggers, no filters, no function filters,
+# no probes, and tracing on.
+ disable_tracing
+ reset_tracer
+ reset_trigger
+ reset_events_filter
+ reset_ftrace_filter
+ disable_events
+ [ -f set_event_pid ] && echo > set_event_pid
+ [ -f set_ftrace_pid ] && echo > set_ftrace_pid
+ [ -f set_ftrace_notrace ] && echo > set_ftrace_notrace
+ [ -f set_graph_function ] && echo | tee set_graph_*
+ [ -f stack_trace_filter ] && echo > stack_trace_filter
+ [ -f kprobe_events ] && echo > kprobe_events
+ [ -f uprobe_events ] && echo > uprobe_events
+ [ -f synthetic_events ] && echo > synthetic_events
+ [ -f snapshot ] && echo 0 > snapshot
+ clear_trace
+ enable_tracing
+}
diff --git a/tools/bootconfig/scripts/ftrace2bconf.sh b/tools/bootconfig/scripts/ftrace2bconf.sh
new file mode 100755
index 000000000000..6c0d4b61e0c2
--- /dev/null
+++ b/tools/bootconfig/scripts/ftrace2bconf.sh
@@ -0,0 +1,244 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-only
+
+usage() {
+ echo "Dump boot-time tracing bootconfig from ftrace"
+ echo "Usage: $0 [--debug] [ > BOOTCONFIG-FILE]"
+ exit 1
+}
+
+DEBUG=
+while [ x"$1" != x ]; do
+ case "$1" in
+ "--debug")
+ DEBUG=$1;;
+ -*)
+ usage
+ ;;
+ esac
+ shift 1
+done
+
+if [ x"$DEBUG" != x ]; then
+ set -x
+fi
+
+TRACEFS=`grep -m 1 -w tracefs /proc/mounts | cut -f 2 -d " "`
+if [ -z "$TRACEFS" ]; then
+ if ! grep -wq debugfs /proc/mounts; then
+ echo "Error: No tracefs/debugfs was mounted."
+ exit 1
+ fi
+ TRACEFS=`grep -m 1 -w debugfs /proc/mounts | cut -f 2 -d " "`/tracing
+ if [ ! -d $TRACEFS ]; then
+ echo "Error: ftrace is not enabled on this kernel." 1>&2
+ exit 1
+ fi
+fi
+
+######## main #########
+
+set -e
+
+emit_kv() { # key =|+= value
+ echo "$@"
+}
+
+global_options() {
+ val=`cat $TRACEFS/max_graph_depth`
+ [ $val != 0 ] && emit_kv kernel.fgraph_max_depth = $val
+ if grep -qv "^#" $TRACEFS/set_graph_function $TRACEFS/set_graph_notrace ; then
+ cat 1>&2 << EOF
+# WARN: kernel.fgraph_filters and kernel.fgraph_notrace are not supported, since the wild card expression was expanded and lost from memory.
+EOF
+ fi
+}
+
+kprobe_event_options() {
+ cat $TRACEFS/kprobe_events | while read p args; do
+ case $p in
+ r*)
+ cat 1>&2 << EOF
+# WARN: A return probe found but it is not supported by bootconfig. Skip it.
+EOF
+ continue;;
+ esac
+ p=${p#*:}
+ event=${p#*/}
+ group=${p%/*}
+ if [ $group != "kprobes" ]; then
+ cat 1>&2 << EOF
+# WARN: kprobes group name $group is changed to "kprobes" for bootconfig.
+EOF
+ fi
+ emit_kv $PREFIX.event.kprobes.$event.probes += $args
+ done
+}
+
+synth_event_options() {
+ cat $TRACEFS/synthetic_events | while read event fields; do
+ emit_kv $PREFIX.event.synthetic.$event.fields = `echo $fields | sed "s/;/,/g"`
+ done
+}
+
+# Variables resolver
+DEFINED_VARS=
+UNRESOLVED_EVENTS=
+
+defined_vars() { # event-dir
+ grep "^hist" $1/trigger | grep -o ':[a-zA-Z0-9]*='
+}
+referred_vars() {
+ grep "^hist" $1/trigger | grep -o '$[a-zA-Z0-9]*'
+}
+
+per_event_options() { # event-dir
+ evdir=$1
+ # Check the special event which has no filter and no trigger
+ [ ! -f $evdir/filter ] && return
+
+ if grep -q "^hist:" $evdir/trigger; then
+ # hist action can refer the undefined variables
+ __vars=`defined_vars $evdir`
+ for v in `referred_vars $evdir`; do
+ if echo $DEFINED_VARS $__vars | grep -vqw ${v#$}; then
+ # $v is not defined yet, defer it
+ UNRESOLVED_EVENTS="$UNRESOLVED_EVENTS $evdir"
+ return;
+ fi
+ done
+ DEFINED_VARS="$DEFINED_VARS "`defined_vars $evdir`
+ fi
+ grep -v "^#" $evdir/trigger | while read action active; do
+ emit_kv $PREFIX.event.$group.$event.actions += \'$action\'
+ done
+
+ # enable is not checked; this is done by set_event in the instance.
+ val=`cat $evdir/filter`
+ if [ "$val" != "none" ]; then
+ emit_kv $PREFIX.event.$group.$event.filter = "$val"
+ fi
+}
+
+retry_unresolved() {
+ unresolved=$UNRESOLVED_EVENTS
+ UNRESOLVED_EVENTS=
+ for evdir in $unresolved; do
+ event=${evdir##*/}
+ group=${evdir%/*}; group=${group##*/}
+ per_event_options $evdir
+ done
+}
+
+event_options() {
+ # PREFIX and INSTANCE must be set
+ if [ $PREFIX = "ftrace" ]; then
+ # define the dynamic events
+ kprobe_event_options
+ synth_event_options
+ fi
+ for group in `ls $INSTANCE/events/` ; do
+ [ ! -d $INSTANCE/events/$group ] && continue
+ for event in `ls $INSTANCE/events/$group/` ;do
+ [ ! -d $INSTANCE/events/$group/$event ] && continue
+ per_event_options $INSTANCE/events/$group/$event
+ done
+ done
+ retry=0
+ while [ $retry -lt 3 ]; do
+ retry_unresolved
+ retry=$((retry + 1))
+ done
+ if [ "$UNRESOLVED_EVENTS" ]; then
+ cat 1>&2 << EOF
+! ERROR: hist triggers in $UNRESOLVED_EVENTS use some undefined variables.
+EOF
+ fi
+}
+
+is_default_trace_option() { # option
+grep -qw $1 << EOF
+print-parent
+nosym-offset
+nosym-addr
+noverbose
+noraw
+nohex
+nobin
+noblock
+trace_printk
+annotate
+nouserstacktrace
+nosym-userobj
+noprintk-msg-only
+context-info
+nolatency-format
+record-cmd
+norecord-tgid
+overwrite
+nodisable_on_free
+irq-info
+markers
+noevent-fork
+nopause-on-trace
+function-trace
+nofunction-fork
+nodisplay-graph
+nostacktrace
+notest_nop_accept
+notest_nop_refuse
+EOF
+}
+
+instance_options() { # [instance-name]
+ if [ $# -eq 0 ]; then
+ PREFIX="ftrace"
+ INSTANCE=$TRACEFS
+ else
+ PREFIX="ftrace.instance.$1"
+ INSTANCE=$TRACEFS/instances/$1
+ fi
+ val=
+ for i in `cat $INSTANCE/trace_options`; do
+ is_default_trace_option $i && continue
+ val="$val, $i"
+ done
+ [ "$val" ] && emit_kv $PREFIX.options = "${val#,}"
+ val="local"
+ for i in `cat $INSTANCE/trace_clock` ; do
+ [ "${i#*]}" ] && continue
+ i=${i%]}; val=${i#[}
+ done
+ [ $val != "local" ] && emit_kv $PREFIX.trace_clock = $val
+ val=`cat $INSTANCE/buffer_size_kb`
+ if echo $val | grep -vq "expanded" ; then
+ emit_kv $PREFIX.buffer_size = $val"KB"
+ fi
+ if grep -q "is allocated" $INSTANCE/snapshot ; then
+ emit_kv $PREFIX.alloc_snapshot
+ fi
+ val=`cat $INSTANCE/tracing_cpumask`
+ if [ `echo $val | sed -e s/f//g`x != x ]; then
+ emit_kv $PREFIX.cpumask = $val
+ fi
+
+ val=
+ for i in `cat $INSTANCE/set_event`; do
+ val="$val, $i"
+ done
+ [ "$val" ] && emit_kv $PREFIX.events = "${val#,}"
+ val=`cat $INSTANCE/current_tracer`
+ [ $val != nop ] && emit_kv $PREFIX.tracer = $val
+ if grep -qv "^#" $INSTANCE/set_ftrace_filter $INSTANCE/set_ftrace_notrace; then
+ cat 1>&2 << EOF
+# WARN: kernel.ftrace.filters and kernel.ftrace.notrace are not supported, since the wild card expression was expanded and lost from memory.
+EOF
+ fi
+ event_options
+}
+
+global_options
+instance_options
+for i in `ls $TRACEFS/instances` ; do
+ instance_options $i
+done
diff --git a/tools/bootconfig/scripts/xbc.sh b/tools/bootconfig/scripts/xbc.sh
new file mode 100644
index 000000000000..b8c84e654556
--- /dev/null
+++ b/tools/bootconfig/scripts/xbc.sh
@@ -0,0 +1,56 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-only
+
+# bootconfig utility functions
+
+XBC_TMPFILE=
+XBC_BASEDIR=`dirname $0`
+BOOTCONFIG=${BOOTCONFIG:=$XBC_BASEDIR/../bootconfig}
+if [ ! -x "$BOOTCONFIG" ]; then
+ BOOTCONFIG=`which bootconfig`
+ if [ -z "$BOOTCONFIG" ]; then
+ echo "Error: bootconfig command is not found" 1>&2
+ exit 1
+ fi
+fi
+
+xbc_cleanup() {
+ if [ "$XBC_TMPFILE" ]; then
+ rm -f "$XBC_TMPFILE"
+ fi
+}
+
+xbc_init() { # bootconfig-file
+ xbc_cleanup
+ XBC_TMPFILE=`mktemp bconf-XXXX`
+ trap xbc_cleanup EXIT TERM
+
+ $BOOTCONFIG -l $1 > $XBC_TMPFILE || exit 1
+}
+
+nr_args() { # args
+ echo $#
+}
+
+xbc_get_val() { # key [maxnum]
+ if [ "$2" ]; then
+ MAXOPT="-L $2"
+ fi
+ grep "^$1 =" $XBC_TMPFILE | cut -d= -f2- | \
+ sed -e 's/", /" /g' -e "s/',/' /g" | \
+ xargs $MAXOPT -n 1 echo
+}
+
+xbc_has_key() { # key
+ grep -q "^$1 =" $XBC_TMPFILE
+}
+
+xbc_has_branch() { # prefix-key
+ grep -q "^$1" $XBC_TMPFILE
+}
+
+xbc_subkeys() { # prefix-key depth
+ __keys=`echo $1 | sed "s/\./ /g"`
+ __s=`nr_args $__keys`
+ grep "^$1" $XBC_TMPFILE | cut -d= -f1| cut -d. -f$((__s + 1))-$((__s + $2)) | uniq
+}
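These helpers parse the flat listing produced by the new `bootconfig -l` mode (xbc_show_list() in main.c above): one `key = value` pair per line, with no trailing semicolons. Given a hypothetical listing::

	ftrace.tracer = "function"
	ftrace.event.sched.sched_switch.actions = "stacktrace", "traceoff"

`xbc_get_val ftrace.event.sched.sched_switch.actions` prints stacktrace and traceoff on separate lines (the sed/xargs pipeline splits the quoted, comma-separated values), and `xbc_subkeys ftrace.event 2` prints sched.sched_switch.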
diff --git a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile
index 815ac9804aee..f33cb02de95c 100644
--- a/tools/bpf/bpftool/Documentation/Makefile
+++ b/tools/bpf/bpftool/Documentation/Makefile
@@ -19,7 +19,7 @@ man8dir = $(mandir)/man8
# Load targets for building eBPF helpers man page.
include ../../Makefile.helpers
-MAN8_RST = $(filter-out $(HELPERS_RST),$(wildcard *.rst))
+MAN8_RST = $(wildcard bpftool*.rst)
_DOC_MAN8 = $(patsubst %.rst,%.8,$(MAN8_RST))
DOC_MAN8 = $(addprefix $(OUTPUT),$(_DOC_MAN8))
@@ -28,12 +28,23 @@ man: man8 helpers
man8: $(DOC_MAN8)
RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null)
+RST2MAN_OPTS += --verbose
+
+list_pages = $(sort $(basename $(filter-out $(1),$(MAN8_RST))))
+see_also = $(subst " ",, \
+ "\n" \
+ "SEE ALSO\n" \
+ "========\n" \
+ "\t**bpf**\ (2),\n" \
+ "\t**bpf-helpers**\\ (7)" \
+ $(foreach page,$(call list_pages,$(1)),",\n\t**$(page)**\\ (8)") \
+ "\n")
$(OUTPUT)%.8: %.rst
ifndef RST2MAN_DEP
$(error "rst2man not found, but required to generate man pages")
endif
- $(QUIET_GEN)rst2man $< > $@
+ $(QUIET_GEN)( cat $< ; printf "%b" $(call see_also,$<) ) | rst2man $(RST2MAN_OPTS) > $@
clean: helpers-clean
$(call QUIET_CLEAN, Documentation)
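Rather than each page carrying a hand-maintained SEE ALSO section (all of them are deleted in the .rst diffs below), the see_also macro recomputes the cross-references from the sibling bpftool*.rst files, filters the page being built out of its own list, and appends the text before piping into rst2man. The generated tail would look roughly like (abridged; the real list covers every other bpftool-*.rst page in the directory)::

	SEE ALSO
	========
		**bpf**\ (2),
		**bpf-helpers**\ (7),
		**bpftool**\ (8),
		**bpftool-cgroup**\ (8),
		...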
diff --git a/tools/bpf/bpftool/Documentation/bpftool-btf.rst b/tools/bpf/bpftool/Documentation/bpftool-btf.rst
index 896f4c6c2870..ff4d327a582e 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-btf.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst
@@ -71,26 +71,12 @@ DESCRIPTION
OPTIONS
=======
- -h, --help
- Print short generic help message (similar to **bpftool help**).
-
- -V, --version
- Print version number (similar to **bpftool version**).
-
- -j, --json
- Generate JSON output. For commands that cannot produce JSON, this
- option has no effect.
-
- -p, --pretty
- Generate human-readable JSON output. Implies **-j**.
-
- -d, --debug
- Print all logs available from libbpf, including debug-level
- information.
+ .. include:: common_options.rst
EXAMPLES
========
**# bpftool btf dump id 1226**
+
::
[1] PTR '(anon)' type_id=2
@@ -104,6 +90,7 @@ EXAMPLES
This gives an example of default output for all supported BTF kinds.
**$ cat prog.c**
+
::
struct fwd_struct;
@@ -144,6 +131,7 @@ This gives an example of default output for all supported BTF kinds.
}
**$ bpftool btf dump file prog.o**
+
::
[1] PTR '(anon)' type_id=2
@@ -229,20 +217,3 @@ All the standard ways to specify map or program are supported:
**# bpftool btf dump prog tag b88e0a09b1d9759d**
**# bpftool btf dump prog pinned /sys/fs/bpf/prog_name**
-
-SEE ALSO
-========
- **bpf**\ (2),
- **bpf-helpers**\ (7),
- **bpftool**\ (8),
- **bpftool-btf**\ (8),
- **bpftool-cgroup**\ (8),
- **bpftool-feature**\ (8),
- **bpftool-gen**\ (8),
- **bpftool-iter**\ (8),
- **bpftool-link**\ (8),
- **bpftool-map**\ (8),
- **bpftool-net**\ (8),
- **bpftool-perf**\ (8),
- **bpftool-prog**\ (8),
- **bpftool-struct_ops**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
index a226aee3574f..790944c35602 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
@@ -116,26 +116,11 @@ DESCRIPTION
OPTIONS
=======
- -h, --help
- Print short generic help message (similar to **bpftool help**).
-
- -V, --version
- Print version number (similar to **bpftool version**).
-
- -j, --json
- Generate JSON output. For commands that cannot produce JSON, this
- option has no effect.
-
- -p, --pretty
- Generate human-readable JSON output. Implies **-j**.
+ .. include:: common_options.rst
-f, --bpffs
Show file names of pinned programs.
- -d, --debug
- Print all logs available from libbpf, including debug-level
- information.
-
EXAMPLES
========
|
@@ -158,19 +143,3 @@ EXAMPLES
::
ID AttachType AttachFlags Name
-
-SEE ALSO
-========
- **bpf**\ (2),
- **bpf-helpers**\ (7),
- **bpftool**\ (8),
- **bpftool-btf**\ (8),
- **bpftool-feature**\ (8),
- **bpftool-gen**\ (8),
- **bpftool-iter**\ (8),
- **bpftool-link**\ (8),
- **bpftool-map**\ (8),
- **bpftool-net**\ (8),
- **bpftool-perf**\ (8),
- **bpftool-prog**\ (8),
- **bpftool-struct_ops**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-feature.rst b/tools/bpf/bpftool/Documentation/bpftool-feature.rst
index 8609f06e71de..dd3771bdbc57 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-feature.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-feature.rst
@@ -71,35 +71,4 @@ DESCRIPTION
OPTIONS
=======
- -h, --help
- Print short generic help message (similar to **bpftool help**).
-
- -V, --version
- Print version number (similar to **bpftool version**).
-
- -j, --json
- Generate JSON output. For commands that cannot produce JSON, this
- option has no effect.
-
- -p, --pretty
- Generate human-readable JSON output. Implies **-j**.
-
- -d, --debug
- Print all logs available from libbpf, including debug-level
- information.
-
-SEE ALSO
-========
- **bpf**\ (2),
- **bpf-helpers**\ (7),
- **bpftool**\ (8),
- **bpftool-btf**\ (8),
- **bpftool-cgroup**\ (8),
- **bpftool-gen**\ (8),
- **bpftool-iter**\ (8),
- **bpftool-link**\ (8),
- **bpftool-map**\ (8),
- **bpftool-net**\ (8),
- **bpftool-perf**\ (8),
- **bpftool-prog**\ (8),
- **bpftool-struct_ops**\ (8)
+ .. include:: common_options.rst
diff --git a/tools/bpf/bpftool/Documentation/bpftool-gen.rst b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
index df85dbd962c0..84cf0639696f 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-gen.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
@@ -126,26 +126,12 @@ DESCRIPTION
OPTIONS
=======
- -h, --help
- Print short generic help message (similar to **bpftool help**).
-
- -V, --version
- Print version number (similar to **bpftool version**).
-
- -j, --json
- Generate JSON output. For commands that cannot produce JSON,
- this option has no effect.
-
- -p, --pretty
- Generate human-readable JSON output. Implies **-j**.
-
- -d, --debug
- Print all logs available from libbpf, including debug-level
- information.
+ .. include:: common_options.rst
EXAMPLES
========
**$ cat example.c**
+
::
#include <stdbool.h>
@@ -187,6 +173,7 @@ This is example BPF application with two BPF programs and a mix of BPF maps
and global variables.
**$ bpftool gen skeleton example.o**
+
::
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
@@ -241,6 +228,7 @@ and global variables.
#endif /* __EXAMPLE_SKEL_H__ */
**$ cat example_user.c**
+
::
#include "example.skel.h"
@@ -283,6 +271,7 @@ and global variables.
}
**# ./example_user**
+
::
my_map name: my_map
@@ -290,19 +279,3 @@ and global variables.
my_static_var: 7
This is a stripped-out version of the skeleton generated for the above example code.
-
-SEE ALSO
-========
- **bpf**\ (2),
- **bpf-helpers**\ (7),
- **bpftool**\ (8),
- **bpftool-btf**\ (8),
- **bpftool-cgroup**\ (8),
- **bpftool-feature**\ (8),
- **bpftool-iter**\ (8),
- **bpftool-link**\ (8),
- **bpftool-map**\ (8),
- **bpftool-net**\ (8),
- **bpftool-perf**\ (8),
- **bpftool-prog**\ (8),
- **bpftool-struct_ops**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-iter.rst b/tools/bpf/bpftool/Documentation/bpftool-iter.rst
index 070ffacb42b5..51f49bead619 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-iter.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-iter.rst
@@ -51,16 +51,7 @@ DESCRIPTION
OPTIONS
=======
- -h, --help
- Print short generic help message (similar to **bpftool help**).
-
- -V, --version
- Print version number (similar to **bpftool version**).
-
- -d, --debug
- Print all logs available, even debug-level information. This
- includes logs from libbpf as well as from the verifier, when
- attempting to load programs.
+ .. include:: common_options.rst
EXAMPLES
========
@@ -77,19 +68,3 @@ EXAMPLES
Create a file-based bpf iterator from bpf_iter_hashmap.o and map with
id 20, and pin it to /sys/fs/bpf/my_hashmap
-
-SEE ALSO
-========
- **bpf**\ (2),
- **bpf-helpers**\ (7),
- **bpftool**\ (8),
- **bpftool-btf**\ (8),
- **bpftool-cgroup**\ (8),
- **bpftool-feature**\ (8),
- **bpftool-gen**\ (8),
- **bpftool-link**\ (8),
- **bpftool-map**\ (8),
- **bpftool-net**\ (8),
- **bpftool-perf**\ (8),
- **bpftool-prog**\ (8),
- **bpftool-struct_ops**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-link.rst b/tools/bpf/bpftool/Documentation/bpftool-link.rst
index 4a52e7a93339..5f7db2a837cc 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-link.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-link.rst
@@ -21,7 +21,7 @@ LINK COMMANDS
| **bpftool** **link { show | list }** [*LINK*]
| **bpftool** **link pin** *LINK* *FILE*
-| **bpftool** **link detach *LINK*
+| **bpftool** **link detach** *LINK*
| **bpftool** **link help**
|
| *LINK* := { **id** *LINK_ID* | **pinned** *FILE* }
@@ -62,18 +62,7 @@ DESCRIPTION
OPTIONS
=======
- -h, --help
- Print short generic help message (similar to **bpftool help**).
-
- -V, --version
- Print version number (similar to **bpftool version**).
-
- -j, --json
- Generate JSON output. For commands that cannot produce JSON, this
- option has no effect.
-
- -p, --pretty
- Generate human-readable JSON output. Implies **-j**.
+ .. include:: common_options.rst
-f, --bpffs
When showing BPF links, show file names of pinned
@@ -83,10 +72,6 @@ OPTIONS
Do not automatically attempt to mount any virtual file system
(such as tracefs or BPF virtual file system) when necessary.
- -d, --debug
- Print all logs available, even debug-level information. This
- includes logs from libbpf.
-
EXAMPLES
========
**# bpftool link show**
@@ -121,20 +106,3 @@ EXAMPLES
::
-rw------- 1 root root 0 Apr 23 21:39 link
-
-
-SEE ALSO
-========
- **bpf**\ (2),
- **bpf-helpers**\ (7),
- **bpftool**\ (8),
- **bpftool-btf**\ (8),
- **bpftool-cgroup**\ (8),
- **bpftool-feature**\ (8),
- **bpftool-gen**\ (8),
- **bpftool-iter**\ (8),
- **bpftool-map**\ (8),
- **bpftool-net**\ (8),
- **bpftool-perf**\ (8),
- **bpftool-prog**\ (8),
- **bpftool-struct_ops**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index 41e2a74252d0..dade10cdf295 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -23,7 +23,8 @@ MAP COMMANDS
| **bpftool** **map** { **show** | **list** } [*MAP*]
| **bpftool** **map create** *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE* \
-| **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**dev** *NAME*]
+| **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**inner_map** *MAP*] \
+| [**dev** *NAME*]
| **bpftool** **map dump** *MAP*
| **bpftool** **map update** *MAP* [**key** *DATA*] [**value** *VALUE*] [*UPDATE_FLAGS*]
| **bpftool** **map lookup** *MAP* [**key** *DATA*]
@@ -49,7 +50,7 @@ MAP COMMANDS
| | **lru_percpu_hash** | **lpm_trie** | **array_of_maps** | **hash_of_maps**
| | **devmap** | **devmap_hash** | **sockmap** | **cpumap** | **xskmap** | **sockhash**
| | **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage**
-| | **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** }
+| | **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** | **inode_storage** }
DESCRIPTION
===========
@@ -67,7 +68,7 @@ DESCRIPTION
maps. On such kernels bpftool will automatically emit this
information as well.
- **bpftool map create** *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE* **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**dev** *NAME*]
+ **bpftool map create** *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE* **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**inner_map** *MAP*] [**dev** *NAME*]
Create a new map with given parameters and pin it to *bpffs*
as *FILE*.
@@ -75,6 +76,11 @@ DESCRIPTION
desired flags, e.g. 1024 for **BPF_F_MMAPABLE** (see bpf.h
UAPI header for existing flags).
+ To create maps of type array-of-maps or hash-of-maps, the
+ **inner_map** keyword must be used to pass an inner map. The
+ kernel needs it to collect metadata related to the inner maps
+ that the new map will work with.
+
Keyword **dev** expects a network interface name, and is used
to request hardware offload for the map.
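
A hedged usage sketch of the new keyword (ids and paths hypothetical), in the style of the EXAMPLES section below:

        # bpftool map create /sys/fs/bpf/outer type hash_of_maps key 4 value 4 \
                entries 16 name outer inner_map id 12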
@@ -155,18 +161,7 @@ DESCRIPTION
OPTIONS
=======
- -h, --help
- Print short generic help message (similar to **bpftool help**).
-
- -V, --version
- Print version number (similar to **bpftool version**).
-
- -j, --json
- Generate JSON output. For commands that cannot produce JSON, this
- option has no effect.
-
- -p, --pretty
- Generate human-readable JSON output. Implies **-j**.
+ .. include:: common_options.rst
-f, --bpffs
Show file names of pinned maps.
@@ -175,13 +170,10 @@ OPTIONS
Do not automatically attempt to mount any virtual file system
(such as tracefs or BPF virtual file system) when necessary.
- -d, --debug
- Print all logs available from libbpf, including debug-level
- information.
-
EXAMPLES
========
**# bpftool map show**
+
::
10: hash name some_map flags 0x0
@@ -203,6 +195,7 @@ The following three commands are equivalent:
**# bpftool map dump id 10**
+
::
key: 00 01 02 03 value: 00 01 02 03 04 05 06 07
@@ -210,6 +203,7 @@ The following three commands are equivalent:
Found 2 elements
**# bpftool map getnext id 10 key 0 1 2 3**
+
::
key:
@@ -276,19 +270,3 @@ would be lost as soon as bpftool exits).
key: 00 00 00 00 value: 22 02 00 00
Found 1 element
-
-SEE ALSO
-========
- **bpf**\ (2),
- **bpf-helpers**\ (7),
- **bpftool**\ (8),
- **bpftool-btf**\ (8),
- **bpftool-cgroup**\ (8),
- **bpftool-feature**\ (8),
- **bpftool-gen**\ (8),
- **bpftool-iter**\ (8),
- **bpftool-link**\ (8),
- **bpftool-net**\ (8),
- **bpftool-perf**\ (8),
- **bpftool-prog**\ (8),
- **bpftool-struct_ops**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-net.rst b/tools/bpf/bpftool/Documentation/bpftool-net.rst
index aa7450736179..d8165d530937 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-net.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-net.rst
@@ -75,22 +75,7 @@ DESCRIPTION
OPTIONS
=======
- -h, --help
- Print short generic help message (similar to **bpftool help**).
-
- -V, --version
- Print version number (similar to **bpftool version**).
-
- -j, --json
- Generate JSON output. For commands that cannot produce JSON, this
- option has no effect.
-
- -p, --pretty
- Generate human-readable JSON output. Implies **-j**.
-
- -d, --debug
- Print all logs available from libbpf, including debug-level
- information.
+ .. include:: common_options.rst
EXAMPLES
========
@@ -187,20 +172,3 @@ EXAMPLES
::
xdp:
-
-
-SEE ALSO
-========
- **bpf**\ (2),
- **bpf-helpers**\ (7),
- **bpftool**\ (8),
- **bpftool-btf**\ (8),
- **bpftool-cgroup**\ (8),
- **bpftool-feature**\ (8),
- **bpftool-gen**\ (8),
- **bpftool-iter**\ (8),
- **bpftool-link**\ (8),
- **bpftool-map**\ (8),
- **bpftool-perf**\ (8),
- **bpftool-prog**\ (8),
- **bpftool-struct_ops**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-perf.rst b/tools/bpf/bpftool/Documentation/bpftool-perf.rst
index 9c592b7c6775..e958ce91de72 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-perf.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-perf.rst
@@ -40,22 +40,7 @@ DESCRIPTION
OPTIONS
=======
- -h, --help
- Print short generic help message (similar to **bpftool help**).
-
- -V, --version
- Print version number (similar to **bpftool version**).
-
- -j, --json
- Generate JSON output. For commands that cannot produce JSON, this
- option has no effect.
-
- -p, --pretty
- Generate human-readable JSON output. Implies **-j**.
-
- -d, --debug
- Print all logs available from libbpf, including debug-level
- information.
+ .. include:: common_options.rst
EXAMPLES
========
@@ -78,20 +63,3 @@ EXAMPLES
{"pid":21765,"fd":5,"prog_id":7,"fd_type":"kretprobe","func":"__x64_sys_nanosleep","offset":0}, \
{"pid":21767,"fd":5,"prog_id":8,"fd_type":"tracepoint","tracepoint":"sys_enter_nanosleep"}, \
{"pid":21800,"fd":5,"prog_id":9,"fd_type":"uprobe","filename":"/home/yhs/a.out","offset":1159}]
-
-
-SEE ALSO
-========
- **bpf**\ (2),
- **bpf-helpers**\ (7),
- **bpftool**\ (8),
- **bpftool-btf**\ (8),
- **bpftool-cgroup**\ (8),
- **bpftool-feature**\ (8),
- **bpftool-gen**\ (8),
- **bpftool-iter**\ (8),
- **bpftool-link**\ (8),
- **bpftool-map**\ (8),
- **bpftool-net**\ (8),
- **bpftool-prog**\ (8),
- **bpftool-struct_ops**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index 82e356b664e8..358c7309d419 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -210,18 +210,7 @@ DESCRIPTION
OPTIONS
=======
- -h, --help
- Print short generic help message (similar to **bpftool help**).
-
- -V, --version
- Print version number (similar to **bpftool version**).
-
- -j, --json
- Generate JSON output. For commands that cannot produce JSON, this
- option has no effect.
-
- -p, --pretty
- Generate human-readable JSON output. Implies **-j**.
+ .. include:: common_options.rst
-f, --bpffs
When showing BPF programs, show file names of pinned
@@ -234,11 +223,6 @@ OPTIONS
Do not automatically attempt to mount any virtual file system
(such as tracefs or BPF virtual file system) when necessary.
- -d, --debug
- Print all logs available, even debug-level information. This
- includes logs from libbpf as well as from the verifier, when
- attempting to load programs.
-
EXAMPLES
========
**# bpftool prog show**
@@ -342,19 +326,3 @@ EXAMPLES
40176203 cycles (83.05%)
42518139 instructions # 1.06 insns per cycle (83.39%)
123 llc_misses # 2.89 LLC misses per million insns (83.15%)
-
-SEE ALSO
-========
- **bpf**\ (2),
- **bpf-helpers**\ (7),
- **bpftool**\ (8),
- **bpftool-btf**\ (8),
- **bpftool-cgroup**\ (8),
- **bpftool-feature**\ (8),
- **bpftool-gen**\ (8),
- **bpftool-iter**\ (8),
- **bpftool-link**\ (8),
- **bpftool-map**\ (8),
- **bpftool-net**\ (8),
- **bpftool-perf**\ (8),
- **bpftool-struct_ops**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst b/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst
index d93cd1cb8b0f..506e70ee78e9 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst
@@ -60,23 +60,7 @@ DESCRIPTION
OPTIONS
=======
- -h, --help
- Print short generic help message (similar to **bpftool help**).
-
- -V, --version
- Print version number (similar to **bpftool version**).
-
- -j, --json
- Generate JSON output. For commands that cannot produce JSON, this
- option has no effect.
-
- -p, --pretty
- Generate human-readable JSON output. Implies **-j**.
-
- -d, --debug
- Print all logs available, even debug-level information. This
- includes logs from libbpf as well as from the verifier, when
- attempting to load programs.
+ .. include:: common_options.rst
EXAMPLES
========
@@ -98,20 +82,3 @@ EXAMPLES
::
Registered tcp_congestion_ops cubic id 110
-
-
-SEE ALSO
-========
- **bpf**\ (2),
- **bpf-helpers**\ (7),
- **bpftool**\ (8),
- **bpftool-btf**\ (8),
- **bpftool-cgroup**\ (8),
- **bpftool-feature**\ (8),
- **bpftool-gen**\ (8),
- **bpftool-iter**\ (8),
- **bpftool-link**\ (8),
- **bpftool-map**\ (8),
- **bpftool-net**\ (8),
- **bpftool-perf**\ (8),
- **bpftool-prog**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst
index 420d4d5df8b6..e7d949334961 100644
--- a/tools/bpf/bpftool/Documentation/bpftool.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool.rst
@@ -46,18 +46,7 @@ DESCRIPTION
OPTIONS
=======
- -h, --help
- Print short help message (similar to **bpftool help**).
-
- -V, --version
- Print version number (similar to **bpftool version**).
-
- -j, --json
- Generate JSON output. For commands that cannot produce JSON, this
- option has no effect.
-
- -p, --pretty
- Generate human-readable JSON output. Implies **-j**.
+ .. include:: common_options.rst
-m, --mapcompat
Allow loading maps with unknown map definitions.
@@ -65,24 +54,3 @@ OPTIONS
-n, --nomount
Do not automatically attempt to mount any virtual file system
(such as tracefs or BPF virtual file system) when necessary.
-
- -d, --debug
- Print all logs available, even debug-level information. This
- includes logs from libbpf as well as from the verifier, when
- attempting to load programs.
-
-SEE ALSO
-========
- **bpf**\ (2),
- **bpf-helpers**\ (7),
- **bpftool-btf**\ (8),
- **bpftool-cgroup**\ (8),
- **bpftool-feature**\ (8),
- **bpftool-gen**\ (8),
- **bpftool-iter**\ (8),
- **bpftool-link**\ (8),
- **bpftool-map**\ (8),
- **bpftool-net**\ (8),
- **bpftool-perf**\ (8),
- **bpftool-prog**\ (8),
- **bpftool-struct_ops**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/common_options.rst b/tools/bpf/bpftool/Documentation/common_options.rst
new file mode 100644
index 000000000000..05d06c74dcaa
--- /dev/null
+++ b/tools/bpf/bpftool/Documentation/common_options.rst
@@ -0,0 +1,22 @@
+-h, --help
+ Print short help message (similar to **bpftool help**).
+
+-V, --version
+ Print version number (similar to **bpftool version**), and optional
+ features that were included when bpftool was compiled. Optional
+ features include linking against libbfd to provide the disassembler
+ for JIT-ted programs (**bpftool prog dump jited**) and usage of BPF
+ skeletons (some features like **bpftool prog profile** or showing
+ pids associated with BPF objects may rely on it).
+
+-j, --json
+ Generate JSON output. For commands that cannot produce JSON, this
+ option has no effect.
+
+-p, --pretty
+ Generate human-readable JSON output. Implies **-j**.
+
+-d, --debug
+ Print all logs available, even debug-level information. This includes
+ logs from libbpf as well as from the verifier, when attempting to
+ load programs.
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 8462690a039b..f60e6ad3a1df 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -25,7 +25,7 @@ endif
LIBBPF = $(LIBBPF_PATH)libbpf.a
-BPFTOOL_VERSION := $(shell make -rR --no-print-directory -sC ../../.. kernelversion)
+BPFTOOL_VERSION ?= $(shell make -rR --no-print-directory -sC ../../.. kernelversion)
$(LIBBPF): FORCE
$(if $(LIBBPF_OUTPUT),@mkdir -p $(LIBBPF_OUTPUT))
@@ -176,7 +176,11 @@ $(OUTPUT)bpftool: $(OBJS) $(LIBBPF)
$(OUTPUT)%.o: %.c
$(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $<
-clean: $(LIBBPF)-clean
+feature-detect-clean:
+ $(call QUIET_CLEAN, feature-detect)
+ $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null
+
+clean: $(LIBBPF)-clean feature-detect-clean
$(call QUIET_CLEAN, bpftool)
$(Q)$(RM) -- $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d
$(Q)$(RM) -- $(BPFTOOL_BOOTSTRAP) $(OUTPUT)*.skel.h $(OUTPUT)vmlinux.h
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index f53ed2f1a4aa..3f1da30c4da6 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -704,13 +704,31 @@ _bpftool()
lru_percpu_hash lpm_trie array_of_maps \
hash_of_maps devmap devmap_hash sockmap cpumap \
xskmap sockhash cgroup_storage reuseport_sockarray \
- percpu_cgroup_storage queue stack' -- \
+ percpu_cgroup_storage queue stack sk_storage \
+ struct_ops inode_storage' -- \
"$cur" ) )
return 0
;;
- key|value|flags|name|entries)
+ key|value|flags|entries)
return 0
;;
+ inner_map)
+ COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) )
+ return 0
+ ;;
+ id)
+ _bpftool_get_map_ids
+ ;;
+ name)
+ case $pprev in
+ inner_map)
+ _bpftool_get_map_names
+ ;;
+ *)
+ return 0
+ ;;
+ esac
+ ;;
*)
_bpftool_once_attr 'type'
_bpftool_once_attr 'key'
@@ -718,6 +736,9 @@ _bpftool()
_bpftool_once_attr 'entries'
_bpftool_once_attr 'name'
_bpftool_once_attr 'flags'
+ if _bpftool_search_list 'array_of_maps' 'hash_of_maps'; then
+ _bpftool_once_attr 'inner_map'
+ fi
_bpftool_once_attr 'dev'
return 0
;;
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index f61184653633..4033c46d83e7 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -19,11 +19,9 @@
#include <sys/mman.h>
#include <bpf/btf.h>
-#include "bpf/libbpf_internal.h"
#include "json_writer.h"
#include "main.h"
-
#define MAX_OBJ_NAME_LEN 64
static void sanitize_identifier(char *name)
diff --git a/tools/bpf/bpftool/json_writer.c b/tools/bpf/bpftool/json_writer.c
index 86501cd3c763..7fea83bedf48 100644
--- a/tools/bpf/bpftool/json_writer.c
+++ b/tools/bpf/bpftool/json_writer.c
@@ -119,6 +119,12 @@ void jsonw_pretty(json_writer_t *self, bool on)
self->pretty = on;
}
+void jsonw_reset(json_writer_t *self)
+{
+ assert(self->depth == 0);
+ self->sep = '\0';
+}
+
/* Basic blocks */
static void jsonw_begin(json_writer_t *self, int c)
{
diff --git a/tools/bpf/bpftool/json_writer.h b/tools/bpf/bpftool/json_writer.h
index 35cf1f00f96c..8ace65cdb92f 100644
--- a/tools/bpf/bpftool/json_writer.h
+++ b/tools/bpf/bpftool/json_writer.h
@@ -27,6 +27,9 @@ void jsonw_destroy(json_writer_t **self_p);
/* Cause output to have pretty whitespace */
void jsonw_pretty(json_writer_t *self, bool on);
+/* Reset separator to create new JSON */
+void jsonw_reset(json_writer_t *self);
+
/* Add property name */
void jsonw_name(json_writer_t *self, const char *name);
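
A minimal C sketch of the new call (driver code hypothetical, not part of this patch): reusing one writer to emit several standalone JSON values.

        json_writer_t *w = jsonw_new(stdout);

        jsonw_string(w, "first");   /* prints "first" */
        jsonw_reset(w);             /* clear separator state; depth must be 0 */
        jsonw_string(w, "second");  /* prints "second", not ,"second" */
        jsonw_destroy(&w);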
diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
index a89f09e3c848..e77e1525d20a 100644
--- a/tools/bpf/bpftool/link.c
+++ b/tools/bpf/bpftool/link.c
@@ -77,6 +77,22 @@ static void show_link_attach_type_json(__u32 attach_type, json_writer_t *wtr)
jsonw_uint_field(wtr, "attach_type", attach_type);
}
+static bool is_iter_map_target(const char *target_name)
+{
+ return strcmp(target_name, "bpf_map_elem") == 0 ||
+ strcmp(target_name, "bpf_sk_storage_map") == 0;
+}
+
+static void show_iter_json(struct bpf_link_info *info, json_writer_t *wtr)
+{
+ const char *target_name = u64_to_ptr(info->iter.target_name);
+
+ jsonw_string_field(wtr, "target_name", target_name);
+
+ if (is_iter_map_target(target_name))
+ jsonw_uint_field(wtr, "map_id", info->iter.map.map_id);
+}
+
static int get_prog_info(int prog_id, struct bpf_prog_info *info)
{
__u32 len = sizeof(*info);
@@ -128,6 +144,9 @@ static int show_link_close_json(int fd, struct bpf_link_info *info)
info->cgroup.cgroup_id);
show_link_attach_type_json(info->cgroup.attach_type, json_wtr);
break;
+ case BPF_LINK_TYPE_ITER:
+ show_iter_json(info, json_wtr);
+ break;
case BPF_LINK_TYPE_NETNS:
jsonw_uint_field(json_wtr, "netns_ino",
info->netns.netns_ino);
@@ -175,6 +194,16 @@ static void show_link_attach_type_plain(__u32 attach_type)
printf("attach_type %u ", attach_type);
}
+static void show_iter_plain(struct bpf_link_info *info)
+{
+ const char *target_name = u64_to_ptr(info->iter.target_name);
+
+ printf("target_name %s ", target_name);
+
+ if (is_iter_map_target(target_name))
+ printf("map_id %u ", info->iter.map.map_id);
+}
+
static int show_link_close_plain(int fd, struct bpf_link_info *info)
{
struct bpf_prog_info prog_info;
@@ -204,6 +233,9 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info)
printf("\n\tcgroup_id %zu ", (size_t)info->cgroup.cgroup_id);
show_link_attach_type_plain(info->cgroup.attach_type);
break;
+ case BPF_LINK_TYPE_ITER:
+ show_iter_plain(info);
+ break;
case BPF_LINK_TYPE_NETNS:
printf("\n\tnetns_ino %u ", info->netns.netns_ino);
show_link_attach_type_plain(info->netns.attach_type);
@@ -231,7 +263,7 @@ static int do_show_link(int fd)
{
struct bpf_link_info info;
__u32 len = sizeof(info);
- char raw_tp_name[256];
+ char buf[256];
int err;
memset(&info, 0, sizeof(info));
@@ -245,8 +277,14 @@ again:
}
if (info.type == BPF_LINK_TYPE_RAW_TRACEPOINT &&
!info.raw_tracepoint.tp_name) {
- info.raw_tracepoint.tp_name = (unsigned long)&raw_tp_name;
- info.raw_tracepoint.tp_name_len = sizeof(raw_tp_name);
+ info.raw_tracepoint.tp_name = (unsigned long)&buf;
+ info.raw_tracepoint.tp_name_len = sizeof(buf);
+ goto again;
+ }
+ if (info.type == BPF_LINK_TYPE_ITER &&
+ !info.iter.target_name) {
+ info.iter.target_name = (unsigned long)&buf;
+ info.iter.target_name_len = sizeof(buf);
goto again;
}
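
With this change, a hedged example of what "bpftool link show" prints for an iterator link over a map (ids and names hypothetical):

        1: iter  prog 12  target_name bpf_map_elem  map_id 20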
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 4a191fcbeb82..682daaa49e6a 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -70,13 +70,42 @@ static int do_help(int argc, char **argv)
static int do_version(int argc, char **argv)
{
+#ifdef HAVE_LIBBFD_SUPPORT
+ const bool has_libbfd = true;
+#else
+ const bool has_libbfd = false;
+#endif
+#ifdef BPFTOOL_WITHOUT_SKELETONS
+ const bool has_skeletons = false;
+#else
+ const bool has_skeletons = true;
+#endif
+
if (json_output) {
- jsonw_start_object(json_wtr);
+ jsonw_start_object(json_wtr); /* root object */
+
jsonw_name(json_wtr, "version");
jsonw_printf(json_wtr, "\"%s\"", BPFTOOL_VERSION);
- jsonw_end_object(json_wtr);
+
+ jsonw_name(json_wtr, "features");
+ jsonw_start_object(json_wtr); /* features */
+ jsonw_bool_field(json_wtr, "libbfd", has_libbfd);
+ jsonw_bool_field(json_wtr, "skeletons", has_skeletons);
+ jsonw_end_object(json_wtr); /* features */
+
+ jsonw_end_object(json_wtr); /* root object */
} else {
+ unsigned int nb_features = 0;
+
printf("%s v%s\n", bin_name, BPFTOOL_VERSION);
+ printf("features:");
+ if (has_libbfd) {
+ printf(" libbfd");
+ nb_features++;
+ }
+ if (has_skeletons)
+ printf("%s skeletons", nb_features++ ? "," : "");
+ printf("\n");
}
return 0;
}
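
Assuming a build with both optional features compiled in (version string hypothetical), the new output reads:

        $ bpftool version
        bpftool v5.9.0
        features: libbfd, skeletons

and, with -j, a single object:

        {"version":"5.9.0","features":{"libbfd":true,"skeletons":true}}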
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index 3a27d31a1856..a7efbd84fbcc 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -50,6 +50,7 @@ const char * const map_type_name[] = {
[BPF_MAP_TYPE_SK_STORAGE] = "sk_storage",
[BPF_MAP_TYPE_STRUCT_OPS] = "struct_ops",
[BPF_MAP_TYPE_RINGBUF] = "ringbuf",
+ [BPF_MAP_TYPE_INODE_STORAGE] = "inode_storage",
};
const size_t map_type_name_size = ARRAY_SIZE(map_type_name);
@@ -212,8 +213,9 @@ static void print_entry_json(struct bpf_map_info *info, unsigned char *key,
jsonw_end_object(json_wtr);
}
-static void print_entry_error(struct bpf_map_info *info, unsigned char *key,
- const char *error_msg)
+static void
+print_entry_error_msg(struct bpf_map_info *info, unsigned char *key,
+ const char *error_msg)
{
int msg_size = strlen(error_msg);
bool single_line, break_names;
@@ -231,6 +233,40 @@ static void print_entry_error(struct bpf_map_info *info, unsigned char *key,
printf("\n");
}
+static void
+print_entry_error(struct bpf_map_info *map_info, void *key, int lookup_errno)
+{
+ /* For prog_array maps or arrays of maps, failure to look up the value
+ * means there is no entry for that key. Do not print an error message
+ * in that case.
+ */
+ if ((map_is_map_of_maps(map_info->type) ||
+ map_is_map_of_progs(map_info->type)) && lookup_errno == ENOENT)
+ return;
+
+ if (json_output) {
+ jsonw_start_object(json_wtr); /* entry */
+ jsonw_name(json_wtr, "key");
+ print_hex_data_json(key, map_info->key_size);
+ jsonw_name(json_wtr, "value");
+ jsonw_start_object(json_wtr); /* error */
+ jsonw_string_field(json_wtr, "error", strerror(lookup_errno));
+ jsonw_end_object(json_wtr); /* error */
+ jsonw_end_object(json_wtr); /* entry */
+ } else {
+ const char *msg = NULL;
+
+ if (lookup_errno == ENOENT)
+ msg = "<no entry>";
+ else if (lookup_errno == ENOSPC &&
+ map_info->type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY)
+ msg = "<cannot read>";
+
+ print_entry_error_msg(map_info, key,
+ msg ? : strerror(lookup_errno));
+ }
+}
+
static void print_entry_plain(struct bpf_map_info *info, unsigned char *key,
unsigned char *value)
{
@@ -712,56 +748,23 @@ static int dump_map_elem(int fd, void *key, void *value,
struct bpf_map_info *map_info, struct btf *btf,
json_writer_t *btf_wtr)
{
- int num_elems = 0;
- int lookup_errno;
-
- if (!bpf_map_lookup_elem(fd, key, value)) {
- if (json_output) {
- print_entry_json(map_info, key, value, btf);
- } else {
- if (btf) {
- struct btf_dumper d = {
- .btf = btf,
- .jw = btf_wtr,
- .is_plain_text = true,
- };
-
- do_dump_btf(&d, map_info, key, value);
- } else {
- print_entry_plain(map_info, key, value);
- }
- num_elems++;
- }
- return num_elems;
+ if (bpf_map_lookup_elem(fd, key, value)) {
+ print_entry_error(map_info, key, errno);
+ return -1;
}
- /* lookup error handling */
- lookup_errno = errno;
-
- if (map_is_map_of_maps(map_info->type) ||
- map_is_map_of_progs(map_info->type))
- return 0;
-
if (json_output) {
- jsonw_start_object(json_wtr);
- jsonw_name(json_wtr, "key");
- print_hex_data_json(key, map_info->key_size);
- jsonw_name(json_wtr, "value");
- jsonw_start_object(json_wtr);
- jsonw_string_field(json_wtr, "error", strerror(lookup_errno));
- jsonw_end_object(json_wtr);
- jsonw_end_object(json_wtr);
- } else {
- const char *msg = NULL;
-
- if (lookup_errno == ENOENT)
- msg = "<no entry>";
- else if (lookup_errno == ENOSPC &&
- map_info->type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY)
- msg = "<cannot read>";
+ print_entry_json(map_info, key, value, btf);
+ } else if (btf) {
+ struct btf_dumper d = {
+ .btf = btf,
+ .jw = btf_wtr,
+ .is_plain_text = true,
+ };
- print_entry_error(map_info, key,
- msg ? : strerror(lookup_errno));
+ do_dump_btf(&d, map_info, key, value);
+ } else {
+ print_entry_plain(map_info, key, value);
}
return 0;
@@ -872,7 +875,8 @@ map_dump(int fd, struct bpf_map_info *info, json_writer_t *wtr,
err = 0;
break;
}
- num_elems += dump_map_elem(fd, key, value, info, btf, wtr);
+ if (!dump_map_elem(fd, key, value, info, btf, wtr))
+ num_elems++;
prev_key = key;
}
@@ -1246,7 +1250,7 @@ static int do_create(int argc, char **argv)
{
struct bpf_create_map_attr attr = { NULL, };
const char *pinfile;
- int err, fd;
+ int err = -1, fd;
if (!REQ_ARGS(7))
return -1;
@@ -1261,13 +1265,13 @@ static int do_create(int argc, char **argv)
if (attr.map_type) {
p_err("map type already specified");
- return -1;
+ goto exit;
}
attr.map_type = map_type_from_str(*argv);
if ((int)attr.map_type < 0) {
p_err("unrecognized map type: %s", *argv);
- return -1;
+ goto exit;
}
NEXT_ARG();
} else if (is_prefix(*argv, "name")) {
@@ -1276,43 +1280,56 @@ static int do_create(int argc, char **argv)
} else if (is_prefix(*argv, "key")) {
if (parse_u32_arg(&argc, &argv, &attr.key_size,
"key size"))
- return -1;
+ goto exit;
} else if (is_prefix(*argv, "value")) {
if (parse_u32_arg(&argc, &argv, &attr.value_size,
"value size"))
- return -1;
+ goto exit;
} else if (is_prefix(*argv, "entries")) {
if (parse_u32_arg(&argc, &argv, &attr.max_entries,
"max entries"))
- return -1;
+ goto exit;
} else if (is_prefix(*argv, "flags")) {
if (parse_u32_arg(&argc, &argv, &attr.map_flags,
"flags"))
- return -1;
+ goto exit;
} else if (is_prefix(*argv, "dev")) {
NEXT_ARG();
if (attr.map_ifindex) {
p_err("offload device already specified");
- return -1;
+ goto exit;
}
attr.map_ifindex = if_nametoindex(*argv);
if (!attr.map_ifindex) {
p_err("unrecognized netdevice '%s': %s",
*argv, strerror(errno));
- return -1;
+ goto exit;
}
NEXT_ARG();
+ } else if (is_prefix(*argv, "inner_map")) {
+ struct bpf_map_info info = {};
+ __u32 len = sizeof(info);
+ int inner_map_fd;
+
+ NEXT_ARG();
+ if (!REQ_ARGS(2))
+ usage();
+ inner_map_fd = map_parse_fd_and_info(&argc, &argv,
+ &info, &len);
+ if (inner_map_fd < 0)
+ return -1;
+ attr.inner_map_fd = inner_map_fd;
} else {
p_err("unknown arg %s", *argv);
- return -1;
+ goto exit;
}
}
if (!attr.name) {
p_err("map name not specified");
- return -1;
+ goto exit;
}
set_max_rlimit();
@@ -1320,17 +1337,22 @@ static int do_create(int argc, char **argv)
fd = bpf_create_map_xattr(&attr);
if (fd < 0) {
p_err("map create failed: %s", strerror(errno));
- return -1;
+ goto exit;
}
err = do_pin_fd(fd, pinfile);
close(fd);
if (err)
- return err;
+ goto exit;
if (json_output)
jsonw_null(json_wtr);
- return 0;
+
+exit:
+ if (attr.inner_map_fd > 0)
+ close(attr.inner_map_fd);
+
+ return err;
}
static int do_pop_dequeue(int argc, char **argv)
@@ -1416,7 +1438,7 @@ static int do_help(int argc, char **argv)
"Usage: %1$s %2$s { show | list } [MAP]\n"
" %1$s %2$s create FILE type TYPE key KEY_SIZE value VALUE_SIZE \\\n"
" entries MAX_ENTRIES name NAME [flags FLAGS] \\\n"
- " [dev NAME]\n"
+ " [inner_map MAP] [dev NAME]\n"
" %1$s %2$s dump MAP\n"
" %1$s %2$s update MAP [key DATA] [value VALUE] [UPDATE_FLAGS]\n"
" %1$s %2$s lookup MAP [key DATA]\n"
@@ -1442,7 +1464,7 @@ static int do_help(int argc, char **argv)
" lru_percpu_hash | lpm_trie | array_of_maps | hash_of_maps |\n"
" devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n"
" cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n"
- " queue | stack | sk_storage | struct_ops | ringbuf }\n"
+ " queue | stack | sk_storage | struct_ops | ringbuf | inode_storage }\n"
" " HELP_SPEC_OPTIONS "\n"
"",
bin_name, argv[-2]);
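
For reference, a hedged C sketch of the same operation through libbpf of this era (names hypothetical): bpf_create_map_xattr() takes the template fd that the inner_map keyword resolves above.

        struct bpf_create_map_attr attr = {
                .name = "outer",
                .map_type = BPF_MAP_TYPE_HASH_OF_MAPS,
                .key_size = 4,
                .value_size = 4,        /* values are inner-map fds/ids */
                .max_entries = 16,
                .inner_map_fd = inner_fd,       /* fd of the template inner map */
        };
        int outer_fd = bpf_create_map_xattr(&attr);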
diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c
index 56c3a2bae3ef..910e7bac6e9e 100644
--- a/tools/bpf/bpftool/net.c
+++ b/tools/bpf/bpftool/net.c
@@ -6,22 +6,27 @@
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
+#include <time.h>
#include <unistd.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include <net/if.h>
#include <linux/if.h>
#include <linux/rtnetlink.h>
+#include <linux/socket.h>
#include <linux/tc_act/tc_bpf.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/types.h>
#include "bpf/nlattr.h"
-#include "bpf/libbpf_internal.h"
#include "main.h"
#include "netlink_dumper.h"
+#ifndef SOL_NETLINK
+#define SOL_NETLINK 270
+#endif
+
struct ip_devname_ifindex {
char devname[64];
int ifindex;
@@ -85,6 +90,266 @@ static enum net_attach_type parse_attach_type(const char *str)
return net_attach_type_size;
}
+typedef int (*dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb);
+
+typedef int (*__dump_nlmsg_t)(struct nlmsghdr *nlmsg, dump_nlmsg_t, void *cookie);
+
+static int netlink_open(__u32 *nl_pid)
+{
+ struct sockaddr_nl sa;
+ socklen_t addrlen;
+ int one = 1, ret;
+ int sock;
+
+ memset(&sa, 0, sizeof(sa));
+ sa.nl_family = AF_NETLINK;
+
+ sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+ if (sock < 0)
+ return -errno;
+
+ if (setsockopt(sock, SOL_NETLINK, NETLINK_EXT_ACK,
+ &one, sizeof(one)) < 0) {
+ p_err("Netlink error reporting not supported");
+ }
+
+ if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
+ ret = -errno;
+ goto cleanup;
+ }
+
+ addrlen = sizeof(sa);
+ if (getsockname(sock, (struct sockaddr *)&sa, &addrlen) < 0) {
+ ret = -errno;
+ goto cleanup;
+ }
+
+ if (addrlen != sizeof(sa)) {
+ ret = -LIBBPF_ERRNO__INTERNAL;
+ goto cleanup;
+ }
+
+ *nl_pid = sa.nl_pid;
+ return sock;
+
+cleanup:
+ close(sock);
+ return ret;
+}
+
+static int netlink_recv(int sock, __u32 nl_pid, __u32 seq,
+ __dump_nlmsg_t _fn, dump_nlmsg_t fn,
+ void *cookie)
+{
+ bool multipart = true;
+ struct nlmsgerr *err;
+ struct nlmsghdr *nh;
+ char buf[4096];
+ int len, ret;
+
+ while (multipart) {
+ multipart = false;
+ len = recv(sock, buf, sizeof(buf), 0);
+ if (len < 0) {
+ ret = -errno;
+ goto done;
+ }
+
+ if (len == 0)
+ break;
+
+ for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
+ nh = NLMSG_NEXT(nh, len)) {
+ if (nh->nlmsg_pid != nl_pid) {
+ ret = -LIBBPF_ERRNO__WRNGPID;
+ goto done;
+ }
+ if (nh->nlmsg_seq != seq) {
+ ret = -LIBBPF_ERRNO__INVSEQ;
+ goto done;
+ }
+ if (nh->nlmsg_flags & NLM_F_MULTI)
+ multipart = true;
+ switch (nh->nlmsg_type) {
+ case NLMSG_ERROR:
+ err = (struct nlmsgerr *)NLMSG_DATA(nh);
+ if (!err->error)
+ continue;
+ ret = err->error;
+ libbpf_nla_dump_errormsg(nh);
+ goto done;
+ case NLMSG_DONE:
+ return 0;
+ default:
+ break;
+ }
+ if (_fn) {
+ ret = _fn(nh, fn, cookie);
+ if (ret)
+ return ret;
+ }
+ }
+ }
+ ret = 0;
+done:
+ return ret;
+}
+
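These helpers chain as in the callers further down: open one socket, then issue typed dumps against it. A hedged sketch (error handling elided):

        __u32 nl_pid = 0;
        int sock = netlink_open(&nl_pid);

        if (sock >= 0) {
                netlink_get_link(sock, nl_pid, dump_link_nlmsg, &dev_array);
                close(sock);
        }
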
+static int __dump_class_nlmsg(struct nlmsghdr *nlh,
+ dump_nlmsg_t dump_class_nlmsg,
+ void *cookie)
+{
+ struct nlattr *tb[TCA_MAX + 1], *attr;
+ struct tcmsg *t = NLMSG_DATA(nlh);
+ int len;
+
+ len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t));
+ attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t)));
+ if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0)
+ return -LIBBPF_ERRNO__NLPARSE;
+
+ return dump_class_nlmsg(cookie, t, tb);
+}
+
+static int netlink_get_class(int sock, unsigned int nl_pid, int ifindex,
+ dump_nlmsg_t dump_class_nlmsg, void *cookie)
+{
+ struct {
+ struct nlmsghdr nlh;
+ struct tcmsg t;
+ } req = {
+ .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)),
+ .nlh.nlmsg_type = RTM_GETTCLASS,
+ .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
+ .t.tcm_family = AF_UNSPEC,
+ .t.tcm_ifindex = ifindex,
+ };
+ int seq = time(NULL);
+
+ req.nlh.nlmsg_seq = seq;
+ if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0)
+ return -errno;
+
+ return netlink_recv(sock, nl_pid, seq, __dump_class_nlmsg,
+ dump_class_nlmsg, cookie);
+}
+
+static int __dump_qdisc_nlmsg(struct nlmsghdr *nlh,
+ dump_nlmsg_t dump_qdisc_nlmsg,
+ void *cookie)
+{
+ struct nlattr *tb[TCA_MAX + 1], *attr;
+ struct tcmsg *t = NLMSG_DATA(nlh);
+ int len;
+
+ len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t));
+ attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t)));
+ if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0)
+ return -LIBBPF_ERRNO__NLPARSE;
+
+ return dump_qdisc_nlmsg(cookie, t, tb);
+}
+
+static int netlink_get_qdisc(int sock, unsigned int nl_pid, int ifindex,
+ dump_nlmsg_t dump_qdisc_nlmsg, void *cookie)
+{
+ struct {
+ struct nlmsghdr nlh;
+ struct tcmsg t;
+ } req = {
+ .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)),
+ .nlh.nlmsg_type = RTM_GETQDISC,
+ .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
+ .t.tcm_family = AF_UNSPEC,
+ .t.tcm_ifindex = ifindex,
+ };
+ int seq = time(NULL);
+
+ req.nlh.nlmsg_seq = seq;
+ if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0)
+ return -errno;
+
+ return netlink_recv(sock, nl_pid, seq, __dump_qdisc_nlmsg,
+ dump_qdisc_nlmsg, cookie);
+}
+
+static int __dump_filter_nlmsg(struct nlmsghdr *nlh,
+ dump_nlmsg_t dump_filter_nlmsg,
+ void *cookie)
+{
+ struct nlattr *tb[TCA_MAX + 1], *attr;
+ struct tcmsg *t = NLMSG_DATA(nlh);
+ int len;
+
+ len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t));
+ attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t)));
+ if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0)
+ return -LIBBPF_ERRNO__NLPARSE;
+
+ return dump_filter_nlmsg(cookie, t, tb);
+}
+
+static int netlink_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle,
+ dump_nlmsg_t dump_filter_nlmsg, void *cookie)
+{
+ struct {
+ struct nlmsghdr nlh;
+ struct tcmsg t;
+ } req = {
+ .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)),
+ .nlh.nlmsg_type = RTM_GETTFILTER,
+ .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
+ .t.tcm_family = AF_UNSPEC,
+ .t.tcm_ifindex = ifindex,
+ .t.tcm_parent = handle,
+ };
+ int seq = time(NULL);
+
+ req.nlh.nlmsg_seq = seq;
+ if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0)
+ return -errno;
+
+ return netlink_recv(sock, nl_pid, seq, __dump_filter_nlmsg,
+ dump_filter_nlmsg, cookie);
+}
+
+static int __dump_link_nlmsg(struct nlmsghdr *nlh,
+ dump_nlmsg_t dump_link_nlmsg, void *cookie)
+{
+ struct nlattr *tb[IFLA_MAX + 1], *attr;
+ struct ifinfomsg *ifi = NLMSG_DATA(nlh);
+ int len;
+
+ len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
+ attr = (struct nlattr *) ((void *) ifi + NLMSG_ALIGN(sizeof(*ifi)));
+ if (libbpf_nla_parse(tb, IFLA_MAX, attr, len, NULL) != 0)
+ return -LIBBPF_ERRNO__NLPARSE;
+
+ return dump_link_nlmsg(cookie, ifi, tb);
+}
+
+static int netlink_get_link(int sock, unsigned int nl_pid,
+ dump_nlmsg_t dump_link_nlmsg, void *cookie)
+{
+ struct {
+ struct nlmsghdr nlh;
+ struct ifinfomsg ifm;
+ } req = {
+ .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
+ .nlh.nlmsg_type = RTM_GETLINK,
+ .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
+ .ifm.ifi_family = AF_PACKET,
+ };
+ int seq = time(NULL);
+
+ req.nlh.nlmsg_seq = seq;
+ if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0)
+ return -errno;
+
+ return netlink_recv(sock, nl_pid, seq, __dump_link_nlmsg,
+ dump_link_nlmsg, cookie);
+}
+
static int dump_link_nlmsg(void *cookie, void *msg, struct nlattr **tb)
{
struct bpf_netdev_t *netinfo = cookie;
@@ -168,14 +433,14 @@ static int show_dev_tc_bpf(int sock, unsigned int nl_pid,
tcinfo.array_len = 0;
tcinfo.is_qdisc = false;
- ret = libbpf_nl_get_class(sock, nl_pid, dev->ifindex,
- dump_class_qdisc_nlmsg, &tcinfo);
+ ret = netlink_get_class(sock, nl_pid, dev->ifindex,
+ dump_class_qdisc_nlmsg, &tcinfo);
if (ret)
goto out;
tcinfo.is_qdisc = true;
- ret = libbpf_nl_get_qdisc(sock, nl_pid, dev->ifindex,
- dump_class_qdisc_nlmsg, &tcinfo);
+ ret = netlink_get_qdisc(sock, nl_pid, dev->ifindex,
+ dump_class_qdisc_nlmsg, &tcinfo);
if (ret)
goto out;
@@ -183,9 +448,9 @@ static int show_dev_tc_bpf(int sock, unsigned int nl_pid,
filter_info.ifindex = dev->ifindex;
for (i = 0; i < tcinfo.used_len; i++) {
filter_info.kind = tcinfo.handle_array[i].kind;
- ret = libbpf_nl_get_filter(sock, nl_pid, dev->ifindex,
- tcinfo.handle_array[i].handle,
- dump_filter_nlmsg, &filter_info);
+ ret = netlink_get_filter(sock, nl_pid, dev->ifindex,
+ tcinfo.handle_array[i].handle,
+ dump_filter_nlmsg, &filter_info);
if (ret)
goto out;
}
@@ -193,22 +458,22 @@ static int show_dev_tc_bpf(int sock, unsigned int nl_pid,
/* root, ingress and egress handle */
handle = TC_H_ROOT;
filter_info.kind = "root";
- ret = libbpf_nl_get_filter(sock, nl_pid, dev->ifindex, handle,
- dump_filter_nlmsg, &filter_info);
+ ret = netlink_get_filter(sock, nl_pid, dev->ifindex, handle,
+ dump_filter_nlmsg, &filter_info);
if (ret)
goto out;
handle = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS);
filter_info.kind = "clsact/ingress";
- ret = libbpf_nl_get_filter(sock, nl_pid, dev->ifindex, handle,
- dump_filter_nlmsg, &filter_info);
+ ret = netlink_get_filter(sock, nl_pid, dev->ifindex, handle,
+ dump_filter_nlmsg, &filter_info);
if (ret)
goto out;
handle = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_EGRESS);
filter_info.kind = "clsact/egress";
- ret = libbpf_nl_get_filter(sock, nl_pid, dev->ifindex, handle,
- dump_filter_nlmsg, &filter_info);
+ ret = netlink_get_filter(sock, nl_pid, dev->ifindex, handle,
+ dump_filter_nlmsg, &filter_info);
if (ret)
goto out;
@@ -386,7 +651,7 @@ static int do_show(int argc, char **argv)
struct bpf_attach_info attach_info = {};
int i, sock, ret, filter_idx = -1;
struct bpf_netdev_t dev_array;
- unsigned int nl_pid;
+ unsigned int nl_pid = 0;
char err_buf[256];
if (argc == 2) {
@@ -401,7 +666,7 @@ static int do_show(int argc, char **argv)
if (ret)
return -1;
- sock = libbpf_netlink_open(&nl_pid);
+ sock = netlink_open(&nl_pid);
if (sock < 0) {
fprintf(stderr, "failed to open netlink sock\n");
return -1;
@@ -416,7 +681,7 @@ static int do_show(int argc, char **argv)
jsonw_start_array(json_wtr);
NET_START_OBJECT;
NET_START_ARRAY("xdp", "%s:\n");
- ret = libbpf_nl_get_link(sock, nl_pid, dump_link_nlmsg, &dev_array);
+ ret = netlink_get_link(sock, nl_pid, dump_link_nlmsg, &dev_array);
NET_END_ARRAY("\n");
if (!ret) {
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index d393eb8263a6..d942c1e3372c 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -29,6 +29,9 @@
#include "main.h"
#include "xlated_dumper.h"
+#define BPF_METADATA_PREFIX "bpf_metadata_"
+#define BPF_METADATA_PREFIX_LEN (sizeof(BPF_METADATA_PREFIX) - 1)
+
const char * const prog_type_name[] = {
[BPF_PROG_TYPE_UNSPEC] = "unspec",
[BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter",
@@ -151,6 +154,198 @@ static void show_prog_maps(int fd, __u32 num_maps)
}
}
+static void *find_metadata(int prog_fd, struct bpf_map_info *map_info)
+{
+ struct bpf_prog_info prog_info;
+ __u32 prog_info_len;
+ __u32 map_info_len;
+ void *value = NULL;
+ __u32 *map_ids;
+ int nr_maps;
+ int key = 0;
+ int map_fd;
+ int ret;
+ __u32 i;
+
+ memset(&prog_info, 0, sizeof(prog_info));
+ prog_info_len = sizeof(prog_info);
+ ret = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &prog_info_len);
+ if (ret)
+ return NULL;
+
+ if (!prog_info.nr_map_ids)
+ return NULL;
+
+ map_ids = calloc(prog_info.nr_map_ids, sizeof(__u32));
+ if (!map_ids)
+ return NULL;
+
+ nr_maps = prog_info.nr_map_ids;
+ memset(&prog_info, 0, sizeof(prog_info));
+ prog_info.nr_map_ids = nr_maps;
+ prog_info.map_ids = ptr_to_u64(map_ids);
+ prog_info_len = sizeof(prog_info);
+
+ ret = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &prog_info_len);
+ if (ret)
+ goto free_map_ids;
+
+ for (i = 0; i < prog_info.nr_map_ids; i++) {
+ map_fd = bpf_map_get_fd_by_id(map_ids[i]);
+ if (map_fd < 0)
+ goto free_map_ids;
+
+ memset(map_info, 0, sizeof(*map_info));
+ map_info_len = sizeof(*map_info);
+ ret = bpf_obj_get_info_by_fd(map_fd, map_info, &map_info_len);
+ if (ret < 0) {
+ close(map_fd);
+ goto free_map_ids;
+ }
+
+ if (map_info->type != BPF_MAP_TYPE_ARRAY ||
+ map_info->key_size != sizeof(int) ||
+ map_info->max_entries != 1 ||
+ !map_info->btf_value_type_id ||
+ !strstr(map_info->name, ".rodata")) {
+ close(map_fd);
+ continue;
+ }
+
+ value = malloc(map_info->value_size);
+ if (!value) {
+ close(map_fd);
+ goto free_map_ids;
+ }
+
+ if (bpf_map_lookup_elem(map_fd, &key, value)) {
+ close(map_fd);
+ free(value);
+ value = NULL;
+ goto free_map_ids;
+ }
+
+ close(map_fd);
+ break;
+ }
+
+free_map_ids:
+ free(map_ids);
+ return value;
+}
+
+static bool has_metadata_prefix(const char *s)
+{
+ return strncmp(s, BPF_METADATA_PREFIX, BPF_METADATA_PREFIX_LEN) == 0;
+}
+
+static void show_prog_metadata(int fd, __u32 num_maps)
+{
+ const struct btf_type *t_datasec, *t_var;
+ struct bpf_map_info map_info;
+ struct btf_var_secinfo *vsi;
+ bool printed_header = false;
+ struct btf *btf = NULL;
+ unsigned int i, vlen;
+ void *value = NULL;
+ const char *name;
+ int err;
+
+ if (!num_maps)
+ return;
+
+ memset(&map_info, 0, sizeof(map_info));
+ value = find_metadata(fd, &map_info);
+ if (!value)
+ return;
+
+ err = btf__get_from_id(map_info.btf_id, &btf);
+ if (err || !btf)
+ goto out_free;
+
+ t_datasec = btf__type_by_id(btf, map_info.btf_value_type_id);
+ if (!btf_is_datasec(t_datasec))
+ goto out_free;
+
+ vlen = btf_vlen(t_datasec);
+ vsi = btf_var_secinfos(t_datasec);
+
+ /* We don't proceed to check the kinds of the elements of the DATASEC.
+ * The verifier enforces them to be BTF_KIND_VAR.
+ */
+
+ if (json_output) {
+ struct btf_dumper d = {
+ .btf = btf,
+ .jw = json_wtr,
+ .is_plain_text = false,
+ };
+
+ for (i = 0; i < vlen; i++, vsi++) {
+ t_var = btf__type_by_id(btf, vsi->type);
+ name = btf__name_by_offset(btf, t_var->name_off);
+
+ if (!has_metadata_prefix(name))
+ continue;
+
+ if (!printed_header) {
+ jsonw_name(json_wtr, "metadata");
+ jsonw_start_object(json_wtr);
+ printed_header = true;
+ }
+
+ jsonw_name(json_wtr, name + BPF_METADATA_PREFIX_LEN);
+ err = btf_dumper_type(&d, t_var->type, value + vsi->offset);
+ if (err) {
+ p_err("btf dump failed: %d", err);
+ break;
+ }
+ }
+ if (printed_header)
+ jsonw_end_object(json_wtr);
+ } else {
+ json_writer_t *btf_wtr = jsonw_new(stdout);
+ struct btf_dumper d = {
+ .btf = btf,
+ .jw = btf_wtr,
+ .is_plain_text = true,
+ };
+
+ if (!btf_wtr) {
+ p_err("jsonw alloc failed");
+ goto out_free;
+ }
+
+ for (i = 0; i < vlen; i++, vsi++) {
+ t_var = btf__type_by_id(btf, vsi->type);
+ name = btf__name_by_offset(btf, t_var->name_off);
+
+ if (!has_metadata_prefix(name))
+ continue;
+
+ if (!printed_header) {
+ printf("\tmetadata:");
+ printed_header = true;
+ }
+
+ printf("\n\t\t%s = ", name + BPF_METADATA_PREFIX_LEN);
+
+ jsonw_reset(btf_wtr);
+ err = btf_dumper_type(&d, t_var->type, value + vsi->offset);
+ if (err) {
+ p_err("btf dump failed: %d", err);
+ break;
+ }
+ }
+ if (printed_header)
+ jsonw_destroy(&btf_wtr);
+ }
+
+out_free:
+ btf__free(btf);
+ free(value);
+}
+
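A hedged sketch of the producing side (BPF C, not part of this patch): any .rodata variable whose name carries the bpf_metadata_ prefix is picked up here, keyed by the remainder of the name.

        /* In the BPF program; SEC() comes from bpf_helpers.h. */
        volatile const char bpf_metadata_version[] SEC(".rodata") = "1.0";

bpftool prog show would then append something like:

        metadata:
                version = "1.0"
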
static void print_prog_header_json(struct bpf_prog_info *info)
{
jsonw_uint_field(json_wtr, "id", info->id);
@@ -228,6 +423,8 @@ static void print_prog_json(struct bpf_prog_info *info, int fd)
emit_obj_refs_json(&refs_table, info->id, json_wtr);
+ show_prog_metadata(fd, info->nr_map_ids);
+
jsonw_end_object(json_wtr);
}
@@ -297,6 +494,8 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd)
emit_obj_refs_plain(&refs_table, info->id, "\n\tpids ");
printf("\n");
+
+ show_prog_metadata(fd, info->nr_map_ids);
}
static int show_prog(int fd)
@@ -1304,7 +1503,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
enum bpf_prog_type prog_type = common_prog_type;
if (prog_type == BPF_PROG_TYPE_UNSPEC) {
- const char *sec_name = bpf_program__title(pos, false);
+ const char *sec_name = bpf_program__section_name(pos);
err = get_prog_type_by_name(sec_name, &prog_type,
&expected_attach_type);
@@ -1398,7 +1597,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
err = bpf_obj_pin(bpf_program__fd(prog), pinfile);
if (err) {
p_err("failed to pin program %s",
- bpf_program__title(prog, false));
+ bpf_program__section_name(prog));
goto err_close_obj;
}
} else {
diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile
index fe8eb537688b..66cb92136de4 100644
--- a/tools/bpf/resolve_btfids/Makefile
+++ b/tools/bpf/resolve_btfids/Makefile
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
include ../../scripts/Makefile.include
+include ../../scripts/Makefile.arch
ifeq ($(srctree),)
srctree := $(patsubst %/,%,$(dir $(CURDIR)))
@@ -29,6 +30,7 @@ endif
AR = $(HOSTAR)
CC = $(HOSTCC)
LD = $(HOSTLD)
+ARCH = $(HOSTARCH)
OUTPUT ?= $(srctree)/tools/bpf/resolve_btfids/
diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c
index 0def0bb1f783..dfa540d8a02d 100644
--- a/tools/bpf/resolve_btfids/main.c
+++ b/tools/bpf/resolve_btfids/main.c
@@ -199,9 +199,16 @@ static char *get_id(const char *prefix_end)
/*
* __BTF_ID__func__vfs_truncate__0
* prefix_end = ^
+ * pos = ^
*/
- char *p, *id = strdup(prefix_end + sizeof("__") - 1);
+ int len = strlen(prefix_end);
+ int pos = sizeof("__") - 1;
+ char *p, *id;
+ if (pos >= len)
+ return NULL;
+
+ id = strdup(prefix_end + pos);
if (id) {
/*
* __BTF_ID__func__vfs_truncate__0
@@ -220,6 +227,24 @@ static char *get_id(const char *prefix_end)
return id;
}
+static struct btf_id *add_set(struct object *obj, char *name)
+{
+ /*
+ * __BTF_ID__set__name
+ * name = ^
+ * id = ^
+ */
+ char *id = name + sizeof(BTF_SET "__") - 1;
+ int len = strlen(name);
+
+ if (id >= name + len) {
+ pr_err("FAILED to parse set name: %s\n", name);
+ return NULL;
+ }
+
+ return btf_id__add(&obj->sets, id, true);
+}
+
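A worked example of the new parsing (symbol name hypothetical): for __BTF_ID__set__bpf_d_path_btf_ids, add_set() receives set__bpf_d_path_btf_ids and registers the id bpf_d_path_btf_ids; a truncated __BTF_ID__set__ with nothing after the prefix now fails with an error instead of reading past the end of the string.
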
static struct btf_id *add_symbol(struct rb_root *root, char *name, size_t size)
{
char *id;
@@ -412,7 +437,7 @@ static int symbols_collect(struct object *obj)
id = add_symbol(&obj->funcs, prefix, sizeof(BTF_FUNC) - 1);
/* set */
} else if (!strncmp(prefix, BTF_SET, sizeof(BTF_SET) - 1)) {
- id = add_symbol(&obj->sets, prefix, sizeof(BTF_SET) - 1);
+ id = add_set(obj, prefix);
/*
* SET objects store list's count, which is encoded
* in symbol's size, together with 'cnt' field hence
diff --git a/tools/build/Makefile b/tools/build/Makefile
index 727050c40f09..722f1700d96a 100644
--- a/tools/build/Makefile
+++ b/tools/build/Makefile
@@ -38,6 +38,8 @@ clean:
$(call QUIET_CLEAN, fixdep)
$(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
$(Q)rm -f $(OUTPUT)fixdep
+ $(call QUIET_CLEAN, feature-detect)
+ $(Q)$(MAKE) -C feature/ clean >/dev/null
$(OUTPUT)fixdep-in.o: FORCE
$(Q)$(MAKE) $(build)=fixdep
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index c1daf4d57518..38415d251075 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -46,7 +46,6 @@ FEATURE_TESTS_BASIC := \
libelf-getphdrnum \
libelf-gelf_getnote \
libelf-getshdrstrndx \
- libelf-mmap \
libnuma \
numa_num_possible_cpus \
libperl \
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index d220fe952747..b2a2347c67ed 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -25,7 +25,6 @@ FILES= \
test-libelf-getphdrnum.bin \
test-libelf-gelf_getnote.bin \
test-libelf-getshdrstrndx.bin \
- test-libelf-mmap.bin \
test-libdebuginfod.bin \
test-libnuma.bin \
test-numa_num_possible_cpus.bin \
@@ -146,9 +145,6 @@ $(OUTPUT)test-dwarf.bin:
$(OUTPUT)test-dwarf_getlocations.bin:
$(BUILD) $(DWARFLIBS)
-$(OUTPUT)test-libelf-mmap.bin:
- $(BUILD) -lelf
-
$(OUTPUT)test-libelf-getphdrnum.bin:
$(BUILD) -lelf
diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c
index 5479e543b194..5284e6e9c756 100644
--- a/tools/build/feature/test-all.c
+++ b/tools/build/feature/test-all.c
@@ -30,10 +30,6 @@
# include "test-libelf.c"
#undef main
-#define main main_test_libelf_mmap
-# include "test-libelf-mmap.c"
-#undef main
-
#define main main_test_get_current_dir_name
# include "test-get_current_dir_name.c"
#undef main
diff --git a/tools/build/feature/test-libelf-mmap.c b/tools/build/feature/test-libelf-mmap.c
deleted file mode 100644
index 2c3ef81affe2..000000000000
--- a/tools/build/feature/test-libelf-mmap.c
+++ /dev/null
@@ -1,9 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <libelf.h>
-
-int main(void)
-{
- Elf *elf = elf_begin(0, ELF_C_READ_MMAP, 0);
-
- return (long)elf;
-}
diff --git a/tools/cgroup/iocost_monitor.py b/tools/cgroup/iocost_monitor.py
index f4699f9b46ba..c4ff907c078b 100644
--- a/tools/cgroup/iocost_monitor.py
+++ b/tools/cgroup/iocost_monitor.py
@@ -45,8 +45,7 @@ except:
err('The kernel does not have iocost enabled')
IOC_RUNNING = prog['IOC_RUNNING'].value_()
-NR_USAGE_SLOTS = prog['NR_USAGE_SLOTS'].value_()
-HWEIGHT_WHOLE = prog['HWEIGHT_WHOLE'].value_()
+WEIGHT_ONE = prog['WEIGHT_ONE'].value_()
VTIME_PER_SEC = prog['VTIME_PER_SEC'].value_()
VTIME_PER_USEC = prog['VTIME_PER_USEC'].value_()
AUTOP_SSD_FAST = prog['AUTOP_SSD_FAST'].value_()
@@ -100,7 +99,7 @@ class IocStat:
self.period_ms = ioc.period_us.value_() / 1_000
self.period_at = ioc.period_at.value_() / 1_000_000
self.vperiod_at = ioc.period_at_vtime.value_() / VTIME_PER_SEC
- self.vrate_pct = ioc.vtime_rate.counter.value_() * 100 / VTIME_PER_USEC
+ self.vrate_pct = ioc.vtime_base_rate.value_() * 100 / VTIME_PER_USEC
self.busy_level = ioc.busy_level.value_()
self.autop_idx = ioc.autop_idx.value_()
self.user_cost_model = ioc.user_cost_model.value_()
@@ -136,7 +135,7 @@ class IocStat:
def table_header_str(self):
return f'{"":25} active {"weight":>9} {"hweight%":>13} {"inflt%":>6} ' \
- f'{"dbt":>3} {"delay":>6} {"usages%"}'
+ f'{"debt":>7} {"delay":>7} {"usage%"}'
class IocgStat:
def __init__(self, iocg):
@@ -144,11 +143,11 @@ class IocgStat:
blkg = iocg.pd.blkg
self.is_active = not list_empty(iocg.active_list.address_of_())
- self.weight = iocg.weight.value_()
- self.active = iocg.active.value_()
- self.inuse = iocg.inuse.value_()
- self.hwa_pct = iocg.hweight_active.value_() * 100 / HWEIGHT_WHOLE
- self.hwi_pct = iocg.hweight_inuse.value_() * 100 / HWEIGHT_WHOLE
+ self.weight = iocg.weight.value_() / WEIGHT_ONE
+ self.active = iocg.active.value_() / WEIGHT_ONE
+ self.inuse = iocg.inuse.value_() / WEIGHT_ONE
+ self.hwa_pct = iocg.hweight_active.value_() * 100 / WEIGHT_ONE
+ self.hwi_pct = iocg.hweight_inuse.value_() * 100 / WEIGHT_ONE
self.address = iocg.value_()
vdone = iocg.done_vtime.counter.value_()
@@ -160,23 +159,13 @@ class IocgStat:
else:
self.inflight_pct = 0
- # vdebt used to be an atomic64_t and is now u64, support both
- try:
- self.debt_ms = iocg.abs_vdebt.counter.value_() / VTIME_PER_USEC / 1000
- except:
- self.debt_ms = iocg.abs_vdebt.value_() / VTIME_PER_USEC / 1000
-
- self.use_delay = blkg.use_delay.counter.value_()
- self.delay_ms = blkg.delay_nsec.counter.value_() / 1_000_000
-
- usage_idx = iocg.usage_idx.value_()
- self.usages = []
- self.usage = 0
- for i in range(NR_USAGE_SLOTS):
- usage = iocg.usages[(usage_idx + 1 + i) % NR_USAGE_SLOTS].value_()
- upct = usage * 100 / HWEIGHT_WHOLE
- self.usages.append(upct)
- self.usage = max(self.usage, upct)
+ self.usage = (100 * iocg.usage_delta_us.value_() /
+ ioc.period_us.value_()) if self.active else 0
+ self.debt_ms = iocg.abs_vdebt.value_() / VTIME_PER_USEC / 1000
+ if blkg.use_delay.counter.value_() != 0:
+ self.delay_ms = blkg.delay_nsec.counter.value_() / 1_000_000
+ else:
+ self.delay_ms = 0
def dict(self, now, path):
out = { 'cgroup' : path,
@@ -189,25 +178,20 @@ class IocgStat:
'hweight_inuse_pct' : self.hwi_pct,
'inflight_pct' : self.inflight_pct,
'debt_ms' : self.debt_ms,
- 'use_delay' : self.use_delay,
'delay_ms' : self.delay_ms,
'usage_pct' : self.usage,
'address' : self.address }
- for i in range(len(self.usages)):
- out[f'usage_pct_{i}'] = str(self.usages[i])
return out
def table_row_str(self, path):
out = f'{path[-28:]:28} ' \
f'{"*" if self.is_active else " "} ' \
- f'{self.inuse:5}/{self.active:5} ' \
+ f'{round(self.inuse):5}/{round(self.active):5} ' \
f'{self.hwi_pct:6.2f}/{self.hwa_pct:6.2f} ' \
f'{self.inflight_pct:6.2f} ' \
- f'{min(math.ceil(self.debt_ms), 999):3} ' \
- f'{min(self.use_delay, 99):2}*'\
- f'{min(math.ceil(self.delay_ms), 999):03} '
- for u in self.usages:
- out += f'{min(round(u), 999):03d}:'
+ f'{self.debt_ms:7.2f} ' \
+ f'{self.delay_ms:7.2f} '\
+ f'{min(self.usage, 999):6.2f}'
out = out.rstrip(':')
return out
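The monitor now divides raw weights by WEIGHT_ONE because blk-iocost stores weights in fixed point. A sketch of the arithmetic; the 1 << 16 scale is an assumption about the kernel internals, not something this patch defines:

    #include <stdio.h>

    #define WEIGHT_ONE (1u << 16)   /* assumed fixed-point scale */

    int main(void)
    {
            unsigned int raw = 100 * WEIGHT_ONE;    /* cgroup weight 100 */

            /* what the monitor now prints for weight/active/inuse */
            printf("weight=%u\n", raw / WEIGHT_ONE);
            return 0;
    }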
diff --git a/tools/gpio/gpio-event-mon.c b/tools/gpio/gpio-event-mon.c
index 1a303a81aeef..90c3155f05b1 100644
--- a/tools/gpio/gpio-event-mon.c
+++ b/tools/gpio/gpio-event-mon.c
@@ -23,17 +23,17 @@
#include <sys/ioctl.h>
#include <sys/types.h>
#include <linux/gpio.h>
+#include "gpio-utils.h"
int monitor_device(const char *device_name,
- unsigned int line,
- uint32_t handleflags,
- uint32_t eventflags,
+ unsigned int *lines,
+ unsigned int num_lines,
+ struct gpio_v2_line_config *config,
unsigned int loops)
{
- struct gpioevent_request req;
- struct gpiohandle_data data;
+ struct gpio_v2_line_values values;
char *chrdev_name;
- int fd;
+ int cfd, lfd;
int ret;
int i = 0;
@@ -41,44 +41,55 @@ int monitor_device(const char *device_name,
if (ret < 0)
return -ENOMEM;
- fd = open(chrdev_name, 0);
- if (fd == -1) {
+ cfd = open(chrdev_name, 0);
+ if (cfd == -1) {
ret = -errno;
fprintf(stderr, "Failed to open %s\n", chrdev_name);
goto exit_free_name;
}
- req.lineoffset = line;
- req.handleflags = handleflags;
- req.eventflags = eventflags;
- strcpy(req.consumer_label, "gpio-event-mon");
-
- ret = ioctl(fd, GPIO_GET_LINEEVENT_IOCTL, &req);
- if (ret == -1) {
- ret = -errno;
- fprintf(stderr, "Failed to issue GET EVENT "
- "IOCTL (%d)\n",
- ret);
- goto exit_close_error;
- }
+ ret = gpiotools_request_line(device_name, lines, num_lines, config,
+ "gpio-event-mon");
+ if (ret < 0)
+ goto exit_device_close;
+ else
+ lfd = ret;
/* Read initial states */
- ret = ioctl(req.fd, GPIOHANDLE_GET_LINE_VALUES_IOCTL, &data);
- if (ret == -1) {
- ret = -errno;
- fprintf(stderr, "Failed to issue GPIOHANDLE GET LINE "
- "VALUES IOCTL (%d)\n",
+ values.mask = 0;
+ values.bits = 0;
+ for (i = 0; i < num_lines; i++)
+ gpiotools_set_bit(&values.mask, i);
+ ret = gpiotools_get_values(lfd, &values);
+ if (ret < 0) {
+ fprintf(stderr,
+ "Failed to issue GPIO LINE GET VALUES IOCTL (%d)\n",
ret);
- goto exit_close_error;
+ goto exit_line_close;
}
- fprintf(stdout, "Monitoring line %d on %s\n", line, device_name);
- fprintf(stdout, "Initial line value: %d\n", data.values[0]);
+ if (num_lines == 1) {
+ fprintf(stdout, "Monitoring line %d on %s\n", lines[0], device_name);
+ fprintf(stdout, "Initial line value: %d\n",
+ gpiotools_test_bit(values.bits, 0));
+ } else {
+ fprintf(stdout, "Monitoring lines %d", lines[0]);
+ for (i = 1; i < num_lines - 1; i++)
+ fprintf(stdout, ", %d", lines[i]);
+ fprintf(stdout, " and %d on %s\n", lines[i], device_name);
+ fprintf(stdout, "Initial line values: %d",
+ gpiotools_test_bit(values.bits, 0));
+ for (i = 1; i < num_lines - 1; i++)
+ fprintf(stdout, ", %d",
+ gpiotools_test_bit(values.bits, i));
+ fprintf(stdout, " and %d\n",
+ gpiotools_test_bit(values.bits, i));
+ }
while (1) {
- struct gpioevent_data event;
+ struct gpio_v2_line_event event;
- ret = read(req.fd, &event, sizeof(event));
+ ret = read(lfd, &event, sizeof(event));
if (ret == -1) {
if (errno == EAGAIN) {
fprintf(stderr, "nothing available\n");
@@ -96,12 +107,14 @@ int monitor_device(const char *device_name,
ret = -EIO;
break;
}
- fprintf(stdout, "GPIO EVENT %llu: ", event.timestamp);
+ fprintf(stdout, "GPIO EVENT at %llu on line %d (%d|%d) ",
+ event.timestamp_ns, event.offset, event.line_seqno,
+ event.seqno);
switch (event.id) {
- case GPIOEVENT_EVENT_RISING_EDGE:
+ case GPIO_V2_LINE_EVENT_RISING_EDGE:
fprintf(stdout, "rising edge");
break;
- case GPIOEVENT_EVENT_FALLING_EDGE:
+ case GPIO_V2_LINE_EVENT_FALLING_EDGE:
fprintf(stdout, "falling edge");
break;
default:
@@ -114,8 +127,11 @@ int monitor_device(const char *device_name,
break;
}
-exit_close_error:
- if (close(fd) == -1)
+exit_line_close:
+ if (close(lfd) == -1)
+ perror("Failed to close line file");
+exit_device_close:
+ if (close(cfd) == -1)
perror("Failed to close GPIO character device file");
exit_free_name:
free(chrdev_name);
@@ -127,29 +143,37 @@ void print_usage(void)
fprintf(stderr, "Usage: gpio-event-mon [options]...\n"
"Listen to events on GPIO lines, 0->1 1->0\n"
" -n <name> Listen on GPIOs on a named device (must be stated)\n"
- " -o <n> Offset to monitor\n"
+ " -o <n> Offset of line to monitor (may be repeated)\n"
" -d Set line as open drain\n"
" -s Set line as open source\n"
" -r Listen for rising edges\n"
" -f Listen for falling edges\n"
+ " -b <n> Debounce the line with period n microseconds\n"
" [-c <n>] Do <n> loops (optional, infinite loop if not stated)\n"
" -? This helptext\n"
"\n"
"Example:\n"
- "gpio-event-mon -n gpiochip0 -o 4 -r -f\n"
+ "gpio-event-mon -n gpiochip0 -o 4 -r -f -b 10000\n"
);
}
+#define EDGE_FLAGS \
+ (GPIO_V2_LINE_FLAG_EDGE_RISING | \
+ GPIO_V2_LINE_FLAG_EDGE_FALLING)
+
int main(int argc, char **argv)
{
const char *device_name = NULL;
- unsigned int line = -1;
+ unsigned int lines[GPIO_V2_LINES_MAX];
+ unsigned int num_lines = 0;
unsigned int loops = 0;
- uint32_t handleflags = GPIOHANDLE_REQUEST_INPUT;
- uint32_t eventflags = 0;
- int c;
+ struct gpio_v2_line_config config;
+ int c, attr, i;
+ unsigned long debounce_period_us = 0;
- while ((c = getopt(argc, argv, "c:n:o:dsrf?")) != -1) {
+ memset(&config, 0, sizeof(config));
+ config.flags = GPIO_V2_LINE_FLAG_INPUT;
+ while ((c = getopt(argc, argv, "c:n:o:b:dsrf?")) != -1) {
switch (c) {
case 'c':
loops = strtoul(optarg, NULL, 10);
@@ -158,19 +182,27 @@ int main(int argc, char **argv)
device_name = optarg;
break;
case 'o':
- line = strtoul(optarg, NULL, 10);
+ if (num_lines >= GPIO_V2_LINES_MAX) {
+ print_usage();
+ return -1;
+ }
+ lines[num_lines] = strtoul(optarg, NULL, 10);
+ num_lines++;
+ break;
+ case 'b':
+ debounce_period_us = strtoul(optarg, NULL, 10);
break;
case 'd':
- handleflags |= GPIOHANDLE_REQUEST_OPEN_DRAIN;
+ config.flags |= GPIO_V2_LINE_FLAG_OPEN_DRAIN;
break;
case 's':
- handleflags |= GPIOHANDLE_REQUEST_OPEN_SOURCE;
+ config.flags |= GPIO_V2_LINE_FLAG_OPEN_SOURCE;
break;
case 'r':
- eventflags |= GPIOEVENT_REQUEST_RISING_EDGE;
+ config.flags |= GPIO_V2_LINE_FLAG_EDGE_RISING;
break;
case 'f':
- eventflags |= GPIOEVENT_REQUEST_FALLING_EDGE;
+ config.flags |= GPIO_V2_LINE_FLAG_EDGE_FALLING;
break;
case '?':
print_usage();
@@ -178,15 +210,23 @@ int main(int argc, char **argv)
}
}
- if (!device_name || line == -1) {
+ if (debounce_period_us) {
+ attr = config.num_attrs;
+ config.num_attrs++;
+ for (i = 0; i < num_lines; i++)
+ gpiotools_set_bit(&config.attrs[attr].mask, i);
+ config.attrs[attr].attr.id = GPIO_V2_LINE_ATTR_ID_DEBOUNCE;
+ config.attrs[attr].attr.debounce_period_us = debounce_period_us;
+ }
+
+ if (!device_name || num_lines == 0) {
print_usage();
return -1;
}
- if (!eventflags) {
+ if (!(config.flags & EDGE_FLAGS)) {
printf("No flags specified, listening on both rising and "
"falling edges\n");
- eventflags = GPIOEVENT_REQUEST_BOTH_EDGES;
+ config.flags |= EDGE_FLAGS;
}
- return monitor_device(device_name, line, handleflags,
- eventflags, loops);
+ return monitor_device(device_name, lines, num_lines, &config, loops);
}
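The tool now sits on the GPIO v2 character-device uapi. A sketch of the underlying request it issues, with edge detection plus an optional debounce attribute; the device path and line offset are illustrative:

    #include <fcntl.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <linux/gpio.h>

    int request_edge_line(void)
    {
            struct gpio_v2_line_request req;
            int cfd = open("/dev/gpiochip0", O_RDONLY);

            if (cfd < 0)
                    return -1;
            memset(&req, 0, sizeof(req));
            req.offsets[0] = 4;
            req.num_lines = 1;
            strcpy(req.consumer, "example");
            req.config.flags = GPIO_V2_LINE_FLAG_INPUT |
                               GPIO_V2_LINE_FLAG_EDGE_RISING |
                               GPIO_V2_LINE_FLAG_EDGE_FALLING;
            req.config.num_attrs = 1;
            req.config.attrs[0].mask = 1;   /* attribute covers line 0 */
            req.config.attrs[0].attr.id = GPIO_V2_LINE_ATTR_ID_DEBOUNCE;
            req.config.attrs[0].attr.debounce_period_us = 10000;
            if (ioctl(cfd, GPIO_V2_GET_LINE_IOCTL, &req) < 0)
                    req.fd = -1;
            close(cfd);
            return req.fd;  /* read struct gpio_v2_line_event from this */
    }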
diff --git a/tools/gpio/gpio-hammer.c b/tools/gpio/gpio-hammer.c
index 9fd926e8cb52..54fdf59dd320 100644
--- a/tools/gpio/gpio-hammer.c
+++ b/tools/gpio/gpio-hammer.c
@@ -22,39 +22,46 @@
#include <linux/gpio.h>
#include "gpio-utils.h"
-int hammer_device(const char *device_name, unsigned int *lines, int nlines,
+int hammer_device(const char *device_name, unsigned int *lines, int num_lines,
unsigned int loops)
{
- struct gpiohandle_data data;
+ struct gpio_v2_line_values values;
+ struct gpio_v2_line_config config;
char swirr[] = "-\\|/";
int fd;
int ret;
int i, j;
unsigned int iteration = 0;
- memset(&data.values, 0, sizeof(data.values));
- ret = gpiotools_request_linehandle(device_name, lines, nlines,
- GPIOHANDLE_REQUEST_OUTPUT, &data,
- "gpio-hammer");
+ memset(&config, 0, sizeof(config));
+ config.flags = GPIO_V2_LINE_FLAG_OUTPUT;
+
+ ret = gpiotools_request_line(device_name, lines, num_lines,
+ &config, "gpio-hammer");
if (ret < 0)
goto exit_error;
else
fd = ret;
- ret = gpiotools_get_values(fd, &data);
+ values.mask = 0;
+ values.bits = 0;
+ for (i = 0; i < num_lines; i++)
+ gpiotools_set_bit(&values.mask, i);
+
+ ret = gpiotools_get_values(fd, &values);
if (ret < 0)
goto exit_close_error;
fprintf(stdout, "Hammer lines [");
- for (i = 0; i < nlines; i++) {
+ for (i = 0; i < num_lines; i++) {
fprintf(stdout, "%d", lines[i]);
- if (i != (nlines - 1))
+ if (i != (num_lines - 1))
fprintf(stdout, ", ");
}
fprintf(stdout, "] on %s, initial states: [", device_name);
- for (i = 0; i < nlines; i++) {
- fprintf(stdout, "%d", data.values[i]);
- if (i != (nlines - 1))
+ for (i = 0; i < num_lines; i++) {
+ fprintf(stdout, "%d", gpiotools_test_bit(values.bits, i));
+ if (i != (num_lines - 1))
fprintf(stdout, ", ");
}
fprintf(stdout, "]\n");
@@ -63,15 +70,15 @@ int hammer_device(const char *device_name, unsigned int *lines, int nlines,
j = 0;
while (1) {
/* Invert all lines so we blink */
- for (i = 0; i < nlines; i++)
- data.values[i] = !data.values[i];
+ for (i = 0; i < num_lines; i++)
+ gpiotools_change_bit(&values.bits, i);
- ret = gpiotools_set_values(fd, &data);
+ ret = gpiotools_set_values(fd, &values);
if (ret < 0)
goto exit_close_error;
/* Re-read values to get status */
- ret = gpiotools_get_values(fd, &data);
+ ret = gpiotools_get_values(fd, &values);
if (ret < 0)
goto exit_close_error;
@@ -81,9 +88,10 @@ int hammer_device(const char *device_name, unsigned int *lines, int nlines,
j = 0;
fprintf(stdout, "[");
- for (i = 0; i < nlines; i++) {
- fprintf(stdout, "%d: %d", lines[i], data.values[i]);
- if (i != (nlines - 1))
+ for (i = 0; i < num_lines; i++) {
+ fprintf(stdout, "%d: %d", lines[i],
+ gpiotools_test_bit(values.bits, i));
+ if (i != (num_lines - 1))
fprintf(stdout, ", ");
}
fprintf(stdout, "]\r");
@@ -97,7 +105,7 @@ int hammer_device(const char *device_name, unsigned int *lines, int nlines,
ret = 0;
exit_close_error:
- gpiotools_release_linehandle(fd);
+ gpiotools_release_line(fd);
exit_error:
return ret;
}
@@ -121,7 +129,7 @@ int main(int argc, char **argv)
const char *device_name = NULL;
unsigned int lines[GPIOHANDLES_MAX];
unsigned int loops = 0;
- int nlines;
+ int num_lines;
int c;
int i;
@@ -158,11 +166,11 @@ int main(int argc, char **argv)
return -1;
}
- nlines = i;
+ num_lines = i;
- if (!device_name || !nlines) {
+ if (!device_name || !num_lines) {
print_usage();
return -1;
}
- return hammer_device(device_name, lines, nlines, loops);
+ return hammer_device(device_name, lines, num_lines, loops);
}
diff --git a/tools/gpio/gpio-utils.c b/tools/gpio/gpio-utils.c
index 16a5d9cb9da2..37187e056c8b 100644
--- a/tools/gpio/gpio-utils.c
+++ b/tools/gpio/gpio-utils.c
@@ -38,7 +38,7 @@
* such as "gpiochip0"
* @lines: An array of desired lines, specified by offset
* index for the associated GPIO device.
- * @nline: The number of lines to request.
+ * @num_lines: The number of lines to request.
* @flag: The new flag for requested gpio. Reference
* "linux/gpio.h" for the meaning of flag.
* @data: Default value will be set to gpio when flag is
@@ -56,7 +56,7 @@
* On failure return the errno.
*/
int gpiotools_request_linehandle(const char *device_name, unsigned int *lines,
- unsigned int nlines, unsigned int flag,
+ unsigned int num_lines, unsigned int flag,
struct gpiohandle_data *data,
const char *consumer_label)
{
@@ -78,12 +78,12 @@ int gpiotools_request_linehandle(const char *device_name, unsigned int *lines,
goto exit_free_name;
}
- for (i = 0; i < nlines; i++)
+ for (i = 0; i < num_lines; i++)
req.lineoffsets[i] = lines[i];
req.flags = flag;
strcpy(req.consumer_label, consumer_label);
- req.lines = nlines;
+ req.lines = num_lines;
if (flag & GPIOHANDLE_REQUEST_OUTPUT)
memcpy(req.default_values, data, sizeof(req.default_values));
@@ -100,20 +100,87 @@ exit_free_name:
free(chrdev_name);
return ret < 0 ? ret : req.fd;
}
+
+/**
+ * gpiotools_request_line() - request gpio lines in a gpiochip
+ * @device_name: The name of gpiochip without prefix "/dev/",
+ * such as "gpiochip0"
+ * @lines: An array of desired lines, specified by offset
+ * index for the associated GPIO device.
+ * @num_lines: The number of lines to request.
+ * @config: The new config for requested gpio. Reference
+ * "linux/gpio.h" for config details.
+ * @consumer: The name of consumer, such as "sysfs",
+ * "powerkey". This is useful for other users to
+ * know who is using it.
+ *
+ * Request gpio lines through the ioctl provided by chardev. User
+ * could call gpiotools_set_values() and gpiotools_get_values() to
+ * read and write respectively through the returned fd. Call
+ * gpiotools_release_line() to release these lines after that.
+ *
+ * Return: On success return the fd;
+ * On failure return the errno.
+ */
+int gpiotools_request_line(const char *device_name, unsigned int *lines,
+ unsigned int num_lines,
+ struct gpio_v2_line_config *config,
+ const char *consumer)
+{
+ struct gpio_v2_line_request req;
+ char *chrdev_name;
+ int fd;
+ int i;
+ int ret;
+
+ ret = asprintf(&chrdev_name, "/dev/%s", device_name);
+ if (ret < 0)
+ return -ENOMEM;
+
+ fd = open(chrdev_name, 0);
+ if (fd == -1) {
+ ret = -errno;
+ fprintf(stderr, "Failed to open %s, %s\n",
+ chrdev_name, strerror(errno));
+ goto exit_free_name;
+ }
+
+ memset(&req, 0, sizeof(req));
+ for (i = 0; i < num_lines; i++)
+ req.offsets[i] = lines[i];
+
+ req.config = *config;
+ strcpy(req.consumer, consumer);
+ req.num_lines = num_lines;
+
+ ret = ioctl(fd, GPIO_V2_GET_LINE_IOCTL, &req);
+ if (ret == -1) {
+ ret = -errno;
+ fprintf(stderr, "Failed to issue %s (%d), %s\n",
+ "GPIO_GET_LINE_IOCTL", ret, strerror(errno));
+ }
+
+ if (close(fd) == -1)
+ perror("Failed to close GPIO character device file");
+exit_free_name:
+ free(chrdev_name);
+ return ret < 0 ? ret : req.fd;
+}
+
/**
* gpiotools_set_values(): Set the value of gpio(s)
* @fd: The fd returned by
- * gpiotools_request_linehandle().
- * @data: The array of values want to set.
+ * gpiotools_request_line().
+ * @values: The array of values to set.
*
* Return: On success return 0;
* On failure return the errno.
*/
-int gpiotools_set_values(const int fd, struct gpiohandle_data *data)
+int gpiotools_set_values(const int fd, struct gpio_v2_line_values *values)
{
int ret;
- ret = ioctl(fd, GPIOHANDLE_SET_LINE_VALUES_IOCTL, data);
+ ret = ioctl(fd, GPIO_V2_LINE_SET_VALUES_IOCTL, values);
if (ret == -1) {
ret = -errno;
fprintf(stderr, "Failed to issue %s (%d), %s\n",
@@ -127,17 +194,17 @@ int gpiotools_set_values(const int fd, struct gpiohandle_data *data)
/**
* gpiotools_get_values(): Get the value of gpio(s)
* @fd: The fd returned by
- * gpiotools_request_linehandle().
- * @data: The array of values get from hardware.
+ * gpiotools_request_line().
+ * @values: The array of values read from hardware.
*
* Return: On success return 0;
* On failure return the errno.
*/
-int gpiotools_get_values(const int fd, struct gpiohandle_data *data)
+int gpiotools_get_values(const int fd, struct gpio_v2_line_values *values)
{
int ret;
- ret = ioctl(fd, GPIOHANDLE_GET_LINE_VALUES_IOCTL, data);
+ ret = ioctl(fd, GPIO_V2_LINE_GET_VALUES_IOCTL, values);
if (ret == -1) {
ret = -errno;
fprintf(stderr, "Failed to issue %s (%d), %s\n",
@@ -170,6 +237,27 @@ int gpiotools_release_linehandle(const int fd)
}
/**
+ * gpiotools_release_line(): Release the line(s) of gpiochip
+ * @fd: The fd returned by
+ * gpiotools_request_line().
+ *
+ * Return: On success return 0;
+ * On failure return the errno.
+ */
+int gpiotools_release_line(const int fd)
+{
+ int ret;
+
+ ret = close(fd);
+ if (ret == -1) {
+ perror("Failed to close GPIO LINE device file");
+ ret = -errno;
+ }
+
+ return ret;
+}
+
+/**
* gpiotools_get(): Get value from specific line
* @device_name: The name of gpiochip without prefix "/dev/",
* such as "gpiochip0"
@@ -180,11 +268,14 @@ int gpiotools_release_linehandle(const int fd)
*/
int gpiotools_get(const char *device_name, unsigned int line)
{
- struct gpiohandle_data data;
+ int ret;
+ unsigned int value;
unsigned int lines[] = {line};
- gpiotools_gets(device_name, lines, 1, &data);
- return data.values[0];
+ ret = gpiotools_gets(device_name, lines, 1, &value);
+ if (ret)
+ return ret;
+ return value;
}
@@ -194,28 +285,36 @@ int gpiotools_get(const char *device_name, unsigned int line)
* such as "gpiochip0".
* @lines: An array of desired lines, specified by offset
* index for the associated GPIO device.
- * @nline: The number of lines to request.
- * @data: The array of values get from gpiochip.
+ * @num_lines: The number of lines to request.
+ * @values: The array of values read from gpiochip.
*
* Return: On success return 0;
* On failure return the errno.
*/
int gpiotools_gets(const char *device_name, unsigned int *lines,
- unsigned int nlines, struct gpiohandle_data *data)
+ unsigned int num_lines, unsigned int *values)
{
- int fd;
+ int fd, i;
int ret;
int ret_close;
+ struct gpio_v2_line_config config;
+ struct gpio_v2_line_values lv;
- ret = gpiotools_request_linehandle(device_name, lines, nlines,
- GPIOHANDLE_REQUEST_INPUT, data,
- CONSUMER);
+ memset(&config, 0, sizeof(config));
+ config.flags = GPIO_V2_LINE_FLAG_INPUT;
+ ret = gpiotools_request_line(device_name, lines, num_lines,
+ &config, CONSUMER);
if (ret < 0)
return ret;
fd = ret;
- ret = gpiotools_get_values(fd, data);
- ret_close = gpiotools_release_linehandle(fd);
+ for (i = 0; i < num_lines; i++)
+ gpiotools_set_bit(&lv.mask, i);
+ ret = gpiotools_get_values(fd, &lv);
+ if (!ret)
+ for (i = 0; i < num_lines; i++)
+ values[i] = gpiotools_test_bit(lv.bits, i);
+ ret_close = gpiotools_release_line(fd);
return ret < 0 ? ret : ret_close;
}
@@ -232,11 +331,9 @@ int gpiotools_gets(const char *device_name, unsigned int *lines,
int gpiotools_set(const char *device_name, unsigned int line,
unsigned int value)
{
- struct gpiohandle_data data;
unsigned int lines[] = {line};
- data.values[0] = value;
- return gpiotools_sets(device_name, lines, 1, &data);
+ return gpiotools_sets(device_name, lines, 1, &value);
}
/**
@@ -245,23 +342,32 @@ int gpiotools_set(const char *device_name, unsigned int line,
* such as "gpiochip0".
* @lines: An array of desired lines, specified by offset
* index for the associated GPIO device.
- * @nline: The number of lines to request.
- * @data: The array of values set to gpiochip, must be
+ * @num_lines: The number of lines to request.
+ * @values: The array of values to set on the gpiochip, must be
* 0(low) or 1(high).
*
* Return: On success return 0;
* On failure return the errno.
*/
int gpiotools_sets(const char *device_name, unsigned int *lines,
- unsigned int nlines, struct gpiohandle_data *data)
+ unsigned int num_lines, unsigned int *values)
{
- int ret;
+ int ret, i;
+ struct gpio_v2_line_config config;
- ret = gpiotools_request_linehandle(device_name, lines, nlines,
- GPIOHANDLE_REQUEST_OUTPUT, data,
- CONSUMER);
+ memset(&config, 0, sizeof(config));
+ config.flags = GPIO_V2_LINE_FLAG_OUTPUT;
+ config.num_attrs = 1;
+ config.attrs[0].attr.id = GPIO_V2_LINE_ATTR_ID_OUTPUT_VALUES;
+ for (i = 0; i < num_lines; i++) {
+ gpiotools_set_bit(&config.attrs[0].mask, i);
+ gpiotools_assign_bit(&config.attrs[0].attr.values,
+ i, values[i]);
+ }
+ ret = gpiotools_request_line(device_name, lines, num_lines,
+ &config, CONSUMER);
if (ret < 0)
return ret;
- return gpiotools_release_linehandle(ret);
+ return gpiotools_release_line(ret);
}
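A sketch of driving the reworked one-shot helpers, which now take plain unsigned int value arrays instead of struct gpiohandle_data; the chip name and offsets are illustrative:

    #include "gpio-utils.h"

    int toggle_and_read(void)
    {
            unsigned int lines[] = { 3, 4 };        /* illustrative offsets */
            unsigned int values[2] = { 1, 0 };
            int ret;

            ret = gpiotools_sets("gpiochip0", lines, 2, values);
            if (ret < 0)
                    return ret;
            return gpiotools_gets("gpiochip0", lines, 2, values);
    }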
diff --git a/tools/gpio/gpio-utils.h b/tools/gpio/gpio-utils.h
index cf37f13f3dcb..6c69a9f1c253 100644
--- a/tools/gpio/gpio-utils.h
+++ b/tools/gpio/gpio-utils.h
@@ -12,7 +12,9 @@
#ifndef _GPIO_UTILS_H_
#define _GPIO_UTILS_H_
+#include <stdbool.h>
#include <string.h>
+#include <linux/types.h>
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
@@ -23,19 +25,55 @@ static inline int check_prefix(const char *str, const char *prefix)
}
int gpiotools_request_linehandle(const char *device_name, unsigned int *lines,
- unsigned int nlines, unsigned int flag,
+ unsigned int num_lines, unsigned int flag,
struct gpiohandle_data *data,
const char *consumer_label);
-int gpiotools_set_values(const int fd, struct gpiohandle_data *data);
-int gpiotools_get_values(const int fd, struct gpiohandle_data *data);
int gpiotools_release_linehandle(const int fd);
+int gpiotools_request_line(const char *device_name,
+ unsigned int *lines,
+ unsigned int num_lines,
+ struct gpio_v2_line_config *config,
+ const char *consumer);
+int gpiotools_set_values(const int fd, struct gpio_v2_line_values *values);
+int gpiotools_get_values(const int fd, struct gpio_v2_line_values *values);
+int gpiotools_release_line(const int fd);
+
int gpiotools_get(const char *device_name, unsigned int line);
int gpiotools_gets(const char *device_name, unsigned int *lines,
- unsigned int nlines, struct gpiohandle_data *data);
+ unsigned int num_lines, unsigned int *values);
int gpiotools_set(const char *device_name, unsigned int line,
unsigned int value);
int gpiotools_sets(const char *device_name, unsigned int *lines,
- unsigned int nlines, struct gpiohandle_data *data);
+ unsigned int num_lines, unsigned int *values);
+
+/* helper functions for gpio_v2_line_values bits */
+static inline void gpiotools_set_bit(__u64 *b, int n)
+{
+ *b |= _BITULL(n);
+}
+
+static inline void gpiotools_change_bit(__u64 *b, int n)
+{
+ *b ^= _BITULL(n);
+}
+
+static inline void gpiotools_clear_bit(__u64 *b, int n)
+{
+ *b &= ~_BITULL(n);
+}
+
+static inline int gpiotools_test_bit(__u64 b, int n)
+{
+ return !!(b & _BITULL(n));
+}
+
+static inline void gpiotools_assign_bit(__u64 *b, int n, bool value)
+{
+ if (value)
+ gpiotools_set_bit(b, n);
+ else
+ gpiotools_clear_bit(b, n);
+}
#endif /* _GPIO_UTILS_H_ */
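A sketch of how the new bit helpers pair with struct gpio_v2_line_values: mask selects which of the requested lines an operation touches and bits carries their states, one bit per line in request order. The lfd is assumed to come from gpiotools_request_line():

    #include <linux/gpio.h>
    #include "gpio-utils.h"

    int drive_first_two_high(int lfd)
    {
            struct gpio_v2_line_values values = { .bits = 0, .mask = 0 };

            gpiotools_set_bit(&values.mask, 0);     /* touch line 0 */
            gpiotools_set_bit(&values.mask, 1);     /* and line 1 */
            gpiotools_assign_bit(&values.bits, 0, true);
            gpiotools_assign_bit(&values.bits, 1, true);
            return gpiotools_set_values(lfd, &values);
    }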
diff --git a/tools/gpio/gpio-watch.c b/tools/gpio/gpio-watch.c
index 5cea24fddfa7..f229ec62301b 100644
--- a/tools/gpio/gpio-watch.c
+++ b/tools/gpio/gpio-watch.c
@@ -21,8 +21,8 @@
int main(int argc, char **argv)
{
- struct gpioline_info_changed chg;
- struct gpioline_info req;
+ struct gpio_v2_line_info_changed chg;
+ struct gpio_v2_line_info req;
struct pollfd pfd;
int fd, i, j, ret;
char *event, *end;
@@ -40,11 +40,11 @@ int main(int argc, char **argv)
for (i = 0, j = 2; i < argc - 2; i++, j++) {
memset(&req, 0, sizeof(req));
- req.line_offset = strtoul(argv[j], &end, 0);
+ req.offset = strtoul(argv[j], &end, 0);
if (*end != '\0')
goto err_usage;
- ret = ioctl(fd, GPIO_GET_LINEINFO_WATCH_IOCTL, &req);
+ ret = ioctl(fd, GPIO_V2_GET_LINEINFO_WATCH_IOCTL, &req);
if (ret) {
perror("unable to set up line watch");
return EXIT_FAILURE;
@@ -71,13 +71,13 @@ int main(int argc, char **argv)
}
switch (chg.event_type) {
- case GPIOLINE_CHANGED_REQUESTED:
+ case GPIO_V2_LINE_CHANGED_REQUESTED:
event = "requested";
break;
- case GPIOLINE_CHANGED_RELEASED:
+ case GPIO_V2_LINE_CHANGED_RELEASED:
event = "released";
break;
- case GPIOLINE_CHANGED_CONFIG:
+ case GPIO_V2_LINE_CHANGED_CONFIG:
event = "config changed";
break;
default:
@@ -87,7 +87,7 @@ int main(int argc, char **argv)
}
printf("line %u: %s at %llu\n",
- chg.info.line_offset, event, chg.timestamp);
+ chg.info.offset, event, chg.timestamp_ns);
}
}
diff --git a/tools/gpio/lsgpio.c b/tools/gpio/lsgpio.c
index b08d7a5e779b..5a05a454d0c9 100644
--- a/tools/gpio/lsgpio.c
+++ b/tools/gpio/lsgpio.c
@@ -25,57 +25,73 @@
struct gpio_flag {
char *name;
- unsigned long mask;
+ unsigned long long mask;
};
struct gpio_flag flagnames[] = {
{
- .name = "kernel",
- .mask = GPIOLINE_FLAG_KERNEL,
+ .name = "used",
+ .mask = GPIO_V2_LINE_FLAG_USED,
+ },
+ {
+ .name = "input",
+ .mask = GPIO_V2_LINE_FLAG_INPUT,
},
{
.name = "output",
- .mask = GPIOLINE_FLAG_IS_OUT,
+ .mask = GPIO_V2_LINE_FLAG_OUTPUT,
},
{
.name = "active-low",
- .mask = GPIOLINE_FLAG_ACTIVE_LOW,
+ .mask = GPIO_V2_LINE_FLAG_ACTIVE_LOW,
},
{
.name = "open-drain",
- .mask = GPIOLINE_FLAG_OPEN_DRAIN,
+ .mask = GPIO_V2_LINE_FLAG_OPEN_DRAIN,
},
{
.name = "open-source",
- .mask = GPIOLINE_FLAG_OPEN_SOURCE,
+ .mask = GPIO_V2_LINE_FLAG_OPEN_SOURCE,
},
{
.name = "pull-up",
- .mask = GPIOLINE_FLAG_BIAS_PULL_UP,
+ .mask = GPIO_V2_LINE_FLAG_BIAS_PULL_UP,
},
{
.name = "pull-down",
- .mask = GPIOLINE_FLAG_BIAS_PULL_DOWN,
+ .mask = GPIO_V2_LINE_FLAG_BIAS_PULL_DOWN,
},
{
.name = "bias-disabled",
- .mask = GPIOLINE_FLAG_BIAS_DISABLE,
+ .mask = GPIO_V2_LINE_FLAG_BIAS_DISABLED,
},
};
-void print_flags(unsigned long flags)
+static void print_attributes(struct gpio_v2_line_info *info)
{
int i;
- int printed = 0;
+ const char *field_format = "%s";
for (i = 0; i < ARRAY_SIZE(flagnames); i++) {
- if (flags & flagnames[i].mask) {
- if (printed)
- fprintf(stdout, " ");
- fprintf(stdout, "%s", flagnames[i].name);
- printed++;
+ if (info->flags & flagnames[i].mask) {
+ fprintf(stdout, field_format, flagnames[i].name);
+ field_format = ", %s";
}
}
+
+ if ((info->flags & GPIO_V2_LINE_FLAG_EDGE_RISING) &&
+ (info->flags & GPIO_V2_LINE_FLAG_EDGE_FALLING))
+ fprintf(stdout, field_format, "both-edges");
+ else if (info->flags & GPIO_V2_LINE_FLAG_EDGE_RISING)
+ fprintf(stdout, field_format, "rising-edge");
+ else if (info->flags & GPIO_V2_LINE_FLAG_EDGE_FALLING)
+ fprintf(stdout, field_format, "falling-edge");
+
+ for (i = 0; i < info->num_attrs; i++) {
+ if (info->attrs[i].id == GPIO_V2_LINE_ATTR_ID_DEBOUNCE)
+ fprintf(stdout, ", debounce_period=%dusec",
+ info->attrs[0].debounce_period_us);
+ }
}
int list_device(const char *device_name)
@@ -109,18 +125,18 @@ int list_device(const char *device_name)
/* Loop over the lines and print info */
for (i = 0; i < cinfo.lines; i++) {
- struct gpioline_info linfo;
+ struct gpio_v2_line_info linfo;
memset(&linfo, 0, sizeof(linfo));
- linfo.line_offset = i;
+ linfo.offset = i;
- ret = ioctl(fd, GPIO_GET_LINEINFO_IOCTL, &linfo);
+ ret = ioctl(fd, GPIO_V2_GET_LINEINFO_IOCTL, &linfo);
if (ret == -1) {
ret = -errno;
perror("Failed to issue LINEINFO IOCTL\n");
goto exit_close_error;
}
- fprintf(stdout, "\tline %2d:", linfo.line_offset);
+ fprintf(stdout, "\tline %2d:", linfo.offset);
if (linfo.name[0])
fprintf(stdout, " \"%s\"", linfo.name);
else
@@ -131,7 +147,7 @@ int list_device(const char *device_name)
fprintf(stdout, " unused");
if (linfo.flags) {
fprintf(stdout, " [");
- print_flags(linfo.flags);
+ print_attributes(&linfo);
fprintf(stdout, "]");
}
fprintf(stdout, "\n");
diff --git a/tools/iio/iio_event_monitor.c b/tools/iio/iio_event_monitor.c
index f115d166c985..bb03859db89d 100644
--- a/tools/iio/iio_event_monitor.c
+++ b/tools/iio/iio_event_monitor.c
@@ -119,6 +119,7 @@ static const char * const iio_modifier_names[] = {
[IIO_MOD_PM2P5] = "pm2p5",
[IIO_MOD_PM4] = "pm4",
[IIO_MOD_PM10] = "pm10",
+ [IIO_MOD_O2] = "o2",
};
static bool event_is_known(struct iio_event_data *event)
@@ -211,6 +212,7 @@ static bool event_is_known(struct iio_event_data *event)
case IIO_MOD_PM2P5:
case IIO_MOD_PM4:
case IIO_MOD_PM10:
+ case IIO_MOD_O2:
break;
default:
return false;
diff --git a/tools/include/linux/btf_ids.h b/tools/include/linux/btf_ids.h
index 4867d549e3c1..57890b357f85 100644
--- a/tools/include/linux/btf_ids.h
+++ b/tools/include/linux/btf_ids.h
@@ -3,6 +3,11 @@
#ifndef _LINUX_BTF_IDS_H
#define _LINUX_BTF_IDS_H
+struct btf_id_set {
+ u32 cnt;
+ u32 ids[];
+};
+
#ifdef CONFIG_DEBUG_INFO_BTF
#include <linux/compiler.h> /* for __PASTE */
@@ -62,7 +67,7 @@ asm( \
".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \
"." #scope " " #name "; \n" \
#name ":; \n" \
-".popsection; \n"); \
+".popsection; \n");
#define BTF_ID_LIST(name) \
__BTF_ID_LIST(name, local) \
@@ -71,6 +76,13 @@ extern u32 name[];
#define BTF_ID_LIST_GLOBAL(name) \
__BTF_ID_LIST(name, globl)
+/* The BTF_ID_LIST_SINGLE macro defines a BTF_ID_LIST with
+ * a single entry.
+ */
+#define BTF_ID_LIST_SINGLE(name, prefix, typename) \
+ BTF_ID_LIST(name) \
+ BTF_ID(prefix, typename)
+
/*
* The BTF_ID_UNUSED macro defines 4 zero bytes.
* It's used when we want to define 'unused' entry
@@ -88,12 +100,57 @@ asm( \
".zero 4 \n" \
".popsection; \n");
+/*
+ * The BTF_SET_START/END macro pair defines a sorted list of
+ * BTF IDs plus its member count, with the following layout:
+ *
+ * BTF_SET_START(list)
+ * BTF_ID(type1, name1)
+ * BTF_ID(type2, name2)
+ * BTF_SET_END(list)
+ *
+ * __BTF_ID__set__list:
+ * .zero 4
+ * list:
+ * __BTF_ID__type1__name1__3:
+ * .zero 4
+ * __BTF_ID__type2__name2__4:
+ * .zero 4
+ *
+ */
+#define __BTF_SET_START(name, scope) \
+asm( \
+".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \
+"." #scope " __BTF_ID__set__" #name "; \n" \
+"__BTF_ID__set__" #name ":; \n" \
+".zero 4 \n" \
+".popsection; \n");
+
+#define BTF_SET_START(name) \
+__BTF_ID_LIST(name, local) \
+__BTF_SET_START(name, local)
+
+#define BTF_SET_START_GLOBAL(name) \
+__BTF_ID_LIST(name, globl) \
+__BTF_SET_START(name, globl)
+
+#define BTF_SET_END(name) \
+asm( \
+".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \
+".size __BTF_ID__set__" #name ", .-" #name " \n" \
+".popsection; \n"); \
+extern struct btf_id_set name;
+
#else
#define BTF_ID_LIST(name) static u32 name[5];
#define BTF_ID(prefix, name)
#define BTF_ID_UNUSED
#define BTF_ID_LIST_GLOBAL(name) u32 name[1];
+#define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 name[1];
+#define BTF_SET_START(name) static struct btf_id_set name = { 0 };
+#define BTF_SET_START_GLOBAL(name) static struct btf_id_set name = { 0 };
+#define BTF_SET_END(name)
#endif /* CONFIG_DEBUG_INFO_BTF */
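A sketch of how the new set macros are meant to be consumed in kernel code; the set and function names here are illustrative, and resolve_btfids is what fills in and sorts the IDs at link time:

    BTF_SET_START(example_allowlist)
    BTF_ID(func, bpf_skb_output)
    BTF_ID(func, bpf_xdp_output)
    BTF_SET_END(example_allowlist)

    static bool id_in_example_allowlist(u32 btf_id)
    {
            u32 i;

            /* ids are kept sorted, so a binary search would also work */
            for (i = 0; i < example_allowlist.cnt; i++)
                    if (example_allowlist.ids[i] == btf_id)
                            return true;
            return false;
    }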
diff --git a/tools/include/linux/objtool.h b/tools/include/linux/objtool.h
new file mode 100644
index 000000000000..ab82c793c897
--- /dev/null
+++ b/tools/include/linux/objtool.h
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_OBJTOOL_H
+#define _LINUX_OBJTOOL_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+
+/*
+ * This struct is used by asm and inline asm code to manually annotate the
+ * location of registers on the stack.
+ */
+struct unwind_hint {
+ u32 ip;
+ s16 sp_offset;
+ u8 sp_reg;
+ u8 type;
+ u8 end;
+};
+#endif
+
+/*
+ * UNWIND_HINT_TYPE_CALL: Indicates that sp_reg+sp_offset resolves to PREV_SP
+ * (the caller's SP right before it made the call). Used for all callable
+ * functions, i.e. all C code and all callable asm functions.
+ *
+ * UNWIND_HINT_TYPE_REGS: Used in entry code to indicate that sp_reg+sp_offset
+ * points to a fully populated pt_regs from a syscall, interrupt, or exception.
+ *
+ * UNWIND_HINT_TYPE_REGS_PARTIAL: Used in entry code to indicate that
+ * sp_reg+sp_offset points to the iret return frame.
+ */
+#define UNWIND_HINT_TYPE_CALL 0
+#define UNWIND_HINT_TYPE_REGS 1
+#define UNWIND_HINT_TYPE_REGS_PARTIAL 2
+#define UNWIND_HINT_TYPE_RET_OFFSET 3
+
+#ifdef CONFIG_STACK_VALIDATION
+
+#ifndef __ASSEMBLY__
+
+#define UNWIND_HINT(sp_reg, sp_offset, type, end) \
+ "987: \n\t" \
+ ".pushsection .discard.unwind_hints\n\t" \
+ /* struct unwind_hint */ \
+ ".long 987b - .\n\t" \
+ ".short " __stringify(sp_offset) "\n\t" \
+ ".byte " __stringify(sp_reg) "\n\t" \
+ ".byte " __stringify(type) "\n\t" \
+ ".byte " __stringify(end) "\n\t" \
+ ".balign 4 \n\t" \
+ ".popsection\n\t"
+
+/*
+ * This macro marks the given function's stack frame as "non-standard", which
+ * tells objtool to ignore the function when doing stack metadata validation.
+ * It should only be used in special cases where you're 100% sure it won't
+ * affect the reliability of frame pointers and kernel stack traces.
+ *
+ * For more information, see tools/objtool/Documentation/stack-validation.txt.
+ */
+#define STACK_FRAME_NON_STANDARD(func) \
+ static void __used __section(.discard.func_stack_frame_non_standard) \
+ *__func_stack_frame_non_standard_##func = func
+
+#else /* __ASSEMBLY__ */
+
+/*
+ * This macro indicates that the following intra-function call is valid.
+ * Any non-annotated intra-function call will cause objtool to issue a warning.
+ */
+#define ANNOTATE_INTRA_FUNCTION_CALL \
+ 999: \
+ .pushsection .discard.intra_function_calls; \
+ .long 999b; \
+ .popsection;
+
+/*
+ * In asm, there are two kinds of code: normal C-type callable functions and
+ * the rest. The normal callable functions can be called by other code, and
+ * don't do anything unusual with the stack. Such normal callable functions
+ * are annotated with the ENTRY/ENDPROC macros. Most asm code falls in this
+ * category. In this case, no special debugging annotations are needed because
+ * objtool can automatically generate the ORC data for the ORC unwinder to read
+ * at runtime.
+ *
+ * Anything which doesn't fall into the above category, such as syscall and
+ * interrupt handlers, tends to not be called directly by other functions, and
+ * often does unusual non-C-function-type things with the stack pointer. Such
+ * code needs to be annotated such that objtool can understand it. The
+ * following CFI hint macros are for this type of code.
+ *
+ * These macros provide hints to objtool about the state of the stack at each
+ * instruction. Objtool starts from the hints and follows the code flow,
+ * making automatic CFI adjustments when it sees pushes and pops, filling out
+ * the debuginfo as necessary. It will also warn if it sees any
+ * inconsistencies.
+ */
+.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0
+.Lunwind_hint_ip_\@:
+ .pushsection .discard.unwind_hints
+ /* struct unwind_hint */
+ .long .Lunwind_hint_ip_\@ - .
+ .short \sp_offset
+ .byte \sp_reg
+ .byte \type
+ .byte \end
+ .balign 4
+ .popsection
+.endm
+
+#endif /* __ASSEMBLY__ */
+
+#else /* !CONFIG_STACK_VALIDATION */
+
+#ifndef __ASSEMBLY__
+
+#define UNWIND_HINT(sp_reg, sp_offset, type, end) \
+ "\n\t"
+#define STACK_FRAME_NON_STANDARD(func)
+#else
+#define ANNOTATE_INTRA_FUNCTION_CALL
+.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0
+.endm
+#endif
+
+#endif /* CONFIG_STACK_VALIDATION */
+
+#endif /* _LINUX_OBJTOOL_H */
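A sketch of the C-side annotation: opting a function out of objtool's stack validation when its stack usage is deliberately non-standard; the function name is hypothetical:

    #include <linux/objtool.h>

    static void weird_trampoline(void)
    {
            /* plays games with the stack that objtool cannot follow */
    }
    STACK_FRAME_NON_STANDARD(weird_trampoline);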
diff --git a/tools/include/linux/static_call_types.h b/tools/include/linux/static_call_types.h
new file mode 100644
index 000000000000..89135bb35bf7
--- /dev/null
+++ b/tools/include/linux/static_call_types.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _STATIC_CALL_TYPES_H
+#define _STATIC_CALL_TYPES_H
+
+#include <linux/types.h>
+#include <linux/stringify.h>
+
+#define STATIC_CALL_KEY_PREFIX __SCK__
+#define STATIC_CALL_KEY_PREFIX_STR __stringify(STATIC_CALL_KEY_PREFIX)
+#define STATIC_CALL_KEY_PREFIX_LEN (sizeof(STATIC_CALL_KEY_PREFIX_STR) - 1)
+#define STATIC_CALL_KEY(name) __PASTE(STATIC_CALL_KEY_PREFIX, name)
+
+#define STATIC_CALL_TRAMP_PREFIX __SCT__
+#define STATIC_CALL_TRAMP_PREFIX_STR __stringify(STATIC_CALL_TRAMP_PREFIX)
+#define STATIC_CALL_TRAMP_PREFIX_LEN (sizeof(STATIC_CALL_TRAMP_PREFIX_STR) - 1)
+#define STATIC_CALL_TRAMP(name) __PASTE(STATIC_CALL_TRAMP_PREFIX, name)
+#define STATIC_CALL_TRAMP_STR(name) __stringify(STATIC_CALL_TRAMP(name))
+
+/*
+ * Flags in the low bits of static_call_site::key.
+ */
+#define STATIC_CALL_SITE_TAIL 1UL /* tail call */
+#define STATIC_CALL_SITE_INIT 2UL /* init section */
+#define STATIC_CALL_SITE_FLAGS 3UL
+
+/*
+ * The static call site table needs to be created by external tooling (objtool
+ * or a compiler plugin).
+ */
+struct static_call_site {
+ s32 addr;
+ s32 key;
+};
+
+#endif /* _STATIC_CALL_TYPES_H */
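A sketch of decoding one table entry: addr and key are self-relative 32-bit offsets, and the low two bits of key carry the TAIL/INIT flags. This mirrors how the kernel-side accessors are expected to work, but the helper names here are hypothetical:

    static inline void *site_addr(const struct static_call_site *site)
    {
            return (void *)((long)site->addr + (long)&site->addr);
    }

    static inline unsigned long site_key(const struct static_call_site *site)
    {
            return ((long)site->key + (long)&site->key) &
                   ~STATIC_CALL_SITE_FLAGS;
    }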
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index 995b36c2ea7d..f2b5d72a46c2 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -140,7 +140,7 @@ __SYSCALL(__NR_renameat, sys_renameat)
#define __NR_umount2 39
__SYSCALL(__NR_umount2, sys_umount)
#define __NR_mount 40
-__SC_COMP(__NR_mount, sys_mount, compat_sys_mount)
+__SYSCALL(__NR_mount, sys_mount)
#define __NR_pivot_root 41
__SYSCALL(__NR_pivot_root, sys_pivot_root)
@@ -207,9 +207,9 @@ __SYSCALL(__NR_read, sys_read)
#define __NR_write 64
__SYSCALL(__NR_write, sys_write)
#define __NR_readv 65
-__SC_COMP(__NR_readv, sys_readv, compat_sys_readv)
+__SC_COMP(__NR_readv, sys_readv, sys_readv)
#define __NR_writev 66
-__SC_COMP(__NR_writev, sys_writev, compat_sys_writev)
+__SC_COMP(__NR_writev, sys_writev, sys_writev)
#define __NR_pread64 67
__SC_COMP(__NR_pread64, sys_pread64, compat_sys_pread64)
#define __NR_pwrite64 68
@@ -237,7 +237,7 @@ __SC_COMP(__NR_signalfd4, sys_signalfd4, compat_sys_signalfd4)
/* fs/splice.c */
#define __NR_vmsplice 75
-__SC_COMP(__NR_vmsplice, sys_vmsplice, compat_sys_vmsplice)
+__SYSCALL(__NR_vmsplice, sys_vmsplice)
#define __NR_splice 76
__SYSCALL(__NR_splice, sys_splice)
#define __NR_tee 77
@@ -727,11 +727,9 @@ __SYSCALL(__NR_setns, sys_setns)
#define __NR_sendmmsg 269
__SC_COMP(__NR_sendmmsg, sys_sendmmsg, compat_sys_sendmmsg)
#define __NR_process_vm_readv 270
-__SC_COMP(__NR_process_vm_readv, sys_process_vm_readv, \
- compat_sys_process_vm_readv)
+__SYSCALL(__NR_process_vm_readv, sys_process_vm_readv)
#define __NR_process_vm_writev 271
-__SC_COMP(__NR_process_vm_writev, sys_process_vm_writev, \
- compat_sys_process_vm_writev)
+__SYSCALL(__NR_process_vm_writev, sys_process_vm_writev)
#define __NR_kcmp 272
__SYSCALL(__NR_kcmp, sys_kcmp)
#define __NR_finit_module 273
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index b6238b2209b7..bf5a99d803e4 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -124,6 +124,7 @@ enum bpf_cmd {
BPF_ENABLE_STATS,
BPF_ITER_CREATE,
BPF_LINK_DETACH,
+ BPF_PROG_BIND_MAP,
};
enum bpf_map_type {
@@ -155,6 +156,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_DEVMAP_HASH,
BPF_MAP_TYPE_STRUCT_OPS,
BPF_MAP_TYPE_RINGBUF,
+ BPF_MAP_TYPE_INODE_STORAGE,
};
/* Note that tracing related programs such as
@@ -345,19 +347,45 @@ enum bpf_link_type {
/* The verifier internal test flag. Behavior is undefined */
#define BPF_F_TEST_STATE_FREQ (1U << 3)
+/* If BPF_F_SLEEPABLE is used in BPF_PROG_LOAD command, the verifier will
+ * restrict map and helper usage for such programs. Sleepable BPF programs can
+ * only be attached to hooks where kernel execution context allows sleeping.
+ * Such programs are allowed to use helpers that may sleep like
+ * bpf_copy_from_user().
+ */
+#define BPF_F_SLEEPABLE (1U << 4)
+
/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
- * two extensions:
- *
- * insn[0].src_reg: BPF_PSEUDO_MAP_FD BPF_PSEUDO_MAP_VALUE
- * insn[0].imm: map fd map fd
- * insn[1].imm: 0 offset into value
- * insn[0].off: 0 0
- * insn[1].off: 0 0
- * ldimm64 rewrite: address of map address of map[0]+offset
- * verifier type: CONST_PTR_TO_MAP PTR_TO_MAP_VALUE
+ * the following extensions:
+ *
+ * insn[0].src_reg: BPF_PSEUDO_MAP_FD
+ * insn[0].imm: map fd
+ * insn[1].imm: 0
+ * insn[0].off: 0
+ * insn[1].off: 0
+ * ldimm64 rewrite: address of map
+ * verifier type: CONST_PTR_TO_MAP
*/
#define BPF_PSEUDO_MAP_FD 1
+/* insn[0].src_reg: BPF_PSEUDO_MAP_VALUE
+ * insn[0].imm: map fd
+ * insn[1].imm: offset into value
+ * insn[0].off: 0
+ * insn[1].off: 0
+ * ldimm64 rewrite: address of map[0]+offset
+ * verifier type: PTR_TO_MAP_VALUE
+ */
#define BPF_PSEUDO_MAP_VALUE 2
+/* insn[0].src_reg: BPF_PSEUDO_BTF_ID
+ * insn[0].imm: kernel btf id of VAR
+ * insn[1].imm: 0
+ * insn[0].off: 0
+ * insn[1].off: 0
+ * ldimm64 rewrite: address of the kernel variable
+ * verifier type: PTR_TO_BTF_ID or PTR_TO_MEM, depending on whether the var
+ * is struct/union.
+ */
+#define BPF_PSEUDO_BTF_ID 3
/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
* offset to another bpf function
@@ -404,6 +432,12 @@ enum {
/* Enable memory-mapping BPF map */
BPF_F_MMAPABLE = (1U << 10),
+
+/* Share perf_event among processes */
+ BPF_F_PRESERVE_ELEMS = (1U << 11),
+
+/* Create a map that is suitable to be an inner map with dynamic max entries */
+ BPF_F_INNER_MAP = (1U << 12),
};
/* Flags for BPF_PROG_QUERY. */
@@ -414,6 +448,11 @@ enum {
*/
#define BPF_F_QUERY_EFFECTIVE (1U << 0)
+/* Flags for BPF_PROG_TEST_RUN */
+
+/* If set, run the test on the cpu specified by bpf_attr.test.cpu */
+#define BPF_F_TEST_RUN_ON_CPU (1U << 0)
+
/* type for BPF_ENABLE_STATS */
enum bpf_stats_type {
/* enabled run_time_ns and run_cnt */
@@ -556,6 +595,8 @@ union bpf_attr {
*/
__aligned_u64 ctx_in;
__aligned_u64 ctx_out;
+ __u32 flags;
+ __u32 cpu;
} test;
struct { /* anonymous struct used by BPF_*_GET_*_ID */
@@ -622,8 +663,13 @@ union bpf_attr {
};
__u32 attach_type; /* attach type */
__u32 flags; /* extra flags */
- __aligned_u64 iter_info; /* extra bpf_iter_link_info */
- __u32 iter_info_len; /* iter_info length */
+ union {
+ __u32 target_btf_id; /* btf_id of target to attach to */
+ struct {
+ __aligned_u64 iter_info; /* extra bpf_iter_link_info */
+ __u32 iter_info_len; /* iter_info length */
+ };
+ };
} link_create;
struct { /* struct used by BPF_LINK_UPDATE command */
@@ -649,6 +695,12 @@ union bpf_attr {
__u32 flags;
} iter_create;
+ struct { /* struct used by BPF_PROG_BIND_MAP command */
+ __u32 prog_fd;
+ __u32 map_fd;
+ __u32 flags; /* extra flags */
+ } prog_bind_map;
+
} __attribute__((aligned(8)));
/* The description below is an attempt at providing documentation to eBPF
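A sketch of driving the new BPF_PROG_BIND_MAP command straight through the bpf(2) syscall; prog_fd and map_fd are assumed to be valid descriptors obtained elsewhere:

    #include <string.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/bpf.h>

    int prog_bind_map(int prog_fd, int map_fd)
    {
            union bpf_attr attr;

            memset(&attr, 0, sizeof(attr));
            attr.prog_bind_map.prog_fd = prog_fd;
            attr.prog_bind_map.map_fd = map_fd;
            return syscall(__NR_bpf, BPF_PROG_BIND_MAP, &attr, sizeof(attr));
    }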
@@ -1438,8 +1490,8 @@ union bpf_attr {
* Return
* The return value depends on the result of the test, and can be:
*
- * * 0, if the *skb* task belongs to the cgroup2.
- * * 1, if the *skb* task does not belong to the cgroup2.
+ * * 0, if current task belongs to the cgroup2.
+ * * 1, if current task does not belong to the cgroup2.
* * A negative error code, if an error occurred.
*
* long bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
@@ -1649,7 +1701,7 @@ union bpf_attr {
* **TCP_CONGESTION**, **TCP_BPF_IW**,
* **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**,
* **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**,
- * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**.
+ * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**.
* * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
* * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
* Return
@@ -2204,7 +2256,7 @@ union bpf_attr {
* Description
* This helper is used in programs implementing policies at the
* skb socket level. If the sk_buff *skb* is allowed to pass (i.e.
- * if the verdeict eBPF program returns **SK_PASS**), redirect it
+ * if the verdict eBPF program returns **SK_PASS**), redirect it
* to the socket referenced by *map* (of type
* **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
* egress interfaces can be used for redirection. The
@@ -2496,7 +2548,7 @@ union bpf_attr {
* result is from *reuse*\ **->socks**\ [] using the hash of the
* tuple.
*
- * long bpf_sk_release(struct bpf_sock *sock)
+ * long bpf_sk_release(void *sock)
* Description
* Release the reference held by *sock*. *sock* must be a
* non-**NULL** pointer that was returned from
@@ -2676,7 +2728,7 @@ union bpf_attr {
* result is from *reuse*\ **->socks**\ [] using the hash of the
* tuple.
*
- * long bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
+ * long bpf_tcp_check_syncookie(void *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
* Description
* Check whether *iph* and *th* contain a valid SYN cookie ACK for
* the listening socket in *sk*.
@@ -2807,7 +2859,7 @@ union bpf_attr {
*
* **-ERANGE** if resulting value was out of range.
*
- * void *bpf_sk_storage_get(struct bpf_map *map, struct bpf_sock *sk, void *value, u64 flags)
+ * void *bpf_sk_storage_get(struct bpf_map *map, void *sk, void *value, u64 flags)
* Description
* Get a bpf-local-storage from a *sk*.
*
@@ -2823,6 +2875,9 @@ union bpf_attr {
* "type". The bpf-local-storage "type" (i.e. the *map*) is
* searched against all bpf-local-storages residing at *sk*.
*
+ * *sk* is a kernel **struct sock** pointer for LSM program.
+ * *sk* is a **struct bpf_sock** pointer for other program types.
+ *
* An optional *flags* (**BPF_SK_STORAGE_GET_F_CREATE**) can be
* used such that a new bpf-local-storage will be
* created if one does not exist. *value* can be used
@@ -2835,13 +2890,14 @@ union bpf_attr {
* **NULL** if not found or there was an error in adding
* a new bpf-local-storage.
*
- * long bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk)
+ * long bpf_sk_storage_delete(struct bpf_map *map, void *sk)
* Description
* Delete a bpf-local-storage from a *sk*.
* Return
* 0 on success.
*
* **-ENOENT** if the bpf-local-storage cannot be found.
+ * **-EINVAL** if sk is not a fullsock (e.g. a request_sock).
*
* long bpf_send_signal(u32 sig)
* Description
@@ -2858,7 +2914,7 @@ union bpf_attr {
*
* **-EAGAIN** if bpf program can try again.
*
- * s64 bpf_tcp_gen_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
+ * s64 bpf_tcp_gen_syncookie(void *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
* Description
* Try to issue a SYN cookie for the packet with corresponding
* IP/TCP headers, *iph* and *th*, on the listening socket in *sk*.
@@ -3087,7 +3143,7 @@ union bpf_attr {
* Return
* The id is returned or 0 in case the id could not be retrieved.
*
- * long bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags)
+ * long bpf_sk_assign(struct sk_buff *skb, void *sk, u64 flags)
* Description
* Helper is overloaded depending on BPF program type. This
* description applies to **BPF_PROG_TYPE_SCHED_CLS** and
@@ -3215,11 +3271,11 @@ union bpf_attr {
*
* **-EOVERFLOW** if an overflow happened: The same object will be tried again.
*
- * u64 bpf_sk_cgroup_id(struct bpf_sock *sk)
+ * u64 bpf_sk_cgroup_id(void *sk)
* Description
* Return the cgroup v2 id of the socket *sk*.
*
- * *sk* must be a non-**NULL** pointer to a full socket, e.g. one
+ * *sk* must be a non-**NULL** pointer to a socket, e.g. one
* returned from **bpf_sk_lookup_xxx**\ (),
* **bpf_sk_fullsock**\ (), etc. The format of returned id is
* same as in **bpf_skb_cgroup_id**\ ().
@@ -3229,7 +3285,7 @@ union bpf_attr {
* Return
* The id is returned or 0 in case the id could not be retrieved.
*
- * u64 bpf_sk_ancestor_cgroup_id(struct bpf_sock *sk, int ancestor_level)
+ * u64 bpf_sk_ancestor_cgroup_id(void *sk, int ancestor_level)
* Description
* Return id of cgroup v2 that is ancestor of cgroup associated
* with the *sk* at the *ancestor_level*. The root cgroup is at
@@ -3337,38 +3393,38 @@ union bpf_attr {
* Description
* Dynamically cast a *sk* pointer to a *tcp6_sock* pointer.
* Return
- * *sk* if casting is valid, or NULL otherwise.
+ * *sk* if casting is valid, or **NULL** otherwise.
*
* struct tcp_sock *bpf_skc_to_tcp_sock(void *sk)
* Description
* Dynamically cast a *sk* pointer to a *tcp_sock* pointer.
* Return
- * *sk* if casting is valid, or NULL otherwise.
+ * *sk* if casting is valid, or **NULL** otherwise.
*
* struct tcp_timewait_sock *bpf_skc_to_tcp_timewait_sock(void *sk)
* Description
* Dynamically cast a *sk* pointer to a *tcp_timewait_sock* pointer.
* Return
- * *sk* if casting is valid, or NULL otherwise.
+ * *sk* if casting is valid, or **NULL** otherwise.
*
* struct tcp_request_sock *bpf_skc_to_tcp_request_sock(void *sk)
* Description
* Dynamically cast a *sk* pointer to a *tcp_request_sock* pointer.
* Return
- * *sk* if casting is valid, or NULL otherwise.
+ * *sk* if casting is valid, or **NULL** otherwise.
*
* struct udp6_sock *bpf_skc_to_udp6_sock(void *sk)
* Description
* Dynamically cast a *sk* pointer to a *udp6_sock* pointer.
* Return
- * *sk* if casting is valid, or NULL otherwise.
+ * *sk* if casting is valid, or **NULL** otherwise.
*
* long bpf_get_task_stack(struct task_struct *task, void *buf, u32 size, u64 flags)
* Description
* Return a user or a kernel stack in bpf program provided buffer.
* To achieve this, the helper needs *task*, which is a valid
- * pointer to struct task_struct. To store the stacktrace, the
- * bpf program provides *buf* with a nonnegative *size*.
+ * pointer to **struct task_struct**. To store the stacktrace, the
+ * bpf program provides *buf* with a nonnegative *size*.
*
* The last argument, *flags*, holds the number of stack frames to
* skip (from 0 to 255), masked with
@@ -3395,6 +3451,293 @@ union bpf_attr {
* A non-negative value equal to or less than *size* on success,
* or a negative error in case of failure.
*
+ * long bpf_load_hdr_opt(struct bpf_sock_ops *skops, void *searchby_res, u32 len, u64 flags)
+ * Description
+ * Load header option. Support reading a particular TCP header
+ * option for bpf program (**BPF_PROG_TYPE_SOCK_OPS**).
+ *
+ * If *flags* is 0, it will search the option from the
+ * *skops*\ **->skb_data**. The comment in **struct bpf_sock_ops**
+ * has details on what skb_data contains under different
+ * *skops*\ **->op**.
+ *
+ * The first byte of the *searchby_res* specifies the
+ * kind that it wants to search.
+ *
+ * If the searching kind is an experimental kind
+ * (i.e. 253 or 254 according to RFC6994), it also
+ * needs to specify the "magic", which is either
+ * 2 bytes or 4 bytes. It then also needs to
+ * specify the size of the magic by using
+ * the 2nd byte, which is the "kind-length" of a TCP
+ * header option; the "kind-length" also
+ * includes the first 2 bytes "kind" and "kind-length"
+ * itself, as a normal TCP header option does.
+ *
+ * For example, to search experimental kind 254 with
+ * 2 byte magic 0xeB9F, the searchby_res should be
+ * [ 254, 4, 0xeB, 0x9F, 0, 0, .... 0 ].
+ *
+ * To search for the standard window scale option (3),
+ * the *searchby_res* should be [ 3, 0, 0, .... 0 ].
+ * Note that kind-length must be 0 for a regular option.
+ *
+ * Searching for No-Op (0) and End-of-Option-List (1) is
+ * not supported.
+ *
+ * *len* must be at least 2 bytes, which is the minimal size
+ * of a header option.
+ *
+ * Supported flags:
+ *
+ * * **BPF_LOAD_HDR_OPT_TCP_SYN** to search from the
+ * saved_syn packet or the just-received syn packet.
+ *
+ * Return
+ * > 0 when found, the header option is copied to *searchby_res*.
+ * The return value is the total length copied. On failure, a
+ * negative error code is returned:
+ *
+ * **-EINVAL** if a parameter is invalid.
+ *
+ * **-ENOMSG** if the option is not found.
+ *
+ * **-ENOENT** if no syn packet is available when
+ * **BPF_LOAD_HDR_OPT_TCP_SYN** is used.
+ *
+ * **-ENOSPC** if there is not enough space. Only *len* number of
+ * bytes are copied.
+ *
+ * **-EFAULT** on failure to parse the header options in the
+ * packet.
+ *
+ * **-EPERM** if the helper cannot be used under the current
+ * *skops*\ **->op**.
+ *
+ * long bpf_store_hdr_opt(struct bpf_sock_ops *skops, const void *from, u32 len, u64 flags)
+ * Description
+ * Store header option. The data will be copied
+ * from buffer *from* with length *len* to the TCP header.
+ *
+ * The buffer *from* should have the whole option that
+ * includes the kind, kind-length, and the actual
+ * option data. The *len* must be at least kind-length
+ * long. The kind-length does not have to be 4 byte
+ * aligned. The kernel will take care of the padding
+ * and setting the 4 bytes aligned value to th->doff.
+ *
+ * This helper will check for duplicated option
+ * by searching the same option in the outgoing skb.
+ *
+ * This helper can only be called during
+ * **BPF_SOCK_OPS_WRITE_HDR_OPT_CB**.
+ *
+ * Return
+ * 0 on success, or negative error in case of failure:
+ *
+ * **-EINVAL** if a parameter is invalid.
+ *
+ * **-ENOSPC** if there is not enough space in the header.
+ * Nothing has been written.
+ *
+ * **-EEXIST** if the option already exists.
+ *
+ * **-EFAULT** on failure to parse the existing header options.
+ *
+ * **-EPERM** if the helper cannot be used under the current
+ * *skops*\ **->op**.
+ *
+ * long bpf_reserve_hdr_opt(struct bpf_sock_ops *skops, u32 len, u64 flags)
+ * Description
+ * Reserve *len* bytes for the bpf header option. The
+ * space will be used by **bpf_store_hdr_opt**\ () later in
+ * **BPF_SOCK_OPS_WRITE_HDR_OPT_CB**.
+ *
+ * If **bpf_reserve_hdr_opt**\ () is called multiple times,
+ * the total number of bytes will be reserved.
+ *
+ * This helper can only be called during
+ * **BPF_SOCK_OPS_HDR_OPT_LEN_CB**.
+ *
+ * Return
+ * 0 on success, or negative error in case of failure:
+ *
+ * **-EINVAL** if a parameter is invalid.
+ *
+ * **-ENOSPC** if there is not enough space in the header.
+ *
+ * **-EPERM** if the helper cannot be used under the current
+ * *skops*\ **->op**.
+ *
+ * void *bpf_inode_storage_get(struct bpf_map *map, void *inode, void *value, u64 flags)
+ * Description
+ * Get a bpf_local_storage from an *inode*.
+ *
+ * Logically, it could be thought of as getting the value from
+ * a *map* with *inode* as the **key**. From this
+ * perspective, the usage is not much different from
+ * **bpf_map_lookup_elem**\ (*map*, **&**\ *inode*) except this
+ * helper enforces the key must be an inode and the map must also
+ * be a **BPF_MAP_TYPE_INODE_STORAGE**.
+ *
+ * Underneath, the value is stored locally at *inode* instead of
+ * the *map*. The *map* is used as the bpf-local-storage
+ * "type". The bpf-local-storage "type" (i.e. the *map*) is
+ * searched against all bpf_local_storage residing at *inode*.
+ *
+ * An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be
+ * used such that a new bpf_local_storage will be
+ * created if one does not exist. *value* can be used
+ * together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify
+ * the initial value of a bpf_local_storage. If *value* is
+ * **NULL**, the new bpf_local_storage will be zero initialized.
+ * Return
+ * A bpf_local_storage pointer is returned on success.
+ *
+ * **NULL** if not found or there was an error in adding
+ * a new bpf_local_storage.
+ *
+ * int bpf_inode_storage_delete(struct bpf_map *map, void *inode)
+ * Description
+ * Delete a bpf_local_storage from an *inode*.
+ * Return
+ * 0 on success.
+ *
+ * **-ENOENT** if the bpf_local_storage cannot be found.
+ *
+ * long bpf_d_path(struct path *path, char *buf, u32 sz)
+ * Description
+ * Return full path for given **struct path** object, which
+ * needs to be the kernel BTF *path* object. The path is
+ * returned in the provided buffer *buf* of size *sz* and
+ * is zero terminated.
+ *
+ * Return
+ * On success, the strictly positive length of the string,
+ * including the trailing NUL character. On error, a negative
+ * value.
+ *
+ * long bpf_copy_from_user(void *dst, u32 size, const void *user_ptr)
+ * Description
+ * Read *size* bytes from user space address *user_ptr* and store
+ * the data in *dst*. This is a wrapper of **copy_from_user**\ ().
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * long bpf_snprintf_btf(char *str, u32 str_size, struct btf_ptr *ptr, u32 btf_ptr_size, u64 flags)
+ * Description
+ * Use BTF to store a string representation of *ptr*->ptr in *str*,
+ * using *ptr*->type_id. This value should specify the type
+ * that *ptr*->ptr points to. LLVM __builtin_btf_type_id(type, 1)
+ * can be used to look up vmlinux BTF type ids. Traversing the
+ * data structure using BTF, the type information and values are
+ * stored in the first *str_size* - 1 bytes of *str*. Safe copy of
+ * the pointer data is carried out to avoid kernel crashes during
+ * operation. Smaller types can use string space on the stack;
+ * larger types can use map data to store the string
+ * representation.
+ *
+ * The string can be subsequently shared with userspace via
+ * bpf_perf_event_output() or ring buffer interfaces.
+ * bpf_trace_printk() is to be avoided as it places too small
+ * a limit on string size to be useful.
+ *
+ * *flags* is a combination of
+ *
+ * **BTF_F_COMPACT**
+ * no formatting around type information
+ * **BTF_F_NONAME**
+ * no struct/union member names/types
+ * **BTF_F_PTR_RAW**
+ * show raw (unobfuscated) pointer values;
+ * equivalent to printk specifier %px.
+ * **BTF_F_ZERO**
+ * show zero-valued struct/union members; they
+ * are not displayed by default
+ *
+ * Return
+ * The number of bytes that were written (or would have been
+ * written had the output not been truncated due to string size),
+ * or a negative error in case of failure.
+ *
+ * long bpf_seq_printf_btf(struct seq_file *m, struct btf_ptr *ptr, u32 ptr_size, u64 flags)
+ * Description
+ * Use BTF to write a string representation of *ptr*->ptr, using
+ * *ptr*->type_id, to the seq_file *m*, as per
+ * **bpf_snprintf_btf**\ (). *flags* are identical to those used
+ * for **bpf_snprintf_btf**\ ().
+ * Return
+ * 0 on success or a negative error in case of failure.
+ *
+ * u64 bpf_skb_cgroup_classid(struct sk_buff *skb)
+ * Description
+ * See **bpf_get_cgroup_classid**\ () for the main description.
+ * This helper differs from **bpf_get_cgroup_classid**\ () in that
+ * the cgroup v1 net_cls class is retrieved only from the *skb*'s
+ * associated socket instead of the current process.
+ * Return
+ * The id is returned or 0 in case the id could not be retrieved.
+ *
+ * long bpf_redirect_neigh(u32 ifindex, u64 flags)
+ * Description
+ * Redirect the packet to another net device of index *ifindex*
+ * and fill in L2 addresses from neighboring subsystem. This helper
+ * is somewhat similar to **bpf_redirect**\ (), except that it
+ * populates L2 addresses as well, meaning, internally, the helper
+ * performs a FIB lookup based on the skb's networking header to
+ * get the address of the next hop, and then relies on the
+ * neighbor lookup for the L2 address of that next hop.
+ *
+ * The *flags* argument is reserved and must be 0. The helper is
+ * currently only supported for tc BPF program types, and enabled
+ * for IPv4 and IPv6 protocols.
+ * Return
+ * The helper returns **TC_ACT_REDIRECT** on success or
+ * **TC_ACT_SHOT** on error.
+ *
+ * void *bpf_per_cpu_ptr(const void *percpu_ptr, u32 cpu)
+ * Description
+ * Take a pointer to a percpu ksym, *percpu_ptr*, and return a
+ * pointer to the percpu kernel variable on *cpu*. A ksym is an
+ * extern variable decorated with '__ksym'. For each ksym, there
+ * is a kernel variable (static or global) of the same name
+ * defined in the kernel. The ksym is percpu if that kernel
+ * variable is percpu.
+ * The returned pointer points to the global percpu var on *cpu*.
+ *
+ * bpf_per_cpu_ptr() has the same semantics as per_cpu_ptr() in
+ * the kernel, except that bpf_per_cpu_ptr() may return NULL. This
+ * happens if *cpu* is greater than or equal to *nr_cpu_ids*. The
+ * caller of bpf_per_cpu_ptr() must check the returned value.
+ * Return
+ * A pointer pointing to the kernel percpu variable on *cpu*, or
+ * NULL, if *cpu* is invalid.
+ *
+ * void *bpf_this_cpu_ptr(const void *percpu_ptr)
+ * Description
+ * Take a pointer to a percpu ksym, *percpu_ptr*, and return a
+ * pointer to the percpu kernel variable on this cpu. See the
+ * description of 'ksym' in **bpf_per_cpu_ptr**\ ().
+ *
+ * bpf_this_cpu_ptr() has the same semantics as this_cpu_ptr() in
+ * the kernel. Unlike **bpf_per_cpu_ptr**\ (), it never
+ * returns NULL.
+ * Return
+ * A pointer pointing to the kernel percpu variable on this cpu.
+ *
+ * long bpf_redirect_peer(u32 ifindex, u64 flags)
+ * Description
+ * Redirect the packet to another net device of index *ifindex*.
+ * This helper is somewhat similar to **bpf_redirect**\ (), except
+ * that the redirection happens to the *ifindex*'s peer device and
+ * the netns switch takes place from ingress to ingress without
+ * going through the CPU's backlog queue.
+ *
+ * The *flags* argument is reserved and must be 0. The helper is
+ * currently only supported for tc BPF program types at the ingress
+ * hook and for veth device types. The peer device must reside in a
+ * different network namespace.
+ * Return
+ * The helper returns **TC_ACT_REDIRECT** on success or
+ * **TC_ACT_SHOT** on error.
*/
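A minimal sketch of how the new inode-storage and path helpers compose in
an LSM program (the map and program names are hypothetical, the usual
vmlinux.h/bpf_helpers.h/bpf_tracing.h includes are elided, and error
handling is trimmed):

	struct {
		__uint(type, BPF_MAP_TYPE_INODE_STORAGE);
		__uint(map_flags, BPF_F_NO_PREALLOC);
		__type(key, int);
		__type(value, __u64);
	} open_counts SEC(".maps");

	SEC("lsm/file_open")
	int BPF_PROG(count_open, struct file *file)
	{
		char path[256];
		__u64 *cnt;

		cnt = bpf_inode_storage_get(&open_counts, file->f_inode, NULL,
					    BPF_LOCAL_STORAGE_GET_F_CREATE);
		if (cnt)
			(*cnt)++;

		/* bpf_d_path() wants a kernel 'struct path *' */
		bpf_d_path(&file->f_path, path, sizeof(path));
		return 0;
	}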
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -3539,6 +3882,20 @@ union bpf_attr {
FN(skc_to_tcp_request_sock), \
FN(skc_to_udp6_sock), \
FN(get_task_stack), \
+ FN(load_hdr_opt), \
+ FN(store_hdr_opt), \
+ FN(reserve_hdr_opt), \
+ FN(inode_storage_get), \
+ FN(inode_storage_delete), \
+ FN(d_path), \
+ FN(copy_from_user), \
+ FN(snprintf_btf), \
+ FN(seq_printf_btf), \
+ FN(skb_cgroup_classid), \
+ FN(redirect_neigh), \
+ FN(bpf_per_cpu_ptr), \
+ FN(bpf_this_cpu_ptr), \
+ FN(redirect_peer), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -3648,9 +4005,13 @@ enum {
BPF_F_SYSCTL_BASE_NAME = (1ULL << 0),
};
-/* BPF_FUNC_sk_storage_get flags */
+/* BPF_FUNC_<kernel_obj>_storage_get flags */
enum {
- BPF_SK_STORAGE_GET_F_CREATE = (1ULL << 0),
+ BPF_LOCAL_STORAGE_GET_F_CREATE = (1ULL << 0),
+ /* BPF_SK_STORAGE_GET_F_CREATE is only kept for backward compatibility
+ * and BPF_LOCAL_STORAGE_GET_F_CREATE must be used instead.
+ */
+ BPF_SK_STORAGE_GET_F_CREATE = BPF_LOCAL_STORAGE_GET_F_CREATE,
};
/* BPF_FUNC_read_branch_records flags. */
@@ -4071,6 +4432,15 @@ struct bpf_link_info {
__u64 cgroup_id;
__u32 attach_type;
} cgroup;
+ struct {
+ __aligned_u64 target_name; /* in/out: target_name buffer ptr */
+ __u32 target_name_len; /* in/out: target_name buffer len */
+ union {
+ struct {
+ __u32 map_id;
+ } map;
+ };
+ } iter;
struct {
__u32 netns_ino;
__u32 attach_type;
@@ -4158,6 +4528,36 @@ struct bpf_sock_ops {
__u64 bytes_received;
__u64 bytes_acked;
__bpf_md_ptr(struct bpf_sock *, sk);
+ /* [skb_data, skb_data_end) covers the whole TCP header.
+ *
+ * BPF_SOCK_OPS_PARSE_HDR_OPT_CB: The received packet.
+ * BPF_SOCK_OPS_HDR_OPT_LEN_CB: Not useful because the
+ * header has not been written.
+ * BPF_SOCK_OPS_WRITE_HDR_OPT_CB: The header and options have
+ * been written so far.
+ * BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: The SYNACK that concludes
+ * the 3WHS.
+ * BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: The ACK that concludes
+ * the 3WHS.
+ *
+ * bpf_load_hdr_opt() can also be used to read a particular option.
+ */
+ __bpf_md_ptr(void *, skb_data);
+ __bpf_md_ptr(void *, skb_data_end);
+ __u32 skb_len; /* The total length of a packet.
+ * It includes the header, options,
+ * and payload.
+ */
+ __u32 skb_tcp_flags; /* tcp_flags of the header. It provides
+ * an easy way to check for tcp_flags
+ * without parsing skb_data.
+ *
+ * In particular, the skb_tcp_flags
+ * will still be available in
+ * BPF_SOCK_OPS_HDR_OPT_LEN_CB even though
+ * the outgoing header has not
+ * been written yet.
+ */
};
/* Definitions for bpf_sock_ops_cb_flags */
@@ -4166,8 +4566,51 @@ enum {
BPF_SOCK_OPS_RETRANS_CB_FLAG = (1<<1),
BPF_SOCK_OPS_STATE_CB_FLAG = (1<<2),
BPF_SOCK_OPS_RTT_CB_FLAG = (1<<3),
+ /* Call bpf for all received TCP headers. The bpf prog will be
+ * called under sock_ops->op == BPF_SOCK_OPS_PARSE_HDR_OPT_CB
+ *
+ * Please refer to the comment in BPF_SOCK_OPS_PARSE_HDR_OPT_CB
+ * for the header option related helpers that will be useful
+ * to the bpf programs.
+ *
+ * It could be used at the client/active side (i.e. connect() side)
+ * when the server has indicated that it is in syncookie mode
+ * and requires the active side to resend the bpf-written
+ * options. The active side can keep writing the bpf-options until
+ * it receives a valid packet from the server side confirming that
+ * the earlier packet (and options) have been received. The example
+ * patches later in this series use it this way on the active side
+ * when the server is in syncookie mode.
+ *
+ * The bpf prog will usually turn this off in the common case.
+ */
+ BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG = (1<<4),
+ /* Call bpf when kernel has received a header option that
+ * the kernel cannot handle. The bpf prog will be called under
+ * sock_ops->op == BPF_SOCK_OPS_PARSE_HDR_OPT_CB.
+ *
+ * Please refer to the comment in BPF_SOCK_OPS_PARSE_HDR_OPT_CB
+ * for the header option related helpers that will be useful
+ * to the bpf programs.
+ */
+ BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG = (1<<5),
+ /* Call bpf when the kernel is writing header options for the
+ * outgoing packet. The bpf prog will first be called
+ * to reserve space in a skb under
+ * sock_ops->op == BPF_SOCK_OPS_HDR_OPT_LEN_CB. Then
+ * the bpf prog will be called to write the header option(s)
+ * under sock_ops->op == BPF_SOCK_OPS_WRITE_HDR_OPT_CB.
+ *
+ * Please refer to the comment in BPF_SOCK_OPS_HDR_OPT_LEN_CB
+ * and BPF_SOCK_OPS_WRITE_HDR_OPT_CB for the header option
+ * related helpers that will be useful to the bpf programs.
+ *
+ * The kernel gets its chance to reserve space and write
+ * options first before the BPF program does.
+ */
+ BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG = (1<<6),
/* Mask of all currently supported cb flags */
- BPF_SOCK_OPS_ALL_CB_FLAGS = 0xF,
+ BPF_SOCK_OPS_ALL_CB_FLAGS = 0x7F,
};
/* List of known BPF sock_ops operators.
@@ -4223,6 +4666,63 @@ enum {
*/
BPF_SOCK_OPS_RTT_CB, /* Called on every RTT.
*/
+ BPF_SOCK_OPS_PARSE_HDR_OPT_CB, /* Parse the header option.
+ * It will be called to handle
+ * the packets received at
+ * an already established
+ * connection.
+ *
+ * sock_ops->skb_data:
+ * Referring to the received skb.
+ * It covers the TCP header only.
+ *
+ * bpf_load_hdr_opt() can also
+ * be used to search for a
+ * particular option.
+ */
+ BPF_SOCK_OPS_HDR_OPT_LEN_CB, /* Reserve space for writing the
+ * header option later in
+ * BPF_SOCK_OPS_WRITE_HDR_OPT_CB.
+ * Arg1: bool want_cookie. (only
+ * when writing a SYNACK)
+ *
+ * sock_ops->skb_data:
+ * Not available because no header has
+ * been written yet.
+ *
+ * sock_ops->skb_tcp_flags:
+ * The tcp_flags of the
+ * outgoing skb. (e.g. SYN, ACK, FIN).
+ *
+ * bpf_reserve_hdr_opt() should
+ * be used to reserve space.
+ */
+ BPF_SOCK_OPS_WRITE_HDR_OPT_CB, /* Write the header options.
+ * Arg1: bool want_cookie. (only
+ * when writing a SYNACK)
+ *
+ * sock_ops->skb_data:
+ * Referring to the outgoing skb.
+ * It covers the TCP header
+ * that has already been written
+ * by the kernel and the
+ * earlier bpf-progs.
+ *
+ * sock_ops->skb_tcp_flags:
+ * The tcp_flags of the outgoing
+ * skb. (e.g. SYN, ACK, FIN).
+ *
+ * bpf_store_hdr_opt() should
+ * be used to write the
+ * option.
+ *
+ * bpf_load_hdr_opt() can also
+ * be used to search for a
+ * particular option that
+ * has already been written
+ * by the kernel or the
+ * earlier bpf-progs.
+ */
};
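A hypothetical sketch of the reserve/write/parse pairing described by the
three callbacks above (option kind 0xfd and its payload are invented for
illustration; a real program must also enable the relevant
BPF_SOCK_OPS_*_CB_FLAGs via bpf_sock_ops_cb_flags_set()):

	SEC("sockops")
	int hdr_opt_example(struct bpf_sock_ops *skops)
	{
		__u8 opt[4] = { 0xfd, 4, 0xaa, 0xbb };	/* kind, len, data */

		switch (skops->op) {
		case BPF_SOCK_OPS_HDR_OPT_LEN_CB:
			bpf_reserve_hdr_opt(skops, sizeof(opt), 0);
			break;
		case BPF_SOCK_OPS_WRITE_HDR_OPT_CB:
			bpf_store_hdr_opt(skops, opt, sizeof(opt), 0);
			break;
		case BPF_SOCK_OPS_PARSE_HDR_OPT_CB:
			/* opt[0] holds the option kind to search for */
			bpf_load_hdr_opt(skops, opt, sizeof(opt), 0);
			break;
		}
		return 1;
	}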
/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
@@ -4250,6 +4750,63 @@ enum {
enum {
TCP_BPF_IW = 1001, /* Set TCP initial congestion window */
TCP_BPF_SNDCWND_CLAMP = 1002, /* Set sndcwnd_clamp */
+ TCP_BPF_DELACK_MAX = 1003, /* Max delayed ack in usecs */
+ TCP_BPF_RTO_MIN = 1004, /* Min RTO (retransmission timeout) in usecs */
+ /* Copy the SYN pkt to optval
+ *
+ * BPF_PROG_TYPE_SOCK_OPS only. It is similar to
+ * bpf_getsockopt(TCP_SAVED_SYN) but is not limited to
+ * reading from the saved_syn. It can get the
+ * SYN packet from either:
+ *
+ * 1. the just-received SYN packet (only available when writing the
+ * SYNACK). This is useful when it is not necessary to
+ * save the SYN packet for later use. It is also the only way
+ * to get the SYN during syncookie mode because the SYN
+ * packet cannot be saved during syncookie.
+ *
+ * OR
+ *
+ * 2. the earlier saved syn which was done by
+ * bpf_setsockopt(TCP_SAVE_SYN).
+ *
+ * The bpf_getsockopt(TCP_BPF_SYN*) option hides where the
+ * SYN packet was obtained from.
+ *
+ * If the bpf-prog does not need the IP[46] header, the
+ * bpf-prog can avoid parsing the IP header by using
+ * TCP_BPF_SYN. Otherwise, the bpf-prog can get both
+ * IP[46] and TCP header by using TCP_BPF_SYN_IP.
+ *
+ * >0: Total number of bytes copied
+ * -ENOSPC: Not enough space in optval. Only optlen bytes
+ * are copied.
+ * -ENOENT: The SYN skb is not available now and the earlier SYN pkt
+ * was not saved by setsockopt(TCP_SAVE_SYN).
+ */
+ TCP_BPF_SYN = 1005, /* Copy the TCP header */
+ TCP_BPF_SYN_IP = 1006, /* Copy the IP[46] and TCP header */
+ TCP_BPF_SYN_MAC = 1007, /* Copy the MAC, IP[46], and TCP header */
+};
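
A hypothetical sketch of reading the peer's SYN (TCP header only) while
the SYNACK is being written, per the TCP_BPF_SYN* description above:

	SEC("sockops")
	int read_syn(struct bpf_sock_ops *skops)
	{
		char syn[60];	/* enough for a maximally sized TCP header */
		int ret;

		if (skops->op == BPF_SOCK_OPS_WRITE_HDR_OPT_CB) {
			ret = bpf_getsockopt(skops, SOL_TCP, TCP_BPF_SYN,
					     syn, sizeof(syn));
			/* ret > 0: bytes copied; -ENOENT: no SYN available */
		}
		return 1;
	}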
+
+enum {
+ BPF_LOAD_HDR_OPT_TCP_SYN = (1ULL << 0),
+};
+
+/* args[0] value during BPF_SOCK_OPS_HDR_OPT_LEN_CB and
+ * BPF_SOCK_OPS_WRITE_HDR_OPT_CB.
+ */
+enum {
+ BPF_WRITE_HDR_TCP_CURRENT_MSS = 1, /* Kernel is finding the
+ * total option space
+ * required for an established
+ * sk in order to calculate the
+ * MSS. No skb is actually
+ * sent.
+ */
+ BPF_WRITE_HDR_TCP_SYNACK_COOKIE = 2, /* Kernel is in syncookie mode
+ * when sending a SYNACK.
+ */
};
struct bpf_perf_event_value {
@@ -4447,4 +5004,34 @@ struct bpf_sk_lookup {
__u32 local_port; /* Host byte order */
};
+/*
+ * struct btf_ptr is used for typed pointer representation; the
+ * type id is used to render the pointer data as the appropriate type
+ * via the bpf_snprintf_btf() helper described above. A flags field -
+ * potentially to specify additional details about the BTF pointer
+ * (rather than its mode of display) - is included for future use.
+ * Display flags - BTF_F_* - are passed to bpf_snprintf_btf separately.
+ */
+struct btf_ptr {
+ void *ptr;
+ __u32 type_id;
+ __u32 flags; /* BTF ptr flags; unused at present. */
+};
+
+/*
+ * Flags to control bpf_snprintf_btf() behaviour.
+ * - BTF_F_COMPACT: no formatting around type information
+ * - BTF_F_NONAME: no struct/union member names/types
+ * - BTF_F_PTR_RAW: show raw (unobfuscated) pointer values;
+ * equivalent to %px.
+ * - BTF_F_ZERO: show zero-valued struct/union members; they
+ * are not displayed by default
+ */
+enum {
+ BTF_F_COMPACT = (1ULL << 0),
+ BTF_F_NONAME = (1ULL << 1),
+ BTF_F_PTR_RAW = (1ULL << 2),
+ BTF_F_ZERO = (1ULL << 3),
+};
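
A hypothetical sketch tying struct btf_ptr, bpf_snprintf_btf() and the
BTF_F_* flags together (the tracepoint and buffer size are illustrative
only):

	static char out[2048];	/* too large for the BPF stack, hence static */

	SEC("tp_btf/sched_switch")
	int dump_prev_task(__u64 *ctx)
	{
		struct btf_ptr ptr = {
			.ptr = (void *)ctx[1],	/* 'prev' task_struct */
			.type_id = bpf_core_type_id_kernel(struct task_struct),
		};

		bpf_snprintf_btf(out, sizeof(out), &ptr, sizeof(ptr),
				 BTF_F_COMPACT | BTF_F_PTR_RAW);
		return 0;
	}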
+
#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index 9ae8f4ef0aac..5f9abed3e226 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -1,6 +1,9 @@
# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
# Most of this file is copied from tools/lib/traceevent/Makefile
+RM ?= rm
+srctree = $(abs_srctree)
+
LIBBPF_VERSION := $(shell \
grep -oE '^LIBBPF_([0-9.]+)' libbpf.map | \
sort -rV | head -n1 | cut -d'_' -f2)
@@ -56,7 +59,7 @@ ifndef VERBOSE
endif
FEATURE_USER = .libbpf
-FEATURE_TESTS = libelf libelf-mmap zlib bpf reallocarray
+FEATURE_TESTS = libelf zlib bpf
FEATURE_DISPLAY = libelf zlib bpf
INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/include/uapi
@@ -95,27 +98,18 @@ PC_FILE = libbpf.pc
ifdef EXTRA_CFLAGS
CFLAGS := $(EXTRA_CFLAGS)
else
- CFLAGS := -g -Wall
-endif
-
-ifeq ($(feature-libelf-mmap), 1)
- override CFLAGS += -DHAVE_LIBELF_MMAP_SUPPORT
-endif
-
-ifeq ($(feature-reallocarray), 0)
- override CFLAGS += -DCOMPAT_NEED_REALLOCARRAY
+ CFLAGS := -g -O2
endif
# Append required CFLAGS
-override CFLAGS += $(EXTRA_WARNINGS)
+override CFLAGS += $(EXTRA_WARNINGS) -Wno-switch-enum
override CFLAGS += -Werror -Wall
-override CFLAGS += -fPIC
override CFLAGS += $(INCLUDES)
override CFLAGS += -fvisibility=hidden
override CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
# flags specific for shared library
-SHLIB_FLAGS := -DSHARED
+SHLIB_FLAGS := -DSHARED -fPIC
ifeq ($(VERBOSE),1)
Q =
@@ -197,7 +191,7 @@ $(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN_SHARED)
@ln -sf $(@F) $(OUTPUT)libbpf.so.$(LIBBPF_MAJOR_VERSION)
$(OUTPUT)libbpf.a: $(BPF_IN_STATIC)
- $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^
+ $(QUIET_LINK)$(RM) -f $@; $(AR) rcs $@ $^
$(OUTPUT)libbpf.pc:
$(QUIET_GEN)sed -e "s|@PREFIX@|$(prefix)|" \
@@ -271,10 +265,10 @@ install: install_lib install_pkgconfig install_headers
### Cleaning rules
config-clean:
- $(call QUIET_CLEAN, config)
+ $(call QUIET_CLEAN, feature-detect)
$(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null
-clean:
+clean: config-clean
$(call QUIET_CLEAN, libbpf) $(RM) -rf $(CMD_TARGETS) \
*~ .*.d .*.cmd LIBBPF-CFLAGS $(BPF_HELPER_DEFS) \
$(SHARED_OBJDIR) $(STATIC_OBJDIR) \
@@ -301,7 +295,7 @@ cscope:
cscope -b -q -I $(srctree)/include -f cscope.out
tags:
- rm -f TAGS tags
+ $(RM) -f TAGS tags
ls *.c *.h | xargs $(TAGS_PROG) -a
# Declare the contents of the .PHONY variable as phony. We keep that
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 0750681057c2..d27e34133973 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -32,9 +32,6 @@
#include "libbpf.h"
#include "libbpf_internal.h"
-/* make sure libbpf doesn't use kernel-only integer typedefs */
-#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
-
/*
* When building perf, unistd.h is overridden. __NR_bpf is
* required to be defined explicitly.
@@ -589,19 +586,31 @@ int bpf_link_create(int prog_fd, int target_fd,
enum bpf_attach_type attach_type,
const struct bpf_link_create_opts *opts)
{
+ __u32 target_btf_id, iter_info_len;
union bpf_attr attr;
if (!OPTS_VALID(opts, bpf_link_create_opts))
return -EINVAL;
+ iter_info_len = OPTS_GET(opts, iter_info_len, 0);
+ target_btf_id = OPTS_GET(opts, target_btf_id, 0);
+
+ if (iter_info_len && target_btf_id)
+ return -EINVAL;
+
memset(&attr, 0, sizeof(attr));
attr.link_create.prog_fd = prog_fd;
attr.link_create.target_fd = target_fd;
attr.link_create.attach_type = attach_type;
attr.link_create.flags = OPTS_GET(opts, flags, 0);
- attr.link_create.iter_info =
- ptr_to_u64(OPTS_GET(opts, iter_info, (void *)0));
- attr.link_create.iter_info_len = OPTS_GET(opts, iter_info_len, 0);
+
+ if (iter_info_len) {
+ attr.link_create.iter_info =
+ ptr_to_u64(OPTS_GET(opts, iter_info, (void *)0));
+ attr.link_create.iter_info_len = iter_info_len;
+ } else if (target_btf_id) {
+ attr.link_create.target_btf_id = target_btf_id;
+ }
return sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr));
}
@@ -715,6 +724,37 @@ int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr)
return ret;
}
+int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts)
+{
+ union bpf_attr attr;
+ int ret;
+
+ if (!OPTS_VALID(opts, bpf_test_run_opts))
+ return -EINVAL;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.test.prog_fd = prog_fd;
+ attr.test.cpu = OPTS_GET(opts, cpu, 0);
+ attr.test.flags = OPTS_GET(opts, flags, 0);
+ attr.test.repeat = OPTS_GET(opts, repeat, 0);
+ attr.test.duration = OPTS_GET(opts, duration, 0);
+ attr.test.ctx_size_in = OPTS_GET(opts, ctx_size_in, 0);
+ attr.test.ctx_size_out = OPTS_GET(opts, ctx_size_out, 0);
+ attr.test.data_size_in = OPTS_GET(opts, data_size_in, 0);
+ attr.test.data_size_out = OPTS_GET(opts, data_size_out, 0);
+ attr.test.ctx_in = ptr_to_u64(OPTS_GET(opts, ctx_in, NULL));
+ attr.test.ctx_out = ptr_to_u64(OPTS_GET(opts, ctx_out, NULL));
+ attr.test.data_in = ptr_to_u64(OPTS_GET(opts, data_in, NULL));
+ attr.test.data_out = ptr_to_u64(OPTS_GET(opts, data_out, NULL));
+
+ ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
+ OPTS_SET(opts, data_size_out, attr.test.data_size_out);
+ OPTS_SET(opts, ctx_size_out, attr.test.ctx_size_out);
+ OPTS_SET(opts, duration, attr.test.duration);
+ OPTS_SET(opts, retval, attr.test.retval);
+ return ret;
+}
+
static int bpf_obj_get_next_id(__u32 start_id, __u32 *next_id, int cmd)
{
union bpf_attr attr;
@@ -818,7 +858,7 @@ int bpf_raw_tracepoint_open(const char *name, int prog_fd)
return sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));
}
-int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size,
+int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size,
bool do_log)
{
union bpf_attr attr = {};
@@ -875,3 +915,19 @@ int bpf_enable_stats(enum bpf_stats_type type)
return sys_bpf(BPF_ENABLE_STATS, &attr, sizeof(attr));
}
+
+int bpf_prog_bind_map(int prog_fd, int map_fd,
+ const struct bpf_prog_bind_opts *opts)
+{
+ union bpf_attr attr;
+
+ if (!OPTS_VALID(opts, bpf_prog_bind_opts))
+ return -EINVAL;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.prog_bind_map.prog_fd = prog_fd;
+ attr.prog_bind_map.map_fd = map_fd;
+ attr.prog_bind_map.flags = OPTS_GET(opts, flags, 0);
+
+ return sys_bpf(BPF_PROG_BIND_MAP, &attr, sizeof(attr));
+}
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 015d13f25fcc..875dde20d56e 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -174,8 +174,9 @@ struct bpf_link_create_opts {
__u32 flags;
union bpf_iter_link_info *iter_info;
__u32 iter_info_len;
+ __u32 target_btf_id;
};
-#define bpf_link_create_opts__last_field iter_info_len
+#define bpf_link_create_opts__last_field target_btf_id
LIBBPF_API int bpf_link_create(int prog_fd, int target_fd,
enum bpf_attach_type attach_type,
@@ -234,7 +235,7 @@ LIBBPF_API int bpf_prog_query(int target_fd, enum bpf_attach_type type,
__u32 query_flags, __u32 *attach_flags,
__u32 *prog_ids, __u32 *prog_cnt);
LIBBPF_API int bpf_raw_tracepoint_open(const char *name, int prog_fd);
-LIBBPF_API int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf,
+LIBBPF_API int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf,
__u32 log_buf_size, bool do_log);
LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf,
__u32 *buf_len, __u32 *prog_id, __u32 *fd_type,
@@ -243,6 +244,40 @@ LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf,
enum bpf_stats_type; /* defined in up-to-date linux/bpf.h */
LIBBPF_API int bpf_enable_stats(enum bpf_stats_type type);
+struct bpf_prog_bind_opts {
+ size_t sz; /* size of this struct for forward/backward compatibility */
+ __u32 flags;
+};
+#define bpf_prog_bind_opts__last_field flags
+
+LIBBPF_API int bpf_prog_bind_map(int prog_fd, int map_fd,
+ const struct bpf_prog_bind_opts *opts);
+
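A minimal userspace sketch (prog_fd and map_fd are assumed to be valid);
binding a map keeps it alive for as long as the program is loaded, even if
the program never references the map directly:

	DECLARE_LIBBPF_OPTS(bpf_prog_bind_opts, opts);

	int err = bpf_prog_bind_map(prog_fd, map_fd, &opts);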
+struct bpf_test_run_opts {
+ size_t sz; /* size of this struct for forward/backward compatibility */
+ const void *data_in; /* optional */
+ void *data_out; /* optional */
+ __u32 data_size_in;
+ __u32 data_size_out; /* in: max length of data_out
+ * out: length of data_out
+ */
+ const void *ctx_in; /* optional */
+ void *ctx_out; /* optional */
+ __u32 ctx_size_in;
+ __u32 ctx_size_out; /* in: max length of ctx_out
+ * out: length of ctx_out
+ */
+ __u32 retval; /* out: return code of the BPF program */
+ int repeat;
+ __u32 duration; /* out: average per repetition in ns */
+ __u32 flags;
+ __u32 cpu;
+};
+#define bpf_test_run_opts__last_field cpu
+
+LIBBPF_API int bpf_prog_test_run_opts(int prog_fd,
+ struct bpf_test_run_opts *opts);
+
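A minimal userspace sketch of the opts-based test-run API (prog_fd is
assumed to refer to a loaded program; the input data is dummy):

	char in[64] = {}, out[128];
	DECLARE_LIBBPF_OPTS(bpf_test_run_opts, topts,
		.data_in = in,
		.data_size_in = sizeof(in),
		.data_out = out,
		.data_size_out = sizeof(out),
		.repeat = 1,
	);
	int err = bpf_prog_test_run_opts(prog_fd, &topts);
	/* on success, topts.retval, topts.duration and topts.data_size_out
	 * carry the results back to the caller
	 */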
#ifdef __cplusplus
} /* extern "C" */
#endif
diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h
index eae5cccff761..bbcefb3ff5a5 100644
--- a/tools/lib/bpf/bpf_core_read.h
+++ b/tools/lib/bpf/bpf_core_read.h
@@ -19,32 +19,52 @@ enum bpf_field_info_kind {
BPF_FIELD_RSHIFT_U64 = 5,
};
+/* second argument to __builtin_btf_type_id() built-in */
+enum bpf_type_id_kind {
+ BPF_TYPE_ID_LOCAL = 0, /* BTF type ID in local program */
+ BPF_TYPE_ID_TARGET = 1, /* BTF type ID in target kernel */
+};
+
+/* second argument to __builtin_preserve_type_info() built-in */
+enum bpf_type_info_kind {
+ BPF_TYPE_EXISTS = 0, /* type existence in target kernel */
+ BPF_TYPE_SIZE = 1, /* type size in target kernel */
+};
+
+/* second argument to __builtin_preserve_enum_value() built-in */
+enum bpf_enum_value_kind {
+ BPF_ENUMVAL_EXISTS = 0, /* enum value existence in kernel */
+ BPF_ENUMVAL_VALUE = 1, /* enum value value relocation */
+};
+
#define __CORE_RELO(src, field, info) \
__builtin_preserve_field_info((src)->field, BPF_FIELD_##info)
#if __BYTE_ORDER == __LITTLE_ENDIAN
#define __CORE_BITFIELD_PROBE_READ(dst, src, fld) \
- bpf_probe_read((void *)dst, \
- __CORE_RELO(src, fld, BYTE_SIZE), \
- (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET))
+ bpf_probe_read_kernel( \
+ (void *)dst, \
+ __CORE_RELO(src, fld, BYTE_SIZE), \
+ (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET))
#else
/* semantics of LSHIFT_64 assumes loading values into low-ordered bytes, so
* for big-endian we need to adjust destination pointer accordingly, based on
* field byte size
*/
#define __CORE_BITFIELD_PROBE_READ(dst, src, fld) \
- bpf_probe_read((void *)dst + (8 - __CORE_RELO(src, fld, BYTE_SIZE)), \
- __CORE_RELO(src, fld, BYTE_SIZE), \
- (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET))
+ bpf_probe_read_kernel( \
+ (void *)dst + (8 - __CORE_RELO(src, fld, BYTE_SIZE)), \
+ __CORE_RELO(src, fld, BYTE_SIZE), \
+ (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET))
#endif
/*
* Extract bitfield, identified by s->field, and return its value as u64.
* All this is done in a relocatable manner, so bitfield changes such as
* signedness, bit size, or offset are handled automatically.
- * This version of macro is using bpf_probe_read() to read underlying integer
- * storage. Macro functions as an expression and its return type is
- * bpf_probe_read()'s return value: 0, on success, <0 on error.
+ * This version of macro is using bpf_probe_read_kernel() to read underlying
+ * integer storage. Macro functions as an expression and its return type is
+ * bpf_probe_read_kernel()'s return value: 0, on success, <0 on error.
*/
#define BPF_CORE_READ_BITFIELD_PROBED(s, field) ({ \
unsigned long long val = 0; \
@@ -92,15 +112,75 @@ enum bpf_field_info_kind {
__builtin_preserve_field_info(field, BPF_FIELD_EXISTS)
/*
- * Convenience macro to get byte size of a field. Works for integers,
+ * Convenience macro to get the byte size of a field. Works for integers,
* struct/unions, pointers, arrays, and enums.
*/
#define bpf_core_field_size(field) \
__builtin_preserve_field_info(field, BPF_FIELD_BYTE_SIZE)
/*
- * bpf_core_read() abstracts away bpf_probe_read() call and captures offset
- * relocation for source address using __builtin_preserve_access_index()
+ * Convenience macro to get the BTF type ID of a specified type, using local
+ * BTF information. Returns a 32-bit unsigned integer with the type ID from
+ * the program's own BTF. Always succeeds.
+ */
+#define bpf_core_type_id_local(type) \
+ __builtin_btf_type_id(*(typeof(type) *)0, BPF_TYPE_ID_LOCAL)
+
+/*
+ * Convenience macro to get BTF type ID of a target kernel's type that matches
+ * specified local type.
+ * Returns:
+ * - valid 32-bit unsigned type ID in kernel BTF;
+ * - 0, if no matching type was found in a target kernel BTF.
+ */
+#define bpf_core_type_id_kernel(type) \
+ __builtin_btf_type_id(*(typeof(type) *)0, BPF_TYPE_ID_TARGET)
+
+/*
+ * Convenience macro to check that provided named type
+ * (struct/union/enum/typedef) exists in a target kernel.
+ * Returns:
+ * 1, if such type is present in target kernel's BTF;
+ * 0, if no matching type is found.
+ */
+#define bpf_core_type_exists(type) \
+ __builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_EXISTS)
+
+/*
+ * Convenience macro to get the byte size of a provided named type
+ * (struct/union/enum/typedef) in a target kernel.
+ * Returns:
+ * >= 0 size (in bytes), if type is present in target kernel's BTF;
+ * 0, if no matching type is found.
+ */
+#define bpf_core_type_size(type) \
+ __builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_SIZE)
+
+/*
+ * Convenience macro to check that provided enumerator value is defined in
+ * a target kernel.
+ * Returns:
+ * 1, if specified enum type and its enumerator value are present in target
+ * kernel's BTF;
+ * 0, if no matching enum and/or enum value within that enum is found.
+ */
+#define bpf_core_enum_value_exists(enum_type, enum_value) \
+ __builtin_preserve_enum_value(*(typeof(enum_type) *)enum_value, BPF_ENUMVAL_EXISTS)
+
+/*
+ * Convenience macro to get the integer value of an enumerator value in
+ * a target kernel.
+ * Returns:
+ * 64-bit value, if specified enum type and its enumerator value are
+ * present in target kernel's BTF;
+ * 0, if no matching enum and/or enum value within that enum is found.
+ */
+#define bpf_core_enum_value(enum_type, enum_value) \
+ __builtin_preserve_enum_value(*(typeof(enum_type) *)enum_value, BPF_ENUMVAL_VALUE)
+
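A hypothetical sketch of the new type-level CO-RE macros (the program
section and printed message are illustrative; all three checks are
recorded as CO-RE relocations and resolved against the target kernel's
BTF at load time):

	SEC("raw_tp/sys_enter")
	int probe_kernel_types(void *ctx)
	{
		if (bpf_core_type_exists(struct tcp_sock)) {
			__u32 id = bpf_core_type_id_kernel(struct tcp_sock);
			__u64 sz = bpf_core_type_size(struct tcp_sock);

			bpf_printk("tcp_sock: id %u sz %llu", id, sz);
		}
		return 0;
	}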
+/*
+ * bpf_core_read() abstracts away bpf_probe_read_kernel() call and captures
+ * offset relocation for source address using __builtin_preserve_access_index()
* built-in, provided by Clang.
*
* __builtin_preserve_access_index() takes as an argument an expression of
@@ -115,8 +195,8 @@ enum bpf_field_info_kind {
* (local) BTF, used to record relocation.
*/
#define bpf_core_read(dst, sz, src) \
- bpf_probe_read(dst, sz, \
- (const void *)__builtin_preserve_access_index(src))
+ bpf_probe_read_kernel(dst, sz, \
+ (const void *)__builtin_preserve_access_index(src))
/*
* bpf_core_read_str() is a thin wrapper around bpf_probe_read_str()
@@ -124,8 +204,8 @@ enum bpf_field_info_kind {
* argument.
*/
#define bpf_core_read_str(dst, sz, src) \
- bpf_probe_read_str(dst, sz, \
- (const void *)__builtin_preserve_access_index(src))
+ bpf_probe_read_kernel_str(dst, sz, \
+ (const void *)__builtin_preserve_access_index(src))
#define ___concat(a, b) a ## b
#define ___apply(fn, n) ___concat(fn, n)
@@ -239,15 +319,17 @@ enum bpf_field_info_kind {
* int x = BPF_CORE_READ(s, a.b.c, d.e, f, g);
*
* BPF_CORE_READ will decompose above statement into 4 bpf_core_read (BPF
- * CO-RE relocatable bpf_probe_read() wrapper) calls, logically equivalent to:
+ * CO-RE relocatable bpf_probe_read_kernel() wrapper) calls, logically
+ * equivalent to:
* 1. const void *__t = s->a.b.c;
* 2. __t = __t->d.e;
* 3. __t = __t->f;
* 4. return __t->g;
*
* Equivalence is logical, because there is a heavy type casting/preservation
- * involved, as well as all the reads are happening through bpf_probe_read()
- * calls using __builtin_preserve_access_index() to emit CO-RE relocations.
+ * involved, as well as all the reads are happening through
+ * bpf_probe_read_kernel() calls using __builtin_preserve_access_index() to
+ * emit CO-RE relocations.
*
* N.B. Only up to 9 "field accessors" are supported, which should be more
* than enough for any practical purpose.
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index e9a4ecddb7a5..2bdb7d6dbad2 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -32,6 +32,9 @@
#ifndef __always_inline
#define __always_inline __attribute__((always_inline))
#endif
+#ifndef __noinline
+#define __noinline __attribute__((noinline))
+#endif
#ifndef __weak
#define __weak __attribute__((weak))
#endif
@@ -51,6 +54,52 @@
#endif
/*
+ * Helper macro to throw a compilation error if __bpf_unreachable() gets
+ * built into the resulting code. This works given BPF back end does not
+ * implement __builtin_trap(). This is useful to assert that certain paths
+ * of the program code are never used and hence eliminated by the compiler.
+ *
+ * For example, consider a switch statement that covers known cases used by
+ * the program. __bpf_unreachable() can then reside in the default case. If
+ * the program gets extended such that a case is not covered in the switch
+ * statement, then it will throw a build error due to the default case not
+ * being compiled out.
+ */
+#ifndef __bpf_unreachable
+# define __bpf_unreachable() __builtin_trap()
+#endif
+
+/*
+ * Helper function to perform a tail call with a constant/immediate map slot.
+ */
+static __always_inline void
+bpf_tail_call_static(void *ctx, const void *map, const __u32 slot)
+{
+ if (!__builtin_constant_p(slot))
+ __bpf_unreachable();
+
+ /*
+ * Provide a hard guarantee that LLVM won't optimize setting r2 (map
+ * pointer) and r3 (constant map index) from _different paths_ ending
+ * up at the _same_ call insn as otherwise we won't be able to use the
+ * jmpq/nopl retpoline-free patching by the x86-64 JIT in the kernel
+ * given they mismatch. See also d2e4c1e6c294 ("bpf: Constant map key
+ * tracking for prog array pokes") for details on verifier tracking.
+ *
+ * Note on clobber list: we need to stay in-line with BPF calling
+ * convention, so even if we don't end up using r0, r4, r5, we need
+ * to mark them as clobber so that LLVM doesn't end up using them
+ * before / after the call.
+ */
+ asm volatile("r1 = %[ctx]\n\t"
+ "r2 = %[map]\n\t"
+ "r3 = %[slot]\n\t"
+ "call 12"
+ :: [ctx]"r"(ctx), [map]"r"(map), [slot]"i"(slot)
+ : "r0", "r1", "r2", "r3", "r4", "r5");
+}
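
A hypothetical usage sketch; the point is that the slot index is a
compile-time constant, which is exactly what bpf_tail_call_static()
enforces:

	struct {
		__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
		__uint(max_entries, 2);
		__uint(key_size, sizeof(__u32));
		__uint(value_size, sizeof(__u32));
	} jmp_table SEC(".maps");

	SEC("classifier")
	int entry(struct __sk_buff *skb)
	{
		bpf_tail_call_static(skb, &jmp_table, 0);
		return 0;	/* only reached if the tail call fails */
	}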
+
+/*
* Helper structure used by eBPF C program
* to describe BPF map attributes to libbpf loader
*/
diff --git a/tools/lib/bpf/bpf_prog_linfo.c b/tools/lib/bpf/bpf_prog_linfo.c
index bafca49cb1e6..3ed1a27b5f7c 100644
--- a/tools/lib/bpf/bpf_prog_linfo.c
+++ b/tools/lib/bpf/bpf_prog_linfo.c
@@ -8,9 +8,6 @@
#include "libbpf.h"
#include "libbpf_internal.h"
-/* make sure libbpf doesn't use kernel-only integer typedefs */
-#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
-
struct bpf_prog_linfo {
void *raw_linfo;
void *raw_jited_linfo;
diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h
index eebf020cbe3e..f9ef37707888 100644
--- a/tools/lib/bpf/bpf_tracing.h
+++ b/tools/lib/bpf/bpf_tracing.h
@@ -289,9 +289,9 @@ struct pt_regs;
#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP
#else
#define BPF_KPROBE_READ_RET_IP(ip, ctx) \
- ({ bpf_probe_read(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); })
+ ({ bpf_probe_read_kernel(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); })
#define BPF_KRETPROBE_READ_RET_IP(ip, ctx) \
- ({ bpf_probe_read(&(ip), sizeof(ip), \
+ ({ bpf_probe_read_kernel(&(ip), sizeof(ip), \
(void *)(PT_REGS_FP(ctx) + sizeof(ip))); })
#endif
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 7dfca7016aaa..231b07203e3d 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
/* Copyright (c) 2018 Facebook */
+#include <byteswap.h>
#include <endian.h>
#include <stdio.h>
#include <stdlib.h>
@@ -21,26 +22,78 @@
#include "libbpf_internal.h"
#include "hashmap.h"
-/* make sure libbpf doesn't use kernel-only integer typedefs */
-#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
-
#define BTF_MAX_NR_TYPES 0x7fffffffU
#define BTF_MAX_STR_OFFSET 0x7fffffffU
static struct btf_type btf_void;
struct btf {
- union {
- struct btf_header *hdr;
- void *data;
- };
- struct btf_type **types;
- const char *strings;
- void *nohdr_data;
+ /* raw BTF data in native endianness */
+ void *raw_data;
+ /* raw BTF data in non-native endianness */
+ void *raw_data_swapped;
+ __u32 raw_size;
+ /* whether target endianness differs from the native one */
+ bool swapped_endian;
+
+ /*
+ * When BTF is loaded from an ELF or raw memory it is stored
+ * in a contiguous memory block. The hdr, type_data, and, strs_data
+ * point inside that memory region to their respective parts of BTF
+ * representation:
+ *
+ * +--------------------------------+
+ * | Header | Types | Strings |
+ * +--------------------------------+
+ * ^ ^ ^
+ * | | |
+ * hdr | |
+ * types_data-+ |
+ * strs_data------------+
+ *
+ * If BTF data is later modified, e.g., due to types added or
+ * removed, BTF deduplication performed, etc, this contiguous
+ * representation is broken up into three independently allocated
+ * memory regions to be able to modify them independently.
+ * raw_data is nulled out at that point, but can be later allocated
+ * and cached again if user calls btf__get_raw_data(), at which point
+ * raw_data will contain a contiguous copy of header, types, and
+ * strings:
+ *
+ * +----------+ +---------+ +-----------+
+ * | Header | | Types | | Strings |
+ * +----------+ +---------+ +-----------+
+ * ^ ^ ^
+ * | | |
+ * hdr | |
+ * types_data----+ |
+ * strs_data------------------+
+ *
+ * +----------+---------+-----------+
+ * | Header | Types | Strings |
+ * raw_data----->+----------+---------+-----------+
+ */
+ struct btf_header *hdr;
+
+ void *types_data;
+ size_t types_data_cap; /* used size stored in hdr->type_len */
+
+ /* type ID to `struct btf_type *` lookup index */
+ __u32 *type_offs;
+ size_t type_offs_cap;
__u32 nr_types;
- __u32 types_size;
- __u32 data_size;
+
+ void *strs_data;
+ size_t strs_data_cap; /* used size stored in hdr->str_len */
+
+ /* lookup index for each unique string in strings section */
+ struct hashmap *strs_hash;
+ /* whether strings are already deduplicated */
+ bool strs_deduped;
+ /* BTF object FD, if loaded into kernel */
int fd;
+
+ /* Pointer size (in bytes) for a target architecture of this BTF */
int ptr_sz;
};
@@ -49,60 +102,114 @@ static inline __u64 ptr_to_u64(const void *ptr)
return (__u64) (unsigned long) ptr;
}
-static int btf_add_type(struct btf *btf, struct btf_type *t)
+/* Ensure that the dynamically allocated memory region pointed to by *data*,
+ * with a capacity of *cap_cnt* elements each taking *elem_sz* bytes, has
+ * enough memory to accommodate *add_cnt* new elements, assuming *cur_cnt*
+ * elements are already used. At most *max_cnt* elements can ever be
+ * allocated. If necessary, memory is reallocated and all existing data is
+ * copied over; the new pointer to the memory region is stored at *data* and
+ * the new capacity (in number of elements) is stored in *cap_cnt*.
+ * On success, a pointer to the beginning of the unused memory is returned.
+ * On error, NULL is returned.
+ */
+void *btf_add_mem(void **data, size_t *cap_cnt, size_t elem_sz,
+ size_t cur_cnt, size_t max_cnt, size_t add_cnt)
{
- if (btf->types_size - btf->nr_types < 2) {
- struct btf_type **new_types;
- __u32 expand_by, new_size;
+ size_t new_cnt;
+ void *new_data;
- if (btf->types_size == BTF_MAX_NR_TYPES)
- return -E2BIG;
+ if (cur_cnt + add_cnt <= *cap_cnt)
+ return *data + cur_cnt * elem_sz;
- expand_by = max(btf->types_size >> 2, 16U);
- new_size = min(BTF_MAX_NR_TYPES, btf->types_size + expand_by);
+ /* requested more than the set limit */
+ if (cur_cnt + add_cnt > max_cnt)
+ return NULL;
- new_types = realloc(btf->types, sizeof(*new_types) * new_size);
- if (!new_types)
- return -ENOMEM;
+ new_cnt = *cap_cnt;
+ new_cnt += new_cnt / 4; /* expand by 25% */
+ if (new_cnt < 16) /* but at least 16 elements */
+ new_cnt = 16;
+ if (new_cnt > max_cnt) /* but not exceeding a set limit */
+ new_cnt = max_cnt;
+ if (new_cnt < cur_cnt + add_cnt) /* also ensure we have enough memory */
+ new_cnt = cur_cnt + add_cnt;
+
+ new_data = libbpf_reallocarray(*data, new_cnt, elem_sz);
+ if (!new_data)
+ return NULL;
- if (btf->nr_types == 0)
- new_types[0] = &btf_void;
+ /* zero out newly allocated portion of memory */
+ memset(new_data + (*cap_cnt) * elem_sz, 0, (new_cnt - *cap_cnt) * elem_sz);
- btf->types = new_types;
- btf->types_size = new_size;
- }
+ *data = new_data;
+ *cap_cnt = new_cnt;
+ return new_data + cur_cnt * elem_sz;
+}
- btf->types[++(btf->nr_types)] = t;
+/* Ensure that the given dynamically allocated memory region has enough
+ * allocated space to accommodate *need_cnt* elements of size *elem_sz* bytes each
+ */
+int btf_ensure_mem(void **data, size_t *cap_cnt, size_t elem_sz, size_t need_cnt)
+{
+ void *p;
+
+ if (need_cnt <= *cap_cnt)
+ return 0;
+ p = btf_add_mem(data, cap_cnt, elem_sz, *cap_cnt, SIZE_MAX, need_cnt - *cap_cnt);
+ if (!p)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static int btf_add_type_idx_entry(struct btf *btf, __u32 type_off)
+{
+ __u32 *p;
+
+ p = btf_add_mem((void **)&btf->type_offs, &btf->type_offs_cap, sizeof(__u32),
+ btf->nr_types + 1, BTF_MAX_NR_TYPES, 1);
+ if (!p)
+ return -ENOMEM;
+
+ *p = type_off;
return 0;
}
+static void btf_bswap_hdr(struct btf_header *h)
+{
+ h->magic = bswap_16(h->magic);
+ h->hdr_len = bswap_32(h->hdr_len);
+ h->type_off = bswap_32(h->type_off);
+ h->type_len = bswap_32(h->type_len);
+ h->str_off = bswap_32(h->str_off);
+ h->str_len = bswap_32(h->str_len);
+}
+
static int btf_parse_hdr(struct btf *btf)
{
- const struct btf_header *hdr = btf->hdr;
+ struct btf_header *hdr = btf->hdr;
__u32 meta_left;
- if (btf->data_size < sizeof(struct btf_header)) {
+ if (btf->raw_size < sizeof(struct btf_header)) {
pr_debug("BTF header not found\n");
return -EINVAL;
}
- if (hdr->magic != BTF_MAGIC) {
+ if (hdr->magic == bswap_16(BTF_MAGIC)) {
+ btf->swapped_endian = true;
+ if (bswap_32(hdr->hdr_len) != sizeof(struct btf_header)) {
+ pr_warn("Can't load BTF with non-native endianness due to unsupported header length %u\n",
+ bswap_32(hdr->hdr_len));
+ return -ENOTSUP;
+ }
+ btf_bswap_hdr(hdr);
+ } else if (hdr->magic != BTF_MAGIC) {
pr_debug("Invalid BTF magic:%x\n", hdr->magic);
return -EINVAL;
}
- if (hdr->version != BTF_VERSION) {
- pr_debug("Unsupported BTF version:%u\n", hdr->version);
- return -ENOTSUP;
- }
-
- if (hdr->flags) {
- pr_debug("Unsupported BTF flags:%x\n", hdr->flags);
- return -ENOTSUP;
- }
-
- meta_left = btf->data_size - sizeof(*hdr);
+ meta_left = btf->raw_size - sizeof(*hdr);
if (!meta_left) {
pr_debug("BTF has no data\n");
return -EINVAL;
@@ -128,15 +235,13 @@ static int btf_parse_hdr(struct btf *btf)
return -EINVAL;
}
- btf->nohdr_data = btf->hdr + 1;
-
return 0;
}
static int btf_parse_str_sec(struct btf *btf)
{
const struct btf_header *hdr = btf->hdr;
- const char *start = btf->nohdr_data + hdr->str_off;
+ const char *start = btf->strs_data;
const char *end = start + btf->hdr->str_len;
if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_STR_OFFSET ||
@@ -145,14 +250,12 @@ static int btf_parse_str_sec(struct btf *btf)
return -EINVAL;
}
- btf->strings = start;
-
return 0;
}
-static int btf_type_size(struct btf_type *t)
+static int btf_type_size(const struct btf_type *t)
{
- int base_size = sizeof(struct btf_type);
+ const int base_size = sizeof(struct btf_type);
__u16 vlen = btf_vlen(t);
switch (btf_kind(t)) {
@@ -185,25 +288,120 @@ static int btf_type_size(struct btf_type *t)
}
}
+static void btf_bswap_type_base(struct btf_type *t)
+{
+ t->name_off = bswap_32(t->name_off);
+ t->info = bswap_32(t->info);
+ t->type = bswap_32(t->type);
+}
+
+static int btf_bswap_type_rest(struct btf_type *t)
+{
+ struct btf_var_secinfo *v;
+ struct btf_member *m;
+ struct btf_array *a;
+ struct btf_param *p;
+ struct btf_enum *e;
+ __u16 vlen = btf_vlen(t);
+ int i;
+
+ switch (btf_kind(t)) {
+ case BTF_KIND_FWD:
+ case BTF_KIND_CONST:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_RESTRICT:
+ case BTF_KIND_PTR:
+ case BTF_KIND_TYPEDEF:
+ case BTF_KIND_FUNC:
+ return 0;
+ case BTF_KIND_INT:
+ *(__u32 *)(t + 1) = bswap_32(*(__u32 *)(t + 1));
+ return 0;
+ case BTF_KIND_ENUM:
+ for (i = 0, e = btf_enum(t); i < vlen; i++, e++) {
+ e->name_off = bswap_32(e->name_off);
+ e->val = bswap_32(e->val);
+ }
+ return 0;
+ case BTF_KIND_ARRAY:
+ a = btf_array(t);
+ a->type = bswap_32(a->type);
+ a->index_type = bswap_32(a->index_type);
+ a->nelems = bswap_32(a->nelems);
+ return 0;
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ for (i = 0, m = btf_members(t); i < vlen; i++, m++) {
+ m->name_off = bswap_32(m->name_off);
+ m->type = bswap_32(m->type);
+ m->offset = bswap_32(m->offset);
+ }
+ return 0;
+ case BTF_KIND_FUNC_PROTO:
+ for (i = 0, p = btf_params(t); i < vlen; i++, p++) {
+ p->name_off = bswap_32(p->name_off);
+ p->type = bswap_32(p->type);
+ }
+ return 0;
+ case BTF_KIND_VAR:
+ btf_var(t)->linkage = bswap_32(btf_var(t)->linkage);
+ return 0;
+ case BTF_KIND_DATASEC:
+ for (i = 0, v = btf_var_secinfos(t); i < vlen; i++, v++) {
+ v->type = bswap_32(v->type);
+ v->offset = bswap_32(v->offset);
+ v->size = bswap_32(v->size);
+ }
+ return 0;
+ default:
+ pr_debug("Unsupported BTF_KIND:%u\n", btf_kind(t));
+ return -EINVAL;
+ }
+}
+
static int btf_parse_type_sec(struct btf *btf)
{
struct btf_header *hdr = btf->hdr;
- void *nohdr_data = btf->nohdr_data;
- void *next_type = nohdr_data + hdr->type_off;
- void *end_type = nohdr_data + hdr->str_off;
+ void *next_type = btf->types_data;
+ void *end_type = next_type + hdr->type_len;
+ int err, i = 0, type_size;
- while (next_type < end_type) {
- struct btf_type *t = next_type;
- int type_size;
- int err;
+ /* VOID (type_id == 0) is specially handled by btf__type_by_id(),
+ * so ensure we can never properly use its offset from index by
+ * setting it to a large value
+ */
+ err = btf_add_type_idx_entry(btf, UINT_MAX);
+ if (err)
+ return err;
+
+ while (next_type + sizeof(struct btf_type) <= end_type) {
+ i++;
+
+ if (btf->swapped_endian)
+ btf_bswap_type_base(next_type);
- type_size = btf_type_size(t);
+ type_size = btf_type_size(next_type);
if (type_size < 0)
return type_size;
- next_type += type_size;
- err = btf_add_type(btf, t);
+ if (next_type + type_size > end_type) {
+ pr_warn("BTF type [%d] is malformed\n", i);
+ return -EINVAL;
+ }
+
+ if (btf->swapped_endian && btf_bswap_type_rest(next_type))
+ return -EINVAL;
+
+ err = btf_add_type_idx_entry(btf, next_type - btf->types_data);
if (err)
return err;
+
+ next_type += type_size;
+ btf->nr_types++;
+ }
+
+ if (next_type != end_type) {
+ pr_warn("BTF types data is malformed\n");
+ return -EINVAL;
}
return 0;
@@ -214,12 +412,20 @@ __u32 btf__get_nr_types(const struct btf *btf)
return btf->nr_types;
}
+/* internal helper returning non-const pointer to a type */
+static struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id)
+{
+ if (type_id == 0)
+ return &btf_void;
+
+ return btf->types_data + btf->type_offs[type_id];
+}
+
const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 type_id)
{
if (type_id > btf->nr_types)
return NULL;
-
- return btf->types[type_id];
+ return btf_type_by_id((struct btf *)btf, type_id);
}
static int determine_ptr_size(const struct btf *btf)
@@ -286,6 +492,38 @@ int btf__set_pointer_size(struct btf *btf, size_t ptr_sz)
return 0;
}
+static bool is_host_big_endian(void)
+{
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ return false;
+#elif __BYTE_ORDER == __BIG_ENDIAN
+ return true;
+#else
+# error "Unrecognized __BYTE_ORDER__"
+#endif
+}
+
+enum btf_endianness btf__endianness(const struct btf *btf)
+{
+ if (is_host_big_endian())
+ return btf->swapped_endian ? BTF_LITTLE_ENDIAN : BTF_BIG_ENDIAN;
+ else
+ return btf->swapped_endian ? BTF_BIG_ENDIAN : BTF_LITTLE_ENDIAN;
+}
+
+int btf__set_endianness(struct btf *btf, enum btf_endianness endian)
+{
+ if (endian != BTF_LITTLE_ENDIAN && endian != BTF_BIG_ENDIAN)
+ return -EINVAL;
+
+ btf->swapped_endian = is_host_big_endian() != (endian == BTF_BIG_ENDIAN);
+ if (!btf->swapped_endian) {
+ free(btf->raw_data_swapped);
+ btf->raw_data_swapped = NULL;
+ }
+ return 0;
+}
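
A minimal userspace sketch: load raw BTF and re-emit it for the opposite
byte order (the input path is illustrative):

	struct btf *btf = btf__parse_raw("/sys/kernel/btf/vmlinux");
	const void *raw;
	__u32 size;

	if (!libbpf_get_error(btf) && !btf__set_endianness(btf, BTF_BIG_ENDIAN))
		raw = btf__get_raw_data(btf, &size);	/* big-endian copy */
	btf__free(btf);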
+
static bool btf_type_is_void(const struct btf_type *t)
{
return t == &btf_void || btf_is_fwd(t);
@@ -417,7 +655,7 @@ __s32 btf__find_by_name(const struct btf *btf, const char *type_name)
return 0;
for (i = 1; i <= btf->nr_types; i++) {
- const struct btf_type *t = btf->types[i];
+ const struct btf_type *t = btf__type_by_id(btf, i);
const char *name = btf__name_by_offset(btf, t->name_off);
if (name && !strcmp(type_name, name))
@@ -436,7 +674,7 @@ __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name,
return 0;
for (i = 1; i <= btf->nr_types; i++) {
- const struct btf_type *t = btf->types[i];
+ const struct btf_type *t = btf__type_by_id(btf, i);
const char *name;
if (btf_kind(t) != kind)
@@ -449,6 +687,11 @@ __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name,
return -ENOENT;
}
+static bool btf_is_modifiable(const struct btf *btf)
+{
+ return (void *)btf->hdr != btf->raw_data;
+}
+
void btf__free(struct btf *btf)
{
if (IS_ERR_OR_NULL(btf))
@@ -457,11 +700,55 @@ void btf__free(struct btf *btf)
if (btf->fd >= 0)
close(btf->fd);
- free(btf->data);
- free(btf->types);
+ if (btf_is_modifiable(btf)) {
+ /* if BTF was modified after loading, it will have a split
+ * in-memory representation for header, types, and strings
+ * sections, so we need to free all of them individually. It
+ * might still have a cached contiguous raw data present,
+ * which will be unconditionally freed below.
+ */
+ free(btf->hdr);
+ free(btf->types_data);
+ free(btf->strs_data);
+ }
+ free(btf->raw_data);
+ free(btf->raw_data_swapped);
+ free(btf->type_offs);
free(btf);
}
+struct btf *btf__new_empty(void)
+{
+ struct btf *btf;
+
+ btf = calloc(1, sizeof(*btf));
+ if (!btf)
+ return ERR_PTR(-ENOMEM);
+
+ btf->fd = -1;
+ btf->ptr_sz = sizeof(void *);
+ btf->swapped_endian = false;
+
+ /* +1 for empty string at offset 0 */
+ btf->raw_size = sizeof(struct btf_header) + 1;
+ btf->raw_data = calloc(1, btf->raw_size);
+ if (!btf->raw_data) {
+ free(btf);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ btf->hdr = btf->raw_data;
+ btf->hdr->hdr_len = sizeof(struct btf_header);
+ btf->hdr->magic = BTF_MAGIC;
+ btf->hdr->version = BTF_VERSION;
+
+ btf->types_data = btf->raw_data + btf->hdr->hdr_len;
+ btf->strs_data = btf->raw_data + btf->hdr->hdr_len;
+ btf->hdr->str_len = 1; /* empty string at offset 0 */
+
+ return btf;
+}
+
struct btf *btf__new(const void *data, __u32 size)
{
struct btf *btf;
@@ -471,26 +758,28 @@ struct btf *btf__new(const void *data, __u32 size)
if (!btf)
return ERR_PTR(-ENOMEM);
- btf->fd = -1;
-
- btf->data = malloc(size);
- if (!btf->data) {
+ btf->raw_data = malloc(size);
+ if (!btf->raw_data) {
err = -ENOMEM;
goto done;
}
+ memcpy(btf->raw_data, data, size);
+ btf->raw_size = size;
- memcpy(btf->data, data, size);
- btf->data_size = size;
-
+ btf->hdr = btf->raw_data;
err = btf_parse_hdr(btf);
if (err)
goto done;
+ btf->strs_data = btf->raw_data + btf->hdr->hdr_len + btf->hdr->str_off;
+ btf->types_data = btf->raw_data + btf->hdr->hdr_len + btf->hdr->type_off;
+
err = btf_parse_str_sec(btf);
+ err = err ?: btf_parse_type_sec(btf);
if (err)
goto done;
- err = btf_parse_type_sec(btf);
+ btf->fd = -1;
done:
if (err) {
@@ -501,17 +790,6 @@ done:
return btf;
}
-static bool btf_check_endianness(const GElf_Ehdr *ehdr)
-{
-#if __BYTE_ORDER == __LITTLE_ENDIAN
- return ehdr->e_ident[EI_DATA] == ELFDATA2LSB;
-#elif __BYTE_ORDER == __BIG_ENDIAN
- return ehdr->e_ident[EI_DATA] == ELFDATA2MSB;
-#else
-# error "Unrecognized __BYTE_ORDER__"
-#endif
-}
-
struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext)
{
Elf_Data *btf_data = NULL, *btf_ext_data = NULL;
@@ -544,10 +822,6 @@ struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext)
pr_warn("failed to get EHDR from %s\n", path);
goto done;
}
- if (!btf_check_endianness(&ehdr)) {
- pr_warn("non-native ELF endianness is not supported\n");
- goto done;
- }
if (!elf_rawdata(elf_getscn(elf, ehdr.e_shstrndx), NULL)) {
pr_warn("failed to get e_shstrndx from %s\n", path);
goto done;
@@ -659,7 +933,7 @@ struct btf *btf__parse_raw(const char *path)
err = -EIO;
goto err_out;
}
- if (magic != BTF_MAGIC) {
+ if (magic != BTF_MAGIC && magic != bswap_16(BTF_MAGIC)) {
/* definitely not a raw BTF */
err = -EPROTO;
goto err_out;
@@ -792,7 +1066,7 @@ int btf__finalize_data(struct bpf_object *obj, struct btf *btf)
__u32 i;
for (i = 1; i <= btf->nr_types; i++) {
- struct btf_type *t = btf->types[i];
+ struct btf_type *t = btf_type_by_id(btf, i);
/* Loader needs to fix up some of the things compiler
* couldn't get its hands on while emitting BTF. This
@@ -809,10 +1083,13 @@ int btf__finalize_data(struct bpf_object *obj, struct btf *btf)
return err;
}
+static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian);
+
int btf__load(struct btf *btf)
{
- __u32 log_buf_size = 0;
+ __u32 log_buf_size = 0, raw_size;
char *log_buf = NULL;
+ void *raw_data;
int err = 0;
if (btf->fd >= 0)
@@ -827,8 +1104,16 @@ retry_load:
*log_buf = 0;
}
- btf->fd = bpf_load_btf(btf->data, btf->data_size,
- log_buf, log_buf_size, false);
+ raw_data = btf_get_raw_data(btf, &raw_size, false);
+ if (!raw_data) {
+ err = -ENOMEM;
+ goto done;
+ }
+ /* cache native raw data representation */
+ btf->raw_size = raw_size;
+ btf->raw_data = raw_data;
+
+ btf->fd = bpf_load_btf(raw_data, raw_size, log_buf, log_buf_size, false);
if (btf->fd < 0) {
if (!log_buf || errno == ENOSPC) {
log_buf_size = max((__u32)BPF_LOG_BUF_SIZE,
@@ -859,20 +1144,88 @@ void btf__set_fd(struct btf *btf, int fd)
btf->fd = fd;
}
-const void *btf__get_raw_data(const struct btf *btf, __u32 *size)
+static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian)
+{
+ struct btf_header *hdr = btf->hdr;
+ struct btf_type *t;
+ void *data, *p;
+ __u32 data_sz;
+ int i;
+
+ data = swap_endian ? btf->raw_data_swapped : btf->raw_data;
+ if (data) {
+ *size = btf->raw_size;
+ return data;
+ }
+
+ data_sz = hdr->hdr_len + hdr->type_len + hdr->str_len;
+ data = calloc(1, data_sz);
+ if (!data)
+ return NULL;
+ p = data;
+
+ memcpy(p, hdr, hdr->hdr_len);
+ if (swap_endian)
+ btf_bswap_hdr(p);
+ p += hdr->hdr_len;
+
+ memcpy(p, btf->types_data, hdr->type_len);
+ if (swap_endian) {
+ for (i = 1; i <= btf->nr_types; i++) {
+ t = p + btf->type_offs[i];
+ /* btf_bswap_type_rest() relies on native t->info, so
+ * we swap base type info after we swapped all the
+ * additional information
+ */
+ if (btf_bswap_type_rest(t))
+ goto err_out;
+ btf_bswap_type_base(t);
+ }
+ }
+ p += hdr->type_len;
+
+ memcpy(p, btf->strs_data, hdr->str_len);
+ p += hdr->str_len;
+
+ *size = data_sz;
+ return data;
+err_out:
+ free(data);
+ return NULL;
+}
+
+const void *btf__get_raw_data(const struct btf *btf_ro, __u32 *size)
{
- *size = btf->data_size;
- return btf->data;
+ struct btf *btf = (struct btf *)btf_ro;
+ __u32 data_sz;
+ void *data;
+
+ data = btf_get_raw_data(btf, &data_sz, btf->swapped_endian);
+ if (!data)
+ return NULL;
+
+ btf->raw_size = data_sz;
+ if (btf->swapped_endian)
+ btf->raw_data_swapped = data;
+ else
+ btf->raw_data = data;
+ *size = data_sz;
+ return data;
}
-const char *btf__name_by_offset(const struct btf *btf, __u32 offset)
+const char *btf__str_by_offset(const struct btf *btf, __u32 offset)
{
if (offset < btf->hdr->str_len)
- return &btf->strings[offset];
+ return btf->strs_data + offset;
else
return NULL;
}
+const char *btf__name_by_offset(const struct btf *btf, __u32 offset)
+{
+ return btf__str_by_offset(btf, offset);
+}
+
int btf__get_from_id(__u32 id, struct btf **btf)
{
struct bpf_btf_info btf_info = { 0 };
@@ -1008,6 +1361,970 @@ int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
return 0;
}
+static size_t strs_hash_fn(const void *key, void *ctx)
+{
+ struct btf *btf = ctx;
+ const char *str = btf->strs_data + (long)key;
+
+ return str_hash(str);
+}
+
+static bool strs_hash_equal_fn(const void *key1, const void *key2, void *ctx)
+{
+ struct btf *btf = ctx;
+ const char *str1 = btf->strs_data + (long)key1;
+ const char *str2 = btf->strs_data + (long)key2;
+
+ return strcmp(str1, str2) == 0;
+}
+
+static void btf_invalidate_raw_data(struct btf *btf)
+{
+ if (btf->raw_data) {
+ free(btf->raw_data);
+ btf->raw_data = NULL;
+ }
+ if (btf->raw_data_swapped) {
+ free(btf->raw_data_swapped);
+ btf->raw_data_swapped = NULL;
+ }
+}
+
+/* Ensure BTF is ready to be modified (by splitting into three memory
+ * regions for header, types, and strings). Also invalidate cached
+ * raw_data, if any.
+ */
+static int btf_ensure_modifiable(struct btf *btf)
+{
+ void *hdr, *types, *strs, *strs_end, *s;
+ struct hashmap *hash = NULL;
+ long off;
+ int err;
+
+ if (btf_is_modifiable(btf)) {
+ /* any BTF modification invalidates raw_data */
+ btf_invalidate_raw_data(btf);
+ return 0;
+ }
+
+ /* split raw data into three memory regions */
+ hdr = malloc(btf->hdr->hdr_len);
+ types = malloc(btf->hdr->type_len);
+ strs = malloc(btf->hdr->str_len);
+ if (!hdr || !types || !strs)
+ goto err_out;
+
+ memcpy(hdr, btf->hdr, btf->hdr->hdr_len);
+ memcpy(types, btf->types_data, btf->hdr->type_len);
+ memcpy(strs, btf->strs_data, btf->hdr->str_len);
+
+ /* build lookup index for all strings */
+ hash = hashmap__new(strs_hash_fn, strs_hash_equal_fn, btf);
+ if (IS_ERR(hash)) {
+ err = PTR_ERR(hash);
+ hash = NULL;
+ goto err_out;
+ }
+
+ strs_end = strs + btf->hdr->str_len;
+ for (off = 0, s = strs; s < strs_end; off += strlen(s) + 1, s = strs + off) {
+ /* hashmap__add() returns -EEXIST if a string with the same
+ * content is already in the hash map
+ */
+ err = hashmap__add(hash, (void *)off, (void *)off);
+ if (err == -EEXIST)
+ continue; /* duplicate */
+ if (err)
+ goto err_out;
+ }
+
+ /* only when everything was successful, update internal state */
+ btf->hdr = hdr;
+ btf->types_data = types;
+ btf->types_data_cap = btf->hdr->type_len;
+ btf->strs_data = strs;
+ btf->strs_data_cap = btf->hdr->str_len;
+ btf->strs_hash = hash;
+ /* if BTF was created from scratch, all strings are guaranteed to be
+ * unique and deduplicated
+ */
+ btf->strs_deduped = btf->hdr->str_len <= 1;
+
+ /* invalidate raw_data representation */
+ btf_invalidate_raw_data(btf);
+
+ return 0;
+
+err_out:
+ hashmap__free(hash);
+ free(hdr);
+ free(types);
+ free(strs);
+ return -ENOMEM;
+}
+
+static void *btf_add_str_mem(struct btf *btf, size_t add_sz)
+{
+ return btf_add_mem(&btf->strs_data, &btf->strs_data_cap, 1,
+ btf->hdr->str_len, BTF_MAX_STR_OFFSET, add_sz);
+}
+
+/* Find an offset in BTF string section that corresponds to a given string *s*.
+ * Returns:
+ * - >0 offset into string section, if string is found;
+ * - -ENOENT, if string is not in the string section;
+ * - <0, on any other error.
+ */
+int btf__find_str(struct btf *btf, const char *s)
+{
+ long old_off, new_off, len;
+ void *p;
+
+ /* BTF needs to be in a modifiable state to build string lookup index */
+ if (btf_ensure_modifiable(btf))
+ return -ENOMEM;
+
+ /* see btf__add_str() for why we do this */
+ len = strlen(s) + 1;
+ p = btf_add_str_mem(btf, len);
+ if (!p)
+ return -ENOMEM;
+
+ new_off = btf->hdr->str_len;
+ memcpy(p, s, len);
+
+ if (hashmap__find(btf->strs_hash, (void *)new_off, (void **)&old_off))
+ return old_off;
+
+ return -ENOENT;
+}
+
+/* Add a string *s* to the BTF string section.
+ * Returns:
+ * - >0, offset into the string section, on success;
+ * - <0, on error.
+ */
+int btf__add_str(struct btf *btf, const char *s)
+{
+ long old_off, new_off, len;
+ void *p;
+ int err;
+
+ if (btf_ensure_modifiable(btf))
+ return -ENOMEM;
+
+ /* Hashmap keys are always offsets within btf->strs_data, so to even
+ * look up some string from the "outside", we need to first append it
+ * at the end, so that it can be addressed with an offset. Luckily,
+ * until btf->hdr->str_len is incremented, that string is just a piece
+ * of garbage for the rest of BTF code, so no harm, no foul. On the
+ * other hand, if the string is unique, it's already appended and
+ * ready to be used, only a simple btf->hdr->str_len increment away.
+ */
+ len = strlen(s) + 1;
+ p = btf_add_str_mem(btf, len);
+ if (!p)
+ return -ENOMEM;
+
+ new_off = btf->hdr->str_len;
+ memcpy(p, s, len);
+
+ /* Now attempt to add the string, but only if the string with the same
+ * contents doesn't exist already (HASHMAP_ADD strategy). If such a
+ * string exists, we'll get its offset in old_off (that's old_key).
+ */
+ err = hashmap__insert(btf->strs_hash, (void *)new_off, (void *)new_off,
+ HASHMAP_ADD, (const void **)&old_off, NULL);
+ if (err == -EEXIST)
+ return old_off; /* duplicated string, return existing offset */
+ if (err)
+ return err;
+
+ btf->hdr->str_len += len; /* new unique string, adjust data length */
+ return new_off;
+}
+
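+/* Usage sketch (editor's illustration, not part of this patch): identical
+ * string contents always map to a single offset, and btf__find_str() never
+ * adds new strings:
+ *
+ *    struct btf *btf = btf__new_empty();
+ *    int off1 = btf__add_str(btf, "int");    (new string, appended)
+ *    int off2 = btf__add_str(btf, "int");    (duplicate, off2 == off1)
+ *    int off3 = btf__find_str(btf, "long");  (not present, returns -ENOENT)
+ */
+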
+static void *btf_add_type_mem(struct btf *btf, size_t add_sz)
+{
+ return btf_add_mem(&btf->types_data, &btf->types_data_cap, 1,
+ btf->hdr->type_len, UINT_MAX, add_sz);
+}
+
+static __u32 btf_type_info(int kind, int vlen, int kflag)
+{
+ return (kflag << 31) | (kind << 24) | vlen;
+}
+
+static void btf_type_inc_vlen(struct btf_type *t)
+{
+ t->info = btf_type_info(btf_kind(t), btf_vlen(t) + 1, btf_kflag(t));
+}
+
+/*
+ * Append new BTF_KIND_INT type with:
+ * - *name* - non-empty, non-NULL type name;
+ * - *byte_sz* - power-of-2 (1, 2, 4, ...) size of the type, in bytes;
+ * - *encoding* - a combination of BTF_INT_SIGNED, BTF_INT_CHAR, BTF_INT_BOOL.
+ * Returns:
+ * - >0, type ID of newly added BTF type;
+ * - <0, on error.
+ */
+int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding)
+{
+ struct btf_type *t;
+ int sz, err, name_off;
+
+ /* non-empty name */
+ if (!name || !name[0])
+ return -EINVAL;
+ /* byte_sz must be power of 2 */
+ if (!byte_sz || (byte_sz & (byte_sz - 1)) || byte_sz > 16)
+ return -EINVAL;
+ if (encoding & ~(BTF_INT_SIGNED | BTF_INT_CHAR | BTF_INT_BOOL))
+ return -EINVAL;
+
+ /* deconstruct BTF, if necessary, and invalidate raw_data */
+ if (btf_ensure_modifiable(btf))
+ return -ENOMEM;
+
+ sz = sizeof(struct btf_type) + sizeof(int);
+ t = btf_add_type_mem(btf, sz);
+ if (!t)
+ return -ENOMEM;
+
+ /* if something goes wrong later, we might end up with an extra string,
+ * but that shouldn't be a problem, because BTF can't be constructed
+ * completely anyway and will most probably be just discarded
+ */
+ name_off = btf__add_str(btf, name);
+ if (name_off < 0)
+ return name_off;
+
+ t->name_off = name_off;
+ t->info = btf_type_info(BTF_KIND_INT, 0, 0);
+ t->size = byte_sz;
+ /* set INT info, we don't allow setting legacy bit offset/size */
+ *(__u32 *)(t + 1) = (encoding << 24) | (byte_sz * 8);
+
+ err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
+ if (err)
+ return err;
+
+ btf->hdr->type_len += sz;
+ btf->hdr->str_off += sz;
+ btf->nr_types++;
+ return btf->nr_types;
+}
+
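+/* Usage sketch (editor's illustration, not part of this patch), assuming
+ * *btf* was obtained from btf__new_empty():
+ *
+ *    int int_id = btf__add_int(btf, "int", 4, BTF_INT_SIGNED);
+ *    int uchar_id = btf__add_int(btf, "unsigned char", 1, BTF_INT_CHAR);
+ *
+ * Both calls return a type ID (>0) that later btf__add_*() calls can reference.
+ */
+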
+/* it's completely legal to append BTF types with type IDs pointing forward to
+ * types that haven't been appended yet, so we only make sure that the ID
+ * looks sane; we can't guarantee that it will always be valid
+ */
+static int validate_type_id(int id)
+{
+ if (id < 0 || id > BTF_MAX_NR_TYPES)
+ return -EINVAL;
+ return 0;
+}
+
+/* generic append function for PTR, TYPEDEF, CONST/VOLATILE/RESTRICT */
+static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref_type_id)
+{
+ struct btf_type *t;
+ int sz, name_off = 0, err;
+
+ if (validate_type_id(ref_type_id))
+ return -EINVAL;
+
+ if (btf_ensure_modifiable(btf))
+ return -ENOMEM;
+
+ sz = sizeof(struct btf_type);
+ t = btf_add_type_mem(btf, sz);
+ if (!t)
+ return -ENOMEM;
+
+ if (name && name[0]) {
+ name_off = btf__add_str(btf, name);
+ if (name_off < 0)
+ return name_off;
+ }
+
+ t->name_off = name_off;
+ t->info = btf_type_info(kind, 0, 0);
+ t->type = ref_type_id;
+
+ err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
+ if (err)
+ return err;
+
+ btf->hdr->type_len += sz;
+ btf->hdr->str_off += sz;
+ btf->nr_types++;
+ return btf->nr_types;
+}
+
+/*
+ * Append new BTF_KIND_PTR type with:
+ * - *ref_type_id* - referenced type ID, it might not exist yet;
+ * Returns:
+ * - >0, type ID of newly added BTF type;
+ * - <0, on error.
+ */
+int btf__add_ptr(struct btf *btf, int ref_type_id)
+{
+ return btf_add_ref_kind(btf, BTF_KIND_PTR, NULL, ref_type_id);
+}
+
+/*
+ * Append new BTF_KIND_ARRAY type with:
+ * - *index_type_id* - type ID of the type describing array index;
+ * - *elem_type_id* - type ID of the type describing array element;
+ * - *nr_elems* - the number of elements in the array;
+ * Returns:
+ * - >0, type ID of newly added BTF type;
+ * - <0, on error.
+ */
+int btf__add_array(struct btf *btf, int index_type_id, int elem_type_id, __u32 nr_elems)
+{
+ struct btf_type *t;
+ struct btf_array *a;
+ int sz, err;
+
+ if (validate_type_id(index_type_id) || validate_type_id(elem_type_id))
+ return -EINVAL;
+
+ if (btf_ensure_modifiable(btf))
+ return -ENOMEM;
+
+ sz = sizeof(struct btf_type) + sizeof(struct btf_array);
+ t = btf_add_type_mem(btf, sz);
+ if (!t)
+ return -ENOMEM;
+
+ t->name_off = 0;
+ t->info = btf_type_info(BTF_KIND_ARRAY, 0, 0);
+ t->size = 0;
+
+ a = btf_array(t);
+ a->type = elem_type_id;
+ a->index_type = index_type_id;
+ a->nelems = nr_elems;
+
+ err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
+ if (err)
+ return err;
+
+ btf->hdr->type_len += sz;
+ btf->hdr->str_off += sz;
+ btf->nr_types++;
+ return btf->nr_types;
+}
+
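+/* Usage sketch (editor's illustration, not part of this patch): "int *"
+ * and "int[10]", assuming int_id is a 4-byte int added earlier with
+ * btf__add_int(); int_id doubles as the array index type here:
+ *
+ *    int ptr_id = btf__add_ptr(btf, int_id);
+ *    int arr_id = btf__add_array(btf, int_id, int_id, 10);
+ */
+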
+/* generic STRUCT/UNION append function */
+static int btf_add_composite(struct btf *btf, int kind, const char *name, __u32 bytes_sz)
+{
+ struct btf_type *t;
+ int sz, err, name_off = 0;
+
+ if (btf_ensure_modifiable(btf))
+ return -ENOMEM;
+
+ sz = sizeof(struct btf_type);
+ t = btf_add_type_mem(btf, sz);
+ if (!t)
+ return -ENOMEM;
+
+ if (name && name[0]) {
+ name_off = btf__add_str(btf, name);
+ if (name_off < 0)
+ return name_off;
+ }
+
+ /* start out with vlen=0 and no kflag; this will be adjusted when
+ * adding each member
+ */
+ t->name_off = name_off;
+ t->info = btf_type_info(kind, 0, 0);
+ t->size = bytes_sz;
+
+ err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
+ if (err)
+ return err;
+
+ btf->hdr->type_len += sz;
+ btf->hdr->str_off += sz;
+ btf->nr_types++;
+ return btf->nr_types;
+}
+
+/*
+ * Append new BTF_KIND_STRUCT type with:
+ * - *name* - name of the struct, can be NULL or empty for anonymous structs;
+ * - *byte_sz* - size of the struct, in bytes;
+ *
+ * Struct initially has no fields in it. Fields can be added by
+ * btf__add_field() right after btf__add_struct() succeeds.
+ *
+ * Returns:
+ * - >0, type ID of newly added BTF type;
+ * - <0, on error.
+ */
+int btf__add_struct(struct btf *btf, const char *name, __u32 byte_sz)
+{
+ return btf_add_composite(btf, BTF_KIND_STRUCT, name, byte_sz);
+}
+
+/*
+ * Append new BTF_KIND_UNION type with:
+ * - *name* - name of the union, can be NULL or empty for anonymous unions;
+ * - *byte_sz* - size of the union, in bytes;
+ *
+ * Union initially has no fields in it. Fields can be added by
+ * btf__add_field() right after btf__add_union() succeeds. All fields
+ * should have *bit_offset* of 0.
+ *
+ * Returns:
+ * - >0, type ID of newly added BTF type;
+ * - <0, on error.
+ */
+int btf__add_union(struct btf *btf, const char *name, __u32 byte_sz)
+{
+ return btf_add_composite(btf, BTF_KIND_UNION, name, byte_sz);
+}
+
+/*
+ * Append new field for the current STRUCT/UNION type with:
+ * - *name* - name of the field, can be NULL or empty for anonymous field;
+ * - *type_id* - type ID for the type describing field type;
+ * - *bit_offset* - bit offset of the start of the field within struct/union;
+ * - *bit_size* - bit size of a bitfield, 0 for non-bitfield fields;
+ * Returns:
+ * - 0, on success;
+ * - <0, on error.
+ */
+int btf__add_field(struct btf *btf, const char *name, int type_id,
+ __u32 bit_offset, __u32 bit_size)
+{
+ struct btf_type *t;
+ struct btf_member *m;
+ bool is_bitfield;
+ int sz, name_off = 0;
+
+ /* last type should be union/struct */
+ if (btf->nr_types == 0)
+ return -EINVAL;
+ t = btf_type_by_id(btf, btf->nr_types);
+ if (!btf_is_composite(t))
+ return -EINVAL;
+
+ if (validate_type_id(type_id))
+ return -EINVAL;
+ /* best-effort bit field offset/size enforcement */
+ is_bitfield = bit_size || (bit_offset % 8 != 0);
+ if (is_bitfield && (bit_size == 0 || bit_size > 255 || bit_offset > 0xffffff))
+ return -EINVAL;
+
+ /* only offset 0 is allowed for unions */
+ if (btf_is_union(t) && bit_offset)
+ return -EINVAL;
+
+ /* decompose and invalidate raw data */
+ if (btf_ensure_modifiable(btf))
+ return -ENOMEM;
+
+ sz = sizeof(struct btf_member);
+ m = btf_add_type_mem(btf, sz);
+ if (!m)
+ return -ENOMEM;
+
+ if (name && name[0]) {
+ name_off = btf__add_str(btf, name);
+ if (name_off < 0)
+ return name_off;
+ }
+
+ m->name_off = name_off;
+ m->type = type_id;
+ m->offset = bit_offset | (bit_size << 24);
+
+ /* btf_add_type_mem can invalidate t pointer */
+ t = btf_type_by_id(btf, btf->nr_types);
+ /* update parent type's vlen and kflag */
+ t->info = btf_type_info(btf_kind(t), btf_vlen(t) + 1, is_bitfield || btf_kflag(t));
+
+ btf->hdr->type_len += sz;
+ btf->hdr->str_off += sz;
+ return 0;
+}
+
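+/* Usage sketch (editor's illustration, not part of this patch): an 8-byte
+ * struct pair { int x; int y; }, assuming int_id is a 4-byte int added
+ * earlier; bit offsets are in bits, bit_size is 0 for non-bitfields:
+ *
+ *    int s_id = btf__add_struct(btf, "pair", 8);
+ *    btf__add_field(btf, "x", int_id, 0, 0);
+ *    btf__add_field(btf, "y", int_id, 32, 0);
+ *
+ * Fields are always appended to the most recently added struct/union.
+ */
+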
+/*
+ * Append new BTF_KIND_ENUM type with:
+ * - *name* - name of the enum, can be NULL or empty for anonymous enums;
+ * - *byte_sz* - size of the enum, in bytes.
+ *
+ * Enum initially has no enum values in it (and corresponds to enum forward
+ * declaration). Enumerator values can be added by btf__add_enum_value()
+ * immediately after btf__add_enum() succeeds.
+ *
+ * Returns:
+ * - >0, type ID of newly added BTF type;
+ * - <0, on error.
+ */
+int btf__add_enum(struct btf *btf, const char *name, __u32 byte_sz)
+{
+ struct btf_type *t;
+ int sz, err, name_off = 0;
+
+ /* byte_sz must be power of 2 */
+ if (!byte_sz || (byte_sz & (byte_sz - 1)) || byte_sz > 8)
+ return -EINVAL;
+
+ if (btf_ensure_modifiable(btf))
+ return -ENOMEM;
+
+ sz = sizeof(struct btf_type);
+ t = btf_add_type_mem(btf, sz);
+ if (!t)
+ return -ENOMEM;
+
+ if (name && name[0]) {
+ name_off = btf__add_str(btf, name);
+ if (name_off < 0)
+ return name_off;
+ }
+
+ /* start out with vlen=0; it will be adjusted when adding enum values */
+ t->name_off = name_off;
+ t->info = btf_type_info(BTF_KIND_ENUM, 0, 0);
+ t->size = byte_sz;
+
+ err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
+ if (err)
+ return err;
+
+ btf->hdr->type_len += sz;
+ btf->hdr->str_off += sz;
+ btf->nr_types++;
+ return btf->nr_types;
+}
+
+/*
+ * Append new enum value for the current ENUM type with:
+ * - *name* - name of the enumerator value, can't be NULL or empty;
+ * - *value* - integer value corresponding to enum value *name*;
+ * Returns:
+ * - 0, on success;
+ * - <0, on error.
+ */
+int btf__add_enum_value(struct btf *btf, const char *name, __s64 value)
+{
+ struct btf_type *t;
+ struct btf_enum *v;
+ int sz, name_off;
+
+ /* last type should be BTF_KIND_ENUM */
+ if (btf->nr_types == 0)
+ return -EINVAL;
+ t = btf_type_by_id(btf, btf->nr_types);
+ if (!btf_is_enum(t))
+ return -EINVAL;
+
+ /* non-empty name */
+ if (!name || !name[0])
+ return -EINVAL;
+ if (value < INT_MIN || value > UINT_MAX)
+ return -E2BIG;
+
+ /* decompose and invalidate raw data */
+ if (btf_ensure_modifiable(btf))
+ return -ENOMEM;
+
+ sz = sizeof(struct btf_enum);
+ v = btf_add_type_mem(btf, sz);
+ if (!v)
+ return -ENOMEM;
+
+ name_off = btf__add_str(btf, name);
+ if (name_off < 0)
+ return name_off;
+
+ v->name_off = name_off;
+ v->val = value;
+
+ /* update parent type's vlen */
+ t = btf_type_by_id(btf, btf->nr_types);
+ btf_type_inc_vlen(t);
+
+ btf->hdr->type_len += sz;
+ btf->hdr->str_off += sz;
+ return 0;
+}
+
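+/* Usage sketch (editor's illustration, not part of this patch): a 4-byte
+ * enum state { RUNNING = 0, STOPPED = 1 }:
+ *
+ *    int e_id = btf__add_enum(btf, "state", 4);
+ *    btf__add_enum_value(btf, "RUNNING", 0);
+ *    btf__add_enum_value(btf, "STOPPED", 1);
+ */
+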
+/*
+ * Append new BTF_KIND_FWD type with:
+ * - *name*, non-empty/non-NULL name;
+ * - *fwd_kind*, kind of forward declaration, one of BTF_FWD_STRUCT,
+ * BTF_FWD_UNION, or BTF_FWD_ENUM;
+ * Returns:
+ * - >0, type ID of newly added BTF type;
+ * - <0, on error.
+ */
+int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind)
+{
+ if (!name || !name[0])
+ return -EINVAL;
+
+ switch (fwd_kind) {
+ case BTF_FWD_STRUCT:
+ case BTF_FWD_UNION: {
+ struct btf_type *t;
+ int id;
+
+ id = btf_add_ref_kind(btf, BTF_KIND_FWD, name, 0);
+ if (id <= 0)
+ return id;
+ t = btf_type_by_id(btf, id);
+ t->info = btf_type_info(BTF_KIND_FWD, 0, fwd_kind == BTF_FWD_UNION);
+ return id;
+ }
+ case BTF_FWD_ENUM:
+ /* an enum forward in BTF is currently just an enum with no enum
+ * values; we also assume a standard 4-byte size for it
+ */
+ return btf__add_enum(btf, name, sizeof(int));
+ default:
+ return -EINVAL;
+ }
+}
+
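+/* Usage sketch (editor's illustration, not part of this patch):
+ *
+ *    int fwd_id = btf__add_fwd(btf, "task_struct", BTF_FWD_STRUCT);
+ */
+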
+/*
+ * Append new BTF_KIND_TYPEDEF type with:
+ * - *name*, non-empty/non-NULL name;
+ * - *ref_type_id* - referenced type ID, it might not exist yet;
+ * Returns:
+ * - >0, type ID of newly added BTF type;
+ * - <0, on error.
+ */
+int btf__add_typedef(struct btf *btf, const char *name, int ref_type_id)
+{
+ if (!name || !name[0])
+ return -EINVAL;
+
+ return btf_add_ref_kind(btf, BTF_KIND_TYPEDEF, name, ref_type_id);
+}
+
+/*
+ * Append new BTF_KIND_VOLATILE type with:
+ * - *ref_type_id* - referenced type ID, it might not exist yet;
+ * Returns:
+ * - >0, type ID of newly added BTF type;
+ * - <0, on error.
+ */
+int btf__add_volatile(struct btf *btf, int ref_type_id)
+{
+ return btf_add_ref_kind(btf, BTF_KIND_VOLATILE, NULL, ref_type_id);
+}
+
+/*
+ * Append new BTF_KIND_CONST type with:
+ * - *ref_type_id* - referenced type ID, it might not exist yet;
+ * Returns:
+ * - >0, type ID of newly added BTF type;
+ * - <0, on error.
+ */
+int btf__add_const(struct btf *btf, int ref_type_id)
+{
+ return btf_add_ref_kind(btf, BTF_KIND_CONST, NULL, ref_type_id);
+}
+
+/*
+ * Append new BTF_KIND_RESTRICT type with:
+ * - *ref_type_id* - referenced type ID, it might not exist yet;
+ * Returns:
+ * - >0, type ID of newly added BTF type;
+ * - <0, on error.
+ */
+int btf__add_restrict(struct btf *btf, int ref_type_id)
+{
+ return btf_add_ref_kind(btf, BTF_KIND_RESTRICT, NULL, ref_type_id);
+}
+
+/*
+ * Append new BTF_KIND_FUNC type with:
+ * - *name*, non-empty/non-NULL name;
+ * - *proto_type_id* - FUNC_PROTO's type ID, it might not exist yet;
+ * Returns:
+ * - >0, type ID of newly added BTF type;
+ * - <0, on error.
+ */
+int btf__add_func(struct btf *btf, const char *name,
+ enum btf_func_linkage linkage, int proto_type_id)
+{
+ int id;
+
+ if (!name || !name[0])
+ return -EINVAL;
+ if (linkage != BTF_FUNC_STATIC && linkage != BTF_FUNC_GLOBAL &&
+ linkage != BTF_FUNC_EXTERN)
+ return -EINVAL;
+
+ id = btf_add_ref_kind(btf, BTF_KIND_FUNC, name, proto_type_id);
+ if (id > 0) {
+ struct btf_type *t = btf_type_by_id(btf, id);
+
+ t->info = btf_type_info(BTF_KIND_FUNC, linkage, 0);
+ }
+ return id;
+}
+
+/*
+ * Append new BTF_KIND_FUNC_PROTO with:
+ * - *ret_type_id* - type ID for return result of a function.
+ *
+ * Function prototype initially has no arguments, but they can be added by
+ * btf__add_func_param() one by one, immediately after
+ * btf__add_func_proto() succeeds.
+ *
+ * Returns:
+ * - >0, type ID of newly added BTF type;
+ * - <0, on error.
+ */
+int btf__add_func_proto(struct btf *btf, int ret_type_id)
+{
+ struct btf_type *t;
+ int sz, err;
+
+ if (validate_type_id(ret_type_id))
+ return -EINVAL;
+
+ if (btf_ensure_modifiable(btf))
+ return -ENOMEM;
+
+ sz = sizeof(struct btf_type);
+ t = btf_add_type_mem(btf, sz);
+ if (!t)
+ return -ENOMEM;
+
+ /* start out with vlen=0; this will be adjusted when adding function
+ * parameters, if necessary
+ */
+ t->name_off = 0;
+ t->info = btf_type_info(BTF_KIND_FUNC_PROTO, 0, 0);
+ t->type = ret_type_id;
+
+ err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
+ if (err)
+ return err;
+
+ btf->hdr->type_len += sz;
+ btf->hdr->str_off += sz;
+ btf->nr_types++;
+ return btf->nr_types;
+}
+
+/*
+ * Append new function parameter for current FUNC_PROTO type with:
+ * - *name* - parameter name, can be NULL or empty;
+ * - *type_id* - type ID describing the type of the parameter.
+ * Returns:
+ * - 0, on success;
+ * - <0, on error.
+ */
+int btf__add_func_param(struct btf *btf, const char *name, int type_id)
+{
+ struct btf_type *t;
+ struct btf_param *p;
+ int sz, name_off = 0;
+
+ if (validate_type_id(type_id))
+ return -EINVAL;
+
+ /* last type should be BTF_KIND_FUNC_PROTO */
+ if (btf->nr_types == 0)
+ return -EINVAL;
+ t = btf_type_by_id(btf, btf->nr_types);
+ if (!btf_is_func_proto(t))
+ return -EINVAL;
+
+ /* decompose and invalidate raw data */
+ if (btf_ensure_modifiable(btf))
+ return -ENOMEM;
+
+ sz = sizeof(struct btf_param);
+ p = btf_add_type_mem(btf, sz);
+ if (!p)
+ return -ENOMEM;
+
+ if (name && name[0]) {
+ name_off = btf__add_str(btf, name);
+ if (name_off < 0)
+ return name_off;
+ }
+
+ p->name_off = name_off;
+ p->type = type_id;
+
+ /* update parent type's vlen */
+ t = btf_type_by_id(btf, btf->nr_types);
+ btf_type_inc_vlen(t);
+
+ btf->hdr->type_len += sz;
+ btf->hdr->str_off += sz;
+ return 0;
+}
+
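+/* Usage sketch (editor's illustration, not part of this patch): a global
+ * "int square(int a)" is a FUNC_PROTO plus a FUNC referencing it, assuming
+ * int_id is a 4-byte int added earlier:
+ *
+ *    int proto_id = btf__add_func_proto(btf, int_id);
+ *    btf__add_func_param(btf, "a", int_id);
+ *    int func_id = btf__add_func(btf, "square", BTF_FUNC_GLOBAL, proto_id);
+ */
+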
+/*
+ * Append new BTF_KIND_VAR type with:
+ * - *name* - non-empty/non-NULL name;
+ * - *linkage* - variable linkage, one of BTF_VAR_STATIC,
+ * BTF_VAR_GLOBAL_ALLOCATED, or BTF_VAR_GLOBAL_EXTERN;
+ * - *type_id* - type ID of the type describing the type of the variable.
+ * Returns:
+ * - >0, type ID of newly added BTF type;
+ * - <0, on error.
+ */
+int btf__add_var(struct btf *btf, const char *name, int linkage, int type_id)
+{
+ struct btf_type *t;
+ struct btf_var *v;
+ int sz, err, name_off;
+
+ /* non-empty name */
+ if (!name || !name[0])
+ return -EINVAL;
+ if (linkage != BTF_VAR_STATIC && linkage != BTF_VAR_GLOBAL_ALLOCATED &&
+ linkage != BTF_VAR_GLOBAL_EXTERN)
+ return -EINVAL;
+ if (validate_type_id(type_id))
+ return -EINVAL;
+
+ /* deconstruct BTF, if necessary, and invalidate raw_data */
+ if (btf_ensure_modifiable(btf))
+ return -ENOMEM;
+
+ sz = sizeof(struct btf_type) + sizeof(struct btf_var);
+ t = btf_add_type_mem(btf, sz);
+ if (!t)
+ return -ENOMEM;
+
+ name_off = btf__add_str(btf, name);
+ if (name_off < 0)
+ return name_off;
+
+ t->name_off = name_off;
+ t->info = btf_type_info(BTF_KIND_VAR, 0, 0);
+ t->type = type_id;
+
+ v = btf_var(t);
+ v->linkage = linkage;
+
+ err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
+ if (err)
+ return err;
+
+ btf->hdr->type_len += sz;
+ btf->hdr->str_off += sz;
+ btf->nr_types++;
+ return btf->nr_types;
+}
+
+/*
+ * Append new BTF_KIND_DATASEC type with:
+ * - *name* - non-empty/non-NULL name;
+ * - *byte_sz* - data section size, in bytes.
+ *
+ * Data section is initially empty. Variable info records can be added
+ * with btf__add_datasec_var_info() calls after btf__add_datasec() succeeds.
+ *
+ * Returns:
+ * - >0, type ID of newly added BTF type;
+ * - <0, on error.
+ */
+int btf__add_datasec(struct btf *btf, const char *name, __u32 byte_sz)
+{
+ struct btf_type *t;
+ int sz, err, name_off;
+
+ /* non-empty name */
+ if (!name || !name[0])
+ return -EINVAL;
+
+ if (btf_ensure_modifiable(btf))
+ return -ENOMEM;
+
+ sz = sizeof(struct btf_type);
+ t = btf_add_type_mem(btf, sz);
+ if (!t)
+ return -ENOMEM;
+
+ name_off = btf__add_str(btf, name);
+ if (name_off < 0)
+ return name_off;
+
+ /* start with vlen=0, which will be updated as var_secinfos are added */
+ t->name_off = name_off;
+ t->info = btf_type_info(BTF_KIND_DATASEC, 0, 0);
+ t->size = byte_sz;
+
+ err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
+ if (err)
+ return err;
+
+ btf->hdr->type_len += sz;
+ btf->hdr->str_off += sz;
+ btf->nr_types++;
+ return btf->nr_types;
+}
+
+/*
+ * Append new data section variable information entry for current DATASEC type:
+ * - *var_type_id* - type ID describing the type of the variable;
+ * - *offset* - variable offset within data section, in bytes;
+ * - *byte_sz* - variable size, in bytes.
+ *
+ * Returns:
+ * - 0, on success;
+ * - <0, on error.
+ */
+int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __u32 byte_sz)
+{
+ struct btf_type *t;
+ struct btf_var_secinfo *v;
+ int sz;
+
+ /* last type should be BTF_KIND_DATASEC */
+ if (btf->nr_types == 0)
+ return -EINVAL;
+ t = btf_type_by_id(btf, btf->nr_types);
+ if (!btf_is_datasec(t))
+ return -EINVAL;
+
+ if (validate_type_id(var_type_id))
+ return -EINVAL;
+
+ /* decompose and invalidate raw data */
+ if (btf_ensure_modifiable(btf))
+ return -ENOMEM;
+
+ sz = sizeof(struct btf_var_secinfo);
+ v = btf_add_type_mem(btf, sz);
+ if (!v)
+ return -ENOMEM;
+
+ v->type = var_type_id;
+ v->offset = offset;
+ v->size = byte_sz;
+
+ /* update parent type's vlen */
+ t = btf_type_by_id(btf, btf->nr_types);
+ btf_type_inc_vlen(t);
+
+ btf->hdr->type_len += sz;
+ btf->hdr->str_off += sz;
+ return 0;
+}
+
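+/* Usage sketch (editor's illustration, not part of this patch): a 4-byte
+ * ".data" section holding one global int variable, assuming int_id is a
+ * 4-byte int added earlier:
+ *
+ *    int var_id = btf__add_var(btf, "cnt", BTF_VAR_GLOBAL_ALLOCATED, int_id);
+ *    int sec_id = btf__add_datasec(btf, ".data", 4);
+ *    btf__add_datasec_var_info(btf, var_id, 0, 4);
+ */
+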
struct btf_ext_sec_setup_param {
__u32 off;
__u32 len;
@@ -1131,14 +2448,14 @@ static int btf_ext_setup_line_info(struct btf_ext *btf_ext)
return btf_ext_setup_info(btf_ext, &param);
}
-static int btf_ext_setup_field_reloc(struct btf_ext *btf_ext)
+static int btf_ext_setup_core_relos(struct btf_ext *btf_ext)
{
struct btf_ext_sec_setup_param param = {
- .off = btf_ext->hdr->field_reloc_off,
- .len = btf_ext->hdr->field_reloc_len,
- .min_rec_size = sizeof(struct bpf_field_reloc),
- .ext_info = &btf_ext->field_reloc_info,
- .desc = "field_reloc",
+ .off = btf_ext->hdr->core_relo_off,
+ .len = btf_ext->hdr->core_relo_len,
+ .min_rec_size = sizeof(struct bpf_core_relo),
+ .ext_info = &btf_ext->core_relo_info,
+ .desc = "core_relo",
};
return btf_ext_setup_info(btf_ext, &param);
@@ -1154,7 +2471,10 @@ static int btf_ext_parse_hdr(__u8 *data, __u32 data_size)
return -EINVAL;
}
- if (hdr->magic != BTF_MAGIC) {
+ if (hdr->magic == bswap_16(BTF_MAGIC)) {
+ pr_warn("BTF.ext in non-native endianness is not supported\n");
+ return -ENOTSUP;
+ } else if (hdr->magic != BTF_MAGIC) {
pr_debug("Invalid BTF.ext magic:%x\n", hdr->magic);
return -EINVAL;
}
@@ -1217,10 +2537,9 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
if (err)
goto done;
- if (btf_ext->hdr->hdr_len <
- offsetofend(struct btf_ext_header, field_reloc_len))
+ if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len))
goto done;
- err = btf_ext_setup_field_reloc(btf_ext);
+ err = btf_ext_setup_core_relos(btf_ext);
if (err)
goto done;
@@ -1475,6 +2794,9 @@ int btf__dedup(struct btf *btf, struct btf_ext *btf_ext,
return -EINVAL;
}
+ if (btf_ensure_modifiable(btf))
+ return -ENOMEM;
+
err = btf_dedup_strings(d);
if (err < 0) {
pr_debug("btf_dedup_strings failed:%d\n", err);
@@ -1575,7 +2897,7 @@ static int btf_dedup_hypot_map_add(struct btf_dedup *d,
__u32 *new_list;
d->hypot_cap += max((size_t)16, d->hypot_cap / 2);
- new_list = realloc(d->hypot_list, sizeof(__u32) * d->hypot_cap);
+ new_list = libbpf_reallocarray(d->hypot_list, d->hypot_cap, sizeof(__u32));
if (!new_list)
return -ENOMEM;
d->hypot_list = new_list;
@@ -1659,7 +2981,7 @@ static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
/* special BTF "void" type is made canonical immediately */
d->map[0] = 0;
for (i = 1; i <= btf->nr_types; i++) {
- struct btf_type *t = d->btf->types[i];
+ struct btf_type *t = btf_type_by_id(d->btf, i);
/* VAR and DATASEC are never deduped and are self-canonical */
if (btf_is_var(t) || btf_is_datasec(t))
@@ -1698,7 +3020,7 @@ static int btf_for_each_str_off(struct btf_dedup *d, str_off_fn_t fn, void *ctx)
struct btf_type *t;
for (i = 1; i <= d->btf->nr_types; i++) {
- t = d->btf->types[i];
+ t = btf_type_by_id(d->btf, i);
r = fn(&t->name_off, ctx);
if (r)
return r;
@@ -1852,8 +3174,7 @@ static int btf_str_remap_offset(__u32 *str_off_ptr, void *ctx)
*/
static int btf_dedup_strings(struct btf_dedup *d)
{
- const struct btf_header *hdr = d->btf->hdr;
- char *start = (char *)d->btf->nohdr_data + hdr->str_off;
+ char *start = d->btf->strs_data;
char *end = start + d->btf->hdr->str_len;
char *p = start, *tmp_strs = NULL;
struct btf_str_ptrs strs = {
@@ -1865,14 +3186,16 @@ static int btf_dedup_strings(struct btf_dedup *d)
int i, j, err = 0, grp_idx;
bool grp_used;
+ if (d->btf->strs_deduped)
+ return 0;
+
/* build index of all strings */
while (p < end) {
if (strs.cnt + 1 > strs.cap) {
struct btf_str_ptr *new_ptrs;
strs.cap += max(strs.cnt / 2, 16U);
- new_ptrs = realloc(strs.ptrs,
- sizeof(strs.ptrs[0]) * strs.cap);
+ new_ptrs = libbpf_reallocarray(strs.ptrs, strs.cap, sizeof(strs.ptrs[0]));
if (!new_ptrs) {
err = -ENOMEM;
goto done;
@@ -1958,6 +3281,7 @@ static int btf_dedup_strings(struct btf_dedup *d)
goto done;
d->btf->hdr->str_len = end - start;
+ d->btf->strs_deduped = true;
done:
free(tmp_strs);
@@ -2234,7 +3558,7 @@ static bool btf_compat_fnproto(struct btf_type *t1, struct btf_type *t2)
*/
static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
{
- struct btf_type *t = d->btf->types[type_id];
+ struct btf_type *t = btf_type_by_id(d->btf, type_id);
struct hashmap_entry *hash_entry;
struct btf_type *cand;
/* if we don't find equivalent type, then we are canonical */
@@ -2261,7 +3585,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
h = btf_hash_int(t);
for_each_dedup_cand(d, hash_entry, h) {
cand_id = (__u32)(long)hash_entry->value;
- cand = d->btf->types[cand_id];
+ cand = btf_type_by_id(d->btf, cand_id);
if (btf_equal_int(t, cand)) {
new_id = cand_id;
break;
@@ -2273,7 +3597,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
h = btf_hash_enum(t);
for_each_dedup_cand(d, hash_entry, h) {
cand_id = (__u32)(long)hash_entry->value;
- cand = d->btf->types[cand_id];
+ cand = btf_type_by_id(d->btf, cand_id);
if (btf_equal_enum(t, cand)) {
new_id = cand_id;
break;
@@ -2296,7 +3620,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
h = btf_hash_common(t);
for_each_dedup_cand(d, hash_entry, h) {
cand_id = (__u32)(long)hash_entry->value;
- cand = d->btf->types[cand_id];
+ cand = btf_type_by_id(d->btf, cand_id);
if (btf_equal_common(t, cand)) {
new_id = cand_id;
break;
@@ -2355,13 +3679,13 @@ static uint32_t resolve_fwd_id(struct btf_dedup *d, uint32_t type_id)
{
__u32 orig_type_id = type_id;
- if (!btf_is_fwd(d->btf->types[type_id]))
+ if (!btf_is_fwd(btf__type_by_id(d->btf, type_id)))
return type_id;
while (is_type_mapped(d, type_id) && d->map[type_id] != type_id)
type_id = d->map[type_id];
- if (!btf_is_fwd(d->btf->types[type_id]))
+ if (!btf_is_fwd(btf__type_by_id(d->btf, type_id)))
return type_id;
return orig_type_id;
@@ -2489,8 +3813,8 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
if (btf_dedup_hypot_map_add(d, canon_id, cand_id))
return -ENOMEM;
- cand_type = d->btf->types[cand_id];
- canon_type = d->btf->types[canon_id];
+ cand_type = btf_type_by_id(d->btf, cand_id);
+ canon_type = btf_type_by_id(d->btf, canon_id);
cand_kind = btf_kind(cand_type);
canon_kind = btf_kind(canon_type);
@@ -2641,8 +3965,8 @@ static void btf_dedup_merge_hypot_map(struct btf_dedup *d)
targ_type_id = d->hypot_map[cand_type_id];
t_id = resolve_type_id(d, targ_type_id);
c_id = resolve_type_id(d, cand_type_id);
- t_kind = btf_kind(d->btf->types[t_id]);
- c_kind = btf_kind(d->btf->types[c_id]);
+ t_kind = btf_kind(btf__type_by_id(d->btf, t_id));
+ c_kind = btf_kind(btf__type_by_id(d->btf, c_id));
/*
* Resolve FWD into STRUCT/UNION.
* It's ok to resolve FWD into STRUCT/UNION that's not yet
@@ -2710,7 +4034,7 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
if (d->map[type_id] <= BTF_MAX_NR_TYPES)
return 0;
- t = d->btf->types[type_id];
+ t = btf_type_by_id(d->btf, type_id);
kind = btf_kind(t);
if (kind != BTF_KIND_STRUCT && kind != BTF_KIND_UNION)
@@ -2731,7 +4055,7 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
* creating a loop (FWD -> STRUCT and STRUCT -> FWD), because
* FWD and compatible STRUCT/UNION are considered equivalent.
*/
- cand_type = d->btf->types[cand_id];
+ cand_type = btf_type_by_id(d->btf, cand_id);
if (!btf_shallow_equal_struct(t, cand_type))
continue;
@@ -2803,7 +4127,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
if (d->map[type_id] <= BTF_MAX_NR_TYPES)
return resolve_type_id(d, type_id);
- t = d->btf->types[type_id];
+ t = btf_type_by_id(d->btf, type_id);
d->map[type_id] = BTF_IN_PROGRESS_ID;
switch (btf_kind(t)) {
@@ -2821,7 +4145,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
h = btf_hash_common(t);
for_each_dedup_cand(d, hash_entry, h) {
cand_id = (__u32)(long)hash_entry->value;
- cand = d->btf->types[cand_id];
+ cand = btf_type_by_id(d->btf, cand_id);
if (btf_equal_common(t, cand)) {
new_id = cand_id;
break;
@@ -2845,7 +4169,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
h = btf_hash_array(t);
for_each_dedup_cand(d, hash_entry, h) {
cand_id = (__u32)(long)hash_entry->value;
- cand = d->btf->types[cand_id];
+ cand = btf_type_by_id(d->btf, cand_id);
if (btf_equal_array(t, cand)) {
new_id = cand_id;
break;
@@ -2877,7 +4201,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
h = btf_hash_fnproto(t);
for_each_dedup_cand(d, hash_entry, h) {
cand_id = (__u32)(long)hash_entry->value;
- cand = d->btf->types[cand_id];
+ cand = btf_type_by_id(d->btf, cand_id);
if (btf_equal_fnproto(t, cand)) {
new_id = cand_id;
break;
@@ -2925,9 +4249,9 @@ static int btf_dedup_ref_types(struct btf_dedup *d)
*/
static int btf_dedup_compact_types(struct btf_dedup *d)
{
- struct btf_type **new_types;
+ __u32 *new_offs;
__u32 next_type_id = 1;
- char *types_start, *p;
+ void *p;
int i, len;
/* we are going to reuse hypot_map to store compaction remapping */
@@ -2935,41 +4259,34 @@ static int btf_dedup_compact_types(struct btf_dedup *d)
for (i = 1; i <= d->btf->nr_types; i++)
d->hypot_map[i] = BTF_UNPROCESSED_ID;
- types_start = d->btf->nohdr_data + d->btf->hdr->type_off;
- p = types_start;
+ p = d->btf->types_data;
for (i = 1; i <= d->btf->nr_types; i++) {
if (d->map[i] != i)
continue;
- len = btf_type_size(d->btf->types[i]);
+ len = btf_type_size(btf__type_by_id(d->btf, i));
if (len < 0)
return len;
- memmove(p, d->btf->types[i], len);
+ memmove(p, btf__type_by_id(d->btf, i), len);
d->hypot_map[i] = next_type_id;
- d->btf->types[next_type_id] = (struct btf_type *)p;
+ d->btf->type_offs[next_type_id] = p - d->btf->types_data;
p += len;
next_type_id++;
}
/* shrink struct btf's internal types index and update btf_header */
d->btf->nr_types = next_type_id - 1;
- d->btf->types_size = d->btf->nr_types;
- d->btf->hdr->type_len = p - types_start;
- new_types = realloc(d->btf->types,
- (1 + d->btf->nr_types) * sizeof(struct btf_type *));
- if (!new_types)
+ d->btf->type_offs_cap = d->btf->nr_types + 1;
+ d->btf->hdr->type_len = p - d->btf->types_data;
+ new_offs = libbpf_reallocarray(d->btf->type_offs, d->btf->type_offs_cap,
+ sizeof(*new_offs));
+ if (!new_offs)
return -ENOMEM;
- d->btf->types = new_types;
-
- /* make sure string section follows type information without gaps */
- d->btf->hdr->str_off = p - (char *)d->btf->nohdr_data;
- memmove(p, d->btf->strings, d->btf->hdr->str_len);
- d->btf->strings = p;
- p += d->btf->hdr->str_len;
-
- d->btf->data_size = p - (char *)d->btf->data;
+ d->btf->type_offs = new_offs;
+ d->btf->hdr->str_off = d->btf->hdr->type_len;
+ d->btf->raw_size = d->btf->hdr->hdr_len + d->btf->hdr->type_len + d->btf->hdr->str_len;
return 0;
}
@@ -3002,7 +4319,7 @@ static int btf_dedup_remap_type_id(struct btf_dedup *d, __u32 type_id)
*/
static int btf_dedup_remap_type(struct btf_dedup *d, __u32 type_id)
{
- struct btf_type *t = d->btf->types[type_id];
+ struct btf_type *t = btf_type_by_id(d->btf, type_id);
int i, r;
switch (btf_kind(t)) {
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 1ca14448df4c..57247240a20a 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -5,6 +5,7 @@
#define __LIBBPF_BTF_H
#include <stdarg.h>
+#include <stdbool.h>
#include <linux/btf.h>
#include <linux/types.h>
@@ -24,46 +25,14 @@ struct btf_type;
struct bpf_object;
-/*
- * The .BTF.ext ELF section layout defined as
- * struct btf_ext_header
- * func_info subsection
- *
- * The func_info subsection layout:
- * record size for struct bpf_func_info in the func_info subsection
- * struct btf_sec_func_info for section #1
- * a list of bpf_func_info records for section #1
- * where struct bpf_func_info mimics one in include/uapi/linux/bpf.h
- * but may not be identical
- * struct btf_sec_func_info for section #2
- * a list of bpf_func_info records for section #2
- * ......
- *
- * Note that the bpf_func_info record size in .BTF.ext may not
- * be the same as the one defined in include/uapi/linux/bpf.h.
- * The loader should ensure that record_size meets minimum
- * requirement and pass the record as is to the kernel. The
- * kernel will handle the func_info properly based on its contents.
- */
-struct btf_ext_header {
- __u16 magic;
- __u8 version;
- __u8 flags;
- __u32 hdr_len;
-
- /* All offsets are in bytes relative to the end of this header */
- __u32 func_info_off;
- __u32 func_info_len;
- __u32 line_info_off;
- __u32 line_info_len;
-
- /* optional part of .BTF.ext header */
- __u32 field_reloc_off;
- __u32 field_reloc_len;
+enum btf_endianness {
+ BTF_LITTLE_ENDIAN = 0,
+ BTF_BIG_ENDIAN = 1,
};
LIBBPF_API void btf__free(struct btf *btf);
LIBBPF_API struct btf *btf__new(const void *data, __u32 size);
+LIBBPF_API struct btf *btf__new_empty(void);
LIBBPF_API struct btf *btf__parse(const char *path, struct btf_ext **btf_ext);
LIBBPF_API struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext);
LIBBPF_API struct btf *btf__parse_raw(const char *path);
@@ -78,6 +47,8 @@ LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf,
__u32 id);
LIBBPF_API size_t btf__pointer_size(const struct btf *btf);
LIBBPF_API int btf__set_pointer_size(struct btf *btf, size_t ptr_sz);
+LIBBPF_API enum btf_endianness btf__endianness(const struct btf *btf);
+LIBBPF_API int btf__set_endianness(struct btf *btf, enum btf_endianness endian);
LIBBPF_API __s64 btf__resolve_size(const struct btf *btf, __u32 type_id);
LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id);
LIBBPF_API int btf__align_of(const struct btf *btf, __u32 id);
@@ -85,6 +56,7 @@ LIBBPF_API int btf__fd(const struct btf *btf);
LIBBPF_API void btf__set_fd(struct btf *btf, int fd);
LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size);
LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset);
+LIBBPF_API const char *btf__str_by_offset(const struct btf *btf, __u32 offset);
LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf);
LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
__u32 expected_key_size,
@@ -95,19 +67,62 @@ LIBBPF_API struct btf_ext *btf_ext__new(__u8 *data, __u32 size);
LIBBPF_API void btf_ext__free(struct btf_ext *btf_ext);
LIBBPF_API const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext,
__u32 *size);
-LIBBPF_API int btf_ext__reloc_func_info(const struct btf *btf,
- const struct btf_ext *btf_ext,
- const char *sec_name, __u32 insns_cnt,
- void **func_info, __u32 *cnt);
-LIBBPF_API int btf_ext__reloc_line_info(const struct btf *btf,
- const struct btf_ext *btf_ext,
- const char *sec_name, __u32 insns_cnt,
- void **line_info, __u32 *cnt);
+LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_func_info was never meant as a public API and has wrong assumptions embedded in it; it will be removed in future libbpf versions")
+int btf_ext__reloc_func_info(const struct btf *btf,
+ const struct btf_ext *btf_ext,
+ const char *sec_name, __u32 insns_cnt,
+ void **func_info, __u32 *cnt);
+LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_line_info was never meant as a public API and has wrong assumptions embedded in it; it will be removed in future libbpf versions")
+int btf_ext__reloc_line_info(const struct btf *btf,
+ const struct btf_ext *btf_ext,
+ const char *sec_name, __u32 insns_cnt,
+ void **line_info, __u32 *cnt);
LIBBPF_API __u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext);
LIBBPF_API __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext);
LIBBPF_API struct btf *libbpf_find_kernel_btf(void);
+LIBBPF_API int btf__find_str(struct btf *btf, const char *s);
+LIBBPF_API int btf__add_str(struct btf *btf, const char *s);
+
+LIBBPF_API int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding);
+LIBBPF_API int btf__add_ptr(struct btf *btf, int ref_type_id);
+LIBBPF_API int btf__add_array(struct btf *btf,
+ int index_type_id, int elem_type_id, __u32 nr_elems);
+/* struct/union construction APIs */
+LIBBPF_API int btf__add_struct(struct btf *btf, const char *name, __u32 sz);
+LIBBPF_API int btf__add_union(struct btf *btf, const char *name, __u32 sz);
+LIBBPF_API int btf__add_field(struct btf *btf, const char *name, int field_type_id,
+ __u32 bit_offset, __u32 bit_size);
+
+/* enum construction APIs */
+LIBBPF_API int btf__add_enum(struct btf *btf, const char *name, __u32 bytes_sz);
+LIBBPF_API int btf__add_enum_value(struct btf *btf, const char *name, __s64 value);
+
+enum btf_fwd_kind {
+ BTF_FWD_STRUCT = 0,
+ BTF_FWD_UNION = 1,
+ BTF_FWD_ENUM = 2,
+};
+
+LIBBPF_API int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind);
+LIBBPF_API int btf__add_typedef(struct btf *btf, const char *name, int ref_type_id);
+LIBBPF_API int btf__add_volatile(struct btf *btf, int ref_type_id);
+LIBBPF_API int btf__add_const(struct btf *btf, int ref_type_id);
+LIBBPF_API int btf__add_restrict(struct btf *btf, int ref_type_id);
+
+/* func and func_proto construction APIs */
+LIBBPF_API int btf__add_func(struct btf *btf, const char *name,
+ enum btf_func_linkage linkage, int proto_type_id);
+LIBBPF_API int btf__add_func_proto(struct btf *btf, int ret_type_id);
+LIBBPF_API int btf__add_func_param(struct btf *btf, const char *name, int type_id);
+
+/* var & datasec construction APIs */
+LIBBPF_API int btf__add_var(struct btf *btf, const char *name, int linkage, int type_id);
+LIBBPF_API int btf__add_datasec(struct btf *btf, const char *name, __u32 byte_sz);
+LIBBPF_API int btf__add_datasec_var_info(struct btf *btf, int var_type_id,
+ __u32 offset, __u32 byte_sz);
+
struct btf_dedup_opts {
unsigned int dedup_table_size;
bool dont_resolve_fwds;
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index 57c00fa63932..2f9d685bd522 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -19,9 +19,6 @@
#include "libbpf.h"
#include "libbpf_internal.h"
-/* make sure libbpf doesn't use kernel-only integer typedefs */
-#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
-
static const char PREFIXES[] = "\t\t\t\t\t\t\t\t\t\t\t\t\t";
static const size_t PREFIX_CNT = sizeof(PREFIXES) - 1;
@@ -63,11 +60,14 @@ struct btf_dump {
struct btf_dump_opts opts;
int ptr_sz;
bool strip_mods;
+ int last_id;
/* per-type auxiliary state */
struct btf_dump_type_aux_state *type_states;
+ size_t type_states_cap;
/* per-type optional cached unique name, must be freed, if present */
const char **cached_names;
+ size_t cached_names_cap;
/* topo-sorted list of dependent type definitions */
__u32 *emit_queue;
@@ -93,14 +93,7 @@ struct btf_dump {
static size_t str_hash_fn(const void *key, void *ctx)
{
- const char *s = key;
- size_t h = 0;
-
- while (*s) {
- h = h * 31 + *s;
- s++;
- }
- return h;
+ return str_hash(key);
}
static bool str_equal_fn(const void *a, const void *b, void *ctx)
@@ -123,6 +116,7 @@ static void btf_dump_printf(const struct btf_dump *d, const char *fmt, ...)
}
static int btf_dump_mark_referenced(struct btf_dump *d);
+static int btf_dump_resize(struct btf_dump *d);
struct btf_dump *btf_dump__new(const struct btf *btf,
const struct btf_ext *btf_ext,
@@ -154,25 +148,8 @@ struct btf_dump *btf_dump__new(const struct btf *btf,
d->ident_names = NULL;
goto err;
}
- d->type_states = calloc(1 + btf__get_nr_types(d->btf),
- sizeof(d->type_states[0]));
- if (!d->type_states) {
- err = -ENOMEM;
- goto err;
- }
- d->cached_names = calloc(1 + btf__get_nr_types(d->btf),
- sizeof(d->cached_names[0]));
- if (!d->cached_names) {
- err = -ENOMEM;
- goto err;
- }
-
- /* VOID is special */
- d->type_states[0].order_state = ORDERED;
- d->type_states[0].emit_state = EMITTED;
- /* eagerly determine referenced types for anon enums */
- err = btf_dump_mark_referenced(d);
+ err = btf_dump_resize(d);
if (err)
goto err;
@@ -182,9 +159,38 @@ err:
return ERR_PTR(err);
}
+static int btf_dump_resize(struct btf_dump *d)
+{
+ int err, last_id = btf__get_nr_types(d->btf);
+
+ if (last_id <= d->last_id)
+ return 0;
+
+ if (btf_ensure_mem((void **)&d->type_states, &d->type_states_cap,
+ sizeof(*d->type_states), last_id + 1))
+ return -ENOMEM;
+ if (btf_ensure_mem((void **)&d->cached_names, &d->cached_names_cap,
+ sizeof(*d->cached_names), last_id + 1))
+ return -ENOMEM;
+
+ if (d->last_id == 0) {
+ /* VOID is special */
+ d->type_states[0].order_state = ORDERED;
+ d->type_states[0].emit_state = EMITTED;
+ }
+
+ /* eagerly determine referenced types for anon enums */
+ err = btf_dump_mark_referenced(d);
+ if (err)
+ return err;
+
+ d->last_id = last_id;
+ return 0;
+}
+
void btf_dump__free(struct btf_dump *d)
{
- int i, cnt;
+ int i;
if (IS_ERR_OR_NULL(d))
return;
@@ -192,7 +198,7 @@ void btf_dump__free(struct btf_dump *d)
free(d->type_states);
if (d->cached_names) {
/* any set cached name is owned by us and should be freed */
- for (i = 0, cnt = btf__get_nr_types(d->btf); i <= cnt; i++) {
+ for (i = 0; i <= d->last_id; i++) {
if (d->cached_names[i])
free((void *)d->cached_names[i]);
}
@@ -232,6 +238,10 @@ int btf_dump__dump_type(struct btf_dump *d, __u32 id)
if (id > btf__get_nr_types(d->btf))
return -EINVAL;
+ err = btf_dump_resize(d);
+ if (err)
+ return err;
+
d->emit_queue_cnt = 0;
err = btf_dump_order_type(d, id, false);
if (err < 0)
@@ -261,7 +271,7 @@ static int btf_dump_mark_referenced(struct btf_dump *d)
const struct btf_type *t;
__u16 vlen;
- for (i = 1; i <= n; i++) {
+ for (i = d->last_id + 1; i <= n; i++) {
t = btf__type_by_id(d->btf, i);
vlen = btf_vlen(t);
@@ -316,6 +326,7 @@ static int btf_dump_mark_referenced(struct btf_dump *d)
}
return 0;
}
+
static int btf_dump_add_emit_queue_id(struct btf_dump *d, __u32 id)
{
__u32 *new_queue;
@@ -323,8 +334,7 @@ static int btf_dump_add_emit_queue_id(struct btf_dump *d, __u32 id)
if (d->emit_queue_cnt >= d->emit_queue_cap) {
new_cap = max(16, d->emit_queue_cap * 3 / 2);
- new_queue = realloc(d->emit_queue,
- new_cap * sizeof(new_queue[0]));
+ new_queue = libbpf_reallocarray(d->emit_queue, new_cap, sizeof(new_queue[0]));
if (!new_queue)
return -ENOMEM;
d->emit_queue = new_queue;
@@ -1003,8 +1013,7 @@ static int btf_dump_push_decl_stack_id(struct btf_dump *d, __u32 id)
if (d->decl_stack_cnt >= d->decl_stack_cap) {
new_cap = max(16, d->decl_stack_cap * 3 / 2);
- new_stack = realloc(d->decl_stack,
- new_cap * sizeof(new_stack[0]));
+ new_stack = libbpf_reallocarray(d->decl_stack, new_cap, sizeof(new_stack[0]));
if (!new_stack)
return -ENOMEM;
d->decl_stack = new_stack;
@@ -1061,11 +1070,15 @@ int btf_dump__emit_type_decl(struct btf_dump *d, __u32 id,
const struct btf_dump_emit_type_decl_opts *opts)
{
const char *fname;
- int lvl;
+ int lvl, err;
if (!OPTS_VALID(opts, btf_dump_emit_type_decl_opts))
return -EINVAL;
+ err = btf_dump_resize(d);
+ if (err)
+ return -EINVAL;
+
fname = OPTS_GET(opts, field_name, "");
lvl = OPTS_GET(opts, indent_level, 0);
d->strip_mods = OPTS_GET(opts, strip_mods, false);
diff --git a/tools/lib/bpf/hashmap.c b/tools/lib/bpf/hashmap.c
index a405dad068f5..3c20b126d60d 100644
--- a/tools/lib/bpf/hashmap.c
+++ b/tools/lib/bpf/hashmap.c
@@ -15,6 +15,9 @@
/* make sure libbpf doesn't use kernel-only integer typedefs */
#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
+/* prevent accidental re-addition of reallocarray() */
+#pragma GCC poison reallocarray
+
/* start with 4 buckets */
#define HASHMAP_MIN_CAP_BITS 2
diff --git a/tools/lib/bpf/hashmap.h b/tools/lib/bpf/hashmap.h
index e0af36b0e5d8..d9b385fe808c 100644
--- a/tools/lib/bpf/hashmap.h
+++ b/tools/lib/bpf/hashmap.h
@@ -25,6 +25,18 @@ static inline size_t hash_bits(size_t h, int bits)
#endif
}
+/* generic C-string hashing function */
+static inline size_t str_hash(const char *s)
+{
+ size_t h = 0;
+
+ while (*s) {
+ h = h * 31 + *s;
+ s++;
+ }
+ return h;
+}
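+
+/* For example (editor's illustration): str_hash("ab") evaluates to
+ * ('a' * 31) + 'b' = 97 * 31 + 98 = 3105 -- the classic 31-multiplier
+ * polynomial string hash.
+ */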
+
typedef size_t (*hashmap_hash_fn)(const void *key, void *ctx);
typedef bool (*hashmap_equal_fn)(const void *key1, const void *key2, void *ctx);
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 7253b833576c..313034117070 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -44,7 +44,6 @@
#include <sys/vfs.h>
#include <sys/utsname.h>
#include <sys/resource.h>
-#include <tools/libc_compat.h>
#include <libelf.h>
#include <gelf.h>
#include <zlib.h>
@@ -56,9 +55,6 @@
#include "libbpf_internal.h"
#include "hashmap.h"
-/* make sure libbpf doesn't use kernel-only integer typedefs */
-#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
-
#ifndef EM_BPF
#define EM_BPF 247
#endif
@@ -67,6 +63,8 @@
#define BPF_FS_MAGIC 0xcafe4a11
#endif
+#define BPF_INSN_SZ (sizeof(struct bpf_insn))
+
/* vsprintf() in __base_pr() uses nonliteral format string. It may break
* compilation if user enables corresponding warning. Disable it explicitly.
*/
@@ -75,8 +73,6 @@
#define __printf(a, b) __attribute__((format(printf, a, b)))
static struct bpf_map *bpf_object__add_map(struct bpf_object *obj);
-static struct bpf_program *bpf_object__find_prog_by_idx(struct bpf_object *obj,
- int idx);
static const struct btf_type *
skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id);
@@ -154,34 +150,37 @@ static void pr_perm_msg(int err)
___err; })
#endif
-#ifdef HAVE_LIBELF_MMAP_SUPPORT
-# define LIBBPF_ELF_C_READ_MMAP ELF_C_READ_MMAP
-#else
-# define LIBBPF_ELF_C_READ_MMAP ELF_C_READ
-#endif
-
static inline __u64 ptr_to_u64(const void *ptr)
{
return (__u64) (unsigned long) ptr;
}
-struct bpf_capabilities {
+enum kern_feature_id {
/* v4.14: kernel support for program & map names. */
- __u32 name:1;
+ FEAT_PROG_NAME,
/* v5.2: kernel support for global data sections. */
- __u32 global_data:1;
+ FEAT_GLOBAL_DATA,
+ /* BTF support */
+ FEAT_BTF,
/* BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO support */
- __u32 btf_func:1;
+ FEAT_BTF_FUNC,
/* BTF_KIND_VAR and BTF_KIND_DATASEC support */
- __u32 btf_datasec:1;
- /* BPF_F_MMAPABLE is supported for arrays */
- __u32 array_mmap:1;
+ FEAT_BTF_DATASEC,
/* BTF_FUNC_GLOBAL is supported */
- __u32 btf_func_global:1;
+ FEAT_BTF_GLOBAL_FUNC,
+ /* BPF_F_MMAPABLE is supported for arrays */
+ FEAT_ARRAY_MMAP,
/* kernel support for expected_attach_type in BPF_PROG_LOAD */
- __u32 exp_attach_type:1;
+ FEAT_EXP_ATTACH_TYPE,
+ /* bpf_probe_read_{kernel,user}[_str] helpers */
+ FEAT_PROBE_READ_KERN,
+ /* BPF_PROG_BIND_MAP is supported */
+ FEAT_PROG_BIND_MAP,
+ __FEAT_CNT,
};
+static bool kernel_supports(enum kern_feature_id feat_id);
+
enum reloc_type {
RELO_LD64,
RELO_CALL,
@@ -194,6 +193,7 @@ struct reloc_desc {
int insn_idx;
int map_idx;
int sym_off;
+ bool processed;
};
struct bpf_sec_def;
@@ -209,6 +209,7 @@ struct bpf_sec_def {
bool is_exp_attach_type_optional;
bool is_attachable;
bool is_attach_btf;
+ bool is_sleepable;
attach_fn_t attach_fn;
};
@@ -217,20 +218,45 @@ struct bpf_sec_def {
* linux/filter.h.
*/
struct bpf_program {
- /* Index in elf obj file, for relocation use. */
- int idx;
- char *name;
- int prog_ifindex;
- char *section_name;
const struct bpf_sec_def *sec_def;
- /* section_name with / replaced by _; makes recursive pinning
+ char *sec_name;
+ size_t sec_idx;
+ /* this program's instruction offset (in number of instructions)
+ * within its containing ELF section
+ */
+ size_t sec_insn_off;
+ /* number of original instructions in ELF section belonging to this
+ * program, not taking into account subprogram instructions possible
+ * appended later during relocation
+ */
+ size_t sec_insn_cnt;
+ /* Offset (in number of instructions) of the start of instructions
+ * belonging to this BPF program within its containing main BPF
+ * program. For the entry-point (main) BPF program, this is always
+ * zero. For a sub-program, this gets reset before each main BPF
+ * program is processed and relocated and is used to determine
+ * whether the sub-program was already appended to the main program,
+ * and if yes, at which instruction offset.
+ */
+ size_t sub_insn_off;
+
+ char *name;
+ /* sec_name with / replaced by _; makes recursive pinning
* in bpf_object__pin_programs easier
*/
char *pin_name;
+
+ /* instructions that belong to this BPF program; insns[0] is located
+ * at instruction offset sec_insn_off within its ELF section, so to
+ * map an ELF file instruction index to a local one, subtract
+ * sec_insn_off (and vice versa).
+ */
struct bpf_insn *insns;
- size_t insns_cnt, main_prog_cnt;
- enum bpf_prog_type type;
- bool load;
+ /* actual number of instructions in this BPF program's image; for
+ * entry-point BPF programs this includes the size of the main program
+ * itself plus all the used sub-programs, appended at the end
+ */
+ size_t insns_cnt;
struct reloc_desc *reloc_desc;
int nr_reloc;
@@ -246,15 +272,16 @@ struct bpf_program {
void *priv;
bpf_program_clear_priv_t clear_priv;
+ bool load;
+ enum bpf_prog_type type;
enum bpf_attach_type expected_attach_type;
+ int prog_ifindex;
__u32 attach_btf_id;
__u32 attach_prog_fd;
void *func_info;
__u32 func_info_rec_size;
__u32 func_info_cnt;
- struct bpf_capabilities *caps;
-
void *line_info;
__u32 line_info_rec_size;
__u32 line_info_cnt;
@@ -363,6 +390,12 @@ struct extern_desc {
} kcfg;
struct {
unsigned long long addr;
+
+ /* target btf_id of the corresponding kernel var. */
+ int vmlinux_btf_id;
+
+ /* local btf_id of the ksym extern's type. */
+ __u32 type_id;
} ksym;
};
};
@@ -384,9 +417,10 @@ struct bpf_object {
struct extern_desc *externs;
int nr_extern;
int kconfig_map_idx;
+ int rodata_map_idx;
bool loaded;
- bool has_pseudo_calls;
+ bool has_subcalls;
/*
* Information when doing elf related work. Only valid if fd
@@ -403,6 +437,7 @@ struct bpf_object {
Elf_Data *rodata;
Elf_Data *bss;
Elf_Data *st_ops_data;
+ size_t shstrndx; /* section index for section name strings */
size_t strtabidx;
struct {
GElf_Shdr shdr;
@@ -436,12 +471,20 @@ struct bpf_object {
void *priv;
bpf_object_clear_priv_t clear_priv;
- struct bpf_capabilities caps;
-
char path[];
};
#define obj_elf_valid(o) ((o)->efile.elf)
+static const char *elf_sym_str(const struct bpf_object *obj, size_t off);
+static const char *elf_sec_str(const struct bpf_object *obj, size_t off);
+static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx);
+static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name);
+static int elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn, GElf_Shdr *hdr);
+static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn);
+static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn);
+static int elf_sym_by_sec_off(const struct bpf_object *obj, size_t sec_idx,
+ size_t off, __u32 sym_type, GElf_Sym *sym);
+
void bpf_program__unload(struct bpf_program *prog)
{
int i;
@@ -481,159 +524,160 @@ static void bpf_program__exit(struct bpf_program *prog)
bpf_program__unload(prog);
zfree(&prog->name);
- zfree(&prog->section_name);
+ zfree(&prog->sec_name);
zfree(&prog->pin_name);
zfree(&prog->insns);
zfree(&prog->reloc_desc);
prog->nr_reloc = 0;
prog->insns_cnt = 0;
- prog->idx = -1;
+ prog->sec_idx = -1;
}
static char *__bpf_program__pin_name(struct bpf_program *prog)
{
char *name, *p;
- name = p = strdup(prog->section_name);
+ name = p = strdup(prog->sec_name);
while ((p = strchr(p, '/')))
*p = '_';
return name;
}
+static bool insn_is_subprog_call(const struct bpf_insn *insn)
+{
+ return BPF_CLASS(insn->code) == BPF_JMP &&
+ BPF_OP(insn->code) == BPF_CALL &&
+ BPF_SRC(insn->code) == BPF_K &&
+ insn->src_reg == BPF_PSEUDO_CALL &&
+ insn->dst_reg == 0 &&
+ insn->off == 0;
+}
+
static int
-bpf_program__init(void *data, size_t size, char *section_name, int idx,
- struct bpf_program *prog)
+bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
+ const char *name, size_t sec_idx, const char *sec_name,
+ size_t sec_off, void *insn_data, size_t insn_data_sz)
{
- const size_t bpf_insn_sz = sizeof(struct bpf_insn);
+ int i;
- if (size == 0 || size % bpf_insn_sz) {
- pr_warn("corrupted section '%s', size: %zu\n",
- section_name, size);
+ if (insn_data_sz == 0 || insn_data_sz % BPF_INSN_SZ || sec_off % BPF_INSN_SZ) {
+ pr_warn("sec '%s': corrupted program '%s', offset %zu, size %zu\n",
+ sec_name, name, sec_off, insn_data_sz);
return -EINVAL;
}
memset(prog, 0, sizeof(*prog));
+ prog->obj = obj;
+
+ prog->sec_idx = sec_idx;
+ prog->sec_insn_off = sec_off / BPF_INSN_SZ;
+ prog->sec_insn_cnt = insn_data_sz / BPF_INSN_SZ;
+ /* insns_cnt can later be increased by appending used subprograms */
+ prog->insns_cnt = prog->sec_insn_cnt;
- prog->section_name = strdup(section_name);
- if (!prog->section_name) {
- pr_warn("failed to alloc name for prog under section(%d) %s\n",
- idx, section_name);
+ prog->type = BPF_PROG_TYPE_UNSPEC;
+ prog->load = true;
+
+ prog->instances.fds = NULL;
+ prog->instances.nr = -1;
+
+ prog->sec_name = strdup(sec_name);
+ if (!prog->sec_name)
+ goto errout;
+
+ prog->name = strdup(name);
+ if (!prog->name)
goto errout;
- }
prog->pin_name = __bpf_program__pin_name(prog);
- if (!prog->pin_name) {
- pr_warn("failed to alloc pin name for prog under section(%d) %s\n",
- idx, section_name);
+ if (!prog->pin_name)
goto errout;
- }
- prog->insns = malloc(size);
- if (!prog->insns) {
- pr_warn("failed to alloc insns for prog under section %s\n",
- section_name);
+ prog->insns = malloc(insn_data_sz);
+ if (!prog->insns)
goto errout;
+ memcpy(prog->insns, insn_data, insn_data_sz);
+
+ for (i = 0; i < prog->insns_cnt; i++) {
+ if (insn_is_subprog_call(&prog->insns[i])) {
+ obj->has_subcalls = true;
+ break;
+ }
}
- prog->insns_cnt = size / bpf_insn_sz;
- memcpy(prog->insns, data, size);
- prog->idx = idx;
- prog->instances.fds = NULL;
- prog->instances.nr = -1;
- prog->type = BPF_PROG_TYPE_UNSPEC;
- prog->load = true;
return 0;
errout:
+ pr_warn("sec '%s': failed to allocate memory for prog '%s'\n", sec_name, name);
bpf_program__exit(prog);
return -ENOMEM;
}
static int
-bpf_object__add_program(struct bpf_object *obj, void *data, size_t size,
- char *section_name, int idx)
+bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
+ const char *sec_name, int sec_idx)
{
- struct bpf_program prog, *progs;
+ struct bpf_program *prog, *progs;
+ void *data = sec_data->d_buf;
+ size_t sec_sz = sec_data->d_size, sec_off, prog_sz;
int nr_progs, err;
+ const char *name;
+ GElf_Sym sym;
- err = bpf_program__init(data, size, section_name, idx, &prog);
- if (err)
- return err;
-
- prog.caps = &obj->caps;
progs = obj->programs;
nr_progs = obj->nr_programs;
+ sec_off = 0;
- progs = reallocarray(progs, nr_progs + 1, sizeof(progs[0]));
- if (!progs) {
- /*
- * In this case the original obj->programs
- * is still valid, so don't need special treat for
- * bpf_close_object().
- */
- pr_warn("failed to alloc a new program under section '%s'\n",
- section_name);
- bpf_program__exit(&prog);
- return -ENOMEM;
- }
-
- pr_debug("found program %s\n", prog.section_name);
- obj->programs = progs;
- obj->nr_programs = nr_progs + 1;
- prog.obj = obj;
- progs[nr_progs] = prog;
- return 0;
-}
-
-static int
-bpf_object__init_prog_names(struct bpf_object *obj)
-{
- Elf_Data *symbols = obj->efile.symbols;
- struct bpf_program *prog;
- size_t pi, si;
+ while (sec_off < sec_sz) {
+ if (elf_sym_by_sec_off(obj, sec_idx, sec_off, STT_FUNC, &sym)) {
+ pr_warn("sec '%s': failed to find program symbol at offset %zu\n",
+ sec_name, sec_off);
+ return -LIBBPF_ERRNO__FORMAT;
+ }
- for (pi = 0; pi < obj->nr_programs; pi++) {
- const char *name = NULL;
+ prog_sz = sym.st_size;
- prog = &obj->programs[pi];
+ name = elf_sym_str(obj, sym.st_name);
+ if (!name) {
+ pr_warn("sec '%s': failed to get symbol name for offset %zu\n",
+ sec_name, sec_off);
+ return -LIBBPF_ERRNO__FORMAT;
+ }
- for (si = 0; si < symbols->d_size / sizeof(GElf_Sym) && !name;
- si++) {
- GElf_Sym sym;
+ if (sec_off + prog_sz > sec_sz) {
+ pr_warn("sec '%s': program at offset %zu crosses section boundary\n",
+ sec_name, sec_off);
+ return -LIBBPF_ERRNO__FORMAT;
+ }
- if (!gelf_getsym(symbols, si, &sym))
- continue;
- if (sym.st_shndx != prog->idx)
- continue;
- if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL)
- continue;
+ pr_debug("sec '%s': found program '%s' at insn offset %zu (%zu bytes), code size %zu insns (%zu bytes)\n",
+ sec_name, name, sec_off / BPF_INSN_SZ, sec_off, prog_sz / BPF_INSN_SZ, prog_sz);
- name = elf_strptr(obj->efile.elf,
- obj->efile.strtabidx,
- sym.st_name);
- if (!name) {
- pr_warn("failed to get sym name string for prog %s\n",
- prog->section_name);
- return -LIBBPF_ERRNO__LIBELF;
- }
+ progs = libbpf_reallocarray(progs, nr_progs + 1, sizeof(*progs));
+ if (!progs) {
+ /*
+ * In this case the original obj->programs
+ * is still valid, so no special treatment is
+ * needed in bpf_close_object().
+ */
+ pr_warn("sec '%s': failed to alloc memory for new program '%s'\n",
+ sec_name, name);
+ return -ENOMEM;
}
+ obj->programs = progs;
- if (!name && prog->idx == obj->efile.text_shndx)
- name = ".text";
+ prog = &progs[nr_progs];
- if (!name) {
- pr_warn("failed to find sym for prog %s\n",
- prog->section_name);
- return -EINVAL;
- }
+ err = bpf_object__init_prog(obj, prog, name, sec_idx, sec_name,
+ sec_off, data + sec_off, prog_sz);
+ if (err)
+ return err;
- prog->name = strdup(name);
- if (!prog->name) {
- pr_warn("failed to allocate memory for prog sym %s\n",
- name);
- return -ENOMEM;
- }
+ nr_progs++;
+ obj->nr_programs = nr_progs;
+
+ sec_off += prog_sz;
}
return 0;
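
For context, a hedged sketch of the kind of BPF C source this per-symbol parsing splits into multiple bpf_program entries: the SEC("xdp") program gets its own STT_FUNC symbol in its own executable section, while the static subprogram is emitted into .text, and the call between them becomes the BPF_PSEUDO_CALL relocation checked by insn_is_subprog_call() (identifiers are illustrative; __noinline is assumed from bpf_helpers.h):

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    static __noinline int count_pkt(struct xdp_md *ctx)
    {
        return XDP_PASS;        /* emitted into .text as its own STT_FUNC */
    }

    SEC("xdp")
    int xdp_main(struct xdp_md *ctx)
    {
        return count_pkt(ctx);  /* becomes a subprogram call relocation */
    }

    char LICENSE[] SEC("license") = "GPL";
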
@@ -1035,6 +1079,7 @@ static struct bpf_object *bpf_object__new(const char *path,
obj->efile.bss_shndx = -1;
obj->efile.st_ops_shndx = -1;
obj->kconfig_map_idx = -1;
+ obj->rodata_map_idx = -1;
obj->kern_version = get_kernel_version();
obj->loaded = false;
@@ -1066,13 +1111,18 @@ static void bpf_object__elf_finish(struct bpf_object *obj)
obj->efile.obj_buf_sz = 0;
}
+/* if libelf is old and doesn't support mmap(), fall back to read() */
+#ifndef ELF_C_READ_MMAP
+#define ELF_C_READ_MMAP ELF_C_READ
+#endif
+
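
The fallback above pairs with a build-time probe (cf. the libelf-mmap feature test under tools/build/feature); a minimal sketch of such a probe, which only needs to compile and link when libelf knows ELF_C_READ_MMAP:

    #include <libelf.h>

    int main(void)
    {
        /* compiles only if this libelf defines ELF_C_READ_MMAP */
        Elf *elf = elf_begin(0, ELF_C_READ_MMAP, NULL);

        return (long)elf;
    }
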
static int bpf_object__elf_init(struct bpf_object *obj)
{
int err = 0;
GElf_Ehdr *ep;
if (obj_elf_valid(obj)) {
- pr_warn("elf init: internal error\n");
+ pr_warn("elf: init internal error\n");
return -LIBBPF_ERRNO__LIBELF;
}
@@ -1090,31 +1140,44 @@ static int bpf_object__elf_init(struct bpf_object *obj)
err = -errno;
cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
- pr_warn("failed to open %s: %s\n", obj->path, cp);
+ pr_warn("elf: failed to open %s: %s\n", obj->path, cp);
return err;
}
- obj->efile.elf = elf_begin(obj->efile.fd,
- LIBBPF_ELF_C_READ_MMAP, NULL);
+ obj->efile.elf = elf_begin(obj->efile.fd, ELF_C_READ_MMAP, NULL);
}
if (!obj->efile.elf) {
- pr_warn("failed to open %s as ELF file\n", obj->path);
+ pr_warn("elf: failed to open %s as ELF file: %s\n", obj->path, elf_errmsg(-1));
err = -LIBBPF_ERRNO__LIBELF;
goto errout;
}
if (!gelf_getehdr(obj->efile.elf, &obj->efile.ehdr)) {
- pr_warn("failed to get EHDR from %s\n", obj->path);
+ pr_warn("elf: failed to get ELF header from %s: %s\n", obj->path, elf_errmsg(-1));
err = -LIBBPF_ERRNO__FORMAT;
goto errout;
}
ep = &obj->efile.ehdr;
+ if (elf_getshdrstrndx(obj->efile.elf, &obj->efile.shstrndx)) {
+ pr_warn("elf: failed to get section names section index for %s: %s\n",
+ obj->path, elf_errmsg(-1));
+ err = -LIBBPF_ERRNO__FORMAT;
+ goto errout;
+ }
+
+ /* Elf is corrupted/truncated, avoid calling elf_strptr. */
+ if (!elf_rawdata(elf_getscn(obj->efile.elf, obj->efile.shstrndx), NULL)) {
+ pr_warn("elf: failed to get section names strings from %s: %s\n",
+ obj->path, elf_errmsg(-1));
+ return -LIBBPF_ERRNO__FORMAT;
+ }
+
/* Old LLVM set e_machine to EM_NONE */
if (ep->e_type != ET_REL ||
(ep->e_machine && ep->e_machine != EM_BPF)) {
- pr_warn("%s is not an eBPF object file\n", obj->path);
+ pr_warn("elf: %s is not a valid eBPF object file\n", obj->path);
err = -LIBBPF_ERRNO__FORMAT;
goto errout;
}
@@ -1136,7 +1199,7 @@ static int bpf_object__check_endianness(struct bpf_object *obj)
#else
# error "Unrecognized __BYTE_ORDER__"
#endif
- pr_warn("endianness mismatch.\n");
+ pr_warn("elf: endianness mismatch in %s.\n", obj->path);
return -LIBBPF_ERRNO__ENDIAN;
}
@@ -1171,55 +1234,10 @@ static bool bpf_map_type__is_map_in_map(enum bpf_map_type type)
return false;
}
-static int bpf_object_search_section_size(const struct bpf_object *obj,
- const char *name, size_t *d_size)
-{
- const GElf_Ehdr *ep = &obj->efile.ehdr;
- Elf *elf = obj->efile.elf;
- Elf_Scn *scn = NULL;
- int idx = 0;
-
- while ((scn = elf_nextscn(elf, scn)) != NULL) {
- const char *sec_name;
- Elf_Data *data;
- GElf_Shdr sh;
-
- idx++;
- if (gelf_getshdr(scn, &sh) != &sh) {
- pr_warn("failed to get section(%d) header from %s\n",
- idx, obj->path);
- return -EIO;
- }
-
- sec_name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name);
- if (!sec_name) {
- pr_warn("failed to get section(%d) name from %s\n",
- idx, obj->path);
- return -EIO;
- }
-
- if (strcmp(name, sec_name))
- continue;
-
- data = elf_getdata(scn, 0);
- if (!data) {
- pr_warn("failed to get section(%d) data from %s(%s)\n",
- idx, name, obj->path);
- return -EIO;
- }
-
- *d_size = data->d_size;
- return 0;
- }
-
- return -ENOENT;
-}
-
int bpf_object__section_size(const struct bpf_object *obj, const char *name,
__u32 *size)
{
int ret = -ENOENT;
- size_t d_size;
*size = 0;
if (!name) {
@@ -1237,9 +1255,13 @@ int bpf_object__section_size(const struct bpf_object *obj, const char *name,
if (obj->efile.st_ops_data)
*size = obj->efile.st_ops_data->d_size;
} else {
- ret = bpf_object_search_section_size(obj, name, &d_size);
- if (!ret)
- *size = d_size;
+ Elf_Scn *scn = elf_sec_by_name(obj, name);
+ Elf_Data *data = elf_sec_data(obj, scn);
+
+ if (data) {
+ ret = 0; /* found it */
+ *size = data->d_size;
+ }
}
return *size ? 0 : ret;
@@ -1264,8 +1286,7 @@ int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
GELF_ST_TYPE(sym.st_info) != STT_OBJECT)
continue;
- sname = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
- sym.st_name);
+ sname = elf_sym_str(obj, sym.st_name);
if (!sname) {
pr_warn("failed to get sym name string for var %s\n",
name);
@@ -1290,7 +1311,7 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
return &obj->maps[obj->nr_maps++];
new_cap = max((size_t)4, obj->maps_cap * 3 / 2);
- new_maps = realloc(obj->maps, new_cap * sizeof(*obj->maps));
+ new_maps = libbpf_reallocarray(obj->maps, new_cap, sizeof(*obj->maps));
if (!new_maps) {
pr_warn("alloc maps for object failed\n");
return ERR_PTR(-ENOMEM);
@@ -1417,6 +1438,8 @@ static int bpf_object__init_global_data_maps(struct bpf_object *obj)
obj->efile.rodata->d_size);
if (err)
return err;
+
+ obj->rodata_map_idx = obj->nr_maps - 1;
}
if (obj->efile.bss_shndx >= 0) {
err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS,
@@ -1742,12 +1765,12 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
if (!symbols)
return -EINVAL;
- scn = elf_getscn(obj->efile.elf, obj->efile.maps_shndx);
- if (scn)
- data = elf_getdata(scn, NULL);
+
+ scn = elf_sec_by_idx(obj, obj->efile.maps_shndx);
+ data = elf_sec_data(obj, scn);
if (!scn || !data) {
- pr_warn("failed to get Elf_Data from map section %d\n",
- obj->efile.maps_shndx);
+ pr_warn("elf: failed to get legacy map definitions for %s\n",
+ obj->path);
return -EINVAL;
}
@@ -1769,12 +1792,12 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
nr_maps++;
}
/* Assume equally sized map definitions */
- pr_debug("maps in %s: %d maps in %zd bytes\n",
- obj->path, nr_maps, data->d_size);
+ pr_debug("elf: found %d legacy map definitions (%zd bytes) in %s\n",
+ nr_maps, data->d_size, obj->path);
if (!data->d_size || nr_maps == 0 || (data->d_size % nr_maps) != 0) {
- pr_warn("unable to determine map definition size section %s, %d maps in %zd bytes\n",
- obj->path, nr_maps, data->d_size);
+ pr_warn("elf: unable to determine legacy map definition size in %s\n",
+ obj->path);
return -EINVAL;
}
map_def_sz = data->d_size / nr_maps;
@@ -1795,8 +1818,7 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
if (IS_ERR(map))
return PTR_ERR(map);
- map_name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
- sym.st_name);
+ map_name = elf_sym_str(obj, sym.st_name);
if (!map_name) {
pr_warn("failed to get map #%d name sym string for obj %s\n",
i, obj->path);
@@ -1884,6 +1906,29 @@ resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id)
return btf_is_func_proto(t) ? t : NULL;
}
+static const char *btf_kind_str(const struct btf_type *t)
+{
+ switch (btf_kind(t)) {
+ case BTF_KIND_UNKN: return "void";
+ case BTF_KIND_INT: return "int";
+ case BTF_KIND_PTR: return "ptr";
+ case BTF_KIND_ARRAY: return "array";
+ case BTF_KIND_STRUCT: return "struct";
+ case BTF_KIND_UNION: return "union";
+ case BTF_KIND_ENUM: return "enum";
+ case BTF_KIND_FWD: return "fwd";
+ case BTF_KIND_TYPEDEF: return "typedef";
+ case BTF_KIND_VOLATILE: return "volatile";
+ case BTF_KIND_CONST: return "const";
+ case BTF_KIND_RESTRICT: return "restrict";
+ case BTF_KIND_FUNC: return "func";
+ case BTF_KIND_FUNC_PROTO: return "func_proto";
+ case BTF_KIND_VAR: return "var";
+ case BTF_KIND_DATASEC: return "datasec";
+ default: return "unknown";
+ }
+}
+
/*
* Fetch integer attribute of BTF map definition. Such attributes are
* represented using a pointer to an array, in which dimensionality of array
@@ -1900,8 +1945,8 @@ static bool get_map_field_int(const char *map_name, const struct btf *btf,
const struct btf_type *arr_t;
if (!btf_is_ptr(t)) {
- pr_warn("map '%s': attr '%s': expected PTR, got %u.\n",
- map_name, name, btf_kind(t));
+ pr_warn("map '%s': attr '%s': expected PTR, got %s.\n",
+ map_name, name, btf_kind_str(t));
return false;
}
@@ -1912,8 +1957,8 @@ static bool get_map_field_int(const char *map_name, const struct btf *btf,
return false;
}
if (!btf_is_array(arr_t)) {
- pr_warn("map '%s': attr '%s': expected ARRAY, got %u.\n",
- map_name, name, btf_kind(arr_t));
+ pr_warn("map '%s': attr '%s': expected ARRAY, got %s.\n",
+ map_name, name, btf_kind_str(arr_t));
return false;
}
arr_info = btf_array(arr_t);
@@ -1924,7 +1969,7 @@ static bool get_map_field_int(const char *map_name, const struct btf *btf,
static int build_map_pin_path(struct bpf_map *map, const char *path)
{
char buf[PATH_MAX];
- int err, len;
+ int len;
if (!path)
path = "/sys/fs/bpf";
@@ -1935,11 +1980,7 @@ static int build_map_pin_path(struct bpf_map *map, const char *path)
else if (len >= PATH_MAX)
return -ENAMETOOLONG;
- err = bpf_map__set_pin_path(map, buf);
- if (err)
- return err;
-
- return 0;
+ return bpf_map__set_pin_path(map, buf);
}
@@ -2007,8 +2048,8 @@ static int parse_btf_map_def(struct bpf_object *obj,
return -EINVAL;
}
if (!btf_is_ptr(t)) {
- pr_warn("map '%s': key spec is not PTR: %u.\n",
- map->name, btf_kind(t));
+ pr_warn("map '%s': key spec is not PTR: %s.\n",
+ map->name, btf_kind_str(t));
return -EINVAL;
}
sz = btf__resolve_size(obj->btf, t->type);
@@ -2049,8 +2090,8 @@ static int parse_btf_map_def(struct bpf_object *obj,
return -EINVAL;
}
if (!btf_is_ptr(t)) {
- pr_warn("map '%s': value spec is not PTR: %u.\n",
- map->name, btf_kind(t));
+ pr_warn("map '%s': value spec is not PTR: %s.\n",
+ map->name, btf_kind_str(t));
return -EINVAL;
}
sz = btf__resolve_size(obj->btf, t->type);
@@ -2107,14 +2148,14 @@ static int parse_btf_map_def(struct bpf_object *obj,
t = skip_mods_and_typedefs(obj->btf, btf_array(t)->type,
NULL);
if (!btf_is_ptr(t)) {
- pr_warn("map '%s': map-in-map inner def is of unexpected kind %u.\n",
- map->name, btf_kind(t));
+ pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
+ map->name, btf_kind_str(t));
return -EINVAL;
}
t = skip_mods_and_typedefs(obj->btf, t->type, NULL);
if (!btf_is_struct(t)) {
- pr_warn("map '%s': map-in-map inner def is of unexpected kind %u.\n",
- map->name, btf_kind(t));
+ pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
+ map->name, btf_kind_str(t));
return -EINVAL;
}
@@ -2205,8 +2246,8 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
return -EINVAL;
}
if (!btf_is_var(var)) {
- pr_warn("map '%s': unexpected var kind %u.\n",
- map_name, btf_kind(var));
+ pr_warn("map '%s': unexpected var kind %s.\n",
+ map_name, btf_kind_str(var));
return -EINVAL;
}
if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED &&
@@ -2218,8 +2259,8 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
if (!btf_is_struct(def)) {
- pr_warn("map '%s': unexpected def kind %u.\n",
- map_name, btf_kind(var));
+ pr_warn("map '%s': unexpected def kind %s.\n",
+ map_name, btf_kind_str(var));
return -EINVAL;
}
if (def->size > vi->size) {
@@ -2259,12 +2300,11 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
if (obj->efile.btf_maps_shndx < 0)
return 0;
- scn = elf_getscn(obj->efile.elf, obj->efile.btf_maps_shndx);
- if (scn)
- data = elf_getdata(scn, NULL);
+ scn = elf_sec_by_idx(obj, obj->efile.btf_maps_shndx);
+ data = elf_sec_data(obj, scn);
if (!scn || !data) {
- pr_warn("failed to get Elf_Data from map section %d (%s)\n",
- obj->efile.btf_maps_shndx, MAPS_ELF_SEC);
+ pr_warn("elf: failed to get %s map definitions for %s\n",
+ MAPS_ELF_SEC, obj->path);
return -EINVAL;
}
@@ -2322,36 +2362,28 @@ static int bpf_object__init_maps(struct bpf_object *obj,
static bool section_have_execinstr(struct bpf_object *obj, int idx)
{
- Elf_Scn *scn;
GElf_Shdr sh;
- scn = elf_getscn(obj->efile.elf, idx);
- if (!scn)
+ if (elf_sec_hdr(obj, elf_sec_by_idx(obj, idx), &sh))
return false;
- if (gelf_getshdr(scn, &sh) != &sh)
- return false;
-
- if (sh.sh_flags & SHF_EXECINSTR)
- return true;
-
- return false;
+ return sh.sh_flags & SHF_EXECINSTR;
}
static bool btf_needs_sanitization(struct bpf_object *obj)
{
- bool has_func_global = obj->caps.btf_func_global;
- bool has_datasec = obj->caps.btf_datasec;
- bool has_func = obj->caps.btf_func;
+ bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC);
+ bool has_datasec = kernel_supports(FEAT_BTF_DATASEC);
+ bool has_func = kernel_supports(FEAT_BTF_FUNC);
return !has_func || !has_datasec || !has_func_global;
}
static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
{
- bool has_func_global = obj->caps.btf_func_global;
- bool has_datasec = obj->caps.btf_datasec;
- bool has_func = obj->caps.btf_func;
+ bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC);
+ bool has_datasec = kernel_supports(FEAT_BTF_DATASEC);
+ bool has_func = kernel_supports(FEAT_BTF_FUNC);
struct btf_type *t;
int i, j, vlen;
@@ -2496,12 +2528,23 @@ static int bpf_object__load_vmlinux_btf(struct bpf_object *obj)
{
bool need_vmlinux_btf = false;
struct bpf_program *prog;
- int err;
+ int i, err;
/* CO-RE relocations need kernel BTF */
- if (obj->btf_ext && obj->btf_ext->field_reloc_info.len)
+ if (obj->btf_ext && obj->btf_ext->core_relo_info.len)
need_vmlinux_btf = true;
+ /* Support for typed ksyms needs kernel BTF */
+ for (i = 0; i < obj->nr_extern; i++) {
+ const struct extern_desc *ext;
+
+ ext = &obj->externs[i];
+ if (ext->type == EXT_KSYM && ext->ksym.type_id) {
+ need_vmlinux_btf = true;
+ break;
+ }
+ }
+
bpf_object__for_each_program(prog, obj) {
if (!prog->load)
continue;
@@ -2533,6 +2576,15 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
if (!obj->btf)
return 0;
+ if (!kernel_supports(FEAT_BTF)) {
+ if (kernel_needs_btf(obj)) {
+ err = -EOPNOTSUPP;
+ goto report;
+ }
+ pr_debug("Kernel doesn't support BTF, skipping uploading it.\n");
+ return 0;
+ }
+
sanitize = btf_needs_sanitization(obj);
if (sanitize) {
const void *raw_data;
@@ -2558,6 +2610,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
}
btf__free(kern_btf);
}
+report:
if (err) {
btf_mandatory = kernel_needs_btf(obj);
pr_warn("Error loading .BTF into kernel: %d. %s\n", err,
@@ -2569,61 +2622,255 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
return err;
}
+static const char *elf_sym_str(const struct bpf_object *obj, size_t off)
+{
+ const char *name;
+
+ name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, off);
+ if (!name) {
+ pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
+ off, obj->path, elf_errmsg(-1));
+ return NULL;
+ }
+
+ return name;
+}
+
+static const char *elf_sec_str(const struct bpf_object *obj, size_t off)
+{
+ const char *name;
+
+ name = elf_strptr(obj->efile.elf, obj->efile.shstrndx, off);
+ if (!name) {
+ pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
+ off, obj->path, elf_errmsg(-1));
+ return NULL;
+ }
+
+ return name;
+}
+
+static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx)
+{
+ Elf_Scn *scn;
+
+ scn = elf_getscn(obj->efile.elf, idx);
+ if (!scn) {
+ pr_warn("elf: failed to get section(%zu) from %s: %s\n",
+ idx, obj->path, elf_errmsg(-1));
+ return NULL;
+ }
+ return scn;
+}
+
+static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name)
+{
+ Elf_Scn *scn = NULL;
+ Elf *elf = obj->efile.elf;
+ const char *sec_name;
+
+ while ((scn = elf_nextscn(elf, scn)) != NULL) {
+ sec_name = elf_sec_name(obj, scn);
+ if (!sec_name)
+ return NULL;
+
+ if (strcmp(sec_name, name) != 0)
+ continue;
+
+ return scn;
+ }
+ return NULL;
+}
+
+static int elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn, GElf_Shdr *hdr)
+{
+ if (!scn)
+ return -EINVAL;
+
+ if (gelf_getshdr(scn, hdr) != hdr) {
+ pr_warn("elf: failed to get section(%zu) header from %s: %s\n",
+ elf_ndxscn(scn), obj->path, elf_errmsg(-1));
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn)
+{
+ const char *name;
+ GElf_Shdr sh;
+
+ if (!scn)
+ return NULL;
+
+ if (elf_sec_hdr(obj, scn, &sh))
+ return NULL;
+
+ name = elf_sec_str(obj, sh.sh_name);
+ if (!name) {
+ pr_warn("elf: failed to get section(%zu) name from %s: %s\n",
+ elf_ndxscn(scn), obj->path, elf_errmsg(-1));
+ return NULL;
+ }
+
+ return name;
+}
+
+static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn)
+{
+ Elf_Data *data;
+
+ if (!scn)
+ return NULL;
+
+ data = elf_getdata(scn, 0);
+ if (!data) {
+ pr_warn("elf: failed to get section(%zu) %s data from %s: %s\n",
+ elf_ndxscn(scn), elf_sec_name(obj, scn) ?: "<?>",
+ obj->path, elf_errmsg(-1));
+ return NULL;
+ }
+
+ return data;
+}
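
Together these helpers centralize the error logging for the elf_getscn()/gelf_getshdr()/elf_strptr()/elf_getdata() chains, so callers reduce to a couple of NULL checks. A hypothetical caller, in the style of the bpf_object__section_size() rewrite above:

    /* sketch: fetch a section's size by name using the helpers above */
    static long sec_size_by_name(const struct bpf_object *obj, const char *name)
    {
        Elf_Scn *scn = elf_sec_by_name(obj, name);
        Elf_Data *data = elf_sec_data(obj, scn);    /* NULL-safe on scn */

        return data ? (long)data->d_size : -ENOENT;
    }
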
+
+static int elf_sym_by_sec_off(const struct bpf_object *obj, size_t sec_idx,
+ size_t off, __u32 sym_type, GElf_Sym *sym)
+{
+ Elf_Data *symbols = obj->efile.symbols;
+ size_t n = symbols->d_size / sizeof(GElf_Sym);
+ int i;
+
+ for (i = 0; i < n; i++) {
+ if (!gelf_getsym(symbols, i, sym))
+ continue;
+ if (sym->st_shndx != sec_idx || sym->st_value != off)
+ continue;
+ if (GELF_ST_TYPE(sym->st_info) != sym_type)
+ continue;
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+static bool is_sec_name_dwarf(const char *name)
+{
+ /* approximation, but the actual list is too long */
+ return strncmp(name, ".debug_", sizeof(".debug_") - 1) == 0;
+}
+
+static bool ignore_elf_section(GElf_Shdr *hdr, const char *name)
+{
+ /* no special handling of .strtab */
+ if (hdr->sh_type == SHT_STRTAB)
+ return true;
+
+ /* ignore .llvm_addrsig section as well */
+ if (hdr->sh_type == 0x6FFF4C03 /* SHT_LLVM_ADDRSIG */)
+ return true;
+
+ /* an object with no subprograms has an empty .text section, so ignore it */
+ if (hdr->sh_type == SHT_PROGBITS && hdr->sh_size == 0 &&
+ strcmp(name, ".text") == 0)
+ return true;
+
+ /* DWARF sections */
+ if (is_sec_name_dwarf(name))
+ return true;
+
+ if (strncmp(name, ".rel", sizeof(".rel") - 1) == 0) {
+ name += sizeof(".rel") - 1;
+ /* DWARF section relocations */
+ if (is_sec_name_dwarf(name))
+ return true;
+
+ /* .BTF and .BTF.ext don't need relocations */
+ if (strcmp(name, BTF_ELF_SEC) == 0 ||
+ strcmp(name, BTF_EXT_ELF_SEC) == 0)
+ return true;
+ }
+
+ return false;
+}
+
+static int cmp_progs(const void *_a, const void *_b)
+{
+ const struct bpf_program *a = _a;
+ const struct bpf_program *b = _b;
+
+ if (a->sec_idx != b->sec_idx)
+ return a->sec_idx < b->sec_idx ? -1 : 1;
+
+ /* sec_insn_off can't be the same within the section */
+ return a->sec_insn_off < b->sec_insn_off ? -1 : 1;
+}
+
static int bpf_object__elf_collect(struct bpf_object *obj)
{
Elf *elf = obj->efile.elf;
- GElf_Ehdr *ep = &obj->efile.ehdr;
Elf_Data *btf_ext_data = NULL;
Elf_Data *btf_data = NULL;
- Elf_Scn *scn = NULL;
int idx = 0, err = 0;
+ const char *name;
+ Elf_Data *data;
+ Elf_Scn *scn;
+ GElf_Shdr sh;
- /* Elf is corrupted/truncated, avoid calling elf_strptr. */
- if (!elf_rawdata(elf_getscn(elf, ep->e_shstrndx), NULL)) {
- pr_warn("failed to get e_shstrndx from %s\n", obj->path);
- return -LIBBPF_ERRNO__FORMAT;
+ /* a bunch of ELF parsing functionality depends on processing symbols,
+ * so do a first pass to find the symbol table
+ */
+ scn = NULL;
+ while ((scn = elf_nextscn(elf, scn)) != NULL) {
+ if (elf_sec_hdr(obj, scn, &sh))
+ return -LIBBPF_ERRNO__FORMAT;
+
+ if (sh.sh_type == SHT_SYMTAB) {
+ if (obj->efile.symbols) {
+ pr_warn("elf: multiple symbol tables in %s\n", obj->path);
+ return -LIBBPF_ERRNO__FORMAT;
+ }
+
+ data = elf_sec_data(obj, scn);
+ if (!data)
+ return -LIBBPF_ERRNO__FORMAT;
+
+ obj->efile.symbols = data;
+ obj->efile.symbols_shndx = elf_ndxscn(scn);
+ obj->efile.strtabidx = sh.sh_link;
+ }
}
+ scn = NULL;
while ((scn = elf_nextscn(elf, scn)) != NULL) {
- char *name;
- GElf_Shdr sh;
- Elf_Data *data;
-
idx++;
- if (gelf_getshdr(scn, &sh) != &sh) {
- pr_warn("failed to get section(%d) header from %s\n",
- idx, obj->path);
+
+ if (elf_sec_hdr(obj, scn, &sh))
return -LIBBPF_ERRNO__FORMAT;
- }
- name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name);
- if (!name) {
- pr_warn("failed to get section(%d) name from %s\n",
- idx, obj->path);
+ name = elf_sec_str(obj, sh.sh_name);
+ if (!name)
return -LIBBPF_ERRNO__FORMAT;
- }
- data = elf_getdata(scn, 0);
- if (!data) {
- pr_warn("failed to get section(%d) data from %s(%s)\n",
- idx, name, obj->path);
+ if (ignore_elf_section(&sh, name))
+ continue;
+
+ data = elf_sec_data(obj, scn);
+ if (!data)
return -LIBBPF_ERRNO__FORMAT;
- }
- pr_debug("section(%d) %s, size %ld, link %d, flags %lx, type=%d\n",
+
+ pr_debug("elf: section(%d) %s, size %ld, link %d, flags %lx, type=%d\n",
idx, name, (unsigned long)data->d_size,
(int)sh.sh_link, (unsigned long)sh.sh_flags,
(int)sh.sh_type);
if (strcmp(name, "license") == 0) {
- err = bpf_object__init_license(obj,
- data->d_buf,
- data->d_size);
+ err = bpf_object__init_license(obj, data->d_buf, data->d_size);
if (err)
return err;
} else if (strcmp(name, "version") == 0) {
- err = bpf_object__init_kversion(obj,
- data->d_buf,
- data->d_size);
+ err = bpf_object__init_kversion(obj, data->d_buf, data->d_size);
if (err)
return err;
} else if (strcmp(name, "maps") == 0) {
@@ -2635,31 +2882,14 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
} else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) {
btf_ext_data = data;
} else if (sh.sh_type == SHT_SYMTAB) {
- if (obj->efile.symbols) {
- pr_warn("bpf: multiple SYMTAB in %s\n",
- obj->path);
- return -LIBBPF_ERRNO__FORMAT;
- }
- obj->efile.symbols = data;
- obj->efile.symbols_shndx = idx;
- obj->efile.strtabidx = sh.sh_link;
+ /* already processed during the first pass above */
} else if (sh.sh_type == SHT_PROGBITS && data->d_size > 0) {
if (sh.sh_flags & SHF_EXECINSTR) {
if (strcmp(name, ".text") == 0)
obj->efile.text_shndx = idx;
- err = bpf_object__add_program(obj, data->d_buf,
- data->d_size,
- name, idx);
- if (err) {
- char errmsg[STRERR_BUFSIZE];
- char *cp;
-
- cp = libbpf_strerror_r(-err, errmsg,
- sizeof(errmsg));
- pr_warn("failed to alloc program %s (%s): %s",
- name, obj->path, cp);
+ err = bpf_object__add_programs(obj, data, name, idx);
+ if (err)
return err;
- }
} else if (strcmp(name, DATA_SEC) == 0) {
obj->efile.data = data;
obj->efile.data_shndx = idx;
@@ -2670,7 +2900,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
obj->efile.st_ops_data = data;
obj->efile.st_ops_shndx = idx;
} else {
- pr_debug("skip section(%d) %s\n", idx, name);
+ pr_info("elf: skipping unrecognized data section(%d) %s\n",
+ idx, name);
}
} else if (sh.sh_type == SHT_REL) {
int nr_sects = obj->efile.nr_reloc_sects;
@@ -2681,36 +2912,40 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
if (!section_have_execinstr(obj, sec) &&
strcmp(name, ".rel" STRUCT_OPS_SEC) &&
strcmp(name, ".rel" MAPS_ELF_SEC)) {
- pr_debug("skip relo %s(%d) for section(%d)\n",
- name, idx, sec);
+ pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n",
+ idx, name, sec,
+ elf_sec_name(obj, elf_sec_by_idx(obj, sec)) ?: "<?>");
continue;
}
- sects = reallocarray(sects, nr_sects + 1,
- sizeof(*obj->efile.reloc_sects));
- if (!sects) {
- pr_warn("reloc_sects realloc failed\n");
+ sects = libbpf_reallocarray(sects, nr_sects + 1,
+ sizeof(*obj->efile.reloc_sects));
+ if (!sects)
return -ENOMEM;
- }
obj->efile.reloc_sects = sects;
obj->efile.nr_reloc_sects++;
obj->efile.reloc_sects[nr_sects].shdr = sh;
obj->efile.reloc_sects[nr_sects].data = data;
- } else if (sh.sh_type == SHT_NOBITS &&
- strcmp(name, BSS_SEC) == 0) {
+ } else if (sh.sh_type == SHT_NOBITS && strcmp(name, BSS_SEC) == 0) {
obj->efile.bss = data;
obj->efile.bss_shndx = idx;
} else {
- pr_debug("skip section(%d) %s\n", idx, name);
+ pr_info("elf: skipping section(%d) %s (size %zu)\n", idx, name,
+ (size_t)sh.sh_size);
}
}
if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) {
- pr_warn("Corrupted ELF file: index of strtab invalid\n");
+ pr_warn("elf: symbol strings section missing or invalid in %s\n", obj->path);
return -LIBBPF_ERRNO__FORMAT;
}
+
+ /* sort BPF programs by section index and in-section instruction offset
+ * for faster search */
+ qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs);
+
return bpf_object__init_btf(obj, btf_data, btf_ext_data);
}
@@ -2869,14 +3104,13 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
if (!obj->efile.symbols)
return 0;
- scn = elf_getscn(obj->efile.elf, obj->efile.symbols_shndx);
- if (!scn)
+ scn = elf_sec_by_idx(obj, obj->efile.symbols_shndx);
+ if (elf_sec_hdr(obj, scn, &sh))
return -LIBBPF_ERRNO__FORMAT;
- if (gelf_getshdr(scn, &sh) != &sh)
- return -LIBBPF_ERRNO__FORMAT;
- n = sh.sh_size / sh.sh_entsize;
+ n = sh.sh_size / sh.sh_entsize;
pr_debug("looking for externs among %d symbols...\n", n);
+
for (i = 0; i < n; i++) {
GElf_Sym sym;
@@ -2884,13 +3118,12 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
return -LIBBPF_ERRNO__FORMAT;
if (!sym_is_extern(&sym))
continue;
- ext_name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
- sym.st_name);
+ ext_name = elf_sym_str(obj, sym.st_name);
if (!ext_name || !ext_name[0])
continue;
ext = obj->externs;
- ext = reallocarray(ext, obj->nr_extern + 1, sizeof(*ext));
+ ext = libbpf_reallocarray(ext, obj->nr_extern + 1, sizeof(*ext));
if (!ext)
return -ENOMEM;
obj->externs = ext;
@@ -2940,16 +3173,10 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
return -ENOTSUP;
}
} else if (strcmp(sec_name, KSYMS_SEC) == 0) {
- const struct btf_type *vt;
-
ksym_sec = sec;
ext->type = EXT_KSYM;
-
- vt = skip_mods_and_typedefs(obj->btf, t->type, NULL);
- if (!btf_is_void(vt)) {
- pr_warn("extern (ksym) '%s' is not typeless (void)\n", ext_name);
- return -ENOTSUP;
- }
+ skip_mods_and_typedefs(obj->btf, t->type,
+ &ext->ksym.type_id);
} else {
pr_warn("unrecognized extern section '%s'\n", sec_name);
return -ENOTSUP;
@@ -3037,20 +3264,6 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
return 0;
}
-static struct bpf_program *
-bpf_object__find_prog_by_idx(struct bpf_object *obj, int idx)
-{
- struct bpf_program *prog;
- size_t i;
-
- for (i = 0; i < obj->nr_programs; i++) {
- prog = &obj->programs[i];
- if (prog->idx == idx)
- return prog;
- }
- return NULL;
-}
-
struct bpf_program *
bpf_object__find_program_by_title(const struct bpf_object *obj,
const char *title)
@@ -3058,12 +3271,18 @@ bpf_object__find_program_by_title(const struct bpf_object *obj,
struct bpf_program *pos;
bpf_object__for_each_program(pos, obj) {
- if (pos->section_name && !strcmp(pos->section_name, title))
+ if (pos->sec_name && !strcmp(pos->sec_name, title))
return pos;
}
return NULL;
}
+static bool prog_is_subprog(const struct bpf_object *obj,
+ const struct bpf_program *prog)
+{
+ return prog->sec_idx == obj->efile.text_shndx && obj->has_subcalls;
+}
+
struct bpf_program *
bpf_object__find_program_by_name(const struct bpf_object *obj,
const char *name)
@@ -3071,6 +3290,8 @@ bpf_object__find_program_by_name(const struct bpf_object *obj,
struct bpf_program *prog;
bpf_object__for_each_program(prog, obj) {
+ if (prog_is_subprog(obj, prog))
+ continue;
if (!strcmp(prog->name, name))
return prog;
}
@@ -3109,7 +3330,7 @@ bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
static int bpf_program__record_reloc(struct bpf_program *prog,
struct reloc_desc *reloc_desc,
- __u32 insn_idx, const char *name,
+ __u32 insn_idx, const char *sym_name,
const GElf_Sym *sym, const GElf_Rel *rel)
{
struct bpf_insn *insn = &prog->insns[insn_idx];
@@ -3117,34 +3338,38 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
struct bpf_object *obj = prog->obj;
__u32 shdr_idx = sym->st_shndx;
enum libbpf_map_type type;
+ const char *sym_sec_name;
struct bpf_map *map;
+ reloc_desc->processed = false;
+
/* sub-program call relocation */
if (insn->code == (BPF_JMP | BPF_CALL)) {
if (insn->src_reg != BPF_PSEUDO_CALL) {
- pr_warn("incorrect bpf_call opcode\n");
+ pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name);
return -LIBBPF_ERRNO__RELOC;
}
/* text_shndx can be 0, if no default "main" program exists */
if (!shdr_idx || shdr_idx != obj->efile.text_shndx) {
- pr_warn("bad call relo against section %u\n", shdr_idx);
+ sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
+ pr_warn("prog '%s': bad call relo against '%s' in section '%s'\n",
+ prog->name, sym_name, sym_sec_name);
return -LIBBPF_ERRNO__RELOC;
}
- if (sym->st_value % 8) {
- pr_warn("bad call relo offset: %zu\n",
- (size_t)sym->st_value);
+ if (sym->st_value % BPF_INSN_SZ) {
+ pr_warn("prog '%s': bad call relo against '%s' at offset %zu\n",
+ prog->name, sym_name, (size_t)sym->st_value);
return -LIBBPF_ERRNO__RELOC;
}
reloc_desc->type = RELO_CALL;
reloc_desc->insn_idx = insn_idx;
reloc_desc->sym_off = sym->st_value;
- obj->has_pseudo_calls = true;
return 0;
}
if (insn->code != (BPF_LD | BPF_IMM | BPF_DW)) {
- pr_warn("invalid relo for insns[%d].code 0x%x\n",
- insn_idx, insn->code);
+ pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n",
+ prog->name, sym_name, insn_idx, insn->code);
return -LIBBPF_ERRNO__RELOC;
}
@@ -3159,12 +3384,12 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
break;
}
if (i >= n) {
- pr_warn("extern relo failed to find extern for sym %d\n",
- sym_idx);
+ pr_warn("prog '%s': extern relo failed to find extern for '%s' (%d)\n",
+ prog->name, sym_name, sym_idx);
return -LIBBPF_ERRNO__RELOC;
}
- pr_debug("found extern #%d '%s' (sym %d) for insn %u\n",
- i, ext->name, ext->sym_idx, insn_idx);
+ pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n",
+ prog->name, i, ext->name, ext->sym_idx, insn_idx);
reloc_desc->type = RELO_EXTERN;
reloc_desc->insn_idx = insn_idx;
reloc_desc->sym_off = i; /* sym_off stores extern index */
@@ -3172,18 +3397,19 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
}
if (!shdr_idx || shdr_idx >= SHN_LORESERVE) {
- pr_warn("invalid relo for \'%s\' in special section 0x%x; forgot to initialize global var?..\n",
- name, shdr_idx);
+ pr_warn("prog '%s': invalid relo against '%s' in special section 0x%x; forgot to initialize global var?..\n",
+ prog->name, sym_name, shdr_idx);
return -LIBBPF_ERRNO__RELOC;
}
type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
+ sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
/* generic map reference relocation */
if (type == LIBBPF_MAP_UNSPEC) {
if (!bpf_object__shndx_is_maps(obj, shdr_idx)) {
- pr_warn("bad map relo against section %u\n",
- shdr_idx);
+ pr_warn("prog '%s': bad map relo against '%s' in section '%s'\n",
+ prog->name, sym_name, sym_sec_name);
return -LIBBPF_ERRNO__RELOC;
}
for (map_idx = 0; map_idx < nr_maps; map_idx++) {
@@ -3192,14 +3418,14 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
map->sec_idx != sym->st_shndx ||
map->sec_offset != sym->st_value)
continue;
- pr_debug("found map %zd (%s, sec %d, off %zu) for insn %u\n",
- map_idx, map->name, map->sec_idx,
+ pr_debug("prog '%s': found map %zd (%s, sec %d, off %zu) for insn #%u\n",
+ prog->name, map_idx, map->name, map->sec_idx,
map->sec_offset, insn_idx);
break;
}
if (map_idx >= nr_maps) {
- pr_warn("map relo failed to find map for sec %u, off %zu\n",
- shdr_idx, (size_t)sym->st_value);
+ pr_warn("prog '%s': map relo failed to find map for section '%s', off %zu\n",
+ prog->name, sym_sec_name, (size_t)sym->st_value);
return -LIBBPF_ERRNO__RELOC;
}
reloc_desc->type = RELO_LD64;
@@ -3211,21 +3437,22 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
/* global data map relocation */
if (!bpf_object__shndx_is_data(obj, shdr_idx)) {
- pr_warn("bad data relo against section %u\n", shdr_idx);
+ pr_warn("prog '%s': bad data relo against section '%s'\n",
+ prog->name, sym_sec_name);
return -LIBBPF_ERRNO__RELOC;
}
for (map_idx = 0; map_idx < nr_maps; map_idx++) {
map = &obj->maps[map_idx];
if (map->libbpf_type != type)
continue;
- pr_debug("found data map %zd (%s, sec %d, off %zu) for insn %u\n",
- map_idx, map->name, map->sec_idx, map->sec_offset,
- insn_idx);
+ pr_debug("prog '%s': found data map %zd (%s, sec %d, off %zu) for insn %u\n",
+ prog->name, map_idx, map->name, map->sec_idx,
+ map->sec_offset, insn_idx);
break;
}
if (map_idx >= nr_maps) {
- pr_warn("data relo failed to find map for sec %u\n",
- shdr_idx);
+ pr_warn("prog '%s': data relo failed to find map for section '%s'\n",
+ prog->name, sym_sec_name);
return -LIBBPF_ERRNO__RELOC;
}
@@ -3236,55 +3463,113 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
return 0;
}
+static bool prog_contains_insn(const struct bpf_program *prog, size_t insn_idx)
+{
+ return insn_idx >= prog->sec_insn_off &&
+ insn_idx < prog->sec_insn_off + prog->sec_insn_cnt;
+}
+
+static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj,
+ size_t sec_idx, size_t insn_idx)
+{
+ int l = 0, r = obj->nr_programs - 1, m;
+ struct bpf_program *prog;
+
+ while (l < r) {
+ m = l + (r - l + 1) / 2;
+ prog = &obj->programs[m];
+
+ if (prog->sec_idx < sec_idx ||
+ (prog->sec_idx == sec_idx && prog->sec_insn_off <= insn_idx))
+ l = m;
+ else
+ r = m - 1;
+ }
+ /* the matching program could be at index l, but it still might be
+ * the wrong one, so double-check the conditions one last time
+ */
+ prog = &obj->programs[l];
+ if (prog->sec_idx == sec_idx && prog_contains_insn(prog, insn_idx))
+ return prog;
+ return NULL;
+}
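
The loop above is a biased ("find last match") binary search over the programs array sorted by cmp_progs(). The same invariant on plain integers, as a self-contained sketch:

    /* sketch: index of the last element <= key in an ascending array,
     * or -1; m is biased upward so that l = m always makes progress
     */
    static int find_last_le(const int *arr, int n, int key)
    {
        int l = 0, r = n - 1, m;

        if (n == 0)
            return -1;
        while (l < r) {
            m = l + (r - l + 1) / 2;
            if (arr[m] <= key)
                l = m;          /* arr[m] is still a candidate */
            else
                r = m - 1;      /* arr[m] is too big, discard it */
        }
        return arr[l] <= key ? l : -1;  /* final re-check, as above */
    }
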
+
static int
-bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
- Elf_Data *data, struct bpf_object *obj)
+bpf_object__collect_prog_relos(struct bpf_object *obj, GElf_Shdr *shdr, Elf_Data *data)
{
Elf_Data *symbols = obj->efile.symbols;
+ const char *relo_sec_name, *sec_name;
+ size_t sec_idx = shdr->sh_info;
+ struct bpf_program *prog;
+ struct reloc_desc *relos;
int err, i, nrels;
+ const char *sym_name;
+ __u32 insn_idx;
+ GElf_Sym sym;
+ GElf_Rel rel;
- pr_debug("collecting relocating info for: '%s'\n", prog->section_name);
- nrels = shdr->sh_size / shdr->sh_entsize;
+ relo_sec_name = elf_sec_str(obj, shdr->sh_name);
+ sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
+ if (!relo_sec_name || !sec_name)
+ return -EINVAL;
- prog->reloc_desc = malloc(sizeof(*prog->reloc_desc) * nrels);
- if (!prog->reloc_desc) {
- pr_warn("failed to alloc memory in relocation\n");
- return -ENOMEM;
- }
- prog->nr_reloc = nrels;
+ pr_debug("sec '%s': collecting relocation for section(%zu) '%s'\n",
+ relo_sec_name, sec_idx, sec_name);
+ nrels = shdr->sh_size / shdr->sh_entsize;
for (i = 0; i < nrels; i++) {
- const char *name;
- __u32 insn_idx;
- GElf_Sym sym;
- GElf_Rel rel;
-
if (!gelf_getrel(data, i, &rel)) {
- pr_warn("relocation: failed to get %d reloc\n", i);
+ pr_warn("sec '%s': failed to get relo #%d\n", relo_sec_name, i);
return -LIBBPF_ERRNO__FORMAT;
}
if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) {
- pr_warn("relocation: symbol %"PRIx64" not found\n",
- GELF_R_SYM(rel.r_info));
+ pr_warn("sec '%s': symbol 0x%zx not found for relo #%d\n",
+ relo_sec_name, (size_t)GELF_R_SYM(rel.r_info), i);
return -LIBBPF_ERRNO__FORMAT;
}
- if (rel.r_offset % sizeof(struct bpf_insn))
+ if (rel.r_offset % BPF_INSN_SZ) {
+ pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n",
+ relo_sec_name, (size_t)rel.r_offset, i);
return -LIBBPF_ERRNO__FORMAT;
+ }
+
+ insn_idx = rel.r_offset / BPF_INSN_SZ;
+ /* relocations against static functions are recorded as
+ * relocations against the section that contains the function;
+ * in such a case, the symbol will be STT_SECTION and sym.st_name
+ * will point to the empty string (0), so fetch the section name
+ * instead
+ */
+ if (GELF_ST_TYPE(sym.st_info) == STT_SECTION && sym.st_name == 0)
+ sym_name = elf_sec_name(obj, elf_sec_by_idx(obj, sym.st_shndx));
+ else
+ sym_name = elf_sym_str(obj, sym.st_name);
+ sym_name = sym_name ?: "<?>";
- insn_idx = rel.r_offset / sizeof(struct bpf_insn);
- name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
- sym.st_name) ? : "<?>";
+ pr_debug("sec '%s': relo #%d: insn #%u against '%s'\n",
+ relo_sec_name, i, insn_idx, sym_name);
- pr_debug("relo for shdr %u, symb %zu, value %zu, type %d, bind %d, name %d (\'%s\'), insn %u\n",
- (__u32)sym.st_shndx, (size_t)GELF_R_SYM(rel.r_info),
- (size_t)sym.st_value, GELF_ST_TYPE(sym.st_info),
- GELF_ST_BIND(sym.st_info), sym.st_name, name,
- insn_idx);
+ prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
+ if (!prog) {
+ pr_warn("sec '%s': relo #%d: program not found in section '%s' for insn #%u\n",
+ relo_sec_name, i, sec_name, insn_idx);
+ return -LIBBPF_ERRNO__RELOC;
+ }
- err = bpf_program__record_reloc(prog, &prog->reloc_desc[i],
- insn_idx, name, &sym, &rel);
+ relos = libbpf_reallocarray(prog->reloc_desc,
+ prog->nr_reloc + 1, sizeof(*relos));
+ if (!relos)
+ return -ENOMEM;
+ prog->reloc_desc = relos;
+
+ /* adjust insn_idx to local BPF program frame of reference */
+ insn_idx -= prog->sec_insn_off;
+ err = bpf_program__record_reloc(prog, &relos[prog->nr_reloc],
+ insn_idx, sym_name, &sym, &rel);
if (err)
return err;
+
+ prog->nr_reloc++;
}
return 0;
}
@@ -3433,8 +3718,14 @@ bpf_object__probe_loading(struct bpf_object *obj)
return 0;
}
-static int
-bpf_object__probe_name(struct bpf_object *obj)
+static int probe_fd(int fd)
+{
+ if (fd >= 0)
+ close(fd);
+ return fd >= 0;
+}
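
probe_fd() reduces every detector to "did the kernel accept this fd?", closing the fd as a side effect. A hypothetical new probe following the same pattern (the feature being probed is illustrative; bpf_load_program() is the plain libbpf loader wrapper):

    /* sketch: probe a feature by loading a trivial, always-valid program */
    static int probe_kern_something(void)
    {
        struct bpf_insn insns[] = {
            BPF_MOV64_IMM(BPF_REG_0, 0),
            BPF_EXIT_INSN(),
        };

        return probe_fd(bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER,
                                         insns, ARRAY_SIZE(insns),
                                         "GPL", 0, NULL, 0));
    }
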
+
+static int probe_kern_prog_name(void)
{
struct bpf_load_program_attr attr;
struct bpf_insn insns[] = {
@@ -3452,16 +3743,10 @@ bpf_object__probe_name(struct bpf_object *obj)
attr.license = "GPL";
attr.name = "test";
ret = bpf_load_program_xattr(&attr, NULL, 0);
- if (ret >= 0) {
- obj->caps.name = 1;
- close(ret);
- }
-
- return 0;
+ return probe_fd(ret);
}
-static int
-bpf_object__probe_global_data(struct bpf_object *obj)
+static int probe_kern_global_data(void)
{
struct bpf_load_program_attr prg_attr;
struct bpf_create_map_attr map_attr;
@@ -3498,16 +3783,23 @@ bpf_object__probe_global_data(struct bpf_object *obj)
prg_attr.license = "GPL";
ret = bpf_load_program_xattr(&prg_attr, NULL, 0);
- if (ret >= 0) {
- obj->caps.global_data = 1;
- close(ret);
- }
-
close(map);
- return 0;
+ return probe_fd(ret);
+}
+
+static int probe_kern_btf(void)
+{
+ static const char strs[] = "\0int";
+ __u32 types[] = {
+ /* int */
+ BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
+ };
+
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs)));
}
-static int bpf_object__probe_btf_func(struct bpf_object *obj)
+static int probe_kern_btf_func(void)
{
static const char strs[] = "\0int\0x\0a";
/* void x(int a) {} */
@@ -3520,20 +3812,12 @@ static int bpf_object__probe_btf_func(struct bpf_object *obj)
/* FUNC x */ /* [3] */
BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2),
};
- int btf_fd;
-
- btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types),
- strs, sizeof(strs));
- if (btf_fd >= 0) {
- obj->caps.btf_func = 1;
- close(btf_fd);
- return 1;
- }
- return 0;
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs)));
}
-static int bpf_object__probe_btf_func_global(struct bpf_object *obj)
+static int probe_kern_btf_func_global(void)
{
static const char strs[] = "\0int\0x\0a";
/* static void x(int a) {} */
@@ -3546,20 +3830,12 @@ static int bpf_object__probe_btf_func_global(struct bpf_object *obj)
/* FUNC x BTF_FUNC_GLOBAL */ /* [3] */
BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2),
};
- int btf_fd;
- btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types),
- strs, sizeof(strs));
- if (btf_fd >= 0) {
- obj->caps.btf_func_global = 1;
- close(btf_fd);
- return 1;
- }
-
- return 0;
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs)));
}
-static int bpf_object__probe_btf_datasec(struct bpf_object *obj)
+static int probe_kern_btf_datasec(void)
{
static const char strs[] = "\0x\0.data";
/* static int a; */
@@ -3573,20 +3849,12 @@ static int bpf_object__probe_btf_datasec(struct bpf_object *obj)
BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
BTF_VAR_SECINFO_ENC(2, 0, 4),
};
- int btf_fd;
- btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types),
- strs, sizeof(strs));
- if (btf_fd >= 0) {
- obj->caps.btf_datasec = 1;
- close(btf_fd);
- return 1;
- }
-
- return 0;
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs)));
}
-static int bpf_object__probe_array_mmap(struct bpf_object *obj)
+static int probe_kern_array_mmap(void)
{
struct bpf_create_map_attr attr = {
.map_type = BPF_MAP_TYPE_ARRAY,
@@ -3595,27 +3863,17 @@ static int bpf_object__probe_array_mmap(struct bpf_object *obj)
.value_size = sizeof(int),
.max_entries = 1,
};
- int fd;
-
- fd = bpf_create_map_xattr(&attr);
- if (fd >= 0) {
- obj->caps.array_mmap = 1;
- close(fd);
- return 1;
- }
- return 0;
+ return probe_fd(bpf_create_map_xattr(&attr));
}
-static int
-bpf_object__probe_exp_attach_type(struct bpf_object *obj)
+static int probe_kern_exp_attach_type(void)
{
struct bpf_load_program_attr attr;
struct bpf_insn insns[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
};
- int fd;
memset(&attr, 0, sizeof(attr));
/* use any valid combination of program type and (optional)
@@ -3629,36 +3887,140 @@ bpf_object__probe_exp_attach_type(struct bpf_object *obj)
attr.insns_cnt = ARRAY_SIZE(insns);
attr.license = "GPL";
- fd = bpf_load_program_xattr(&attr, NULL, 0);
- if (fd >= 0) {
- obj->caps.exp_attach_type = 1;
- close(fd);
- return 1;
- }
- return 0;
+ return probe_fd(bpf_load_program_xattr(&attr, NULL, 0));
}
-static int
-bpf_object__probe_caps(struct bpf_object *obj)
-{
- int (*probe_fn[])(struct bpf_object *obj) = {
- bpf_object__probe_name,
- bpf_object__probe_global_data,
- bpf_object__probe_btf_func,
- bpf_object__probe_btf_func_global,
- bpf_object__probe_btf_datasec,
- bpf_object__probe_array_mmap,
- bpf_object__probe_exp_attach_type,
+static int probe_kern_probe_read_kernel(void)
+{
+ struct bpf_load_program_attr attr;
+ struct bpf_insn insns[] = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */
+ BPF_MOV64_IMM(BPF_REG_2, 8), /* r2 = 8 */
+ BPF_MOV64_IMM(BPF_REG_3, 0), /* r3 = 0 */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel),
+ BPF_EXIT_INSN(),
};
- int i, ret;
- for (i = 0; i < ARRAY_SIZE(probe_fn); i++) {
- ret = probe_fn[i](obj);
- if (ret < 0)
- pr_debug("Probe #%d failed with %d.\n", i, ret);
+ memset(&attr, 0, sizeof(attr));
+ attr.prog_type = BPF_PROG_TYPE_KPROBE;
+ attr.insns = insns;
+ attr.insns_cnt = ARRAY_SIZE(insns);
+ attr.license = "GPL";
+
+ return probe_fd(bpf_load_program_xattr(&attr, NULL, 0));
+}
+
+static int probe_prog_bind_map(void)
+{
+ struct bpf_load_program_attr prg_attr;
+ struct bpf_create_map_attr map_attr;
+ char *cp, errmsg[STRERR_BUFSIZE];
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ int ret, map, prog;
+
+ memset(&map_attr, 0, sizeof(map_attr));
+ map_attr.map_type = BPF_MAP_TYPE_ARRAY;
+ map_attr.key_size = sizeof(int);
+ map_attr.value_size = 32;
+ map_attr.max_entries = 1;
+
+ map = bpf_create_map_xattr(&map_attr);
+ if (map < 0) {
+ ret = -errno;
+ cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
+ pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
+ __func__, cp, -ret);
+ return ret;
}
- return 0;
+ memset(&prg_attr, 0, sizeof(prg_attr));
+ prg_attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+ prg_attr.insns = insns;
+ prg_attr.insns_cnt = ARRAY_SIZE(insns);
+ prg_attr.license = "GPL";
+
+ prog = bpf_load_program_xattr(&prg_attr, NULL, 0);
+ if (prog < 0) {
+ close(map);
+ return 0;
+ }
+
+ ret = bpf_prog_bind_map(prog, map, NULL);
+
+ close(map);
+ close(prog);
+
+ return ret >= 0;
+}
+
+enum kern_feature_result {
+ FEAT_UNKNOWN = 0,
+ FEAT_SUPPORTED = 1,
+ FEAT_MISSING = 2,
+};
+
+typedef int (*feature_probe_fn)(void);
+
+static struct kern_feature_desc {
+ const char *desc;
+ feature_probe_fn probe;
+ enum kern_feature_result res;
+} feature_probes[__FEAT_CNT] = {
+ [FEAT_PROG_NAME] = {
+ "BPF program name", probe_kern_prog_name,
+ },
+ [FEAT_GLOBAL_DATA] = {
+ "global variables", probe_kern_global_data,
+ },
+ [FEAT_BTF] = {
+ "minimal BTF", probe_kern_btf,
+ },
+ [FEAT_BTF_FUNC] = {
+ "BTF functions", probe_kern_btf_func,
+ },
+ [FEAT_BTF_GLOBAL_FUNC] = {
+ "BTF global function", probe_kern_btf_func_global,
+ },
+ [FEAT_BTF_DATASEC] = {
+ "BTF data section and variable", probe_kern_btf_datasec,
+ },
+ [FEAT_ARRAY_MMAP] = {
+ "ARRAY map mmap()", probe_kern_array_mmap,
+ },
+ [FEAT_EXP_ATTACH_TYPE] = {
+ "BPF_PROG_LOAD expected_attach_type attribute",
+ probe_kern_exp_attach_type,
+ },
+ [FEAT_PROBE_READ_KERN] = {
+ "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel,
+ },
+ [FEAT_PROG_BIND_MAP] = {
+ "BPF_PROG_BIND_MAP support", probe_prog_bind_map,
+ }
+};
+
+static bool kernel_supports(enum kern_feature_id feat_id)
+{
+ struct kern_feature_desc *feat = &feature_probes[feat_id];
+ int ret;
+
+ if (READ_ONCE(feat->res) == FEAT_UNKNOWN) {
+ ret = feat->probe();
+ if (ret > 0) {
+ WRITE_ONCE(feat->res, FEAT_SUPPORTED);
+ } else if (ret == 0) {
+ WRITE_ONCE(feat->res, FEAT_MISSING);
+ } else {
+ pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret);
+ WRITE_ONCE(feat->res, FEAT_MISSING);
+ }
+ }
+
+ return READ_ONCE(feat->res) == FEAT_SUPPORTED;
}
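
Call sites then treat feature detection as a lazily evaluated, memoized predicate; a sketch of the typical consumer pattern (mirroring the FEAT_PROG_NAME use in map creation below):

    /* sketch: only set an optional attribute the kernel understands */
    if (kernel_supports(FEAT_PROG_NAME))
        create_attr.name = map->name;   /* older kernels reject names */
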
static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
@@ -3760,7 +4122,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map)
memset(&create_attr, 0, sizeof(create_attr));
- if (obj->caps.name)
+ if (kernel_supports(FEAT_PROG_NAME))
create_attr.name = map->name;
create_attr.map_ifindex = map->map_ifindex;
create_attr.map_type = def->type;
@@ -3841,6 +4203,36 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map)
return 0;
}
+static int init_map_slots(struct bpf_map *map)
+{
+ const struct bpf_map *targ_map;
+ unsigned int i;
+ int fd, err;
+
+ for (i = 0; i < map->init_slots_sz; i++) {
+ if (!map->init_slots[i])
+ continue;
+
+ targ_map = map->init_slots[i];
+ fd = bpf_map__fd(targ_map);
+ err = bpf_map_update_elem(map->fd, &i, &fd, 0);
+ if (err) {
+ err = -errno;
+ pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n",
+ map->name, i, targ_map->name,
+ fd, err);
+ return err;
+ }
+ pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n",
+ map->name, i, targ_map->name, fd);
+ }
+
+ zfree(&map->init_slots);
+ map->init_slots_sz = 0;
+
+ return 0;
+}
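
init_map_slots() consumes the declarative initializers that BTF-defined map-in-maps allow. A hedged example of the BPF-side declaration that produces non-NULL init_slots entries (identifiers are illustrative; __uint/__type/__array are assumed from bpf_helpers.h):

    struct inner_map {
        __uint(type, BPF_MAP_TYPE_ARRAY);
        __uint(max_entries, 1);
        __type(key, int);
        __type(value, int);
    } inner SEC(".maps");

    struct {
        __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
        __uint(max_entries, 4);
        __type(key, int);
        __array(values, struct inner_map);
    } outer SEC(".maps") = {
        .values = { [0] = &inner },     /* becomes init_slots[0] */
    };
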
+
static int
bpf_object__create_maps(struct bpf_object *obj)
{
@@ -3864,47 +4256,29 @@ bpf_object__create_maps(struct bpf_object *obj)
if (map->fd >= 0) {
pr_debug("map '%s': skipping creation (preset fd=%d)\n",
map->name, map->fd);
- continue;
- }
-
- err = bpf_object__create_map(obj, map);
- if (err)
- goto err_out;
-
- pr_debug("map '%s': created successfully, fd=%d\n", map->name,
- map->fd);
-
- if (bpf_map__is_internal(map)) {
- err = bpf_object__populate_internal_map(obj, map);
- if (err < 0) {
- zclose(map->fd);
+ } else {
+ err = bpf_object__create_map(obj, map);
+ if (err)
goto err_out;
- }
- }
- if (map->init_slots_sz) {
- for (j = 0; j < map->init_slots_sz; j++) {
- const struct bpf_map *targ_map;
- int fd;
+ pr_debug("map '%s': created successfully, fd=%d\n",
+ map->name, map->fd);
- if (!map->init_slots[j])
- continue;
+ if (bpf_map__is_internal(map)) {
+ err = bpf_object__populate_internal_map(obj, map);
+ if (err < 0) {
+ zclose(map->fd);
+ goto err_out;
+ }
+ }
- targ_map = map->init_slots[j];
- fd = bpf_map__fd(targ_map);
- err = bpf_map_update_elem(map->fd, &j, &fd, 0);
- if (err) {
- err = -errno;
- pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n",
- map->name, j, targ_map->name,
- fd, err);
+ if (map->init_slots_sz) {
+ err = init_map_slots(map);
+ if (err < 0) {
+ zclose(map->fd);
goto err_out;
}
- pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n",
- map->name, j, targ_map->name, fd);
}
- zfree(&map->init_slots);
- map->init_slots_sz = 0;
}
if (map->pin_path && !map->pinned) {
@@ -3929,75 +4303,6 @@ err_out:
return err;
}
-static int
-check_btf_ext_reloc_err(struct bpf_program *prog, int err,
- void *btf_prog_info, const char *info_name)
-{
- if (err != -ENOENT) {
- pr_warn("Error in loading %s for sec %s.\n",
- info_name, prog->section_name);
- return err;
- }
-
- /* err == -ENOENT (i.e. prog->section_name not found in btf_ext) */
-
- if (btf_prog_info) {
- /*
- * Some info has already been found but has problem
- * in the last btf_ext reloc. Must have to error out.
- */
- pr_warn("Error in relocating %s for sec %s.\n",
- info_name, prog->section_name);
- return err;
- }
-
- /* Have problem loading the very first info. Ignore the rest. */
- pr_warn("Cannot find %s for main program sec %s. Ignore all %s.\n",
- info_name, prog->section_name, info_name);
- return 0;
-}
-
-static int
-bpf_program_reloc_btf_ext(struct bpf_program *prog, struct bpf_object *obj,
- const char *section_name, __u32 insn_offset)
-{
- int err;
-
- if (!insn_offset || prog->func_info) {
- /*
- * !insn_offset => main program
- *
- * For sub prog, the main program's func_info has to
- * be loaded first (i.e. prog->func_info != NULL)
- */
- err = btf_ext__reloc_func_info(obj->btf, obj->btf_ext,
- section_name, insn_offset,
- &prog->func_info,
- &prog->func_info_cnt);
- if (err)
- return check_btf_ext_reloc_err(prog, err,
- prog->func_info,
- "bpf_func_info");
-
- prog->func_info_rec_size = btf_ext__func_info_rec_size(obj->btf_ext);
- }
-
- if (!insn_offset || prog->line_info) {
- err = btf_ext__reloc_line_info(obj->btf, obj->btf_ext,
- section_name, insn_offset,
- &prog->line_info,
- &prog->line_info_cnt);
- if (err)
- return check_btf_ext_reloc_err(prog, err,
- prog->line_info,
- "bpf_line_info");
-
- prog->line_info_rec_size = btf_ext__line_info_rec_size(obj->btf_ext);
- }
-
- return 0;
-}
-
#define BPF_CORE_SPEC_MAX_LEN 64
/* represents BPF CO-RE field or array element accessor */
@@ -4011,6 +4316,10 @@ struct bpf_core_spec {
const struct btf *btf;
/* high-level spec: named fields and array indices only */
struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN];
+ /* original unresolved (no skip_mods_and_typedefs) root type ID */
+ __u32 root_type_id;
+ /* CO-RE relocation kind */
+ enum bpf_core_relo_kind relo_kind;
/* high-level spec length */
int len;
/* raw, low-level spec: 1-to-1 with accessor spec string */
@@ -4041,8 +4350,66 @@ static bool is_flex_arr(const struct btf *btf,
return acc->idx == btf_vlen(t) - 1;
}
+static const char *core_relo_kind_str(enum bpf_core_relo_kind kind)
+{
+ switch (kind) {
+ case BPF_FIELD_BYTE_OFFSET: return "byte_off";
+ case BPF_FIELD_BYTE_SIZE: return "byte_sz";
+ case BPF_FIELD_EXISTS: return "field_exists";
+ case BPF_FIELD_SIGNED: return "signed";
+ case BPF_FIELD_LSHIFT_U64: return "lshift_u64";
+ case BPF_FIELD_RSHIFT_U64: return "rshift_u64";
+ case BPF_TYPE_ID_LOCAL: return "local_type_id";
+ case BPF_TYPE_ID_TARGET: return "target_type_id";
+ case BPF_TYPE_EXISTS: return "type_exists";
+ case BPF_TYPE_SIZE: return "type_size";
+ case BPF_ENUMVAL_EXISTS: return "enumval_exists";
+ case BPF_ENUMVAL_VALUE: return "enumval_value";
+ default: return "unknown";
+ }
+}
+
+static bool core_relo_is_field_based(enum bpf_core_relo_kind kind)
+{
+ switch (kind) {
+ case BPF_FIELD_BYTE_OFFSET:
+ case BPF_FIELD_BYTE_SIZE:
+ case BPF_FIELD_EXISTS:
+ case BPF_FIELD_SIGNED:
+ case BPF_FIELD_LSHIFT_U64:
+ case BPF_FIELD_RSHIFT_U64:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool core_relo_is_type_based(enum bpf_core_relo_kind kind)
+{
+ switch (kind) {
+ case BPF_TYPE_ID_LOCAL:
+ case BPF_TYPE_ID_TARGET:
+ case BPF_TYPE_EXISTS:
+ case BPF_TYPE_SIZE:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind)
+{
+ switch (kind) {
+ case BPF_ENUMVAL_EXISTS:
+ case BPF_ENUMVAL_VALUE:
+ return true;
+ default:
+ return false;
+ }
+}
+
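
These three predicates partition bpf_core_relo_kind into the families that bpf_core_parse_spec() below treats differently. A condensed sketch of what each family expects from the access string:

    /* sketch: access-string expectations per relocation family */
    static const char *relo_spec_shape(enum bpf_core_relo_kind kind)
    {
        if (core_relo_is_type_based(kind))
            return "exactly \"0\" (no field access)";
        if (core_relo_is_enumval_based(kind))
            return "a single enumerator index";
        if (core_relo_is_field_based(kind))
            return "a full \"a:b:c\" field/array accessor chain";
        return "unknown";
    }
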
/*
- * Turn bpf_field_reloc into a low- and high-level spec representation,
+ * Turn bpf_core_relo into a low- and high-level spec representation,
* validating correctness along the way, as well as calculating resulting
* field bit offset, specified by accessor string. Low-level spec captures
* every single level of nestedness, including traversing anonymous
@@ -4071,10 +4438,17 @@ static bool is_flex_arr(const struct btf *btf,
* - field 'a' access (corresponds to '2' in low-level spec);
* - array element #3 access (corresponds to '3' in low-level spec).
*
+ * Type-based relocations (TYPE_EXISTS/TYPE_SIZE,
+ * TYPE_ID_LOCAL/TYPE_ID_TARGET) don't capture any field information. Their
+ * spec and raw_spec are kept empty.
+ *
+ * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use the
+ * access string to specify the value index of the enumerator that needs
+ * to be relocated.
*/
-static int bpf_core_spec_parse(const struct btf *btf,
+static int bpf_core_parse_spec(const struct btf *btf,
__u32 type_id,
const char *spec_str,
+ enum bpf_core_relo_kind relo_kind,
struct bpf_core_spec *spec)
{
int access_idx, parsed_len, i;
@@ -4089,6 +4463,15 @@ static int bpf_core_spec_parse(const struct btf *btf,
memset(spec, 0, sizeof(*spec));
spec->btf = btf;
+ spec->root_type_id = type_id;
+ spec->relo_kind = relo_kind;
+
+ /* type-based relocations don't have a field access string */
+ if (core_relo_is_type_based(relo_kind)) {
+ if (strcmp(spec_str, "0"))
+ return -EINVAL;
+ return 0;
+ }
/* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */
while (*spec_str) {
@@ -4105,16 +4488,28 @@ static int bpf_core_spec_parse(const struct btf *btf,
if (spec->raw_len == 0)
return -EINVAL;
- /* first spec value is always reloc type array index */
t = skip_mods_and_typedefs(btf, type_id, &id);
if (!t)
return -EINVAL;
access_idx = spec->raw_spec[0];
- spec->spec[0].type_id = id;
- spec->spec[0].idx = access_idx;
+ acc = &spec->spec[0];
+ acc->type_id = id;
+ acc->idx = access_idx;
spec->len++;
+ if (core_relo_is_enumval_based(relo_kind)) {
+ if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t))
+ return -EINVAL;
+
+ /* record enumerator name in the first accessor */
+ acc->name = btf__name_by_offset(btf, btf_enum(t)[access_idx].name_off);
+ return 0;
+ }
+
+ if (!core_relo_is_field_based(relo_kind))
+ return -EINVAL;
+
sz = btf__resolve_size(btf, id);
if (sz < 0)
return sz;
@@ -4172,8 +4567,8 @@ static int bpf_core_spec_parse(const struct btf *btf,
return sz;
spec->bit_offset += access_idx * sz * 8;
} else {
- pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %d\n",
- type_id, spec_str, i, id, btf_kind(t));
+ pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n",
+ type_id, spec_str, i, id, btf_kind_str(t));
return -EINVAL;
}
}
@@ -4223,16 +4618,16 @@ static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf,
{
size_t local_essent_len, targ_essent_len;
const char *local_name, *targ_name;
- const struct btf_type *t;
+ const struct btf_type *t, *local_t;
struct ids_vec *cand_ids;
__u32 *new_ids;
int i, err, n;
- t = btf__type_by_id(local_btf, local_type_id);
- if (!t)
+ local_t = btf__type_by_id(local_btf, local_type_id);
+ if (!local_t)
return ERR_PTR(-EINVAL);
- local_name = btf__name_by_offset(local_btf, t->name_off);
+ local_name = btf__name_by_offset(local_btf, local_t->name_off);
if (str_is_empty(local_name))
return ERR_PTR(-EINVAL);
local_essent_len = bpf_core_essential_name_len(local_name);
@@ -4244,12 +4639,11 @@ static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf,
n = btf__get_nr_types(targ_btf);
for (i = 1; i <= n; i++) {
t = btf__type_by_id(targ_btf, i);
- targ_name = btf__name_by_offset(targ_btf, t->name_off);
- if (str_is_empty(targ_name))
+ if (btf_kind(t) != btf_kind(local_t))
continue;
- t = skip_mods_and_typedefs(targ_btf, i, NULL);
- if (!btf_is_composite(t) && !btf_is_array(t))
+ targ_name = btf__name_by_offset(targ_btf, t->name_off);
+ if (str_is_empty(targ_name))
continue;
targ_essent_len = bpf_core_essential_name_len(targ_name);
@@ -4257,11 +4651,12 @@ static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf,
continue;
if (strncmp(local_name, targ_name, local_essent_len) == 0) {
- pr_debug("[%d] %s: found candidate [%d] %s\n",
- local_type_id, local_name, i, targ_name);
- new_ids = reallocarray(cand_ids->data,
- cand_ids->len + 1,
- sizeof(*cand_ids->data));
+ pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s\n",
+ local_type_id, btf_kind_str(local_t),
+ local_name, i, btf_kind_str(t), targ_name);
+ new_ids = libbpf_reallocarray(cand_ids->data,
+ cand_ids->len + 1,
+ sizeof(*cand_ids->data));
if (!new_ids) {
err = -ENOMEM;
goto err_out;
@@ -4276,8 +4671,9 @@ err_out:
return ERR_PTR(err);
}
-/* Check two types for compatibility, skipping const/volatile/restrict and
- * typedefs, to ensure we are relocating compatible entities:
+/* Check two types for compatibility for the purpose of field access
+ * relocation. const/volatile/restrict and typedefs are skipped to ensure we
+ * are relocating semantically compatible entities:
* - any two STRUCTs/UNIONs are compatible and can be mixed;
* - any two FWDs are compatible, if their names match (modulo flavor suffix);
* - any two PTRs are always compatible;
@@ -4432,6 +4828,100 @@ static int bpf_core_match_member(const struct btf *local_btf,
return 0;
}
+/* Check local and target types for compatibility. This check is used for
+ * type-based CO-RE relocations and follows slightly different rules than
+ * field-based relocations. This function assumes that root types were already
+ * checked for name match. Beyond that initial root-level name check, names
+ * are completely ignored. Compatibility rules are as follows:
+ * - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but
+ * kind should match for local and target types (i.e., STRUCT is not
+ * compatible with UNION);
+ * - for ENUMs, the size is ignored;
+ * - for INT, size and signedness are ignored;
+ * - for ARRAY, dimensionality is ignored, element types are checked for
+ * compatibility recursively;
+ * - CONST/VOLATILE/RESTRICT modifiers are ignored;
+ * - TYPEDEFs/PTRs are compatible if the types they point to are compatible;
+ * - FUNC_PROTOs are compatible if they have compatible signature: same
+ * number of input args and compatible return and argument types.
+ * These rules are not set in stone and probably will be adjusted as we get
+ * more experience with using BPF CO-RE relocations.
+ */
+static int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
+ const struct btf *targ_btf, __u32 targ_id)
+{
+ const struct btf_type *local_type, *targ_type;
+ int depth = 32; /* max recursion depth */
+
+ /* caller made sure that names match (ignoring flavor suffix) */
+ local_type = btf__type_by_id(local_btf, local_id);
+ targ_type = btf__type_by_id(targ_btf, targ_id);
+ if (btf_kind(local_type) != btf_kind(targ_type))
+ return 0;
+
+recur:
+ depth--;
+ if (depth < 0)
+ return -EINVAL;
+
+ local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
+ targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
+ if (!local_type || !targ_type)
+ return -EINVAL;
+
+ if (btf_kind(local_type) != btf_kind(targ_type))
+ return 0;
+
+ switch (btf_kind(local_type)) {
+ case BTF_KIND_UNKN:
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ case BTF_KIND_ENUM:
+ case BTF_KIND_FWD:
+ return 1;
+ case BTF_KIND_INT:
+ /* just reject deprecated bitfield-like integers; all other
+ * integers are compatible with each other by default
+ */
+ return btf_int_offset(local_type) == 0 && btf_int_offset(targ_type) == 0;
+ case BTF_KIND_PTR:
+ local_id = local_type->type;
+ targ_id = targ_type->type;
+ goto recur;
+ case BTF_KIND_ARRAY:
+ local_id = btf_array(local_type)->type;
+ targ_id = btf_array(targ_type)->type;
+ goto recur;
+ case BTF_KIND_FUNC_PROTO: {
+ struct btf_param *local_p = btf_params(local_type);
+ struct btf_param *targ_p = btf_params(targ_type);
+ __u16 local_vlen = btf_vlen(local_type);
+ __u16 targ_vlen = btf_vlen(targ_type);
+ int i, err;
+
+ if (local_vlen != targ_vlen)
+ return 0;
+
+ for (i = 0; i < local_vlen; i++, local_p++, targ_p++) {
+ skip_mods_and_typedefs(local_btf, local_p->type, &local_id);
+ skip_mods_and_typedefs(targ_btf, targ_p->type, &targ_id);
+ err = bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id);
+ if (err <= 0)
+ return err;
+ }
+
+ /* tail recurse for return type check */
+ skip_mods_and_typedefs(local_btf, local_type->type, &local_id);
+ skip_mods_and_typedefs(targ_btf, targ_type->type, &targ_id);
+ goto recur;
+ }
+ default:
+ pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n",
+ btf_kind_str(local_type), local_id, targ_id);
+ return 0;
+ }
+}
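
A few illustrative local/target pairs and the verdict the rules above reach,
assuming the root names already matched; all names here are invented:

    /* STRUCT vs STRUCT: compatible regardless of member lists */
    struct conn___local { int fd; };
    struct conn___targ  { long id; char name[16]; };

    /* ARRAY: dimensions ignored; element INTs differ in size and
     * signedness, but plain integers are still compatible
     */
    typedef int           ring___local[4];
    typedef unsigned long ring___targ[64];

    /* FUNC_PROTO: same arg count with compatible arg/return types */
    int  cb___local(char c);   /* compatible with cb___targ...       */
    long cb___targ(short s);   /* ...but cb___bad takes 0 args vs 1, */
    void cb___bad(void);       /* so it is compatible with neither   */
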
+
/*
* Try to match local spec to a target type and, if successful, produce full
* target spec (high-level, low-level + bit offset).
@@ -4447,10 +4937,51 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
memset(targ_spec, 0, sizeof(*targ_spec));
targ_spec->btf = targ_btf;
+ targ_spec->root_type_id = targ_id;
+ targ_spec->relo_kind = local_spec->relo_kind;
+
+ if (core_relo_is_type_based(local_spec->relo_kind)) {
+ return bpf_core_types_are_compat(local_spec->btf,
+ local_spec->root_type_id,
+ targ_btf, targ_id);
+ }
local_acc = &local_spec->spec[0];
targ_acc = &targ_spec->spec[0];
+ if (core_relo_is_enumval_based(local_spec->relo_kind)) {
+ size_t local_essent_len, targ_essent_len;
+ const struct btf_enum *e;
+ const char *targ_name;
+
+ /* has to resolve to an enum */
+ targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, &targ_id);
+ if (!btf_is_enum(targ_type))
+ return 0;
+
+ local_essent_len = bpf_core_essential_name_len(local_acc->name);
+
+ for (i = 0, e = btf_enum(targ_type); i < btf_vlen(targ_type); i++, e++) {
+ targ_name = btf__name_by_offset(targ_spec->btf, e->name_off);
+ targ_essent_len = bpf_core_essential_name_len(targ_name);
+ if (targ_essent_len != local_essent_len)
+ continue;
+ if (strncmp(local_acc->name, targ_name, local_essent_len) == 0) {
+ targ_acc->type_id = targ_id;
+ targ_acc->idx = i;
+ targ_acc->name = targ_name;
+ targ_spec->len++;
+ targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
+ targ_spec->raw_len++;
+ return 1;
+ }
+ }
+ return 0;
+ }
+
+ if (!core_relo_is_field_based(local_spec->relo_kind))
+ return -EINVAL;
+
for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) {
targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id,
&targ_id);
@@ -4507,22 +5038,42 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
}
static int bpf_core_calc_field_relo(const struct bpf_program *prog,
- const struct bpf_field_reloc *relo,
+ const struct bpf_core_relo *relo,
const struct bpf_core_spec *spec,
- __u32 *val, bool *validate)
+ __u32 *val, __u32 *field_sz, __u32 *type_id,
+ bool *validate)
{
- const struct bpf_core_accessor *acc = &spec->spec[spec->len - 1];
- const struct btf_type *t = btf__type_by_id(spec->btf, acc->type_id);
- __u32 byte_off, byte_sz, bit_off, bit_sz;
+ const struct bpf_core_accessor *acc;
+ const struct btf_type *t;
+ __u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id;
const struct btf_member *m;
const struct btf_type *mt;
bool bitfield;
__s64 sz;
+ *field_sz = 0;
+
+ if (relo->kind == BPF_FIELD_EXISTS) {
+ *val = spec ? 1 : 0;
+ return 0;
+ }
+
+ if (!spec)
+ return -EUCLEAN; /* request instruction poisoning */
+
+ acc = &spec->spec[spec->len - 1];
+ t = btf__type_by_id(spec->btf, acc->type_id);
+
/* a[n] accessor needs special handling */
if (!acc->name) {
if (relo->kind == BPF_FIELD_BYTE_OFFSET) {
*val = spec->bit_offset / 8;
+ /* remember field size for load/store mem size */
+ sz = btf__resolve_size(spec->btf, acc->type_id);
+ if (sz < 0)
+ return -EINVAL;
+ *field_sz = sz;
+ *type_id = acc->type_id;
} else if (relo->kind == BPF_FIELD_BYTE_SIZE) {
sz = btf__resolve_size(spec->btf, acc->type_id);
if (sz < 0)
@@ -4530,8 +5081,7 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog,
*val = sz;
} else {
pr_warn("prog '%s': relo %d at insn #%d can't be applied to array access\n",
- bpf_program__title(prog, false),
- relo->kind, relo->insn_off / 8);
+ prog->name, relo->kind, relo->insn_off / 8);
return -EINVAL;
}
if (validate)
@@ -4540,7 +5090,7 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog,
}
m = btf_members(t) + acc->idx;
- mt = skip_mods_and_typedefs(spec->btf, m->type, NULL);
+ mt = skip_mods_and_typedefs(spec->btf, m->type, &field_type_id);
bit_off = spec->bit_offset;
bit_sz = btf_member_bitfield_size(t, acc->idx);
@@ -4553,15 +5103,14 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog,
if (byte_sz >= 8) {
/* bitfield can't be read with 64-bit read */
pr_warn("prog '%s': relo %d at insn #%d can't be satisfied for bitfield\n",
- bpf_program__title(prog, false),
- relo->kind, relo->insn_off / 8);
+ prog->name, relo->kind, relo->insn_off / 8);
return -E2BIG;
}
byte_sz *= 2;
byte_off = bit_off / 8 / byte_sz * byte_sz;
}
} else {
- sz = btf__resolve_size(spec->btf, m->type);
+ sz = btf__resolve_size(spec->btf, field_type_id);
if (sz < 0)
return -EINVAL;
byte_sz = sz;
@@ -4579,6 +5128,10 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog,
switch (relo->kind) {
case BPF_FIELD_BYTE_OFFSET:
*val = byte_off;
+ if (!bitfield) {
+ *field_sz = byte_sz;
+ *type_id = field_type_id;
+ }
break;
case BPF_FIELD_BYTE_SIZE:
*val = byte_sz;
@@ -4604,117 +5157,384 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog,
break;
case BPF_FIELD_EXISTS:
default:
- pr_warn("prog '%s': unknown relo %d at insn #%d\n",
- bpf_program__title(prog, false),
- relo->kind, relo->insn_off / 8);
- return -EINVAL;
+ return -EOPNOTSUPP;
}
return 0;
}
+static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo,
+ const struct bpf_core_spec *spec,
+ __u32 *val)
+{
+ __s64 sz;
+
+ /* type-based relos return zero when target type is not found */
+ if (!spec) {
+ *val = 0;
+ return 0;
+ }
+
+ switch (relo->kind) {
+ case BPF_TYPE_ID_TARGET:
+ *val = spec->root_type_id;
+ break;
+ case BPF_TYPE_EXISTS:
+ *val = 1;
+ break;
+ case BPF_TYPE_SIZE:
+ sz = btf__resolve_size(spec->btf, spec->root_type_id);
+ if (sz < 0)
+ return -EINVAL;
+ *val = sz;
+ break;
+ case BPF_TYPE_ID_LOCAL:
+ /* BPF_TYPE_ID_LOCAL is handled specially and shouldn't get here */
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
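
On the BPF program side these kinds are emitted by the bpf_core_type_*()
macros this series adds to bpf_core_read.h (per the diffstat). A minimal
sketch, with a hypothetical local flavor type:

    #include <linux/types.h>
    #include <bpf/bpf_core_read.h>

    struct task_struct___hypo { int dummy; };  /* hypothetical local flavor */

    static __u64 probe_type(void)
    {
        __u32 targ_id;

        if (!bpf_core_type_exists(struct task_struct___hypo))  /* BPF_TYPE_EXISTS */
            return 0;
        /* BPF_TYPE_ID_TARGET */
        targ_id = bpf_core_type_id_kernel(struct task_struct___hypo);
        /* BPF_TYPE_SIZE, with the target type ID in the upper half */
        return ((__u64)targ_id << 32) | bpf_core_type_size(struct task_struct___hypo);
    }
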
+
+static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo,
+ const struct bpf_core_spec *spec,
+ __u32 *val)
+{
+ const struct btf_type *t;
+ const struct btf_enum *e;
+
+ switch (relo->kind) {
+ case BPF_ENUMVAL_EXISTS:
+ *val = spec ? 1 : 0;
+ break;
+ case BPF_ENUMVAL_VALUE:
+ if (!spec)
+ return -EUCLEAN; /* request instruction poisoning */
+ t = btf__type_by_id(spec->btf, spec->spec[0].type_id);
+ e = btf_enum(t) + spec->spec[0].idx;
+ *val = e->val;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
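
The enum-value counterpart, again assuming the macros from this series'
bpf_core_read.h update and reusing the hypothetical enum from the parser
comment above:

    static __u64 probe_enum(void)
    {
        /* BPF_ENUMVAL_EXISTS */
        if (!bpf_core_enum_value_exists(enum pid_type___hypo, PIDTYPE_MAX___hypo))
            return 0;
        /* BPF_ENUMVAL_VALUE: resolved to the *target* kernel's value */
        return bpf_core_enum_value(enum pid_type___hypo, PIDTYPE_MAX___hypo);
    }
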
+
+struct bpf_core_relo_res
+{
+ /* expected value in the instruction, unless validate == false */
+ __u32 orig_val;
+ /* new value that needs to be patched up to */
+ __u32 new_val;
+ /* relocation unsuccessful, poison instruction, but don't fail load */
+ bool poison;
+ /* some relocations can't be validated against orig_val */
+ bool validate;
+ /* for field byte offset relocations of the forms:
+ * *(T *)(rX + <off>) = rY
+ * rX = *(T *)(rY + <off>),
+ * we remember original and resolved field size to adjust direct
+ * memory loads of pointers and integers; this is necessary for 32-bit
+ * host kernel architectures, but also allows us to automatically
+ * relocate fields that were resized from, e.g., u32 to u64, etc.
+ */
+ bool fail_memsz_adjust;
+ __u32 orig_sz;
+ __u32 orig_type_id;
+ __u32 new_sz;
+ __u32 new_type_id;
+};
+
+/* Calculate original and target relocation values, given local and target
+ * specs and relocation kind. These values are calculated for each candidate.
+ * If there are multiple candidates, resulting values should all be consistent
+ * with each other. Otherwise, libbpf will refuse to proceed due to ambiguity.
+ * If the instruction has to be poisoned, res->poison will be set to true.
+ */
+static int bpf_core_calc_relo(const struct bpf_program *prog,
+ const struct bpf_core_relo *relo,
+ int relo_idx,
+ const struct bpf_core_spec *local_spec,
+ const struct bpf_core_spec *targ_spec,
+ struct bpf_core_relo_res *res)
+{
+ int err = -EOPNOTSUPP;
+
+ res->orig_val = 0;
+ res->new_val = 0;
+ res->poison = false;
+ res->validate = true;
+ res->fail_memsz_adjust = false;
+ res->orig_sz = res->new_sz = 0;
+ res->orig_type_id = res->new_type_id = 0;
+
+ if (core_relo_is_field_based(relo->kind)) {
+ err = bpf_core_calc_field_relo(prog, relo, local_spec,
+ &res->orig_val, &res->orig_sz,
+ &res->orig_type_id, &res->validate);
+ err = err ?: bpf_core_calc_field_relo(prog, relo, targ_spec,
+ &res->new_val, &res->new_sz,
+ &res->new_type_id, NULL);
+ if (err)
+ goto done;
+ /* Validate if it's safe to adjust load/store memory size.
+ * Adjustments are performed only if original and new memory
+ * sizes differ.
+ */
+ res->fail_memsz_adjust = false;
+ if (res->orig_sz != res->new_sz) {
+ const struct btf_type *orig_t, *new_t;
+
+ orig_t = btf__type_by_id(local_spec->btf, res->orig_type_id);
+ new_t = btf__type_by_id(targ_spec->btf, res->new_type_id);
+
+ /* There are two use cases in which it's safe to
+ * adjust load/store's mem size:
+ * - reading a 32-bit kernel pointer, while on the BPF
+ * side pointers are always 64-bit; in this case
+ * it's safe to "downsize" the instruction size, as the
+ * pointer is treated as an unsigned integer with
+ * zero-extended upper 32 bits;
+ * - reading unsigned integers, again because
+ * zero-extension preserves the value correctly.
+ *
+ * In all other cases it's incorrect to attempt to
+ * load/store field because read value will be
+ * incorrect, so we poison relocated instruction.
+ */
+ if (btf_is_ptr(orig_t) && btf_is_ptr(new_t))
+ goto done;
+ if (btf_is_int(orig_t) && btf_is_int(new_t) &&
+ btf_int_encoding(orig_t) != BTF_INT_SIGNED &&
+ btf_int_encoding(new_t) != BTF_INT_SIGNED)
+ goto done;
+
+ /* mark as invalid mem size adjustment, but this will
+ * only be checked for LDX/STX/ST insns
+ */
+ res->fail_memsz_adjust = true;
+ }
+ } else if (core_relo_is_type_based(relo->kind)) {
+ err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val);
+ err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val);
+ } else if (core_relo_is_enumval_based(relo->kind)) {
+ err = bpf_core_calc_enumval_relo(relo, local_spec, &res->orig_val);
+ err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val);
+ }
+
+done:
+ if (err == -EUCLEAN) {
+ /* EUCLEAN is used to signal instruction poisoning request */
+ res->poison = true;
+ err = 0;
+ } else if (err == -EOPNOTSUPP) {
+ /* EOPNOTSUPP means unknown/unsupported relocation */
+ pr_warn("prog '%s': relo #%d: unrecognized CO-RE relocation %s (%d) at insn #%d\n",
+ prog->name, relo_idx, core_relo_kind_str(relo->kind),
+ relo->kind, relo->insn_off / 8);
+ }
+
+ return err;
+}
+
+/*
+ * Turn an instruction for which CO-RE relocation failed into an invalid one
+ * with a distinct signature.
+ */
+static void bpf_core_poison_insn(struct bpf_program *prog, int relo_idx,
+ int insn_idx, struct bpf_insn *insn)
+{
+ pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n",
+ prog->name, relo_idx, insn_idx);
+ insn->code = BPF_JMP | BPF_CALL;
+ insn->dst_reg = 0;
+ insn->src_reg = 0;
+ insn->off = 0;
+ /* if this instruction is reachable (not dead code),
+ * verifier will complain with the following message:
+ * invalid func unknown#195896080
+ */
+ insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */
+}
+
+static bool is_ldimm64(struct bpf_insn *insn)
+{
+ return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
+}
+
+static int insn_bpf_size_to_bytes(struct bpf_insn *insn)
+{
+ switch (BPF_SIZE(insn->code)) {
+ case BPF_DW: return 8;
+ case BPF_W: return 4;
+ case BPF_H: return 2;
+ case BPF_B: return 1;
+ default: return -1;
+ }
+}
+
+static int insn_bytes_to_bpf_size(__u32 sz)
+{
+ switch (sz) {
+ case 8: return BPF_DW;
+ case 4: return BPF_W;
+ case 2: return BPF_H;
+ case 1: return BPF_B;
+ default: return -1;
+ }
+}
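
These two helpers combine in the LDX/ST/STX patching path below: if, say, a
field grew from u32 to u64 between local and target BTF, the 4-byte load is
recoded to an 8-byte one while mode and class are preserved. A minimal sketch
using the helper above (widen_load_to_8() itself is hypothetical):

    #include <linux/bpf.h>

    static void widen_load_to_8(struct bpf_insn *insn)
    {
        /* e.g. insn->code == (BPF_LDX | BPF_MEM | BPF_W): swap only the
         * size bits, keeping BPF_MEM mode and BPF_LDX class intact
         */
        insn->code = BPF_MODE(insn->code) | insn_bytes_to_bpf_size(8) |
                     BPF_CLASS(insn->code);
        /* insn->code is now (BPF_LDX | BPF_MEM | BPF_DW) */
    }
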
+
/*
* Patch relocatable BPF instruction.
*
* Patched value is determined by relocation kind and target specification.
- * For field existence relocation target spec will be NULL if field is not
- * found.
+ * For existence relocations, target spec will be NULL if the field/type is not found.
* Expected insn->imm value is determined using relocation kind and local
* spec, and is checked before patching instruction. If actual insn->imm value
* is wrong, bail out with error.
*
- * Currently three kinds of BPF instructions are supported:
+ * Currently supported classes of BPF instruction are:
* 1. rX = <imm> (assignment with immediate operand);
* 2. rX += <imm> (arithmetic operations with immediate operand);
+ * 3. rX = <imm64> (load with 64-bit immediate value);
+ * 4. rX = *(T *)(rY + <off>), where T is one of {u8, u16, u32, u64};
+ * 5. *(T *)(rX + <off>) = rY, where T is one of {u8, u16, u32, u64};
+ * 6. *(T *)(rX + <off>) = <imm>, where T is one of {u8, u16, u32, u64}.
*/
-static int bpf_core_reloc_insn(struct bpf_program *prog,
- const struct bpf_field_reloc *relo,
+static int bpf_core_patch_insn(struct bpf_program *prog,
+ const struct bpf_core_relo *relo,
int relo_idx,
- const struct bpf_core_spec *local_spec,
- const struct bpf_core_spec *targ_spec)
+ const struct bpf_core_relo_res *res)
{
__u32 orig_val, new_val;
struct bpf_insn *insn;
- bool validate = true;
- int insn_idx, err;
+ int insn_idx;
__u8 class;
- if (relo->insn_off % sizeof(struct bpf_insn))
+ if (relo->insn_off % BPF_INSN_SZ)
return -EINVAL;
- insn_idx = relo->insn_off / sizeof(struct bpf_insn);
+ insn_idx = relo->insn_off / BPF_INSN_SZ;
+ /* adjust insn_idx from section frame of reference to the local
+ * program's frame of reference; (sub-)program code is not yet
+ * relocated, so it's enough to just subtract in-section offset
+ */
+ insn_idx = insn_idx - prog->sec_insn_off;
insn = &prog->insns[insn_idx];
class = BPF_CLASS(insn->code);
- if (relo->kind == BPF_FIELD_EXISTS) {
- orig_val = 1; /* can't generate EXISTS relo w/o local field */
- new_val = targ_spec ? 1 : 0;
- } else if (!targ_spec) {
- pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n",
- bpf_program__title(prog, false), relo_idx, insn_idx);
- insn->code = BPF_JMP | BPF_CALL;
- insn->dst_reg = 0;
- insn->src_reg = 0;
- insn->off = 0;
- /* if this instruction is reachable (not a dead code),
- * verifier will complain with the following message:
- * invalid func unknown#195896080
+ if (res->poison) {
+poison:
+ /* poison second part of ldimm64 to avoid confusing error from
+ * verifier about "unknown opcode 00"
*/
- insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */
+ if (is_ldimm64(insn))
+ bpf_core_poison_insn(prog, relo_idx, insn_idx + 1, insn + 1);
+ bpf_core_poison_insn(prog, relo_idx, insn_idx, insn);
return 0;
- } else {
- err = bpf_core_calc_field_relo(prog, relo, local_spec,
- &orig_val, &validate);
- if (err)
- return err;
- err = bpf_core_calc_field_relo(prog, relo, targ_spec,
- &new_val, NULL);
- if (err)
- return err;
}
+ orig_val = res->orig_val;
+ new_val = res->new_val;
+
switch (class) {
case BPF_ALU:
case BPF_ALU64:
if (BPF_SRC(insn->code) != BPF_K)
return -EINVAL;
- if (validate && insn->imm != orig_val) {
+ if (res->validate && insn->imm != orig_val) {
pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n",
- bpf_program__title(prog, false), relo_idx,
+ prog->name, relo_idx,
insn_idx, insn->imm, orig_val, new_val);
return -EINVAL;
}
orig_val = insn->imm;
insn->imm = new_val;
pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %u -> %u\n",
- bpf_program__title(prog, false), relo_idx, insn_idx,
+ prog->name, relo_idx, insn_idx,
orig_val, new_val);
break;
case BPF_LDX:
case BPF_ST:
case BPF_STX:
- if (validate && insn->off != orig_val) {
- pr_warn("prog '%s': relo #%d: unexpected insn #%d (LD/LDX/ST/STX) value: got %u, exp %u -> %u\n",
- bpf_program__title(prog, false), relo_idx,
- insn_idx, insn->off, orig_val, new_val);
+ if (res->validate && insn->off != orig_val) {
+ pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %u -> %u\n",
+ prog->name, relo_idx, insn_idx, insn->off, orig_val, new_val);
return -EINVAL;
}
if (new_val > SHRT_MAX) {
pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %u\n",
- bpf_program__title(prog, false), relo_idx,
- insn_idx, new_val);
+ prog->name, relo_idx, insn_idx, new_val);
return -ERANGE;
}
+ if (res->fail_memsz_adjust) {
+ pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) accesses field incorrectly. "
+ "Make sure you are accessing pointers, unsigned integers, or fields of matching type and size.\n",
+ prog->name, relo_idx, insn_idx);
+ goto poison;
+ }
+
orig_val = insn->off;
insn->off = new_val;
pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n",
- bpf_program__title(prog, false), relo_idx, insn_idx,
- orig_val, new_val);
+ prog->name, relo_idx, insn_idx, orig_val, new_val);
+
+ if (res->new_sz != res->orig_sz) {
+ int insn_bytes_sz, insn_bpf_sz;
+
+ insn_bytes_sz = insn_bpf_size_to_bytes(insn);
+ if (insn_bytes_sz != res->orig_sz) {
+ pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) unexpected mem size: got %d, exp %u\n",
+ prog->name, relo_idx, insn_idx, insn_bytes_sz, res->orig_sz);
+ return -EINVAL;
+ }
+
+ insn_bpf_sz = insn_bytes_to_bpf_size(res->new_sz);
+ if (insn_bpf_sz < 0) {
+ pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) invalid new mem size: %u\n",
+ prog->name, relo_idx, insn_idx, res->new_sz);
+ return -EINVAL;
+ }
+
+ insn->code = BPF_MODE(insn->code) | insn_bpf_sz | BPF_CLASS(insn->code);
+ pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) mem_sz %u -> %u\n",
+ prog->name, relo_idx, insn_idx, res->orig_sz, res->new_sz);
+ }
+ break;
+ case BPF_LD: {
+ __u64 imm;
+
+ if (!is_ldimm64(insn) ||
+ insn[0].src_reg != 0 || insn[0].off != 0 ||
+ insn_idx + 1 >= prog->insns_cnt ||
+ insn[1].code != 0 || insn[1].dst_reg != 0 ||
+ insn[1].src_reg != 0 || insn[1].off != 0) {
+ pr_warn("prog '%s': relo #%d: insn #%d (LDIMM64) has unexpected form\n",
+ prog->name, relo_idx, insn_idx);
+ return -EINVAL;
+ }
+
+ imm = insn[0].imm + ((__u64)insn[1].imm << 32);
+ if (res->validate && imm != orig_val) {
+ pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %u -> %u\n",
+ prog->name, relo_idx,
+ insn_idx, (unsigned long long)imm,
+ orig_val, new_val);
+ return -EINVAL;
+ }
+
+ insn[0].imm = new_val;
+ insn[1].imm = 0; /* currently only 32-bit values are supported */
+ pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %u\n",
+ prog->name, relo_idx, insn_idx,
+ (unsigned long long)imm, new_val);
break;
+ }
default:
- pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:%x, src:%x, dst:%x, off:%x, imm:%x\n",
- bpf_program__title(prog, false), relo_idx,
- insn_idx, insn->code, insn->src_reg, insn->dst_reg,
- insn->off, insn->imm);
+ pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:0x%x, src:0x%x, dst:0x%x, off:0x%x, imm:0x%x\n",
+ prog->name, relo_idx, insn_idx, insn->code,
+ insn->src_reg, insn->dst_reg, insn->off, insn->imm);
return -EINVAL;
}
@@ -4728,29 +5548,48 @@ static int bpf_core_reloc_insn(struct bpf_program *prog,
static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec)
{
const struct btf_type *t;
+ const struct btf_enum *e;
const char *s;
__u32 type_id;
int i;
- type_id = spec->spec[0].type_id;
+ type_id = spec->root_type_id;
t = btf__type_by_id(spec->btf, type_id);
s = btf__name_by_offset(spec->btf, t->name_off);
- libbpf_print(level, "[%u] %s + ", type_id, s);
- for (i = 0; i < spec->raw_len; i++)
- libbpf_print(level, "%d%s", spec->raw_spec[i],
- i == spec->raw_len - 1 ? " => " : ":");
+ libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? "<anon>" : s);
- libbpf_print(level, "%u.%u @ &x",
- spec->bit_offset / 8, spec->bit_offset % 8);
+ if (core_relo_is_type_based(spec->relo_kind))
+ return;
- for (i = 0; i < spec->len; i++) {
- if (spec->spec[i].name)
- libbpf_print(level, ".%s", spec->spec[i].name);
- else
- libbpf_print(level, "[%u]", spec->spec[i].idx);
+ if (core_relo_is_enumval_based(spec->relo_kind)) {
+ t = skip_mods_and_typedefs(spec->btf, type_id, NULL);
+ e = btf_enum(t) + spec->raw_spec[0];
+ s = btf__name_by_offset(spec->btf, e->name_off);
+
+ libbpf_print(level, "::%s = %u", s, e->val);
+ return;
}
+ if (core_relo_is_field_based(spec->relo_kind)) {
+ for (i = 0; i < spec->len; i++) {
+ if (spec->spec[i].name)
+ libbpf_print(level, ".%s", spec->spec[i].name);
+ else if (i > 0 || spec->spec[i].idx > 0)
+ libbpf_print(level, "[%u]", spec->spec[i].idx);
+ }
+
+ libbpf_print(level, " (");
+ for (i = 0; i < spec->raw_len; i++)
+ libbpf_print(level, "%s%d", i == 0 ? "" : ":", spec->raw_spec[i]);
+
+ if (spec->bit_offset % 8)
+ libbpf_print(level, " @ offset %u.%u)",
+ spec->bit_offset / 8, spec->bit_offset % 8);
+ else
+ libbpf_print(level, " @ offset %u)", spec->bit_offset / 8);
+ return;
+ }
}
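
With this rework the dump format varies by relocation kind; hypothetical
outputs (type IDs, indices and offsets are invented) would look roughly like:

    [132] struct task_struct.comm (0:93 @ offset 2784)   <- field-based
    [58] enum pid_type::PIDTYPE_MAX = 4                  <- enum value-based
    [7] struct sk_buff                                   <- type-based
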
static size_t bpf_core_hash_fn(const void *key, void *ctx)
@@ -4814,22 +5653,22 @@ static void *u32_as_hash_key(__u32 x)
* CPU-wise compared to prebuilding a map from all local type names to
* a list of candidate type names. It's also sped up by caching resolved
* list of matching candidates per each local "root" type ID, that has at
- * least one bpf_field_reloc associated with it. This list is shared
+ * least one bpf_core_relo associated with it. This list is shared
* between multiple relocations for the same type ID and is updated as some
* of the candidates are pruned due to structural incompatibility.
*/
-static int bpf_core_reloc_field(struct bpf_program *prog,
- const struct bpf_field_reloc *relo,
- int relo_idx,
- const struct btf *local_btf,
- const struct btf *targ_btf,
- struct hashmap *cand_cache)
+static int bpf_core_apply_relo(struct bpf_program *prog,
+ const struct bpf_core_relo *relo,
+ int relo_idx,
+ const struct btf *local_btf,
+ const struct btf *targ_btf,
+ struct hashmap *cand_cache)
{
- const char *prog_name = bpf_program__title(prog, false);
- struct bpf_core_spec local_spec, cand_spec, targ_spec;
+ struct bpf_core_spec local_spec, cand_spec, targ_spec = {};
const void *type_key = u32_as_hash_key(relo->type_id);
- const struct btf_type *local_type, *cand_type;
- const char *local_name, *cand_name;
+ struct bpf_core_relo_res cand_res, targ_res;
+ const struct btf_type *local_type;
+ const char *local_name;
struct ids_vec *cand_ids;
__u32 local_id, cand_id;
const char *spec_str;
@@ -4841,32 +5680,49 @@ static int bpf_core_reloc_field(struct bpf_program *prog,
return -EINVAL;
local_name = btf__name_by_offset(local_btf, local_type->name_off);
- if (str_is_empty(local_name))
+ if (!local_name)
return -EINVAL;
spec_str = btf__name_by_offset(local_btf, relo->access_str_off);
if (str_is_empty(spec_str))
return -EINVAL;
- err = bpf_core_spec_parse(local_btf, local_id, spec_str, &local_spec);
+ err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec);
if (err) {
- pr_warn("prog '%s': relo #%d: parsing [%d] %s + %s failed: %d\n",
- prog_name, relo_idx, local_id, local_name, spec_str,
- err);
+ pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n",
+ prog->name, relo_idx, local_id, btf_kind_str(local_type),
+ str_is_empty(local_name) ? "<anon>" : local_name,
+ spec_str, err);
return -EINVAL;
}
- pr_debug("prog '%s': relo #%d: kind %d, spec is ", prog_name, relo_idx,
- relo->kind);
+ pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog->name,
+ relo_idx, core_relo_kind_str(relo->kind), relo->kind);
bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec);
libbpf_print(LIBBPF_DEBUG, "\n");
+ /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */
+ if (relo->kind == BPF_TYPE_ID_LOCAL) {
+ targ_res.validate = true;
+ targ_res.poison = false;
+ targ_res.orig_val = local_spec.root_type_id;
+ targ_res.new_val = local_spec.root_type_id;
+ goto patch_insn;
+ }
+
+ /* libbpf doesn't support candidate search for anonymous types */
+ if (str_is_empty(local_name)) {
+ pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n",
+ prog->name, relo_idx, core_relo_kind_str(relo->kind), relo->kind);
+ return -EOPNOTSUPP;
+ }
+
if (!hashmap__find(cand_cache, type_key, (void **)&cand_ids)) {
cand_ids = bpf_core_find_cands(local_btf, local_id, targ_btf);
if (IS_ERR(cand_ids)) {
- pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s: %ld",
- prog_name, relo_idx, local_id, local_name,
- PTR_ERR(cand_ids));
+ pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld",
+ prog->name, relo_idx, local_id, btf_kind_str(local_type),
+ local_name, PTR_ERR(cand_ids));
return PTR_ERR(cand_ids);
}
err = hashmap__set(cand_cache, type_key, cand_ids, NULL, NULL);
@@ -4878,36 +5734,51 @@ static int bpf_core_reloc_field(struct bpf_program *prog,
for (i = 0, j = 0; i < cand_ids->len; i++) {
cand_id = cand_ids->data[i];
- cand_type = btf__type_by_id(targ_btf, cand_id);
- cand_name = btf__name_by_offset(targ_btf, cand_type->name_off);
-
- err = bpf_core_spec_match(&local_spec, targ_btf,
- cand_id, &cand_spec);
- pr_debug("prog '%s': relo #%d: matching candidate #%d %s against spec ",
- prog_name, relo_idx, i, cand_name);
- bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec);
- libbpf_print(LIBBPF_DEBUG, ": %d\n", err);
+ err = bpf_core_spec_match(&local_spec, targ_btf, cand_id, &cand_spec);
if (err < 0) {
- pr_warn("prog '%s': relo #%d: matching error: %d\n",
- prog_name, relo_idx, err);
+ pr_warn("prog '%s': relo #%d: error matching candidate #%d ",
+ prog->name, relo_idx, i);
+ bpf_core_dump_spec(LIBBPF_WARN, &cand_spec);
+ libbpf_print(LIBBPF_WARN, ": %d\n", err);
return err;
}
+
+ pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog->name,
+ relo_idx, err == 0 ? "non-matching" : "matching", i);
+ bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec);
+ libbpf_print(LIBBPF_DEBUG, "\n");
+
if (err == 0)
continue;
+ err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, &cand_spec, &cand_res);
+ if (err)
+ return err;
+
if (j == 0) {
+ targ_res = cand_res;
targ_spec = cand_spec;
} else if (cand_spec.bit_offset != targ_spec.bit_offset) {
- /* if there are many candidates, they should all
- * resolve to the same bit offset
+ /* if there are many field relo candidates, they
+ * should all resolve to the same bit offset
*/
- pr_warn("prog '%s': relo #%d: offset ambiguity: %u != %u\n",
- prog_name, relo_idx, cand_spec.bit_offset,
+ pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n",
+ prog->name, relo_idx, cand_spec.bit_offset,
targ_spec.bit_offset);
return -EINVAL;
+ } else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) {
+ /* all candidates should result in the same relocation
+ * decision and value, otherwise it's dangerous to
+ * proceed due to ambiguity
+ */
+ pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n",
+ prog->name, relo_idx,
+ cand_res.poison ? "failure" : "success", cand_res.new_val,
+ targ_res.poison ? "failure" : "success", targ_res.new_val);
+ return -EINVAL;
}
- cand_ids->data[j++] = cand_spec.spec[0].type_id;
+ cand_ids->data[j++] = cand_spec.root_type_id;
}
/*
@@ -4926,22 +5797,28 @@ static int bpf_core_reloc_field(struct bpf_program *prog,
* as well as expected case, depending whether instruction w/
* relocation is guarded in some way that makes it unreachable (dead
* code) if relocation can't be resolved. This is handled in
- * bpf_core_reloc_insn() uniformly by replacing that instruction with
+ * bpf_core_patch_insn() uniformly by replacing that instruction with
* BPF helper call insn (using invalid helper ID). If that instruction
* is indeed unreachable, then it will be ignored and eliminated by
* verifier. If it was an error, then verifier will complain and point
* to a specific instruction number in its log.
*/
- if (j == 0)
- pr_debug("prog '%s': relo #%d: no matching targets found for [%d] %s + %s\n",
- prog_name, relo_idx, local_id, local_name, spec_str);
+ if (j == 0) {
+ pr_debug("prog '%s': relo #%d: no matching targets found\n",
+ prog->name, relo_idx);
+
+ /* calculate single target relo result explicitly */
+ err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, NULL, &targ_res);
+ if (err)
+ return err;
+ }
- /* bpf_core_reloc_insn should know how to handle missing targ_spec */
- err = bpf_core_reloc_insn(prog, relo, relo_idx, &local_spec,
- j ? &targ_spec : NULL);
+patch_insn:
+ /* bpf_core_patch_insn() should know how to handle missing targ_spec */
+ err = bpf_core_patch_insn(prog, relo, relo_idx, &targ_res);
if (err) {
pr_warn("prog '%s': relo #%d: failed to patch insn at offset %d: %d\n",
- prog_name, relo_idx, relo->insn_off, err);
+ prog->name, relo_idx, relo->insn_off, err);
return -EINVAL;
}
@@ -4949,20 +5826,23 @@ static int bpf_core_reloc_field(struct bpf_program *prog,
}
static int
-bpf_core_reloc_fields(struct bpf_object *obj, const char *targ_btf_path)
+bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
{
const struct btf_ext_info_sec *sec;
- const struct bpf_field_reloc *rec;
+ const struct bpf_core_relo *rec;
const struct btf_ext_info *seg;
struct hashmap_entry *entry;
struct hashmap *cand_cache = NULL;
struct bpf_program *prog;
struct btf *targ_btf;
const char *sec_name;
- int i, err = 0;
+ int i, err = 0, insn_idx, sec_idx;
+
+ if (obj->btf_ext->core_relo_info.len == 0)
+ return 0;
if (targ_btf_path)
- targ_btf = btf__parse_elf(targ_btf_path, NULL);
+ targ_btf = btf__parse(targ_btf_path, NULL);
else
targ_btf = obj->btf_vmlinux;
if (IS_ERR_OR_NULL(targ_btf)) {
@@ -4976,36 +5856,54 @@ bpf_core_reloc_fields(struct bpf_object *obj, const char *targ_btf_path)
goto out;
}
- seg = &obj->btf_ext->field_reloc_info;
+ seg = &obj->btf_ext->core_relo_info;
for_each_btf_ext_sec(seg, sec) {
sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
if (str_is_empty(sec_name)) {
err = -EINVAL;
goto out;
}
+ /* bpf_object's ELF is gone by now so it's not easy to find
+ * section index by section name, but we can find *any*
+ * bpf_program within the desired section and use its
+ * prog->sec_idx to do a proper search by section index and
+ * instruction offset
+ */
prog = NULL;
for (i = 0; i < obj->nr_programs; i++) {
- if (!strcmp(obj->programs[i].section_name, sec_name)) {
- prog = &obj->programs[i];
+ prog = &obj->programs[i];
+ if (strcmp(prog->sec_name, sec_name) == 0)
break;
- }
}
if (!prog) {
- pr_warn("failed to find program '%s' for CO-RE offset relocation\n",
- sec_name);
- err = -EINVAL;
- goto out;
+ pr_warn("sec '%s': failed to find a BPF program\n", sec_name);
+ return -ENOENT;
}
+ sec_idx = prog->sec_idx;
- pr_debug("prog '%s': performing %d CO-RE offset relocs\n",
+ pr_debug("sec '%s': found %d CO-RE relocations\n",
sec_name, sec->num_info);
for_each_btf_ext_rec(seg, sec, i, rec) {
- err = bpf_core_reloc_field(prog, rec, i, obj->btf,
- targ_btf, cand_cache);
+ insn_idx = rec->insn_off / BPF_INSN_SZ;
+ prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
+ if (!prog) {
+ pr_warn("sec '%s': failed to find program at insn #%d for CO-RE offset relocation #%d\n",
+ sec_name, insn_idx, i);
+ err = -EINVAL;
+ goto out;
+ }
+ /* no need to apply CO-RE relocation if the program is
+ * not going to be loaded
+ */
+ if (!prog->load)
+ continue;
+
+ err = bpf_core_apply_relo(prog, rec, i, obj->btf,
+ targ_btf, cand_cache);
if (err) {
pr_warn("prog '%s': relo #%d: failed to relocate: %d\n",
- sec_name, i, err);
+ prog->name, i, err);
goto out;
}
}
@@ -5024,125 +5922,432 @@ out:
return err;
}
+/* Relocate data references within program code:
+ * - map references;
+ * - global variable references;
+ * - extern references.
+ */
static int
-bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
+bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
{
- int err = 0;
+ int i;
- if (obj->btf_ext->field_reloc_info.len)
- err = bpf_core_reloc_fields(obj, targ_btf_path);
+ for (i = 0; i < prog->nr_reloc; i++) {
+ struct reloc_desc *relo = &prog->reloc_desc[i];
+ struct bpf_insn *insn = &prog->insns[relo->insn_idx];
+ struct extern_desc *ext;
- return err;
+ switch (relo->type) {
+ case RELO_LD64:
+ insn[0].src_reg = BPF_PSEUDO_MAP_FD;
+ insn[0].imm = obj->maps[relo->map_idx].fd;
+ relo->processed = true;
+ break;
+ case RELO_DATA:
+ insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
+ insn[1].imm = insn[0].imm + relo->sym_off;
+ insn[0].imm = obj->maps[relo->map_idx].fd;
+ relo->processed = true;
+ break;
+ case RELO_EXTERN:
+ ext = &obj->externs[relo->sym_off];
+ if (ext->type == EXT_KCFG) {
+ insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
+ insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
+ insn[1].imm = ext->kcfg.data_off;
+ } else /* EXT_KSYM */ {
+ if (ext->ksym.type_id) { /* typed ksyms */
+ insn[0].src_reg = BPF_PSEUDO_BTF_ID;
+ insn[0].imm = ext->ksym.vmlinux_btf_id;
+ } else { /* typeless ksyms */
+ insn[0].imm = (__u32)ext->ksym.addr;
+ insn[1].imm = ext->ksym.addr >> 32;
+ }
+ }
+ relo->processed = true;
+ break;
+ case RELO_CALL:
+ /* will be handled as a follow up pass */
+ break;
+ default:
+ pr_warn("prog '%s': relo #%d: bad relo type %d\n",
+ prog->name, i, relo->type);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
}
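
As one concrete instance, the RELO_DATA case rewrites both halves of the
ldimm64 pair. A standalone sketch, where patch_data_reloc(), map_fd and
sym_off are hypothetical stand-ins for the map fd and symbol offset used
above:

    #include <linux/bpf.h>

    static void patch_data_reloc(struct bpf_insn insn[2], int map_fd, __u32 sym_off)
    {
        insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; /* imm pair = (map fd, value offset) */
        insn[1].imm = insn[0].imm + sym_off;    /* variable offset inside map value  */
        insn[0].imm = map_fd;                   /* fd of .data/.bss/.rodata map      */
    }
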
-static int
-bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj,
- struct reloc_desc *relo)
+static int adjust_prog_btf_ext_info(const struct bpf_object *obj,
+ const struct bpf_program *prog,
+ const struct btf_ext_info *ext_info,
+ void **prog_info, __u32 *prog_rec_cnt,
+ __u32 *prog_rec_sz)
{
- struct bpf_insn *insn, *new_insn;
- struct bpf_program *text;
- size_t new_cnt;
- int err;
+ void *copy_start = NULL, *copy_end = NULL;
+ void *rec, *rec_end, *new_prog_info;
+ const struct btf_ext_info_sec *sec;
+ size_t old_sz, new_sz;
+ const char *sec_name;
+ int i, off_adj;
- if (prog->idx != obj->efile.text_shndx && prog->main_prog_cnt == 0) {
- text = bpf_object__find_prog_by_idx(obj, obj->efile.text_shndx);
- if (!text) {
- pr_warn("no .text section found yet relo into text exist\n");
- return -LIBBPF_ERRNO__RELOC;
+ for_each_btf_ext_sec(ext_info, sec) {
+ sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
+ if (!sec_name)
+ return -EINVAL;
+ if (strcmp(sec_name, prog->sec_name) != 0)
+ continue;
+
+ for_each_btf_ext_rec(ext_info, sec, i, rec) {
+ __u32 insn_off = *(__u32 *)rec / BPF_INSN_SZ;
+
+ if (insn_off < prog->sec_insn_off)
+ continue;
+ if (insn_off >= prog->sec_insn_off + prog->sec_insn_cnt)
+ break;
+
+ if (!copy_start)
+ copy_start = rec;
+ copy_end = rec + ext_info->rec_size;
}
- new_cnt = prog->insns_cnt + text->insns_cnt;
- new_insn = reallocarray(prog->insns, new_cnt, sizeof(*insn));
- if (!new_insn) {
- pr_warn("oom in prog realloc\n");
+
+ if (!copy_start)
+ return -ENOENT;
+
+ /* append func/line info of a given (sub-)program to the main
+ * program func/line info
+ */
+ old_sz = (size_t)(*prog_rec_cnt) * ext_info->rec_size;
+ new_sz = old_sz + (copy_end - copy_start);
+ new_prog_info = realloc(*prog_info, new_sz);
+ if (!new_prog_info)
return -ENOMEM;
- }
- prog->insns = new_insn;
+ *prog_info = new_prog_info;
+ *prog_rec_cnt = new_sz / ext_info->rec_size;
+ memcpy(new_prog_info + old_sz, copy_start, copy_end - copy_start);
+
+ /* Kernel instruction offsets are in units of 8-byte
+ * instructions, while .BTF.ext instruction offsets generated
+ * by Clang are in units of bytes. So convert Clang offsets
+ * into kernel offsets and adjust offset according to program
+ * relocated position.
+ */
+ off_adj = prog->sub_insn_off - prog->sec_insn_off;
+ rec = new_prog_info + old_sz;
+ rec_end = new_prog_info + new_sz;
+ for (; rec < rec_end; rec += ext_info->rec_size) {
+ __u32 *insn_off = rec;
- if (obj->btf_ext) {
- err = bpf_program_reloc_btf_ext(prog, obj,
- text->section_name,
- prog->insns_cnt);
- if (err)
- return err;
+ *insn_off = *insn_off / BPF_INSN_SZ + off_adj;
}
-
- memcpy(new_insn + prog->insns_cnt, text->insns,
- text->insns_cnt * sizeof(*insn));
- prog->main_prog_cnt = prog->insns_cnt;
- prog->insns_cnt = new_cnt;
- pr_debug("added %zd insn from %s to prog %s\n",
- text->insns_cnt, text->section_name,
- prog->section_name);
+ *prog_rec_sz = ext_info->rec_size;
+ return 0;
}
- insn = &prog->insns[relo->insn_idx];
- insn->imm += relo->sym_off / 8 + prog->main_prog_cnt - relo->insn_idx;
- return 0;
+ return -ENOENT;
}
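
A worked instance of the offset conversion above (numbers invented): a
.BTF.ext record carrying byte offset 40 describes insn #5 (40 / 8). If that
subprog started at section insn #0 (sec_insn_off) but was appended at insn
#100 of the main prog (sub_insn_off), then off_adj = 100 - 0 and the record
is rewritten to 5 + 100 = 105.
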
static int
-bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj)
+reloc_prog_func_and_line_info(const struct bpf_object *obj,
+ struct bpf_program *main_prog,
+ const struct bpf_program *prog)
{
- int i, err;
+ int err;
- if (!prog)
+ /* no .BTF.ext relocation if .BTF.ext is missing or kernel doesn't
+ * support func/line info
+ */
+ if (!obj->btf_ext || !kernel_supports(FEAT_BTF_FUNC))
return 0;
- if (obj->btf_ext) {
- err = bpf_program_reloc_btf_ext(prog, obj,
- prog->section_name, 0);
- if (err)
+ /* only attempt func info relocation if main program's func_info
+ * relocation was successful
+ */
+ if (main_prog != prog && !main_prog->func_info)
+ goto line_info;
+
+ err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->func_info,
+ &main_prog->func_info,
+ &main_prog->func_info_cnt,
+ &main_prog->func_info_rec_size);
+ if (err) {
+ if (err != -ENOENT) {
+ pr_warn("prog '%s': error relocating .BTF.ext function info: %d\n",
+ prog->name, err);
+ return err;
+ }
+ if (main_prog->func_info) {
+ /*
+ * Some info has already been found but has problem
+ * in the last btf_ext reloc. Must have to error out.
+ */
+ pr_warn("prog '%s': missing .BTF.ext function info.\n", prog->name);
return err;
+ }
+ /* Had a problem loading the very first info; ignore the rest. */
+ pr_warn("prog '%s': missing .BTF.ext function info for the main program, skipping all of .BTF.ext func info.\n",
+ prog->name);
}
- if (!prog->reloc_desc)
+line_info:
+ /* don't relocate line info if main program's relocation failed */
+ if (main_prog != prog && !main_prog->line_info)
return 0;
- for (i = 0; i < prog->nr_reloc; i++) {
- struct reloc_desc *relo = &prog->reloc_desc[i];
- struct bpf_insn *insn = &prog->insns[relo->insn_idx];
- struct extern_desc *ext;
+ err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->line_info,
+ &main_prog->line_info,
+ &main_prog->line_info_cnt,
+ &main_prog->line_info_rec_size);
+ if (err) {
+ if (err != -ENOENT) {
+ pr_warn("prog '%s': error relocating .BTF.ext line info: %d\n",
+ prog->name, err);
+ return err;
+ }
+ if (main_prog->line_info) {
+ /*
+ * Some info has already been found, but the last
+ * btf_ext reloc has a problem. Must error out.
+ */
+ pr_warn("prog '%s': missing .BTF.ext line info.\n", prog->name);
+ return err;
+ }
+ /* Had a problem loading the very first info; ignore the rest. */
+ pr_warn("prog '%s': missing .BTF.ext line info for the main program, skipping all of .BTF.ext line info.\n",
+ prog->name);
+ }
+ return 0;
+}
+
+static int cmp_relo_by_insn_idx(const void *key, const void *elem)
+{
+ size_t insn_idx = *(const size_t *)key;
+ const struct reloc_desc *relo = elem;
+
+ if (insn_idx == relo->insn_idx)
+ return 0;
+ return insn_idx < relo->insn_idx ? -1 : 1;
+}
+
+static struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, size_t insn_idx)
+{
+ return bsearch(&insn_idx, prog->reloc_desc, prog->nr_reloc,
+ sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx);
+}
+
+static int
+bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
+ struct bpf_program *prog)
+{
+ size_t sub_insn_idx, insn_idx, new_cnt;
+ struct bpf_program *subprog;
+ struct bpf_insn *insns, *insn;
+ struct reloc_desc *relo;
+ int err;
- if (relo->insn_idx + 1 >= (int)prog->insns_cnt) {
- pr_warn("relocation out of range: '%s'\n",
- prog->section_name);
+ err = reloc_prog_func_and_line_info(obj, main_prog, prog);
+ if (err)
+ return err;
+
+ for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) {
+ insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
+ if (!insn_is_subprog_call(insn))
+ continue;
+
+ relo = find_prog_insn_relo(prog, insn_idx);
+ if (relo && relo->type != RELO_CALL) {
+ pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n",
+ prog->name, insn_idx, relo->type);
return -LIBBPF_ERRNO__RELOC;
}
+ if (relo) {
+ /* sub-program instruction index is a combination of
+ * an offset of a symbol pointed to by relocation and
+ * call instruction's imm field; for global functions,
+ * call always has imm = -1, but for static functions
+ * relocation is against STT_SECTION and insn->imm
+ * points to a start of a static function
+ */
+ sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1;
+ } else {
+ /* if subprogram call is to a static function within
+ * the same ELF section, there won't be any relocation
+ * emitted, but it also means there is no additional
+ * offset necessary, insns->imm is relative to
+ * instruction's original position within the section
+ */
+ sub_insn_idx = prog->sec_insn_off + insn_idx + insn->imm + 1;
+ }
- switch (relo->type) {
- case RELO_LD64:
- insn[0].src_reg = BPF_PSEUDO_MAP_FD;
- insn[0].imm = obj->maps[relo->map_idx].fd;
- break;
- case RELO_DATA:
- insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
- insn[1].imm = insn[0].imm + relo->sym_off;
- insn[0].imm = obj->maps[relo->map_idx].fd;
- break;
- case RELO_EXTERN:
- ext = &obj->externs[relo->sym_off];
- if (ext->type == EXT_KCFG) {
- insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
- insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
- insn[1].imm = ext->kcfg.data_off;
- } else /* EXT_KSYM */ {
- insn[0].imm = (__u32)ext->ksym.addr;
- insn[1].imm = ext->ksym.addr >> 32;
+ /* we enforce that sub-programs should be in .text section */
+ subprog = find_prog_by_sec_insn(obj, obj->efile.text_shndx, sub_insn_idx);
+ if (!subprog) {
+ pr_warn("prog '%s': no .text section found yet sub-program call exists\n",
+ prog->name);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+
+ /* if it's the first call instruction calling into this
+ * subprogram (meaning this subprog hasn't been processed
+ * yet) within the context of current main program:
+ * - append it at the end of main program's instructions block;
+ * - process it recursively, while current program is put on hold;
+ * - if that subprogram calls some other not yet processed
+ * subprogram, same thing will happen recursively until
+ * there are no more unprocessed subprograms left to append
+ * and relocate.
+ */
+ if (subprog->sub_insn_off == 0) {
+ subprog->sub_insn_off = main_prog->insns_cnt;
+
+ new_cnt = main_prog->insns_cnt + subprog->insns_cnt;
+ insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns));
+ if (!insns) {
+ pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name);
+ return -ENOMEM;
}
- break;
- case RELO_CALL:
- err = bpf_program__reloc_text(prog, obj, relo);
+ main_prog->insns = insns;
+ main_prog->insns_cnt = new_cnt;
+
+ memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns,
+ subprog->insns_cnt * sizeof(*insns));
+
+ pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n",
+ main_prog->name, subprog->insns_cnt, subprog->name);
+
+ err = bpf_object__reloc_code(obj, main_prog, subprog);
if (err)
return err;
- break;
- default:
- pr_warn("relo #%d: bad relo type %d\n", i, relo->type);
- return -EINVAL;
}
+
+ /* main_prog->insns memory could have been re-allocated, so
+ * calculate pointer again
+ */
+ insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
+ /* calculate correct instruction position within current main
+ * prog; each main prog can have a different set of
+ * subprograms appended (potentially in different order as
+ * well), so position of any subprog can be different for
+ * different main programs */
+ insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1;
+
+ if (relo)
+ relo->processed = true;
+
+ pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n",
+ prog->name, insn_idx, insn->imm, subprog->name, subprog->sub_insn_off);
}
- zfree(&prog->reloc_desc);
- prog->nr_reloc = 0;
+ return 0;
+}
+
+/*
+ * Relocate sub-program calls.
+ *
+ * Algorithm operates as follows. Each entry-point BPF program (referred to as
+ * main prog) is processed separately. Each subprog (a non-entry function that
+ * can be called from either entry progs or other subprogs) gets its
+ * sub_insn_off reset to zero. This serves as an indicator that this subprogram
+ * hasn't yet been appended and relocated within the current main prog. Once it's
+ * relocated, sub_insn_off will point at the position within the current main prog
+ * where the given subprog was appended. This will further be used to relocate all
+ * the call instructions jumping into this subprog.
+ *
+ * We start with main program and process all call instructions. If the call
+ * is into a subprog that hasn't been processed (i.e., subprog->sub_insn_off
+ * is zero), subprog instructions are appended at the end of main program's
+ * instruction array. Then main program is "put on hold" while we recursively
+ * process newly appended subprogram. If that subprogram calls into another
+ * subprogram that hasn't been appended, new subprogram is appended again to
+ * the *main* prog's instructions (subprog's instructions are always left
+ * untouched, as they need to stay in unmodified state for subsequent main progs,
+ * and subprog instructions are sent to the kernel only as part of a main prog) and
+ * the process continues recursively. Once all the subprogs called from a main
+ * prog or any of its subprogs are appended (and relocated), all their
+ * positions within finalized instructions array are known, so it's easy to
+ * rewrite call instructions with correct relative offsets, corresponding to
+ * desired target subprog.
+ *
+ * It's important to realize that some subprogs might not be called from a given
+ * main prog or any of its called/used subprogs. Those will keep their
+ * subprog->sub_insn_off as zero at all times and won't be appended to current
+ * main prog and won't be relocated within the context of current main prog.
+ * They might still be used from other main progs later.
+ *
+ * Visually this process can be shown as below. Suppose we have two main
+ * programs mainA and mainB and BPF object contains three subprogs: subA,
+ * subB, and subC. mainA calls only subA, mainB calls only subC, but subA and
+ * subC both call subB:
+ *
+ * +--------+ +-------+
+ * | v v |
+ * +--+---+ +--+-+-+ +---+--+
+ * | subA | | subB | | subC |
+ * +--+---+ +------+ +---+--+
+ * ^ ^
+ * | |
+ * +---+-------+ +------+----+
+ * | mainA | | mainB |
+ * +-----------+ +-----------+
+ *
+ * We'll start relocating mainA, will find subA, append it and start
+ * processing sub A recursively:
+ *
+ * +-----------+------+
+ * | mainA | subA |
+ * +-----------+------+
+ *
+ * At this point we notice that subB is used from subA, so we append it and
+ * relocate (there are no further subcalls from subB):
+ *
+ * +-----------+------+------+
+ * | mainA | subA | subB |
+ * +-----------+------+------+
+ *
+ * At this point, we relocate subA calls, then go one level up and finish with
+ * relocating mainA calls. mainA is done.
+ *
+ * For mainB the process is similar but results in a different order. We start with
+ * mainB and skip subA and subB, as mainB never calls them (at least
+ * directly), but we see subC is needed, so we append and start processing it:
+ *
+ * +-----------+------+
+ * | mainB | subC |
+ * +-----------+------+
+ *
+ * Now we see subC needs subB, so we go back to it, append and relocate it:
+ *
+ * +-----------+------+------+
+ * | mainB | subC | subB |
+ * +-----------+------+------+
+ *
+ * At this point we unwind recursion, relocate calls in subC, then in mainB.
+ */
+static int
+bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog)
+{
+ struct bpf_program *subprog;
+ int i, j, err;
+
+ /* mark all subprogs as not relocated (yet) within the context of
+ * current main program
+ */
+ for (i = 0; i < obj->nr_programs; i++) {
+ subprog = &obj->programs[i];
+ if (!prog_is_subprog(obj, subprog))
+ continue;
+
+ subprog->sub_insn_off = 0;
+ for (j = 0; j < subprog->nr_reloc; j++)
+ if (subprog->reloc_desc[j].type == RELO_CALL)
+ subprog->reloc_desc[j].processed = false;
+ }
+
+ err = bpf_object__reloc_code(obj, prog, prog);
+ if (err)
+ return err;
+
return 0;
}
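
A worked instance of the imm computation in bpf_object__reloc_code() for the
mainA picture above (sizes invented): if mainA is 5 insns long, subA is
appended at sub_insn_off = 5, and the call to subA sits at insn #2 of mainA,
then

    insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1
              = 5 - (0 + 2) - 1
              = 2

i.e. jump two instructions past the one following the call, landing exactly
on subA's first appended instruction.
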
@@ -5161,35 +6366,45 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
return err;
}
}
- /* ensure .text is relocated first, as it's going to be copied as-is
- * later for sub-program calls
+ /* relocate data references first for all programs and sub-programs,
+ * as they don't change relative to code locations, so subsequent
+ * subprogram processing won't need to re-calculate any of them
*/
for (i = 0; i < obj->nr_programs; i++) {
prog = &obj->programs[i];
- if (prog->idx != obj->efile.text_shndx)
- continue;
-
- err = bpf_program__relocate(prog, obj);
+ err = bpf_object__relocate_data(obj, prog);
if (err) {
- pr_warn("failed to relocate '%s'\n", prog->section_name);
+ pr_warn("prog '%s': failed to relocate data references: %d\n",
+ prog->name, err);
return err;
}
- break;
}
- /* now relocate everything but .text, which by now is relocated
- * properly, so we can copy raw sub-program instructions as is safely
+ /* now relocate subprogram calls and append used subprograms to main
+ * programs; each copy of subprogram code needs to be relocated
+ * differently for each main program, because its code location might
+ * have changed
*/
for (i = 0; i < obj->nr_programs; i++) {
prog = &obj->programs[i];
- if (prog->idx == obj->efile.text_shndx)
+ /* sub-program's sub-calls are relocated within the context of
+ * its main program only
+ */
+ if (prog_is_subprog(obj, prog))
continue;
- err = bpf_program__relocate(prog, obj);
+ err = bpf_object__relocate_calls(obj, prog);
if (err) {
- pr_warn("failed to relocate '%s'\n", prog->section_name);
+ pr_warn("prog '%s': failed to relocate calls: %d\n",
+ prog->name, err);
return err;
}
}
+ /* free up relocation descriptors */
+ for (i = 0; i < obj->nr_programs; i++) {
+ prog = &obj->programs[i];
+ zfree(&prog->reloc_desc);
+ prog->nr_reloc = 0;
+ }
return 0;
}
@@ -5230,8 +6445,7 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj,
i, (size_t)GELF_R_SYM(rel.r_info));
return -LIBBPF_ERRNO__FORMAT;
}
- name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
- sym.st_name) ? : "<?>";
+ name = elf_sym_str(obj, sym.st_name) ?: "<?>";
if (sym.st_shndx != obj->efile.btf_maps_shndx) {
pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n",
i, name);
@@ -5293,7 +6507,7 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj,
moff /= bpf_ptr_sz;
if (moff >= map->init_slots_sz) {
new_sz = moff + 1;
- tmp = realloc(map->init_slots, new_sz * host_ptr_sz);
+ tmp = libbpf_reallocarray(map->init_slots, new_sz, host_ptr_sz);
if (!tmp)
return -ENOMEM;
map->init_slots = tmp;
@@ -5310,41 +6524,98 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj,
return 0;
}
-static int bpf_object__collect_reloc(struct bpf_object *obj)
+static int cmp_relocs(const void *_a, const void *_b)
{
- int i, err;
+ const struct reloc_desc *a = _a;
+ const struct reloc_desc *b = _b;
- if (!obj_elf_valid(obj)) {
- pr_warn("Internal error: elf object is closed\n");
- return -LIBBPF_ERRNO__INTERNAL;
- }
+ if (a->insn_idx != b->insn_idx)
+ return a->insn_idx < b->insn_idx ? -1 : 1;
+
+ /* no two relocations should have the same insn_idx, but ... */
+ if (a->type != b->type)
+ return a->type < b->type ? -1 : 1;
+
+ return 0;
+}
+
+static int bpf_object__collect_relos(struct bpf_object *obj)
+{
+ int i, err;
for (i = 0; i < obj->efile.nr_reloc_sects; i++) {
GElf_Shdr *shdr = &obj->efile.reloc_sects[i].shdr;
Elf_Data *data = obj->efile.reloc_sects[i].data;
int idx = shdr->sh_info;
- struct bpf_program *prog;
if (shdr->sh_type != SHT_REL) {
pr_warn("internal error at %d\n", __LINE__);
return -LIBBPF_ERRNO__INTERNAL;
}
- if (idx == obj->efile.st_ops_shndx) {
+ if (idx == obj->efile.st_ops_shndx)
err = bpf_object__collect_st_ops_relos(obj, shdr, data);
- } else if (idx == obj->efile.btf_maps_shndx) {
+ else if (idx == obj->efile.btf_maps_shndx)
err = bpf_object__collect_map_relos(obj, shdr, data);
- } else {
- prog = bpf_object__find_prog_by_idx(obj, idx);
- if (!prog) {
- pr_warn("relocation failed: no prog in section(%d)\n", idx);
- return -LIBBPF_ERRNO__RELOC;
- }
- err = bpf_program__collect_reloc(prog, shdr, data, obj);
- }
+ else
+ err = bpf_object__collect_prog_relos(obj, shdr, data);
if (err)
return err;
}
+
+ for (i = 0; i < obj->nr_programs; i++) {
+ struct bpf_program *p = &obj->programs[i];
+
+ if (!p->nr_reloc)
+ continue;
+
+ qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs);
+ }
+ return 0;
+}
+
+static bool insn_is_helper_call(struct bpf_insn *insn, enum bpf_func_id *func_id)
+{
+ if (BPF_CLASS(insn->code) == BPF_JMP &&
+ BPF_OP(insn->code) == BPF_CALL &&
+ BPF_SRC(insn->code) == BPF_K &&
+ insn->src_reg == 0 &&
+ insn->dst_reg == 0) {
+ *func_id = insn->imm;
+ return true;
+ }
+ return false;
+}
+
+static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program *prog)
+{
+ struct bpf_insn *insn = prog->insns;
+ enum bpf_func_id func_id;
+ int i;
+
+ for (i = 0; i < prog->insns_cnt; i++, insn++) {
+ if (!insn_is_helper_call(insn, &func_id))
+ continue;
+
+ /* on kernels that don't yet support
+ * bpf_probe_read_{kernel,user}[_str] helpers, fall back
+ * to bpf_probe_read() which works well for old kernels
+ */
+ switch (func_id) {
+ case BPF_FUNC_probe_read_kernel:
+ case BPF_FUNC_probe_read_user:
+ if (!kernel_supports(FEAT_PROBE_READ_KERN))
+ insn->imm = BPF_FUNC_probe_read;
+ break;
+ case BPF_FUNC_probe_read_kernel_str:
+ case BPF_FUNC_probe_read_user_str:
+ if (!kernel_supports(FEAT_PROBE_READ_KERN))
+ insn->imm = BPF_FUNC_probe_read_str;
+ break;
+ default:
+ break;
+ }
+ }
return 0;
}
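
As a standalone sketch of the same detect-and-substitute pattern, assuming uapi headers recent enough to define the probe_read_{kernel,user}[_str] helper IDs (5.5+):

#include <stddef.h>
#include <stdbool.h>
#include <linux/bpf.h>

/* same test as insn_is_helper_call() above: an unconditional BPF_CALL
 * with no source/dest registers is a call to a BPF helper by ID
 */
static bool is_helper_call(const struct bpf_insn *insn, enum bpf_func_id *id)
{
	if (BPF_CLASS(insn->code) == BPF_JMP &&
	    BPF_OP(insn->code) == BPF_CALL &&
	    BPF_SRC(insn->code) == BPF_K &&
	    insn->src_reg == 0 && insn->dst_reg == 0) {
		*id = insn->imm;
		return true;
	}
	return false;
}

/* rewrite the newer probe_read variants to plain bpf_probe_read() when
 * the running kernel predates them; only insn->imm changes, the call
 * instruction encoding is otherwise identical
 */
static void sanitize_probe_reads(struct bpf_insn *insns, size_t cnt,
				 bool have_new_helpers)
{
	enum bpf_func_id id;
	size_t i;

	for (i = 0; i < cnt; i++) {
		if (have_new_helpers || !is_helper_call(&insns[i], &id))
			continue;
		if (id == BPF_FUNC_probe_read_kernel ||
		    id == BPF_FUNC_probe_read_user)
			insns[i].imm = BPF_FUNC_probe_read;
		else if (id == BPF_FUNC_probe_read_kernel_str ||
			 id == BPF_FUNC_probe_read_user_str)
			insns[i].imm = BPF_FUNC_probe_read_str;
	}
}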
@@ -5364,12 +6635,12 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
load_attr.prog_type = prog->type;
/* old kernels might not support specifying expected_attach_type */
- if (!prog->caps->exp_attach_type && prog->sec_def &&
+ if (!kernel_supports(FEAT_EXP_ATTACH_TYPE) && prog->sec_def &&
prog->sec_def->is_exp_attach_type_optional)
load_attr.expected_attach_type = 0;
else
load_attr.expected_attach_type = prog->expected_attach_type;
- if (prog->caps->name)
+ if (kernel_supports(FEAT_PROG_NAME))
load_attr.name = prog->name;
load_attr.insns = insns;
load_attr.insns_cnt = insns_cnt;
@@ -5387,7 +6658,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
}
/* specify func_info/line_info only if kernel supports them */
btf_fd = bpf_object__btf_fd(prog->obj);
- if (btf_fd >= 0 && prog->obj->caps.btf_func) {
+ if (btf_fd >= 0 && kernel_supports(FEAT_BTF_FUNC)) {
load_attr.prog_btf_fd = btf_fd;
load_attr.func_info = prog->func_info;
load_attr.func_info_rec_size = prog->func_info_rec_size;
@@ -5413,6 +6684,20 @@ retry_load:
if (ret >= 0) {
if (log_buf && load_attr.log_level)
pr_debug("verifier log:\n%s", log_buf);
+
+ if (prog->obj->rodata_map_idx >= 0 &&
+ kernel_supports(FEAT_PROG_BIND_MAP)) {
+ struct bpf_map *rodata_map =
+ &prog->obj->maps[prog->obj->rodata_map_idx];
+
+ if (bpf_prog_bind_map(ret, bpf_map__fd(rodata_map), NULL)) {
+ cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
+ pr_warn("prog '%s': failed to bind .rodata map: %s\n",
+ prog->name, cp);
+ /* Don't fail hard if can't bind rodata. */
+ }
+ }
+
*pfd = ret;
ret = 0;
goto out;
@@ -5425,7 +6710,7 @@ retry_load:
free(log_buf);
goto retry_load;
}
- ret = -errno;
+ ret = errno ? -errno : -LIBBPF_ERRNO__LOAD;
cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
pr_warn("load bpf program failed: %s\n", cp);
pr_perm_msg(ret);
@@ -5465,8 +6750,7 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
int err = 0, fd, i, btf_id;
if (prog->obj->loaded) {
- pr_warn("prog '%s'('%s'): can't load after object was loaded\n",
- prog->name, prog->section_name);
+ pr_warn("prog '%s': can't load after object was loaded\n", prog->name);
return -EINVAL;
}
@@ -5482,7 +6766,7 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
if (prog->instances.nr < 0 || !prog->instances.fds) {
if (prog->preprocessor) {
pr_warn("Internal error: can't load program '%s'\n",
- prog->section_name);
+ prog->name);
return -LIBBPF_ERRNO__INTERNAL;
}
@@ -5497,8 +6781,8 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
if (!prog->preprocessor) {
if (prog->instances.nr != 1) {
- pr_warn("Program '%s' is inconsistent: nr(%d) != 1\n",
- prog->section_name, prog->instances.nr);
+ pr_warn("prog '%s': inconsistent nr(%d) != 1\n",
+ prog->name, prog->instances.nr);
}
err = load_program(prog, prog->insns, prog->insns_cnt,
license, kern_ver, &fd);
@@ -5516,13 +6800,13 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
prog->insns_cnt, &result);
if (err) {
pr_warn("Preprocessing the %dth instance of program '%s' failed\n",
- i, prog->section_name);
+ i, prog->name);
goto out;
}
if (!result.new_insn_ptr || !result.new_insn_cnt) {
pr_debug("Skip loading the %dth instance of program '%s'\n",
- i, prog->section_name);
+ i, prog->name);
prog->instances.fds[i] = -1;
if (result.pfd)
*result.pfd = -1;
@@ -5533,7 +6817,7 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
result.new_insn_cnt, license, kern_ver, &fd);
if (err) {
pr_warn("Loading the %dth instance of program '%s' failed\n",
- i, prog->section_name);
+ i, prog->name);
goto out;
}
@@ -5543,18 +6827,12 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
}
out:
if (err)
- pr_warn("failed to load program '%s'\n", prog->section_name);
+ pr_warn("failed to load program '%s'\n", prog->name);
zfree(&prog->insns);
prog->insns_cnt = 0;
return err;
}
-static bool bpf_program__is_function_storage(const struct bpf_program *prog,
- const struct bpf_object *obj)
-{
- return prog->idx == obj->efile.text_shndx && obj->has_pseudo_calls;
-}
-
static int
bpf_object__load_progs(struct bpf_object *obj, int log_level)
{
@@ -5564,11 +6842,17 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level)
for (i = 0; i < obj->nr_programs; i++) {
prog = &obj->programs[i];
- if (bpf_program__is_function_storage(prog, obj))
+ err = bpf_object__sanitize_prog(obj, prog);
+ if (err)
+ return err;
+ }
+
+ for (i = 0; i < obj->nr_programs; i++) {
+ prog = &obj->programs[i];
+ if (prog_is_subprog(obj, prog))
continue;
if (!prog->load) {
- pr_debug("prog '%s'('%s'): skipped loading\n",
- prog->name, prog->section_name);
+ pr_debug("prog '%s': skipped loading\n", prog->name);
continue;
}
prog->log_level |= log_level;
@@ -5629,18 +6913,19 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
err = err ? : bpf_object__collect_externs(obj);
err = err ? : bpf_object__finalize_btf(obj);
err = err ? : bpf_object__init_maps(obj, opts);
- err = err ? : bpf_object__init_prog_names(obj);
- err = err ? : bpf_object__collect_reloc(obj);
+ err = err ? : bpf_object__collect_relos(obj);
if (err)
goto out;
bpf_object__elf_finish(obj);
bpf_object__for_each_program(prog, obj) {
- prog->sec_def = find_sec_def(prog->section_name);
+ prog->sec_def = find_sec_def(prog->sec_name);
if (!prog->sec_def)
/* couldn't guess, but user might manually specify */
continue;
+ if (prog->sec_def->is_sleepable)
+ prog->prog_flags |= BPF_F_SLEEPABLE;
bpf_program__set_type(prog, prog->sec_def->prog_type);
bpf_program__set_expected_attach_type(prog,
prog->sec_def->expected_attach_type);
@@ -5750,11 +7035,11 @@ static int bpf_object__sanitize_maps(struct bpf_object *obj)
bpf_object__for_each_map(m, obj) {
if (!bpf_map__is_internal(m))
continue;
- if (!obj->caps.global_data) {
+ if (!kernel_supports(FEAT_GLOBAL_DATA)) {
pr_warn("kernel doesn't support global data\n");
return -ENOTSUP;
}
- if (!obj->caps.array_mmap)
+ if (!kernel_supports(FEAT_ARRAY_MMAP))
m->def.map_flags ^= BPF_F_MMAPABLE;
}
@@ -5809,10 +7094,72 @@ out:
return err;
}
+static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj)
+{
+ struct extern_desc *ext;
+ int i, id;
+
+ for (i = 0; i < obj->nr_extern; i++) {
+ const struct btf_type *targ_var, *targ_type;
+ __u32 targ_type_id, local_type_id;
+ const char *targ_var_name;
+ int ret;
+
+ ext = &obj->externs[i];
+ if (ext->type != EXT_KSYM || !ext->ksym.type_id)
+ continue;
+
+ id = btf__find_by_name_kind(obj->btf_vmlinux, ext->name,
+ BTF_KIND_VAR);
+ if (id <= 0) {
+ pr_warn("extern (ksym) '%s': failed to find BTF ID in vmlinux BTF.\n",
+ ext->name);
+ return -ESRCH;
+ }
+
+ /* find local type_id */
+ local_type_id = ext->ksym.type_id;
+
+ /* find target type_id */
+ targ_var = btf__type_by_id(obj->btf_vmlinux, id);
+ targ_var_name = btf__name_by_offset(obj->btf_vmlinux,
+ targ_var->name_off);
+ targ_type = skip_mods_and_typedefs(obj->btf_vmlinux,
+ targ_var->type,
+ &targ_type_id);
+
+ ret = bpf_core_types_are_compat(obj->btf, local_type_id,
+ obj->btf_vmlinux, targ_type_id);
+ if (ret <= 0) {
+ const struct btf_type *local_type;
+ const char *targ_name, *local_name;
+
+ local_type = btf__type_by_id(obj->btf, local_type_id);
+ local_name = btf__name_by_offset(obj->btf,
+ local_type->name_off);
+ targ_name = btf__name_by_offset(obj->btf_vmlinux,
+ targ_type->name_off);
+
+ pr_warn("extern (ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n",
+ ext->name, local_type_id,
+ btf_kind_str(local_type), local_name, targ_type_id,
+ btf_kind_str(targ_type), targ_name);
+ return -EINVAL;
+ }
+
+ ext->is_set = true;
+ ext->ksym.vmlinux_btf_id = id;
+ pr_debug("extern (ksym) '%s': resolved to [%d] %s %s\n",
+ ext->name, id, btf_kind_str(targ_var), targ_var_name);
+ }
+ return 0;
+}
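
On the BPF program side, the resolution above corresponds to a typed __ksym extern; a minimal sketch modeled on the selftests, assuming a 5.10+ kernel with vmlinux BTF and the bpf_this_cpu_ptr() helper:

// SPDX-License-Identifier: GPL-2.0
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

/* typed ksym: the loader matches this declaration's BTF type against
 * the variable's type in vmlinux BTF, as in the function above
 */
extern const int bpf_prog_active __ksym;

SEC("raw_tp/sys_enter")
int dump_prog_active(const void *ctx)
{
	const int *active = bpf_this_cpu_ptr(&bpf_prog_active);

	bpf_printk("bpf_prog_active = %d", *active);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";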
+
static int bpf_object__resolve_externs(struct bpf_object *obj,
const char *extra_kconfig)
{
bool need_config = false, need_kallsyms = false;
+ bool need_vmlinux_btf = false;
struct extern_desc *ext;
void *kcfg_data = NULL;
int err, i;
@@ -5843,7 +7190,10 @@ static int bpf_object__resolve_externs(struct bpf_object *obj,
strncmp(ext->name, "CONFIG_", 7) == 0) {
need_config = true;
} else if (ext->type == EXT_KSYM) {
- need_kallsyms = true;
+ if (ext->ksym.type_id)
+ need_vmlinux_btf = true;
+ else
+ need_kallsyms = true;
} else {
pr_warn("unrecognized extern '%s'\n", ext->name);
return -EINVAL;
@@ -5872,6 +7222,11 @@ static int bpf_object__resolve_externs(struct bpf_object *obj,
if (err)
return -EINVAL;
}
+ if (need_vmlinux_btf) {
+ err = bpf_object__resolve_ksyms_btf_id(obj);
+ if (err)
+ return -EINVAL;
+ }
for (i = 0; i < obj->nr_extern; i++) {
ext = &obj->externs[i];
@@ -5904,11 +7259,10 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
}
err = bpf_object__probe_loading(obj);
- err = err ? : bpf_object__probe_caps(obj);
+ err = err ? : bpf_object__load_vmlinux_btf(obj);
err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
err = err ? : bpf_object__sanitize_and_load_btf(obj);
err = err ? : bpf_object__sanitize_maps(obj);
- err = err ? : bpf_object__load_vmlinux_btf(obj);
err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
err = err ? : bpf_object__create_maps(obj);
err = err ? : bpf_object__relocate(obj, attr->target_btf_path);
@@ -6016,7 +7370,7 @@ int bpf_program__pin_instance(struct bpf_program *prog, const char *path,
if (instance < 0 || instance >= prog->instances.nr) {
pr_warn("invalid prog instance %d of prog %s (max %d)\n",
- instance, prog->section_name, prog->instances.nr);
+ instance, prog->name, prog->instances.nr);
return -EINVAL;
}
@@ -6047,7 +7401,7 @@ int bpf_program__unpin_instance(struct bpf_program *prog, const char *path,
if (instance < 0 || instance >= prog->instances.nr) {
pr_warn("invalid prog instance %d of prog %s (max %d)\n",
- instance, prog->section_name, prog->instances.nr);
+ instance, prog->name, prog->instances.nr);
return -EINVAL;
}
@@ -6077,8 +7431,7 @@ int bpf_program__pin(struct bpf_program *prog, const char *path)
}
if (prog->instances.nr <= 0) {
- pr_warn("no instances of prog %s to pin\n",
- prog->section_name);
+ pr_warn("no instances of prog %s to pin\n", prog->name);
return -EINVAL;
}
@@ -6140,8 +7493,7 @@ int bpf_program__unpin(struct bpf_program *prog, const char *path)
}
if (prog->instances.nr <= 0) {
- pr_warn("no instances of prog %s to pin\n",
- prog->section_name);
+ pr_warn("no instances of prog %s to pin\n", prog->name);
return -EINVAL;
}
@@ -6633,7 +7985,7 @@ bpf_program__next(struct bpf_program *prev, const struct bpf_object *obj)
do {
prog = __bpf_program__iter(prog, obj, true);
- } while (prog && bpf_program__is_function_storage(prog, obj));
+ } while (prog && prog_is_subprog(obj, prog));
return prog;
}
@@ -6645,7 +7997,7 @@ bpf_program__prev(struct bpf_program *next, const struct bpf_object *obj)
do {
prog = __bpf_program__iter(prog, obj, false);
- } while (prog && bpf_program__is_function_storage(prog, obj));
+ } while (prog && prog_is_subprog(obj, prog));
return prog;
}
@@ -6676,11 +8028,16 @@ const char *bpf_program__name(const struct bpf_program *prog)
return prog->name;
}
+const char *bpf_program__section_name(const struct bpf_program *prog)
+{
+ return prog->sec_name;
+}
+
const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy)
{
const char *title;
- title = prog->section_name;
+ title = prog->sec_name;
if (needs_copy) {
title = strdup(title);
if (!title) {
@@ -6713,7 +8070,7 @@ int bpf_program__fd(const struct bpf_program *prog)
size_t bpf_program__size(const struct bpf_program *prog)
{
- return prog->insns_cnt * sizeof(struct bpf_insn);
+ return prog->insns_cnt * BPF_INSN_SZ;
}
int bpf_program__set_prep(struct bpf_program *prog, int nr_instances,
@@ -6753,14 +8110,14 @@ int bpf_program__nth_fd(const struct bpf_program *prog, int n)
if (n >= prog->instances.nr || n < 0) {
pr_warn("Can't get the %dth fd from program %s: only %d instances\n",
- n, prog->section_name, prog->instances.nr);
+ n, prog->name, prog->instances.nr);
return -EINVAL;
}
fd = prog->instances.fds[n];
if (fd < 0) {
pr_warn("%dth instance of program '%s' is invalid\n",
- n, prog->section_name);
+ n, prog->name);
return -ENOENT;
}
@@ -6910,6 +8267,21 @@ static const struct bpf_sec_def section_defs[] = {
.expected_attach_type = BPF_TRACE_FEXIT,
.is_attach_btf = true,
.attach_fn = attach_trace),
+ SEC_DEF("fentry.s/", TRACING,
+ .expected_attach_type = BPF_TRACE_FENTRY,
+ .is_attach_btf = true,
+ .is_sleepable = true,
+ .attach_fn = attach_trace),
+ SEC_DEF("fmod_ret.s/", TRACING,
+ .expected_attach_type = BPF_MODIFY_RETURN,
+ .is_attach_btf = true,
+ .is_sleepable = true,
+ .attach_fn = attach_trace),
+ SEC_DEF("fexit.s/", TRACING,
+ .expected_attach_type = BPF_TRACE_FEXIT,
+ .is_attach_btf = true,
+ .is_sleepable = true,
+ .attach_fn = attach_trace),
SEC_DEF("freplace/", EXT,
.is_attach_btf = true,
.attach_fn = attach_trace),
@@ -6917,6 +8289,11 @@ static const struct bpf_sec_def section_defs[] = {
.is_attach_btf = true,
.expected_attach_type = BPF_LSM_MAC,
.attach_fn = attach_lsm),
+ SEC_DEF("lsm.s/", LSM,
+ .is_attach_btf = true,
+ .is_sleepable = true,
+ .expected_attach_type = BPF_LSM_MAC,
+ .attach_fn = attach_lsm),
SEC_DEF("iter/", TRACING,
.expected_attach_type = BPF_TRACE_ITER,
.is_attach_btf = true,
@@ -6925,7 +8302,7 @@ static const struct bpf_sec_def section_defs[] = {
BPF_XDP_DEVMAP),
BPF_EAPROG_SEC("xdp_cpumap/", BPF_PROG_TYPE_XDP,
BPF_XDP_CPUMAP),
- BPF_EAPROG_SEC("xdp", BPF_PROG_TYPE_XDP,
+ BPF_APROG_SEC("xdp", BPF_PROG_TYPE_XDP,
BPF_XDP),
BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT),
BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN),
@@ -7100,7 +8477,7 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
const struct btf *btf;
struct bpf_map *map;
Elf_Data *symbols;
- unsigned int moff;
+ unsigned int moff, insn_idx;
const char *name;
__u32 member_idx;
GElf_Sym sym;
@@ -7122,8 +8499,7 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
return -LIBBPF_ERRNO__FORMAT;
}
- name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
- sym.st_name) ? : "<?>";
+ name = elf_sym_str(obj, sym.st_name) ?: "<?>";
map = find_struct_ops_map_by_offset(obj, rel.r_offset);
if (!map) {
pr_warn("struct_ops reloc: cannot find map at rel.r_offset %zu\n",
@@ -7146,6 +8522,12 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
map->name, (size_t)rel.r_offset, shdr_idx);
return -LIBBPF_ERRNO__RELOC;
}
+ if (sym.st_value % BPF_INSN_SZ) {
+ pr_warn("struct_ops reloc %s: invalid target program offset %llu\n",
+ map->name, (unsigned long long)sym.st_value);
+ return -LIBBPF_ERRNO__FORMAT;
+ }
+ insn_idx = sym.st_value / BPF_INSN_SZ;
member = find_member_by_offset(st_ops->type, moff * 8);
if (!member) {
@@ -7162,7 +8544,7 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
return -EINVAL;
}
- prog = bpf_object__find_prog_by_idx(obj, shdr_idx);
+ prog = find_prog_by_sec_insn(obj, shdr_idx, insn_idx);
if (!prog) {
pr_warn("struct_ops reloc %s: cannot find prog at shdr_idx %u to relocate func ptr %s\n",
map->name, shdr_idx, name);
@@ -7172,7 +8554,7 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
if (prog->type == BPF_PROG_TYPE_UNSPEC) {
const struct bpf_sec_def *sec_def;
- sec_def = find_sec_def(prog->section_name);
+ sec_def = find_sec_def(prog->sec_name);
if (sec_def &&
sec_def->prog_type != BPF_PROG_TYPE_STRUCT_OPS) {
/* for pr_warn */
@@ -7195,7 +8577,7 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
invalid_prog:
pr_warn("struct_ops reloc %s: cannot use prog %s in sec %s with type %u attach_btf_id %u expected_attach_type %u for func ptr %s\n",
- map->name, prog->name, prog->section_name, prog->type,
+ map->name, prog->name, prog->sec_name, prog->type,
prog->attach_btf_id, prog->expected_attach_type, name);
return -EINVAL;
}
@@ -7299,7 +8681,7 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog)
{
enum bpf_attach_type attach_type = prog->expected_attach_type;
__u32 attach_prog_fd = prog->attach_prog_fd;
- const char *name = prog->section_name;
+ const char *name = prog->sec_name;
int i, err;
if (!name)
@@ -7640,7 +9022,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
prog->prog_ifindex = attr->ifindex;
prog->log_level = attr->log_level;
- prog->prog_flags = attr->prog_flags;
+ prog->prog_flags |= attr->prog_flags;
if (!first_prog)
first_prog = prog;
}
@@ -7826,14 +9208,14 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
int prog_fd, err;
if (pfd < 0) {
- pr_warn("program '%s': invalid perf event FD %d\n",
- bpf_program__title(prog, false), pfd);
+ pr_warn("prog '%s': invalid perf event FD %d\n",
+ prog->name, pfd);
return ERR_PTR(-EINVAL);
}
prog_fd = bpf_program__fd(prog);
if (prog_fd < 0) {
- pr_warn("program '%s': can't attach BPF program w/o FD (did you load it?)\n",
- bpf_program__title(prog, false));
+ pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n",
+ prog->name);
return ERR_PTR(-EINVAL);
}
@@ -7846,20 +9228,18 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
err = -errno;
free(link);
- pr_warn("program '%s': failed to attach to pfd %d: %s\n",
- bpf_program__title(prog, false), pfd,
- libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+ pr_warn("prog '%s': failed to attach to pfd %d: %s\n",
+ prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
if (err == -EPROTO)
- pr_warn("program '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n",
- bpf_program__title(prog, false), pfd);
+ pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n",
+ prog->name, pfd);
return ERR_PTR(err);
}
if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
err = -errno;
free(link);
- pr_warn("program '%s': failed to enable pfd %d: %s\n",
- bpf_program__title(prog, false), pfd,
- libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+ pr_warn("prog '%s': failed to enable pfd %d: %s\n",
+ prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
return ERR_PTR(err);
}
return link;
@@ -7981,9 +9361,8 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name,
0 /* offset */, -1 /* pid */);
if (pfd < 0) {
- pr_warn("program '%s': failed to create %s '%s' perf event: %s\n",
- bpf_program__title(prog, false),
- retprobe ? "kretprobe" : "kprobe", func_name,
+ pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n",
+ prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
return ERR_PTR(pfd);
}
@@ -7991,9 +9370,8 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
if (IS_ERR(link)) {
close(pfd);
err = PTR_ERR(link);
- pr_warn("program '%s': failed to attach to %s '%s': %s\n",
- bpf_program__title(prog, false),
- retprobe ? "kretprobe" : "kprobe", func_name,
+ pr_warn("prog '%s': failed to attach to %s '%s': %s\n",
+ prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
return link;
}
@@ -8006,7 +9384,7 @@ static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
const char *func_name;
bool retprobe;
- func_name = bpf_program__title(prog, false) + sec->len;
+ func_name = prog->sec_name + sec->len;
retprobe = strcmp(sec->sec, "kretprobe/") == 0;
return bpf_program__attach_kprobe(prog, retprobe, func_name);
@@ -8024,9 +9402,8 @@ struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
pfd = perf_event_open_probe(true /* uprobe */, retprobe,
binary_path, func_offset, pid);
if (pfd < 0) {
- pr_warn("program '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
- bpf_program__title(prog, false),
- retprobe ? "uretprobe" : "uprobe",
+ pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
+ prog->name, retprobe ? "uretprobe" : "uprobe",
binary_path, func_offset,
libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
return ERR_PTR(pfd);
@@ -8035,9 +9412,8 @@ struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
if (IS_ERR(link)) {
close(pfd);
err = PTR_ERR(link);
- pr_warn("program '%s': failed to attach to %s '%s:0x%zx': %s\n",
- bpf_program__title(prog, false),
- retprobe ? "uretprobe" : "uprobe",
+ pr_warn("prog '%s': failed to attach to %s '%s:0x%zx': %s\n",
+ prog->name, retprobe ? "uretprobe" : "uprobe",
binary_path, func_offset,
libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
return link;
@@ -8105,9 +9481,8 @@ struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
pfd = perf_event_open_tracepoint(tp_category, tp_name);
if (pfd < 0) {
- pr_warn("program '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
- bpf_program__title(prog, false),
- tp_category, tp_name,
+ pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
+ prog->name, tp_category, tp_name,
libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
return ERR_PTR(pfd);
}
@@ -8115,9 +9490,8 @@ struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
if (IS_ERR(link)) {
close(pfd);
err = PTR_ERR(link);
- pr_warn("program '%s': failed to attach to tracepoint '%s/%s': %s\n",
- bpf_program__title(prog, false),
- tp_category, tp_name,
+ pr_warn("prog '%s': failed to attach to tracepoint '%s/%s': %s\n",
+ prog->name, tp_category, tp_name,
libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
return link;
}
@@ -8130,7 +9504,7 @@ static struct bpf_link *attach_tp(const struct bpf_sec_def *sec,
char *sec_name, *tp_cat, *tp_name;
struct bpf_link *link;
- sec_name = strdup(bpf_program__title(prog, false));
+ sec_name = strdup(prog->sec_name);
if (!sec_name)
return ERR_PTR(-ENOMEM);
@@ -8159,8 +9533,7 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
prog_fd = bpf_program__fd(prog);
if (prog_fd < 0) {
- pr_warn("program '%s': can't attach before loaded\n",
- bpf_program__title(prog, false));
+ pr_warn("prog '%s': can't attach before loaded\n", prog->name);
return ERR_PTR(-EINVAL);
}
@@ -8173,9 +9546,8 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
if (pfd < 0) {
pfd = -errno;
free(link);
- pr_warn("program '%s': failed to attach to raw tracepoint '%s': %s\n",
- bpf_program__title(prog, false), tp_name,
- libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
+ pr_warn("prog '%s': failed to attach to raw tracepoint '%s': %s\n",
+ prog->name, tp_name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
return ERR_PTR(pfd);
}
link->fd = pfd;
@@ -8185,7 +9557,7 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec,
struct bpf_program *prog)
{
- const char *tp_name = bpf_program__title(prog, false) + sec->len;
+ const char *tp_name = prog->sec_name + sec->len;
return bpf_program__attach_raw_tracepoint(prog, tp_name);
}
@@ -8199,8 +9571,7 @@ static struct bpf_link *bpf_program__attach_btf_id(struct bpf_program *prog)
prog_fd = bpf_program__fd(prog);
if (prog_fd < 0) {
- pr_warn("program '%s': can't attach before loaded\n",
- bpf_program__title(prog, false));
+ pr_warn("prog '%s': can't attach before loaded\n", prog->name);
return ERR_PTR(-EINVAL);
}
@@ -8213,9 +9584,8 @@ static struct bpf_link *bpf_program__attach_btf_id(struct bpf_program *prog)
if (pfd < 0) {
pfd = -errno;
free(link);
- pr_warn("program '%s': failed to attach: %s\n",
- bpf_program__title(prog, false),
- libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
+ pr_warn("prog '%s': failed to attach: %s\n",
+ prog->name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
return ERR_PTR(pfd);
}
link->fd = pfd;
@@ -8251,9 +9621,11 @@ static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
}
static struct bpf_link *
-bpf_program__attach_fd(struct bpf_program *prog, int target_fd,
+bpf_program__attach_fd(struct bpf_program *prog, int target_fd, int btf_id,
const char *target_name)
{
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts,
+ .target_btf_id = btf_id);
enum bpf_attach_type attach_type;
char errmsg[STRERR_BUFSIZE];
struct bpf_link *link;
@@ -8261,8 +9633,7 @@ bpf_program__attach_fd(struct bpf_program *prog, int target_fd,
prog_fd = bpf_program__fd(prog);
if (prog_fd < 0) {
- pr_warn("program '%s': can't attach before loaded\n",
- bpf_program__title(prog, false));
+ pr_warn("prog '%s': can't attach before loaded\n", prog->name);
return ERR_PTR(-EINVAL);
}
@@ -8272,12 +9643,12 @@ bpf_program__attach_fd(struct bpf_program *prog, int target_fd,
link->detach = &bpf_link__detach_fd;
attach_type = bpf_program__get_expected_attach_type(prog);
- link_fd = bpf_link_create(prog_fd, target_fd, attach_type, NULL);
+ link_fd = bpf_link_create(prog_fd, target_fd, attach_type, &opts);
if (link_fd < 0) {
link_fd = -errno;
free(link);
- pr_warn("program '%s': failed to attach to %s: %s\n",
- bpf_program__title(prog, false), target_name,
+ pr_warn("prog '%s': failed to attach to %s: %s\n",
+ prog->name, target_name,
libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
return ERR_PTR(link_fd);
}
@@ -8288,19 +9659,51 @@ bpf_program__attach_fd(struct bpf_program *prog, int target_fd,
struct bpf_link *
bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd)
{
- return bpf_program__attach_fd(prog, cgroup_fd, "cgroup");
+ return bpf_program__attach_fd(prog, cgroup_fd, 0, "cgroup");
}
struct bpf_link *
bpf_program__attach_netns(struct bpf_program *prog, int netns_fd)
{
- return bpf_program__attach_fd(prog, netns_fd, "netns");
+ return bpf_program__attach_fd(prog, netns_fd, 0, "netns");
}
struct bpf_link *bpf_program__attach_xdp(struct bpf_program *prog, int ifindex)
{
/* target_fd/target_ifindex use the same field in LINK_CREATE */
- return bpf_program__attach_fd(prog, ifindex, "xdp");
+ return bpf_program__attach_fd(prog, ifindex, 0, "xdp");
+}
+
+struct bpf_link *bpf_program__attach_freplace(struct bpf_program *prog,
+ int target_fd,
+ const char *attach_func_name)
+{
+ int btf_id;
+
+ if (!!target_fd != !!attach_func_name) {
+ pr_warn("prog '%s': supply none or both of target_fd and attach_func_name\n",
+ prog->name);
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (prog->type != BPF_PROG_TYPE_EXT) {
+ pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace",
+ prog->name);
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (target_fd) {
+ btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd);
+ if (btf_id < 0)
+ return ERR_PTR(btf_id);
+
+ return bpf_program__attach_fd(prog, target_fd, btf_id, "freplace");
+ } else {
+ /* no target, so use raw_tracepoint_open for compatibility
+ * with old kernels
+ */
+ return bpf_program__attach_trace(prog);
+ }
}
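
A hedged usage sketch for the new API; the object file, program, and function names ("freplace_prog.o", "new_impl", "xdp_subfunc") are illustrative:

#include <bpf/libbpf.h>

static struct bpf_link *attach_replacement(int target_prog_fd)
{
	struct bpf_object *obj;
	struct bpf_program *prog;
	struct bpf_link *link;

	obj = bpf_object__open_file("freplace_prog.o", NULL);
	if (libbpf_get_error(obj))
		return NULL;

	prog = bpf_object__find_program_by_name(obj, "new_impl");
	if (!prog)
		goto err;

	/* fix the attach target before load so the verifier can check types */
	if (bpf_program__set_attach_target(prog, target_prog_fd, "xdp_subfunc"))
		goto err;
	if (bpf_object__load(obj))
		goto err;

	/* explicit target: goes through BPF_LINK_CREATE with target_btf_id;
	 * passing (0, NULL) would fall back to raw_tracepoint_open instead
	 */
	link = bpf_program__attach_freplace(prog, target_prog_fd, "xdp_subfunc");
	if (libbpf_get_error(link))
		goto err;
	return link;
err:
	bpf_object__close(obj);
	return NULL;
}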
struct bpf_link *
@@ -8321,8 +9724,7 @@ bpf_program__attach_iter(struct bpf_program *prog,
prog_fd = bpf_program__fd(prog);
if (prog_fd < 0) {
- pr_warn("program '%s': can't attach before loaded\n",
- bpf_program__title(prog, false));
+ pr_warn("prog '%s': can't attach before loaded\n", prog->name);
return ERR_PTR(-EINVAL);
}
@@ -8336,9 +9738,8 @@ bpf_program__attach_iter(struct bpf_program *prog,
if (link_fd < 0) {
link_fd = -errno;
free(link);
- pr_warn("program '%s': failed to attach to iterator: %s\n",
- bpf_program__title(prog, false),
- libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
+ pr_warn("prog '%s': failed to attach to iterator: %s\n",
+ prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
return ERR_PTR(link_fd);
}
link->fd = link_fd;
@@ -8349,7 +9750,7 @@ struct bpf_link *bpf_program__attach(struct bpf_program *prog)
{
const struct bpf_sec_def *sec_def;
- sec_def = find_sec_def(bpf_program__title(prog, false));
+ sec_def = find_sec_def(prog->sec_name);
if (!sec_def || !sec_def->attach_fn)
return ERR_PTR(-ESRCH);
@@ -8594,7 +9995,7 @@ struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
struct perf_buffer_params p = {};
struct perf_event_attr attr = { 0, };
- attr.config = PERF_COUNT_SW_BPF_OUTPUT,
+ attr.config = PERF_COUNT_SW_BPF_OUTPUT;
attr.type = PERF_TYPE_SOFTWARE;
attr.sample_type = PERF_SAMPLE_RAW;
attr.sample_period = 1;
@@ -8832,6 +10233,11 @@ static int perf_buffer__process_records(struct perf_buffer *pb,
return 0;
}
+int perf_buffer__epoll_fd(const struct perf_buffer *pb)
+{
+ return pb->epoll_fd;
+}
+
int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
{
int i, cnt, err;
@@ -8849,6 +10255,55 @@ int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
return cnt < 0 ? -errno : cnt;
}
+/* Return number of PERF_EVENT_ARRAY map slots set up by this perf_buffer
+ * manager.
+ */
+size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb)
+{
+ return pb->cpu_cnt;
+}
+
+/*
+ * Return perf_event FD of a ring buffer in *buf_idx* slot of
+ * PERF_EVENT_ARRAY BPF map. This FD can be polled for new data using
+ * select()/poll()/epoll() Linux syscalls.
+ */
+int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx)
+{
+ struct perf_cpu_buf *cpu_buf;
+
+ if (buf_idx >= pb->cpu_cnt)
+ return -EINVAL;
+
+ cpu_buf = pb->cpu_bufs[buf_idx];
+ if (!cpu_buf)
+ return -ENOENT;
+
+ return cpu_buf->fd;
+}
+
+/*
+ * Consume data from perf ring buffer corresponding to slot *buf_idx* in
+ * PERF_EVENT_ARRAY BPF map without waiting/polling. If there is no data to
+ * consume, do nothing and return success.
+ * Returns:
+ * - 0 on success;
+ * - <0 on failure.
+ */
+int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx)
+{
+ struct perf_cpu_buf *cpu_buf;
+
+ if (buf_idx >= pb->cpu_cnt)
+ return -EINVAL;
+
+ cpu_buf = pb->cpu_bufs[buf_idx];
+ if (!cpu_buf)
+ return -ENOENT;
+
+ return perf_buffer__process_records(pb, cpu_buf);
+}
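
Together with perf_buffer__epoll_fd(), these accessors let callers integrate the rings into their own event loop instead of calling perf_buffer__poll(); for example:

#include <errno.h>
#include <stddef.h>
#include <sys/epoll.h>
#include <bpf/libbpf.h>

/* register every ring's FD in a caller-owned epoll instance, stashing
 * the slot index so ready events map back to perf_buffer__consume_buffer()
 */
static int add_rings_to_epoll(struct perf_buffer *pb, int epfd)
{
	size_t i, n = perf_buffer__buffer_cnt(pb);

	for (i = 0; i < n; i++) {
		struct epoll_event ev = {
			.events = EPOLLIN,
			.data.u64 = i,
		};
		int fd = perf_buffer__buffer_fd(pb, i);

		if (fd < 0)
			continue; /* slot not set up (e.g., offline CPU) */
		if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev) < 0)
			return -errno;
	}
	return 0;
}

/* after epoll_wait(), drain just the ready slot:
 *	perf_buffer__consume_buffer(pb, ev.data.u64);
 */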
+
int perf_buffer__consume(struct perf_buffer *pb)
{
int i, err;
@@ -8861,7 +10316,7 @@ int perf_buffer__consume(struct perf_buffer *pb)
err = perf_buffer__process_records(pb, cpu_buf);
if (err) {
- pr_warn("error while processing records: %d\n", err);
+ pr_warn("perf_buffer: failed to process records in buffer #%d: %d\n", i, err);
return err;
}
}
@@ -9129,9 +10584,8 @@ int bpf_program__set_attach_target(struct bpf_program *prog,
btf_id = libbpf_find_prog_btf_id(attach_func_name,
attach_prog_fd);
else
- btf_id = __find_vmlinux_btf_id(prog->obj->btf_vmlinux,
- attach_func_name,
- prog->expected_attach_type);
+ btf_id = libbpf_find_vmlinux_btf_id(attach_func_name,
+ prog->expected_attach_type);
if (btf_id < 0)
return btf_id;
@@ -9365,12 +10819,11 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
struct bpf_program *prog = *s->progs[i].prog;
struct bpf_link **link = s->progs[i].link;
const struct bpf_sec_def *sec_def;
- const char *sec_name = bpf_program__title(prog, false);
if (!prog->load)
continue;
- sec_def = find_sec_def(sec_name);
+ sec_def = find_sec_def(prog->sec_name);
if (!sec_def || !sec_def->attach_fn)
continue;
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 5ecb4069a9f0..6909ee81113a 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -198,8 +198,9 @@ LIBBPF_API void bpf_program__set_ifindex(struct bpf_program *prog,
__u32 ifindex);
LIBBPF_API const char *bpf_program__name(const struct bpf_program *prog);
-LIBBPF_API const char *bpf_program__title(const struct bpf_program *prog,
- bool needs_copy);
+LIBBPF_API const char *bpf_program__section_name(const struct bpf_program *prog);
+LIBBPF_API LIBBPF_DEPRECATED("BPF program title is confusing term; please use bpf_program__section_name() instead")
+const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy);
LIBBPF_API bool bpf_program__autoload(const struct bpf_program *prog);
LIBBPF_API int bpf_program__set_autoload(struct bpf_program *prog, bool autoload);
@@ -260,6 +261,9 @@ LIBBPF_API struct bpf_link *
bpf_program__attach_netns(struct bpf_program *prog, int netns_fd);
LIBBPF_API struct bpf_link *
bpf_program__attach_xdp(struct bpf_program *prog, int ifindex);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_freplace(struct bpf_program *prog,
+ int target_fd, const char *attach_func_name);
struct bpf_map;
@@ -588,8 +592,12 @@ perf_buffer__new_raw(int map_fd, size_t page_cnt,
const struct perf_buffer_raw_opts *opts);
LIBBPF_API void perf_buffer__free(struct perf_buffer *pb);
+LIBBPF_API int perf_buffer__epoll_fd(const struct perf_buffer *pb);
LIBBPF_API int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms);
LIBBPF_API int perf_buffer__consume(struct perf_buffer *pb);
+LIBBPF_API int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx);
+LIBBPF_API size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb);
+LIBBPF_API int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx);
typedef enum bpf_perf_event_ret
(*bpf_perf_event_print_t)(struct perf_event_header *hdr,
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index e35bd6cdbdbf..4ebfadf45b47 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -299,3 +299,41 @@ LIBBPF_0.1.0 {
btf__set_fd;
btf__set_pointer_size;
} LIBBPF_0.0.9;
+
+LIBBPF_0.2.0 {
+ global:
+ bpf_prog_bind_map;
+ bpf_prog_test_run_opts;
+ bpf_program__attach_freplace;
+ bpf_program__section_name;
+ btf__add_array;
+ btf__add_const;
+ btf__add_enum;
+ btf__add_enum_value;
+ btf__add_datasec;
+ btf__add_datasec_var_info;
+ btf__add_field;
+ btf__add_func;
+ btf__add_func_param;
+ btf__add_func_proto;
+ btf__add_fwd;
+ btf__add_int;
+ btf__add_ptr;
+ btf__add_restrict;
+ btf__add_str;
+ btf__add_struct;
+ btf__add_typedef;
+ btf__add_union;
+ btf__add_var;
+ btf__add_volatile;
+ btf__endianness;
+ btf__find_str;
+ btf__new_empty;
+ btf__set_endianness;
+ btf__str_by_offset;
+ perf_buffer__buffer_cnt;
+ perf_buffer__buffer_fd;
+ perf_buffer__epoll_fd;
+ perf_buffer__consume_buffer;
+ xsk_socket__create_shared;
+} LIBBPF_0.1.0;
diff --git a/tools/lib/bpf/libbpf_common.h b/tools/lib/bpf/libbpf_common.h
index a23ae1ac27eb..947d8bd8a7bb 100644
--- a/tools/lib/bpf/libbpf_common.h
+++ b/tools/lib/bpf/libbpf_common.h
@@ -15,6 +15,8 @@
#define LIBBPF_API __attribute__((visibility("default")))
#endif
+#define LIBBPF_DEPRECATED(msg) __attribute__((deprecated(msg)))
+
/* Helper macro to declare and initialize libbpf options struct
*
* This dance with uninitialized declaration, followed by memset to zero,
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 50d70e90d5f1..d99bc847bf84 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -9,6 +9,15 @@
#ifndef __LIBBPF_LIBBPF_INTERNAL_H
#define __LIBBPF_LIBBPF_INTERNAL_H
+#include <stdlib.h>
+#include <limits.h>
+
+/* make sure libbpf doesn't use kernel-only integer typedefs */
+#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
+
+/* prevent accidental re-addition of reallocarray() */
+#pragma GCC poison reallocarray
+
#include "libbpf.h"
#define BTF_INFO_ENC(kind, kind_flag, vlen) \
@@ -23,6 +32,12 @@
#define BTF_PARAM_ENC(name, type) (name), (type)
#define BTF_VAR_SECINFO_ENC(type, offset, size) (type), (offset), (size)
+#ifndef likely
+#define likely(x) __builtin_expect(!!(x), 1)
+#endif
+#ifndef unlikely
+#define unlikely(x) __builtin_expect(!!(x), 0)
+#endif
#ifndef min
# define min(x, y) ((x) < (y) ? (x) : (y))
#endif
@@ -63,6 +78,37 @@ do { \
#define pr_info(fmt, ...) __pr(LIBBPF_INFO, fmt, ##__VA_ARGS__)
#define pr_debug(fmt, ...) __pr(LIBBPF_DEBUG, fmt, ##__VA_ARGS__)
+#ifndef __has_builtin
+#define __has_builtin(x) 0
+#endif
+/*
+ * Re-implement glibc's reallocarray() for libbpf internal-only use.
+ * reallocarray(), unfortunately, is not available in all versions of glibc,
+ * so it requires extra feature detection and use of the reallocarray()
+ * stub from <tools/libc_compat.h> and COMPAT_NEED_REALLOCARRAY. All this
+ * complicates the build of libbpf unnecessarily and is just a maintenance
+ * burden. Instead, it's trivial to implement a libbpf-specific internal
+ * version and use it throughout libbpf.
+ */
+static inline void *libbpf_reallocarray(void *ptr, size_t nmemb, size_t size)
+{
+ size_t total;
+
+#if __has_builtin(__builtin_mul_overflow)
+ if (unlikely(__builtin_mul_overflow(nmemb, size, &total)))
+ return NULL;
+#else
+ if (size == 0 || nmemb > ULONG_MAX / size)
+ return NULL;
+ total = nmemb * size;
+#endif
+ return realloc(ptr, total);
+}
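
Call sites then follow the usual realloc() pattern; a minimal sketch of growing an array inside libbpf (hypothetical struct; assumes libbpf_internal.h is included):

#include <errno.h>
#include <stddef.h>
#include "libbpf_internal.h"

struct desc { int fd; };

/* grow a dynamic array by one element with overflow-checked sizing;
 * on failure the old array is left untouched, exactly like realloc()
 */
static int push_desc(struct desc **arr, size_t *cnt, int fd)
{
	struct desc *tmp;

	tmp = libbpf_reallocarray(*arr, *cnt + 1, sizeof(**arr));
	if (!tmp)
		return -ENOMEM;
	tmp[*cnt].fd = fd;
	*arr = tmp;
	(*cnt)++;
	return 0;
}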
+
+void *btf_add_mem(void **data, size_t *cap_cnt, size_t elem_sz,
+ size_t cur_cnt, size_t max_cnt, size_t add_cnt);
+int btf_ensure_mem(void **data, size_t *cap_cnt, size_t elem_sz, size_t need_cnt);
+
static inline bool libbpf_validate_opts(const char *opts,
size_t opts_sz, size_t user_sz,
const char *type_name)
@@ -94,6 +140,11 @@ static inline bool libbpf_validate_opts(const char *opts,
((opts) && opts->sz >= offsetofend(typeof(*(opts)), field))
#define OPTS_GET(opts, field, fallback_value) \
(OPTS_HAS(opts, field) ? (opts)->field : fallback_value)
+#define OPTS_SET(opts, field, value) \
+ do { \
+ if (OPTS_HAS(opts, field)) \
+ (opts)->field = value; \
+ } while (0)
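
OPTS_SET is the write-side counterpart of OPTS_GET: the assignment happens only if the caller's opts struct is declared large enough to contain the field. A sketch with a hypothetical opts struct:

#include <stddef.h>
#include "libbpf_internal.h"

/* hypothetical opts struct; "token_fd" stands in for a field added in
 * a newer libbpf release than the caller may have been built against
 */
struct sample_opts {
	size_t sz;	/* filled in by DECLARE_LIBBPF_OPTS() */
	int flags;
	int token_fd;
};

static void apply_defaults(struct sample_opts *opts)
{
	int flags = OPTS_GET(opts, flags, 0); /* 0 if opts too small or NULL */

	/* written only when the caller's struct is large enough to
	 * actually contain token_fd; silently skipped otherwise
	 */
	OPTS_SET(opts, token_fd, -1);
	(void)flags;
}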
int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz);
int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz);
@@ -105,18 +156,6 @@ int bpf_object__section_size(const struct bpf_object *obj, const char *name,
int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
__u32 *off);
-struct nlattr;
-typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb);
-int libbpf_netlink_open(unsigned int *nl_pid);
-int libbpf_nl_get_link(int sock, unsigned int nl_pid,
- libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie);
-int libbpf_nl_get_class(int sock, unsigned int nl_pid, int ifindex,
- libbpf_dump_nlmsg_t dump_class_nlmsg, void *cookie);
-int libbpf_nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex,
- libbpf_dump_nlmsg_t dump_qdisc_nlmsg, void *cookie);
-int libbpf_nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle,
- libbpf_dump_nlmsg_t dump_filter_nlmsg, void *cookie);
-
struct btf_ext_info {
/*
* info points to the individual info section (e.g. func_info and
@@ -138,6 +177,44 @@ struct btf_ext_info {
i < (sec)->num_info; \
i++, rec = (void *)rec + (seg)->rec_size)
+/*
+ * The .BTF.ext ELF section layout defined as
+ * struct btf_ext_header
+ * func_info subsection
+ *
+ * The func_info subsection layout:
+ * record size for struct bpf_func_info in the func_info subsection
+ * struct btf_sec_func_info for section #1
+ * a list of bpf_func_info records for section #1
+ * where struct bpf_func_info mimics one in include/uapi/linux/bpf.h
+ * but may not be identical
+ * struct btf_sec_func_info for section #2
+ * a list of bpf_func_info records for section #2
+ * ......
+ *
+ * Note that the bpf_func_info record size in .BTF.ext may not
+ * be the same as the one defined in include/uapi/linux/bpf.h.
+ * The loader should ensure that record_size meets the minimum
+ * requirement and pass the record as-is to the kernel. The
+ * kernel will handle the func_info properly based on its contents.
+ */
+struct btf_ext_header {
+ __u16 magic;
+ __u8 version;
+ __u8 flags;
+ __u32 hdr_len;
+
+ /* All offsets are in bytes relative to the end of this header */
+ __u32 func_info_off;
+ __u32 func_info_len;
+ __u32 line_info_off;
+ __u32 line_info_len;
+
+ /* optional part of .BTF.ext header */
+ __u32 core_relo_off;
+ __u32 core_relo_len;
+};
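
Since all offsets are relative to the end of the (possibly extended) header, locating a subsection is hdr_len-based pointer arithmetic; a minimal sketch that also guards the optional CO-RE part (assumes offsetofend() is available; real parsing additionally validates magic, version, and bounds):

static const void *btf_ext_core_relos(const struct btf_ext_header *hdr)
{
	/* the optional fields may be absent in older .BTF.ext blobs */
	if (hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len))
		return NULL;
	return (const char *)hdr + hdr->hdr_len + hdr->core_relo_off;
}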
+
struct btf_ext {
union {
struct btf_ext_header *hdr;
@@ -145,7 +222,7 @@ struct btf_ext {
};
struct btf_ext_info func_info;
struct btf_ext_info line_info;
- struct btf_ext_info field_reloc_info;
+ struct btf_ext_info core_relo_info;
__u32 data_size;
};
@@ -170,32 +247,40 @@ struct bpf_line_info_min {
__u32 line_col;
};
-/* bpf_field_info_kind encodes which aspect of captured field has to be
- * adjusted by relocations. Currently supported values are:
- * - BPF_FIELD_BYTE_OFFSET: field offset (in bytes);
- * - BPF_FIELD_EXISTS: field existence (1, if field exists; 0, otherwise);
+/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value
+ * has to be adjusted by relocations.
*/
-enum bpf_field_info_kind {
+enum bpf_core_relo_kind {
BPF_FIELD_BYTE_OFFSET = 0, /* field byte offset */
- BPF_FIELD_BYTE_SIZE = 1,
+ BPF_FIELD_BYTE_SIZE = 1, /* field size in bytes */
BPF_FIELD_EXISTS = 2, /* field existence in target kernel */
- BPF_FIELD_SIGNED = 3,
- BPF_FIELD_LSHIFT_U64 = 4,
- BPF_FIELD_RSHIFT_U64 = 5,
+ BPF_FIELD_SIGNED = 3, /* field signedness (0 - unsigned, 1 - signed) */
+ BPF_FIELD_LSHIFT_U64 = 4, /* bitfield-specific left bitshift */
+ BPF_FIELD_RSHIFT_U64 = 5, /* bitfield-specific right bitshift */
+ BPF_TYPE_ID_LOCAL = 6, /* type ID in local BPF object */
+ BPF_TYPE_ID_TARGET = 7, /* type ID in target kernel */
+ BPF_TYPE_EXISTS = 8, /* type existence in target kernel */
+ BPF_TYPE_SIZE = 9, /* type size in bytes */
+ BPF_ENUMVAL_EXISTS = 10, /* enum value existence in target kernel */
+ BPF_ENUMVAL_VALUE = 11, /* enum value integer value */
};
-/* The minimum bpf_field_reloc checked by the loader
+/* The minimum bpf_core_relo checked by the loader
*
- * Field relocation captures the following data:
+ * CO-RE relocation captures the following data:
* - insn_off - instruction offset (in bytes) within a BPF program that needs
* its insn->imm field to be relocated with actual field info;
* - type_id - BTF type ID of the "root" (containing) entity of a relocatable
- * field;
+ * type or field;
* - access_str_off - offset into corresponding .BTF string section. String
- * itself encodes an accessed field using a sequence of field and array
- * indicies, separated by colon (:). It's conceptually very close to LLVM's
- * getelementptr ([0]) instruction's arguments for identifying offset to
- * a field.
+ * interpretation depends on specific relocation kind:
+ * - for field-based relocations, string encodes an accessed field using
+ * a sequence of field and array indices, separated by colon (:). It's
+ * conceptually very close to LLVM's getelementptr ([0]) instruction's
+ * arguments for identifying offset to a field.
+ * - for type-based relocations, the string is expected to be just "0";
+ * - for enum value-based relocations, string contains an index of enum
+ * value within its enum type;
*
* Example to provide a better feel.
*
@@ -226,11 +311,11 @@ enum bpf_field_info_kind {
*
* [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction
*/
-struct bpf_field_reloc {
+struct bpf_core_relo {
__u32 insn_off;
__u32 type_id;
__u32 access_str_off;
- enum bpf_field_info_kind kind;
+ enum bpf_core_relo_kind kind;
};
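
For instance, a field byte-offset relocation for the third instruction might be encoded as follows (illustrative values only; the string and type IDs depend on the object's actual BTF):

/* relocate the insn at byte offset 16 (the third insn) using field
 * access string "0:1" rooted at BTF type ID 42
 */
static const struct bpf_core_relo sample_relo = {
	.insn_off	= 16,
	.type_id	= 42,
	.access_str_off	= 7,	/* offset of "0:1" in the .BTF string section */
	.kind		= BPF_FIELD_BYTE_OFFSET,
};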
#endif /* __LIBBPF_LIBBPF_INTERNAL_H */
diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index 5a3d3f078408..5482a9b7ae2d 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -17,9 +17,6 @@
#include "libbpf.h"
#include "libbpf_internal.h"
-/* make sure libbpf doesn't use kernel-only integer typedefs */
-#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
-
static bool grep(const char *buffer, const char *pattern)
{
return !!strstr(buffer, pattern);
@@ -173,7 +170,7 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
return btf_fd;
}
-static int load_sk_storage_btf(void)
+static int load_local_storage_btf(void)
{
const char strs[] = "\0bpf_spin_lock\0val\0cnt\0l";
/* struct bpf_spin_lock {
@@ -232,12 +229,13 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)
key_size = 0;
break;
case BPF_MAP_TYPE_SK_STORAGE:
+ case BPF_MAP_TYPE_INODE_STORAGE:
btf_key_type_id = 1;
btf_value_type_id = 3;
value_size = 8;
max_entries = 0;
map_flags = BPF_F_NO_PREALLOC;
- btf_fd = load_sk_storage_btf();
+ btf_fd = load_local_storage_btf();
if (btf_fd < 0)
return false;
break;
diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c
index 312f887570b2..4dd73de00b6f 100644
--- a/tools/lib/bpf/netlink.c
+++ b/tools/lib/bpf/netlink.c
@@ -15,13 +15,12 @@
#include "libbpf_internal.h"
#include "nlattr.h"
-/* make sure libbpf doesn't use kernel-only integer typedefs */
-#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
-
#ifndef SOL_NETLINK
#define SOL_NETLINK 270
#endif
+typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb);
+
typedef int (*__dump_nlmsg_t)(struct nlmsghdr *nlmsg, libbpf_dump_nlmsg_t,
void *cookie);
@@ -31,7 +30,7 @@ struct xdp_id_md {
struct xdp_link_info info;
};
-int libbpf_netlink_open(__u32 *nl_pid)
+static int libbpf_netlink_open(__u32 *nl_pid)
{
struct sockaddr_nl sa;
socklen_t addrlen;
@@ -283,6 +282,9 @@ static int get_xdp_info(void *cookie, void *msg, struct nlattr **tb)
return 0;
}
+static int libbpf_nl_get_link(int sock, unsigned int nl_pid,
+ libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie);
+
int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
size_t info_size, __u32 flags)
{
@@ -368,121 +370,3 @@ int libbpf_nl_get_link(int sock, unsigned int nl_pid,
return bpf_netlink_recv(sock, nl_pid, seq, __dump_link_nlmsg,
dump_link_nlmsg, cookie);
}
-
-static int __dump_class_nlmsg(struct nlmsghdr *nlh,
- libbpf_dump_nlmsg_t dump_class_nlmsg,
- void *cookie)
-{
- struct nlattr *tb[TCA_MAX + 1], *attr;
- struct tcmsg *t = NLMSG_DATA(nlh);
- int len;
-
- len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t));
- attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t)));
- if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0)
- return -LIBBPF_ERRNO__NLPARSE;
-
- return dump_class_nlmsg(cookie, t, tb);
-}
-
-int libbpf_nl_get_class(int sock, unsigned int nl_pid, int ifindex,
- libbpf_dump_nlmsg_t dump_class_nlmsg, void *cookie)
-{
- struct {
- struct nlmsghdr nlh;
- struct tcmsg t;
- } req = {
- .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)),
- .nlh.nlmsg_type = RTM_GETTCLASS,
- .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
- .t.tcm_family = AF_UNSPEC,
- .t.tcm_ifindex = ifindex,
- };
- int seq = time(NULL);
-
- req.nlh.nlmsg_seq = seq;
- if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0)
- return -errno;
-
- return bpf_netlink_recv(sock, nl_pid, seq, __dump_class_nlmsg,
- dump_class_nlmsg, cookie);
-}
-
-static int __dump_qdisc_nlmsg(struct nlmsghdr *nlh,
- libbpf_dump_nlmsg_t dump_qdisc_nlmsg,
- void *cookie)
-{
- struct nlattr *tb[TCA_MAX + 1], *attr;
- struct tcmsg *t = NLMSG_DATA(nlh);
- int len;
-
- len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t));
- attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t)));
- if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0)
- return -LIBBPF_ERRNO__NLPARSE;
-
- return dump_qdisc_nlmsg(cookie, t, tb);
-}
-
-int libbpf_nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex,
- libbpf_dump_nlmsg_t dump_qdisc_nlmsg, void *cookie)
-{
- struct {
- struct nlmsghdr nlh;
- struct tcmsg t;
- } req = {
- .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)),
- .nlh.nlmsg_type = RTM_GETQDISC,
- .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
- .t.tcm_family = AF_UNSPEC,
- .t.tcm_ifindex = ifindex,
- };
- int seq = time(NULL);
-
- req.nlh.nlmsg_seq = seq;
- if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0)
- return -errno;
-
- return bpf_netlink_recv(sock, nl_pid, seq, __dump_qdisc_nlmsg,
- dump_qdisc_nlmsg, cookie);
-}
-
-static int __dump_filter_nlmsg(struct nlmsghdr *nlh,
- libbpf_dump_nlmsg_t dump_filter_nlmsg,
- void *cookie)
-{
- struct nlattr *tb[TCA_MAX + 1], *attr;
- struct tcmsg *t = NLMSG_DATA(nlh);
- int len;
-
- len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t));
- attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t)));
- if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0)
- return -LIBBPF_ERRNO__NLPARSE;
-
- return dump_filter_nlmsg(cookie, t, tb);
-}
-
-int libbpf_nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle,
- libbpf_dump_nlmsg_t dump_filter_nlmsg, void *cookie)
-{
- struct {
- struct nlmsghdr nlh;
- struct tcmsg t;
- } req = {
- .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)),
- .nlh.nlmsg_type = RTM_GETTFILTER,
- .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
- .t.tcm_family = AF_UNSPEC,
- .t.tcm_ifindex = ifindex,
- .t.tcm_parent = handle,
- };
- int seq = time(NULL);
-
- req.nlh.nlmsg_seq = seq;
- if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0)
- return -errno;
-
- return bpf_netlink_recv(sock, nl_pid, seq, __dump_filter_nlmsg,
- dump_filter_nlmsg, cookie);
-}
diff --git a/tools/lib/bpf/nlattr.c b/tools/lib/bpf/nlattr.c
index 0ad41dfea8eb..b607fa9852b1 100644
--- a/tools/lib/bpf/nlattr.c
+++ b/tools/lib/bpf/nlattr.c
@@ -7,14 +7,11 @@
*/
#include <errno.h>
-#include "nlattr.h"
-#include "libbpf_internal.h"
-#include <linux/rtnetlink.h>
#include <string.h>
#include <stdio.h>
-
-/* make sure libbpf doesn't use kernel-only integer typedefs */
-#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
+#include <linux/rtnetlink.h>
+#include "nlattr.h"
+#include "libbpf_internal.h"
static uint16_t nla_attr_minlen[LIBBPF_NLA_TYPE_MAX+1] = {
[LIBBPF_NLA_U8] = sizeof(uint8_t),
diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c
index 4fc6c6cbb4eb..5c6522c89af1 100644
--- a/tools/lib/bpf/ringbuf.c
+++ b/tools/lib/bpf/ringbuf.c
@@ -16,15 +16,11 @@
#include <asm/barrier.h>
#include <sys/mman.h>
#include <sys/epoll.h>
-#include <tools/libc_compat.h>
#include "libbpf.h"
#include "libbpf_internal.h"
#include "bpf.h"
-/* make sure libbpf doesn't use kernel-only integer typedefs */
-#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
-
struct ring {
ring_buffer_sample_fn sample_cb;
void *ctx;
@@ -82,12 +78,12 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
return -EINVAL;
}
- tmp = reallocarray(rb->rings, rb->ring_cnt + 1, sizeof(*rb->rings));
+ tmp = libbpf_reallocarray(rb->rings, rb->ring_cnt + 1, sizeof(*rb->rings));
if (!tmp)
return -ENOMEM;
rb->rings = tmp;
- tmp = reallocarray(rb->events, rb->ring_cnt + 1, sizeof(*rb->events));
+ tmp = libbpf_reallocarray(rb->events, rb->ring_cnt + 1, sizeof(*rb->events));
if (!tmp)
return -ENOMEM;
rb->events = tmp;
diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index f7f4efb70a4c..e3c98c007825 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -20,6 +20,8 @@
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/if_xdp.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
#include <linux/sockios.h>
#include <net/if.h>
#include <sys/ioctl.h>
@@ -32,9 +34,6 @@
#include "libbpf_internal.h"
#include "xsk.h"
-/* make sure libbpf doesn't use kernel-only integer typedefs */
-#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
-
#ifndef SOL_XDP
#define SOL_XDP 283
#endif
@@ -48,26 +47,35 @@
#endif
struct xsk_umem {
- struct xsk_ring_prod *fill;
- struct xsk_ring_cons *comp;
+ struct xsk_ring_prod *fill_save;
+ struct xsk_ring_cons *comp_save;
char *umem_area;
struct xsk_umem_config config;
int fd;
int refcount;
+ struct list_head ctx_list;
+};
+
+struct xsk_ctx {
+ struct xsk_ring_prod *fill;
+ struct xsk_ring_cons *comp;
+ __u32 queue_id;
+ struct xsk_umem *umem;
+ int refcount;
+ int ifindex;
+ struct list_head list;
+ int prog_fd;
+ int xsks_map_fd;
+ char ifname[IFNAMSIZ];
};
struct xsk_socket {
struct xsk_ring_cons *rx;
struct xsk_ring_prod *tx;
__u64 outstanding_tx;
- struct xsk_umem *umem;
+ struct xsk_ctx *ctx;
struct xsk_socket_config config;
int fd;
- int ifindex;
- int prog_fd;
- int xsks_map_fd;
- __u32 queue_id;
- char ifname[IFNAMSIZ];
};
struct xsk_nl_info {
@@ -203,15 +211,73 @@ static int xsk_get_mmap_offsets(int fd, struct xdp_mmap_offsets *off)
return -EINVAL;
}
+static int xsk_create_umem_rings(struct xsk_umem *umem, int fd,
+ struct xsk_ring_prod *fill,
+ struct xsk_ring_cons *comp)
+{
+ struct xdp_mmap_offsets off;
+ void *map;
+ int err;
+
+ err = setsockopt(fd, SOL_XDP, XDP_UMEM_FILL_RING,
+ &umem->config.fill_size,
+ sizeof(umem->config.fill_size));
+ if (err)
+ return -errno;
+
+ err = setsockopt(fd, SOL_XDP, XDP_UMEM_COMPLETION_RING,
+ &umem->config.comp_size,
+ sizeof(umem->config.comp_size));
+ if (err)
+ return -errno;
+
+ err = xsk_get_mmap_offsets(fd, &off);
+ if (err)
+ return -errno;
+
+ map = mmap(NULL, off.fr.desc + umem->config.fill_size * sizeof(__u64),
+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
+ XDP_UMEM_PGOFF_FILL_RING);
+ if (map == MAP_FAILED)
+ return -errno;
+
+ fill->mask = umem->config.fill_size - 1;
+ fill->size = umem->config.fill_size;
+ fill->producer = map + off.fr.producer;
+ fill->consumer = map + off.fr.consumer;
+ fill->flags = map + off.fr.flags;
+ fill->ring = map + off.fr.desc;
+ fill->cached_cons = umem->config.fill_size;
+
+ map = mmap(NULL, off.cr.desc + umem->config.comp_size * sizeof(__u64),
+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
+ XDP_UMEM_PGOFF_COMPLETION_RING);
+ if (map == MAP_FAILED) {
+ err = -errno;
+ goto out_mmap;
+ }
+
+ comp->mask = umem->config.comp_size - 1;
+ comp->size = umem->config.comp_size;
+ comp->producer = map + off.cr.producer;
+ comp->consumer = map + off.cr.consumer;
+ comp->flags = map + off.cr.flags;
+ comp->ring = map + off.cr.desc;
+
+ return 0;
+
+out_mmap:
+ munmap(map, off.fr.desc + umem->config.fill_size * sizeof(__u64));
+ return err;
+}
+
int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,
__u64 size, struct xsk_ring_prod *fill,
struct xsk_ring_cons *comp,
const struct xsk_umem_config *usr_config)
{
- struct xdp_mmap_offsets off;
struct xdp_umem_reg mr;
struct xsk_umem *umem;
- void *map;
int err;
if (!umem_area || !umem_ptr || !fill || !comp)
@@ -230,6 +296,7 @@ int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,
}
umem->umem_area = umem_area;
+ INIT_LIST_HEAD(&umem->ctx_list);
xsk_set_umem_config(&umem->config, usr_config);
memset(&mr, 0, sizeof(mr));
@@ -244,71 +311,16 @@ int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,
err = -errno;
goto out_socket;
}
- err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_FILL_RING,
- &umem->config.fill_size,
- sizeof(umem->config.fill_size));
- if (err) {
- err = -errno;
- goto out_socket;
- }
- err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_COMPLETION_RING,
- &umem->config.comp_size,
- sizeof(umem->config.comp_size));
- if (err) {
- err = -errno;
- goto out_socket;
- }
- err = xsk_get_mmap_offsets(umem->fd, &off);
- if (err) {
- err = -errno;
- goto out_socket;
- }
-
- map = mmap(NULL, off.fr.desc + umem->config.fill_size * sizeof(__u64),
- PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, umem->fd,
- XDP_UMEM_PGOFF_FILL_RING);
- if (map == MAP_FAILED) {
- err = -errno;
+ err = xsk_create_umem_rings(umem, umem->fd, fill, comp);
+ if (err)
goto out_socket;
- }
-
- umem->fill = fill;
- fill->mask = umem->config.fill_size - 1;
- fill->size = umem->config.fill_size;
- fill->producer = map + off.fr.producer;
- fill->consumer = map + off.fr.consumer;
- fill->flags = map + off.fr.flags;
- fill->ring = map + off.fr.desc;
- fill->cached_prod = *fill->producer;
- /* cached_cons is "size" bigger than the real consumer pointer
- * See xsk_prod_nb_free
- */
- fill->cached_cons = *fill->consumer + umem->config.fill_size;
-
- map = mmap(NULL, off.cr.desc + umem->config.comp_size * sizeof(__u64),
- PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, umem->fd,
- XDP_UMEM_PGOFF_COMPLETION_RING);
- if (map == MAP_FAILED) {
- err = -errno;
- goto out_mmap;
- }
-
- umem->comp = comp;
- comp->mask = umem->config.comp_size - 1;
- comp->size = umem->config.comp_size;
- comp->producer = map + off.cr.producer;
- comp->consumer = map + off.cr.consumer;
- comp->flags = map + off.cr.flags;
- comp->ring = map + off.cr.desc;
- comp->cached_prod = *comp->producer;
- comp->cached_cons = *comp->consumer;
+ umem->fill_save = fill;
+ umem->comp_save = comp;
*umem_ptr = umem;
return 0;
-out_mmap:
- munmap(map, off.fr.desc + umem->config.fill_size * sizeof(__u64));
out_socket:
close(umem->fd);
out_umem_alloc:
@@ -342,6 +354,7 @@ DEFAULT_VERSION(xsk_umem__create_v0_0_4, xsk_umem__create, LIBBPF_0.0.4)
static int xsk_load_xdp_prog(struct xsk_socket *xsk)
{
static const int log_buf_size = 16 * 1024;
+ struct xsk_ctx *ctx = xsk->ctx;
char log_buf[log_buf_size];
int err, prog_fd;
@@ -369,7 +382,7 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)
/* *(u32 *)(r10 - 4) = r2 */
BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -4),
/* r1 = xskmap[] */
- BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd),
+ BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),
/* r3 = XDP_PASS */
BPF_MOV64_IMM(BPF_REG_3, 2),
/* call bpf_redirect_map */
@@ -381,7 +394,7 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)
/* r2 += -4 */
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
/* r1 = xskmap[] */
- BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd),
+ BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),
/* call bpf_map_lookup_elem */
BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
/* r1 = r0 */
@@ -393,7 +406,7 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)
/* r2 = *(u32 *)(r10 - 4) */
BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_10, -4),
/* r1 = xskmap[] */
- BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd),
+ BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),
/* r3 = 0 */
BPF_MOV64_IMM(BPF_REG_3, 0),
/* call bpf_redirect_map */
@@ -411,19 +424,21 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)
return prog_fd;
}
- err = bpf_set_link_xdp_fd(xsk->ifindex, prog_fd, xsk->config.xdp_flags);
+ err = bpf_set_link_xdp_fd(xsk->ctx->ifindex, prog_fd,
+ xsk->config.xdp_flags);
if (err) {
close(prog_fd);
return err;
}
- xsk->prog_fd = prog_fd;
+ ctx->prog_fd = prog_fd;
return 0;
}
static int xsk_get_max_queues(struct xsk_socket *xsk)
{
struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS };
+ struct xsk_ctx *ctx = xsk->ctx;
struct ifreq ifr = {};
int fd, err, ret;
@@ -432,7 +447,7 @@ static int xsk_get_max_queues(struct xsk_socket *xsk)
return -errno;
ifr.ifr_data = (void *)&channels;
- memcpy(ifr.ifr_name, xsk->ifname, IFNAMSIZ - 1);
+ memcpy(ifr.ifr_name, ctx->ifname, IFNAMSIZ - 1);
ifr.ifr_name[IFNAMSIZ - 1] = '\0';
err = ioctl(fd, SIOCETHTOOL, &ifr);
if (err && errno != EOPNOTSUPP) {
@@ -460,6 +475,7 @@ out:
static int xsk_create_bpf_maps(struct xsk_socket *xsk)
{
+ struct xsk_ctx *ctx = xsk->ctx;
int max_queues;
int fd;
@@ -472,15 +488,17 @@ static int xsk_create_bpf_maps(struct xsk_socket *xsk)
if (fd < 0)
return fd;
- xsk->xsks_map_fd = fd;
+ ctx->xsks_map_fd = fd;
return 0;
}
static void xsk_delete_bpf_maps(struct xsk_socket *xsk)
{
- bpf_map_delete_elem(xsk->xsks_map_fd, &xsk->queue_id);
- close(xsk->xsks_map_fd);
+ struct xsk_ctx *ctx = xsk->ctx;
+
+ bpf_map_delete_elem(ctx->xsks_map_fd, &ctx->queue_id);
+ close(ctx->xsks_map_fd);
}
static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
@@ -488,10 +506,11 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
__u32 i, *map_ids, num_maps, prog_len = sizeof(struct bpf_prog_info);
__u32 map_len = sizeof(struct bpf_map_info);
struct bpf_prog_info prog_info = {};
+ struct xsk_ctx *ctx = xsk->ctx;
struct bpf_map_info map_info;
int fd, err;
- err = bpf_obj_get_info_by_fd(xsk->prog_fd, &prog_info, &prog_len);
+ err = bpf_obj_get_info_by_fd(ctx->prog_fd, &prog_info, &prog_len);
if (err)
return err;
@@ -505,11 +524,11 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
prog_info.nr_map_ids = num_maps;
prog_info.map_ids = (__u64)(unsigned long)map_ids;
- err = bpf_obj_get_info_by_fd(xsk->prog_fd, &prog_info, &prog_len);
+ err = bpf_obj_get_info_by_fd(ctx->prog_fd, &prog_info, &prog_len);
if (err)
goto out_map_ids;
- xsk->xsks_map_fd = -1;
+ ctx->xsks_map_fd = -1;
for (i = 0; i < prog_info.nr_map_ids; i++) {
fd = bpf_map_get_fd_by_id(map_ids[i]);
@@ -523,7 +542,7 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
}
if (!strcmp(map_info.name, "xsks_map")) {
- xsk->xsks_map_fd = fd;
+ ctx->xsks_map_fd = fd;
continue;
}
@@ -531,7 +550,7 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
}
err = 0;
- if (xsk->xsks_map_fd == -1)
+ if (ctx->xsks_map_fd == -1)
err = -ENOENT;
out_map_ids:
@@ -541,16 +560,19 @@ out_map_ids:
static int xsk_set_bpf_maps(struct xsk_socket *xsk)
{
- return bpf_map_update_elem(xsk->xsks_map_fd, &xsk->queue_id,
+ struct xsk_ctx *ctx = xsk->ctx;
+
+ return bpf_map_update_elem(ctx->xsks_map_fd, &ctx->queue_id,
&xsk->fd, 0);
}
static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
{
+ struct xsk_ctx *ctx = xsk->ctx;
__u32 prog_id = 0;
int err;
- err = bpf_get_link_xdp_id(xsk->ifindex, &prog_id,
+ err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id,
xsk->config.xdp_flags);
if (err)
return err;
@@ -566,12 +588,12 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
return err;
}
} else {
- xsk->prog_fd = bpf_prog_get_fd_by_id(prog_id);
- if (xsk->prog_fd < 0)
+ ctx->prog_fd = bpf_prog_get_fd_by_id(prog_id);
+ if (ctx->prog_fd < 0)
return -errno;
err = xsk_lookup_bpf_maps(xsk);
if (err) {
- close(xsk->prog_fd);
+ close(ctx->prog_fd);
return err;
}
}
@@ -580,23 +602,108 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
err = xsk_set_bpf_maps(xsk);
if (err) {
xsk_delete_bpf_maps(xsk);
- close(xsk->prog_fd);
+ close(ctx->prog_fd);
return err;
}
return 0;
}
-int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
- __u32 queue_id, struct xsk_umem *umem,
- struct xsk_ring_cons *rx, struct xsk_ring_prod *tx,
- const struct xsk_socket_config *usr_config)
+static struct xsk_ctx *xsk_get_ctx(struct xsk_umem *umem, int ifindex,
+ __u32 queue_id)
+{
+ struct xsk_ctx *ctx;
+
+ if (list_empty(&umem->ctx_list))
+ return NULL;
+
+ list_for_each_entry(ctx, &umem->ctx_list, list) {
+ if (ctx->ifindex == ifindex && ctx->queue_id == queue_id) {
+ ctx->refcount++;
+ return ctx;
+ }
+ }
+
+ return NULL;
+}
+
+static void xsk_put_ctx(struct xsk_ctx *ctx)
+{
+ struct xsk_umem *umem = ctx->umem;
+ struct xdp_mmap_offsets off;
+ int err;
+
+ if (--ctx->refcount == 0) {
+ err = xsk_get_mmap_offsets(umem->fd, &off);
+ if (!err) {
+ munmap(ctx->fill->ring - off.fr.desc,
+ off.fr.desc + umem->config.fill_size *
+ sizeof(__u64));
+ munmap(ctx->comp->ring - off.cr.desc,
+ off.cr.desc + umem->config.comp_size *
+ sizeof(__u64));
+ }
+
+ list_del(&ctx->list);
+ free(ctx);
+ }
+}
+
+static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk,
+ struct xsk_umem *umem, int ifindex,
+ const char *ifname, __u32 queue_id,
+ struct xsk_ring_prod *fill,
+ struct xsk_ring_cons *comp)
+{
+ struct xsk_ctx *ctx;
+ int err;
+
+ ctx = calloc(1, sizeof(*ctx));
+ if (!ctx)
+ return NULL;
+
+ if (!umem->fill_save) {
+ err = xsk_create_umem_rings(umem, xsk->fd, fill, comp);
+ if (err) {
+ free(ctx);
+ return NULL;
+ }
+ } else if (umem->fill_save != fill || umem->comp_save != comp) {
+ /* Copy over rings to new structs. */
+ memcpy(fill, umem->fill_save, sizeof(*fill));
+ memcpy(comp, umem->comp_save, sizeof(*comp));
+ }
+
+ ctx->ifindex = ifindex;
+ ctx->refcount = 1;
+ ctx->umem = umem;
+ ctx->queue_id = queue_id;
+ memcpy(ctx->ifname, ifname, IFNAMSIZ - 1);
+ ctx->ifname[IFNAMSIZ - 1] = '\0';
+
+ umem->fill_save = NULL;
+ umem->comp_save = NULL;
+ ctx->fill = fill;
+ ctx->comp = comp;
+ list_add(&ctx->list, &umem->ctx_list);
+ return ctx;
+}
+
+int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
+ const char *ifname,
+ __u32 queue_id, struct xsk_umem *umem,
+ struct xsk_ring_cons *rx,
+ struct xsk_ring_prod *tx,
+ struct xsk_ring_prod *fill,
+ struct xsk_ring_cons *comp,
+ const struct xsk_socket_config *usr_config)
{
void *rx_map = NULL, *tx_map = NULL;
struct sockaddr_xdp sxdp = {};
struct xdp_mmap_offsets off;
struct xsk_socket *xsk;
- int err;
+ struct xsk_ctx *ctx;
+ int err, ifindex;
if (!umem || !xsk_ptr || !(rx || tx))
return -EFAULT;
@@ -609,10 +716,10 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
if (err)
goto out_xsk_alloc;
- if (umem->refcount &&
- !(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
- pr_warn("Error: shared umems not supported by libbpf supplied XDP program.\n");
- err = -EBUSY;
+ xsk->outstanding_tx = 0;
+ ifindex = if_nametoindex(ifname);
+ if (!ifindex) {
+ err = -errno;
goto out_xsk_alloc;
}
@@ -626,16 +733,21 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
xsk->fd = umem->fd;
}
- xsk->outstanding_tx = 0;
- xsk->queue_id = queue_id;
- xsk->umem = umem;
- xsk->ifindex = if_nametoindex(ifname);
- if (!xsk->ifindex) {
- err = -errno;
- goto out_socket;
+ ctx = xsk_get_ctx(umem, ifindex, queue_id);
+ if (!ctx) {
+ if (!fill || !comp) {
+ err = -EFAULT;
+ goto out_socket;
+ }
+
+ ctx = xsk_create_ctx(xsk, umem, ifindex, ifname, queue_id,
+ fill, comp);
+ if (!ctx) {
+ err = -ENOMEM;
+ goto out_socket;
+ }
}
- memcpy(xsk->ifname, ifname, IFNAMSIZ - 1);
- xsk->ifname[IFNAMSIZ - 1] = '\0';
+ xsk->ctx = ctx;
if (rx) {
err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING,
@@ -643,7 +755,7 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
sizeof(xsk->config.rx_size));
if (err) {
err = -errno;
- goto out_socket;
+ goto out_put_ctx;
}
}
if (tx) {
@@ -652,14 +764,14 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
sizeof(xsk->config.tx_size));
if (err) {
err = -errno;
- goto out_socket;
+ goto out_put_ctx;
}
}
err = xsk_get_mmap_offsets(xsk->fd, &off);
if (err) {
err = -errno;
- goto out_socket;
+ goto out_put_ctx;
}
if (rx) {
@@ -669,7 +781,7 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
xsk->fd, XDP_PGOFF_RX_RING);
if (rx_map == MAP_FAILED) {
err = -errno;
- goto out_socket;
+ goto out_put_ctx;
}
rx->mask = xsk->config.rx_size - 1;
@@ -708,10 +820,10 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
xsk->tx = tx;
sxdp.sxdp_family = PF_XDP;
- sxdp.sxdp_ifindex = xsk->ifindex;
- sxdp.sxdp_queue_id = xsk->queue_id;
+ sxdp.sxdp_ifindex = ctx->ifindex;
+ sxdp.sxdp_queue_id = ctx->queue_id;
if (umem->refcount > 1) {
- sxdp.sxdp_flags = XDP_SHARED_UMEM;
+ sxdp.sxdp_flags |= XDP_SHARED_UMEM;
sxdp.sxdp_shared_umem_fd = umem->fd;
} else {
sxdp.sxdp_flags = xsk->config.bind_flags;
@@ -723,7 +835,7 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
goto out_mmap_tx;
}
- xsk->prog_fd = -1;
+ ctx->prog_fd = -1;
if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
err = xsk_setup_xdp_prog(xsk);
@@ -742,6 +854,8 @@ out_mmap_rx:
if (rx)
munmap(rx_map, off.rx.desc +
xsk->config.rx_size * sizeof(struct xdp_desc));
+out_put_ctx:
+ xsk_put_ctx(ctx);
out_socket:
if (--umem->refcount)
close(xsk->fd);
@@ -750,25 +864,24 @@ out_xsk_alloc:
return err;
}
-int xsk_umem__delete(struct xsk_umem *umem)
+int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
+ __u32 queue_id, struct xsk_umem *umem,
+ struct xsk_ring_cons *rx, struct xsk_ring_prod *tx,
+ const struct xsk_socket_config *usr_config)
{
- struct xdp_mmap_offsets off;
- int err;
+ return xsk_socket__create_shared(xsk_ptr, ifname, queue_id, umem,
+ rx, tx, umem->fill_save,
+ umem->comp_save, usr_config);
+}
+int xsk_umem__delete(struct xsk_umem *umem)
+{
if (!umem)
return 0;
if (umem->refcount)
return -EBUSY;
- err = xsk_get_mmap_offsets(umem->fd, &off);
- if (!err) {
- munmap(umem->fill->ring - off.fr.desc,
- off.fr.desc + umem->config.fill_size * sizeof(__u64));
- munmap(umem->comp->ring - off.cr.desc,
- off.cr.desc + umem->config.comp_size * sizeof(__u64));
- }
-
close(umem->fd);
free(umem);
@@ -778,15 +891,16 @@ int xsk_umem__delete(struct xsk_umem *umem)
void xsk_socket__delete(struct xsk_socket *xsk)
{
size_t desc_sz = sizeof(struct xdp_desc);
+ struct xsk_ctx *ctx = xsk->ctx;
struct xdp_mmap_offsets off;
int err;
if (!xsk)
return;
- if (xsk->prog_fd != -1) {
+ if (ctx->prog_fd != -1) {
xsk_delete_bpf_maps(xsk);
- close(xsk->prog_fd);
+ close(ctx->prog_fd);
}
err = xsk_get_mmap_offsets(xsk->fd, &off);
@@ -799,14 +913,15 @@ void xsk_socket__delete(struct xsk_socket *xsk)
munmap(xsk->tx->ring - off.tx.desc,
off.tx.desc + xsk->config.tx_size * desc_sz);
}
-
}
- xsk->umem->refcount--;
+ xsk_put_ctx(ctx);
+
+ ctx->umem->refcount--;
/* Do not close an fd that also has an associated umem connected
* to it.
*/
- if (xsk->fd != xsk->umem->fd)
+ if (xsk->fd != ctx->umem->fd)
close(xsk->fd);
free(xsk);
}
diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h
index 584f6820a639..1069c46364ff 100644
--- a/tools/lib/bpf/xsk.h
+++ b/tools/lib/bpf/xsk.h
@@ -234,6 +234,15 @@ LIBBPF_API int xsk_socket__create(struct xsk_socket **xsk,
struct xsk_ring_cons *rx,
struct xsk_ring_prod *tx,
const struct xsk_socket_config *config);
+LIBBPF_API int
+xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
+ const char *ifname,
+ __u32 queue_id, struct xsk_umem *umem,
+ struct xsk_ring_cons *rx,
+ struct xsk_ring_prod *tx,
+ struct xsk_ring_prod *fill,
+ struct xsk_ring_cons *comp,
+ const struct xsk_socket_config *config);
/* Returns 0 for success and -EBUSY if the umem is still in use. */
LIBBPF_API int xsk_umem__delete(struct xsk_umem *umem);
diff --git a/tools/lib/subcmd/help.c b/tools/lib/subcmd/help.c
index 2859f107abc8..bf02d62a3b2b 100644
--- a/tools/lib/subcmd/help.c
+++ b/tools/lib/subcmd/help.c
@@ -65,12 +65,14 @@ void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes)
ci = cj = ei = 0;
while (ci < cmds->cnt && ei < excludes->cnt) {
cmp = strcmp(cmds->names[ci]->name, excludes->names[ei]->name);
- if (cmp < 0)
+ if (cmp < 0) {
cmds->names[cj++] = cmds->names[ci++];
- else if (cmp == 0)
- ci++, ei++;
- else if (cmp > 0)
+ } else if (cmp == 0) {
+ ci++;
ei++;
+ } else if (cmp > 0) {
+ ei++;
+ }
}
while (ci < cmds->cnt)
diff --git a/tools/memory-model/Documentation/cheatsheet.txt b/tools/memory-model/Documentation/cheatsheet.txt
index 33ba98d72b16..99d00870b160 100644
--- a/tools/memory-model/Documentation/cheatsheet.txt
+++ b/tools/memory-model/Documentation/cheatsheet.txt
@@ -3,9 +3,9 @@
C Self R W RMW Self R W DR DW RMW SV
-- ---- - - --- ---- - - -- -- --- --
-Store, e.g., WRITE_ONCE() Y Y
-Load, e.g., READ_ONCE() Y Y Y Y
-Unsuccessful RMW operation Y Y Y Y
+Relaxed store Y Y
+Relaxed load Y Y Y Y
+Relaxed RMW operation Y Y Y Y
rcu_dereference() Y Y Y Y
Successful *_acquire() R Y Y Y Y Y Y
Successful *_release() C Y Y Y W Y
@@ -17,14 +17,19 @@ smp_mb__before_atomic() CP Y Y Y a a a a Y
smp_mb__after_atomic() CP a a Y Y Y Y Y Y
-Key: C: Ordering is cumulative
- P: Ordering propagates
- R: Read, for example, READ_ONCE(), or read portion of RMW
- W: Write, for example, WRITE_ONCE(), or write portion of RMW
- Y: Provides ordering
- a: Provides ordering given intervening RMW atomic operation
- DR: Dependent read (address dependency)
- DW: Dependent write (address, data, or control dependency)
- RMW: Atomic read-modify-write operation
- SELF: Orders self, as opposed to accesses before and/or after
- SV: Orders later accesses to the same variable
+Key: Relaxed: A relaxed operation is either READ_ONCE(), WRITE_ONCE(),
+ a *_relaxed() RMW operation, an unsuccessful RMW
+ operation, a non-value-returning RMW operation such
+ as atomic_inc(), or one of the atomic*_read() and
+ atomic*_set() family of operations.
+ C: Ordering is cumulative
+ P: Ordering propagates
+ R: Read, for example, READ_ONCE(), or read portion of RMW
+ W: Write, for example, WRITE_ONCE(), or write portion of RMW
+ Y: Provides ordering
+ a: Provides ordering given intervening RMW atomic operation
+ DR: Dependent read (address dependency)
+ DW: Dependent write (address, data, or control dependency)
+ RMW: Atomic read-modify-write operation
+ SELF: Orders self, as opposed to accesses before and/or after
+ SV: Orders later accesses to the same variable
diff --git a/tools/memory-model/Documentation/litmus-tests.txt b/tools/memory-model/Documentation/litmus-tests.txt
new file mode 100644
index 000000000000..2f840dcd15cf
--- /dev/null
+++ b/tools/memory-model/Documentation/litmus-tests.txt
@@ -0,0 +1,1074 @@
+Linux-Kernel Memory Model Litmus Tests
+======================================
+
+This file describes the LKMM litmus-test format by example, describes
+some tricks and traps, and finally outlines LKMM's limitations. Earlier
+versions of this material appeared in a number of LWN articles, including:
+
+https://lwn.net/Articles/720550/
+ A formal kernel memory-ordering model (part 2)
+https://lwn.net/Articles/608550/
+ Axiomatic validation of memory barriers and atomic instructions
+https://lwn.net/Articles/470681/
+ Validating Memory Barriers and Atomic Instructions
+
+This document presents information in decreasing order of applicability,
+so that, where possible, the information that has proven more commonly
+useful is shown near the beginning.
+
+For information on installing LKMM, including the underlying "herd7"
+tool, please see tools/memory-model/README.
+
+
+Copy-Pasta
+==========
+
+As with other software, it is often better (if less macho) to adapt an
+existing litmus test than it is to create one from scratch. A number
+of litmus tests may be found in the kernel source tree:
+
+ tools/memory-model/litmus-tests/
+ Documentation/litmus-tests/
+
+Several thousand more example litmus tests are available on github
+and kernel.org:
+
+ https://github.com/paulmckrcu/litmus
+ https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/perfbook.git/tree/CodeSamples/formal/herd
+ https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/perfbook.git/tree/CodeSamples/formal/litmus
+
+The -l and -L arguments to "git grep" can be quite helpful in identifying
+existing litmus tests that are similar to the one you need. But even if
+you start with an existing litmus test, it is still helpful to have a
+good understanding of the litmus-test format.
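+
+For example, the following command (run from the top level of a
+Linux-kernel source tree, with an illustrative search string) lists
+the in-tree litmus tests that use rcu_dereference(), which can be a
+good starting point for a new RCU-based test:
+
+	git grep -l rcu_dereference -- tools/memory-model/litmus-tests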
+
+
+Examples and Format
+===================
+
+This section describes the overall format of litmus tests, starting
+with a small example of the message-passing pattern and moving on to
+more complex examples that illustrate explicit initialization and LKMM's
+minimalistic set of flow-control statements.
+
+
+Message-Passing Example
+-----------------------
+
+This section gives an overview of the format of a litmus test using an
+example based on the common message-passing use case. This use case
+appears often in the Linux kernel. For example, a flag (modeled by "y"
+below) indicates that a buffer (modeled by "x" below) is now completely
+filled in and ready for use. It would be very bad if the consumer saw the
+flag set, but, due to memory misordering, saw old values in the buffer.
+
+This example asks whether smp_store_release() and smp_load_acquire()
+suffice to avoid this bad outcome:
+
+ 1 C MP+pooncerelease+poacquireonce
+ 2
+ 3 {}
+ 4
+ 5 P0(int *x, int *y)
+ 6 {
+ 7 WRITE_ONCE(*x, 1);
+ 8 smp_store_release(y, 1);
+ 9 }
+10
+11 P1(int *x, int *y)
+12 {
+13 int r0;
+14 int r1;
+15
+16 r0 = smp_load_acquire(y);
+17 r1 = READ_ONCE(*x);
+18 }
+19
+20 exists (1:r0=1 /\ 1:r1=0)
+
+Line 1 starts with "C", which identifies this file as being in the
+LKMM C-language format (which, as we will see, is a small fragment
+of the full C language). The remainder of line 1 is the name of
+the test, which by convention is the filename with the ".litmus"
+suffix stripped. In this case, the actual test may be found in
+tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus
+in the Linux-kernel source tree.
+
+Mechanically generated litmus tests will often have an optional
+double-quoted comment string on the second line. Such strings are ignored
+when running the test. Yes, you can add your own comments to litmus
+tests, but this is a bit involved due to the use of multiple parsers.
+For now, you can use C-language comments in the C code, and these comments
+may be in either the "/* */" or the "//" style. A later section will
+cover the full litmus-test commenting story.
+
+Line 3 is the initialization section. Because the default initialization
+to zero suffices for this test, the "{}" syntax is used, which means
+that the initialization section is empty. Litmus tests requiring
+non-default initialization must have non-empty initialization sections,
+as in the example that will be presented later in this document.
+
+Lines 5-9 show the first process and lines 11-18 the second process. Each
+process corresponds to a Linux-kernel task (or kthread, workqueue, thread,
+and so on; LKMM discussions often use these terms interchangeably).
+The name of the first process is "P0" and that of the second "P1".
+You can name your processes anything you like as long as the names consist
+of a single "P" followed by a number, and as long as the numbers are
+consecutive starting with zero. This can actually be quite helpful:
+for example, a .litmus file matching "^P1(" but not matching "^P2("
+must contain a two-process litmus test.
+
+The arguments for each function are pointers to the global variables
+used by that function. Unlike normal C-language function parameters, the
+names are significant. The fact that both P0() and P1() have a formal
+parameter named "x" means that these two processes are working with the
+same global variable, also named "x". So the "int *x, int *y" on P0()
+and P1() mean that both processes are working with two shared global
+variables, "x" and "y". Global variables are always passed to processes
+by reference, hence "P0(int *x, int *y)", but *never* "P0(int x, int y)".
+
+P0() has no local variables, but P1() has two of them named "r0" and "r1".
+These names may be freely chosen, but for historical reasons stemming from
+other litmus-test formats, it is conventional to use names consisting of
+"r" followed by a number as shown here. A common bug in litmus tests
+is forgetting to add a global variable to a process's parameter list.
+This will sometimes result in an error message, but can also cause the
+intended global to instead be silently treated as an undeclared local
+variable.
+
+Each process's code is similar to Linux-kernel C, as can be seen on lines
+7-8 and 13-17. This code may use many of the Linux kernel's atomic
+operations, some of its exclusive-lock functions, and some of its RCU
+and SRCU functions. An approximate list of the currently supported
+functions may be found in the linux-kernel.def file.
+
+The P0() process does "WRITE_ONCE(*x, 1)" on line 7. Because "x" is a
+pointer in P0()'s parameter list, this does an unordered store to global
+variable "x". Line 8 does "smp_store_release(y, 1)", and because "y"
+is also in P0()'s parameter list, this does a release store to global
+variable "y".
+
+The P1() process declares two local variables on lines 13 and 14.
+Line 16 does "r0 = smp_load_acquire(y)" which does an acquire load
+from global variable "y" into local variable "r0". Line 17 does a
+"r1 = READ_ONCE(*x)", which does an unordered load from "*x" into local
+variable "r1". Both "x" and "y" are in P1()'s parameter list, so both
+reference the same global variables that are used by P0().
+
+Line 20 is the "exists" assertion expression to evaluate the final state.
+This final state is evaluated after the dust has settled: both processes
+have completed and all of their memory references and memory barriers
+have propagated to all parts of the system. The references to the local
+variables "r0" and "r1" in line 24 must be prefixed with "1:" to specify
+which process they are local to.
+
+Note that the assertion expression is written in the litmus-test
+language rather than in C. For example, single "=" is an equality
+operator rather than an assignment. The "/\" character combination means
+"and". Similarly, "\/" stands for "or". Both of these are ASCII-art
+representations of the corresponding mathematical symbols. Finally,
+"~" stands for "logical not", which is "!" in C, and not to be confused
+with the C-language "~" operator which instead stands for "bitwise not".
+Parentheses may be used to override precedence.
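+
+For example, because "r1" can take on only the values zero and one in
+this test, the "exists" clause on line 20 could equivalently (if less
+idiomatically) be written using negation:
+
+	exists (1:r0=1 /\ ~(1:r1=1))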
+
+The "exists" assertion on line 20 is satisfied if the consumer sees the
+flag ("y") set but the buffer ("x") as not yet filled in, that is, if P1()
+loaded a value from "x" that was equal to 1 but loaded a value from "y"
+that was still equal to zero.
+
+This example can be checked by running the following command, which
+absolutely must be run from the tools/memory-model directory and from
+this directory only:
+
+herd7 -conf linux-kernel.cfg litmus-tests/MP+pooncerelease+poacquireonce.litmus
+
+The output is the result of something similar to a full state-space
+search, and is as follows:
+
+ 1 Test MP+pooncerelease+poacquireonce Allowed
+ 2 States 3
+ 3 1:r0=0; 1:r1=0;
+ 4 1:r0=0; 1:r1=1;
+ 5 1:r0=1; 1:r1=1;
+ 6 No
+ 7 Witnesses
+ 8 Positive: 0 Negative: 3
+ 9 Condition exists (1:r0=1 /\ 1:r1=0)
+10 Observation MP+pooncerelease+poacquireonce Never 0 3
+11 Time MP+pooncerelease+poacquireonce 0.00
+12 Hash=579aaa14d8c35a39429b02e698241d09
+
+The most pertinent line is line 10, which contains "Never 0 3", which
+indicates that the bad result flagged by the "exists" clause never
+happens. This line might instead say "Sometimes" to indicate that the
+bad result happened in some but not all executions, or it might say
+"Always" to indicate that the bad result happened in all executions.
+(The herd7 tool doesn't judge, so it is only an LKMM convention that the
+"exists" clause indicates a bad result. To see this, invert the "exists"
+clause's condition and run the test.) The numbers ("0 3") at the end
+of this line indicate the number of end states satisfying the "exists"
+clause (0) and the number not satisfying that clause (3).
+
+Another important part of this output is shown in lines 2-5, repeated here:
+
+ 2 States 3
+ 3 1:r0=0; 1:r1=0;
+ 4 1:r0=0; 1:r1=1;
+ 5 1:r0=1; 1:r1=1;
+
+Line 2 gives the total number of end states, and each of lines 3-5 lists
+one of these states, with the first ("1:r0=0; 1:r1=0;") indicating that
+both of P1()'s loads returned the value "0". As expected, given the
+"Never" on line 10, the state flagged by the "exists" clause is not
+listed. This full list of states can be helpful when debugging a new
+litmus test.
+
+The rest of the output is not normally needed, either due to irrelevance
+or due to being redundant with the lines discussed above. However, the
+following paragraph lists them for the benefit of readers possessed of
+an insatiable curiosity. Other readers should feel free to skip ahead.
+
+Line 1 echoes the test name, along with the words "Test" and "Allowed". Line 6's
+"No" says that the "exists" clause was not satisfied by any execution,
+and as such it has the same meaning as line 10's "Never". Line 7 is a
+lead-in to line 8's "Positive: 0 Negative: 3", which lists the number
+of end states satisfying and not satisfying the "exists" clause, just
+like the two numbers at the end of line 10. Line 9 repeats the "exists"
+clause so that you don't have to look it up in the litmus-test file.
+The number at the end of line 11 (which begins with "Time") gives the
+time in seconds required to analyze the litmus test. Small tests such
+as this one complete in a few milliseconds, so "0.00" is quite common.
+Line 12 gives a hash of the contents of the litmus-test file, and is used
+by tooling that manages litmus tests and their output. This tooling is
+used by people modifying LKMM itself, and among other things lets such
+people know which of the several thousand relevant litmus tests were
+affected by a given change to LKMM.
+
+
+Initialization
+--------------
+
+The previous example relied on the default zero initialization for
+"x" and "y", but a similar litmus test could instead initialize them
+to some other value:
+
+ 1 C MP+pooncerelease+poacquireonce
+ 2
+ 3 {
+ 4 x=42;
+ 5 y=42;
+ 6 }
+ 7
+ 8 P0(int *x, int *y)
+ 9 {
+10 WRITE_ONCE(*x, 1);
+11 smp_store_release(y, 1);
+12 }
+13
+14 P1(int *x, int *y)
+15 {
+16 int r0;
+17 int r1;
+18
+19 r0 = smp_load_acquire(y);
+20 r1 = READ_ONCE(*x);
+21 }
+22
+23 exists (1:r0=1 /\ 1:r1=42)
+
+Lines 3-6 now initialize both "x" and "y" to the value 42. This also
+means that the "exists" clause on line 23 must change "1:r1=0" to
+"1:r1=42".
+
+Running the test gives the same overall result as before, but with the
+value 42 appearing in place of the value zero:
+
+ 1 Test MP+pooncerelease+poacquireonce Allowed
+ 2 States 3
+ 3 1:r0=1; 1:r1=1;
+ 4 1:r0=42; 1:r1=1;
+ 5 1:r0=42; 1:r1=42;
+ 6 No
+ 7 Witnesses
+ 8 Positive: 0 Negative: 3
+ 9 Condition exists (1:r0=1 /\ 1:r1=42)
+10 Observation MP+pooncerelease+poacquireonce Never 0 3
+11 Time MP+pooncerelease+poacquireonce 0.02
+12 Hash=ab9a9b7940a75a792266be279a980156
+
+It is tempting to avoid the open-coded repetitions of the value "42"
+by defining another global variable "initval=42" and replacing all
+occurrences of "42" with "initval". This will not, repeat *not*,
+initialize "x" and "y" to 42, but instead to the address of "initval"
+(try it!). See the section below on linked lists to learn more about
+why this approach to initialization can be useful.
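+
+Concretely, the tempting-but-wrong initialization section would look
+something like this:
+
+	{
+	initval=42;
+	x=initval;
+	y=initval;
+	}
+
+This sets "x" and "y" not to 42, but rather to the address of
+"initval".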
+
+
+Control Structures
+------------------
+
+LKMM supports the C-language "if" statement, which allows modeling of
+conditional branches. In LKMM, conditional branches can affect ordering,
+but only if you are *very* careful (compilers are surprisingly able
+to optimize away conditional branches). The following example shows
+the "load buffering" (LB) use case that is used in the Linux kernel to
+synchronize between ring-buffer producers and consumers. In the example
+below, P0() is one side checking to see if an operation may proceed and
+P1() is the other side completing its update.
+
+ 1 C LB+fencembonceonce+ctrlonceonce
+ 2
+ 3 {}
+ 4
+ 5 P0(int *x, int *y)
+ 6 {
+ 7 int r0;
+ 8
+ 9 r0 = READ_ONCE(*x);
+10 if (r0)
+11 WRITE_ONCE(*y, 1);
+12 }
+13
+14 P1(int *x, int *y)
+15 {
+16 int r0;
+17
+18 r0 = READ_ONCE(*y);
+19 smp_mb();
+20 WRITE_ONCE(*x, 1);
+21 }
+22
+23 exists (0:r0=1 /\ 1:r0=1)
+
+P1()'s "if" statement on line 10 works as expected, so that line 11 is
+executed only if line 9 loads a non-zero value from "x". Because P1()'s
+write of "1" to "x" happens only after P1()'s read from "y", one would
+hope that the "exists" clause cannot be satisfied. LKMM agrees:
+
+ 1 Test LB+fencembonceonce+ctrlonceonce Allowed
+ 2 States 2
+ 3 0:r0=0; 1:r0=0;
+ 4 0:r0=1; 1:r0=0;
+ 5 No
+ 6 Witnesses
+ 7 Positive: 0 Negative: 2
+ 8 Condition exists (0:r0=1 /\ 1:r0=1)
+ 9 Observation LB+fencembonceonce+ctrlonceonce Never 0 2
+10 Time LB+fencembonceonce+ctrlonceonce 0.00
+11 Hash=e5260556f6de495fd39b556d1b831c3b
+
+In contrast, there is no "while" statement, because full state-space
+search has some difficulty with iteration. There are, however, tricks
+that may be used to handle some special cases, as discussed below.
+In addition, loop-unrolling tricks may be applied, albeit sparingly.
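+
+For example, although a litmus test cannot contain "for (i = 0; i < 2;
+i++) ...", a small fixed iteration count can be unrolled by hand, as in
+this hypothetical fragment:
+
+	WRITE_ONCE(*x, 1); /* Unrolled iteration 0. */
+	WRITE_ONCE(*x, 2); /* Unrolled iteration 1. */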
+
+
+Tricks and Traps
+================
+
+This section covers extracting debug output from herd7, emulating
+spin loops, handling trivial linked lists, adding comments to litmus tests,
+emulating call_rcu(), and finally tricks to improve herd7 performance
+in order to better handle large litmus tests.
+
+
+Debug Output
+------------
+
+By default, the herd7 state output includes all variables mentioned
+in the "exists" clause. But sometimes debugging efforts are greatly
+aided by the values of other variables. Consider this litmus test
+(tools/memory-model/litmus-tests/SB+rfionceonce-poonceonces.litmus but
+slightly modified), which probes an obscure corner of hardware memory
+ordering:
+
+ 1 C SB+rfionceonce-poonceonces
+ 2
+ 3 {}
+ 4
+ 5 P0(int *x, int *y)
+ 6 {
+ 7 int r1;
+ 8 int r2;
+ 9
+10 WRITE_ONCE(*x, 1);
+11 r1 = READ_ONCE(*x);
+12 r2 = READ_ONCE(*y);
+13 }
+14
+15 P1(int *x, int *y)
+16 {
+17 int r3;
+18 int r4;
+19
+20 WRITE_ONCE(*y, 1);
+21 r3 = READ_ONCE(*y);
+22 r4 = READ_ONCE(*x);
+23 }
+24
+25 exists (0:r2=0 /\ 1:r4=0)
+
+The herd7 output is as follows:
+
+ 1 Test SB+rfionceonce-poonceonces Allowed
+ 2 States 4
+ 3 0:r2=0; 1:r4=0;
+ 4 0:r2=0; 1:r4=1;
+ 5 0:r2=1; 1:r4=0;
+ 6 0:r2=1; 1:r4=1;
+ 7 Ok
+ 8 Witnesses
+ 9 Positive: 1 Negative: 3
+10 Condition exists (0:r2=0 /\ 1:r4=0)
+11 Observation SB+rfionceonce-poonceonces Sometimes 1 3
+12 Time SB+rfionceonce-poonceonces 0.01
+13 Hash=c7f30fe0faebb7d565405d55b7318ada
+
+(This output indicates that CPUs are permitted to "snoop their own
+store buffers", which all of Linux's CPU families other than s390 will
+happily do. Such snooping results in disagreement among CPUs on the
+order of stores from different CPUs, which is rarely an issue.)
+
+But the herd7 output shows only the two variables mentioned in the
+"exists" clause. Someone modifying this test might wish to know the
+values of "x", "y", "0:r1", and "0:r3" as well. The "locations"
+statement on line 25 shows how to cause herd7 to display additional
+variables:
+
+ 1 C SB+rfionceonce-poonceonces
+ 2
+ 3 {}
+ 4
+ 5 P0(int *x, int *y)
+ 6 {
+ 7 int r1;
+ 8 int r2;
+ 9
+10 WRITE_ONCE(*x, 1);
+11 r1 = READ_ONCE(*x);
+12 r2 = READ_ONCE(*y);
+13 }
+14
+15 P1(int *x, int *y)
+16 {
+17 int r3;
+18 int r4;
+19
+20 WRITE_ONCE(*y, 1);
+21 r3 = READ_ONCE(*y);
+22 r4 = READ_ONCE(*x);
+23 }
+24
+25 locations [0:r1; 1:r3; x; y]
+26 exists (0:r2=0 /\ 1:r4=0)
+
+The herd7 output then displays the values of all the variables:
+
+ 1 Test SB+rfionceonce-poonceonces Allowed
+ 2 States 4
+ 3 0:r1=1; 0:r2=0; 1:r3=1; 1:r4=0; x=1; y=1;
+ 4 0:r1=1; 0:r2=0; 1:r3=1; 1:r4=1; x=1; y=1;
+ 5 0:r1=1; 0:r2=1; 1:r3=1; 1:r4=0; x=1; y=1;
+ 6 0:r1=1; 0:r2=1; 1:r3=1; 1:r4=1; x=1; y=1;
+ 7 Ok
+ 8 Witnesses
+ 9 Positive: 1 Negative: 3
+10 Condition exists (0:r2=0 /\ 1:r4=0)
+11 Observation SB+rfionceonce-poonceonces Sometimes 1 3
+12 Time SB+rfionceonce-poonceonces 0.01
+13 Hash=40de8418c4b395388f6501cafd1ed38d
+
+What if you would like to know the value of a particular global variable
+at some particular point in a given process's execution? One approach
+is to use a READ_ONCE() to load that global variable into a new local
+variable, then add that local variable to the "locations" clause.
+But be careful: In some litmus tests, adding a READ_ONCE() will change
+the outcome! For one example, please see the C-READ_ONCE.litmus and
+C-READ_ONCE-omitted.litmus tests located here:
+
+ https://github.com/paulmckrcu/litmus/blob/master/manual/kernel/
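+
+As a hypothetical sketch of this trick, one might add the following
+line at the point of interest in P0() (the name "r9" is invented for
+illustration):
+
+	r9 = READ_ONCE(*y); /* Debug: capture the value of "y" here. */
+
+and then add "0:r9" to the "locations" clause, keeping in mind the
+caution above about READ_ONCE() sometimes changing the outcome.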
+
+
+Spin Loops
+----------
+
+The analysis carried out by herd7 explores the full state space, which
+is at best of exponential time complexity. Adding processes and
+increasing the amount of code in a given process can greatly increase
+execution time.
+Potentially infinite loops, such as those used to wait for locks to
+become available, are clearly problematic.
+
+Fortunately, it is possible to avoid state-space explosion by specially
+modeling such loops. For example, the following litmus test emulates
+locking using xchg_acquire(), but instead of enclosing xchg_acquire()
+in a spin loop, it instead excludes executions that fail to acquire the
+lock using a herd7 "filter" clause. Note that for exclusive locking, you
+are better off using the spin_lock() and spin_unlock() that LKMM directly
+models, if for no other reason than that they are much faster. However, the
+techniques illustrated in this section can be used for other purposes,
+such as emulating reader-writer locking, which LKMM does not yet model.
+
+ 1 C C-SB+l-o-o-u+l-o-o-u-X
+ 2
+ 3 {
+ 4 }
+ 5
+ 6 P0(int *sl, int *x0, int *x1)
+ 7 {
+ 8 int r2;
+ 9 int r1;
+10
+11 r2 = xchg_acquire(sl, 1);
+12 WRITE_ONCE(*x0, 1);
+13 r1 = READ_ONCE(*x1);
+14 smp_store_release(sl, 0);
+15 }
+16
+17 P1(int *sl, int *x0, int *x1)
+18 {
+19 int r2;
+20 int r1;
+21
+22 r2 = xchg_acquire(sl, 1);
+23 WRITE_ONCE(*x1, 1);
+24 r1 = READ_ONCE(*x0);
+25 smp_store_release(sl, 0);
+26 }
+27
+28 filter (0:r2=0 /\ 1:r2=0)
+29 exists (0:r1=0 /\ 1:r1=0)
+
+This litmus test may be found here:
+
+https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/perfbook.git/tree/CodeSamples/formal/herd/C-SB+l-o-o-u+l-o-o-u-X.litmus
+
+This test uses two global variables, "x0" and "x1", and also emulates a
+single global spinlock named "sl". This spinlock is held by whichever
+process changes the value of "sl" from "0" to "1", and is released when
+that process sets "sl" back to "0". P0()'s lock acquisition is emulated
+on line 11 using xchg_acquire(), which unconditionally stores the value
+"1" to "sl" and stores either "0" or "1" to "r2", depending on whether
+the lock acquisition was successful or unsuccessful (due to "sl" already
+having the value "1"), respectively. P1() operates in a similar manner.
+
+Rather unconventionally, execution appears to proceed to the critical
+section on lines 12 and 13 in either case. Line 14 then uses an
+smp_store_release() to store zero to "sl", thus emulating lock release.
+
+The case where xchg_acquire() fails to acquire the lock is handled by
+the "filter" clause on line 28, which tells herd7 to keep only those
+executions in which both "0:r2" and "1:r2" are zero, that is, to pay
+attention only to those executions in which both lock acquisitions
+actually succeeded. Thus, the bogus executions that would have run
+their critical sections without holding the lock are discarded, and
+any effects that they might have had are ignored. Note well that the
+"filter" clause keeps those executions
+for which its expression is satisfied, that is, for which the expression
+evaluates to true. In other words, the "filter" clause says what to
+keep, not what to discard.
+
+The result of running this test is as follows:
+
+ 1 Test C-SB+l-o-o-u+l-o-o-u-X Allowed
+ 2 States 2
+ 3 0:r1=0; 1:r1=1;
+ 4 0:r1=1; 1:r1=0;
+ 5 No
+ 6 Witnesses
+ 7 Positive: 0 Negative: 2
+ 8 Condition exists (0:r1=0 /\ 1:r1=0)
+ 9 Observation C-SB+l-o-o-u+l-o-o-u-X Never 0 2
+10 Time C-SB+l-o-o-u+l-o-o-u-X 0.03
+
+The "Never" on line 9 indicates that this use of xchg_acquire() and
+smp_store_release() really does correctly emulate locking.
+
+Why doesn't the litmus test take the simpler approach of using a spin loop
+to handle failed spinlock acquisitions, like the kernel does? The key
+insight behind this litmus test is that spin loops have no effect on the
+possible "exists"-clause outcomes of program execution in the absence
+of deadlock. In other words, given a high-quality lock-acquisition
+primitive in a deadlock-free program running on high-quality hardware,
+each lock acquisition will eventually succeed. Because herd7 already
+explores the full state space, the length of time required to actually
+acquire the lock does not matter. After all, herd7 already models all
+possible durations of the xchg_acquire() statements.
+
+Why not just fold the "filter" expression into the "exists" clause,
+thus avoiding the "filter" clause entirely? This does work, but is
+slower.
+The reason that the "filter" clause is faster is that (in the common case)
+herd7 knows to abandon an execution as soon as the "filter" expression
+fails to be satisfied. In contrast, the "exists" clause is evaluated
+only at the end of time, thus requiring herd7 to waste time on bogus
+executions in which both critical sections proceed concurrently. In
+addition, some LKMM users like the separation of concerns provided by
+using both the "filter" and "exists" clauses.
+
+Readers lacking a pathological interest in odd corner cases should feel
+free to skip the remainder of this section.
+
+But what if the litmus test were to temporarily set "1:r2" to a non-zero
+value? Wouldn't that cause herd7 to abandon the execution prematurely
+due to an early mismatch of the "filter" clause?
+
+Why not just try it? Line 4 of the following modified litmus test
+introduces a new global variable "x2" that is initialized to "1". Line 23
+of P1() reads that variable into "1:r2" to force an early mismatch with
+the "filter" clause. Line 24 does a known-true "if" condition to avoid
+and static analysis that herd7 might do. Finally the "exists" clause
+on line 32 is updated to a condition that is alway satisfied at the end
+of the test.
+
+ 1 C C-SB+l-o-o-u+l-o-o-u-X
+ 2
+ 3 {
+ 4 x2=1;
+ 5 }
+ 6
+ 7 P0(int *sl, int *x0, int *x1)
+ 8 {
+ 9 int r2;
+10 int r1;
+11
+12 r2 = xchg_acquire(sl, 1);
+13 WRITE_ONCE(*x0, 1);
+14 r1 = READ_ONCE(*x1);
+15 smp_store_release(sl, 0);
+16 }
+17
+18 P1(int *sl, int *x0, int *x1, int *x2)
+19 {
+20 int r2;
+21 int r1;
+22
+23 r2 = READ_ONCE(*x2);
+24 if (r2)
+25 r2 = xchg_acquire(sl, 1);
+26 WRITE_ONCE(*x1, 1);
+27 r1 = READ_ONCE(*x0);
+28 smp_store_release(sl, 0);
+29 }
+30
+31 filter (0:r2=0 /\ 1:r2=0)
+32 exists (x1=1)
+
+If the "filter" clause were to check each variable at each point in the
+execution, running this litmus test would display no executions because
+all executions would be filtered out at line 23. However, the output
+is instead as follows:
+
+ 1 Test C-SB+l-o-o-u+l-o-o-u-X Allowed
+ 2 States 1
+ 3 x1=1;
+ 4 Ok
+ 5 Witnesses
+ 6 Positive: 2 Negative: 0
+ 7 Condition exists (x1=1)
+ 8 Observation C-SB+l-o-o-u+l-o-o-u-X Always 2 0
+ 9 Time C-SB+l-o-o-u+l-o-o-u-X 0.04
+10 Hash=080bc508da7f291e122c6de76c0088e3
+
+Line 3 shows that there is one execution that did not get filtered out,
+so the "filter" clause is evaluated only on the last assignment to
+the variables that it checks. In this case, the "filter" clause is a
+conjunction, so it might be evaluated twice, once at the final (and only)
+assignment to "0:r2" and once at the final assignment to "1:r2".
+
+
+Linked Lists
+------------
+
+LKMM can handle linked lists, but only linked lists in which each node
+contains nothing except a pointer to the next node in the list. This is
+of course quite restrictive, but there is nevertheless quite a bit that
+can be done within these confines, as can be seen in the litmus test
+at tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus:
+
+ 1 C MP+onceassign+derefonce
+ 2
+ 3 {
+ 4 y=z;
+ 5 z=0;
+ 6 }
+ 7
+ 8 P0(int *x, int **y)
+ 9 {
+10 WRITE_ONCE(*x, 1);
+11 rcu_assign_pointer(*y, x);
+12 }
+13
+14 P1(int *x, int **y)
+15 {
+16 int *r0;
+17 int r1;
+18
+19 rcu_read_lock();
+20 r0 = rcu_dereference(*y);
+21 r1 = READ_ONCE(*r0);
+22 rcu_read_unlock();
+23 }
+24
+25 exists (1:r0=x /\ 1:r1=0)
+
+Line 4's "y=z" may seem odd, given that "z" has not yet been initialized.
+But "y=z" does not set the value of "y" to that of "z", but instead
+sets the value of "y" to the *address* of "z". Lines 4 and 5 therefore
+create a simple linked list, with "y" pointing to "z" and "z" having a
+NULL pointer. A much longer linked list could be created if desired,
+and circular singly linked lists can also be created and manipulated.
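+
+For example, a hypothetical initialization section such as the
+following creates a longer list headed by "x":
+
+	{
+	x=y;
+	y=z;
+	z=0;
+	}
+
+Here "x" points to "y", "y" points to "z", and "z" holds a NULL
+pointer.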
+
+The "exists" clause works the same way, with the "1:r0=x" comparing P1()'s
+"r0" not to the value of "x", but again to its address. This term of the
+"exists" clause therefore tests whether line 20's load from "y" saw the
+value stored by line 11, which is in fact what is required in this case.
+
+P0()'s line 10 initializes "x" to the value 1, then line 11 makes "y"
+point to "x", replacing "z".
+
+P1()'s line 20 loads a pointer from "y", and line 21 dereferences that
+pointer. The RCU read-side critical section spanning lines 19-22 is just
+for show in this example. Note that the address used for line 21's load
+depends on (in this case, "is exactly the same as") the value loaded by
+line 20. This is an example of what is called an "address dependency".
+This particular address dependency extends from the load on line 20 to the
+load on line 21. Address dependencies provide a weak form of ordering.
+
+Running this test results in the following:
+
+ 1 Test MP+onceassign+derefonce Allowed
+ 2 States 2
+ 3 1:r0=x; 1:r1=1;
+ 4 1:r0=z; 1:r1=0;
+ 5 No
+ 6 Witnesses
+ 7 Positive: 0 Negative: 2
+ 8 Condition exists (1:r0=x /\ 1:r1=0)
+ 9 Observation MP+onceassign+derefonce Never 0 2
+10 Time MP+onceassign+derefonce 0.00
+11 Hash=49ef7a741563570102448a256a0c8568
+
+The only possible outcomes feature P1() loading a pointer to "z"
+(which contains zero) on the one hand and P1() loading a pointer to "x"
+(which contains the value one) on the other. This should be reassuring
+because it says that RCU readers cannot see the old preinitialization
+values when accessing a newly inserted list node. This undesirable
+scenario is flagged by the "exists" clause, and would occur if P1()
+loaded a pointer to "x", but obtained the pre-initialization value of
+zero after dereferencing that pointer.
+
+
+Comments
+--------
+
+Different portions of a litmus test are processed by different parsers,
+which has the charming effect of requiring different comment syntax in
+different portions of the litmus test. The C-syntax portions use
+C-language comments (either "/* */" or "//"), while the other portions
+use Ocaml comments "(* *)".
+
+The following litmus test illustrates the comment style corresponding
+to each syntactic unit of the test:
+
+ 1 C MP+onceassign+derefonce (* A *)
+ 2
+ 3 (* B *)
+ 4
+ 5 {
+ 6 y=z; (* C *)
+ 7 z=0;
+ 8 } // D
+ 9
+10 // E
+11
+12 P0(int *x, int **y) // F
+13 {
+14 WRITE_ONCE(*x, 1); // G
+15 rcu_assign_pointer(*y, x);
+16 }
+17
+18 // H
+19
+20 P1(int *x, int **y)
+21 {
+22 int *r0;
+23 int r1;
+24
+25 rcu_read_lock();
+26 r0 = rcu_dereference(*y);
+27 r1 = READ_ONCE(*r0);
+28 rcu_read_unlock();
+29 }
+30
+31 // I
+32
+33 exists (* J *) (1:r0=x /\ (* K *) 1:r1=0) (* L *)
+
+In short, use C-language comments in the C code and Ocaml comments in
+the rest of the litmus test.
+
+On the other hand, if you prefer C-style comments everywhere, the
+C preprocessor is your friend.
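+
+For example, given a hypothetical master file MP+example.litmus.in
+(the name is invented for illustration) that uses C-style comments
+throughout, the following command strips those comments before the
+test is handed to herd7:
+
+	cpp -P MP+example.litmus.in > MP+example.litmus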
+
+
+Asynchronous RCU Grace Periods
+------------------------------
+
+The following litmus test is derived from the example shown in
+Documentation/litmus-tests/rcu/RCU+sync+free.litmus, but converted to
+emulate call_rcu():
+
+ 1 C RCU+sync+free
+ 2
+ 3 {
+ 4 int x = 1;
+ 5 int *y = &x;
+ 6 int z = 1;
+ 7 }
+ 8
+ 9 P0(int *x, int *z, int **y)
+10 {
+11 int *r0;
+12 int r1;
+13
+14 rcu_read_lock();
+15 r0 = rcu_dereference(*y);
+16 r1 = READ_ONCE(*r0);
+17 rcu_read_unlock();
+18 }
+19
+20 P1(int *z, int **y, int *c)
+21 {
+22 rcu_assign_pointer(*y, z);
+23 smp_store_release(c, 1); // Emulate call_rcu().
+24 }
+25
+26 P2(int *x, int *z, int **y, int *c)
+27 {
+28 int r0;
+29
+30 r0 = smp_load_acquire(c); // Note call_rcu() request.
+31 synchronize_rcu(); // Wait one grace period.
+32 WRITE_ONCE(*x, 0); // Emulate the RCU callback.
+33 }
+34
+35 filter (2:r0=1) (* Reject too-early starts. *)
+36 exists (0:r0=x /\ 0:r1=0)
+
+Lines 4-6 initialize a linked list headed by "y" that initially contains
+"x". In addition, "z" is pre-initialized to prepare for P1(), which
+will replace "x" with "z" in this list.
+
+P0() on lines 9-18 enters an RCU read-side critical section, loads the
+list header "y" and dereferences it, leaving the node in "0:r0" and
+the node's value in "0:r1".
+
+P1() on lines 20-24 updates the list header to instead reference "z",
+then emulates call_rcu() by doing a release store into "c".
+
+P2() on lines 26-33 emulates the behind-the-scenes effect of doing a
+call_rcu(). Line 30 first does an acquire load from "c", then line 31
+waits for an RCU grace period to elapse, and finally line 32 emulates
+the RCU callback, which in turn emulates a call to kfree().
+
+Of course, it is possible for P2() to start too soon, so that the
+value of "2:r0" is zero rather than the required value of "1".
+The "filter" clause on line 35 handles this possibility, rejecting
+all executions in which "2:r0" is not equal to the value "1".
+
+
+Performance
+-----------
+
+LKMM's exploration of the full state-space can be extremely helpful,
+but it does not come for free. The price is exponential computational
+complexity in terms of the number of processes, the average number
+of statements in each process, and the total number of stores in the
+litmus test.
+
+So it is best to start small and then work up. Where possible, break
+your code down into small pieces, each representing a core concurrency
+requirement.
+
+That said, herd7 is quite fast. On an unprepossessing x86 laptop, it
+was able to analyze the following 10-process RCU litmus test in about
+six seconds.
+
+https://github.com/paulmckrcu/litmus/blob/master/auto/C-RW-R+RW-R+RW-G+RW-G+RW-G+RW-G+RW-R+RW-R+RW-R+RW-R.litmus
+
+One way to make herd7 run faster is to use the "-speedcheck true" option.
+This option prevents herd7 from generating all possible end states,
+instead causing it to focus solely on whether or not the "exists"
+clause can be satisfied. With this option, herd7 evaluates the above
+litmus test in about 300 milliseconds, for more than an order of magnitude
+improvement in performance.
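+
+For example, the message-passing test analyzed earlier can be run with
+this option as follows (again from the tools/memory-model directory):
+
+herd7 -conf linux-kernel.cfg -speedcheck true litmus-tests/MP+pooncerelease+poacquireonce.litmus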
+
+Larger 16-process litmus tests that would normally consume 15 minutes
+of time complete in about 40 seconds with this option. To be fair,
+you do get an extra 65,535 states when you leave off the "-speedcheck
+true" option.
+
+https://github.com/paulmckrcu/litmus/blob/master/auto/C-RW-R+RW-R+RW-G+RW-G+RW-G+RW-G+RW-R+RW-R+RW-R+RW-R+RW-G+RW-G+RW-G+RW-G+RW-R+RW-R.litmus
+
+Nevertheless, litmus-test analysis really is of exponential complexity,
+whether with or without "-speedcheck true". Increasing by just three
+processes to a 19-process litmus test requires 2 hours and 40 minutes
+without, and about 8 minutes with "-speedcheck true". Each of these
+results represents roughly an order of magnitude slowdown compared to the
+16-process litmus test. Again, to be fair, the multi-hour run explores
+no fewer than 524,287 additional states compared to the shorter one.
+
+https://github.com/paulmckrcu/litmus/blob/master/auto/C-RW-R+RW-R+RW-G+RW-G+RW-G+RW-G+RW-R+RW-R+RW-R+RW-R+RW-R+RW-R+RW-G+RW-G+RW-G+RW-G+RW-R+RW-R+RW-R.litmus
+
+If you don't like command-line arguments, you can obtain a similar speedup
+by adding a "filter" clause with exactly the same expression as your
+"exists" clause.
+
+However, please note that seeing the full set of states can be extremely
+helpful when developing and debugging litmus tests.
+
+
+Limitations
+===========
+
+Limitations of the Linux-kernel memory model (LKMM) include:
+
+1. Compiler optimizations are not accurately modeled. Of course,
+ the use of READ_ONCE() and WRITE_ONCE() limits the compiler's
+ ability to optimize, but under some circumstances it is possible
+ for the compiler to undermine the memory model. For more
+ information, see Documentation/explanation.txt (in particular,
+ the "THE PROGRAM ORDER RELATION: po AND po-loc" and "A WARNING"
+ sections).
+
+ Note that this limitation in turn limits LKMM's ability to
+ accurately model address, control, and data dependencies.
+ For example, if the compiler can deduce the value of some variable
+ carrying a dependency, then the compiler can break that dependency
+ by substituting a constant of that value, as sketched in the
+ example following this list.
+
+2. Multiple access sizes for a single variable are not supported,
+ and neither are misaligned or partially overlapping accesses.
+
+3. Exceptions and interrupts are not modeled. In some cases,
+ this limitation can be overcome by modeling the interrupt or
+ exception with an additional process.
+
+4. I/O such as MMIO or DMA is not supported.
+
+5. Self-modifying code (such as that found in the kernel's
+ alternatives mechanism, function tracer, Berkeley Packet Filter
+ JIT compiler, and module loader) is not supported.
+
+6. Complete modeling of all variants of atomic read-modify-write
+ operations, locking primitives, and RCU is not provided.
+ For example, call_rcu() and rcu_barrier() are not supported.
+ However, a substantial amount of support is provided for these
+ operations, as shown in the linux-kernel.def file.
+
+ Here are specific limitations:
+
+ a. When rcu_assign_pointer() is passed NULL, the Linux
+ kernel provides no ordering, but LKMM models this
+ case as a store release.
+
+ b. The "unless" RMW operations are not currently modeled:
+ atomic_long_add_unless(), atomic_inc_unless_negative(),
+ and atomic_dec_unless_positive(). These can be emulated
+ in litmus tests, for example, by using atomic_cmpxchg().
+
+ One exception to this limitation is atomic_add_unless(),
+ which is modeled directly by herd7 (and thus has no
+ corresponding definition in linux-kernel.def), so it can
+ be used in litmus tests.
+
+ c. The call_rcu() function is not modeled. As was shown above,
+ it can be emulated in litmus tests by adding another
+ process that invokes synchronize_rcu() and the body of the
+ callback function, with (for example) a release-acquire
+ from the site of the emulated call_rcu() to the beginning
+ of the additional process.
+
+ d. The rcu_barrier() function is not modeled. It can be
+ emulated in litmus tests that emulate call_rcu() via
+ (for example) a release-acquire from the end of each
+ additional call_rcu() process to the site of the
+ emulated rcu_barrier().
+
+ e. Although sleepable RCU (SRCU) is now modeled, there
+ are some subtle differences between its semantics and
+ those in the Linux kernel. For example, the kernel
+ might interpret the following sequence as two partially
+ overlapping SRCU read-side critical sections:
+
+ 1 r1 = srcu_read_lock(&my_srcu);
+ 2 do_something_1();
+ 3 r2 = srcu_read_lock(&my_srcu);
+ 4 do_something_2();
+ 5 srcu_read_unlock(&my_srcu, r1);
+ 6 do_something_3();
+ 7 srcu_read_unlock(&my_srcu, r2);
+
+ In contrast, LKMM will interpret this as a nested pair of
+ SRCU read-side critical sections, with the outer critical
+ section spanning lines 1-7 and the inner critical section
+ spanning lines 3-5.
+
+ This difference would be more of a concern had anyone
+ identified a reasonable use case for partially overlapping
+ SRCU read-side critical sections. For more information
+ on the trickiness of such overlapping, please see:
+ https://paulmck.livejournal.com/40593.html
+
+ f. Reader-writer locking is not modeled. It can be
+ emulated in litmus tests using atomic read-modify-write
+ operations.
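+
+As a sketch of the dependency-breaking transformation that item 1 of
+the above list warns about, consider the following fragment:
+
+	r0 = READ_ONCE(*x);
+	if (r0 == 42)
+		WRITE_ONCE(*y, r0); /* Compiler may substitute 42 for r0. */
+
+Because the compiler knows that "r0" must be equal to 42 within the
+"if" statement's body, it is within its rights to emit the equivalent
+of WRITE_ONCE(*y, 42), which breaks the data dependency from the load
+to the store, leaving only the control dependency.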
+
+The fragment of the C language supported by these litmus tests is quite
+limited and in some ways non-standard:
+
+1. There is no automatic C-preprocessor pass. You can of course
+ run it manually, if you choose.
+
+2. There is no way to create functions other than the Pn() functions
+ that model the concurrent processes.
+
+3. The Pn() functions' formal parameters must be pointers to the
+ global shared variables. Nothing can be passed by value into
+ these functions.
+
+4. The only functions that can be invoked are those built directly
+ into herd7 or that are defined in the linux-kernel.def file.
+
+5. The "switch", "do", "for", "while", and "goto" C statements are
+ not supported. The "switch" statement can be emulated by the
+ "if" statement. The "do", "for", and "while" statements can
+ often be emulated by manually unrolling the loop, or perhaps by
+ enlisting the aid of the C preprocessor to minimize the resulting
+ code duplication. Some uses of "goto" can be emulated by "if",
+ and some others by unrolling.
+
+6. Although you can use a wide variety of types in litmus-test
+ variable declarations, and especially in global-variable
+ declarations, the "herd7" tool understands only int and
+ pointer types. There is no support for floating-point types,
+ enumerations, characters, strings, arrays, or structures.
+
+7. Parsing of variable declarations is very loose, with almost no
+ type checking.
+
+8. Initializers differ from their C-language counterparts.
+ For example, when an initializer contains the name of a shared
+ variable, that name denotes a pointer to that variable, not
+ the current value of that variable. Thus, "int x = y"
+ is interpreted the way "int x = &y" would be in C.
+
+9. Dynamic memory allocation is not supported, although this can
+ be worked around in some cases by supplying multiple statically
+ allocated variables.
+
+Some of these limitations may be overcome in the future, but others are
+more likely to be addressed by incorporating the Linux-kernel memory model
+into other tools.
+
+Finally, please note that LKMM is subject to change as hardware, use cases,
+and compilers evolve.
diff --git a/tools/memory-model/Documentation/recipes.txt b/tools/memory-model/Documentation/recipes.txt
index 63c4adfed884..03f58b11c252 100644
--- a/tools/memory-model/Documentation/recipes.txt
+++ b/tools/memory-model/Documentation/recipes.txt
@@ -1,7 +1,7 @@
This document provides "recipes", that is, litmus tests for commonly
occurring situations, as well as a few that illustrate subtly broken but
attractive nuisances. Many of these recipes include example code from
-v4.13 of the Linux kernel.
+v5.7 of the Linux kernel.
The first section covers simple special cases, the second section
takes off the training wheels to cover more involved examples,
@@ -278,7 +278,7 @@ is present if the value loaded determines the address of a later access
first place (control dependency). Note that the term "data dependency"
is sometimes casually used to cover both address and data dependencies.
-In lib/prime_numbers.c, the expand_to_next_prime() function invokes
+In lib/math/prime_numbers.c, the expand_to_next_prime() function invokes
rcu_assign_pointer(), and the next_prime_number() function invokes
rcu_dereference(). This combination mediates access to a bit vector
that is expanded as additional primes are needed.
diff --git a/tools/memory-model/Documentation/references.txt b/tools/memory-model/Documentation/references.txt
index ecbbaa5396d4..c5fdfd19df24 100644
--- a/tools/memory-model/Documentation/references.txt
+++ b/tools/memory-model/Documentation/references.txt
@@ -120,7 +120,7 @@ o Jade Alglave, Luc Maranget, and Michael Tautschnig. 2014. "Herding
o Jade Alglave, Patrick Cousot, and Luc Maranget. 2016. "Syntax and
semantics of the weak consistency model specification language
- cat". CoRR abs/1608.07531 (2016). http://arxiv.org/abs/1608.07531
+ cat". CoRR abs/1608.07531 (2016). https://arxiv.org/abs/1608.07531
Memory-model comparisons
diff --git a/tools/memory-model/Documentation/simple.txt b/tools/memory-model/Documentation/simple.txt
new file mode 100644
index 000000000000..81e1a0ec5342
--- /dev/null
+++ b/tools/memory-model/Documentation/simple.txt
@@ -0,0 +1,271 @@
+This document provides options for those wishing to keep their
+memory-ordering lives simple, as is necessary for those whose domain
+is complex. After all, there are bugs other than memory-ordering bugs,
+and the time spent gaining memory-ordering knowledge is not available
+for gaining domain knowledge.  Furthermore, the Linux-kernel memory model
+(LKMM) is quite complex, with subtle differences in code often having
+dramatic effects on correctness.
+
+The options near the beginning of this list are quite simple. The idea
+is not that kernel hackers don't already know about them, but rather
+that they might need the occasional reminder.
+
+Please note that this is a generic guide, and that specific subsystems
+will often have special requirements or idioms. For example, developers
+of MMIO-based device drivers will often need to use mb(), rmb(), and
+wmb(), and therefore might find smp_mb(), smp_rmb(), and smp_wmb()
+to be more natural than smp_load_acquire() and smp_store_release().
+On the other hand, those coming in from other environments will likely
+be more familiar with these last two.
+
+
+Single-threaded code
+====================
+
+In single-threaded code, there is no reordering, at least assuming
+that your toolchain and hardware are working correctly. In addition,
+it is generally a mistake to assume your code will only run in a single
+threaded context as the kernel can enter the same code path on multiple
+CPUs at the same time. One important exception is a function that makes
+no external data references.
+
+In the general case, you will need to take explicit steps to ensure that
+your code really is executed within a single thread that does not access
+shared variables. A simple way to achieve this is to define a global lock
+that you acquire at the beginning of your code and release at the end,
+taking care to ensure that all references to your code's shared data are
+also carried out under that same lock. Because only one thread can hold
+this lock at a given time, your code will be executed single-threaded.
+This approach is called "code locking".
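+
+For example, here is a minimal sketch of code locking, with the lock,
+function, and variable names invented for illustration:
+
+	static DEFINE_MUTEX(my_code_lock);
+	static int my_shared_data;
+
+	void my_serialized_operation(void)
+	{
+		mutex_lock(&my_code_lock);
+		/* All accesses to this code's shared data go here. */
+		my_shared_data++;
+		mutex_unlock(&my_code_lock);
+	}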
+
+Code locking can severely limit both performance and scalability, so it
+should be used with caution, and only on code paths that execute rarely.
+After all, a huge amount of effort was required to remove the Linux
+kernel's old "Big Kernel Lock", so let's please be very careful about
+adding new "little kernel locks".
+
+One of the advantages of locking is that, in happy contrast with the
+year 1981, almost all kernel developers are very familiar with locking.
+The Linux kernel's lockdep (CONFIG_PROVE_LOCKING=y) is very helpful with
+the formerly feared deadlock scenarios.
+
+Please use the standard locking primitives provided by the kernel rather
+than rolling your own. For one thing, the standard primitives interact
+properly with lockdep. For another thing, these primitives have been
+tuned to deal better with high contention. And for one final thing, it is
+surprisingly hard to correctly code production-quality lock acquisition
+and release functions. After all, even simple non-production-quality
+locking functions must carefully prevent both the CPU and the compiler
+from moving code in either direction across the locking function.
+
+Despite the scalability limitations of single-threaded code, RCU
+takes this approach for much of its grace-period processing and also
+for early-boot operation. The reason RCU is able to scale despite
+single-threaded grace-period processing is use of batching, where all
+updates that accumulated during one grace period are handled by the
+next one. In other words, slowing down grace-period processing makes
+it more efficient. Nor is RCU unique: Similar batching optimizations
+are used in many I/O operations.
+
+
+Packaged code
+=============
+
+Even if performance and scalability concerns prevent your code from
+being completely single-threaded, it is often possible to use library
+functions that handle the concurrency nearly or entirely on their own.
+This approach delegates any LKMM worries to the library maintainer.
+
+In the kernel, what is the "library"? Quite a bit. It includes the
+contents of the lib/ directory and much of the include/linux/ directory,
+along with a number of other heavily used APIs.  Heavily used examples
+include the list macros (for example, include/linux/{,rcu}list.h),
+workqueues, smp_call_function(), and the various hash tables and
+search trees.
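+
+As one concrete illustration, the lockless linked list defined in
+include/linux/llist.h handles all of the required memory ordering
+internally.  Here is a sketch of its use, with the "my_" names
+invented for illustration:
+
+	struct my_event {
+		struct llist_node llnode;
+		int payload;
+	};
+
+	static LLIST_HEAD(my_event_list);
+
+	void my_post_event(struct my_event *e)
+	{
+		/* Lockless: safe against other producers and consumers. */
+		llist_add(&e->llnode, &my_event_list);
+	}
+
+	struct llist_node *my_fetch_events(void)
+	{
+		/* Atomically takes the entire list for processing. */
+		return llist_del_all(&my_event_list);
+	}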
+
+
+Data locking
+============
+
+With code locking, we use single-threaded code execution to guarantee
+serialized access to the data that the code is accessing. However,
+we can also achieve this by instead associating the lock with specific
+instances of the data structures. This creates a "critical section"
+in the code, which executes as though it were single-threaded.
+By placing all the accesses and modifications to a shared data structure
+inside a critical section, we ensure that the execution context that
+holds the lock has exclusive access to the shared data.
+
+The poster boy for this approach is the hash table, where placing a lock
+in each hash bucket allows operations on different buckets to proceed
+concurrently. This works because the buckets do not overlap with each
+other, so that an operation on one bucket does not interfere with any
+other bucket.
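+
+Here is a sketch of this pattern, with all names invented for
+illustration:
+
+	#define MY_NR_BUCKETS 1024
+
+	struct my_bucket {
+		spinlock_t lock;
+		struct hlist_head chain;
+	};
+
+	static struct my_bucket my_table[MY_NR_BUCKETS];
+
+	struct my_node {
+		struct hlist_node link;
+		unsigned long key;
+	};
+
+	void my_table_insert(struct my_node *node)
+	{
+		struct my_bucket *b = &my_table[node->key % MY_NR_BUCKETS];
+
+		spin_lock(&b->lock);	/* Serializes only this bucket. */
+		hlist_add_head(&node->link, &b->chain);
+		spin_unlock(&b->lock);
+	}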
+
+As the number of buckets increases, data locking scales naturally.
+In particular, if the amount of data increases with the number of CPUs,
+increasing the number of buckets as the number of CPUs increases results
+in a naturally scalable data structure.
+
+
+Per-CPU processing
+==================
+
+Partitioning processing and data over CPUs allows each CPU to take
+a single-threaded approach while providing excellent performance and
+scalability. Of course, there is no free lunch: The dark side of this
+excellence is substantially increased memory footprint.
+
+In addition, it is occasionally necessary to update some global
+view of this processing and data, in which case something like locking
+must be used to protect this global view. This is the approach taken
+by the percpu_counter infrastructure. In many cases, there are already
+generic/library variants of commonly used per-CPU constructs available.
+Please use them rather than rolling your own.
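+
+For example, here is a sketch of a per-CPU event counter, with the
+"my_" names invented for illustration (the aforementioned
+percpu_counter infrastructure provides a production-quality version):
+
+	static DEFINE_PER_CPU(unsigned long, my_event_count);
+
+	void my_count_event(void)
+	{
+		/* Fast path: touches only this CPU's counter. */
+		this_cpu_inc(my_event_count);
+	}
+
+	unsigned long my_count_approx_total(void)
+	{
+		unsigned long sum = 0;
+		int cpu;
+
+		/* Slow path: sums a possibly moving target. */
+		for_each_possible_cpu(cpu)
+			sum += per_cpu(my_event_count, cpu);
+		return sum;
+	}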
+
+RCU uses DEFINE_PER_CPU*() declarations to create a number of per-CPU
+data sets. For example, each CPU does private quiescent-state processing
+within its instance of the per-CPU rcu_data structure, and then uses data
+locking to report quiescent states up the grace-period combining tree.
+
+
+Packaged primitives: Sequence locking
+=====================================
+
+Lockless programming is considered by many to be more difficult than
+lock-based programming, but there are a few lockless design patterns that
+have been built out into an API. One of these APIs is sequence locking.
+Although this API can be used in extremely complex ways, there are simple
+and effective ways of using it that avoid the need to pay attention to
+memory ordering.
+
+The basic keep-things-simple rule for sequence locking is "do not write
+in read-side code". Yes, you can do writes from within sequence-locking
+readers, but it won't be so simple. For example, such writes will be
+lockless and should be idempotent.
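+
+Here is a sketch of a writer and a rule-abiding reader, with the data
+and names invented for illustration:
+
+	static DEFINE_SEQLOCK(my_seqlock);
+	static int my_x, my_y;
+
+	void my_write_pair(int x, int y)
+	{
+		write_seqlock(&my_seqlock);
+		my_x = x;
+		my_y = y;
+		write_sequnlock(&my_seqlock);
+	}
+
+	void my_read_pair(int *x, int *y)
+	{
+		unsigned int seq;
+
+		do {
+			seq = read_seqbegin(&my_seqlock);
+			/* Reads only: no writes in read-side code. */
+			*x = my_x;
+			*y = my_y;
+		} while (read_seqretry(&my_seqlock, seq));
+	}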
+
+For more sophisticated use cases, LKMM can guide you, including use
+cases involving combining sequence locking with other synchronization
+primitives. (LKMM does not yet know about sequence locking, so it is
+currently necessary to open-code it in your litmus tests.)
+
+Additional information may be found in include/linux/seqlock.h.
+
+
+Packaged primitives: RCU
+========================
+
+Another lockless design pattern that has been baked into an API
+is RCU. The Linux kernel makes sophisticated use of RCU, but the
+keep-things-simple rules for RCU are "do not write in read-side code",
+"do not update anything that is visible to and accessed by readers",
+and "protect updates with locking".
+
+These rules are illustrated by the functions foo_update_a() and
+foo_get_a() shown in Documentation/RCU/whatisRCU.rst. Additional
+RCU usage patterns may be found in Documentation/RCU and in the
+source code.
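+
+In rough outline, code following these rules might look like the
+following sketch, which is in the same spirit as (but not copied
+from) the functions mentioned above:
+
+	struct foo {
+		int a;
+	};
+
+	static struct foo __rcu *my_foo;
+	static DEFINE_SPINLOCK(my_foo_lock);
+
+	int my_get_a(void)
+	{
+		int ret;
+
+		/* Reader: no writes. */
+		rcu_read_lock();
+		ret = rcu_dereference(my_foo)->a;
+		rcu_read_unlock();
+		return ret;
+	}
+
+	void my_set_foo(struct foo *newp)
+	{
+		struct foo *oldp;
+
+		/* Updater: protected by locking. */
+		spin_lock(&my_foo_lock);
+		oldp = rcu_dereference_protected(my_foo,
+				lockdep_is_held(&my_foo_lock));
+		rcu_assign_pointer(my_foo, newp);
+		spin_unlock(&my_foo_lock);
+		synchronize_rcu();	/* Wait for pre-existing readers. */
+		kfree(oldp);
+	}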
+
+
+Packaged primitives: Atomic operations
+======================================
+
+Back in the day, the Linux kernel had three types of atomic operations:
+
+1. Initialization and read-out, such as atomic_set() and atomic_read().
+
+2. Operations that did not return a value and provided no ordering,
+ such as atomic_inc() and atomic_dec().
+
+3. Operations that returned a value and provided full ordering, such as
+ atomic_add_return() and atomic_dec_and_test(). Note that some
+ value-returning operations provide full ordering only conditionally.
+ For example, cmpxchg() provides ordering only upon success.
+
+More recent kernels have operations that return a value but do not
+provide full ordering. These are flagged with either a _relaxed()
+suffix (providing no ordering), or an _acquire() or _release() suffix
+(providing limited ordering).
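+
+For example, the following fragment illustrates these classes of
+operations, with my_counter and the r* variables invented for
+illustration:
+
+	static atomic_t my_counter = ATOMIC_INIT(0);
+	int r1, r2, r3;
+
+	atomic_set(&my_counter, 5);	/* Initialization, no ordering. */
+	r1 = atomic_read(&my_counter);	/* Read-out, no ordering. */
+	atomic_inc(&my_counter);	/* No return value, no ordering. */
+	r2 = atomic_add_return(3, &my_counter); /* Value, fully ordered. */
+	r3 = atomic_add_return_relaxed(3, &my_counter); /* Value, no ordering. */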
+
+Additional information may be found in these files:
+
+Documentation/atomic_t.txt
+Documentation/atomic_bitops.txt
+Documentation/core-api/atomic_ops.rst
+Documentation/core-api/refcount-vs-atomic.rst
+
+Reading code using these primitives is often also quite helpful.
+
+
+Lockless, fully ordered
+=======================
+
+When using locking, there often comes a time when it is necessary
+to access some variable or another without holding the data lock
+that serializes access to that variable.
+
+If you want to keep things simple, use the initialization and read-out
+operations from the previous section only when there are no racing
+accesses. Otherwise, use only fully ordered operations when accessing
+or modifying the variable. This approach guarantees that code prior
+to a given access to that variable will be seen by all CPUs as having
+happened before any code following any later access to that same variable.
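+
+For example, in the following fragment, prepare_data() and use_data()
+are stand-ins for real work, and the fully ordered atomic_inc_return()
+provides the required ordering:
+
+	static atomic_t my_state = ATOMIC_INIT(0);
+
+	/* CPU 0 */
+	prepare_data();			/* Ordered before the RMW below. */
+	atomic_inc_return(&my_state);	/* Value-returning, fully ordered. */
+
+	/* CPU 1 */
+	if (atomic_inc_return(&my_state) == 2)
+		use_data();	/* Guaranteed to see CPU 0's prepare_data(). */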
+
+Please note that per-CPU functions are not atomic operations and
+hence they do not provide any ordering guarantees at all.
+
+If the lockless accesses are frequently executed reads that are used
+only for heuristics, or if they are frequently executed writes that
+are used only for statistics, please see the next section.
+
+
+Lockless statistics and heuristics
+==================================
+
+Unordered primitives such as atomic_read(), atomic_set(), READ_ONCE(), and
+WRITE_ONCE() can safely be used in some cases. These primitives provide
+no ordering, but they do prevent the compiler from carrying out a number
+of destructive optimizations (for which please see the next section).
+One example use for these primitives is statistics, such as per-CPU
+counters exemplified by the rt_cache_stat structure's routing-cache
+statistics counters. Another example use case is heuristics, such as
+the jiffies_till_first_fqs and jiffies_till_next_fqs kernel parameters
+controlling how often RCU scans for idle CPUs.
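+
+For example, here is a sketch of a statistics counter that tolerates
+lost counts, with the names and threshold invented for illustration:
+
+	static unsigned long my_hit_count;	/* Statistics only. */
+
+	void my_record_hit(void)
+	{
+		/* Racy increment: occasional lost counts are acceptable. */
+		WRITE_ONCE(my_hit_count, READ_ONCE(my_hit_count) + 1);
+	}
+
+	bool my_seems_hot(void)
+	{
+		/* Heuristic read: no ordering implied or needed. */
+		return READ_ONCE(my_hit_count) > 1000;
+	}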
+
+But be careful. "Unordered" really does mean "unordered". It is all
+too easy to assume ordering, and this assumption must be avoided when
+using these primitives.
+
+
+Don't let the compiler trip you up
+==================================
+
+It can be quite tempting to use plain C-language accesses for lockless
+loads from and stores to shared variables. Although this is both
+possible and quite common in the Linux kernel, it does require a
+surprising amount of analysis, care, and knowledge about the compiler.
+Yes, some decades ago it was not unfair to consider a C compiler to be
+an assembler with added syntax and better portability, but the advent of
+sophisticated optimizing compilers means that those days are long gone.
+Today's optimizing compilers can profoundly rewrite your code during the
+translation process, and have long been ready, willing, and able to do so.
+
+Therefore, if you really need to use C-language assignments instead of
+READ_ONCE(), WRITE_ONCE(), and so on, you will need to have a very good
+understanding of both the C standard and your compiler. Here are some
+introductory references and some tooling to start you on this noble quest:
+
+Who's afraid of a big bad optimizing compiler?
+ https://lwn.net/Articles/793253/
+Calibrating your fear of big bad optimizing compilers
+ https://lwn.net/Articles/799218/
+Concurrency bugs should fear the big bad data-race detector (part 1)
+ https://lwn.net/Articles/816850/
+Concurrency bugs should fear the big bad data-race detector (part 2)
+ https://lwn.net/Articles/816854/
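+
+To make this hazard concrete, consider the classic busy-wait trap
+sketched below, with my_flag invented for illustration:
+
+	/* BUGGY: the compiler may load my_flag once and spin forever. */
+	while (!my_flag)
+		cpu_relax();
+
+	/* Better: READ_ONCE() forces a fresh load on each iteration. */
+	while (!READ_ONCE(my_flag))
+		cpu_relax();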
+
+
+More complex use cases
+======================
+
+If the alternatives above do not do what you need, please look at the
+recipes-pairs.txt file to peel off the next layer of the memory-ordering
+onion.
diff --git a/tools/memory-model/README b/tools/memory-model/README
index ecb7385376bf..c8144d4aafa0 100644
--- a/tools/memory-model/README
+++ b/tools/memory-model/README
@@ -63,10 +63,32 @@ BASIC USAGE: HERD7
==================
The memory model is used, in conjunction with "herd7", to exhaustively
-explore the state space of small litmus tests.
+explore the state space of small litmus tests. Documentation describing
+the format, features, capabilities, and limitations of these litmus
+tests is available in tools/memory-model/Documentation/litmus-tests.txt.
-For example, to run SB+fencembonceonces.litmus against the memory model:
+Example litmus tests may be found in the Linux-kernel source tree:
+ tools/memory-model/litmus-tests/
+ Documentation/litmus-tests/
+
+Several thousand more example litmus tests are available here:
+
+ https://github.com/paulmckrcu/litmus
+ https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/perfbook.git/tree/CodeSamples/formal/herd
+ https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/perfbook.git/tree/CodeSamples/formal/litmus
+
+Documentation describing litmus tests and how to use them may be found
+here:
+
+ tools/memory-model/Documentation/litmus-tests.txt
+
+The remainder of this section uses the SB+fencembonceonces.litmus test
+located in the tools/memory-model directory.
+
+To run SB+fencembonceonces.litmus against the memory model:
+
+ $ cd $LINUX_SOURCE_TREE/tools/memory-model
$ herd7 -conf linux-kernel.cfg litmus-tests/SB+fencembonceonces.litmus
Here is the corresponding output:
@@ -87,7 +109,11 @@ Here is the corresponding output:
The "Positive: 0 Negative: 3" and the "Never 0 3" each indicate that
this litmus test's "exists" clause can not be satisfied.
-See "herd7 -help" or "herdtools7/doc/" for more information.
+See "herd7 -help" or "herdtools7/doc/" for more information on running the
+tool itself, but please be aware that this documentation is intended for
+people who work on the memory model itself, that is, people making changes
+to the tools/memory-model/linux-kernel.* files. It is not intended for
+people focusing on writing, understanding, and running LKMM litmus tests.
=====================
@@ -124,7 +150,11 @@ that during two million trials, the state specified in this litmus
test's "exists" clause was not reached.
And, as with "herd7", please see "klitmus7 -help" or "herdtools7/doc/"
-for more information.
+for more information. And again, please be aware that this documentation
+is intended for people who work on the memory model itself, that is,
+people making changes to the tools/memory-model/linux-kernel.* files.
+It is not intended for people focusing on writing, understanding, and
+running LKMM litmus tests.
====================
@@ -137,12 +167,21 @@ Documentation/cheatsheet.txt
Documentation/explanation.txt
Describes the memory model in detail.
+Documentation/litmus-tests.txt
+ Describes the format, features, capabilities, and limitations
+ of the litmus tests that LKMM can evaluate.
+
Documentation/recipes.txt
Lists common memory-ordering patterns.
Documentation/references.txt
Provides background reading.
+Documentation/simple.txt
+ Starting point for someone new to Linux-kernel concurrency.
+ And also for those needing a reminder of the simpler approaches
+ to concurrency!
+
linux-kernel.bell
Categorizes the relevant instructions, including memory
references, memory barriers, atomic read-modify-write operations,
@@ -187,116 +226,3 @@ README
This file.
scripts Various scripts, see scripts/README.
-
-
-===========
-LIMITATIONS
-===========
-
-The Linux-kernel memory model (LKMM) has the following limitations:
-
-1. Compiler optimizations are not accurately modeled. Of course,
- the use of READ_ONCE() and WRITE_ONCE() limits the compiler's
- ability to optimize, but under some circumstances it is possible
- for the compiler to undermine the memory model. For more
- information, see Documentation/explanation.txt (in particular,
- the "THE PROGRAM ORDER RELATION: po AND po-loc" and "A WARNING"
- sections).
-
- Note that this limitation in turn limits LKMM's ability to
- accurately model address, control, and data dependencies.
- For example, if the compiler can deduce the value of some variable
- carrying a dependency, then the compiler can break that dependency
- by substituting a constant of that value.
-
-2. Multiple access sizes for a single variable are not supported,
- and neither are misaligned or partially overlapping accesses.
-
-3. Exceptions and interrupts are not modeled. In some cases,
- this limitation can be overcome by modeling the interrupt or
- exception with an additional process.
-
-4. I/O such as MMIO or DMA is not supported.
-
-5. Self-modifying code (such as that found in the kernel's
- alternatives mechanism, function tracer, Berkeley Packet Filter
- JIT compiler, and module loader) is not supported.
-
-6. Complete modeling of all variants of atomic read-modify-write
- operations, locking primitives, and RCU is not provided.
- For example, call_rcu() and rcu_barrier() are not supported.
- However, a substantial amount of support is provided for these
- operations, as shown in the linux-kernel.def file.
-
- a. When rcu_assign_pointer() is passed NULL, the Linux
- kernel provides no ordering, but LKMM models this
- case as a store release.
-
- b. The "unless" RMW operations are not currently modeled:
- atomic_long_add_unless(), atomic_inc_unless_negative(),
- and atomic_dec_unless_positive(). These can be emulated
- in litmus tests, for example, by using atomic_cmpxchg().
-
- One exception of this limitation is atomic_add_unless(),
- which is provided directly by herd7 (so no corresponding
- definition in linux-kernel.def). atomic_add_unless() is
- modeled by herd7 therefore it can be used in litmus tests.
-
- c. The call_rcu() function is not modeled. It can be
- emulated in litmus tests by adding another process that
- invokes synchronize_rcu() and the body of the callback
- function, with (for example) a release-acquire from
- the site of the emulated call_rcu() to the beginning
- of the additional process.
-
- d. The rcu_barrier() function is not modeled. It can be
- emulated in litmus tests emulating call_rcu() via
- (for example) a release-acquire from the end of each
- additional call_rcu() process to the site of the
- emulated rcu-barrier().
-
- e. Although sleepable RCU (SRCU) is now modeled, there
- are some subtle differences between its semantics and
- those in the Linux kernel. For example, the kernel
- might interpret the following sequence as two partially
- overlapping SRCU read-side critical sections:
-
- 1 r1 = srcu_read_lock(&my_srcu);
- 2 do_something_1();
- 3 r2 = srcu_read_lock(&my_srcu);
- 4 do_something_2();
- 5 srcu_read_unlock(&my_srcu, r1);
- 6 do_something_3();
- 7 srcu_read_unlock(&my_srcu, r2);
-
- In contrast, LKMM will interpret this as a nested pair of
- SRCU read-side critical sections, with the outer critical
- section spanning lines 1-7 and the inner critical section
- spanning lines 3-5.
-
- This difference would be more of a concern had anyone
- identified a reasonable use case for partially overlapping
- SRCU read-side critical sections. For more information,
- please see: https://paulmck.livejournal.com/40593.html
-
- f. Reader-writer locking is not modeled. It can be
- emulated in litmus tests using atomic read-modify-write
- operations.
-
-The "herd7" tool has some additional limitations of its own, apart from
-the memory model:
-
-1. Non-trivial data structures such as arrays or structures are
- not supported. However, pointers are supported, allowing trivial
- linked lists to be constructed.
-
-2. Dynamic memory allocation is not supported, although this can
- be worked around in some cases by supplying multiple statically
- allocated variables.
-
-Some of these limitations may be overcome in the future, but others are
-more likely to be addressed by incorporating the Linux-kernel memory model
-into other tools.
-
-Finally, please note that LKMM is subject to change as hardware, use cases,
-and compilers evolve.
diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
index 7770edcda3a0..4ea9a833dde7 100644
--- a/tools/objtool/Makefile
+++ b/tools/objtool/Makefile
@@ -37,7 +37,7 @@ INCLUDES := -I$(srctree)/tools/include \
-I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi \
-I$(srctree)/tools/arch/$(SRCARCH)/include \
-I$(srctree)/tools/objtool/arch/$(SRCARCH)/include
-WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -Wno-packed
+WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -Wno-packed -Wno-nested-externs
CFLAGS := -Werror $(WARNINGS) $(KBUILD_HOSTCFLAGS) -g $(INCLUDES) $(LIBELF_FLAGS)
LDFLAGS += $(LIBELF_LIBS) $(LIBSUBCMD) $(KBUILD_HOSTLDFLAGS)
@@ -55,6 +55,10 @@ ifeq ($(SRCARCH),x86)
SUBCMD_ORC := y
endif
+ifeq ($(SUBCMD_ORC),y)
+ CFLAGS += -DINSN_USE_ORC
+endif
+
export SUBCMD_CHECK SUBCMD_ORC
export srctree OUTPUT CFLAGS SRCARCH AWK
include $(srctree)/tools/build/Makefile.include
diff --git a/tools/objtool/arch.h b/tools/objtool/arch.h
index 2e2ce089b0e9..4a84c3081b8e 100644
--- a/tools/objtool/arch.h
+++ b/tools/objtool/arch.h
@@ -11,7 +11,9 @@
#include "objtool.h"
#include "cfi.h"
+#ifdef INSN_USE_ORC
#include <asm/orc_types.h>
+#endif
enum insn_type {
INSN_JUMP_CONDITIONAL,
@@ -86,4 +88,6 @@ unsigned long arch_dest_reloc_offset(int addend);
const char *arch_nop_insn(int len);
+int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg);
+
#endif /* _ARCH_H */
diff --git a/tools/objtool/arch/x86/Build b/tools/objtool/arch/x86/Build
index 7c5004008e97..9f7869b5c5e0 100644
--- a/tools/objtool/arch/x86/Build
+++ b/tools/objtool/arch/x86/Build
@@ -1,3 +1,4 @@
+objtool-y += special.o
objtool-y += decode.o
inat_tables_script = ../arch/x86/tools/gen-insn-attr-x86.awk
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 1967370440b3..cde9c36e40ae 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -15,6 +15,7 @@
#include "../../elf.h"
#include "../../arch.h"
#include "../../warn.h"
+#include <asm/orc_types.h>
static unsigned char op_to_cfi_reg[][2] = {
{CFI_AX, CFI_R8},
@@ -583,3 +584,39 @@ const char *arch_nop_insn(int len)
return nops[len-1];
}
+
+int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg)
+{
+ struct cfi_reg *cfa = &insn->cfi.cfa;
+
+ switch (sp_reg) {
+ case ORC_REG_UNDEFINED:
+ cfa->base = CFI_UNDEFINED;
+ break;
+ case ORC_REG_SP:
+ cfa->base = CFI_SP;
+ break;
+ case ORC_REG_BP:
+ cfa->base = CFI_BP;
+ break;
+ case ORC_REG_SP_INDIRECT:
+ cfa->base = CFI_SP_INDIRECT;
+ break;
+ case ORC_REG_R10:
+ cfa->base = CFI_R10;
+ break;
+ case ORC_REG_R13:
+ cfa->base = CFI_R13;
+ break;
+ case ORC_REG_DI:
+ cfa->base = CFI_DI;
+ break;
+ case ORC_REG_DX:
+ cfa->base = CFI_DX;
+ break;
+ default:
+ return -1;
+ }
+
+ return 0;
+}
diff --git a/tools/objtool/arch/x86/include/arch_special.h b/tools/objtool/arch/x86/include/arch_special.h
new file mode 100644
index 000000000000..d818b2bffa02
--- /dev/null
+++ b/tools/objtool/arch/x86/include/arch_special.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _X86_ARCH_SPECIAL_H
+#define _X86_ARCH_SPECIAL_H
+
+#define EX_ENTRY_SIZE 12
+#define EX_ORIG_OFFSET 0
+#define EX_NEW_OFFSET 4
+
+#define JUMP_ENTRY_SIZE 16
+#define JUMP_ORIG_OFFSET 0
+#define JUMP_NEW_OFFSET 4
+
+#define ALT_ENTRY_SIZE 13
+#define ALT_ORIG_OFFSET 0
+#define ALT_NEW_OFFSET 4
+#define ALT_FEATURE_OFFSET 8
+#define ALT_ORIG_LEN_OFFSET 10
+#define ALT_NEW_LEN_OFFSET 11
+
+#endif /* _X86_ARCH_SPECIAL_H */
diff --git a/tools/objtool/arch/x86/special.c b/tools/objtool/arch/x86/special.c
new file mode 100644
index 000000000000..fd4af88c0ea5
--- /dev/null
+++ b/tools/objtool/arch/x86/special.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <string.h>
+
+#include "../../special.h"
+#include "../../builtin.h"
+
+#define X86_FEATURE_POPCNT (4 * 32 + 23)
+#define X86_FEATURE_SMAP (9 * 32 + 20)
+
+void arch_handle_alternative(unsigned short feature, struct special_alt *alt)
+{
+ switch (feature) {
+ case X86_FEATURE_SMAP:
+ /*
+		 * If UACCESS validation is enabled, force that alternative;
+ * otherwise force it the other way.
+ *
+ * What we want to avoid is having both the original and the
+ * alternative code flow at the same time, in that case we can
+ * find paths that see the STAC but take the NOP instead of
+ * CLAC and the other way around.
+ */
+ if (uaccess)
+ alt->skip_orig = true;
+ else
+ alt->skip_alt = true;
+ break;
+ case X86_FEATURE_POPCNT:
+ /*
+ * It has been requested that we don't validate the !POPCNT
+ * feature path which is a "very very small percentage of
+ * machines".
+ */
+ alt->skip_orig = true;
+ break;
+ default:
+ break;
+ }
+}
+
+bool arch_support_alt_relocation(struct special_alt *special_alt,
+ struct instruction *insn,
+ struct reloc *reloc)
+{
+ /*
+ * The x86 alternatives code adjusts the offsets only when it
+ * encounters a branch instruction at the very beginning of the
+ * replacement group.
+ */
+ return insn->offset == special_alt->new_off &&
+ (insn->type == INSN_CALL || is_static_jump(insn));
+}
+
+/*
+ * There are 3 basic jump table patterns:
+ *
+ * 1. jmpq *[rodata addr](,%reg,8)
+ *
+ * This is the most common case by far. It jumps to an address in a simple
+ * jump table which is stored in .rodata.
+ *
+ * 2. jmpq *[rodata addr](%rip)
+ *
+ * This is caused by a rare GCC quirk, currently only seen in three driver
+ * functions in the kernel, only with certain obscure non-distro configs.
+ *
+ * As part of an optimization, GCC makes a copy of an existing switch jump
+ * table, modifies it, and then hard-codes the jump (albeit with an indirect
+ * jump) to use a single entry in the table. The rest of the jump table and
+ * some of its jump targets remain as dead code.
+ *
+ * In such a case we can just crudely ignore all unreachable instruction
+ * warnings for the entire object file. Ideally we would just ignore them
+ * for the function, but that would require redesigning the code quite a
+ * bit. And honestly that's just not worth doing: unreachable instruction
+ * warnings are of questionable value anyway, and this is such a rare issue.
+ *
+ * 3. mov [rodata addr],%reg1
+ * ... some instructions ...
+ * jmpq *(%reg1,%reg2,8)
+ *
+ * This is a fairly uncommon pattern which is new for GCC 6. As of this
+ * writing, there are 11 occurrences of it in the allmodconfig kernel.
+ *
+ * As of GCC 7 there are quite a few more of these and the 'in between' code
+ * is significant. Esp. with KASAN enabled some of the code between the mov
+ * and jmpq uses .rodata itself, which can confuse things.
+ *
+ * TODO: Once we have DWARF CFI and smarter instruction decoding logic,
+ * ensure the same register is used in the mov and jump instructions.
+ *
+ * NOTE: RETPOLINE made it harder still to decode dynamic jumps.
+ */
+struct reloc *arch_find_switch_table(struct objtool_file *file,
+ struct instruction *insn)
+{
+ struct reloc *text_reloc, *rodata_reloc;
+ struct section *table_sec;
+ unsigned long table_offset;
+
+ /* look for a relocation which references .rodata */
+ text_reloc = find_reloc_by_dest_range(file->elf, insn->sec,
+ insn->offset, insn->len);
+ if (!text_reloc || text_reloc->sym->type != STT_SECTION ||
+ !text_reloc->sym->sec->rodata)
+ return NULL;
+
+ table_offset = text_reloc->addend;
+ table_sec = text_reloc->sym->sec;
+
+ if (text_reloc->type == R_X86_64_PC32)
+ table_offset += 4;
+
+ /*
+ * Make sure the .rodata address isn't associated with a
+ * symbol. GCC jump tables are anonymous data.
+ *
+ * Also support C jump tables which are in the same format as
+ * switch jump tables. For objtool to recognize them, they
+ * need to be placed in the C_JUMP_TABLE_SECTION section. They
+ * have symbols associated with them.
+ */
+ if (find_symbol_containing(table_sec, table_offset) &&
+ strcmp(table_sec->name, C_JUMP_TABLE_SECTION))
+ return NULL;
+
+ /*
+	 * Each table entry has a reloc associated with it.  The reloc
+ * should reference text in the same function as the original
+ * instruction.
+ */
+ rodata_reloc = find_reloc_by_dest(file->elf, table_sec, table_offset);
+ if (!rodata_reloc)
+ return NULL;
+
+ /*
+ * Use of RIP-relative switch jumps is quite rare, and
+ * indicates a rare GCC quirk/bug which can leave dead
+ * code behind.
+ */
+ if (text_reloc->type == R_X86_64_PC32)
+ file->ignore_unreachables = true;
+
+ return rodata_reloc;
+}
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index 7a44174967b5..c6d199bfd0ae 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -41,6 +41,8 @@ const struct option check_options[] = {
int cmd_check(int argc, const char **argv)
{
const char *objname, *s;
+ struct objtool_file *file;
+ int ret;
argc = parse_options(argc, argv, check_options, check_usage, 0);
@@ -53,5 +55,16 @@ int cmd_check(int argc, const char **argv)
if (s && !s[9])
vmlinux = true;
- return check(objname, false);
+ file = objtool_open_read(objname);
+ if (!file)
+ return 1;
+
+ ret = check(file);
+ if (ret)
+ return ret;
+
+ if (file->elf->changed)
+ return elf_write(file->elf);
+
+ return 0;
}
diff --git a/tools/objtool/builtin-orc.c b/tools/objtool/builtin-orc.c
index b1dfe2007962..7b31121fa60b 100644
--- a/tools/objtool/builtin-orc.c
+++ b/tools/objtool/builtin-orc.c
@@ -31,13 +31,38 @@ int cmd_orc(int argc, const char **argv)
usage_with_options(orc_usage, check_options);
if (!strncmp(argv[0], "gen", 3)) {
+ struct objtool_file *file;
+ int ret;
+
argc = parse_options(argc, argv, check_options, orc_usage, 0);
if (argc != 1)
usage_with_options(orc_usage, check_options);
objname = argv[0];
- return check(objname, true);
+ file = objtool_open_read(objname);
+ if (!file)
+ return 1;
+
+ ret = check(file);
+ if (ret)
+ return ret;
+
+ if (list_empty(&file->insn_list))
+ return 0;
+
+ ret = create_orc(file);
+ if (ret)
+ return ret;
+
+ ret = create_orc_sections(file);
+ if (ret)
+ return ret;
+
+ if (!file->elf->changed)
+ return 0;
+
+ return elf_write(file->elf);
}
if (!strcmp(argv[0], "dump")) {
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 90a66891441a..c6ab44543c92 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -14,20 +14,19 @@
#include "warn.h"
#include "arch_elf.h"
+#include <linux/objtool.h>
#include <linux/hashtable.h>
#include <linux/kernel.h>
+#include <linux/static_call_types.h>
#define FAKE_JUMP_OFFSET -1
-#define C_JUMP_TABLE_SECTION ".rodata..c_jump_table"
-
struct alternative {
struct list_head list;
struct instruction *insn;
bool skip_orig;
};
-const char *objname;
struct cfi_init_state initial_func_cfi;
struct instruction *find_insn(struct objtool_file *file,
@@ -110,12 +109,6 @@ static struct instruction *prev_insn_same_sym(struct objtool_file *file,
for (insn = next_insn_same_sec(file, insn); insn; \
insn = next_insn_same_sec(file, insn))
-static bool is_static_jump(struct instruction *insn)
-{
- return insn->type == INSN_JUMP_CONDITIONAL ||
- insn->type == INSN_JUMP_UNCONDITIONAL;
-}
-
static bool is_sibling_call(struct instruction *insn)
{
/* An indirect jump is either a sibling call or a jump to a table. */
@@ -433,6 +426,103 @@ reachable:
return 0;
}
+static int create_static_call_sections(struct objtool_file *file)
+{
+ struct section *sec, *reloc_sec;
+ struct reloc *reloc;
+ struct static_call_site *site;
+ struct instruction *insn;
+ struct symbol *key_sym;
+ char *key_name, *tmp;
+ int idx;
+
+ sec = find_section_by_name(file->elf, ".static_call_sites");
+ if (sec) {
+ INIT_LIST_HEAD(&file->static_call_list);
+ WARN("file already has .static_call_sites section, skipping");
+ return 0;
+ }
+
+ if (list_empty(&file->static_call_list))
+ return 0;
+
+ idx = 0;
+ list_for_each_entry(insn, &file->static_call_list, static_call_node)
+ idx++;
+
+ sec = elf_create_section(file->elf, ".static_call_sites", SHF_WRITE,
+ sizeof(struct static_call_site), idx);
+ if (!sec)
+ return -1;
+
+ reloc_sec = elf_create_reloc_section(file->elf, sec, SHT_RELA);
+ if (!reloc_sec)
+ return -1;
+
+ idx = 0;
+ list_for_each_entry(insn, &file->static_call_list, static_call_node) {
+
+ site = (struct static_call_site *)sec->data->d_buf + idx;
+ memset(site, 0, sizeof(struct static_call_site));
+
+ /* populate reloc for 'addr' */
+ reloc = malloc(sizeof(*reloc));
+ if (!reloc) {
+ perror("malloc");
+ return -1;
+ }
+ memset(reloc, 0, sizeof(*reloc));
+ reloc->sym = insn->sec->sym;
+ reloc->addend = insn->offset;
+ reloc->type = R_X86_64_PC32;
+ reloc->offset = idx * sizeof(struct static_call_site);
+ reloc->sec = reloc_sec;
+ elf_add_reloc(file->elf, reloc);
+
+ /* find key symbol */
+ key_name = strdup(insn->call_dest->name);
+ if (!key_name) {
+ perror("strdup");
+ return -1;
+ }
+ if (strncmp(key_name, STATIC_CALL_TRAMP_PREFIX_STR,
+ STATIC_CALL_TRAMP_PREFIX_LEN)) {
+ WARN("static_call: trampoline name malformed: %s", key_name);
+ return -1;
+ }
+ tmp = key_name + STATIC_CALL_TRAMP_PREFIX_LEN - STATIC_CALL_KEY_PREFIX_LEN;
+ memcpy(tmp, STATIC_CALL_KEY_PREFIX_STR, STATIC_CALL_KEY_PREFIX_LEN);
+
+ key_sym = find_symbol_by_name(file->elf, tmp);
+ if (!key_sym) {
+ WARN("static_call: can't find static_call_key symbol: %s", tmp);
+ return -1;
+ }
+ free(key_name);
+
+ /* populate reloc for 'key' */
+ reloc = malloc(sizeof(*reloc));
+ if (!reloc) {
+ perror("malloc");
+ return -1;
+ }
+ memset(reloc, 0, sizeof(*reloc));
+ reloc->sym = key_sym;
+ reloc->addend = is_sibling_call(insn) ? STATIC_CALL_SITE_TAIL : 0;
+ reloc->type = R_X86_64_PC32;
+ reloc->offset = idx * sizeof(struct static_call_site) + 4;
+ reloc->sec = reloc_sec;
+ elf_add_reloc(file->elf, reloc);
+
+ idx++;
+ }
+
+ if (elf_rebuild_reloc_section(file->elf, reloc_sec))
+ return -1;
+
+ return 0;
+}
+
/*
* Warnings shouldn't be reported for ignored functions.
*/
@@ -493,6 +583,8 @@ static const char *uaccess_safe_builtin[] = {
"__asan_store4_noabort",
"__asan_store8_noabort",
"__asan_store16_noabort",
+ "__kasan_check_read",
+ "__kasan_check_write",
/* KASAN in-line */
"__asan_report_load_n_noabort",
"__asan_report_load1_noabort",
@@ -528,6 +620,61 @@ static const char *uaccess_safe_builtin[] = {
"__tsan_write4",
"__tsan_write8",
"__tsan_write16",
+ "__tsan_read_write1",
+ "__tsan_read_write2",
+ "__tsan_read_write4",
+ "__tsan_read_write8",
+ "__tsan_read_write16",
+ "__tsan_atomic8_load",
+ "__tsan_atomic16_load",
+ "__tsan_atomic32_load",
+ "__tsan_atomic64_load",
+ "__tsan_atomic8_store",
+ "__tsan_atomic16_store",
+ "__tsan_atomic32_store",
+ "__tsan_atomic64_store",
+ "__tsan_atomic8_exchange",
+ "__tsan_atomic16_exchange",
+ "__tsan_atomic32_exchange",
+ "__tsan_atomic64_exchange",
+ "__tsan_atomic8_fetch_add",
+ "__tsan_atomic16_fetch_add",
+ "__tsan_atomic32_fetch_add",
+ "__tsan_atomic64_fetch_add",
+ "__tsan_atomic8_fetch_sub",
+ "__tsan_atomic16_fetch_sub",
+ "__tsan_atomic32_fetch_sub",
+ "__tsan_atomic64_fetch_sub",
+ "__tsan_atomic8_fetch_and",
+ "__tsan_atomic16_fetch_and",
+ "__tsan_atomic32_fetch_and",
+ "__tsan_atomic64_fetch_and",
+ "__tsan_atomic8_fetch_or",
+ "__tsan_atomic16_fetch_or",
+ "__tsan_atomic32_fetch_or",
+ "__tsan_atomic64_fetch_or",
+ "__tsan_atomic8_fetch_xor",
+ "__tsan_atomic16_fetch_xor",
+ "__tsan_atomic32_fetch_xor",
+ "__tsan_atomic64_fetch_xor",
+ "__tsan_atomic8_fetch_nand",
+ "__tsan_atomic16_fetch_nand",
+ "__tsan_atomic32_fetch_nand",
+ "__tsan_atomic64_fetch_nand",
+ "__tsan_atomic8_compare_exchange_strong",
+ "__tsan_atomic16_compare_exchange_strong",
+ "__tsan_atomic32_compare_exchange_strong",
+ "__tsan_atomic64_compare_exchange_strong",
+ "__tsan_atomic8_compare_exchange_weak",
+ "__tsan_atomic16_compare_exchange_weak",
+ "__tsan_atomic32_compare_exchange_weak",
+ "__tsan_atomic64_compare_exchange_weak",
+ "__tsan_atomic8_compare_exchange_val",
+ "__tsan_atomic16_compare_exchange_val",
+ "__tsan_atomic32_compare_exchange_val",
+ "__tsan_atomic64_compare_exchange_val",
+ "__tsan_atomic_thread_fence",
+ "__tsan_atomic_signal_fence",
/* KCOV */
"write_comp_data",
"check_kcov_mode",
@@ -548,8 +695,9 @@ static const char *uaccess_safe_builtin[] = {
"__ubsan_handle_shift_out_of_bounds",
/* misc */
"csum_partial_copy_generic",
- "__memcpy_mcsafe",
- "mcsafe_handle_tail",
+ "copy_mc_fragile",
+ "copy_mc_fragile_handle_tail",
+ "copy_mc_enhanced_fast_string",
"ftrace_likely_update", /* CONFIG_TRACE_BRANCH_PROFILING */
NULL
};
@@ -649,6 +797,10 @@ static int add_jump_destinations(struct objtool_file *file)
} else {
/* external sibling call */
insn->call_dest = reloc->sym;
+ if (insn->call_dest->static_call_tramp) {
+ list_add_tail(&insn->static_call_node,
+ &file->static_call_list);
+ }
continue;
}
@@ -700,6 +852,10 @@ static int add_jump_destinations(struct objtool_file *file)
/* internal sibling call */
insn->call_dest = insn->jump_dest->func;
+ if (insn->call_dest->static_call_tramp) {
+ list_add_tail(&insn->static_call_node,
+ &file->static_call_list);
+ }
}
}
}
@@ -717,6 +873,17 @@ static void remove_insn_ops(struct instruction *insn)
}
}
+static struct symbol *find_call_destination(struct section *sec, unsigned long offset)
+{
+ struct symbol *call_dest;
+
+ call_dest = find_func_by_offset(sec, offset);
+ if (!call_dest)
+ call_dest = find_symbol_by_offset(sec, offset);
+
+ return call_dest;
+}
+
/*
* Find the destination instructions for all calls.
*/
@@ -734,9 +901,7 @@ static int add_call_destinations(struct objtool_file *file)
insn->offset, insn->len);
if (!reloc) {
dest_off = arch_jump_destination(insn);
- insn->call_dest = find_func_by_offset(insn->sec, dest_off);
- if (!insn->call_dest)
- insn->call_dest = find_symbol_by_offset(insn->sec, dest_off);
+ insn->call_dest = find_call_destination(insn->sec, dest_off);
if (insn->ignore)
continue;
@@ -754,8 +919,8 @@ static int add_call_destinations(struct objtool_file *file)
} else if (reloc->sym->type == STT_SECTION) {
dest_off = arch_dest_reloc_offset(reloc->addend);
- insn->call_dest = find_func_by_offset(reloc->sym->sec,
- dest_off);
+ insn->call_dest = find_call_destination(reloc->sym->sec,
+ dest_off);
if (!insn->call_dest) {
WARN_FUNC("can't find call dest symbol at %s+0x%lx",
insn->sec, insn->offset,
@@ -867,6 +1032,8 @@ static int handle_group_alt(struct objtool_file *file,
alt_group = alt_group_next_index++;
insn = *new_insn;
sec_for_each_insn_from(file, insn) {
+ struct reloc *alt_reloc;
+
if (insn->offset >= special_alt->new_off + special_alt->new_len)
break;
@@ -883,14 +1050,11 @@ static int handle_group_alt(struct objtool_file *file,
* .altinstr_replacement section, unless the arch's
* alternatives code can adjust the relative offsets
* accordingly.
- *
- * The x86 alternatives code adjusts the offsets only when it
- * encounters a branch instruction at the very beginning of the
- * replacement group.
*/
- if ((insn->offset != special_alt->new_off ||
- (insn->type != INSN_CALL && !is_static_jump(insn))) &&
- find_reloc_by_dest_range(file->elf, insn->sec, insn->offset, insn->len)) {
+ alt_reloc = find_reloc_by_dest_range(file->elf, insn->sec,
+ insn->offset, insn->len);
+ if (alt_reloc &&
+ !arch_support_alt_relocation(special_alt, insn, alt_reloc)) {
WARN_FUNC("unsupported relocation in alternatives section",
insn->sec, insn->offset);
@@ -1092,56 +1256,15 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn,
}
/*
- * find_jump_table() - Given a dynamic jump, find the switch jump table in
- * .rodata associated with it.
- *
- * There are 3 basic patterns:
- *
- * 1. jmpq *[rodata addr](,%reg,8)
- *
- * This is the most common case by far. It jumps to an address in a simple
- * jump table which is stored in .rodata.
- *
- * 2. jmpq *[rodata addr](%rip)
- *
- * This is caused by a rare GCC quirk, currently only seen in three driver
- * functions in the kernel, only with certain obscure non-distro configs.
- *
- * As part of an optimization, GCC makes a copy of an existing switch jump
- * table, modifies it, and then hard-codes the jump (albeit with an indirect
- * jump) to use a single entry in the table. The rest of the jump table and
- * some of its jump targets remain as dead code.
- *
- * In such a case we can just crudely ignore all unreachable instruction
- * warnings for the entire object file. Ideally we would just ignore them
- * for the function, but that would require redesigning the code quite a
- * bit. And honestly that's just not worth doing: unreachable instruction
- * warnings are of questionable value anyway, and this is such a rare issue.
- *
- * 3. mov [rodata addr],%reg1
- * ... some instructions ...
- * jmpq *(%reg1,%reg2,8)
- *
- * This is a fairly uncommon pattern which is new for GCC 6. As of this
- * writing, there are 11 occurrences of it in the allmodconfig kernel.
- *
- * As of GCC 7 there are quite a few more of these and the 'in between' code
- * is significant. Esp. with KASAN enabled some of the code between the mov
- * and jmpq uses .rodata itself, which can confuse things.
- *
- * TODO: Once we have DWARF CFI and smarter instruction decoding logic,
- * ensure the same register is used in the mov and jump instructions.
- *
- * NOTE: RETPOLINE made it harder still to decode dynamic jumps.
+ * find_jump_table() - Given a dynamic jump, find the switch jump table
+ * associated with it.
*/
static struct reloc *find_jump_table(struct objtool_file *file,
struct symbol *func,
struct instruction *insn)
{
- struct reloc *text_reloc, *table_reloc;
+ struct reloc *table_reloc;
struct instruction *dest_insn, *orig_insn = insn;
- struct section *table_sec;
- unsigned long table_offset;
/*
* Backward search using the @first_jump_src links, these help avoid
@@ -1162,52 +1285,13 @@ static struct reloc *find_jump_table(struct objtool_file *file,
insn->jump_dest->offset > orig_insn->offset))
break;
- /* look for a relocation which references .rodata */
- text_reloc = find_reloc_by_dest_range(file->elf, insn->sec,
- insn->offset, insn->len);
- if (!text_reloc || text_reloc->sym->type != STT_SECTION ||
- !text_reloc->sym->sec->rodata)
- continue;
-
- table_offset = text_reloc->addend;
- table_sec = text_reloc->sym->sec;
-
- if (text_reloc->type == R_X86_64_PC32)
- table_offset += 4;
-
- /*
- * Make sure the .rodata address isn't associated with a
- * symbol. GCC jump tables are anonymous data.
- *
- * Also support C jump tables which are in the same format as
- * switch jump tables. For objtool to recognize them, they
- * need to be placed in the C_JUMP_TABLE_SECTION section. They
- * have symbols associated with them.
- */
- if (find_symbol_containing(table_sec, table_offset) &&
- strcmp(table_sec->name, C_JUMP_TABLE_SECTION))
- continue;
-
- /*
- * Each table entry has a reloc associated with it. The reloc
- * should reference text in the same function as the original
- * instruction.
- */
- table_reloc = find_reloc_by_dest(file->elf, table_sec, table_offset);
+ table_reloc = arch_find_switch_table(file, insn);
if (!table_reloc)
continue;
dest_insn = find_insn(file, table_reloc->sym->sec, table_reloc->addend);
if (!dest_insn || !dest_insn->func || dest_insn->func->pfunc != func)
continue;
- /*
- * Use of RIP-relative switch jumps is quite rare, and
- * indicates a rare GCC quirk/bug which can leave dead code
- * behind.
- */
- if (text_reloc->type == R_X86_64_PC32)
- file->ignore_unreachables = true;
-
return table_reloc;
}
@@ -1350,32 +1434,7 @@ static int read_unwind_hints(struct objtool_file *file)
insn->hint = true;
- switch (hint->sp_reg) {
- case ORC_REG_UNDEFINED:
- cfa->base = CFI_UNDEFINED;
- break;
- case ORC_REG_SP:
- cfa->base = CFI_SP;
- break;
- case ORC_REG_BP:
- cfa->base = CFI_BP;
- break;
- case ORC_REG_SP_INDIRECT:
- cfa->base = CFI_SP_INDIRECT;
- break;
- case ORC_REG_R10:
- cfa->base = CFI_R10;
- break;
- case ORC_REG_R13:
- cfa->base = CFI_R13;
- break;
- case ORC_REG_DI:
- cfa->base = CFI_DI;
- break;
- case ORC_REG_DX:
- cfa->base = CFI_DX;
- break;
- default:
+ if (arch_decode_hint_reg(insn, hint->sp_reg)) {
WARN_FUNC("unsupported unwind_hint sp base reg %d",
insn->sec, insn->offset, hint->sp_reg);
return -1;
@@ -1522,6 +1581,23 @@ static int read_intra_function_calls(struct objtool_file *file)
return 0;
}
+static int read_static_call_tramps(struct objtool_file *file)
+{
+ struct section *sec;
+ struct symbol *func;
+
+ for_each_sec(file, sec) {
+ list_for_each_entry(func, &sec->symbol_list, list) {
+ if (func->bind == STB_GLOBAL &&
+ !strncmp(func->name, STATIC_CALL_TRAMP_PREFIX_STR,
+ strlen(STATIC_CALL_TRAMP_PREFIX_STR)))
+ func->static_call_tramp = true;
+ }
+ }
+
+ return 0;
+}
+
static void mark_rodata(struct objtool_file *file)
{
struct section *sec;
@@ -1569,6 +1645,10 @@ static int decode_sections(struct objtool_file *file)
if (ret)
return ret;
+ ret = read_static_call_tramps(file);
+ if (ret)
+ return ret;
+
ret = add_jump_destinations(file);
if (ret)
return ret;
@@ -1768,7 +1848,8 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
return 0;
}
- if (cfi->type == ORC_TYPE_REGS || cfi->type == ORC_TYPE_REGS_IRET)
+ if (cfi->type == UNWIND_HINT_TYPE_REGS ||
+ cfi->type == UNWIND_HINT_TYPE_REGS_PARTIAL)
return update_cfi_state_regs(insn, cfi, op);
switch (op->dest.type) {
@@ -2016,7 +2097,7 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
/* drap: push %rbp */
cfi->stack_size = 0;
- } else if (regs[op->src.reg].base == CFI_UNDEFINED) {
+ } else {
/* drap: push %reg */
save_reg(cfi, op->src.reg, CFI_BP, -cfi->stack_size);
@@ -2045,9 +2126,7 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
/* save drap offset so we know when to restore it */
cfi->drap_offset = op->dest.offset;
- }
-
- else if (regs[op->src.reg].base == CFI_UNDEFINED) {
+ } else {
/* drap: mov reg, disp(%rbp) */
save_reg(cfi, op->src.reg, CFI_BP, op->dest.offset);
@@ -2432,6 +2511,11 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
if (dead_end_function(file, insn->call_dest))
return 0;
+ if (insn->type == INSN_CALL && insn->call_dest->static_call_tramp) {
+ list_add_tail(&insn->static_call_node,
+ &file->static_call_list);
+ }
+
break;
case INSN_JUMP_CONDITIONAL:
@@ -2612,9 +2696,10 @@ static bool is_ubsan_insn(struct instruction *insn)
"__ubsan_handle_builtin_unreachable"));
}
-static bool ignore_unreachable_insn(struct instruction *insn)
+static bool ignore_unreachable_insn(struct objtool_file *file, struct instruction *insn)
{
int i;
+ struct instruction *prev_insn;
if (insn->ignore || insn->type == INSN_NOP)
return true;
@@ -2631,6 +2716,9 @@ static bool ignore_unreachable_insn(struct instruction *insn)
!strcmp(insn->sec->name, ".altinstr_aux"))
return true;
+ if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->offset == FAKE_JUMP_OFFSET)
+ return true;
+
if (!insn->func)
return false;
@@ -2639,8 +2727,11 @@ static bool ignore_unreachable_insn(struct instruction *insn)
* __builtin_unreachable(). The BUG() macro has an unreachable() after
* the UD2, which causes GCC's undefined trap logic to emit another UD2
* (or occasionally a JMP to UD2).
+ *
+ * It may also insert a UD2 after calling a __noreturn function.
*/
- if (list_prev_entry(insn, list)->dead_end &&
+ prev_insn = list_prev_entry(insn, list);
+ if ((prev_insn->dead_end || dead_end_function(file, prev_insn->call_dest)) &&
(insn->type == INSN_BUG ||
(insn->type == INSN_JUMP_UNCONDITIONAL &&
insn->jump_dest && insn->jump_dest->type == INSN_BUG)))
@@ -2767,7 +2858,7 @@ static int validate_reachable_instructions(struct objtool_file *file)
return 0;
for_each_insn(file, insn) {
- if (insn->visited || ignore_unreachable_insn(insn))
+ if (insn->visited || ignore_unreachable_insn(file, insn))
continue;
WARN_FUNC("unreachable instruction", insn->sec, insn->offset);
@@ -2777,36 +2868,22 @@ static int validate_reachable_instructions(struct objtool_file *file)
return 0;
}
-static struct objtool_file file;
-
-int check(const char *_objname, bool orc)
+int check(struct objtool_file *file)
{
int ret, warnings = 0;
- objname = _objname;
-
- file.elf = elf_open_read(objname, O_RDWR);
- if (!file.elf)
- return 1;
-
- INIT_LIST_HEAD(&file.insn_list);
- hash_init(file.insn_hash);
- file.c_file = !vmlinux && find_section_by_name(file.elf, ".comment");
- file.ignore_unreachables = no_unreachable;
- file.hints = false;
-
arch_initial_func_cfi_state(&initial_func_cfi);
- ret = decode_sections(&file);
+ ret = decode_sections(file);
if (ret < 0)
goto out;
warnings += ret;
- if (list_empty(&file.insn_list))
+ if (list_empty(&file->insn_list))
goto out;
if (vmlinux && !validate_dup) {
- ret = validate_vmlinux_functions(&file);
+ ret = validate_vmlinux_functions(file);
if (ret < 0)
goto out;
@@ -2815,44 +2892,33 @@ int check(const char *_objname, bool orc)
}
if (retpoline) {
- ret = validate_retpoline(&file);
+ ret = validate_retpoline(file);
if (ret < 0)
return ret;
warnings += ret;
}
- ret = validate_functions(&file);
+ ret = validate_functions(file);
if (ret < 0)
goto out;
warnings += ret;
- ret = validate_unwind_hints(&file, NULL);
+ ret = validate_unwind_hints(file, NULL);
if (ret < 0)
goto out;
warnings += ret;
if (!warnings) {
- ret = validate_reachable_instructions(&file);
+ ret = validate_reachable_instructions(file);
if (ret < 0)
goto out;
warnings += ret;
}
- if (orc) {
- ret = create_orc(&file);
- if (ret < 0)
- goto out;
-
- ret = create_orc_sections(&file);
- if (ret < 0)
- goto out;
- }
-
- if (file.elf->changed) {
- ret = elf_write(file.elf);
- if (ret < 0)
- goto out;
- }
+ ret = create_static_call_sections(file);
+ if (ret < 0)
+ goto out;
+ warnings += ret;
out:
if (ret < 0) {
diff --git a/tools/objtool/check.h b/tools/objtool/check.h
index 061aa96e15d3..5ec00a4b891b 100644
--- a/tools/objtool/check.h
+++ b/tools/objtool/check.h
@@ -22,6 +22,7 @@ struct insn_state {
struct instruction {
struct list_head list;
struct hlist_node hash;
+ struct list_head static_call_node;
struct section *sec;
unsigned long offset;
unsigned int len;
@@ -42,9 +43,17 @@ struct instruction {
struct symbol *func;
struct list_head stack_ops;
struct cfi_state cfi;
+#ifdef INSN_USE_ORC
struct orc_entry orc;
+#endif
};
+static inline bool is_static_jump(struct instruction *insn)
+{
+ return insn->type == INSN_JUMP_CONDITIONAL ||
+ insn->type == INSN_JUMP_UNCONDITIONAL;
+}
+
struct instruction *find_insn(struct objtool_file *file,
struct section *sec, unsigned long offset);
@@ -57,5 +66,4 @@ struct instruction *find_insn(struct objtool_file *file,
insn->sec == sec; \
insn = list_next_entry(insn, list))
-
#endif /* _CHECK_H */
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 3ddbd66f1a37..4e1d7460574b 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -652,7 +652,7 @@ err:
}
struct section *elf_create_section(struct elf *elf, const char *name,
- size_t entsize, int nr)
+ unsigned int sh_flags, size_t entsize, int nr)
{
struct section *sec, *shstrtab;
size_t size = entsize * nr;
@@ -712,7 +712,7 @@ struct section *elf_create_section(struct elf *elf, const char *name,
sec->sh.sh_entsize = entsize;
sec->sh.sh_type = SHT_PROGBITS;
sec->sh.sh_addralign = 1;
- sec->sh.sh_flags = SHF_ALLOC;
+ sec->sh.sh_flags = SHF_ALLOC | sh_flags;
/* Add section name to .shstrtab (or .strtab for Clang) */
@@ -767,7 +767,7 @@ static struct section *elf_create_rel_reloc_section(struct elf *elf, struct sect
strcpy(relocname, ".rel");
strcat(relocname, base->name);
- sec = elf_create_section(elf, relocname, sizeof(GElf_Rel), 0);
+ sec = elf_create_section(elf, relocname, 0, sizeof(GElf_Rel), 0);
free(relocname);
if (!sec)
return NULL;
@@ -797,7 +797,7 @@ static struct section *elf_create_rela_reloc_section(struct elf *elf, struct sec
strcpy(relocname, ".rela");
strcat(relocname, base->name);
- sec = elf_create_section(elf, relocname, sizeof(GElf_Rela), 0);
+ sec = elf_create_section(elf, relocname, 0, sizeof(GElf_Rela), 0);
free(relocname);
if (!sec)
return NULL;
diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h
index 6cc80a075166..807f8c670097 100644
--- a/tools/objtool/elf.h
+++ b/tools/objtool/elf.h
@@ -56,6 +56,7 @@ struct symbol {
unsigned int len;
struct symbol *pfunc, *cfunc, *alias;
bool uaccess_safe;
+ bool static_call_tramp;
};
struct reloc {
@@ -120,7 +121,7 @@ static inline u32 reloc_hash(struct reloc *reloc)
}
struct elf *elf_open_read(const char *name, int flags);
-struct section *elf_create_section(struct elf *elf, const char *name, size_t entsize, int nr);
+struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr);
struct section *elf_create_reloc_section(struct elf *elf, struct section *base, int reltype);
void elf_add_reloc(struct elf *elf, struct reloc *reloc);
int elf_write_insn(struct elf *elf, struct section *sec,
diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c
index 58fdda510653..9df0cd86d310 100644
--- a/tools/objtool/objtool.c
+++ b/tools/objtool/objtool.c
@@ -22,6 +22,8 @@
#include <linux/kernel.h>
#include "builtin.h"
+#include "objtool.h"
+#include "warn.h"
struct cmd_struct {
const char *name;
@@ -39,6 +41,34 @@ static struct cmd_struct objtool_cmds[] = {
bool help;
+const char *objname;
+static struct objtool_file file;
+
+struct objtool_file *objtool_open_read(const char *_objname)
+{
+ if (objname) {
+ if (strcmp(objname, _objname)) {
+ WARN("won't handle more than one file at a time");
+ return NULL;
+ }
+ return &file;
+ }
+ objname = _objname;
+
+ file.elf = elf_open_read(objname, O_RDWR);
+ if (!file.elf)
+ return NULL;
+
+ INIT_LIST_HEAD(&file.insn_list);
+ hash_init(file.insn_hash);
+ INIT_LIST_HEAD(&file.static_call_list);
+ file.c_file = !vmlinux && find_section_by_name(file.elf, ".comment");
+ file.ignore_unreachables = no_unreachable;
+ file.hints = false;
+
+ return &file;
+}
+
static void cmd_usage(void)
{
unsigned int i, longest = 0;
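Subcommands are expected to funnel through the new helper instead of opening the ELF themselves; a minimal sketch of the resulting call shape (the caller below is assumed, not shown in this hunk):

int cmd_check(int argc, const char **argv)
{
	struct objtool_file *file;

	/* Returns the cached objtool_file on repeated calls for the same object. */
	file = objtool_open_read(argv[0]);
	if (!file)
		return 1;

	return check(file);
}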
diff --git a/tools/objtool/objtool.h b/tools/objtool/objtool.h
index 528028a66816..4125d4578b23 100644
--- a/tools/objtool/objtool.h
+++ b/tools/objtool/objtool.h
@@ -12,14 +12,19 @@
#include "elf.h"
+#define __weak __attribute__((weak))
+
struct objtool_file {
struct elf *elf;
struct list_head insn_list;
DECLARE_HASHTABLE(insn_hash, 20);
+ struct list_head static_call_list;
bool ignore_unreachables, c_file, hints, rodata;
};
-int check(const char *objname, bool orc);
+struct objtool_file *objtool_open_read(const char *_objname);
+
+int check(struct objtool_file *file);
int orc_dump(const char *objname);
int create_orc(struct objtool_file *file);
int create_orc_sections(struct objtool_file *file);
diff --git a/tools/objtool/orc_dump.c b/tools/objtool/orc_dump.c
index fca46e006fc2..5e6a95368d35 100644
--- a/tools/objtool/orc_dump.c
+++ b/tools/objtool/orc_dump.c
@@ -4,6 +4,7 @@
*/
#include <unistd.h>
+#include <linux/objtool.h>
#include <asm/orc_types.h>
#include "objtool.h"
#include "warn.h"
@@ -37,12 +38,12 @@ static const char *reg_name(unsigned int reg)
static const char *orc_type_name(unsigned int type)
{
switch (type) {
- case ORC_TYPE_CALL:
+ case UNWIND_HINT_TYPE_CALL:
return "call";
- case ORC_TYPE_REGS:
+ case UNWIND_HINT_TYPE_REGS:
return "regs";
- case ORC_TYPE_REGS_IRET:
- return "iret";
+ case UNWIND_HINT_TYPE_REGS_PARTIAL:
+ return "regs (partial)";
default:
return "?";
}
diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c
index 968f55e6dd94..235663b96adc 100644
--- a/tools/objtool/orc_gen.c
+++ b/tools/objtool/orc_gen.c
@@ -6,6 +6,9 @@
#include <stdlib.h>
#include <string.h>
+#include <linux/objtool.h>
+#include <asm/orc_types.h>
+
#include "check.h"
#include "warn.h"
@@ -18,6 +21,9 @@ int create_orc(struct objtool_file *file)
struct cfi_reg *cfa = &insn->cfi.cfa;
struct cfi_reg *bp = &insn->cfi.regs[CFI_BP];
+ if (!insn->sec->text)
+ continue;
+
orc->end = insn->cfi.end;
if (cfa->base == CFI_UNDEFINED) {
@@ -143,7 +149,7 @@ int create_orc_sections(struct objtool_file *file)
struct orc_entry empty = {
.sp_reg = ORC_REG_UNDEFINED,
.bp_reg = ORC_REG_UNDEFINED,
- .type = ORC_TYPE_CALL,
+ .type = UNWIND_HINT_TYPE_CALL,
};
sec = find_section_by_name(file->elf, ".orc_unwind");
@@ -177,7 +183,7 @@ int create_orc_sections(struct objtool_file *file)
/* create .orc_unwind_ip and .rela.orc_unwind_ip sections */
- sec = elf_create_section(file->elf, ".orc_unwind_ip", sizeof(int), idx);
+ sec = elf_create_section(file->elf, ".orc_unwind_ip", 0, sizeof(int), idx);
if (!sec)
return -1;
@@ -186,7 +192,7 @@ int create_orc_sections(struct objtool_file *file)
return -1;
/* create .orc_unwind section */
- u_sec = elf_create_section(file->elf, ".orc_unwind",
+ u_sec = elf_create_section(file->elf, ".orc_unwind", 0,
sizeof(struct orc_entry), idx);
/* populate sections */
diff --git a/tools/objtool/special.c b/tools/objtool/special.c
index e893f1e48e44..1a2420febd08 100644
--- a/tools/objtool/special.c
+++ b/tools/objtool/special.c
@@ -14,24 +14,7 @@
#include "builtin.h"
#include "special.h"
#include "warn.h"
-
-#define EX_ENTRY_SIZE 12
-#define EX_ORIG_OFFSET 0
-#define EX_NEW_OFFSET 4
-
-#define JUMP_ENTRY_SIZE 16
-#define JUMP_ORIG_OFFSET 0
-#define JUMP_NEW_OFFSET 4
-
-#define ALT_ENTRY_SIZE 13
-#define ALT_ORIG_OFFSET 0
-#define ALT_NEW_OFFSET 4
-#define ALT_FEATURE_OFFSET 8
-#define ALT_ORIG_LEN_OFFSET 10
-#define ALT_NEW_LEN_OFFSET 11
-
-#define X86_FEATURE_POPCNT (4*32+23)
-#define X86_FEATURE_SMAP (9*32+20)
+#include "arch_special.h"
struct special_entry {
const char *sec;
@@ -68,6 +51,10 @@ struct special_entry entries[] = {
{},
};
+void __weak arch_handle_alternative(unsigned short feature, struct special_alt *alt)
+{
+}
+
static int get_alt_entry(struct elf *elf, struct special_entry *entry,
struct section *sec, int idx,
struct special_alt *alt)
@@ -92,30 +79,7 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry,
feature = *(unsigned short *)(sec->data->d_buf + offset +
entry->feature);
-
- /*
- * It has been requested that we don't validate the !POPCNT
- * feature path which is a "very very small percentage of
- * machines".
- */
- if (feature == X86_FEATURE_POPCNT)
- alt->skip_orig = true;
-
- /*
- * If UACCESS validation is enabled; force that alternative;
- * otherwise force it the other way.
- *
- * What we want to avoid is having both the original and the
- * alternative code flow at the same time, in that case we can
- * find paths that see the STAC but take the NOP instead of
- * CLAC and the other way around.
- */
- if (feature == X86_FEATURE_SMAP) {
- if (uaccess)
- alt->skip_orig = true;
- else
- alt->skip_alt = true;
- }
+ arch_handle_alternative(feature, alt);
}
orig_reloc = find_reloc_by_dest(elf, sec, offset + entry->orig);
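The x86 policy deleted above moves behind the new weak hook; roughly, reconstructed from the removed lines (the actual arch override lives in an arch-specific file not shown in this diff):

void arch_handle_alternative(unsigned short feature, struct special_alt *alt)
{
	switch (feature) {
	case X86_FEATURE_POPCNT:
		/* Don't validate the rarely-taken !POPCNT path. */
		alt->skip_orig = true;
		break;
	case X86_FEATURE_SMAP:
		/* Force a single flow so STAC/CLAC pairing stays consistent. */
		if (uaccess)
			alt->skip_orig = true;
		else
			alt->skip_alt = true;
		break;
	}
}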
diff --git a/tools/objtool/special.h b/tools/objtool/special.h
index 35061530e46e..abddf38ef334 100644
--- a/tools/objtool/special.h
+++ b/tools/objtool/special.h
@@ -7,8 +7,11 @@
#define _SPECIAL_H
#include <stdbool.h>
+#include "check.h"
#include "elf.h"
+#define C_JUMP_TABLE_SECTION ".rodata..c_jump_table"
+
struct special_alt {
struct list_head list;
@@ -28,4 +31,11 @@ struct special_alt {
int special_get_alts(struct elf *elf, struct list_head *alts);
+void arch_handle_alternative(unsigned short feature, struct special_alt *alt);
+
+bool arch_support_alt_relocation(struct special_alt *special_alt,
+ struct instruction *insn,
+ struct reloc *reloc);
+struct reloc *arch_find_switch_table(struct objtool_file *file,
+ struct instruction *insn);
#endif /* _SPECIAL_H */
diff --git a/tools/objtool/sync-check.sh b/tools/objtool/sync-check.sh
index 2a1261bfbb62..606a4b5e929f 100755
--- a/tools/objtool/sync-check.sh
+++ b/tools/objtool/sync-check.sh
@@ -1,13 +1,27 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
-FILES='
+if [ -z "$SRCARCH" ]; then
+ echo 'sync-check.sh: error: missing $SRCARCH environment variable' >&2
+ exit 1
+fi
+
+FILES="include/linux/objtool.h"
+
+if [ "$SRCARCH" = "x86" ]; then
+FILES="$FILES
arch/x86/include/asm/inat_types.h
arch/x86/include/asm/orc_types.h
arch/x86/include/asm/emulate_prefix.h
arch/x86/lib/x86-opcode-map.txt
arch/x86/tools/gen-insn-attr-x86.awk
-'
+include/linux/static_call_types.h
+arch/x86/include/asm/inat.h -I '^#include [\"<]\(asm/\)*inat_types.h[\">]'
+arch/x86/include/asm/insn.h -I '^#include [\"<]\(asm/\)*inat.h[\">]'
+arch/x86/lib/inat.c -I '^#include [\"<]\(../include/\)*asm/insn.h[\">]'
+arch/x86/lib/insn.c -I '^#include [\"<]\(../include/\)*asm/in\(at\|sn\).h[\">]' -I '^#include [\"<]\(../include/\)*asm/emulate_prefix.h[\">]'
+"
+fi
check_2 () {
file1=$1
@@ -40,11 +54,12 @@ fi
cd ../..
-for i in $FILES; do
- check $i
-done
+while read -r file_entry; do
+ if [ -z "$file_entry" ]; then
+ continue
+ fi
-check arch/x86/include/asm/inat.h '-I "^#include [\"<]\(asm/\)*inat_types.h[\">]"'
-check arch/x86/include/asm/insn.h '-I "^#include [\"<]\(asm/\)*inat.h[\">]"'
-check arch/x86/lib/inat.c '-I "^#include [\"<]\(../include/\)*asm/insn.h[\">]"'
-check arch/x86/lib/insn.c '-I "^#include [\"<]\(../include/\)*asm/in\(at\|sn\).h[\">]" -I "^#include [\"<]\(../include/\)*asm/emulate_prefix.h[\">]"'
+ check $file_entry
+done <<EOF
+$FILES
+EOF
diff --git a/tools/objtool/weak.c b/tools/objtool/weak.c
index 942ea5e8ac36..7843e9a7a72f 100644
--- a/tools/objtool/weak.c
+++ b/tools/objtool/weak.c
@@ -9,17 +9,13 @@
#include <errno.h>
#include "objtool.h"
-#define __weak __attribute__((weak))
-
#define UNSUPPORTED(name) \
({ \
fprintf(stderr, "error: objtool: " name " not implemented\n"); \
return ENOSYS; \
})
-const char __weak *objname;
-
-int __weak check(const char *_objname, bool orc)
+int __weak check(struct objtool_file *file)
{
UNSUPPORTED("check subcommand");
}
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 190be4fa5c21..d1b5fb89992f 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -16,7 +16,7 @@ $(shell printf "" > $(OUTPUT).config-detected)
detected = $(shell echo "$(1)=y" >> $(OUTPUT).config-detected)
detected_var = $(shell echo "$(1)=$($(1))" >> $(OUTPUT).config-detected)
-CFLAGS := $(EXTRA_CFLAGS) $(EXTRA_WARNINGS)
+CFLAGS := $(EXTRA_CFLAGS) $(filter-out -Wnested-externs,$(EXTRA_WARNINGS))
include $(srctree)/tools/scripts/Makefile.arch
@@ -483,10 +483,6 @@ ifndef NO_LIBELF
EXTLIBS += -lelf
$(call detected,CONFIG_LIBELF)
- ifeq ($(feature-libelf-mmap), 1)
- CFLAGS += -DHAVE_LIBELF_MMAP_SUPPORT
- endif
-
ifeq ($(feature-libelf-getphdrnum), 1)
CFLAGS += -DHAVE_ELF_GETPHDRNUM_SUPPORT
endif
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index 3ca6fe057a0b..b168364ac050 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -32,7 +32,7 @@
18 spu oldstat sys_ni_syscall
19 common lseek sys_lseek compat_sys_lseek
20 common getpid sys_getpid
-21 nospu mount sys_mount compat_sys_mount
+21 nospu mount sys_mount
22 32 umount sys_oldumount
22 64 umount sys_ni_syscall
22 spu umount sys_ni_syscall
@@ -189,8 +189,8 @@
142 common _newselect sys_select compat_sys_select
143 common flock sys_flock
144 common msync sys_msync
-145 common readv sys_readv compat_sys_readv
-146 common writev sys_writev compat_sys_writev
+145 common readv sys_readv
+146 common writev sys_writev
147 common getsid sys_getsid
148 common fdatasync sys_fdatasync
149 nospu _sysctl sys_ni_syscall
@@ -363,7 +363,7 @@
282 common unshare sys_unshare
283 common splice sys_splice
284 common tee sys_tee
-285 common vmsplice sys_vmsplice compat_sys_vmsplice
+285 common vmsplice sys_vmsplice
286 common openat sys_openat compat_sys_openat
287 common mkdirat sys_mkdirat
288 common mknodat sys_mknodat
@@ -443,8 +443,8 @@
348 common syncfs sys_syncfs
349 common sendmmsg sys_sendmmsg compat_sys_sendmmsg
350 common setns sys_setns
-351 nospu process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv
-352 nospu process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev
+351 nospu process_vm_readv sys_process_vm_readv
+352 nospu process_vm_writev sys_process_vm_writev
353 nospu finit_module sys_finit_module
354 nospu kcmp sys_kcmp
355 common sched_setattr sys_sched_setattr
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index 6a0bbea225db..d2fa9647ce25 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -26,7 +26,7 @@
16 32 lchown - compat_sys_s390_lchown16
19 common lseek sys_lseek compat_sys_lseek
20 common getpid sys_getpid sys_getpid
-21 common mount sys_mount compat_sys_mount
+21 common mount sys_mount
22 common umount sys_oldumount compat_sys_oldumount
23 32 setuid - compat_sys_s390_setuid16
24 32 getuid - compat_sys_s390_getuid16
@@ -134,8 +134,8 @@
142 64 select sys_select -
143 common flock sys_flock sys_flock
144 common msync sys_msync compat_sys_msync
-145 common readv sys_readv compat_sys_readv
-146 common writev sys_writev compat_sys_writev
+145 common readv sys_readv
+146 common writev sys_writev
147 common getsid sys_getsid sys_getsid
148 common fdatasync sys_fdatasync sys_fdatasync
149 common _sysctl - -
@@ -316,7 +316,7 @@
306 common splice sys_splice compat_sys_splice
307 common sync_file_range sys_sync_file_range compat_sys_s390_sync_file_range
308 common tee sys_tee compat_sys_tee
-309 common vmsplice sys_vmsplice compat_sys_vmsplice
+309 common vmsplice sys_vmsplice sys_vmsplice
310 common move_pages sys_move_pages compat_sys_move_pages
311 common getcpu sys_getcpu compat_sys_getcpu
312 common epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait
@@ -347,8 +347,8 @@
337 common clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime
338 common syncfs sys_syncfs sys_syncfs
339 common setns sys_setns sys_setns
-340 common process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv
-341 common process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev
+340 common process_vm_readv sys_process_vm_readv sys_process_vm_readv
+341 common process_vm_writev sys_process_vm_writev sys_process_vm_writev
342 common s390_runtime_instr sys_s390_runtime_instr sys_s390_runtime_instr
343 common kcmp sys_kcmp compat_sys_kcmp
344 common finit_module sys_finit_module compat_sys_finit_module
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index f30d6ae9a688..347809649ba2 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -371,8 +371,8 @@
512 x32 rt_sigaction compat_sys_rt_sigaction
513 x32 rt_sigreturn compat_sys_x32_rt_sigreturn
514 x32 ioctl compat_sys_ioctl
-515 x32 readv compat_sys_readv
-516 x32 writev compat_sys_writev
+515 x32 readv sys_readv
+516 x32 writev sys_writev
517 x32 recvfrom compat_sys_recvfrom
518 x32 sendmsg compat_sys_sendmsg
519 x32 recvmsg compat_sys_recvmsg
@@ -388,15 +388,15 @@
529 x32 waitid compat_sys_waitid
530 x32 set_robust_list compat_sys_set_robust_list
531 x32 get_robust_list compat_sys_get_robust_list
-532 x32 vmsplice compat_sys_vmsplice
+532 x32 vmsplice sys_vmsplice
533 x32 move_pages compat_sys_move_pages
534 x32 preadv compat_sys_preadv64
535 x32 pwritev compat_sys_pwritev64
536 x32 rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo
537 x32 recvmmsg compat_sys_recvmmsg_time64
538 x32 sendmmsg compat_sys_sendmmsg
-539 x32 process_vm_readv compat_sys_process_vm_readv
-540 x32 process_vm_writev compat_sys_process_vm_writev
+539 x32 process_vm_readv sys_process_vm_readv
+540 x32 process_vm_writev sys_process_vm_writev
541 x32 setsockopt sys_setsockopt
542 x32 getsockopt sys_getsockopt
543 x32 io_setup compat_sys_io_setup
diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build
index dd68a40a790c..878db6a59a41 100644
--- a/tools/perf/bench/Build
+++ b/tools/perf/bench/Build
@@ -13,7 +13,6 @@ perf-y += synthesize.o
perf-y += kallsyms-parse.o
perf-y += find-bit-bench.o
-perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-lib.o
perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o
diff --git a/tools/perf/bench/mem-memcpy-x86-64-lib.c b/tools/perf/bench/mem-memcpy-x86-64-lib.c
deleted file mode 100644
index 4130734dde84..000000000000
--- a/tools/perf/bench/mem-memcpy-x86-64-lib.c
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * From code in arch/x86/lib/usercopy_64.c, copied to keep tools/ copy
- * of the kernel's arch/x86/lib/memcpy_64.s used in 'perf bench mem memcpy'
- * happy.
- */
-#include <linux/types.h>
-
-unsigned long __memcpy_mcsafe(void *dst, const void *src, size_t cnt);
-unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len);
-
-unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len)
-{
- for (; len; --len, to++, from++) {
- /*
- * Call the assembly routine back directly since
- * memcpy_mcsafe() may silently fallback to memcpy.
- */
- unsigned long rem = __memcpy_mcsafe(to, from, 1);
-
- if (rem)
- break;
- }
- return len;
-}
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index 2feb751516ab..0374adcb223c 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -328,12 +328,6 @@ config_bpf_program(struct bpf_program *prog)
probe_conf.no_inlines = false;
probe_conf.force_add = false;
- config_str = bpf_program__title(prog, false);
- if (IS_ERR(config_str)) {
- pr_debug("bpf: unable to get title for program\n");
- return PTR_ERR(config_str);
- }
-
priv = calloc(sizeof(*priv), 1);
if (!priv) {
pr_debug("bpf: failed to alloc priv\n");
@@ -341,6 +335,7 @@ config_bpf_program(struct bpf_program *prog)
}
pev = &priv->pev;
+ config_str = bpf_program__section_name(prog);
pr_debug("bpf: config program '%s'\n", config_str);
err = parse_prog_config(config_str, &main_str, &is_tp, pev);
if (err)
@@ -454,10 +449,7 @@ preproc_gen_prologue(struct bpf_program *prog, int n,
if (err) {
const char *title;
- title = bpf_program__title(prog, false);
- if (!title)
- title = "[unknown]";
-
+ title = bpf_program__section_name(prog);
pr_debug("Failed to generate prologue for program %s\n",
title);
return err;
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index ff4f4c47e148..03e264a27cd3 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -28,7 +28,7 @@ struct option;
* libelf 0.8.x and earlier do not support ELF_C_READ_MMAP;
* for newer versions we can use mmap to reduce memory usage:
*/
-#ifdef HAVE_LIBELF_MMAP_SUPPORT
+#ifdef ELF_C_READ_MMAP
# define PERF_ELF_C_READ_MMAP ELF_C_READ_MMAP
#else
# define PERF_ELF_C_READ_MMAP ELF_C_READ
diff --git a/tools/power/acpi/Makefile b/tools/power/acpi/Makefile
index ebd3e1a1c28e..a249c50ebf55 100644
--- a/tools/power/acpi/Makefile
+++ b/tools/power/acpi/Makefile
@@ -7,6 +7,8 @@
include ../../scripts/Makefile.include
+.NOTPARALLEL:
+
all: acpidbg acpidump ec
clean: acpidbg_clean acpidump_clean ec_clean
install: acpidbg_install acpidump_install ec_install
diff --git a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
index dd38c2b2e1b4..11c5046dce16 100644
--- a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
+++ b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
@@ -110,7 +110,7 @@ u32 gbl_table_count = 0;
*
* RETURN: Status; Converted from errno.
*
- * DESCRIPTION: Get last errno and conver it to acpi_status.
+ * DESCRIPTION: Get last errno and convert it to acpi_status.
*
*****************************************************************************/
diff --git a/tools/power/cpupower/utils/cpufreq-set.c b/tools/power/cpupower/utils/cpufreq-set.c
index 6ed82fba5aaa..7b2164e07057 100644
--- a/tools/power/cpupower/utils/cpufreq-set.c
+++ b/tools/power/cpupower/utils/cpufreq-set.c
@@ -99,13 +99,17 @@ static unsigned long string_to_frequency(const char *str)
continue;
if (str[cp] == '.') {
- while (power > -1 && isdigit(str[cp+1]))
- cp++, power--;
+ while (power > -1 && isdigit(str[cp+1])) {
+ cp++;
+ power--;
+ }
}
- if (power >= -1) /* not enough => pad */
+ if (power >= -1) { /* not enough => pad */
pad = power + 1;
- else /* to much => strip */
- pad = 0, cp += power + 1;
+ } else { /* too much => strip */
+ pad = 0;
+ cp += power + 1;
+ }
/* check bounds */
if (cp <= 0 || cp + pad > NORM_FREQ_LEN - 1)
return 0;
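To make the pad-or-strip rule above concrete, a standalone sketch built on the same idea (assumes GHz input normalized to kHz; the names and the six-digit fractional budget are illustrative, not the tool's code):

#include <stdio.h>

static unsigned long ghz_to_khz(const char *s)
{
	unsigned long val = 0;
	int frac = 6;			/* fractional kHz digits still owed */
	int seen_point = 0;

	for (; *s; s++) {
		if (*s == '.') {
			seen_point = 1;
			continue;
		}
		if (*s < '0' || *s > '9')
			break;
		if (seen_point && !frac)
			continue;	/* too much => strip */
		val = val * 10 + (*s - '0');
		if (seen_point)
			frac--;
	}
	while (frac-- > 0)		/* not enough => pad */
		val *= 10;
	return val;
}

int main(void)
{
	printf("%lu\n", ghz_to_khz("1.5"));	/* prints 1500000 */
	return 0;
}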
diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py
index 46ff97e909c6..1bc36a1db14f 100755
--- a/tools/power/pm-graph/sleepgraph.py
+++ b/tools/power/pm-graph/sleepgraph.py
@@ -171,7 +171,7 @@ class SystemValues:
tracefuncs = {
'sys_sync': {},
'ksys_sync': {},
- '__pm_notifier_call_chain': {},
+ 'pm_notifier_call_chain_robust': {},
'pm_prepare_console': {},
'pm_notifier_call_chain': {},
'freeze_processes': {},
diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c
index 9f4b190f1d74..cd089a505859 100644
--- a/tools/power/x86/intel-speed-select/isst-config.c
+++ b/tools/power/x86/intel-speed-select/isst-config.c
@@ -15,7 +15,7 @@ struct process_cmd_struct {
int arg;
};
-static const char *version_str = "v1.5";
+static const char *version_str = "v1.6";
static const int supported_api_ver = 1;
static struct isst_if_platform_info isst_platform_info;
static char *progname;
@@ -545,20 +545,23 @@ static void set_cpu_present_cpu_mask(void)
}
}
-int get_core_count(int pkg_id, int die_id)
+int get_max_punit_core_id(int pkg_id, int die_id)
{
- int cnt = 0;
+ int max_id = 0;
+ int i;
- if (pkg_id < MAX_PACKAGE_COUNT && die_id < MAX_DIE_PER_PACKAGE) {
- int i;
+ for (i = 0; i < topo_max_cpus; ++i) {
+ if (!CPU_ISSET_S(i, present_cpumask_size, present_cpumask))
+ continue;
- for (i = 0; i < sizeof(long long) * 8; ++i) {
- if (core_mask[pkg_id][die_id] & (1ULL << i))
- cnt++;
- }
+ if (cpu_map[i].pkg_id == pkg_id &&
+ cpu_map[i].die_id == die_id &&
+ cpu_map[i].punit_cpu_core > max_id)
+ max_id = cpu_map[i].punit_cpu_core;
}
- return cnt;
+ return max_id;
}
int get_cpu_count(int pkg_id, int die_id)
diff --git a/tools/power/x86/intel-speed-select/isst-core.c b/tools/power/x86/intel-speed-select/isst-core.c
index a7f4337c5777..1d7ecb54352e 100644
--- a/tools/power/x86/intel-speed-select/isst-core.c
+++ b/tools/power/x86/intel-speed-select/isst-core.c
@@ -396,7 +396,7 @@ int isst_get_pbf_info(int cpu, int level, struct isst_pbf_info *pbf_info)
{
struct isst_pkg_ctdp_level_info ctdp_level;
struct isst_pkg_ctdp pkg_dev;
- int i, ret, core_cnt, max;
+ int i, ret, max_punit_core, max_mask_index;
unsigned int req, resp;
ret = isst_get_ctdp_levels(cpu, &pkg_dev);
@@ -421,10 +421,10 @@ int isst_get_pbf_info(int cpu, int level, struct isst_pbf_info *pbf_info)
pbf_info->core_cpumask_size = alloc_cpu_set(&pbf_info->core_cpumask);
- core_cnt = get_core_count(get_physical_package_id(cpu), get_physical_die_id(cpu));
- max = core_cnt > 32 ? 2 : 1;
+ max_punit_core = get_max_punit_core_id(get_physical_package_id(cpu), get_physical_die_id(cpu));
+ max_mask_index = max_punit_core > 32 ? 2 : 1;
- for (i = 0; i < max; ++i) {
+ for (i = 0; i < max_mask_index; ++i) {
unsigned long long mask;
int count;
diff --git a/tools/power/x86/intel-speed-select/isst.h b/tools/power/x86/intel-speed-select/isst.h
index 094ba4589a9c..29715e9c2e06 100644
--- a/tools/power/x86/intel-speed-select/isst.h
+++ b/tools/power/x86/intel-speed-select/isst.h
@@ -170,7 +170,7 @@ struct isst_pkg_ctdp {
extern int get_topo_max_cpus(void);
extern int get_cpu_count(int pkg_id, int die_id);
-extern int get_core_count(int pkg_id, int die_id);
+extern int get_max_punit_core_id(int pkg_id, int die_id);
/* Common interfaces */
FILE *get_output_file(void);
diff --git a/tools/testing/kunit/configs/broken_on_uml.config b/tools/testing/kunit/configs/broken_on_uml.config
index 239b9f03da2c..a7f0603d33f6 100644
--- a/tools/testing/kunit/configs/broken_on_uml.config
+++ b/tools/testing/kunit/configs/broken_on_uml.config
@@ -39,3 +39,4 @@
# CONFIG_QCOM_CPR is not set
# CONFIG_RESET_BRCMSTB_RESCAL is not set
# CONFIG_RESET_INTEL_GW is not set
+# CONFIG_ADI_AXI_ADC is not set
diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py
index 425ef40067e7..ebf5f5763dee 100755
--- a/tools/testing/kunit/kunit.py
+++ b/tools/testing/kunit/kunit.py
@@ -17,6 +17,7 @@ from collections import namedtuple
from enum import Enum, auto
import kunit_config
+import kunit_json
import kunit_kernel
import kunit_parser
@@ -30,9 +31,9 @@ KunitBuildRequest = namedtuple('KunitBuildRequest',
KunitExecRequest = namedtuple('KunitExecRequest',
['timeout', 'build_dir', 'alltests'])
KunitParseRequest = namedtuple('KunitParseRequest',
- ['raw_output', 'input_data'])
+ ['raw_output', 'input_data', 'build_dir', 'json'])
KunitRequest = namedtuple('KunitRequest', ['raw_output','timeout', 'jobs',
- 'build_dir', 'alltests',
+ 'build_dir', 'alltests', 'json',
'make_options'])
KernelDirectoryPath = sys.argv[0].split('tools/testing/kunit/')[0]
@@ -113,12 +114,22 @@ def parse_tests(request: KunitParseRequest) -> KunitResult:
test_result = kunit_parser.TestResult(kunit_parser.TestStatus.SUCCESS,
[],
'Tests not Parsed.')
+
if request.raw_output:
kunit_parser.raw_output(request.input_data)
else:
test_result = kunit_parser.parse_run_tests(request.input_data)
parse_end = time.time()
+ if request.json:
+ json_obj = kunit_json.get_json_result(
+ test_result=test_result,
+ def_config='kunit_defconfig',
+ build_dir=request.build_dir,
+ json_path=request.json)
+ if request.json == 'stdout':
+ print(json_obj)
+
if test_result.status != kunit_parser.TestStatus.SUCCESS:
return KunitResult(KunitStatus.TEST_FAILURE, test_result,
parse_end - parse_start)
@@ -151,7 +162,9 @@ def run_tests(linux: kunit_kernel.LinuxSourceTree,
return exec_result
parse_request = KunitParseRequest(request.raw_output,
- exec_result.result)
+ exec_result.result,
+ request.build_dir,
+ request.json)
parse_result = parse_tests(parse_request)
run_end = time.time()
@@ -195,7 +208,12 @@ def add_exec_opts(parser):
def add_parse_opts(parser):
parser.add_argument('--raw_output', help='don\'t format output from kernel',
action='store_true')
-
+ parser.add_argument('--json',
+ nargs='?',
+ help='Stores test results in JSON format, and either '
+ 'prints to stdout or saves to a file if a '
+ 'filename is specified',
+ type=str, const='stdout', default=None)
def main(argv, linux=None):
parser = argparse.ArgumentParser(
@@ -237,10 +255,16 @@ def main(argv, linux=None):
cli_args = parser.parse_args(argv)
+ if get_kernel_root_path():
+ os.chdir(get_kernel_root_path())
+
if cli_args.subcommand == 'run':
if not os.path.exists(cli_args.build_dir):
os.mkdir(cli_args.build_dir)
+ if not os.path.exists(kunit_kernel.kunitconfig_path):
+ create_default_kunitconfig()
+
if not linux:
linux = kunit_kernel.LinuxSourceTree()
@@ -249,14 +273,18 @@ def main(argv, linux=None):
cli_args.jobs,
cli_args.build_dir,
cli_args.alltests,
+ cli_args.json,
cli_args.make_options)
result = run_tests(linux, request)
if result.status != KunitStatus.SUCCESS:
sys.exit(1)
elif cli_args.subcommand == 'config':
- if cli_args.build_dir:
- if not os.path.exists(cli_args.build_dir):
- os.mkdir(cli_args.build_dir)
+ if cli_args.build_dir and (
+ not os.path.exists(cli_args.build_dir)):
+ os.mkdir(cli_args.build_dir)
+
+ if not os.path.exists(kunit_kernel.kunitconfig_path):
+ create_default_kunitconfig()
if not linux:
linux = kunit_kernel.LinuxSourceTree()
@@ -270,10 +298,6 @@ def main(argv, linux=None):
if result.status != KunitStatus.SUCCESS:
sys.exit(1)
elif cli_args.subcommand == 'build':
- if cli_args.build_dir:
- if not os.path.exists(cli_args.build_dir):
- os.mkdir(cli_args.build_dir)
-
if not linux:
linux = kunit_kernel.LinuxSourceTree()
@@ -288,10 +312,6 @@ def main(argv, linux=None):
if result.status != KunitStatus.SUCCESS:
sys.exit(1)
elif cli_args.subcommand == 'exec':
- if cli_args.build_dir:
- if not os.path.exists(cli_args.build_dir):
- os.mkdir(cli_args.build_dir)
-
if not linux:
linux = kunit_kernel.LinuxSourceTree()
@@ -300,7 +320,9 @@ def main(argv, linux=None):
cli_args.alltests)
exec_result = exec_tests(linux, exec_request)
parse_request = KunitParseRequest(cli_args.raw_output,
- exec_result.result)
+ exec_result.result,
+ cli_args.build_dir,
+ cli_args.json)
result = parse_tests(parse_request)
kunit_parser.print_with_timestamp((
'Elapsed time: %.3fs\n') % (
@@ -314,7 +336,9 @@ def main(argv, linux=None):
with open(cli_args.file, 'r') as f:
kunit_output = f.read().splitlines()
request = KunitParseRequest(cli_args.raw_output,
- kunit_output)
+ kunit_output,
+ cli_args.build_dir,
+ cli_args.json)
result = parse_tests(request)
if result.status != KunitStatus.SUCCESS:
sys.exit(1)
diff --git a/tools/testing/kunit/kunit_json.py b/tools/testing/kunit/kunit_json.py
new file mode 100644
index 000000000000..624b31b2dbd6
--- /dev/null
+++ b/tools/testing/kunit/kunit_json.py
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Generates JSON from KUnit results according to
+# KernelCI spec: https://github.com/kernelci/kernelci-doc/wiki/Test-API
+#
+# Copyright (C) 2020, Google LLC.
+# Author: Heidi Fahim <heidifahim@google.com>
+
+import json
+import os
+
+import kunit_parser
+
+from kunit_parser import TestStatus
+
+def get_json_result(test_result, def_config, build_dir, json_path):
+ sub_groups = []
+
+ # Each test suite is mapped to a KernelCI sub_group
+ for test_suite in test_result.suites:
+ sub_group = {
+ "name": test_suite.name,
+ "arch": "UM",
+ "defconfig": def_config,
+ "build_environment": build_dir,
+ "test_cases": [],
+ "lab_name": None,
+ "kernel": None,
+ "job": None,
+ "git_branch": "kselftest",
+ }
+ test_cases = []
+ # TODO: Add attachments attribute in test_case with detailed
+ # failure message, see https://api.kernelci.org/schema-test-case.html#get
+ for case in test_suite.cases:
+ test_case = {"name": case.name, "status": "FAIL"}
+ if case.status == TestStatus.SUCCESS:
+ test_case["status"] = "PASS"
+ elif case.status == TestStatus.TEST_CRASHED:
+ test_case["status"] = "ERROR"
+ test_cases.append(test_case)
+ sub_group["test_cases"] = test_cases
+ sub_groups.append(sub_group)
+ test_group = {
+ "name": "KUnit Test Group",
+ "arch": "UM",
+ "defconfig": def_config,
+ "build_environment": build_dir,
+ "sub_groups": sub_groups,
+ "lab_name": None,
+ "kernel": None,
+ "job": None,
+ "git_branch": "kselftest",
+ }
+ json_obj = json.dumps(test_group, indent=4)
+ if json_path != 'stdout':
+ with open(json_path, 'w') as result_path:
+ result_path.write(json_obj)
+ root = __file__.split('tools/testing/kunit/')[0]
+ kunit_parser.print_with_timestamp(
+ "Test results stored in %s" %
+ os.path.join(root, result_path.name))
+ return json_obj
diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py
index e20e2056cb38..b557b1e93f98 100644
--- a/tools/testing/kunit/kunit_kernel.py
+++ b/tools/testing/kunit/kunit_kernel.py
@@ -36,9 +36,9 @@ class LinuxSourceTreeOperations(object):
try:
subprocess.check_output(['make', 'mrproper'], stderr=subprocess.STDOUT)
except OSError as e:
- raise ConfigError('Could not call make command: ' + e)
+ raise ConfigError('Could not call make command: ' + str(e))
except subprocess.CalledProcessError as e:
- raise ConfigError(e.output)
+ raise ConfigError(e.output.decode())
def make_olddefconfig(self, build_dir, make_options):
command = ['make', 'ARCH=um', 'olddefconfig']
@@ -49,22 +49,27 @@ class LinuxSourceTreeOperations(object):
try:
subprocess.check_output(command, stderr=subprocess.STDOUT)
except OSError as e:
- raise ConfigError('Could not call make command: ' + e)
+ raise ConfigError('Could not call make command: ' + str(e))
except subprocess.CalledProcessError as e:
- raise ConfigError(e.output)
+ raise ConfigError(e.output.decode())
- def make_allyesconfig(self):
+ def make_allyesconfig(self, build_dir, make_options):
kunit_parser.print_with_timestamp(
'Enabling all CONFIGs for UML...')
+ command = ['make', 'ARCH=um', 'allyesconfig']
+ if make_options:
+ command.extend(make_options)
+ if build_dir:
+ command += ['O=' + build_dir]
process = subprocess.Popen(
- ['make', 'ARCH=um', 'allyesconfig'],
+ command,
stdout=subprocess.DEVNULL,
stderr=subprocess.STDOUT)
process.wait()
kunit_parser.print_with_timestamp(
'Disabling broken configs to run KUnit tests...')
with ExitStack() as es:
- config = open(KCONFIG_PATH, 'a')
+ config = open(get_kconfig_path(build_dir), 'a')
disable = open(BROKEN_ALLCONFIG_PATH, 'r').read()
config.write(disable)
kunit_parser.print_with_timestamp(
@@ -79,9 +84,9 @@ class LinuxSourceTreeOperations(object):
try:
subprocess.check_output(command, stderr=subprocess.STDOUT)
except OSError as e:
- raise BuildError('Could not call execute make: ' + e)
+ raise BuildError('Could not call execute make: ' + str(e))
except subprocess.CalledProcessError as e:
- raise BuildError(e.output)
+ raise BuildError(e.output.decode())
def linux_bin(self, params, timeout, build_dir, outfile):
"""Runs the Linux UML binary. Must be named 'linux'."""
@@ -161,9 +166,9 @@ class LinuxSourceTree(object):
return self.build_config(build_dir, make_options)
def build_um_kernel(self, alltests, jobs, build_dir, make_options):
- if alltests:
- self._ops.make_allyesconfig()
try:
+ if alltests:
+ self._ops.make_allyesconfig(build_dir, make_options)
self._ops.make_olddefconfig(build_dir, make_options)
self._ops.make(jobs, build_dir, make_options)
except (ConfigError, BuildError) as e:
diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py
index 287c74d821c3..99c3c5671ea4 100755
--- a/tools/testing/kunit/kunit_tool_test.py
+++ b/tools/testing/kunit/kunit_tool_test.py
@@ -11,11 +11,13 @@ from unittest import mock
import tempfile, shutil # Handling test_tmpdir
+import json
import os
import kunit_config
import kunit_parser
import kunit_kernel
+import kunit_json
import kunit
test_tmpdir = ''
@@ -230,6 +232,37 @@ class KUnitParserTest(unittest.TestCase):
result = kunit_parser.parse_run_tests(file.readlines())
self.assertEqual('kunit-resource-test', result.suites[0].name)
+class KUnitJsonTest(unittest.TestCase):
+
+ def _json_for(self, log_file):
+ with(open(get_absolute_path(log_file))) as file:
+ test_result = kunit_parser.parse_run_tests(file)
+ json_obj = kunit_json.get_json_result(
+ test_result=test_result,
+ def_config='kunit_defconfig',
+ build_dir=None,
+ json_path='stdout')
+ return json.loads(json_obj)
+
+ def test_failed_test_json(self):
+ result = self._json_for(
+ 'test_data/test_is_test_passed-failure.log')
+ self.assertEqual(
+ {'name': 'example_simple_test', 'status': 'FAIL'},
+ result["sub_groups"][1]["test_cases"][0])
+
+ def test_crashed_test_json(self):
+ result = self._json_for(
+ 'test_data/test_is_test_passed-crash.log')
+ self.assertEqual(
+ {'name': 'example_simple_test', 'status': 'ERROR'},
+ result["sub_groups"][1]["test_cases"][0])
+
+ def test_no_tests_json(self):
+ result = self._json_for(
+ 'test_data/test_is_test_passed-no_tests_run.log')
+ self.assertEqual(0, len(result['sub_groups']))
+
class StrContains(str):
def __eq__(self, other):
return self in other
diff --git a/tools/testing/nvdimm/dax-dev.c b/tools/testing/nvdimm/dax-dev.c
index 7e5d979e73cb..fb342a8c98d3 100644
--- a/tools/testing/nvdimm/dax-dev.c
+++ b/tools/testing/nvdimm/dax-dev.c
@@ -9,12 +9,19 @@
phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff,
unsigned long size)
{
- struct resource *res = &dev_dax->region->res;
- phys_addr_t addr;
+ int i;
- addr = pgoff * PAGE_SIZE + res->start;
- if (addr >= res->start && addr <= res->end) {
- if (addr + size - 1 <= res->end) {
+ for (i = 0; i < dev_dax->nr_range; i++) {
+ struct dev_dax_range *dax_range = &dev_dax->ranges[i];
+ struct range *range = &dax_range->range;
+ unsigned long long pgoff_end;
+ phys_addr_t addr;
+
+ pgoff_end = dax_range->pgoff + PHYS_PFN(range_len(range)) - 1;
+ if (pgoff < dax_range->pgoff || pgoff > pgoff_end)
+ continue;
+ addr = PFN_PHYS(pgoff - dax_range->pgoff) + range->start;
+ if (addr + size - 1 <= range->end) {
if (get_nfit_res(addr)) {
struct page *page;
@@ -23,9 +30,10 @@ phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff,
page = vmalloc_to_page((void *)addr);
return PFN_PHYS(page_to_pfn(page));
- } else
- return addr;
+ }
+ return addr;
}
+ break;
}
return -1;
}
diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c
index 03e40b3b0106..c62d372d426f 100644
--- a/tools/testing/nvdimm/test/iomap.c
+++ b/tools/testing/nvdimm/test/iomap.c
@@ -126,7 +126,7 @@ static void dev_pagemap_percpu_release(struct percpu_ref *ref)
void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
{
int error;
- resource_size_t offset = pgmap->res.start;
+ resource_size_t offset = pgmap->range.start;
struct nfit_test_resource *nfit_res = get_nfit_res(offset);
if (!nfit_res)
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index a1a5dc645b40..2ac0fff6dad8 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -23,7 +23,8 @@
#include "nfit_test.h"
#include "../watermark.h"
-#include <asm/mcsafe_test.h>
+#include <asm/copy_mc_test.h>
+#include <asm/mce.h>
/*
* Generate an NFIT table to describe the following topology:
@@ -3283,7 +3284,7 @@ static struct platform_driver nfit_test_driver = {
.id_table = nfit_test_id,
};
-static char mcsafe_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));
+static char copy_mc_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));
enum INJECT {
INJECT_NONE,
@@ -3291,7 +3292,7 @@ enum INJECT {
INJECT_DST,
};
-static void mcsafe_test_init(char *dst, char *src, size_t size)
+static void copy_mc_test_init(char *dst, char *src, size_t size)
{
size_t i;
@@ -3300,7 +3301,7 @@ static void mcsafe_test_init(char *dst, char *src, size_t size)
src[i] = (char) i;
}
-static bool mcsafe_test_validate(unsigned char *dst, unsigned char *src,
+static bool copy_mc_test_validate(unsigned char *dst, unsigned char *src,
size_t size, unsigned long rem)
{
size_t i;
@@ -3321,12 +3322,12 @@ static bool mcsafe_test_validate(unsigned char *dst, unsigned char *src,
return true;
}
-void mcsafe_test(void)
+void copy_mc_test(void)
{
char *inject_desc[] = { "none", "source", "destination" };
enum INJECT inj;
- if (IS_ENABLED(CONFIG_MCSAFE_TEST)) {
+ if (IS_ENABLED(CONFIG_COPY_MC_TEST)) {
pr_info("%s: run...\n", __func__);
} else {
pr_info("%s: disabled, skip.\n", __func__);
@@ -3344,31 +3345,31 @@ void mcsafe_test(void)
switch (inj) {
case INJECT_NONE:
- mcsafe_inject_src(NULL);
- mcsafe_inject_dst(NULL);
- dst = &mcsafe_buf[2048];
- src = &mcsafe_buf[1024 - i];
+ copy_mc_inject_src(NULL);
+ copy_mc_inject_dst(NULL);
+ dst = &copy_mc_buf[2048];
+ src = &copy_mc_buf[1024 - i];
expect = 0;
break;
case INJECT_SRC:
- mcsafe_inject_src(&mcsafe_buf[1024]);
- mcsafe_inject_dst(NULL);
- dst = &mcsafe_buf[2048];
- src = &mcsafe_buf[1024 - i];
+ copy_mc_inject_src(&copy_mc_buf[1024]);
+ copy_mc_inject_dst(NULL);
+ dst = &copy_mc_buf[2048];
+ src = &copy_mc_buf[1024 - i];
expect = 512 - i;
break;
case INJECT_DST:
- mcsafe_inject_src(NULL);
- mcsafe_inject_dst(&mcsafe_buf[2048]);
- dst = &mcsafe_buf[2048 - i];
- src = &mcsafe_buf[1024];
+ copy_mc_inject_src(NULL);
+ copy_mc_inject_dst(&copy_mc_buf[2048]);
+ dst = &copy_mc_buf[2048 - i];
+ src = &copy_mc_buf[1024];
expect = 512 - i;
break;
}
- mcsafe_test_init(dst, src, 512);
- rem = __memcpy_mcsafe(dst, src, 512);
- valid = mcsafe_test_validate(dst, src, 512, expect);
+ copy_mc_test_init(dst, src, 512);
+ rem = copy_mc_fragile(dst, src, 512);
+ valid = copy_mc_test_validate(dst, src, 512, expect);
if (rem == expect && valid)
continue;
pr_info("%s: copy(%#lx, %#lx, %d) off: %d rem: %ld %s expect: %ld\n",
@@ -3380,8 +3381,8 @@ void mcsafe_test(void)
}
}
- mcsafe_inject_src(NULL);
- mcsafe_inject_dst(NULL);
+ copy_mc_inject_src(NULL);
+ copy_mc_inject_dst(NULL);
}
static __init int nfit_test_init(void)
@@ -3392,7 +3393,7 @@ static __init int nfit_test_init(void)
libnvdimm_test();
acpi_nfit_test();
device_dax_test();
- mcsafe_test();
+ copy_mc_test();
dax_pmem_test();
dax_pmem_core_test();
#ifdef CONFIG_DEV_DAX_PMEM_COMPAT
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 9018f45d631d..d9c283503159 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -88,10 +88,10 @@ endif
# of the targets gets built.
FORCE_TARGETS ?=
-# Clear LDFLAGS and MAKEFLAGS if called from main
-# Makefile to avoid test build failures when test
-# Makefile doesn't have explicit build rules.
-ifeq (1,$(MAKELEVEL))
+# Clear LDFLAGS and MAKEFLAGS when implicit rules are missing. This provides
+# implicit rules to sub-test Makefiles, which avoids build failures in test
+# Makefiles that don't have explicit build rules.
+ifeq (,$(LINK.c))
override LDFLAGS =
override MAKEFLAGS =
endif
@@ -206,6 +206,7 @@ KSFT_INSTALL_PATH := $(abspath $(KSFT_INSTALL_PATH))
# Avoid changing the rest of the logic here and lib.mk.
INSTALL_PATH := $(KSFT_INSTALL_PATH)
ALL_SCRIPT := $(INSTALL_PATH)/run_kselftest.sh
+TEST_LIST := $(INSTALL_PATH)/kselftest-list.txt
install: all
ifdef INSTALL_PATH
@@ -214,6 +215,8 @@ ifdef INSTALL_PATH
install -m 744 kselftest/module.sh $(INSTALL_PATH)/kselftest/
install -m 744 kselftest/runner.sh $(INSTALL_PATH)/kselftest/
install -m 744 kselftest/prefix.pl $(INSTALL_PATH)/kselftest/
+ install -m 744 run_kselftest.sh $(INSTALL_PATH)/
+ rm -f $(TEST_LIST)
@ret=1; \
for TARGET in $(TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
@@ -222,33 +225,18 @@ ifdef INSTALL_PATH
ret=$$((ret * $$?)); \
done; exit $$ret;
- @# Ask all targets to emit their test scripts
- echo "#!/bin/sh" > $(ALL_SCRIPT)
- echo "BASE_DIR=\$$(realpath \$$(dirname \$$0))" >> $(ALL_SCRIPT)
- echo "cd \$$BASE_DIR" >> $(ALL_SCRIPT)
- echo ". ./kselftest/runner.sh" >> $(ALL_SCRIPT)
- echo "ROOT=\$$PWD" >> $(ALL_SCRIPT)
- echo "if [ \"\$$1\" = \"--summary\" ]; then" >> $(ALL_SCRIPT)
- echo " logfile=\$$BASE_DIR/output.log" >> $(ALL_SCRIPT)
- echo " cat /dev/null > \$$logfile" >> $(ALL_SCRIPT)
- echo "fi" >> $(ALL_SCRIPT)
- @# While building run_kselftest.sh skip also non-existent TARGET dirs:
+ @# Ask all targets to emit their test scripts
+ @# While building kselftest-list.txt, also skip non-existent TARGET dirs:
@# they could be the result of a build failure and should NOT be
@# included in the generated runlist.
for TARGET in $(TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
[ ! -d $(INSTALL_PATH)/$$TARGET ] && echo "Skipping non-existent dir: $$TARGET" && continue; \
- echo "[ -w /dev/kmsg ] && echo \"kselftest: Running tests in $$TARGET\" >> /dev/kmsg" >> $(ALL_SCRIPT); \
- echo "cd $$TARGET" >> $(ALL_SCRIPT); \
- echo -n "run_many" >> $(ALL_SCRIPT); \
echo -n "Emit Tests for $$TARGET\n"; \
- $(MAKE) -s --no-print-directory OUTPUT=$$BUILD_TARGET -C $$TARGET emit_tests >> $(ALL_SCRIPT); \
- echo "" >> $(ALL_SCRIPT); \
- echo "cd \$$ROOT" >> $(ALL_SCRIPT); \
+ $(MAKE) -s --no-print-directory OUTPUT=$$BUILD_TARGET COLLECTION=$$TARGET \
+ -C $$TARGET emit_tests >> $(TEST_LIST); \
done;
-
- chmod u+x $(ALL_SCRIPT)
else
$(error Error: set INSTALL_PATH to use install)
endif
diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile
index 93b567d23c8b..2c9d012797a7 100644
--- a/tools/testing/selftests/arm64/Makefile
+++ b/tools/testing/selftests/arm64/Makefile
@@ -4,7 +4,7 @@
ARCH ?= $(shell uname -m 2>/dev/null || echo not)
ifneq (,$(filter $(ARCH),aarch64 arm64))
-ARM64_SUBTARGETS ?= tags signal
+ARM64_SUBTARGETS ?= tags signal pauth fp mte
else
ARM64_SUBTARGETS :=
endif
diff --git a/tools/testing/selftests/arm64/fp/.gitignore b/tools/testing/selftests/arm64/fp/.gitignore
new file mode 100644
index 000000000000..d66f76d2a650
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/.gitignore
@@ -0,0 +1,5 @@
+fpsimd-test
+sve-probe-vls
+sve-ptrace
+sve-test
+vlset
diff --git a/tools/testing/selftests/arm64/fp/Makefile b/tools/testing/selftests/arm64/fp/Makefile
new file mode 100644
index 000000000000..a57009d3a0dc
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/Makefile
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0
+
+CFLAGS += -I../../../../../usr/include/
+TEST_GEN_PROGS := sve-ptrace sve-probe-vls
+TEST_PROGS_EXTENDED := fpsimd-test fpsimd-stress sve-test sve-stress vlset
+
+all: $(TEST_GEN_PROGS) $(TEST_PROGS_EXTENDED)
+
+fpsimd-test: fpsimd-test.o
+ $(CC) -nostdlib $^ -o $@
+sve-ptrace: sve-ptrace.o sve-ptrace-asm.o
+sve-probe-vls: sve-probe-vls.o
+sve-test: sve-test.o
+ $(CC) -nostdlib $^ -o $@
+vlset: vlset.o
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/arm64/fp/README b/tools/testing/selftests/arm64/fp/README
new file mode 100644
index 000000000000..03e3dad865d8
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/README
@@ -0,0 +1,100 @@
+This directory contains a mix of tests integrated with kselftest and
+standalone stress tests.
+
+kselftest tests
+===============
+
+sve-probe-vls - Checks the SVE vector length enumeration interface
+sve-ptrace - Checks the SVE ptrace interface
+
+Running the non-kselftest tests
+===============================
+
+sve-stress performs an SVE context switch stress test, as described
+below.
+
+(The fpsimd-stress test works the same way; just substitute "fpsimd" for
+"sve" in the following commands.)
+
+
+The test runs until killed by the user.
+
+If no context switch error was detected, you will see output such as
+the following:
+
+$ ./sve-stress
+(wait for some time)
+^C
+Vector length: 512 bits
+PID: 1573
+Terminated by signal 15, no error, iterations=9467, signals=1014
+Vector length: 512 bits
+PID: 1575
+Terminated by signal 15, no error, iterations=9448, signals=1028
+Vector length: 512 bits
+PID: 1577
+Terminated by signal 15, no error, iterations=9436, signals=1039
+Vector length: 512 bits
+PID: 1579
+Terminated by signal 15, no error, iterations=9421, signals=1039
+Vector length: 512 bits
+PID: 1581
+Terminated by signal 15, no error, iterations=9403, signals=1039
+Vector length: 512 bits
+PID: 1583
+Terminated by signal 15, no error, iterations=9385, signals=1036
+Vector length: 512 bits
+PID: 1585
+Terminated by signal 15, no error, iterations=9376, signals=1039
+Vector length: 512 bits
+PID: 1587
+Terminated by signal 15, no error, iterations=9361, signals=1039
+Vector length: 512 bits
+PID: 1589
+Terminated by signal 15, no error, iterations=9350, signals=1039
+
+
+If an error was detected, details of the mismatch will be printed
+instead of "no error".
+
+Ideally, the test should be allowed to run for many minutes or hours
+to maximise test coverage.
+
+
+KVM stress testing
+==================
+
+To try to reproduce the bugs that we have been observing, sve-stress
+should be run in parallel in two KVM guests, while simultaneously
+running on the host.
+
+1) Start 2 guests, using the following command for each:
+
+$ lkvm run --console=virtio -pconsole=hvc0 --sve Image
+
+(Depending on the hardware GIC implementation, you may also need
+--irqchip=gicv3. New kvmtool defaults to that if appropriate, but I
+can't remember whether my branch is new enough for that. Try without
+the option first.)
+
+Kvmtool occupies the terminal until you kill it (Ctrl+A x),
+or until the guest terminates. It is therefore recommended to run
+each instance in a separate terminal (use screen or ssh etc.). This
+allows multiple guests to be run in parallel while running other
+commands on the host.
+
+Within the guest, the host filesystem is accessible, mounted on /host.
+
+2) Run the sve-stress on *each* guest with the Vector-Length set to 32:
+guest$ ./vlset --inherit 32 ./sve-stress
+
+3) Run the sve-stress on the host with the maximum Vector-Length:
+host$ ./vlset --inherit --max ./sve-stress
+
+
+Again, the test should be allowed to run for many minutes or hours to
+maximise test coverage.
+
+If no error is detected, you will see output from each sve-stress
+instance similar to that illustrated above; otherwise details of the
+observed mismatches will be printed.
diff --git a/tools/testing/selftests/arm64/fp/asm-offsets.h b/tools/testing/selftests/arm64/fp/asm-offsets.h
new file mode 100644
index 000000000000..a180851496ec
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/asm-offsets.h
@@ -0,0 +1,11 @@
+#define sa_sz 32
+#define sa_flags 8
+#define sa_handler 0
+#define sa_mask_sz 8
+#define SIGUSR1 10
+#define SIGTERM 15
+#define SIGINT 2
+#define SIGABRT 6
+#define SA_NODEFER 1073741824
+#define SA_SIGINFO 4
+#define ucontext_regs 184
diff --git a/tools/testing/selftests/arm64/fp/assembler.h b/tools/testing/selftests/arm64/fp/assembler.h
new file mode 100644
index 000000000000..8944f2189206
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/assembler.h
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2015-2019 ARM Limited.
+// Original author: Dave Martin <Dave.Martin@arm.com>
+
+#ifndef ASSEMBLER_H
+#define ASSEMBLER_H
+
+.macro __for from:req, to:req
+ .if (\from) == (\to)
+ _for__body %\from
+ .else
+ __for \from, %(\from) + ((\to) - (\from)) / 2
+ __for %(\from) + ((\to) - (\from)) / 2 + 1, \to
+ .endif
+.endm
+
+.macro _for var:req, from:req, to:req, insn:vararg
+ .macro _for__body \var:req
+ .noaltmacro
+ \insn
+ .altmacro
+ .endm
+
+ .altmacro
+ __for \from, \to
+ .noaltmacro
+
+ .purgem _for__body
+.endm
+
+.macro function name
+ .macro endfunction
+ .type \name, @function
+ .purgem endfunction
+ .endm
+\name:
+.endm
+
+.macro define_accessor name, num, insn
+ .macro \name\()_entry n
+ \insn \n, 1
+ ret
+ .endm
+
+function \name
+ adr x2, .L__accessor_tbl\@
+ add x2, x2, x0, lsl #3
+ br x2
+
+.L__accessor_tbl\@:
+ _for x, 0, (\num) - 1, \name\()_entry \x
+endfunction
+
+ .purgem \name\()_entry
+.endm
+
+#endif /* ! ASSEMBLER_H */
diff --git a/tools/testing/selftests/arm64/fp/fpsimd-stress b/tools/testing/selftests/arm64/fp/fpsimd-stress
new file mode 100755
index 000000000000..781b5b022eaf
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/fpsimd-stress
@@ -0,0 +1,60 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+# Copyright (C) 2015-2019 ARM Limited.
+# Original author: Dave Martin <Dave.Martin@arm.com>
+
+set -ue
+
+NR_CPUS=`nproc`
+
+pids=
+logs=
+
+cleanup () {
+ trap - INT TERM CHLD
+ set +e
+
+ if [ -n "$pids" ]; then
+ kill $pids
+ wait $pids
+ pids=
+ fi
+
+ if [ -n "$logs" ]; then
+ cat $logs
+ rm $logs
+ logs=
+ fi
+}
+
+interrupt () {
+ cleanup
+ exit 0
+}
+
+child_died () {
+ cleanup
+ exit 1
+}
+
+trap interrupt INT TERM EXIT
+trap child_died CHLD
+
+for x in `seq 0 $((NR_CPUS * 4))`; do
+ log=`mktemp`
+ logs=$logs\ $log
+ ./fpsimd-test >$log &
+ pids=$pids\ $!
+done
+
+# Wait for all child processes to be created:
+sleep 10
+
+while :; do
+ kill -USR1 $pids
+done &
+pids=$pids\ $!
+
+wait
+
+exit 1
diff --git a/tools/testing/selftests/arm64/fp/fpsimd-test.S b/tools/testing/selftests/arm64/fp/fpsimd-test.S
new file mode 100644
index 000000000000..1c5556bdd11d
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/fpsimd-test.S
@@ -0,0 +1,482 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2015-2019 ARM Limited.
+// Original author: Dave Martin <Dave.Martin@arm.com>
+//
+// Simple FPSIMD context switch test
+// Repeatedly writes unique test patterns into each FPSIMD register
+// and reads them back to verify integrity.
+//
+// for x in `seq 1 NR_CPUS`; do fpsimd-test & pids=$pids\ $! ; done
+// (leave it running for as long as you want...)
+// kill $pids
+
+#include <asm/unistd.h>
+#include "assembler.h"
+#include "asm-offsets.h"
+
+#define NVR 32
+#define MAXVL_B (128 / 8)
+
+.macro _vldr Vn:req, Xt:req
+ ld1 {v\Vn\().2d}, [x\Xt]
+.endm
+
+.macro _vstr Vn:req, Xt:req
+ st1 {v\Vn\().2d}, [x\Xt]
+.endm
+
+// Generate accessor functions to read/write programmatically selected
+// FPSIMD registers.
+// x0 is the register index to access
+// x1 is the memory address to read from (setv) or store to (getv)
+// All clobber x0-x2
+define_accessor setv, NVR, _vldr
+define_accessor getv, NVR, _vstr
+
+// Print a single character x0 to stdout
+// Clobbers x0-x2,x8
+function putc
+ str x0, [sp, #-16]!
+
+ mov x0, #1 // STDOUT_FILENO
+ mov x1, sp
+ mov x2, #1
+ mov x8, #__NR_write
+ svc #0
+
+ add sp, sp, #16
+ ret
+endfunction
+
+// Print a NUL-terminated string starting at address x0 to stdout
+// Clobbers x0-x3,x8
+function puts
+ mov x1, x0
+
+ mov x2, #0
+0: ldrb w3, [x0], #1
+ cbz w3, 1f
+ add x2, x2, #1
+ b 0b
+
+1: mov w0, #1 // STDOUT_FILENO
+ mov x8, #__NR_write
+ svc #0
+
+ ret
+endfunction
+
+// Utility macro to print a literal string
+// Clobbers x0-x4,x8
+.macro puts string
+ .pushsection .rodata.str1.1, "aMS", 1
+.L__puts_literal\@: .string "\string"
+ .popsection
+
+ ldr x0, =.L__puts_literal\@
+ bl puts
+.endm
+
+// Print an unsigned decimal number x0 to stdout
+// Clobbers x0-x4,x8
+function putdec
+ mov x1, sp
+ str x30, [sp, #-32]! // Result can't be > 20 digits
+
+ mov x2, #0
+ strb w2, [x1, #-1]! // Write the NUL terminator
+
+ mov x2, #10
+0: udiv x3, x0, x2 // div-mod loop to generate the digits
+ msub x0, x3, x2, x0
+ add w0, w0, #'0'
+ strb w0, [x1, #-1]!
+ mov x0, x3
+ cbnz x3, 0b
+
+ ldrb w0, [x1]
+ cbnz w0, 1f
+ mov w0, #'0' // Print "0" for 0, not ""
+ strb w0, [x1, #-1]!
+
+1: mov x0, x1
+ bl puts
+
+ ldr x30, [sp], #32
+ ret
+endfunction
+
+// Print an unsigned decimal number x0 to stdout, followed by a newline
+// Clobbers x0-x5,x8
+function putdecn
+ mov x5, x30
+
+ bl putdec
+ mov x0, #'\n'
+ bl putc
+
+ ret x5
+endfunction
+
+
+// Clobbers x0-x3,x8
+function puthexb
+ str x30, [sp, #-0x10]!
+
+ mov w3, w0
+ lsr w0, w0, #4
+ bl puthexnibble
+ mov w0, w3
+
+ ldr x30, [sp], #0x10
+ // fall through to puthexnibble
+endfunction
+// Clobbers x0-x2,x8
+function puthexnibble
+ and w0, w0, #0xf
+ cmp w0, #10
+ blo 1f
+ add w0, w0, #'a' - ('9' + 1)
+1: add w0, w0, #'0'
+ b putc
+endfunction
+
+// x0=data in, x1=size in, clobbers x0-x5,x8
+function dumphex
+ str x30, [sp, #-0x10]!
+
+ mov x4, x0
+ mov x5, x1
+
+0: subs x5, x5, #1
+ b.lo 1f
+ ldrb w0, [x4], #1
+ bl puthexb
+ b 0b
+
+1: ldr x30, [sp], #0x10
+ ret
+endfunction
+
+// Declare some storage space to shadow the FPSIMD register contents:
+.pushsection .text
+.data
+.align 4
+vref:
+ .space MAXVL_B * NVR
+scratch:
+ .space MAXVL_B
+.popsection
+
+// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0.
+// Clobbers x0-x3
+function memcpy
+ cmp x2, #0
+ b.eq 1f
+0: ldrb w3, [x1], #1
+ strb w3, [x0], #1
+ subs x2, x2, #1
+ b.ne 0b
+1: ret
+endfunction
+
+// Generate a test pattern for storage in SVE registers
+// x0: pid (16 bits)
+// x1: register number (6 bits)
+// x2: generation (4 bits)
+function pattern
+ orr w1, w0, w1, lsl #16
+ orr w2, w1, w2, lsl #28
+
+ ldr x0, =scratch
+ mov w1, #MAXVL_B / 4
+
+0: str w2, [x0], #4
+ add w2, w2, #(1 << 22)
+ subs w1, w1, #1
+ bne 0b
+
+ ret
+endfunction
+
+// Get the address of shadow data for FPSIMD V-register V<xn>
+.macro _adrv xd, xn, nrtmp
+ ldr \xd, =vref
+ mov x\nrtmp, #16
+ madd \xd, x\nrtmp, \xn, \xd
+.endm
+
+// Set up test pattern in a FPSIMD V-register
+// x0: pid
+// x1: register number
+// x2: generation
+function setup_vreg
+ mov x4, x30
+
+ mov x6, x1
+ bl pattern
+ _adrv x0, x6, 2
+ mov x5, x0
+ ldr x1, =scratch
+ bl memcpy
+
+ mov x0, x6
+ mov x1, x5
+ bl setv
+
+ ret x4
+endfunction
+
+// Fill x1 bytes starting at x0 with 0xae (for canary purposes)
+// Clobbers x1, x2.
+function memfill_ae
+ mov w2, #0xae
+ b memfill
+endfunction
+
+// Fill x1 bytes starting at x0 with 0.
+// Clobbers x1, x2.
+function memclr
+ mov w2, #0
+endfunction
+ // fall through to memfill
+
+// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2
+// Clobbers x1
+function memfill
+ cmp x1, #0
+ b.eq 1f
+
+0: strb w2, [x0], #1
+ subs x1, x1, #1
+ b.ne 0b
+
+1: ret
+endfunction
+
+// Trivial memory compare: compare x2 bytes starting at address x0 with
+// bytes starting at address x1.
+// Returns only if all bytes match; otherwise, the program is aborted.
+// Clobbers x0-x5.
+function memcmp
+ cbz x2, 1f
+
+ mov x5, #0
+0: ldrb w3, [x0, x5]
+ ldrb w4, [x1, x5]
+ add x5, x5, #1
+ cmp w3, w4
+ b.ne barf
+ subs x2, x2, #1
+ b.ne 0b
+
+1: ret
+endfunction
+
+// Verify that a FPSIMD V-register matches its shadow in memory, else abort
+// x0: reg number
+// Clobbers x0-x5.
+function check_vreg
+ mov x3, x30
+
+ _adrv x5, x0, 6
+ mov x4, x0
+ ldr x7, =scratch
+
+ mov x0, x7
+ mov x1, x6
+ bl memfill_ae
+
+ mov x0, x4
+ mov x1, x7
+ bl getv
+
+ mov x0, x5
+ mov x1, x7
+ mov x2, x6
+ mov x30, x3
+ b memcmp
+endfunction
+
+// Any FPSIMD register modified here can cause corruption in the main
+// thread -- but *only* the registers modified here.
+function irritator_handler
+ // Increment the irritation signal count (x23):
+ ldr x0, [x2, #ucontext_regs + 8 * 23]
+ add x0, x0, #1
+ str x0, [x2, #ucontext_regs + 8 * 23]
+
+ // Corrupt some random V-regs
+ adr x0, .text + (irritator_handler - .text) / 16 * 16
+ movi v0.8b, #7
+ movi v9.16b, #9
+ movi v31.8b, #31
+
+ ret
+endfunction
+
+function terminate_handler
+ mov w21, w0
+ mov x20, x2
+
+ puts "Terminated by signal "
+ mov w0, w21
+ bl putdec
+ puts ", no error, iterations="
+ ldr x0, [x20, #ucontext_regs + 8 * 22]
+ bl putdec
+ puts ", signals="
+ ldr x0, [x20, #ucontext_regs + 8 * 23]
+ bl putdecn
+
+ mov x0, #0
+ mov x8, #__NR_exit
+ svc #0
+endfunction
+
+// w0: signal number
+// x1: sa_action
+// w2: sa_flags
+// Clobbers x0-x6,x8
+function setsignal
+ str x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]!
+
+ mov w4, w0
+ mov x5, x1
+ mov w6, w2
+
+ add x0, sp, #16
+ mov x1, #sa_sz
+ bl memclr
+
+ mov w0, w4
+ add x1, sp, #16
+ str w6, [x1, #sa_flags]
+ str x5, [x1, #sa_handler]
+ mov x2, #0
+ mov x3, #sa_mask_sz
+ mov x8, #__NR_rt_sigaction
+ svc #0
+
+ cbz w0, 1f
+
+ puts "sigaction failure\n"
+ b .Labort
+
+1: ldr x30, [sp], #((sa_sz + 15) / 16 * 16 + 16)
+ ret
+endfunction
+
+// Main program entry point
+.globl _start
+function _start
+_start:
+ // Sanity-check and report the vector length
+
+ mov x19, #128
+ cmp x19, #128
+ b.lo 1f
+ cmp x19, #2048
+ b.hi 1f
+ tst x19, #(8 - 1)
+ b.eq 2f
+
+1: puts "Bad vector length: "
+ mov x0, x19
+ bl putdecn
+ b .Labort
+
+2: puts "Vector length:\t"
+ mov x0, x19
+ bl putdec
+ puts " bits\n"
+
+ // Obtain our PID, to ensure test pattern uniqueness between processes
+
+ mov x8, #__NR_getpid
+ svc #0
+ mov x20, x0
+
+ puts "PID:\t"
+ mov x0, x20
+ bl putdecn
+
+ mov x23, #0 // Irritation signal count
+
+ mov w0, #SIGINT
+ adr x1, terminate_handler
+ mov w2, #SA_SIGINFO
+ bl setsignal
+
+ mov w0, #SIGTERM
+ adr x1, terminate_handler
+ mov w2, #SA_SIGINFO
+ bl setsignal
+
+ mov w0, #SIGUSR1
+ adr x1, irritator_handler
+ mov w2, #SA_SIGINFO
+ orr w2, w2, #SA_NODEFER
+ bl setsignal
+
+ mov x22, #0 // generation number, increments per iteration
+.Ltest_loop:
+
+ mov x21, #0 // Set up V-regs & shadow with test pattern
+0: mov x0, x20
+ mov x1, x21
+ and x2, x22, #0xf
+ bl setup_vreg
+ add x21, x21, #1
+ cmp x21, #NVR
+ b.lo 0b
+
+// FPSIMD state is preserved across SVC (unlike SVE state, for which the
+// equivalent yield is commented out in sve-test.S), so it is safe to
+// encourage preemption with a syscall here:
+ mov x8, #__NR_sched_yield // Encourage preemption
+ svc #0
+
+ mov x21, #0
+0: mov x0, x21
+ bl check_vreg
+ add x21, x21, #1
+ cmp x21, #NVR
+ b.lo 0b
+
+ add x22, x22, #1
+ b .Ltest_loop
+
+.Labort:
+ mov x0, #0
+ mov x1, #SIGABRT
+ mov x8, #__NR_kill
+ svc #0
+endfunction
+
+function barf
+ mov x10, x0 // expected data
+ mov x11, x1 // actual data
+ mov x12, x2 // data size
+
+ puts "Mistatch: PID="
+ mov x0, x20
+ bl putdec
+ puts ", iteration="
+ mov x0, x22
+ bl putdec
+ puts ", reg="
+ mov x0, x21
+ bl putdecn
+ puts "\tExpected ["
+ mov x0, x10
+ mov x1, x12
+ bl dumphex
+ puts "]\n\tGot ["
+ mov x0, x11
+ mov x1, x12
+ bl dumphex
+ puts "]\n"
+
+ mov x8, #__NR_exit
+ mov x0, #1 // exit status belongs in x0
+ svc #0
+endfunction
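For reference, the bookkeeping that irritator_handler and terminate_handler perform through the ucontext is sketched below in C. This is an illustrative model only (Linux/aarch64 signal frame; the handler name is ours), not part of the patch:

    #include <signal.h>
    #include <ucontext.h>

    /* Bump the interrupted thread's own x23, as the asm handler does: the
     * main loop reads the count back out of the register after sigreturn
     * restores it. On arm64, uc_mcontext.regs[] holds x0..x30. */
    static void irritator(int sig, siginfo_t *si, void *ucv)
    {
        ucontext_t *uc = ucv;

        (void)sig;
        (void)si;
        uc->uc_mcontext.regs[23]++;
    }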
diff --git a/tools/testing/selftests/arm64/fp/sve-probe-vls.c b/tools/testing/selftests/arm64/fp/sve-probe-vls.c
new file mode 100644
index 000000000000..b29cbc642c57
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/sve-probe-vls.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015-2020 ARM Limited.
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <asm/sigcontext.h>
+
+#include "../../kselftest.h"
+
+int main(int argc, char **argv)
+{
+ unsigned int vq;
+ int vl;
+ static unsigned int vqs[SVE_VQ_MAX];
+ unsigned int nvqs = 0;
+
+ ksft_print_header();
+ ksft_set_plan(2);
+
+ if (!(getauxval(AT_HWCAP) & HWCAP_SVE))
+ ksft_exit_skip("SVE not available");
+
+ /*
+ * Enumerate up to SVE_VQ_MAX vector lengths
+ */
+ for (vq = SVE_VQ_MAX; vq > 0; --vq) {
+ vl = prctl(PR_SVE_SET_VL, vq * 16);
+ if (vl == -1)
+ ksft_exit_fail_msg("PR_SVE_SET_VL failed: %s (%d)\n",
+ strerror(errno), errno);
+
+ vl &= PR_SVE_VL_LEN_MASK;
+
+ if (!sve_vl_valid(vl))
+ ksft_exit_fail_msg("VL %d invalid\n", vl);
+ vq = sve_vq_from_vl(vl);
+
+ if (!(nvqs < SVE_VQ_MAX))
+ ksft_exit_fail_msg("Too many VLs %u >= SVE_VQ_MAX\n",
+ nvqs);
+ vqs[nvqs++] = vq;
+ }
+ ksft_test_result_pass("Enumerated %d vector lengths\n", nvqs);
+ ksft_test_result_pass("All vector lengths valid\n");
+
+ /* Print out the vector lengths in ascending order: */
+ while (nvqs--)
+ ksft_print_msg("%u\n", 16 * vqs[nvqs]);
+
+ ksft_exit_pass();
+}
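The enumeration loop above leans on PR_SVE_SET_VL rounding an unsupported request down to the next supported vector length, so stepping vq downward from SVE_VQ_MAX visits every supported VL exactly once. A minimal sketch of one probe under that assumption (the helper name is ours):

    #include <sys/prctl.h>

    /* Request `bytes`; the kernel replies with the largest supported VL
     * not exceeding the request, with flags ORed into the high bits. */
    static int probe_vl(unsigned int bytes)
    {
        int vl = prctl(PR_SVE_SET_VL, bytes);

        return vl == -1 ? -1 : (vl & PR_SVE_VL_LEN_MASK);
    }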
diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace-asm.S b/tools/testing/selftests/arm64/fp/sve-ptrace-asm.S
new file mode 100644
index 000000000000..3e81f9fab574
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/sve-ptrace-asm.S
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2015-2019 ARM Limited.
+// Original author: Dave Martin <Dave.Martin@arm.com>
+#include <asm/unistd.h>
+
+.arch_extension sve
+
+.globl sve_store_patterns
+
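+// Five Q0 snapshots for the caller to compare: the initial index pattern,
+// the value after a getpid() syscall, after an unrelated Z1 write, after a
+// second syscall, and after another Z1 write. All five should be identical
+// if the low 128 bits of Z0 survive syscalls and neighbouring-register
+// writes.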
+sve_store_patterns:
+ mov x1, x0
+
+ index z0.b, #0, #1
+ str q0, [x1]
+
+ mov w8, #__NR_getpid
+ svc #0
+ str q0, [x1, #0x10]
+
+ mov z1.d, z0.d
+ str q0, [x1, #0x20]
+
+ mov w8, #__NR_getpid
+ svc #0
+ str q0, [x1, #0x30]
+
+ mov z1.d, z0.d
+ str q0, [x1, #0x40]
+
+ ret
+
+.size sve_store_patterns, . - sve_store_patterns
+.type sve_store_patterns, @function
diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c
new file mode 100644
index 000000000000..b2282be6f938
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c
@@ -0,0 +1,336 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015-2020 ARM Limited.
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/auxv.h>
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/wait.h>
+#include <asm/sigcontext.h>
+#include <asm/ptrace.h>
+
+#include "../../kselftest.h"
+
+/* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */
+#ifndef NT_ARM_SVE
+#define NT_ARM_SVE 0x405
+#endif
+
+/* Number of registers filled in by sve_store_patterns */
+#define NR_VREGS 5
+
+void sve_store_patterns(__uint128_t v[NR_VREGS]);
+
+static void dump(const void *buf, size_t size)
+{
+ size_t i;
+ const unsigned char *p = buf;
+
+ for (i = 0; i < size; ++i)
+ printf(" %.2x", *p++);
+}
+
+static int check_vregs(const __uint128_t vregs[NR_VREGS])
+{
+ int i;
+ int ok = 1;
+
+ for (i = 0; i < NR_VREGS; ++i) {
+ printf("# v[%d]:", i);
+ dump(&vregs[i], sizeof vregs[i]);
+ putchar('\n');
+
+ if (vregs[i] != vregs[0])
+ ok = 0;
+ }
+
+ return ok;
+}
+
+static int do_child(void)
+{
+ if (ptrace(PTRACE_TRACEME, -1, NULL, NULL))
+ ksft_exit_fail_msg("PTRACE_TRACEME", strerror(errno));
+
+ if (raise(SIGSTOP))
+ ksft_exit_fail_msg("raise(SIGSTOP)", strerror(errno));
+
+ return EXIT_SUCCESS;
+}
+
+static struct user_sve_header *get_sve(pid_t pid, void **buf, size_t *size)
+{
+ struct user_sve_header *sve;
+ void *p;
+ size_t sz = sizeof *sve;
+ struct iovec iov;
+
+ while (1) {
+ if (*size < sz) {
+ p = realloc(*buf, sz);
+ if (!p) {
+ errno = ENOMEM;
+ goto error;
+ }
+
+ *buf = p;
+ *size = sz;
+ }
+
+ iov.iov_base = *buf;
+ iov.iov_len = sz;
+ if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_SVE, &iov))
+ goto error;
+
+ sve = *buf;
+ if (sve->size <= sz)
+ break;
+
+ sz = sve->size;
+ }
+
+ return sve;
+
+error:
+ return NULL;
+}
+
+static int set_sve(pid_t pid, const struct user_sve_header *sve)
+{
+ struct iovec iov;
+
+ iov.iov_base = (void *)sve;
+ iov.iov_len = sve->size;
+ return ptrace(PTRACE_SETREGSET, pid, NT_ARM_SVE, &iov);
+}
+
+static void dump_sve_regs(const struct user_sve_header *sve, unsigned int num,
+ unsigned int vlmax)
+{
+ unsigned int vq;
+ unsigned int i;
+
+ if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_SVE)
+ ksft_exit_fail_msg("Dumping non-SVE register\n");
+
+ if (vlmax > sve->vl)
+ vlmax = sve->vl;
+
+ vq = sve_vq_from_vl(sve->vl);
+ for (i = 0; i < num; ++i) {
+ printf("# z%u:", i);
+ dump((const char *)sve + SVE_PT_SVE_ZREG_OFFSET(vq, i),
+ vlmax);
+ printf("%s\n", vlmax == sve->vl ? "" : " ...");
+ }
+}
+
+static int do_parent(pid_t child)
+{
+ int ret = EXIT_FAILURE;
+ pid_t pid;
+ int status;
+ siginfo_t si;
+ void *svebuf = NULL, *newsvebuf;
+ size_t svebufsz = 0, newsvebufsz;
+ struct user_sve_header *sve, *new_sve;
+ struct user_fpsimd_state *fpsimd;
+ unsigned int i, j;
+ unsigned char *p;
+ unsigned int vq;
+
+ /* Attach to the child */
+ while (1) {
+ int sig;
+
+ pid = wait(&status);
+ if (pid == -1) {
+ perror("wait");
+ goto error;
+ }
+
+ /*
+ * This should never happen but it's hard to flag in
+ * the framework.
+ */
+ if (pid != child)
+ continue;
+
+ if (WIFEXITED(status) || WIFSIGNALED(status))
+ ksft_exit_fail_msg("Child died unexpectedly\n");
+
+ ksft_test_result(WIFSTOPPED(status), "WIFSTOPPED(%d)\n",
+ status);
+ if (!WIFSTOPPED(status))
+ goto error;
+
+ sig = WSTOPSIG(status);
+
+ if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &si)) {
+ if (errno == ESRCH)
+ goto disappeared;
+
+ if (errno == EINVAL) {
+ sig = 0; /* bust group-stop */
+ goto cont;
+ }
+
+ ksft_test_result_fail("PTRACE_GETSIGINFO: %s\n",
+ strerror(errno));
+ goto error;
+ }
+
+ if (sig == SIGSTOP && si.si_code == SI_TKILL &&
+ si.si_pid == pid)
+ break;
+
+ cont:
+ if (ptrace(PTRACE_CONT, pid, NULL, sig)) {
+ if (errno == ESRCH)
+ goto disappeared;
+
+ ksft_test_result_fail("PTRACE_CONT: %s\n",
+ strerror(errno));
+ goto error;
+ }
+ }
+
+ sve = get_sve(pid, &svebuf, &svebufsz);
+ if (!sve) {
+ int e = errno;
+
+ ksft_test_result_fail("get_sve: %s\n", strerror(errno));
+ if (e == ESRCH)
+ goto disappeared;
+
+ goto error;
+ } else {
+ ksft_test_result_pass("get_sve\n");
+ }
+
+ ksft_test_result((sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD,
+ "FPSIMD registers\n");
+ if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_FPSIMD)
+ goto error;
+
+ fpsimd = (struct user_fpsimd_state *)((char *)sve +
+ SVE_PT_FPSIMD_OFFSET);
+ for (i = 0; i < 32; ++i) {
+ p = (unsigned char *)&fpsimd->vregs[i];
+
+ for (j = 0; j < sizeof fpsimd->vregs[i]; ++j)
+ p[j] = j;
+ }
+
+ if (set_sve(pid, sve)) {
+ int e = errno;
+
+ ksft_test_result_fail("set_sve(FPSIMD): %s\n",
+ strerror(errno));
+ if (e == ESRCH)
+ goto disappeared;
+
+ goto error;
+ }
+
+ vq = sve_vq_from_vl(sve->vl);
+
+ newsvebufsz = SVE_PT_SVE_ZREG_OFFSET(vq, 1);
+ new_sve = newsvebuf = malloc(newsvebufsz);
+ if (!new_sve) {
+ errno = ENOMEM;
+ perror(NULL);
+ goto error;
+ }
+
+ *new_sve = *sve;
+ new_sve->flags &= ~SVE_PT_REGS_MASK;
+ new_sve->flags |= SVE_PT_REGS_SVE;
+ memset((char *)new_sve + SVE_PT_SVE_ZREG_OFFSET(vq, 0),
+ 0, SVE_PT_SVE_ZREG_SIZE(vq));
+ new_sve->size = SVE_PT_SVE_ZREG_OFFSET(vq, 1);
+ if (set_sve(pid, new_sve)) {
+ int e = errno;
+
+ ksft_test_result_fail("set_sve(ZREG): %s\n", strerror(errno));
+ if (e == ESRCH)
+ goto disappeared;
+
+ goto error;
+ }
+
+ new_sve = get_sve(pid, &newsvebuf, &newsvebufsz);
+ if (!new_sve) {
+ int e = errno;
+
+ ksft_test_result_fail("get_sve(ZREG): %s\n", strerror(errno));
+ if (e == ESRCH)
+ goto disappeared;
+
+ goto error;
+ }
+
+ ksft_test_result((new_sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE,
+ "SVE registers\n");
+ if ((new_sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_SVE)
+ goto error;
+
+ dump_sve_regs(new_sve, 3, sizeof fpsimd->vregs[0]);
+
+ p = (unsigned char *)new_sve + SVE_PT_SVE_ZREG_OFFSET(vq, 1);
+ for (i = 0; i < sizeof fpsimd->vregs[0]; ++i) {
+ unsigned char expected = i;
+
+ if (__BYTE_ORDER == __BIG_ENDIAN)
+ expected = sizeof fpsimd->vregs[0] - 1 - expected;
+
+ ksft_test_result(p[i] == expected, "p[%d] == expected\n", i);
+ if (p[i] != expected)
+ goto error;
+ }
+
+ ret = EXIT_SUCCESS;
+
+error:
+ kill(child, SIGKILL);
+
+disappeared:
+ return ret;
+}
+
+int main(void)
+{
+ int ret = EXIT_SUCCESS;
+ __uint128_t v[NR_VREGS];
+ pid_t child;
+
+ ksft_print_header();
+ ksft_set_plan(20);
+
+ if (!(getauxval(AT_HWCAP) & HWCAP_SVE))
+ ksft_exit_skip("SVE not available\n");
+
+ sve_store_patterns(v);
+
+ if (!check_vregs(v))
+ ksft_exit_fail_msg("Initial check_vregs() failed\n");
+
+ child = fork();
+ if (!child)
+ return do_child();
+
+ if (do_parent(child))
+ ret = EXIT_FAILURE;
+
+ ksft_print_cnts();
+
+ return ret;
+}
diff --git a/tools/testing/selftests/arm64/fp/sve-stress b/tools/testing/selftests/arm64/fp/sve-stress
new file mode 100755
index 000000000000..24dd0922cc02
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/sve-stress
@@ -0,0 +1,59 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+# Copyright (C) 2015-2019 ARM Limited.
+# Original author: Dave Martin <Dave.Martin@arm.com>
+
+set -ue
+
+NR_CPUS=`nproc`
+
+pids=
+logs=
+
+cleanup () {
+ trap - INT TERM CHLD
+ set +e
+
+ if [ -n "$pids" ]; then
+ kill $pids
+ wait $pids
+ pids=
+ fi
+
+ if [ -n "$logs" ]; then
+ cat $logs
+ rm $logs
+ logs=
+ fi
+}
+
+interrupt () {
+ cleanup
+ exit 0
+}
+
+child_died () {
+ cleanup
+ exit 1
+}
+
+trap interrupt INT TERM EXIT
+
+for x in `seq 0 $((NR_CPUS * 4))`; do
+ log=`mktemp`
+ logs=$logs\ $log
+ ./sve-test >$log &
+ pids=$pids\ $!
+done
+
+# Wait for all child processes to be created:
+sleep 10
+
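+# Hammer every child with SIGUSR1 from here on, so their irritator
+# handlers run continually while the register checks proceed: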
+while :; do
+ kill -USR1 $pids
+done &
+pids=$pids\ $!
+
+wait
+
+exit 1
diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S
new file mode 100644
index 000000000000..f95074c9b48b
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/sve-test.S
@@ -0,0 +1,672 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2015-2019 ARM Limited.
+// Original author: Dave Martin <Dave.Martin@arm.com>
+//
+// Simple Scalable Vector Extension context switch test
+// Repeatedly writes unique test patterns into each SVE register
+// and reads them back to verify integrity.
+//
+// for x in `seq 1 NR_CPUS`; do sve-test & pids=$pids\ $! ; done
+// (leave it running for as long as you want...)
+// kill $pids
+
+#include <asm/unistd.h>
+#include "assembler.h"
+#include "asm-offsets.h"
+
+#define NZR 32
+#define NPR 16
+#define MAXVL_B (2048 / 8)
+
+.arch_extension sve
+
+.macro _sve_ldr_v zt, xn
+ ldr z\zt, [x\xn]
+.endm
+
+.macro _sve_str_v zt, xn
+ str z\zt, [x\xn]
+.endm
+
+.macro _sve_ldr_p pt, xn
+ ldr p\pt, [x\xn]
+.endm
+
+.macro _sve_str_p pt, xn
+ str p\pt, [x\xn]
+.endm
+
+// Generate accessor functions to read/write programmatically selected
+// SVE registers.
+// x0 is the register index to access
+// x1 is the memory address to read from (setz,setp) or store to (getz,getp)
+// All clobber x0-x2
+define_accessor setz, NZR, _sve_ldr_v
+define_accessor getz, NZR, _sve_str_v
+define_accessor setp, NPR, _sve_ldr_p
+define_accessor getp, NPR, _sve_str_p
+
+// Print a single character x0 to stdout
+// Clobbers x0-x2,x8
+function putc
+ str x0, [sp, #-16]!
+
+ mov x0, #1 // STDOUT_FILENO
+ mov x1, sp
+ mov x2, #1
+ mov x8, #__NR_write
+ svc #0
+
+ add sp, sp, #16
+ ret
+endfunction
+
+// Print a NUL-terminated string starting at address x0 to stdout
+// Clobbers x0-x3,x8
+function puts
+ mov x1, x0
+
+ mov x2, #0
+0: ldrb w3, [x0], #1
+ cbz w3, 1f
+ add x2, x2, #1
+ b 0b
+
+1: mov w0, #1 // STDOUT_FILENO
+ mov x8, #__NR_write
+ svc #0
+
+ ret
+endfunction
+
+// Utility macro to print a literal string
+// Clobbers x0-x4,x8
+.macro puts string
+ .pushsection .rodata.str1.1, "aMS", 1
+.L__puts_literal\@: .string "\string"
+ .popsection
+
+ ldr x0, =.L__puts_literal\@
+ bl puts
+.endm
+
+// Print an unsigned decimal number x0 to stdout
+// Clobbers x0-x4,x8
+function putdec
+ mov x1, sp
+ str x30, [sp, #-32]! // Result can't be > 20 digits
+
+ mov x2, #0
+ strb w2, [x1, #-1]! // Write the NUL terminator
+
+ mov x2, #10
+0: udiv x3, x0, x2 // div-mod loop to generate the digits
+ msub x0, x3, x2, x0
+ add w0, w0, #'0'
+ strb w0, [x1, #-1]!
+ mov x0, x3
+ cbnz x3, 0b
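+ // (C sketch of the loop above: do { *--p = '0' + x % 10; x /= 10; } while (x);)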
+
+ ldrb w0, [x1]
+ cbnz w0, 1f
+ mov w0, #'0' // Print "0" for 0, not ""
+ strb w0, [x1, #-1]!
+
+1: mov x0, x1
+ bl puts
+
+ ldr x30, [sp], #32
+ ret
+endfunction
+
+// Print an unsigned decimal number x0 to stdout, followed by a newline
+// Clobbers x0-x5,x8
+function putdecn
+ mov x5, x30
+
+ bl putdec
+ mov x0, #'\n'
+ bl putc
+
+ ret x5
+endfunction
+
+// Clobbers x0-x3,x8
+function puthexb
+ str x30, [sp, #-0x10]!
+
+ mov w3, w0
+ lsr w0, w0, #4
+ bl puthexnibble
+ mov w0, w3
+
+ ldr x30, [sp], #0x10
+ // fall through to puthexnibble
+endfunction
+// Clobbers x0-x2,x8
+function puthexnibble
+ and w0, w0, #0xf
+ cmp w0, #10
+ blo 1f
+ add w0, w0, #'a' - ('9' + 1)
+1: add w0, w0, #'0'
+ b putc
+endfunction
+
+// x0=data in, x1=size in, clobbers x0-x5,x8
+function dumphex
+ str x30, [sp, #-0x10]!
+
+ mov x4, x0
+ mov x5, x1
+
+0: subs x5, x5, #1
+ b.lo 1f
+ ldrb w0, [x4], #1
+ bl puthexb
+ b 0b
+
+1: ldr x30, [sp], #0x10
+ ret
+endfunction
+
+// Declare some storage space to shadow the SVE register contents:
+.pushsection .text
+.data
+.align 4
+zref:
+ .space MAXVL_B * NZR
+pref:
+ .space MAXVL_B / 8 * NPR
+ffrref:
+ .space MAXVL_B / 8
+scratch:
+ .space MAXVL_B
+.popsection
+
+// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0.
+// Clobbers x0-x3
+function memcpy
+ cmp x2, #0
+ b.eq 1f
+0: ldrb w3, [x1], #1
+ strb w3, [x0], #1
+ subs x2, x2, #1
+ b.ne 0b
+1: ret
+endfunction
+
+// Generate a test pattern for storage in SVE registers
+// x0: pid (16 bits)
+// x1: register number (6 bits)
+// x2: generation (4 bits)
+
+// These values are used to construct a 32-bit pattern that is repeated in the
+// scratch buffer as many times as will fit:
+// bits 31:28 generation number (increments once per test_loop)
+// bits 27:22 32-bit lane index
+// bits 21:16 register number
+// bits 15: 0 pid
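+//
+// Example: pid=0x1234, register 5, lane 2, generation 3 encodes as
+// (3 << 28) | (2 << 22) | (5 << 16) | 0x1234 = 0x30851234.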
+
+function pattern
+ orr w1, w0, w1, lsl #16
+ orr w2, w1, w2, lsl #28
+
+ ldr x0, =scratch
+ mov w1, #MAXVL_B / 4
+
+0: str w2, [x0], #4
+ add w2, w2, #(1 << 22)
+ subs w1, w1, #1
+ b.ne 0b
+
+ ret
+endfunction
+
+// Get the address of shadow data for SVE Z-register Z<xn>
+.macro _adrz xd, xn, nrtmp
+ ldr \xd, =zref
+ rdvl x\nrtmp, #1
+ madd \xd, x\nrtmp, \xn, \xd
+.endm
+
+// Get the address of shadow data for SVE P-register P<xn - NZR>
+.macro _adrp xd, xn, nrtmp
+ ldr \xd, =pref
+ rdvl x\nrtmp, #1
+ lsr x\nrtmp, x\nrtmp, #3
+ sub \xn, \xn, #NZR
+ madd \xd, x\nrtmp, \xn, \xd
+.endm
+
+// Set up test pattern in a SVE Z-register
+// x0: pid
+// x1: register number
+// x2: generation
+function setup_zreg
+ mov x4, x30
+
+ mov x6, x1
+ bl pattern
+ _adrz x0, x6, 2
+ mov x5, x0
+ ldr x1, =scratch
+ bl memcpy
+
+ mov x0, x6
+ mov x1, x5
+ bl setz
+
+ ret x4
+endfunction
+
+// Set up test pattern in a SVE P-register
+// x0: pid
+// x1: register number
+// x2: generation
+function setup_preg
+ mov x4, x30
+
+ mov x6, x1
+ bl pattern
+ _adrp x0, x6, 2
+ mov x5, x0
+ ldr x1, =scratch
+ bl memcpy
+
+ mov x0, x6
+ mov x1, x5
+ bl setp
+
+ ret x4
+endfunction
+
+// Set up test pattern in the FFR
+// x0: pid
+// x2: generation
+// Beware: corrupts P0.
+function setup_ffr
+ mov x4, x30
+
+ bl pattern
+ ldr x0, =ffrref
+ ldr x1, =scratch
+ rdvl x2, #1
+ lsr x2, x2, #3
+ bl memcpy
+
+ mov x0, #0
+ ldr x1, =ffrref
+ bl setp
+
+ wrffr p0.b
+
+ ret x4
+endfunction
+
+// Fill x1 bytes starting at x0 with 0xae (for canary purposes)
+// Clobbers x1, x2.
+function memfill_ae
+ mov w2, #0xae
+ b memfill
+endfunction
+
+// Fill x1 bytes starting at x0 with 0.
+// Clobbers x1, x2.
+function memclr
+ mov w2, #0
+endfunction
+ // fall through to memfill
+
+// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2
+// Clobbers x1
+function memfill
+ cmp x1, #0
+ b.eq 1f
+
+0: strb w2, [x0], #1
+ subs x1, x1, #1
+ b.ne 0b
+
+1: ret
+endfunction
+
+// Trivial memory compare: compare x2 bytes starting at address x0 with
+// bytes starting at address x1.
+// Returns only if all bytes match; otherwise, the program is aborted.
+// Clobbers x0-x5.
+function memcmp
+ cbz x2, 2f
+
+ stp x0, x1, [sp, #-0x20]!
+ str x2, [sp, #0x10]
+
+ mov x5, #0
+0: ldrb w3, [x0, x5]
+ ldrb w4, [x1, x5]
+ add x5, x5, #1
+ cmp w3, w4
+ b.ne 1f
+ subs x2, x2, #1
+ b.ne 0b
+
+1: ldr x2, [sp, #0x10]
+ ldp x0, x1, [sp], #0x20
+ b.ne barf
+
+2: ret
+endfunction
+
+// Verify that a SVE Z-register matches its shadow in memory, else abort
+// x0: reg number
+// Clobbers x0-x7.
+function check_zreg
+ mov x3, x30
+
+ _adrz x5, x0, 6
+ mov x4, x0
+ ldr x7, =scratch
+
+ mov x0, x7
+ mov x1, x6
+ bl memfill_ae
+
+ mov x0, x4
+ mov x1, x7
+ bl getz
+
+ mov x0, x5
+ mov x1, x7
+ mov x2, x6
+ mov x30, x3
+ b memcmp
+endfunction
+
+// Verify that a SVE P-register matches its shadow in memory, else abort
+// x0: reg number
+// Clobbers x0-x7.
+function check_preg
+ mov x3, x30
+
+ _adrp x5, x0, 6
+ mov x4, x0
+ ldr x7, =scratch
+
+ mov x0, x7
+ mov x1, x6
+ bl memfill_ae
+
+ mov x0, x4
+ mov x1, x7
+ bl getp
+
+ mov x0, x5
+ mov x1, x7
+ mov x2, x6
+ mov x30, x3
+ b memcmp
+endfunction
+
+// Verify that the FFR matches its shadow in memory, else abort
+// Beware -- corrupts P0.
+// Clobbers x0-x5.
+function check_ffr
+ mov x3, x30
+
+ ldr x4, =scratch
+ rdvl x5, #1
+ lsr x5, x5, #3
+
+ mov x0, x4
+ mov x1, x5
+ bl memfill_ae
+
+ rdffr p0.b
+ mov x0, #0
+ mov x1, x4
+ bl getp
+
+ ldr x0, =ffrref
+ mov x1, x4
+ mov x2, x5
+ mov x30, x3
+ b memcmp
+endfunction
+
+// Any SVE register modified here can cause corruption in the main
+// thread -- but *only* the registers modified here.
+function irritator_handler
+ // Increment the irritation signal count (x23):
+ ldr x0, [x2, #ucontext_regs + 8 * 23]
+ add x0, x0, #1
+ str x0, [x2, #ucontext_regs + 8 * 23]
+
+ // Corrupt some random Z-regs
+ adr x0, .text + (irritator_handler - .text) / 16 * 16
+ movi v0.8b, #1
+ movi v9.16b, #2
+ movi v31.8b, #3
+ // And P0
+ rdffr p0.b
+ // And FFR
+ wrffr p15.b
+
+ ret
+endfunction
+
+function terminate_handler
+ mov w21, w0
+ mov x20, x2
+
+ puts "Terminated by signal "
+ mov w0, w21
+ bl putdec
+ puts ", no error, iterations="
+ ldr x0, [x20, #ucontext_regs + 8 * 22]
+ bl putdec
+ puts ", signals="
+ ldr x0, [x20, #ucontext_regs + 8 * 23]
+ bl putdecn
+
+ mov x0, #0
+ mov x8, #__NR_exit
+ svc #0
+endfunction
+
+// w0: signal number
+// x1: sa_action
+// w2: sa_flags
+// Clobbers x0-x6,x8
+function setsignal
+ str x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]!
+
+ mov w4, w0
+ mov x5, x1
+ mov w6, w2
+
+ add x0, sp, #16
+ mov x1, #sa_sz
+ bl memclr
+
+ mov w0, w4
+ add x1, sp, #16
+ str w6, [x1, #sa_flags]
+ str x5, [x1, #sa_handler]
+ mov x2, #0
+ mov x3, #sa_mask_sz
+ mov x8, #__NR_rt_sigaction
+ svc #0
+
+ cbz w0, 1f
+
+ puts "sigaction failure\n"
+ b .Labort
+
+1: ldr x30, [sp], #((sa_sz + 15) / 16 * 16 + 16)
+ ret
+endfunction
+
+// Main program entry point
+.globl _start
+function _start
+_start:
+ // Sanity-check and report the vector length
+
+ rdvl x19, #8
+ cmp x19, #128
+ b.lo 1f
+ cmp x19, #2048
+ b.hi 1f
+ tst x19, #(8 - 1)
+ b.eq 2f
+
+1: puts "Bad vector length: "
+ mov x0, x19
+ bl putdecn
+ b .Labort
+
+2: puts "Vector length:\t"
+ mov x0, x19
+ bl putdec
+ puts " bits\n"
+
+ // Obtain our PID, to ensure test pattern uniqueness between processes
+
+ mov x8, #__NR_getpid
+ svc #0
+ mov x20, x0
+
+ puts "PID:\t"
+ mov x0, x20
+ bl putdecn
+
+ mov x23, #0 // Irritation signal count
+
+ mov w0, #SIGINT
+ adr x1, terminate_handler
+ mov w2, #SA_SIGINFO
+ bl setsignal
+
+ mov w0, #SIGTERM
+ adr x1, terminate_handler
+ mov w2, #SA_SIGINFO
+ bl setsignal
+
+ mov w0, #SIGUSR1
+ adr x1, irritator_handler
+ mov w2, #SA_SIGINFO
+ orr w2, w2, #SA_NODEFER
+ bl setsignal
+
+ mov x22, #0 // generation number, increments per iteration
+.Ltest_loop:
+ rdvl x0, #8
+ cmp x0, x19
+ b.ne vl_barf
+
+ mov x21, #0 // Set up Z-regs & shadow with test pattern
+0: mov x0, x20
+ mov x1, x21
+ and x2, x22, #0xf
+ bl setup_zreg
+ add x21, x21, #1
+ cmp x21, #NZR
+ b.lo 0b
+
+ mov x0, x20 // Set up FFR & shadow with test pattern
+ mov x1, #NZR + NPR
+ and x2, x22, #0xf
+ bl setup_ffr
+
+0: mov x0, x20 // Set up P-regs & shadow with test pattern
+ mov x1, x21
+ and x2, x22, #0xf
+ bl setup_preg
+ add x21, x21, #1
+ cmp x21, #NZR + NPR
+ b.lo 0b
+
+// Can't do this when SVE state is volatile across SVC:
+// mov x8, #__NR_sched_yield // Encourage preemption
+// svc #0
+
+ mov x21, #0
+0: mov x0, x21
+ bl check_zreg
+ add x21, x21, #1
+ cmp x21, #NZR
+ b.lo 0b
+
+0: mov x0, x21
+ bl check_preg
+ add x21, x21, #1
+ cmp x21, #NZR + NPR
+ b.lo 0b
+
+ bl check_ffr
+
+ add x22, x22, #1
+ b .Ltest_loop
+
+.Labort:
+ mov x0, #0
+ mov x1, #SIGABRT
+ mov x8, #__NR_kill
+ svc #0
+endfunction
+
+function barf
+// fpsimd.c activity log dump hack
+// ldr w0, =0xdeadc0de
+// mov w8, #__NR_exit
+// svc #0
+// end hack
+ mov x10, x0 // expected data
+ mov x11, x1 // actual data
+ mov x12, x2 // data size
+
+ puts "Mistatch: PID="
+ mov x0, x20
+ bl putdec
+ puts ", iteration="
+ mov x0, x22
+ bl putdec
+ puts ", reg="
+ mov x0, x21
+ bl putdecn
+ puts "\tExpected ["
+ mov x0, x10
+ mov x1, x12
+ bl dumphex
+ puts "]\n\tGot ["
+ mov x0, x11
+ mov x1, x12
+ bl dumphex
+ puts "]\n"
+
+ mov x8, #__NR_getpid
+ svc #0
+// fpsimd.c activity log dump hack
+// ldr w0, =0xdeadc0de
+// mov w8, #__NR_exit
+// svc #0
+// ^ end of hack
+ mov x1, #SIGABRT
+ mov x8, #__NR_kill
+ svc #0
+// mov x8, #__NR_exit
+// mov x1, #1
+// svc #0
+endfunction
+
+function vl_barf
+ mov x10, x0
+
+ puts "Bad active VL: "
+ mov x0, x10
+ bl putdecn
+
+ mov x8, #__NR_exit
+ mov x0, #1 // exit status belongs in x0
+ svc #0
+endfunction
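The _adrz/_adrp macros above map each register to a fixed-stride shadow slot keyed off the current vector length. A C model of that address arithmetic, assuming vl_bytes equals the result of RDVL #1 (all names here are ours, not from the patch):

    #include <stdint.h>

    #define NZR 32	/* Z-registers */
    #define NPR 16	/* P-registers */

    /* Z-register shadows are one VL each; P-register shadows are VL/8
     * bytes, and the test loop numbers P-regs NZR..NZR+NPR-1. */
    static uint8_t *zreg_shadow(uint8_t *zref, unsigned int n,
                                unsigned long vl_bytes)
    {
        return zref + (uint64_t)n * vl_bytes;
    }

    static uint8_t *preg_shadow(uint8_t *pref, unsigned int n,
                                unsigned long vl_bytes)
    {
        return pref + (uint64_t)(n - NZR) * (vl_bytes / 8);
    }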
diff --git a/tools/testing/selftests/arm64/fp/vlset.c b/tools/testing/selftests/arm64/fp/vlset.c
new file mode 100644
index 000000000000..308d27a68226
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/vlset.c
@@ -0,0 +1,155 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015-2019 ARM Limited.
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+#define _GNU_SOURCE
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+#include <unistd.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <asm/hwcap.h>
+#include <asm/sigcontext.h>
+
+static int inherit = 0;
+static int no_inherit = 0;
+static int force = 0;
+static unsigned long vl;
+
+static const struct option options[] = {
+ { "force", no_argument, NULL, 'f' },
+ { "inherit", no_argument, NULL, 'i' },
+ { "max", no_argument, NULL, 'M' },
+ { "no-inherit", no_argument, &no_inherit, 1 },
+ { "help", no_argument, NULL, '?' },
+ {}
+};
+
+static char const *program_name;
+
+static int parse_options(int argc, char **argv)
+{
+ int c;
+ char *rest;
+
+ program_name = strrchr(argv[0], '/');
+ if (program_name)
+ ++program_name;
+ else
+ program_name = argv[0];
+
+ while ((c = getopt_long(argc, argv, "Mfhi", options, NULL)) != -1)
+ switch (c) {
+ case 'M': vl = SVE_VL_MAX; break;
+ case 'f': force = 1; break;
+ case 'i': inherit = 1; break;
+ case 0: break;
+ default: goto error;
+ }
+
+ if (inherit && no_inherit)
+ goto error;
+
+ if (!vl) {
+ /* vector length */
+ if (optind >= argc)
+ goto error;
+
+ errno = 0;
+ vl = strtoul(argv[optind], &rest, 0);
+ if (*rest) {
+ vl = ULONG_MAX;
+ errno = EINVAL;
+ }
+ if (vl == ULONG_MAX && errno) {
+ fprintf(stderr, "%s: %s: %s\n",
+ program_name, argv[optind], strerror(errno));
+ goto error;
+ }
+
+ ++optind;
+ }
+
+ /* command */
+ if (optind >= argc)
+ goto error;
+
+ return 0;
+
+error:
+ fprintf(stderr,
+ "Usage: %s [-f | --force] "
+ "[-i | --inherit | --no-inherit] "
+ "{-M | --max | <vector length>} "
+ "<command> [<arguments> ...]\n",
+ program_name);
+ return -1;
+}
+
+int main(int argc, char **argv)
+{
+ int ret = 126; /* same as sh(1) command-not-executable error */
+ long flags;
+ char *path;
+ int t, e;
+
+ if (parse_options(argc, argv))
+ return 2; /* same as sh(1) builtin incorrect-usage */
+
+ if (vl & ~(vl & PR_SVE_VL_LEN_MASK)) {
+ fprintf(stderr, "%s: Invalid vector length %lu\n",
+ program_name, vl);
+ return 2; /* same as sh(1) builtin incorrect-usage */
+ }
+
+ if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) {
+ fprintf(stderr, "%s: Scalable Vector Extension not present\n",
+ program_name);
+
+ if (!force)
+ goto error;
+
+ fputs("Going ahead anyway (--force): "
+ "This is a debug option. Don't rely on it.\n",
+ stderr);
+ }
+
+ flags = PR_SVE_SET_VL_ONEXEC;
+ if (inherit)
+ flags |= PR_SVE_VL_INHERIT;
+
+ t = prctl(PR_SVE_SET_VL, vl | flags);
+ if (t < 0) {
+ fprintf(stderr, "%s: PR_SVE_SET_VL: %s\n",
+ program_name, strerror(errno));
+ goto error;
+ }
+
+ t = prctl(PR_SVE_GET_VL);
+ if (t == -1) {
+ fprintf(stderr, "%s: PR_SVE_GET_VL: %s\n",
+ program_name, strerror(errno));
+ goto error;
+ }
+ flags = t & ~PR_SVE_VL_LEN_MASK;
+
+ assert(optind < argc);
+ path = argv[optind];
+
+ execvp(path, &argv[optind]);
+ e = errno;
+ if (e == ENOENT)
+ ret = 127; /* same as sh(1) not-found error */
+ fprintf(stderr, "%s: %s: %s\n", program_name, path, strerror(e));
+
+error:
+ return ret; /* same as sh(1) not-executable error */
+}
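A hypothetical invocation, for illustration: "./vlset --inherit 32 ./sve-test" would request a 32-byte (256-bit) vector length for the exec'd test, since PR_SVE_SET_VL takes the length in bytes and PR_SVE_SET_VL_ONEXEC defers the change until the execve().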
diff --git a/tools/testing/selftests/arm64/mte/.gitignore b/tools/testing/selftests/arm64/mte/.gitignore
new file mode 100644
index 000000000000..bc3ac63f3314
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/.gitignore
@@ -0,0 +1,6 @@
+check_buffer_fill
+check_tags_inclusion
+check_child_memory
+check_mmap_options
+check_ksm_options
+check_user_mem
diff --git a/tools/testing/selftests/arm64/mte/Makefile b/tools/testing/selftests/arm64/mte/Makefile
new file mode 100644
index 000000000000..2480226dfe57
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/Makefile
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2020 ARM Limited
+
+CFLAGS += -std=gnu99 -I.
+SRCS := $(filter-out mte_common_util.c,$(wildcard *.c))
+PROGS := $(patsubst %.c,%,$(SRCS))
+
+# Add the MTE compiler option
+ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep gcc),)
+CFLAGS += -march=armv8.5-a+memtag
+endif
+
+# Check that the toolchain actually accepts the MTE flags
+mte_cc_support := $(shell if ($(CC) $(CFLAGS) -E -x c /dev/null -o /dev/null 2>&1) then echo "1"; fi)
+
+ifeq ($(mte_cc_support),1)
+# Generated binaries to be installed by top KSFT script
+TEST_GEN_PROGS := $(PROGS)
+
+# Get Kernel headers installed and use them.
+KSFT_KHDR_INSTALL := 1
+endif
+
+# Include KSFT lib.mk.
+include ../../lib.mk
+
+ifeq ($(mte_cc_support),1)
+$(TEST_GEN_PROGS): mte_common_util.c mte_common_util.h mte_helper.S
+endif
diff --git a/tools/testing/selftests/arm64/mte/check_buffer_fill.c b/tools/testing/selftests/arm64/mte/check_buffer_fill.c
new file mode 100644
index 000000000000..242635d79035
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_buffer_fill.c
@@ -0,0 +1,475 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+#define OVERFLOW_RANGE MT_GRANULE_SIZE
+
+static int sizes[] = {
+ 1, 555, 1033, MT_GRANULE_SIZE - 1, MT_GRANULE_SIZE,
+ /* page size - 1 */ 0, /* page size */ 0, /* page size + 1 */ 0
+};
+
+enum mte_block_test_alloc {
+ UNTAGGED_TAGGED,
+ TAGGED_UNTAGGED,
+ TAGGED_TAGGED,
+ BLOCK_ALLOC_MAX,
+};
+
+static int check_buffer_by_byte(int mem_type, int mode)
+{
+ char *ptr;
+ int i, j, item;
+ bool err = false;
+
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ item = sizeof(sizes)/sizeof(int);
+
+ for (i = 0; i < item; i++) {
+ ptr = (char *)mte_allocate_memory(sizes[i], mem_type, 0, true);
+ if (check_allocated_memory(ptr, sizes[i], mem_type, true) != KSFT_PASS)
+ return KSFT_FAIL;
+ mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[i]);
+ /* Set some value in tagged memory */
+ for (j = 0; j < sizes[i]; j++)
+ ptr[j] = '1';
+ mte_wait_after_trig();
+ err = cur_mte_cxt.fault_valid;
+ /* Check whether the buffer was filled. */
+ for (j = 0; j < sizes[i] && !err; j++) {
+ if (ptr[j] != '1')
+ err = true;
+ }
+ mte_free_memory((void *)ptr, sizes[i], mem_type, true);
+
+ if (err)
+ break;
+ }
+ return (err ? KSFT_FAIL : KSFT_PASS);
+}
+
+static int check_buffer_underflow_by_byte(int mem_type, int mode,
+ int underflow_range)
+{
+ char *ptr;
+ int i, j, item, last_index;
+ bool err;
+ char *und_ptr = NULL;
+
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ item = sizeof(sizes)/sizeof(int);
+ for (i = 0; i < item; i++) {
+ ptr = (char *)mte_allocate_memory_tag_range(sizes[i], mem_type, 0,
+ underflow_range, 0);
+ if (check_allocated_memory_range(ptr, sizes[i], mem_type,
+ underflow_range, 0) != KSFT_PASS)
+ return KSFT_FAIL;
+
+ mte_initialize_current_context(mode, (uintptr_t)ptr, -underflow_range);
+ last_index = 0;
+ /* Set some value in tagged memory and make the buffer underflow */
+ for (j = sizes[i] - 1; (j >= -underflow_range) &&
+ (cur_mte_cxt.fault_valid == false); j--) {
+ ptr[j] = '1';
+ last_index = j;
+ }
+ mte_wait_after_trig();
+ err = false;
+ /* Check whether the buffer is filled */
+ for (j = 0; j < sizes[i]; j++) {
+ if (ptr[j] != '1') {
+ err = true;
+ ksft_print_msg("Buffer is not filled at index:%d of ptr:0x%lx\n",
+ j, ptr);
+ break;
+ }
+ }
+ if (err)
+ goto check_buffer_underflow_by_byte_err;
+
+ switch (mode) {
+ case MTE_NONE_ERR:
+ if (cur_mte_cxt.fault_valid == true || last_index != -underflow_range) {
+ err = true;
+ break;
+ }
+ /* There was no fault, so the underflow area should be filled */
+ und_ptr = (char *) MT_CLEAR_TAG((size_t) ptr - underflow_range);
+ for (j = 0 ; j < underflow_range; j++) {
+ if (und_ptr[j] != '1') {
+ err = true;
+ break;
+ }
+ }
+ break;
+ case MTE_ASYNC_ERR:
+ /* Imprecise fault should occur otherwise return error */
+ if (cur_mte_cxt.fault_valid == false) {
+ err = true;
+ break;
+ }
+ /*
+ * The imprecise fault is checked after the write to the buffer,
+ * so the underflow area before the fault should be filled.
+ */
+ und_ptr = (char *) MT_CLEAR_TAG((size_t) ptr);
+ for (j = last_index ; j < 0 ; j++) {
+ if (und_ptr[j] != '1') {
+ err = true;
+ break;
+ }
+ }
+ break;
+ case MTE_SYNC_ERR:
+ /* Precise fault should occur otherwise return error */
+ if (!cur_mte_cxt.fault_valid || (last_index != (-1))) {
+ err = true;
+ break;
+ }
+ /* Underflow area should not be filled */
+ und_ptr = (char *) MT_CLEAR_TAG((size_t) ptr);
+ if (und_ptr[-1] == '1')
+ err = true;
+ break;
+ default:
+ err = true;
+ break;
+ }
+check_buffer_underflow_by_byte_err:
+ mte_free_memory_tag_range((void *)ptr, sizes[i], mem_type, underflow_range, 0);
+ if (err)
+ break;
+ }
+ return (err ? KSFT_FAIL : KSFT_PASS);
+}
+
+static int check_buffer_overflow_by_byte(int mem_type, int mode,
+ int overflow_range)
+{
+ char *ptr;
+ int i, j, item, last_index;
+ bool err;
+ size_t tagged_size, overflow_size;
+ char *over_ptr = NULL;
+
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ item = sizeof(sizes)/sizeof(int);
+ for (i = 0; i < item; i++) {
+ ptr = (char *)mte_allocate_memory_tag_range(sizes[i], mem_type, 0,
+ 0, overflow_range);
+ if (check_allocated_memory_range(ptr, sizes[i], mem_type,
+ 0, overflow_range) != KSFT_PASS)
+ return KSFT_FAIL;
+
+ tagged_size = MT_ALIGN_UP(sizes[i]);
+
+ mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[i] + overflow_range);
+
+ /* Set some value in tagged memory and make the buffer overflow */
+ for (j = 0, last_index = 0 ; (j < (sizes[i] + overflow_range)) &&
+ (cur_mte_cxt.fault_valid == false); j++) {
+ ptr[j] = '1';
+ last_index = j;
+ }
+ mte_wait_after_trig();
+ err = false;
+ /* Check whether the buffer is filled */
+ for (j = 0; j < sizes[i]; j++) {
+ if (ptr[j] != '1') {
+ err = true;
+ ksft_print_msg("Buffer is not filled at index:%d of ptr:0x%lx\n",
+ j, ptr);
+ break;
+ }
+ }
+ if (err)
+ goto check_buffer_overflow_by_byte_err;
+
+ overflow_size = overflow_range - (tagged_size - sizes[i]);
+
+ switch (mode) {
+ case MTE_NONE_ERR:
+ if ((cur_mte_cxt.fault_valid == true) ||
+ (last_index != (sizes[i] + overflow_range - 1))) {
+ err = true;
+ break;
+ }
+ /* There was no fault, so the overflow area should be filled */
+ over_ptr = (char *) MT_CLEAR_TAG((size_t) ptr + tagged_size);
+ for (j = 0 ; j < overflow_size; j++) {
+ if (over_ptr[j] != '1') {
+ err = true;
+ break;
+ }
+ }
+ break;
+ case MTE_ASYNC_ERR:
+ /* Imprecise fault should occur otherwise return error */
+ if (cur_mte_cxt.fault_valid == false) {
+ err = true;
+ break;
+ }
+ /*
+ * The imprecise fault is checked after the write to the buffer,
+ * so the overflow area should be filled before the fault.
+ */
+ over_ptr = (char *) MT_CLEAR_TAG((size_t) ptr);
+ for (j = tagged_size ; j < last_index; j++) {
+ if (over_ptr[j] != '1') {
+ err = true;
+ break;
+ }
+ }
+ break;
+ case MTE_SYNC_ERR:
+ /* Precise fault should occur otherwise return error */
+ if (!cur_mte_cxt.fault_valid || (last_index != tagged_size)) {
+ err = true;
+ break;
+ }
+ /* Overflow area should not be filled */
+ over_ptr = (char *) MT_CLEAR_TAG((size_t) ptr + tagged_size);
+ for (j = 0 ; j < overflow_size; j++) {
+ if (over_ptr[j] == '1')
+ err = true;
+ }
+ break;
+ default:
+ err = true;
+ break;
+ }
+check_buffer_overflow_by_byte_err:
+ mte_free_memory_tag_range((void *)ptr, sizes[i], mem_type, 0, overflow_range);
+ if (err)
+ break;
+ }
+ return (err ? KSFT_FAIL : KSFT_PASS);
+}
+
+static int check_buffer_by_block_iterate(int mem_type, int mode, size_t size)
+{
+ char *src, *dst;
+ int j, result = KSFT_PASS;
+ enum mte_block_test_alloc alloc_type = UNTAGGED_TAGGED;
+
+ for (alloc_type = UNTAGGED_TAGGED; alloc_type < (int) BLOCK_ALLOC_MAX; alloc_type++) {
+ switch (alloc_type) {
+ case UNTAGGED_TAGGED:
+ src = (char *)mte_allocate_memory(size, mem_type, 0, false);
+ if (check_allocated_memory(src, size, mem_type, false) != KSFT_PASS)
+ return KSFT_FAIL;
+
+ dst = (char *)mte_allocate_memory(size, mem_type, 0, true);
+ if (check_allocated_memory(dst, size, mem_type, true) != KSFT_PASS) {
+ mte_free_memory((void *)src, size, mem_type, false);
+ return KSFT_FAIL;
+ }
+
+ break;
+ case TAGGED_UNTAGGED:
+ dst = (char *)mte_allocate_memory(size, mem_type, 0, false);
+ if (check_allocated_memory(dst, size, mem_type, false) != KSFT_PASS)
+ return KSFT_FAIL;
+
+ src = (char *)mte_allocate_memory(size, mem_type, 0, true);
+ if (check_allocated_memory(src, size, mem_type, true) != KSFT_PASS) {
+ mte_free_memory((void *)dst, size, mem_type, false);
+ return KSFT_FAIL;
+ }
+ break;
+ case TAGGED_TAGGED:
+ src = (char *)mte_allocate_memory(size, mem_type, 0, true);
+ if (check_allocated_memory(src, size, mem_type, true) != KSFT_PASS)
+ return KSFT_FAIL;
+
+ dst = (char *)mte_allocate_memory(size, mem_type, 0, true);
+ if (check_allocated_memory(dst, size, mem_type, true) != KSFT_PASS) {
+ mte_free_memory((void *)src, size, mem_type, true);
+ return KSFT_FAIL;
+ }
+ break;
+ default:
+ return KSFT_FAIL;
+ }
+
+ cur_mte_cxt.fault_valid = false;
+ result = KSFT_PASS;
+ mte_initialize_current_context(mode, (uintptr_t)dst, size);
+ /* Set some value in memory and copy */
+ memset((void *)src, (int)'1', size);
+ memcpy((void *)dst, (void *)src, size);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid) {
+ result = KSFT_FAIL;
+ goto check_buffer_by_block_err;
+ }
+ /* Check whether the buffer was filled. */
+ for (j = 0; j < size; j++) {
+ if (src[j] != dst[j] || src[j] != '1') {
+ result = KSFT_FAIL;
+ break;
+ }
+ }
+check_buffer_by_block_err:
+ mte_free_memory((void *)src, size, mem_type,
+ MT_FETCH_TAG((uintptr_t)src) ? true : false);
+ mte_free_memory((void *)dst, size, mem_type,
+ MT_FETCH_TAG((uintptr_t)dst) ? true : false);
+ if (result != KSFT_PASS)
+ return result;
+ }
+ return result;
+}
+
+static int check_buffer_by_block(int mem_type, int mode)
+{
+ int i, item, result = KSFT_PASS;
+
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ item = sizeof(sizes)/sizeof(int);
+ cur_mte_cxt.fault_valid = false;
+ for (i = 0; i < item; i++) {
+ result = check_buffer_by_block_iterate(mem_type, mode, sizes[i]);
+ if (result != KSFT_PASS)
+ break;
+ }
+ return result;
+}
+
+static int compare_memory_tags(char *ptr, size_t size, int tag)
+{
+ int i, new_tag;
+
+ for (i = 0 ; i < size ; i += MT_GRANULE_SIZE) {
+ new_tag = MT_FETCH_TAG((uintptr_t)(mte_get_tag_address(ptr + i)));
+ if (tag != new_tag) {
+ ksft_print_msg("FAIL: child mte tag mismatch\n");
+ return KSFT_FAIL;
+ }
+ }
+ return KSFT_PASS;
+}
+
+static int check_memory_initial_tags(int mem_type, int mode, int mapping)
+{
+ char *ptr;
+ int run, fd;
+ int total = sizeof(sizes)/sizeof(int);
+
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ for (run = 0; run < total; run++) {
+ /* check initial tags for anonymous mmap */
+ ptr = (char *)mte_allocate_memory(sizes[run], mem_type, mapping, false);
+ if (check_allocated_memory(ptr, sizes[run], mem_type, false) != KSFT_PASS)
+ return KSFT_FAIL;
+ if (compare_memory_tags(ptr, sizes[run], 0) != KSFT_PASS) {
+ mte_free_memory((void *)ptr, sizes[run], mem_type, false);
+ return KSFT_FAIL;
+ }
+ mte_free_memory((void *)ptr, sizes[run], mem_type, false);
+
+ /* check initial tags for file mmap */
+ fd = create_temp_file();
+ if (fd == -1)
+ return KSFT_FAIL;
+ ptr = (char *)mte_allocate_file_memory(sizes[run], mem_type, mapping, false, fd);
+ if (check_allocated_memory(ptr, sizes[run], mem_type, false) != KSFT_PASS) {
+ close(fd);
+ return KSFT_FAIL;
+ }
+ if (compare_memory_tags(ptr, sizes[run], 0) != KSFT_PASS) {
+ mte_free_memory((void *)ptr, sizes[run], mem_type, false);
+ close(fd);
+ return KSFT_FAIL;
+ }
+ mte_free_memory((void *)ptr, sizes[run], mem_type, false);
+ close(fd);
+ }
+ return KSFT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+ int err;
+ size_t page_size = getpagesize();
+ int item = sizeof(sizes)/sizeof(int);
+
+ sizes[item - 3] = page_size - 1;
+ sizes[item - 2] = page_size;
+ sizes[item - 1] = page_size + 1;
+
+ err = mte_default_setup();
+ if (err)
+ return err;
+
+ /* Register SIGSEGV handler */
+ mte_register_signal(SIGSEGV, mte_default_handler);
+
+ /* Buffer by byte tests */
+ evaluate_test(check_buffer_by_byte(USE_MMAP, MTE_SYNC_ERR),
+ "Check buffer correctness by byte with sync err mode and mmap memory\n");
+ evaluate_test(check_buffer_by_byte(USE_MMAP, MTE_ASYNC_ERR),
+ "Check buffer correctness by byte with async err mode and mmap memory\n");
+ evaluate_test(check_buffer_by_byte(USE_MPROTECT, MTE_SYNC_ERR),
+ "Check buffer correctness by byte with sync err mode and mmap/mprotect memory\n");
+ evaluate_test(check_buffer_by_byte(USE_MPROTECT, MTE_ASYNC_ERR),
+ "Check buffer correctness by byte with async err mode and mmap/mprotect memory\n");
+
+ /* Check buffer underflow with underflow size as 16 */
+ evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_SYNC_ERR, MT_GRANULE_SIZE),
+ "Check buffer write underflow by byte with sync mode and mmap memory\n");
+ evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_ASYNC_ERR, MT_GRANULE_SIZE),
+ "Check buffer write underflow by byte with async mode and mmap memory\n");
+ evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_NONE_ERR, MT_GRANULE_SIZE),
+ "Check buffer write underflow by byte with tag check fault ignore and mmap memory\n");
+
+ /* Check buffer underflow with underflow size as page size */
+ evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_SYNC_ERR, page_size),
+ "Check buffer write underflow by byte with sync mode and mmap memory\n");
+ evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_ASYNC_ERR, page_size),
+ "Check buffer write underflow by byte with async mode and mmap memory\n");
+ evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_NONE_ERR, page_size),
+ "Check buffer write underflow by byte with tag check fault ignore and mmap memory\n");
+
+ /* Check buffer overflow with overflow size as 16 */
+ evaluate_test(check_buffer_overflow_by_byte(USE_MMAP, MTE_SYNC_ERR, MT_GRANULE_SIZE),
+ "Check buffer write overflow by byte with sync mode and mmap memory\n");
+ evaluate_test(check_buffer_overflow_by_byte(USE_MMAP, MTE_ASYNC_ERR, MT_GRANULE_SIZE),
+ "Check buffer write overflow by byte with async mode and mmap memory\n");
+ evaluate_test(check_buffer_overflow_by_byte(USE_MMAP, MTE_NONE_ERR, MT_GRANULE_SIZE),
+ "Check buffer write overflow by byte with tag fault ignore mode and mmap memory\n");
+
+ /* Buffer by block tests */
+ evaluate_test(check_buffer_by_block(USE_MMAP, MTE_SYNC_ERR),
+ "Check buffer write correctness by block with sync mode and mmap memory\n");
+ evaluate_test(check_buffer_by_block(USE_MMAP, MTE_ASYNC_ERR),
+ "Check buffer write correctness by block with async mode and mmap memory\n");
+ evaluate_test(check_buffer_by_block(USE_MMAP, MTE_NONE_ERR),
+ "Check buffer write correctness by block with tag fault ignore and mmap memory\n");
+
+ /* Initial tags are supposed to be 0 */
+ evaluate_test(check_memory_initial_tags(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
+ "Check initial tags with private mapping, sync error mode and mmap memory\n");
+ evaluate_test(check_memory_initial_tags(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE),
+ "Check initial tags with private mapping, sync error mode and mmap/mprotect memory\n");
+ evaluate_test(check_memory_initial_tags(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED),
+ "Check initial tags with shared mapping, sync error mode and mmap memory\n");
+ evaluate_test(check_memory_initial_tags(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED),
+ "Check initial tags with shared mapping, sync error mode and mmap/mprotect memory\n");
+
+ mte_restore_setup();
+ ksft_print_cnts();
+ return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+}
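Several of the checks above strip or compare the logical tag carried in the pointer itself. A sketch of that arithmetic, assuming the usual arm64 MTE layout with the logical tag in address bits 59:56 (the helper names are ours; mte_def.h provides analogous MT_FETCH_TAG/MT_CLEAR_TAG macros):

    #include <stdint.h>

    #define TAG_SHIFT 56
    #define TAG_MASK  0xfUL

    static inline unsigned int fetch_tag(uintptr_t p)
    {
        return (p >> TAG_SHIFT) & TAG_MASK;	/* cf. MT_FETCH_TAG */
    }

    static inline uintptr_t clear_tag(uintptr_t p)
    {
        return p & ~(TAG_MASK << TAG_SHIFT);	/* cf. MT_CLEAR_TAG */
    }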
diff --git a/tools/testing/selftests/arm64/mte/check_child_memory.c b/tools/testing/selftests/arm64/mte/check_child_memory.c
new file mode 100644
index 000000000000..97bebdecd29e
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_child_memory.c
@@ -0,0 +1,195 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ucontext.h>
+#include <sys/wait.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+#define BUFFER_SIZE (5 * MT_GRANULE_SIZE)
+#define RUNS (MT_TAG_COUNT)
+#define UNDERFLOW MT_GRANULE_SIZE
+#define OVERFLOW MT_GRANULE_SIZE
+
+static size_t page_size;
+static int sizes[] = {
+ 1, 537, 989, 1269, MT_GRANULE_SIZE - 1, MT_GRANULE_SIZE,
+ /* page size - 1 */ 0, /* page size */ 0, /* page size + 1 */ 0
+};
+
+static int check_child_tag_inheritance(char *ptr, int size, int mode)
+{
+ int i, parent_tag, child_tag, fault, child_status;
+ pid_t child;
+
+ parent_tag = MT_FETCH_TAG((uintptr_t)ptr);
+ fault = 0;
+
+ child = fork();
+ if (child == -1) {
+ ksft_print_msg("FAIL: child process creation\n");
+ return KSFT_FAIL;
+ } else if (child == 0) {
+ mte_initialize_current_context(mode, (uintptr_t)ptr, size);
+ /* Do copy on write */
+ memset(ptr, '1', size);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid == true) {
+ fault = 1;
+ goto check_child_tag_inheritance_err;
+ }
+ for (i = 0 ; i < size ; i += MT_GRANULE_SIZE) {
+ child_tag = MT_FETCH_TAG((uintptr_t)(mte_get_tag_address(ptr + i)));
+ if (parent_tag != child_tag) {
+ ksft_print_msg("FAIL: child mte tag mismatch\n");
+ fault = 1;
+ goto check_child_tag_inheritance_err;
+ }
+ }
+ mte_initialize_current_context(mode, (uintptr_t)ptr, -UNDERFLOW);
+ memset(ptr - UNDERFLOW, '2', UNDERFLOW);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid == false) {
+ fault = 1;
+ goto check_child_tag_inheritance_err;
+ }
+ mte_initialize_current_context(mode, (uintptr_t)ptr, size + OVERFLOW);
+ memset(ptr + size, '3', OVERFLOW);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid == false) {
+ fault = 1;
+ goto check_child_tag_inheritance_err;
+ }
+check_child_tag_inheritance_err:
+ _exit(fault);
+ }
+ /* Wait for child process to terminate */
+ wait(&child_status);
+ if (WIFEXITED(child_status))
+ fault = WEXITSTATUS(child_status);
+ else
+ fault = 1;
+ return (fault) ? KSFT_FAIL : KSFT_PASS;
+}
+
+static int check_child_memory_mapping(int mem_type, int mode, int mapping)
+{
+ char *ptr;
+ int run, result;
+ int item = sizeof(sizes)/sizeof(int);
+
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ for (run = 0; run < item; run++) {
+ ptr = (char *)mte_allocate_memory_tag_range(sizes[run], mem_type, mapping,
+ UNDERFLOW, OVERFLOW);
+ if (check_allocated_memory_range(ptr, sizes[run], mem_type,
+ UNDERFLOW, OVERFLOW) != KSFT_PASS)
+ return KSFT_FAIL;
+ result = check_child_tag_inheritance(ptr, sizes[run], mode);
+ mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW);
+ if (result == KSFT_FAIL)
+ return result;
+ }
+ return KSFT_PASS;
+}
+
+static int check_child_file_mapping(int mem_type, int mode, int mapping)
+{
+ char *ptr, *map_ptr;
+ int run, fd, map_size, result = KSFT_PASS;
+ int total = sizeof(sizes)/sizeof(int);
+
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ for (run = 0; run < total; run++) {
+ fd = create_temp_file();
+ if (fd == -1)
+ return KSFT_FAIL;
+
+ map_size = sizes[run] + OVERFLOW + UNDERFLOW;
+ map_ptr = (char *)mte_allocate_file_memory(map_size, mem_type, mapping, false, fd);
+ if (check_allocated_memory(map_ptr, map_size, mem_type, false) != KSFT_PASS) {
+ close(fd);
+ return KSFT_FAIL;
+ }
+ ptr = map_ptr + UNDERFLOW;
+ mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[run]);
+ /* Only mte enabled memory will allow tag insertion */
+ ptr = mte_insert_tags((void *)ptr, sizes[run]);
+ if (!ptr || cur_mte_cxt.fault_valid == true) {
+ ksft_print_msg("FAIL: Insert tags on file based memory\n");
+ munmap((void *)map_ptr, map_size);
+ close(fd);
+ return KSFT_FAIL;
+ }
+ result = check_child_tag_inheritance(ptr, sizes[run], mode);
+ mte_clear_tags((void *)ptr, sizes[run]);
+ munmap((void *)map_ptr, map_size);
+ close(fd);
+ if (result != KSFT_PASS)
+ return KSFT_FAIL;
+ }
+ return KSFT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+ int err;
+ int item = sizeof(sizes)/sizeof(int);
+
+ page_size = getpagesize();
+ if (!page_size) {
+ ksft_print_msg("ERR: Unable to get page size\n");
+ return KSFT_FAIL;
+ }
+ sizes[item - 3] = page_size - 1;
+ sizes[item - 2] = page_size;
+ sizes[item - 1] = page_size + 1;
+
+ err = mte_default_setup();
+ if (err)
+ return err;
+
+ /* Register SIGSEGV handler */
+ mte_register_signal(SIGSEGV, mte_default_handler);
+ mte_register_signal(SIGBUS, mte_default_handler);
+
+ evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
+ "Check child anonymous memory with private mapping, precise mode and mmap memory\n");
+ evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED),
+ "Check child anonymous memory with shared mapping, precise mode and mmap memory\n");
+ evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE),
+ "Check child anonymous memory with private mapping, imprecise mode and mmap memory\n");
+ evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED),
+ "Check child anonymous memory with shared mapping, imprecise mode and mmap memory\n");
+ evaluate_test(check_child_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE),
+ "Check child anonymous memory with private mapping, precise mode and mmap/mprotect memory\n");
+ evaluate_test(check_child_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED),
+ "Check child anonymous memory with shared mapping, precise mode and mmap/mprotect memory\n");
+
+ evaluate_test(check_child_file_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
+ "Check child file memory with private mapping, precise mode and mmap memory\n");
+ evaluate_test(check_child_file_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED),
+ "Check child file memory with shared mapping, precise mode and mmap memory\n");
+ evaluate_test(check_child_file_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE),
+ "Check child file memory with private mapping, imprecise mode and mmap memory\n");
+ evaluate_test(check_child_file_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED),
+ "Check child file memory with shared mapping, imprecise mode and mmap memory\n");
+ evaluate_test(check_child_file_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE),
+ "Check child file memory with private mapping, precise mode and mmap/mprotect memory\n");
+ evaluate_test(check_child_file_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED),
+ "Check child file memory with shared mapping, precise mode and mmap/mprotect memory\n");
+
+ mte_restore_setup();
+ ksft_print_cnts();
+ return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+}
diff --git a/tools/testing/selftests/arm64/mte/check_ksm_options.c b/tools/testing/selftests/arm64/mte/check_ksm_options.c
new file mode 100644
index 000000000000..bc41ae630c86
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_ksm_options.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ucontext.h>
+#include <sys/mman.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+#define TEST_UNIT 10
+#define PATH_KSM "/sys/kernel/mm/ksm/"
+#define MAX_LOOP 4
+
+static size_t page_sz;
+static unsigned long ksm_sysfs[5];
+
+static unsigned long read_sysfs(char *str)
+{
+ FILE *f;
+ unsigned long val = 0;
+
+ f = fopen(str, "r");
+ if (!f) {
+ ksft_print_msg("ERR: missing %s\n", str);
+ return 0;
+ }
+ fscanf(f, "%lu", &val);
+ fclose(f);
+ return val;
+}
+
+static void write_sysfs(char *str, unsigned long val)
+{
+ FILE *f;
+
+ f = fopen(str, "w");
+ if (!f) {
+ ksft_print_msg("ERR: missing %s\n", str);
+ return;
+ }
+ fprintf(f, "%lu", val);
+ fclose(f);
+}
+
+static void mte_ksm_setup(void)
+{
+ ksm_sysfs[0] = read_sysfs(PATH_KSM "merge_across_nodes");
+ write_sysfs(PATH_KSM "merge_across_nodes", 1);
+ ksm_sysfs[1] = read_sysfs(PATH_KSM "sleep_millisecs");
+ write_sysfs(PATH_KSM "sleep_millisecs", 0);
+ ksm_sysfs[2] = read_sysfs(PATH_KSM "run");
+ write_sysfs(PATH_KSM "run", 1);
+ ksm_sysfs[3] = read_sysfs(PATH_KSM "max_page_sharing");
+ write_sysfs(PATH_KSM "max_page_sharing", ksm_sysfs[3] + TEST_UNIT);
+ ksm_sysfs[4] = read_sysfs(PATH_KSM "pages_to_scan");
+ write_sysfs(PATH_KSM "pages_to_scan", ksm_sysfs[4] + TEST_UNIT);
+}
+
+static void mte_ksm_restore(void)
+{
+ write_sysfs(PATH_KSM "merge_across_nodes", ksm_sysfs[0]);
+ write_sysfs(PATH_KSM "sleep_millisecs", ksm_sysfs[1]);
+ write_sysfs(PATH_KSM "run", ksm_sysfs[2]);
+ write_sysfs(PATH_KSM "max_page_sharing", ksm_sysfs[3]);
+ write_sysfs(PATH_KSM "pages_to_scan", ksm_sysfs[4]);
+}
+
+static void mte_ksm_scan(void)
+{
+ int cur_count = read_sysfs(PATH_KSM "full_scans");
+ int scan_count = cur_count + 1;
+ int max_loop_count = MAX_LOOP;
+
+ while ((cur_count < scan_count) && max_loop_count) {
+ sleep(1);
+ cur_count = read_sysfs(PATH_KSM "full_scans");
+ max_loop_count--;
+ }
+#ifdef DEBUG
+ ksft_print_msg("INFO: pages_shared=%lu pages_sharing=%lu\n",
+ read_sysfs(PATH_KSM "pages_shared"),
+ read_sysfs(PATH_KSM "pages_sharing"));
+#endif
+}
+
+static int check_madvise_options(int mem_type, int mode, int mapping)
+{
+ char *ptr;
+ int err, ret;
+
+ err = KSFT_FAIL;
+ if (access(PATH_KSM, F_OK) == -1) {
+ ksft_print_msg("ERR: Kernel KSM config not enabled\n");
+ return err;
+ }
+
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ ptr = mte_allocate_memory(TEST_UNIT * page_sz, mem_type, mapping, true);
+ if (check_allocated_memory(ptr, TEST_UNIT * page_sz, mem_type, false) != KSFT_PASS)
+ return KSFT_FAIL;
+
+ /* Insert same data in all the pages */
+ memset(ptr, 'A', TEST_UNIT * page_sz);
+ ret = madvise(ptr, TEST_UNIT * page_sz, MADV_MERGEABLE);
+ if (ret) {
+ ksft_print_msg("ERR: madvise failed to set MADV_UNMERGEABLE\n");
+ goto madvise_err;
+ }
+ mte_ksm_scan();
+ /* Tagged pages should not merge */
+ if ((read_sysfs(PATH_KSM "pages_shared") < 1) ||
+ (read_sysfs(PATH_KSM "pages_sharing") < (TEST_UNIT - 1)))
+ err = KSFT_PASS;
+madvise_err:
+ mte_free_memory(ptr, TEST_UNIT * page_sz, mem_type, true);
+ return err;
+}
+
+int main(int argc, char *argv[])
+{
+ int err;
+
+ err = mte_default_setup();
+ if (err)
+ return err;
+ page_sz = getpagesize();
+ if (!page_sz) {
+ ksft_print_msg("ERR: Unable to get page size\n");
+ return KSFT_FAIL;
+ }
+ /* Register signal handlers */
+ mte_register_signal(SIGBUS, mte_default_handler);
+ mte_register_signal(SIGSEGV, mte_default_handler);
+ /* Enable KSM */
+ mte_ksm_setup();
+
+ evaluate_test(check_madvise_options(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
+ "Check KSM mte page merge for private mapping, sync mode and mmap memory\n");
+ evaluate_test(check_madvise_options(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE),
+ "Check KSM mte page merge for private mapping, async mode and mmap memory\n");
+ evaluate_test(check_madvise_options(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED),
+ "Check KSM mte page merge for shared mapping, sync mode and mmap memory\n");
+ evaluate_test(check_madvise_options(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED),
+ "Check KSM mte page merge for shared mapping, async mode and mmap memory\n");
+
+ mte_ksm_restore();
+ mte_restore_setup();
+ ksft_print_cnts();
+ return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+}
diff --git a/tools/testing/selftests/arm64/mte/check_mmap_options.c b/tools/testing/selftests/arm64/mte/check_mmap_options.c
new file mode 100644
index 000000000000..33b13b86199b
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_mmap_options.c
@@ -0,0 +1,262 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ucontext.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+#define RUNS (MT_TAG_COUNT)
+#define UNDERFLOW MT_GRANULE_SIZE
+#define OVERFLOW MT_GRANULE_SIZE
+#define TAG_CHECK_ON 0
+#define TAG_CHECK_OFF 1
+
+static size_t page_size;
+static int sizes[] = {
+ 1, 537, 989, 1269, MT_GRANULE_SIZE - 1, MT_GRANULE_SIZE,
+ /* page size - 1 */ 0, /* page size */ 0, /* page size + 1 */ 0
+};
+
+static int check_mte_memory(char *ptr, int size, int mode, int tag_check)
+{
+ mte_initialize_current_context(mode, (uintptr_t)ptr, size);
+ memset(ptr, '1', size);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid == true)
+ return KSFT_FAIL;
+
+ mte_initialize_current_context(mode, (uintptr_t)ptr, -UNDERFLOW);
+ memset(ptr - UNDERFLOW, '2', UNDERFLOW);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid == false && tag_check == TAG_CHECK_ON)
+ return KSFT_FAIL;
+ if (cur_mte_cxt.fault_valid == true && tag_check == TAG_CHECK_OFF)
+ return KSFT_FAIL;
+
+ mte_initialize_current_context(mode, (uintptr_t)ptr, size + OVERFLOW);
+ memset(ptr + size, '3', OVERFLOW);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid == false && tag_check == TAG_CHECK_ON)
+ return KSFT_FAIL;
+ if (cur_mte_cxt.fault_valid == true && tag_check == TAG_CHECK_OFF)
+ return KSFT_FAIL;
+
+ return KSFT_PASS;
+}
+
+static int check_anonymous_memory_mapping(int mem_type, int mode, int mapping, int tag_check)
+{
+ char *ptr, *map_ptr;
+ int run, result, map_size;
+ int item = sizeof(sizes)/sizeof(int);
+
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ for (run = 0; run < item; run++) {
+ map_size = sizes[run] + OVERFLOW + UNDERFLOW;
+ map_ptr = (char *)mte_allocate_memory(map_size, mem_type, mapping, false);
+ if (check_allocated_memory(map_ptr, map_size, mem_type, false) != KSFT_PASS)
+ return KSFT_FAIL;
+
+ ptr = map_ptr + UNDERFLOW;
+ mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[run]);
+ /* Only MTE-enabled memory allows tag insertion */
+ ptr = mte_insert_tags((void *)ptr, sizes[run]);
+ if (!ptr || cur_mte_cxt.fault_valid == true) {
+ ksft_print_msg("FAIL: Insert tags on anonymous mmap memory\n");
+ munmap((void *)map_ptr, map_size);
+ return KSFT_FAIL;
+ }
+ result = check_mte_memory(ptr, sizes[run], mode, tag_check);
+ mte_clear_tags((void *)ptr, sizes[run]);
+ mte_free_memory((void *)map_ptr, map_size, mem_type, false);
+ if (result == KSFT_FAIL)
+ return KSFT_FAIL;
+ }
+ return KSFT_PASS;
+}
+
+static int check_file_memory_mapping(int mem_type, int mode, int mapping, int tag_check)
+{
+ char *ptr, *map_ptr;
+ int run, fd, map_size;
+ int total = sizeof(sizes)/sizeof(int);
+ int result = KSFT_PASS;
+
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ for (run = 0; run < total; run++) {
+ fd = create_temp_file();
+ if (fd == -1)
+ return KSFT_FAIL;
+
+ map_size = sizes[run] + UNDERFLOW + OVERFLOW;
+ map_ptr = (char *)mte_allocate_file_memory(map_size, mem_type, mapping, false, fd);
+ if (check_allocated_memory(map_ptr, map_size, mem_type, false) != KSFT_PASS) {
+ close(fd);
+ return KSFT_FAIL;
+ }
+ ptr = map_ptr + UNDERFLOW;
+ mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[run]);
+ /* Only MTE-enabled memory allows tag insertion */
+ ptr = mte_insert_tags((void *)ptr, sizes[run]);
+ if (!ptr || cur_mte_cxt.fault_valid == true) {
+ ksft_print_msg("FAIL: Insert tags on file based memory\n");
+ munmap((void *)map_ptr, map_size);
+ close(fd);
+ return KSFT_FAIL;
+ }
+ result = check_mte_memory(ptr, sizes[run], mode, tag_check);
+ mte_clear_tags((void *)ptr, sizes[run]);
+ munmap((void *)map_ptr, map_size);
+ close(fd);
+ if (result == KSFT_FAIL)
+ break;
+ }
+ return result;
+}
+
+static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping)
+{
+ char *ptr, *map_ptr;
+ int run, prot_flag, result, fd, map_size;
+ int total = sizeof(sizes)/sizeof(int);
+
+ prot_flag = PROT_READ | PROT_WRITE;
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ for (run = 0; run < total; run++) {
+ map_size = sizes[run] + OVERFLOW + UNDERFLOW;
+ ptr = (char *)mte_allocate_memory_tag_range(sizes[run], mem_type, mapping,
+ UNDERFLOW, OVERFLOW);
+ if (check_allocated_memory_range(ptr, sizes[run], mem_type,
+ UNDERFLOW, OVERFLOW) != KSFT_PASS)
+ return KSFT_FAIL;
+ map_ptr = ptr - UNDERFLOW;
+ /* Try to clear PROT_MTE property and verify it by tag checking */
+ if (mprotect(map_ptr, map_size, prot_flag)) {
+ mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type,
+ UNDERFLOW, OVERFLOW);
+ ksft_print_msg("FAIL: mprotect not ignoring clear PROT_MTE property\n");
+ return KSFT_FAIL;
+ }
+ result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON);
+ mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW);
+ if (result != KSFT_PASS)
+ return KSFT_FAIL;
+
+ fd = create_temp_file();
+ if (fd == -1)
+ return KSFT_FAIL;
+ ptr = (char *)mte_allocate_file_memory_tag_range(sizes[run], mem_type, mapping,
+ UNDERFLOW, OVERFLOW, fd);
+ if (check_allocated_memory_range(ptr, sizes[run], mem_type,
+ UNDERFLOW, OVERFLOW) != KSFT_PASS) {
+ close(fd);
+ return KSFT_FAIL;
+ }
+ map_ptr = ptr - UNDERFLOW;
+ /* Try to clear PROT_MTE property and verify it by tag checking */
+ if (mprotect(map_ptr, map_size, prot_flag)) {
+ ksft_print_msg("FAIL: mprotect not ignoring clear PROT_MTE property\n");
+ mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type,
+ UNDERFLOW, OVERFLOW);
+ close(fd);
+ return KSFT_FAIL;
+ }
+ result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON);
+ mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW);
+ close(fd);
+ if (result != KSFT_PASS)
+ return KSFT_FAIL;
+ }
+ return KSFT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+ int err;
+ int item = sizeof(sizes)/sizeof(int);
+
+ err = mte_default_setup();
+ if (err)
+ return err;
+ page_size = getpagesize();
+ if (!page_size) {
+ ksft_print_msg("ERR: Unable to get page size\n");
+ return KSFT_FAIL;
+ }
+ sizes[item - 3] = page_size - 1;
+ sizes[item - 2] = page_size;
+ sizes[item - 1] = page_size + 1;
+
+ /* Register signal handlers */
+ mte_register_signal(SIGBUS, mte_default_handler);
+ mte_register_signal(SIGSEGV, mte_default_handler);
+
+ mte_enable_pstate_tco();
+ evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_OFF),
+ "Check anonymous memory with private mapping, sync error mode, mmap memory and tag check off\n");
+ evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_OFF),
+ "Check file memory with private mapping, sync error mode, mmap/mprotect memory and tag check off\n");
+
+ mte_disable_pstate_tco();
+ evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_NONE_ERR, MAP_PRIVATE, TAG_CHECK_OFF),
+ "Check anonymous memory with private mapping, no error mode, mmap memory and tag check off\n");
+ evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_NONE_ERR, MAP_PRIVATE, TAG_CHECK_OFF),
+ "Check file memory with private mapping, no error mode, mmap/mprotect memory and tag check off\n");
+
+ evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+ "Check anonymous memory with private mapping, sync error mode, mmap memory and tag check on\n");
+ evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+ "Check anonymous memory with private mapping, sync error mode, mmap/mprotect memory and tag check on\n");
+ evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+ "Check anonymous memory with shared mapping, sync error mode, mmap memory and tag check on\n");
+ evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+ "Check anonymous memory with shared mapping, sync error mode, mmap/mprotect memory and tag check on\n");
+ evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+ "Check anonymous memory with private mapping, async error mode, mmap memory and tag check on\n");
+ evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+ "Check anonymous memory with private mapping, async error mode, mmap/mprotect memory and tag check on\n");
+ evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+ "Check anonymous memory with shared mapping, async error mode, mmap memory and tag check on\n");
+ evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+ "Check anonymous memory with shared mapping, async error mode, mmap/mprotect memory and tag check on\n");
+
+ evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+ "Check file memory with private mapping, sync error mode, mmap memory and tag check on\n");
+ evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+ "Check file memory with private mapping, sync error mode, mmap/mprotect memory and tag check on\n");
+ evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+ "Check file memory with shared mapping, sync error mode, mmap memory and tag check on\n");
+ evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+ "Check file memory with shared mapping, sync error mode, mmap/mprotect memory and tag check on\n");
+ evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+ "Check file memory with private mapping, async error mode, mmap memory and tag check on\n");
+ evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+ "Check file memory with private mapping, async error mode, mmap/mprotect memory and tag check on\n");
+ evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+ "Check file memory with shared mapping, async error mode, mmap memory and tag check on\n");
+ evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+ "Check file memory with shared mapping, async error mode, mmap/mprotect memory and tag check on\n");
+
+ evaluate_test(check_clear_prot_mte_flag(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
+ "Check clear PROT_MTE flags with private mapping, sync error mode and mmap memory\n");
+ evaluate_test(check_clear_prot_mte_flag(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE),
+ "Check clear PROT_MTE flags with private mapping and sync error mode and mmap/mprotect memory\n");
+
+ mte_restore_setup();
+ ksft_print_cnts();
+ return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+}
diff --git a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c
new file mode 100644
index 000000000000..94d245a0ed56
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c
@@ -0,0 +1,185 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ucontext.h>
+#include <sys/wait.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+#define BUFFER_SIZE (5 * MT_GRANULE_SIZE)
+#define RUNS (MT_TAG_COUNT * 2)
+#define MTE_LAST_TAG_MASK (0x7FFF)
+
+static int verify_mte_pointer_validity(char *ptr, int mode)
+{
+ mte_initialize_current_context(mode, (uintptr_t)ptr, BUFFER_SIZE);
+ /* Check the validity of the tagged pointer */
+ memset((void *)ptr, '1', BUFFER_SIZE);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid)
+ return KSFT_FAIL;
+ /* Proceed further for nonzero tags */
+ if (!MT_FETCH_TAG((uintptr_t)ptr))
+ return KSFT_PASS;
+ mte_initialize_current_context(mode, (uintptr_t)ptr, BUFFER_SIZE + 1);
+ /* Check the validity outside the range */
+ ptr[BUFFER_SIZE] = '2';
+ mte_wait_after_trig();
+ if (!cur_mte_cxt.fault_valid)
+ return KSFT_FAIL;
+ else
+ return KSFT_PASS;
+}
+
+static int check_single_included_tags(int mem_type, int mode)
+{
+ char *ptr;
+ int tag, run, result = KSFT_PASS;
+
+ ptr = (char *)mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false);
+ if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE,
+ mem_type, false) != KSFT_PASS)
+ return KSFT_FAIL;
+
+ for (tag = 0; (tag < MT_TAG_COUNT) && (result == KSFT_PASS); tag++) {
+ mte_switch_mode(mode, MT_INCLUDE_VALID_TAG(tag));
+ /* Try to catch an excluded tag over a number of attempts. */
+ for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) {
+ ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE);
+ /* Check tag value */
+ if (MT_FETCH_TAG((uintptr_t)ptr) == tag) {
+ ksft_print_msg("FAIL: wrong tag = 0x%x with include mask=0x%x\n",
+ MT_FETCH_TAG((uintptr_t)ptr),
+ MT_INCLUDE_VALID_TAG(tag));
+ result = KSFT_FAIL;
+ break;
+ }
+ result = verify_mte_pointer_validity(ptr, mode);
+ }
+ }
+ mte_free_memory_tag_range((void *)ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE);
+ return result;
+}
+
+static int check_multiple_included_tags(int mem_type, int mode)
+{
+ char *ptr;
+ int tag, run, result = KSFT_PASS;
+ unsigned long excl_mask = 0;
+
+ ptr = (char *)mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false);
+ if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE,
+ mem_type, false) != KSFT_PASS)
+ return KSFT_FAIL;
+
+ for (tag = 0; (tag < MT_TAG_COUNT - 1) && (result == KSFT_PASS); tag++) {
+ excl_mask |= 1 << tag;
+ mte_switch_mode(mode, MT_INCLUDE_VALID_TAGS(excl_mask));
+ /* Try to catch an excluded tag over a number of attempts. */
+ for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) {
+ ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE);
+ /* Check tag value */
+ if (MT_FETCH_TAG((uintptr_t)ptr) < tag) {
+ ksft_print_msg("FAIL: wrong tag = 0x%x with include mask=0x%x\n",
+ MT_FETCH_TAG((uintptr_t)ptr),
+ MT_INCLUDE_VALID_TAGS(excl_mask));
+ result = KSFT_FAIL;
+ break;
+ }
+ result = verify_mte_pointer_validity(ptr, mode);
+ }
+ }
+ mte_free_memory_tag_range((void *)ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE);
+ return result;
+}
+
+static int check_all_included_tags(int mem_type, int mode)
+{
+ char *ptr;
+ int run, result = KSFT_PASS;
+
+ ptr = (char *)mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false);
+ if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE,
+ mem_type, false) != KSFT_PASS)
+ return KSFT_FAIL;
+
+ mte_switch_mode(mode, MT_INCLUDE_TAG_MASK);
+ /* Try to catch an excluded tag over a number of attempts. */
+ for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) {
+ ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE);
+ /*
+ * The tag can be any value from 0x0 to 0xF (the full allowed range),
+ * so there is no value to match; just verify that the pointer is
+ * writable.
+ */
+ result = verify_mte_pointer_validity(ptr, mode);
+ }
+ mte_free_memory_tag_range((void *)ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE);
+ return result;
+}
+
+static int check_none_included_tags(int mem_type, int mode)
+{
+ char *ptr;
+ int run;
+
+ ptr = (char *)mte_allocate_memory(BUFFER_SIZE, mem_type, 0, false);
+ if (check_allocated_memory(ptr, BUFFER_SIZE, mem_type, false) != KSFT_PASS)
+ return KSFT_FAIL;
+
+ mte_switch_mode(mode, MT_EXCLUDE_TAG_MASK);
+ /* Try to catch an excluded tag over a number of attempts. */
+ for (run = 0; run < RUNS; run++) {
+ ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE);
+ /* All tags are excluded here, so the generated tag value should be 0 */
+ if (MT_FETCH_TAG((uintptr_t)ptr)) {
+ ksft_print_msg("FAIL: included tag value found\n");
+ mte_free_memory((void *)ptr, BUFFER_SIZE, mem_type, true);
+ return KSFT_FAIL;
+ }
+ mte_initialize_current_context(mode, (uintptr_t)ptr, BUFFER_SIZE);
+ /* Check the write validity of the untagged pointer */
+ memset((void *)ptr, '1', BUFFER_SIZE);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid)
+ break;
+ }
+ mte_free_memory((void *)ptr, BUFFER_SIZE, mem_type, false);
+ if (cur_mte_cxt.fault_valid)
+ return KSFT_FAIL;
+ else
+ return KSFT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+ int err;
+
+ err = mte_default_setup();
+ if (err)
+ return err;
+
+ /* Register SIGSEGV handler */
+ mte_register_signal(SIGSEGV, mte_default_handler);
+
+ evaluate_test(check_single_included_tags(USE_MMAP, MTE_SYNC_ERR),
+ "Check an included tag value with sync mode\n");
+ evaluate_test(check_multiple_included_tags(USE_MMAP, MTE_SYNC_ERR),
+ "Check different included tags value with sync mode\n");
+ evaluate_test(check_none_included_tags(USE_MMAP, MTE_SYNC_ERR),
+ "Check none included tags value with sync mode\n");
+ evaluate_test(check_all_included_tags(USE_MMAP, MTE_SYNC_ERR),
+ "Check all included tags value with sync mode\n");
+
+ mte_restore_setup();
+ ksft_print_cnts();
+ return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+}
diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c
new file mode 100644
index 000000000000..594e98e76880
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_user_mem.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <ucontext.h>
+#include <unistd.h>
+#include <sys/mman.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+static size_t page_sz;
+
+static int check_usermem_access_fault(int mem_type, int mode, int mapping)
+{
+ int fd, i, err;
+ char val = 'A';
+ size_t len, read_len;
+ void *ptr, *ptr_next;
+
+ err = KSFT_FAIL;
+ len = 2 * page_sz;
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ fd = create_temp_file();
+ if (fd == -1)
+ return KSFT_FAIL;
+ for (i = 0; i < len; i++)
+ write(fd, &val, sizeof(val));
+ lseek(fd, 0, 0);
+ ptr = mte_allocate_memory(len, mem_type, mapping, true);
+ if (check_allocated_memory(ptr, len, mem_type, true) != KSFT_PASS) {
+ close(fd);
+ return KSFT_FAIL;
+ }
+ mte_initialize_current_context(mode, (uintptr_t)ptr, len);
+ /* Copy from file into buffer with valid tag */
+ read_len = read(fd, ptr, len);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid || read_len < len)
+ goto usermem_acc_err;
+ /* Verify same pattern is read */
+ for (i = 0; i < len; i++)
+ if (*(char *)(ptr + i) != val)
+ break;
+ if (i < len)
+ goto usermem_acc_err;
+
+ /* Tag the next half of memory with different value */
+ ptr_next = (void *)((unsigned long)ptr + page_sz);
+ ptr_next = mte_insert_new_tag(ptr_next);
+ mte_set_tag_address_range(ptr_next, page_sz);
+
+ lseek(fd, 0, 0);
+ /* Copy from file into buffer with invalid tag */
+ read_len = read(fd, ptr, len);
+ mte_wait_after_trig();
+ /*
+ * In sync mode, a kernel access to user memory with an invalid tag
+ * should fail (the read returns short) without delivering a fault to
+ * userspace, but it may not fail in async mode, as per the MTE
+ * userspace support implemented in the arm64 kernel.
+ */
+ if (mode == MTE_SYNC_ERR &&
+ !cur_mte_cxt.fault_valid && read_len < len) {
+ err = KSFT_PASS;
+ } else if (mode == MTE_ASYNC_ERR &&
+ !cur_mte_cxt.fault_valid && read_len == len) {
+ err = KSFT_PASS;
+ }
+usermem_acc_err:
+ mte_free_memory((void *)ptr, len, mem_type, true);
+ close(fd);
+ return err;
+}
+
+int main(int argc, char *argv[])
+{
+ int err;
+
+ page_sz = getpagesize();
+ if (!page_sz) {
+ ksft_print_msg("ERR: Unable to get page size\n");
+ return KSFT_FAIL;
+ }
+ err = mte_default_setup();
+ if (err)
+ return err;
+ /* Register signal handlers */
+ mte_register_signal(SIGSEGV, mte_default_handler);
+
+ evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
+ "Check memory access from kernel in sync mode, private mapping and mmap memory\n");
+ evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED),
+ "Check memory access from kernel in sync mode, shared mapping and mmap memory\n");
+
+ evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE),
+ "Check memory access from kernel in async mode, private mapping and mmap memory\n");
+ evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED),
+ "Check memory access from kernel in async mode, shared mapping and mmap memory\n");
+
+ mte_restore_setup();
+ ksft_print_cnts();
+ return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+}
diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.c b/tools/testing/selftests/arm64/mte/mte_common_util.c
new file mode 100644
index 000000000000..39f8908988ea
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/mte_common_util.c
@@ -0,0 +1,341 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#include <fcntl.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <linux/auxvec.h>
+#include <sys/auxv.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+
+#include <asm/hwcap.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+#define INIT_BUFFER_SIZE 256
+
+struct mte_fault_cxt cur_mte_cxt;
+static unsigned int mte_cur_mode;
+static unsigned int mte_cur_pstate_tco;
+
+void mte_default_handler(int signum, siginfo_t *si, void *uc)
+{
+ unsigned long addr = (unsigned long)si->si_addr;
+
+ if (signum == SIGSEGV) {
+#ifdef DEBUG
+ ksft_print_msg("INFO: SIGSEGV signal at pc=%lx, fault addr=%lx, si_code=%lx\n",
+ ((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code);
+#endif
+ if (si->si_code == SEGV_MTEAERR) {
+ if (cur_mte_cxt.trig_si_code == si->si_code)
+ cur_mte_cxt.fault_valid = true;
+ return;
+ }
+ /* Compare the context for precise error */
+ else if (si->si_code == SEGV_MTESERR) {
+ if (cur_mte_cxt.trig_si_code == si->si_code &&
+ ((cur_mte_cxt.trig_range >= 0 &&
+ addr >= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) &&
+ addr <= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) ||
+ (cur_mte_cxt.trig_range < 0 &&
+ addr <= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) &&
+ addr >= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)))) {
+ cur_mte_cxt.fault_valid = true;
+ /* Adjust the pc by 4 */
+ ((ucontext_t *)uc)->uc_mcontext.pc += 4;
+ } else {
+ ksft_print_msg("Invalid MTE synchronous exception caught!\n");
+ exit(1);
+ }
+ } else {
+ ksft_print_msg("Unknown SIGSEGV exception caught!\n");
+ exit(1);
+ }
+ } else if (signum == SIGBUS) {
+ ksft_print_msg("INFO: SIGBUS signal at pc=%lx, fault addr=%lx, si_code=%lx\n",
+ ((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code);
+ if ((cur_mte_cxt.trig_range >= 0 &&
+ addr >= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) &&
+ addr <= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) ||
+ (cur_mte_cxt.trig_range < 0 &&
+ addr <= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) &&
+ addr >= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range))) {
+ cur_mte_cxt.fault_valid = true;
+ /* Adjust the pc by 4 */
+ ((ucontext_t *)uc)->uc_mcontext.pc += 4;
+ }
+ }
+}
+
+void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *))
+{
+ struct sigaction sa;
+
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO;
+ sigemptyset(&sa.sa_mask);
+ sigaction(signal, &sa, NULL);
+}
+
+void mte_wait_after_trig(void)
+{
+ sched_yield();
+}
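+
+/*
+ * Illustrative usage of the trigger/wait/check pattern that the helpers above
+ * support (a sketch, not part of the suite): arm the context, perform the
+ * candidate faulting access, then yield so the kernel can deliver a possibly
+ * asynchronous tag-check fault before the flag is inspected.
+ *
+ *	mte_initialize_current_context(mode, (uintptr_t)ptr, size);
+ *	memset(ptr, 'x', size);
+ *	mte_wait_after_trig();
+ *	if (cur_mte_cxt.fault_valid)
+ *		...the expected tag-check fault was caught...
+ */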
+
+void *mte_insert_tags(void *ptr, size_t size)
+{
+ void *tag_ptr;
+ int align_size;
+
+ if (!ptr || (unsigned long)(ptr) & MT_ALIGN_GRANULE) {
+ ksft_print_msg("FAIL: Addr=%lx: invalid\n", ptr);
+ return NULL;
+ }
+ align_size = MT_ALIGN_UP(size);
+ tag_ptr = mte_insert_random_tag(ptr);
+ mte_set_tag_address_range(tag_ptr, align_size);
+ return tag_ptr;
+}
+
+void mte_clear_tags(void *ptr, size_t size)
+{
+ if (!ptr || (unsigned long)(ptr) & MT_ALIGN_GRANULE) {
+ ksft_print_msg("FAIL: Addr=%lx: invalid\n", ptr);
+ return;
+ }
+ size = MT_ALIGN_UP(size);
+ ptr = (void *)MT_CLEAR_TAG((unsigned long)ptr);
+ mte_clear_tag_address_range(ptr, size);
+}
+
+static void *__mte_allocate_memory_range(size_t size, int mem_type, int mapping,
+ size_t range_before, size_t range_after,
+ bool tags, int fd)
+{
+ void *ptr;
+ int prot_flag, map_flag;
+ size_t entire_size = size + range_before + range_after;
+
+ if (mem_type != USE_MALLOC && mem_type != USE_MMAP &&
+ mem_type != USE_MPROTECT) {
+ ksft_print_msg("FAIL: Invalid allocate request\n");
+ return NULL;
+ }
+ if (mem_type == USE_MALLOC) {
+ ptr = malloc(entire_size);
+ /* Avoid pointer arithmetic on a failed (NULL) allocation */
+ return ptr ? ptr + range_before : NULL;
+ }
+
+ prot_flag = PROT_READ | PROT_WRITE;
+ if (mem_type == USE_MMAP)
+ prot_flag |= PROT_MTE;
+
+ map_flag = mapping;
+ if (fd == -1)
+ map_flag = MAP_ANONYMOUS | map_flag;
+ if (!(mapping & MAP_SHARED))
+ map_flag |= MAP_PRIVATE;
+ ptr = mmap(NULL, entire_size, prot_flag, map_flag, fd, 0);
+ if (ptr == MAP_FAILED) {
+ ksft_print_msg("FAIL: mmap allocation\n");
+ return NULL;
+ }
+ if (mem_type == USE_MPROTECT) {
+ if (mprotect(ptr, entire_size, prot_flag | PROT_MTE)) {
+ munmap(ptr, entire_size);
+ ksft_print_msg("FAIL: mprotect PROT_MTE property\n");
+ return NULL;
+ }
+ }
+ if (tags)
+ ptr = mte_insert_tags(ptr + range_before, size);
+ return ptr;
+}
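+
+/*
+ * Layout produced by the helper above (for reference): the mapping covers
+ * range_before + size + range_after bytes and the returned (optionally
+ * tagged) pointer addresses only the middle "size" bytes, so tests can
+ * deliberately underflow or overflow into the surrounding ranges:
+ *
+ *	|<- range_before ->|<---- size ---->|<- range_after ->|
+ *	^map start         ^returned ptr
+ */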
+
+void *mte_allocate_memory_tag_range(size_t size, int mem_type, int mapping,
+ size_t range_before, size_t range_after)
+{
+ return __mte_allocate_memory_range(size, mem_type, mapping, range_before,
+ range_after, true, -1);
+}
+
+void *mte_allocate_memory(size_t size, int mem_type, int mapping, bool tags)
+{
+ return __mte_allocate_memory_range(size, mem_type, mapping, 0, 0, tags, -1);
+}
+
+void *mte_allocate_file_memory(size_t size, int mem_type, int mapping, bool tags, int fd)
+{
+ int index;
+ char buffer[INIT_BUFFER_SIZE];
+
+ if (mem_type != USE_MPROTECT && mem_type != USE_MMAP) {
+ ksft_print_msg("FAIL: Invalid mmap file request\n");
+ return NULL;
+ }
+ /* Initialize the file for mappable size */
+ lseek(fd, 0, SEEK_SET);
+ for (index = INIT_BUFFER_SIZE; index < size; index += INIT_BUFFER_SIZE)
+ write(fd, buffer, INIT_BUFFER_SIZE);
+ index -= INIT_BUFFER_SIZE;
+ write(fd, buffer, size - index);
+ return __mte_allocate_memory_range(size, mem_type, mapping, 0, 0, tags, fd);
+}
+
+void *mte_allocate_file_memory_tag_range(size_t size, int mem_type, int mapping,
+ size_t range_before, size_t range_after, int fd)
+{
+ int index;
+ char buffer[INIT_BUFFER_SIZE];
+ int map_size = size + range_before + range_after;
+
+ if (mem_type != USE_MPROTECT && mem_type != USE_MMAP) {
+ ksft_print_msg("FAIL: Invalid mmap file request\n");
+ return NULL;
+ }
+ /* Initialize the file for mappable size */
+ lseek(fd, 0, SEEK_SET);
+ for (index = INIT_BUFFER_SIZE; index < map_size; index += INIT_BUFFER_SIZE)
+ write(fd, buffer, INIT_BUFFER_SIZE);
+ index -= INIT_BUFFER_SIZE;
+ write(fd, buffer, map_size - index);
+ return __mte_allocate_memory_range(size, mem_type, mapping, range_before,
+ range_after, true, fd);
+}
+
+static void __mte_free_memory_range(void *ptr, size_t size, int mem_type,
+ size_t range_before, size_t range_after, bool tags)
+{
+ switch (mem_type) {
+ case USE_MALLOC:
+ free(ptr - range_before);
+ break;
+ case USE_MMAP:
+ case USE_MPROTECT:
+ if (tags)
+ mte_clear_tags(ptr, size);
+ munmap(ptr - range_before, size + range_before + range_after);
+ break;
+ default:
+ ksft_print_msg("FAIL: Invalid free request\n");
+ break;
+ }
+}
+
+void mte_free_memory_tag_range(void *ptr, size_t size, int mem_type,
+ size_t range_before, size_t range_after)
+{
+ __mte_free_memory_range(ptr, size, mem_type, range_before, range_after, true);
+}
+
+void mte_free_memory(void *ptr, size_t size, int mem_type, bool tags)
+{
+ __mte_free_memory_range(ptr, size, mem_type, 0, 0, tags);
+}
+
+void mte_initialize_current_context(int mode, uintptr_t ptr, ssize_t range)
+{
+ cur_mte_cxt.fault_valid = false;
+ cur_mte_cxt.trig_addr = ptr;
+ cur_mte_cxt.trig_range = range;
+ if (mode == MTE_SYNC_ERR)
+ cur_mte_cxt.trig_si_code = SEGV_MTESERR;
+ else if (mode == MTE_ASYNC_ERR)
+ cur_mte_cxt.trig_si_code = SEGV_MTEAERR;
+ else
+ cur_mte_cxt.trig_si_code = 0;
+}
+
+int mte_switch_mode(int mte_option, unsigned long incl_mask)
+{
+ unsigned long en = 0;
+
+ if (!(mte_option == MTE_SYNC_ERR || mte_option == MTE_ASYNC_ERR ||
+ mte_option == MTE_NONE_ERR || incl_mask <= MTE_ALLOW_NON_ZERO_TAG)) {
+ ksft_print_msg("FAIL: Invalid mte config option\n");
+ return -EINVAL;
+ }
+ en = PR_TAGGED_ADDR_ENABLE;
+ if (mte_option == MTE_SYNC_ERR)
+ en |= PR_MTE_TCF_SYNC;
+ else if (mte_option == MTE_ASYNC_ERR)
+ en |= PR_MTE_TCF_ASYNC;
+ else if (mte_option == MTE_NONE_ERR)
+ en |= PR_MTE_TCF_NONE;
+
+ en |= (incl_mask << PR_MTE_TAG_SHIFT);
+ /* Enable address tagging ABI, mte error reporting mode and tag inclusion mask. */
+ if (prctl(PR_SET_TAGGED_ADDR_CTRL, en, 0, 0, 0) != 0) {
+ ksft_print_msg("FAIL: prctl PR_SET_TAGGED_ADDR_CTRL for mte mode\n");
+ return -EINVAL;
+ }
+ return 0;
+}
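+
+/*
+ * Example call (a sketch of how the tests above use this): enable synchronous
+ * tag-check faults while letting tag generation pick any non-zero tag:
+ *
+ *	mte_switch_mode(MTE_SYNC_ERR, MTE_ALLOW_NON_ZERO_TAG);
+ *
+ * This builds the prctl word PR_TAGGED_ADDR_ENABLE | PR_MTE_TCF_SYNC with the
+ * inclusion mask 0xfffe placed at PR_MTE_TAG_SHIFT.
+ */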
+
+#define ID_AA64PFR1_MTE_SHIFT 8
+#define ID_AA64PFR1_MTE 2
+
+int mte_default_setup(void)
+{
+ unsigned long hwcaps = getauxval(AT_HWCAP);
+ unsigned long en = 0;
+ int ret;
+
+ if (!(hwcaps & HWCAP_CPUID)) {
+ ksft_print_msg("FAIL: CPUID registers unavailable\n");
+ return KSFT_FAIL;
+ }
+ /* Read ID_AA64PFR1_EL1 register */
+ asm volatile("mrs %0, id_aa64pfr1_el1" : "=r"(hwcaps) : : "memory");
+ if (((hwcaps >> ID_AA64PFR1_MTE_SHIFT) & MT_TAG_MASK) != ID_AA64PFR1_MTE) {
+ ksft_print_msg("FAIL: MTE features unavailable\n");
+ return KSFT_SKIP;
+ }
+ /* Get current mte mode */
+ ret = prctl(PR_GET_TAGGED_ADDR_CTRL, en, 0, 0, 0);
+ if (ret < 0) {
+ ksft_print_msg("FAIL:prctl PR_GET_TAGGED_ADDR_CTRL with error =%d\n", ret);
+ return KSFT_FAIL;
+ }
+ if (ret & PR_MTE_TCF_SYNC)
+ mte_cur_mode = MTE_SYNC_ERR;
+ else if (ret & PR_MTE_TCF_ASYNC)
+ mte_cur_mode = MTE_ASYNC_ERR;
+ else if (ret & PR_MTE_TCF_NONE)
+ mte_cur_mode = MTE_NONE_ERR;
+
+ mte_cur_pstate_tco = mte_get_pstate_tco();
+ /* Disable PSTATE.TCO */
+ mte_disable_pstate_tco();
+ return 0;
+}
+
+void mte_restore_setup(void)
+{
+ mte_switch_mode(mte_cur_mode, MTE_ALLOW_NON_ZERO_TAG);
+ if (mte_cur_pstate_tco == MT_PSTATE_TCO_EN)
+ mte_enable_pstate_tco();
+ else if (mte_cur_pstate_tco == MT_PSTATE_TCO_DIS)
+ mte_disable_pstate_tco();
+}
+
+int create_temp_file(void)
+{
+ int fd;
+ char filename[] = "/dev/shm/tmp_XXXXXX";
+
+ /* Create a file in the tmpfs filesystem */
+ fd = mkstemp(&filename[0]);
+ if (fd == -1) {
+ ksft_print_msg("FAIL: Unable to open temporary file\n");
+ return -1;
+ }
+ unlink(&filename[0]);
+ return fd;
+}
diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.h b/tools/testing/selftests/arm64/mte/mte_common_util.h
new file mode 100644
index 000000000000..195a7d1879e6
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/mte_common_util.h
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2020 ARM Limited */
+
+#ifndef _MTE_COMMON_UTIL_H
+#define _MTE_COMMON_UTIL_H
+
+#include <signal.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <sys/auxv.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include "mte_def.h"
+#include "kselftest.h"
+
+enum mte_mem_type {
+ USE_MALLOC,
+ USE_MMAP,
+ USE_MPROTECT,
+};
+
+enum mte_mode {
+ MTE_NONE_ERR,
+ MTE_SYNC_ERR,
+ MTE_ASYNC_ERR,
+};
+
+struct mte_fault_cxt {
+ /* Address start which triggers mte tag fault */
+ unsigned long trig_addr;
+ /* Address range for the mte tag fault; a negative value means underflow */
+ ssize_t trig_range;
+ /* siginfo si code */
+ unsigned long trig_si_code;
+ /* Flag to denote if correct fault caught */
+ bool fault_valid;
+};
+
+extern struct mte_fault_cxt cur_mte_cxt;
+
+/* MTE utility functions */
+void mte_default_handler(int signum, siginfo_t *si, void *uc);
+void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *));
+void mte_wait_after_trig(void);
+void *mte_allocate_memory(size_t size, int mem_type, int mapping, bool tags);
+void *mte_allocate_memory_tag_range(size_t size, int mem_type, int mapping,
+ size_t range_before, size_t range_after);
+void *mte_allocate_file_memory(size_t size, int mem_type, int mapping,
+ bool tags, int fd);
+void *mte_allocate_file_memory_tag_range(size_t size, int mem_type, int mapping,
+ size_t range_before, size_t range_after, int fd);
+void mte_free_memory(void *ptr, size_t size, int mem_type, bool tags);
+void mte_free_memory_tag_range(void *ptr, size_t size, int mem_type,
+ size_t range_before, size_t range_after);
+void *mte_insert_tags(void *ptr, size_t size);
+void mte_clear_tags(void *ptr, size_t size);
+int mte_default_setup(void);
+void mte_restore_setup(void);
+int mte_switch_mode(int mte_option, unsigned long incl_mask);
+void mte_initialize_current_context(int mode, uintptr_t ptr, ssize_t range);
+
+/* Common utility functions */
+int create_temp_file(void);
+
+/* Assembly MTE utility functions */
+void *mte_insert_random_tag(void *ptr);
+void *mte_insert_new_tag(void *ptr);
+void *mte_get_tag_address(void *ptr);
+void mte_set_tag_address_range(void *ptr, int range);
+void mte_clear_tag_address_range(void *ptr, int range);
+void mte_disable_pstate_tco(void);
+void mte_enable_pstate_tco(void);
+unsigned int mte_get_pstate_tco(void);
+
+/* Test framework static inline functions/macros */
+static inline void evaluate_test(int err, const char *msg)
+{
+ if (err == KSFT_PASS)
+ ksft_test_result_pass(msg);
+ else if (err == KSFT_FAIL)
+ ksft_test_result_fail(msg);
+}
+
+static inline int check_allocated_memory(void *ptr, size_t size,
+ int mem_type, bool tags)
+{
+ if (ptr == NULL) {
+ ksft_print_msg("FAIL: memory allocation\n");
+ return KSFT_FAIL;
+ }
+
+ if (tags && !MT_FETCH_TAG((uintptr_t)ptr)) {
+ ksft_print_msg("FAIL: tag not found at addr(%p)\n", ptr);
+ mte_free_memory((void *)ptr, size, mem_type, false);
+ return KSFT_FAIL;
+ }
+
+ return KSFT_PASS;
+}
+
+static inline int check_allocated_memory_range(void *ptr, size_t size, int mem_type,
+ size_t range_before, size_t range_after)
+{
+ if (ptr == NULL) {
+ ksft_print_msg("FAIL: memory allocation\n");
+ return KSFT_FAIL;
+ }
+
+ if (!MT_FETCH_TAG((uintptr_t)ptr)) {
+ ksft_print_msg("FAIL: tag not found at addr(%p)\n", ptr);
+ mte_free_memory_tag_range((void *)ptr, size, mem_type, range_before,
+ range_after);
+ return KSFT_FAIL;
+ }
+ return KSFT_PASS;
+}
+
+#endif /* _MTE_COMMON_UTIL_H */
diff --git a/tools/testing/selftests/arm64/mte/mte_def.h b/tools/testing/selftests/arm64/mte/mte_def.h
new file mode 100644
index 000000000000..9b188254b61a
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/mte_def.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2020 ARM Limited */
+
+/*
+ * The definitions below may be found in kernel headers. However, they are
+ * redefined here to decouple the MTE selftests build from them.
+ */
+#ifndef SEGV_MTEAERR
+#define SEGV_MTEAERR 8
+#endif
+#ifndef SEGV_MTESERR
+#define SEGV_MTESERR 9
+#endif
+#ifndef PROT_MTE
+#define PROT_MTE 0x20
+#endif
+#ifndef HWCAP2_MTE
+#define HWCAP2_MTE (1 << 18)
+#endif
+
+#ifndef PR_MTE_TCF_SHIFT
+#define PR_MTE_TCF_SHIFT 1
+#endif
+#ifndef PR_MTE_TCF_NONE
+#define PR_MTE_TCF_NONE (0UL << PR_MTE_TCF_SHIFT)
+#endif
+#ifndef PR_MTE_TCF_SYNC
+#define PR_MTE_TCF_SYNC (1UL << PR_MTE_TCF_SHIFT)
+#endif
+#ifndef PR_MTE_TCF_ASYNC
+#define PR_MTE_TCF_ASYNC (2UL << PR_MTE_TCF_SHIFT)
+#endif
+#ifndef PR_MTE_TAG_SHIFT
+#define PR_MTE_TAG_SHIFT 3
+#endif
+
+/* MTE Hardware feature definitions below. */
+#define MT_TAG_SHIFT 56
+#define MT_TAG_MASK 0xFUL
+#define MT_FREE_TAG 0x0UL
+#define MT_GRANULE_SIZE 16
+#define MT_TAG_COUNT 16
+#define MT_INCLUDE_TAG_MASK 0xFFFF
+#define MT_EXCLUDE_TAG_MASK 0x0
+
+#define MT_ALIGN_GRANULE (MT_GRANULE_SIZE - 1)
+#define MT_CLEAR_TAG(x) ((x) & ~(MT_TAG_MASK << MT_TAG_SHIFT))
+#define MT_SET_TAG(x, y) ((x) | ((y) << MT_TAG_SHIFT))
+#define MT_FETCH_TAG(x) (((x) >> MT_TAG_SHIFT) & MT_TAG_MASK)
+#define MT_ALIGN_UP(x) (((x) + MT_ALIGN_GRANULE) & ~(MT_ALIGN_GRANULE))
+
+#define MT_PSTATE_TCO_SHIFT 25
+#define MT_PSTATE_TCO_MASK ~(0x1 << MT_PSTATE_TCO_SHIFT)
+#define MT_PSTATE_TCO_EN 1
+#define MT_PSTATE_TCO_DIS 0
+
+#define MT_EXCLUDE_TAG(x) (1 << (x))
+#define MT_INCLUDE_VALID_TAG(x) (MT_INCLUDE_TAG_MASK ^ MT_EXCLUDE_TAG(x))
+#define MT_INCLUDE_VALID_TAGS(x) (MT_INCLUDE_TAG_MASK ^ (x))
+#define MTE_ALLOW_NON_ZERO_TAG MT_INCLUDE_VALID_TAG(0)
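+
+/*
+ * Worked example of the inclusion-mask helpers above:
+ * MT_INCLUDE_VALID_TAG(0) = 0xFFFF ^ (1 << 0) = 0xFFFE, i.e. tag generation
+ * may pick any of the tags 0x1-0xF, which is what MTE_ALLOW_NON_ZERO_TAG
+ * encodes.
+ */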
diff --git a/tools/testing/selftests/arm64/mte/mte_helper.S b/tools/testing/selftests/arm64/mte/mte_helper.S
new file mode 100644
index 000000000000..a02c04cd0aac
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/mte_helper.S
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2020 ARM Limited */
+
+#include "mte_def.h"
+
+#define ENTRY(name) \
+ .globl name ;\
+ .p2align 2;\
+ .type name, @function ;\
+name:
+
+#define ENDPROC(name) \
+ .size name, .-name ;
+
+ .text
+/*
+ * mte_insert_random_tag: Insert a random tag; it may equal the source tag if
+ * the source pointer already has one.
+ * Input:
+ * x0 - source pointer with a tag/no-tag
+ * Return:
+ * x0 - pointer with random tag
+ */
+ENTRY(mte_insert_random_tag)
+ irg x0, x0, xzr
+ ret
+ENDPROC(mte_insert_random_tag)
+
+/*
+ * mte_insert_new_tag: Insert a new tag, guaranteed to differ from the source
+ * tag if the source pointer has one.
+ * Input:
+ * x0 - source pointer with a tag/no-tag
+ * Return:
+ * x0 - pointer with a new tag
+ */
+ENTRY(mte_insert_new_tag)
+ gmi x1, x0, xzr
+ irg x0, x0, x1
+ ret
+ENDPROC(mte_insert_new_tag)
+
+/*
+ * mte_get_tag_address: Get the allocation tag for the given address.
+ * Input:
+ * x0 - source pointer
+ * Return:
+ * x0 - pointer with appended tag
+ */
+ENTRY(mte_get_tag_address)
+ ldg x0, [x0]
+ ret
+ENDPROC(mte_get_tag_address)
+
+/*
+ * mte_set_tag_address_range: Set the tag range from the given address
+ * Input:
+ * x0 - source pointer with tag data
+ * x1 - range
+ * Return:
+ * none
+ */
+ENTRY(mte_set_tag_address_range)
+ cbz x1, 2f
+1:
+ stg x0, [x0, #0x0]
+ add x0, x0, #MT_GRANULE_SIZE
+ sub x1, x1, #MT_GRANULE_SIZE
+ cbnz x1, 1b
+2:
+ ret
+ENDPROC(mte_set_tag_address_range)
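+
+/*
+ * C-equivalent sketch of the loop above (illustrative only): one stg per
+ * 16-byte granule until the requested range is covered.
+ *
+ *	for (; range; range -= MT_GRANULE_SIZE, ptr += MT_GRANULE_SIZE)
+ *		stg(ptr);	// store the pointer's tag for this granule
+ */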
+
+/*
+ * mte_clear_tag_address_range: Clear the tag range from the given address
+ * Input:
+ * x0 - source pointer with tag data
+ * x1 - range
+ * Return:
+ * none
+ */
+ENTRY(mte_clear_tag_address_range)
+ cbz x1, 2f
+1:
+ stzg x0, [x0, #0x0]
+ add x0, x0, #MT_GRANULE_SIZE
+ sub x1, x1, #MT_GRANULE_SIZE
+ cbnz x1, 1b
+2:
+ ret
+ENDPROC(mte_clear_tag_address_range)
+
+/*
+ * mte_enable_pstate_tco: Enable PSTATE.TCO (tag check override) field
+ * Input:
+ * none
+ * Return:
+ * none
+ */
+ENTRY(mte_enable_pstate_tco)
+ msr tco, #MT_PSTATE_TCO_EN
+ ret
+ENDPROC(mte_enable_pstate_tco)
+
+/*
+ * mte_disable_pstate_tco: Disable PSTATE.TCO (tag check override) field
+ * Input:
+ * none
+ * Return:
+ * none
+ */
+ENTRY(mte_disable_pstate_tco)
+ msr tco, #MT_PSTATE_TCO_DIS
+ ret
+ENDPROC(mte_disable_pstate_tco)
+
+/*
+ * mte_get_pstate_tco: Get PSTATE.TCO (tag check override) field
+ * Input:
+ * none
+ * Return:
+ * x0
+ */
+ENTRY(mte_get_pstate_tco)
+ mrs x0, tco
+ ubfx x0, x0, #MT_PSTATE_TCO_SHIFT, #1
+ ret
+ENDPROC(mte_get_pstate_tco)
diff --git a/tools/testing/selftests/arm64/pauth/.gitignore b/tools/testing/selftests/arm64/pauth/.gitignore
new file mode 100644
index 000000000000..155137d92722
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/.gitignore
@@ -0,0 +1,2 @@
+exec_target
+pac
diff --git a/tools/testing/selftests/arm64/pauth/Makefile b/tools/testing/selftests/arm64/pauth/Makefile
new file mode 100644
index 000000000000..72e290b0b10c
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/Makefile
@@ -0,0 +1,39 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2020 ARM Limited
+
+# preserve CC value from top level Makefile
+ifeq ($(CC),cc)
+CC := $(CROSS_COMPILE)gcc
+endif
+
+CFLAGS += -mbranch-protection=pac-ret
+# check if the compiler supports ARMv8.3 and branch protection with PAuth
+pauth_cc_support := $(shell if ($(CC) $(CFLAGS) -march=armv8.3-a -E -x c /dev/null -o /dev/null 2>&1) then echo "1"; fi)
+
+ifeq ($(pauth_cc_support),1)
+TEST_GEN_PROGS := pac
+TEST_GEN_FILES := pac_corruptor.o helper.o
+TEST_GEN_PROGS_EXTENDED := exec_target
+endif
+
+include ../../lib.mk
+
+ifeq ($(pauth_cc_support),1)
+# pac* and aut* instructions are not available on architectures before
+# ARMv8.3. Therefore target ARMv8.3 wherever they are used directly.
+$(OUTPUT)/pac_corruptor.o: pac_corruptor.S
+ $(CC) -c $^ -o $@ $(CFLAGS) -march=armv8.3-a
+
+$(OUTPUT)/helper.o: helper.c
+ $(CC) -c $^ -o $@ $(CFLAGS) -march=armv8.3-a
+
+# when -mbranch-protection is enabled and the target architecture is ARMv8.3 or
+# greater, gcc emits pac* instructions which are not in HINT NOP space,
+# preventing the tests from running at all. Compile for ARMv8.2 so tests can
+# run on earlier targets and print meaningful error messages.
+$(OUTPUT)/exec_target: exec_target.c $(OUTPUT)/helper.o
+ $(CC) $^ -o $@ $(CFLAGS) -march=armv8.2-a
+
+$(OUTPUT)/pac: pac.c $(OUTPUT)/pac_corruptor.o $(OUTPUT)/helper.o
+ $(CC) $^ -o $@ $(CFLAGS) -march=armv8.2-a
+endif
diff --git a/tools/testing/selftests/arm64/pauth/exec_target.c b/tools/testing/selftests/arm64/pauth/exec_target.c
new file mode 100644
index 000000000000..4435600ca400
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/exec_target.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/auxv.h>
+
+#include "helper.h"
+
+int main(void)
+{
+ struct signatures signed_vals;
+ unsigned long hwcaps;
+ size_t val;
+
+ fread(&val, sizeof(size_t), 1, stdin);
+
+ /*
+ * Don't try to execute illegal (unimplemented) instructions; the
+ * caller should have checked this, which keeps the worker simple.
+ */
+ hwcaps = getauxval(AT_HWCAP);
+
+ if (hwcaps & HWCAP_PACA) {
+ signed_vals.keyia = keyia_sign(val);
+ signed_vals.keyib = keyib_sign(val);
+ signed_vals.keyda = keyda_sign(val);
+ signed_vals.keydb = keydb_sign(val);
+ }
+ signed_vals.keyg = (hwcaps & HWCAP_PACG) ? keyg_sign(val) : 0;
+
+ fwrite(&signed_vals, sizeof(struct signatures), 1, stdout);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/arm64/pauth/helper.c b/tools/testing/selftests/arm64/pauth/helper.c
new file mode 100644
index 000000000000..2c201e7d0d50
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/helper.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#include "helper.h"
+
+size_t keyia_sign(size_t ptr)
+{
+ asm volatile("paciza %0" : "+r" (ptr));
+ return ptr;
+}
+
+size_t keyib_sign(size_t ptr)
+{
+ asm volatile("pacizb %0" : "+r" (ptr));
+ return ptr;
+}
+
+size_t keyda_sign(size_t ptr)
+{
+ asm volatile("pacdza %0" : "+r" (ptr));
+ return ptr;
+}
+
+size_t keydb_sign(size_t ptr)
+{
+ asm volatile("pacdzb %0" : "+r" (ptr));
+ return ptr;
+}
+
+size_t keyg_sign(size_t ptr)
+{
+ /* output is encoded in the upper 32 bits */
+ size_t dest = 0;
+ size_t modifier = 0;
+
+ asm volatile("pacga %0, %1, %2" : "=r" (dest) : "r" (ptr), "r" (modifier));
+
+ return dest;
+}
diff --git a/tools/testing/selftests/arm64/pauth/helper.h b/tools/testing/selftests/arm64/pauth/helper.h
new file mode 100644
index 000000000000..652496c7b411
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/helper.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2020 ARM Limited */
+
+#ifndef _HELPER_H_
+#define _HELPER_H_
+
+#include <stdlib.h>
+
+#define NKEYS 5
+
+struct signatures {
+ size_t keyia;
+ size_t keyib;
+ size_t keyda;
+ size_t keydb;
+ size_t keyg;
+};
+
+void pac_corruptor(void);
+
+/* PAuth sign a value with key ia and modifier value 0 */
+size_t keyia_sign(size_t val);
+size_t keyib_sign(size_t val);
+size_t keyda_sign(size_t val);
+size_t keydb_sign(size_t val);
+size_t keyg_sign(size_t val);
+
+#endif
diff --git a/tools/testing/selftests/arm64/pauth/pac.c b/tools/testing/selftests/arm64/pauth/pac.c
new file mode 100644
index 000000000000..592fe538506e
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/pac.c
@@ -0,0 +1,368 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <sys/auxv.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <sched.h>
+
+#include "../../kselftest_harness.h"
+#include "helper.h"
+
+#define PAC_COLLISION_ATTEMPTS 10
+/*
+ * The kernel sets TBID by default, so bits 55 and above should remain
+ * untouched no matter what.
+ * The default VA space size is 48 bits; bigger is opt-in.
+ */
+#define PAC_MASK (~0xff80ffffffffffff)
+#define ARBITRARY_VALUE (0x1234)
+#define ASSERT_PAUTH_ENABLED() \
+do { \
+ unsigned long hwcaps = getauxval(AT_HWCAP); \
+ /* data key instructions are not in NOP space. This prevents a SIGILL */ \
+ ASSERT_NE(0, hwcaps & HWCAP_PACA) TH_LOG("PAUTH not enabled"); \
+} while (0)
+#define ASSERT_GENERIC_PAUTH_ENABLED() \
+do { \
+ unsigned long hwcaps = getauxval(AT_HWCAP); \
+ /* generic key instructions are not in NOP space. This prevents a SIGILL */ \
+ ASSERT_NE(0, hwcaps & HWCAP_PACG) TH_LOG("Generic PAUTH not enabled"); \
+} while (0)
+
+void sign_specific(struct signatures *sign, size_t val)
+{
+ sign->keyia = keyia_sign(val);
+ sign->keyib = keyib_sign(val);
+ sign->keyda = keyda_sign(val);
+ sign->keydb = keydb_sign(val);
+}
+
+void sign_all(struct signatures *sign, size_t val)
+{
+ sign->keyia = keyia_sign(val);
+ sign->keyib = keyib_sign(val);
+ sign->keyda = keyda_sign(val);
+ sign->keydb = keydb_sign(val);
+ sign->keyg = keyg_sign(val);
+}
+
+int n_same(struct signatures *old, struct signatures *new, int nkeys)
+{
+ int res = 0;
+
+ res += old->keyia == new->keyia;
+ res += old->keyib == new->keyib;
+ res += old->keyda == new->keyda;
+ res += old->keydb == new->keydb;
+ if (nkeys == NKEYS)
+ res += old->keyg == new->keyg;
+
+ return res;
+}
+
+int n_same_single_set(struct signatures *sign, int nkeys)
+{
+ size_t vals[nkeys];
+ int same = 0;
+
+ vals[0] = sign->keyia & PAC_MASK;
+ vals[1] = sign->keyib & PAC_MASK;
+ vals[2] = sign->keyda & PAC_MASK;
+ vals[3] = sign->keydb & PAC_MASK;
+
+ if (nkeys == NKEYS)
+ vals[4] = sign->keyg & PAC_MASK;
+
+ for (int i = 0; i < nkeys - 1; i++) {
+ for (int j = i + 1; j < nkeys; j++) {
+ if (vals[i] == vals[j])
+ same += 1;
+ }
+ }
+ return same;
+}
+
+int exec_sign_all(struct signatures *signed_vals, size_t val)
+{
+ int new_stdin[2];
+ int new_stdout[2];
+ int status;
+ int i;
+ ssize_t ret;
+ pid_t pid;
+ cpu_set_t mask;
+
+ ret = pipe(new_stdin);
+ if (ret == -1) {
+ perror("pipe returned error");
+ return -1;
+ }
+
+ ret = pipe(new_stdout);
+ if (ret == -1) {
+ perror("pipe returned error");
+ return -1;
+ }
+
+ /*
+ * Pin this process and all its children to a single CPU, so that a
+ * context switch with the child is guaranteed.
+ */
+ sched_getaffinity(0, sizeof(mask), &mask);
+
+ for (i = 0; i < sizeof(cpu_set_t); i++)
+ if (CPU_ISSET(i, &mask))
+ break;
+
+ CPU_ZERO(&mask);
+ CPU_SET(i, &mask);
+ sched_setaffinity(0, sizeof(mask), &mask);
+
+ pid = fork();
+ // child
+ if (pid == 0) {
+ ret = dup2(new_stdin[0], STDIN_FILENO);
+ if (ret == -1) {
+ perror("dup2 returned error");
+ exit(1);
+ }
+
+ ret = dup2(new_stdout[1], STDOUT_FILENO);
+ if (ret == -1) {
+ perror("dup2 returned error");
+ exit(1);
+ }
+
+ close(new_stdin[0]);
+ close(new_stdin[1]);
+ close(new_stdout[0]);
+ close(new_stdout[1]);
+
+ ret = execl("exec_target", "exec_target", (char *)NULL);
+ if (ret == -1) {
+ perror("exec returned error");
+ exit(1);
+ }
+ }
+
+ close(new_stdin[0]);
+ close(new_stdout[1]);
+
+ ret = write(new_stdin[1], &val, sizeof(size_t));
+ if (ret == -1) {
+ perror("write returned error");
+ return -1;
+ }
+
+ /*
+ * Wait for the worker to finish so that read() sees all the data. This
+ * also context switches with the worker, so this function can be used
+ * for context-switch tests.
+ */
+ waitpid(pid, &status, 0);
+ if (WIFEXITED(status) == 0) {
+ fprintf(stderr, "worker exited unexpectedly\n");
+ return -1;
+ }
+ if (WEXITSTATUS(status) != 0) {
+ fprintf(stderr, "worker exited with error\n");
+ return -1;
+ }
+
+ ret = read(new_stdout[0], signed_vals, sizeof(struct signatures));
+ if (ret == -1) {
+ perror("read returned error");
+ return -1;
+ }
+
+ return 0;
+}
+
+sigjmp_buf jmpbuf;
+void pac_signal_handler(int signum, siginfo_t *si, void *uc)
+{
+ if (signum == SIGSEGV || signum == SIGILL)
+ siglongjmp(jmpbuf, 1);
+}
+
+/* check that a corrupted PAC results in SIGSEGV or SIGILL */
+TEST(corrupt_pac)
+{
+ struct sigaction sa;
+
+ ASSERT_PAUTH_ENABLED();
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ sa.sa_sigaction = pac_signal_handler;
+ sa.sa_flags = SA_SIGINFO | SA_RESETHAND;
+ sigemptyset(&sa.sa_mask);
+
+ sigaction(SIGSEGV, &sa, NULL);
+ sigaction(SIGILL, &sa, NULL);
+
+ pac_corruptor();
+ ASSERT_TRUE(0) TH_LOG("SIGSEGV/SIGILL signal did not occur");
+ }
+}
+
+/*
+ * There are no separate pac* and aut* controls so checking only the pac*
+ * instructions is sufficient
+ */
+TEST(pac_instructions_not_nop)
+{
+ size_t keyia = 0;
+ size_t keyib = 0;
+ size_t keyda = 0;
+ size_t keydb = 0;
+
+ ASSERT_PAUTH_ENABLED();
+
+ for (int i = 0; i < PAC_COLLISION_ATTEMPTS; i++) {
+ keyia |= keyia_sign(i) & PAC_MASK;
+ keyib |= keyib_sign(i) & PAC_MASK;
+ keyda |= keyda_sign(i) & PAC_MASK;
+ keydb |= keydb_sign(i) & PAC_MASK;
+ }
+
+ ASSERT_NE(0, keyia) TH_LOG("keyia instructions did nothing");
+ ASSERT_NE(0, keyib) TH_LOG("keyib instructions did nothing");
+ ASSERT_NE(0, keyda) TH_LOG("keyda instructions did nothing");
+ ASSERT_NE(0, keydb) TH_LOG("keydb instructions did nothing");
+}
+
+TEST(pac_instructions_not_nop_generic)
+{
+ size_t keyg = 0;
+
+ ASSERT_GENERIC_PAUTH_ENABLED();
+
+ for (int i = 0; i < PAC_COLLISION_ATTEMPTS; i++)
+ keyg |= keyg_sign(i) & PAC_MASK;
+
+ ASSERT_NE(0, keyg) TH_LOG("keyg instructions did nothing");
+}
+
+TEST(single_thread_different_keys)
+{
+ int same = 10;
+ int nkeys = NKEYS;
+ int tmp;
+ struct signatures signed_vals;
+ unsigned long hwcaps = getauxval(AT_HWCAP);
+
+ /* generic and data key instructions are not in NOP space. This prevents a SIGILL */
+ ASSERT_NE(0, hwcaps & HWCAP_PACA) TH_LOG("PAUTH not enabled");
+ if (!(hwcaps & HWCAP_PACG)) {
+ TH_LOG("WARNING: Generic PAUTH not enabled. Skipping generic key checks");
+ nkeys = NKEYS - 1;
+ }
+
+ /*
+ * In Linux the PAC field can be up to 7 bits wide. Even if the keys
+ * differ, there is roughly a 5% chance that the PACs for different
+ * addresses collide, and this chance rises rapidly as fewer bits are
+ * allocated to the PAC (e.g. with a wider address space). A direct
+ * comparison of the keys would be more reliable, but the keys are not
+ * readable from userspace. Hence every signed value needs to differ at
+ * least once out of n attempts to be certain that the keys are
+ * different.
+ */
+ for (int i = 0; i < PAC_COLLISION_ATTEMPTS; i++) {
+ if (nkeys == NKEYS)
+ sign_all(&signed_vals, i);
+ else
+ sign_specific(&signed_vals, i);
+
+ tmp = n_same_single_set(&signed_vals, nkeys);
+ if (tmp < same)
+ same = tmp;
+ }
+
+ ASSERT_EQ(0, same) TH_LOG("%d keys clashed every time", same);
+}
+
+/*
+ * fork() does not change the keys; only exec() does, so call a worker
+ * program. Its only job is to sign a value and report back the results.
+ */
+TEST(exec_changed_keys)
+{
+ struct signatures new_keys;
+ struct signatures old_keys;
+ int ret;
+ int same = 10;
+ int nkeys = NKEYS;
+ unsigned long hwcaps = getauxval(AT_HWCAP);
+
+ /* generic and data key instructions are not in NOP space. This prevents a SIGILL */
+ ASSERT_NE(0, hwcaps & HWCAP_PACA) TH_LOG("PAUTH not enabled");
+ if (!(hwcaps & HWCAP_PACG)) {
+ TH_LOG("WARNING: Generic PAUTH not enabled. Skipping generic key checks");
+ nkeys = NKEYS - 1;
+ }
+
+ for (int i = 0; i < PAC_COLLISION_ATTEMPTS; i++) {
+ ret = exec_sign_all(&new_keys, i);
+ ASSERT_EQ(0, ret) TH_LOG("failed to run worker");
+
+ if (nkeys == NKEYS)
+ sign_all(&old_keys, i);
+ else
+ sign_specific(&old_keys, i);
+
+ ret = n_same(&old_keys, &new_keys, nkeys);
+ if (ret < same)
+ same = ret;
+ }
+
+ ASSERT_EQ(0, same) TH_LOG("exec() did not change %d keys", same);
+}
+
+TEST(context_switch_keep_keys)
+{
+ int ret;
+ struct signatures trash;
+ struct signatures before;
+ struct signatures after;
+
+ ASSERT_PAUTH_ENABLED();
+
+ sign_specific(&before, ARBITRARY_VALUE);
+
+ /* will context switch with a process with different keys at least once */
+ ret = exec_sign_all(&trash, ARBITRARY_VALUE);
+ ASSERT_EQ(0, ret) TH_LOG("failed to run worker");
+
+ sign_specific(&after, ARBITRARY_VALUE);
+
+ ASSERT_EQ(before.keyia, after.keyia) TH_LOG("keyia changed after context switching");
+ ASSERT_EQ(before.keyib, after.keyib) TH_LOG("keyib changed after context switching");
+ ASSERT_EQ(before.keyda, after.keyda) TH_LOG("keyda changed after context switching");
+ ASSERT_EQ(before.keydb, after.keydb) TH_LOG("keydb changed after context switching");
+}
+
+TEST(context_switch_keep_keys_generic)
+{
+ int ret;
+ struct signatures trash;
+ size_t before;
+ size_t after;
+
+ ASSERT_GENERIC_PAUTH_ENABLED();
+
+ before = keyg_sign(ARBITRARY_VALUE);
+
+ /* will context switch with a process with different keys at least once */
+ ret = exec_sign_all(&trash, ARBITRARY_VALUE);
+ ASSERT_EQ(0, ret) TH_LOG("failed to run worker");
+
+ after = keyg_sign(ARBITRARY_VALUE);
+
+ ASSERT_EQ(before, after) TH_LOG("keyg changed after context switching");
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/arm64/pauth/pac_corruptor.S b/tools/testing/selftests/arm64/pauth/pac_corruptor.S
new file mode 100644
index 000000000000..aa6588050752
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/pac_corruptor.S
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2020 ARM Limited */
+
+.global pac_corruptor
+
+.text
+/*
+ * Corrupting a single bit of the PAC ensures the authentication will fail. It
+ * also guarantees no possible collision. TCR_EL1.TBI0 is set by default, so
+ * the top byte of the PAC is not tested.
+ */
+ pac_corruptor:
+ paciasp
+
+ /* corrupt the top bit of the PAC */
+ eor lr, lr, #1 << 53
+
+ autiasp
+ ret
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 9a0946ddb705..3ab1200e172f 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -13,9 +13,7 @@ test_verifier_log
feature
test_sock
test_sock_addr
-test_sock_fields
urandom_read
-test_btf
test_sockmap
test_lirc_mode2_user
get_cgroup_id_user
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index fc946b7ac288..bdbeafec371b 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -33,9 +33,9 @@ LDLIBS += -lcap -lelf -lz -lrt -lpthread
# Order correspond to 'make run_tests' order
TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
test_verifier_log test_dev_cgroup test_tcpbpf_user \
- test_sock test_btf test_sockmap get_cgroup_id_user test_socket_cookie \
+ test_sock test_sockmap get_cgroup_id_user test_socket_cookie \
test_cgroup_storage \
- test_netcnt test_tcpnotify_user test_sock_fields test_sysctl \
+ test_netcnt test_tcpnotify_user test_sysctl \
test_progs-no_alu32 \
test_current_pid_tgid_new_ns
@@ -68,7 +68,8 @@ TEST_PROGS := test_kmod.sh \
test_tc_edt.sh \
test_xdping.sh \
test_bpftool_build.sh \
- test_bpftool.sh
+ test_bpftool.sh \
+ test_bpftool_metadata.sh
TEST_PROGS_EXTENDED := with_addr.sh \
with_tunnels.sh \
@@ -176,6 +177,11 @@ $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \
$(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \
OUTPUT=$(BUILD_DIR)/bpftool/ \
prefix= DESTDIR=$(SCRATCH_DIR)/ install
+ $(Q)mkdir -p $(BUILD_DIR)/bpftool/Documentation
+ $(Q)RST2MAN_OPTS="--exit-status=1" $(MAKE) $(submake_extras) \
+ -C $(BPFTOOLDIR)/Documentation \
+ OUTPUT=$(BUILD_DIR)/bpftool/Documentation/ \
+ prefix= DESTDIR=$(SCRATCH_DIR)/ install
$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
../../../include/uapi/linux/bpf.h \
@@ -316,7 +322,7 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o: \
$(TRUNNER_BPF_PROGS_DIR)/%.c \
$(TRUNNER_BPF_PROGS_DIR)/*.h \
$$(INCLUDE_DIR)/vmlinux.h \
- $$(BPFOBJ) | $(TRUNNER_OUTPUT)
+ $(wildcard $(BPFDIR)/bpf_*.h) | $(TRUNNER_OUTPUT)
$$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \
$(TRUNNER_BPF_CFLAGS), \
$(TRUNNER_BPF_LDFLAGS))
diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst
index e885d351595f..ac9eda830187 100644
--- a/tools/testing/selftests/bpf/README.rst
+++ b/tools/testing/selftests/bpf/README.rst
@@ -7,6 +7,44 @@ General instructions on running selftests can be found in
Additional information about selftest failures are
documented here.
+profiler[23] test failures with clang/llvm <12.0.0
+==================================================
+
+With clang/llvm <12.0.0, the profiler[23] test may fail.
+The symptom looks like:
+
+.. code-block:: c
+
+ // r9 is a pointer to map_value
+ // r7 is a scalar
+ 17: bf 96 00 00 00 00 00 00 r6 = r9
+ 18: 0f 76 00 00 00 00 00 00 r6 += r7
+ math between map_value pointer and register with unbounded min value is not allowed
+
+ // the instructions below will not be seen in the verifier log
+ 19: a5 07 01 00 01 01 00 00 if r7 < 257 goto +1
+ 20: bf 96 00 00 00 00 00 00 r6 = r9
+ // r6 is used here
+
+The verifier will reject such code with the above error.
+At insn 18, r7 is indeed unbounded. The later insn 19 checks the bounds and
+insn 20 undoes the map_value addition. It is currently impossible for the
+verifier to understand such speculative pointer arithmetic.
+Hence
+ https://reviews.llvm.org/D85570
+addresses it on the compiler side. The fix was committed in llvm 12.
+
+The corresponding C code:
+
+.. code-block:: c
+
+ for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) {
+ filepart_length = bpf_probe_read_str(payload, ...);
+ if (filepart_length <= MAX_PATH) {
+ barrier_var(filepart_length); // workaround
+ payload += filepart_length;
+ }
+ }
+
bpf_iter test failures with clang/llvm 10.0.0
=============================================
@@ -43,3 +81,24 @@ This is due to a llvm BPF backend bug. The fix
https://reviews.llvm.org/D78466
has been pushed to llvm 10.x release branch and will be
available in 10.0.1. The fix is available in llvm 11.0.0 trunk.
+
+BPF CO-RE-based tests and Clang version
+=======================================
+
+A number of selftests use BPF target-specific built-ins, which might require
+bleeding-edge Clang versions (Clang 12 nightly at this time).
+
+A few sub-tests of the core_reloc test suite (part of the test_progs test
+runner) require the following built-ins, listed with the corresponding
+Clang diffs that introduced them. These sub-tests will be skipped if Clang
+is too old to support them; they shouldn't cause build failures or runtime
+test failures:
+
+ - __builtin_btf_type_id() ([0], [1], [2]);
+ - __builtin_preserve_type_info(), __builtin_preserve_enum_value() ([3], [4]).
+
+ [0] https://reviews.llvm.org/D74572
+ [1] https://reviews.llvm.org/D74668
+ [2] https://reviews.llvm.org/D85174
+ [3] https://reviews.llvm.org/D83878
+ [4] https://reviews.llvm.org/D83242
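
As an aside, the barrier_var() workaround shown in the profiler C snippet above is a one-line compiler barrier. A minimal sketch of how it can be defined (the selftests get it from bpf_helpers.h; the exact spelling there may differ):

    /* Force "var" through a register and hide its value from the
     * optimizer, so clang cannot reorder the bounds check past the
     * pointer arithmetic it guards.
     */
    #define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var))

The empty asm consumes and reproduces the variable, which pins its evaluation point without emitting any machine instructions.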
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index 944ad4721c83..332ed2f7b402 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -311,12 +311,12 @@ extern const struct bench bench_rename_kretprobe;
extern const struct bench bench_rename_rawtp;
extern const struct bench bench_rename_fentry;
extern const struct bench bench_rename_fexit;
-extern const struct bench bench_rename_fmodret;
extern const struct bench bench_trig_base;
extern const struct bench bench_trig_tp;
extern const struct bench bench_trig_rawtp;
extern const struct bench bench_trig_kprobe;
extern const struct bench bench_trig_fentry;
+extern const struct bench bench_trig_fentry_sleep;
extern const struct bench bench_trig_fmodret;
extern const struct bench bench_rb_libbpf;
extern const struct bench bench_rb_custom;
@@ -332,12 +332,12 @@ static const struct bench *benchs[] = {
&bench_rename_rawtp,
&bench_rename_fentry,
&bench_rename_fexit,
- &bench_rename_fmodret,
&bench_trig_base,
&bench_trig_tp,
&bench_trig_rawtp,
&bench_trig_kprobe,
&bench_trig_fentry,
+ &bench_trig_fentry_sleep,
&bench_trig_fmodret,
&bench_rb_libbpf,
&bench_rb_custom,
@@ -462,4 +462,3 @@ int main(int argc, char **argv)
return 0;
}
-
diff --git a/tools/testing/selftests/bpf/benchs/bench_rename.c b/tools/testing/selftests/bpf/benchs/bench_rename.c
index e74cff40f4fe..a967674098ad 100644
--- a/tools/testing/selftests/bpf/benchs/bench_rename.c
+++ b/tools/testing/selftests/bpf/benchs/bench_rename.c
@@ -106,12 +106,6 @@ static void setup_fexit()
attach_bpf(ctx.skel->progs.prog5);
}
-static void setup_fmodret()
-{
- setup_ctx();
- attach_bpf(ctx.skel->progs.prog6);
-}
-
static void *consumer(void *input)
{
return NULL;
@@ -182,14 +176,3 @@ const struct bench bench_rename_fexit = {
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
};
-
-const struct bench bench_rename_fmodret = {
- .name = "rename-fmodret",
- .validate = validate,
- .setup = setup_fmodret,
- .producer_thread = producer,
- .consumer_thread = consumer,
- .measure = measure,
- .report_progress = hits_drops_report_progress,
- .report_final = hits_drops_report_final,
-};
diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
index 49c22832f216..2a0b6c9885a4 100644
--- a/tools/testing/selftests/bpf/benchs/bench_trigger.c
+++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -90,6 +90,12 @@ static void trigger_fentry_setup()
attach_bpf(ctx.skel->progs.bench_trigger_fentry);
}
+static void trigger_fentry_sleep_setup()
+{
+ setup_ctx();
+ attach_bpf(ctx.skel->progs.bench_trigger_fentry_sleep);
+}
+
static void trigger_fmodret_setup()
{
setup_ctx();
@@ -155,6 +161,17 @@ const struct bench bench_trig_fentry = {
.report_final = hits_drops_report_final,
};
+const struct bench bench_trig_fentry_sleep = {
+ .name = "trig-fentry-sleep",
+ .validate = trigger_validate,
+ .setup = trigger_fentry_sleep_setup,
+ .producer_thread = trigger_producer,
+ .consumer_thread = trigger_consumer,
+ .measure = trigger_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
const struct bench bench_trig_fmodret = {
.name = "trig-fmodret",
.validate = trigger_validate,
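
The new trig-fentry-sleep benchmark exercises sleepable BPF programs. The BPF side is not part of this hunk; a minimal sketch of what such a program looks like (the attach point and counter name are assumptions based on the non-sleepable fentry variant):

    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>

    long hits = 0;

    /* the ".s" suffix in the section name marks the program sleepable */
    SEC("fentry.s/__x64_sys_getpgid")
    int bench_trigger_fentry_sleep(void *ctx)
    {
            __sync_add_and_fetch(&hits, 1);
            return 0;
    }

    char _license[] SEC("license") = "GPL";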
diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
index 5bf2fe9b1efa..2915664c335d 100644
--- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
@@ -16,6 +16,7 @@ BPF_PROG(name, args)
struct sock_common {
unsigned char skc_state;
+ __u16 skc_num;
} __attribute__((preserve_access_index));
enum sk_pacing {
@@ -45,6 +46,10 @@ struct inet_connection_sock {
__u64 icsk_ca_priv[104 / sizeof(__u64)];
} __attribute__((preserve_access_index));
+struct request_sock {
+ struct sock_common __req_common;
+} __attribute__((preserve_access_index));
+
struct tcp_sock {
struct inet_connection_sock inet_conn;
@@ -115,14 +120,6 @@ enum tcp_ca_event {
CA_EVENT_ECN_IS_CE = 5,
};
-enum tcp_ca_state {
- TCP_CA_Open = 0,
- TCP_CA_Disorder = 1,
- TCP_CA_CWR = 2,
- TCP_CA_Recovery = 3,
- TCP_CA_Loss = 4
-};
-
struct ack_sample {
__u32 pkts_acked;
__s32 rtt_us;
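
The minimal struct mirrors in bpf_tcp_helpers.h rely on __attribute__((preserve_access_index)): clang records a CO-RE relocation for every field access through such a type, and libbpf rewrites the offsets at load time to match the running kernel. A short sketch of what the new skc_num field enables (the helper name is illustrative, and the sock_common definition is the one from the header above):

    static inline __u16 get_local_port(const struct sock_common *skc)
    {
            /* plain dereference; the offset of skc_num is fixed up
             * via a CO-RE relocation when the program is loaded */
            return skc->skc_num;
    }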
diff --git a/tools/testing/selftests/bpf/flow_dissector_load.h b/tools/testing/selftests/bpf/flow_dissector_load.h
index daeaeb518894..7290401ec172 100644
--- a/tools/testing/selftests/bpf/flow_dissector_load.h
+++ b/tools/testing/selftests/bpf/flow_dissector_load.h
@@ -23,7 +23,13 @@ static inline int bpf_flow_load(struct bpf_object **obj,
if (ret)
return ret;
- main_prog = bpf_object__find_program_by_title(*obj, section_name);
+ main_prog = NULL;
+ bpf_object__for_each_program(prog, *obj) {
+ if (strcmp(section_name, bpf_program__section_name(prog)) == 0) {
+ main_prog = prog;
+ break;
+ }
+ }
if (!main_prog)
return -1;
diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index f56655690f9b..12ee40284da0 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -104,6 +104,43 @@ error_close:
return -1;
}
+int fastopen_connect(int server_fd, const char *data, unsigned int data_len,
+ int timeout_ms)
+{
+ struct sockaddr_storage addr;
+ socklen_t addrlen = sizeof(addr);
+ struct sockaddr_in *addr_in;
+ int fd, ret;
+
+ if (getsockname(server_fd, (struct sockaddr *)&addr, &addrlen)) {
+ log_err("Failed to get server addr");
+ return -1;
+ }
+
+ addr_in = (struct sockaddr_in *)&addr;
+ fd = socket(addr_in->sin_family, SOCK_STREAM, 0);
+ if (fd < 0) {
+ log_err("Failed to create client socket");
+ return -1;
+ }
+
+ if (settimeo(fd, timeout_ms))
+ goto error_close;
+
+ ret = sendto(fd, data, data_len, MSG_FASTOPEN, (struct sockaddr *)&addr,
+ addrlen);
+ if (ret != data_len) {
+ log_err("sendto(data, %u) != %d\n", data_len, ret);
+ goto error_close;
+ }
+
+ return fd;
+
+error_close:
+ save_errno_close(fd);
+ return -1;
+}
+
static int connect_fd_to_addr(int fd,
const struct sockaddr_storage *addr,
socklen_t addrlen)
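
A hedged usage sketch for the new fastopen_connect() helper; it assumes the client-side TCP Fast Open sysctl bit is enabled, since MSG_FASTOPEN otherwise falls back to a regular connect or fails:

    static void fastopen_example(void)
    {
            int srv_fd, cli_fd;

            srv_fd = start_server(AF_INET, SOCK_STREAM, "127.0.0.1", 0, 0);
            if (srv_fd < 0)
                    return;
            /* sends "hello" in the SYN payload via MSG_FASTOPEN */
            cli_fd = fastopen_connect(srv_fd, "hello", 5, 0);
            if (cli_fd >= 0)
                    close(cli_fd);
            close(srv_fd);
    }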
diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
index c3728f6667e4..7205f8afdba1 100644
--- a/tools/testing/selftests/bpf/network_helpers.h
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -37,6 +37,8 @@ int start_server(int family, int type, const char *addr, __u16 port,
int timeout_ms);
int connect_to_fd(int server_fd, int timeout_ms);
int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms);
+int fastopen_connect(int server_fd, const char *data, unsigned int data_len,
+ int timeout_ms);
int make_sockaddr(int family, const char *addr_str, __u16 port,
struct sockaddr_storage *addr, socklen_t *len);
diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c
index c548aded6585..52414058a627 100644
--- a/tools/testing/selftests/bpf/prog_tests/align.c
+++ b/tools/testing/selftests/bpf/prog_tests/align.c
@@ -195,13 +195,13 @@ static struct bpf_align_test tests[] = {
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
{7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
- {8, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {8, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"},
{9, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
- {10, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {10, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"},
{11, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
- {12, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {12, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"},
{13, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
- {14, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {14, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"},
{15, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
{16, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
},
@@ -518,7 +518,7 @@ static struct bpf_align_test tests[] = {
* the total offset is 4-byte aligned and meets the
* load's requirements.
*/
- {20, "R5=pkt(id=1,off=0,r=4,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc)"},
+ {20, "R5=pkt(id=2,off=0,r=4,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc)"},
},
},
@@ -561,18 +561,18 @@ static struct bpf_align_test tests[] = {
/* Adding 14 makes R6 be (4n+2) */
{11, "R6_w=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"},
/* Subtracting from packet pointer overflows ubounds */
- {13, "R5_w=pkt(id=1,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c)"},
+ {13, "R5_w=pkt(id=2,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c)"},
/* New unknown value in R7 is (4n), >= 76 */
{15, "R7_w=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"},
/* Adding it to packet pointer gives nice bounds again */
- {16, "R5_w=pkt(id=2,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"},
+ {16, "R5_w=pkt(id=3,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (0)
* which is 2. Then the variable offset is (4n+2), so
* the total offset is 4-byte aligned and meets the
* load's requirements.
*/
- {20, "R5=pkt(id=2,off=0,r=4,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"},
+ {20, "R5=pkt(id=3,off=0,r=4,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"},
},
},
};
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 7375d9a6d242..448885b95eed 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -7,6 +7,7 @@
#include "bpf_iter_task.skel.h"
#include "bpf_iter_task_stack.skel.h"
#include "bpf_iter_task_file.skel.h"
+#include "bpf_iter_task_btf.skel.h"
#include "bpf_iter_tcp4.skel.h"
#include "bpf_iter_tcp6.skel.h"
#include "bpf_iter_udp4.skel.h"
@@ -132,20 +133,118 @@ static void test_task_stack(void)
bpf_iter_task_stack__destroy(skel);
}
+static void *do_nothing(void *arg)
+{
+ pthread_exit(arg);
+}
+
static void test_task_file(void)
{
struct bpf_iter_task_file *skel;
+ pthread_t thread_id;
+ void *ret;
skel = bpf_iter_task_file__open_and_load();
if (CHECK(!skel, "bpf_iter_task_file__open_and_load",
"skeleton open_and_load failed\n"))
return;
+ skel->bss->tgid = getpid();
+
+ if (CHECK(pthread_create(&thread_id, NULL, &do_nothing, NULL),
+ "pthread_create", "pthread_create failed\n"))
+ goto done;
+
do_dummy_read(skel->progs.dump_task_file);
+ if (CHECK(pthread_join(thread_id, &ret) || ret != NULL,
+ "pthread_join", "pthread_join failed\n"))
+ goto done;
+
+ CHECK(skel->bss->count != 0, "check_count",
+ "invalid non pthread file visit count %d\n", skel->bss->count);
+
+done:
bpf_iter_task_file__destroy(skel);
}
+#define TASKBUFSZ 32768
+
+static char taskbuf[TASKBUFSZ];
+
+static int do_btf_read(struct bpf_iter_task_btf *skel)
+{
+ struct bpf_program *prog = skel->progs.dump_task_struct;
+ struct bpf_iter_task_btf__bss *bss = skel->bss;
+ int iter_fd = -1, len = 0, bufleft = TASKBUFSZ;
+ struct bpf_link *link;
+ char *buf = taskbuf;
+ int ret = 0;
+
+ link = bpf_program__attach_iter(prog, NULL);
+ if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ return ret;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+ goto free_link;
+
+ do {
+ len = read(iter_fd, buf, bufleft);
+ if (len > 0) {
+ buf += len;
+ bufleft -= len;
+ }
+ } while (len > 0);
+
+ if (bss->skip) {
+ printf("%s:SKIP:no __builtin_btf_type_id\n", __func__);
+ ret = 1;
+ test__skip();
+ goto free_link;
+ }
+
+ if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+ goto free_link;
+
+ CHECK(strstr(taskbuf, "(struct task_struct)") == NULL,
+ "check for btf representation of task_struct in iter data",
+ "struct task_struct not found");
+free_link:
+ if (iter_fd > 0)
+ close(iter_fd);
+ bpf_link__destroy(link);
+ return ret;
+}
+
+static void test_task_btf(void)
+{
+ struct bpf_iter_task_btf__bss *bss;
+ struct bpf_iter_task_btf *skel;
+ int ret;
+
+ skel = bpf_iter_task_btf__open_and_load();
+ if (CHECK(!skel, "bpf_iter_task_btf__open_and_load",
+ "skeleton open_and_load failed\n"))
+ return;
+
+ bss = skel->bss;
+
+ ret = do_btf_read(skel);
+ if (ret)
+ goto cleanup;
+
+ if (CHECK(bss->tasks == 0, "check if iterated over tasks",
+ "no task iteration, did BPF program run?\n"))
+ goto cleanup;
+
+ CHECK(bss->seq_err != 0, "check for unexpected err",
+ "bpf_seq_printf_btf returned %ld", bss->seq_err);
+
+cleanup:
+ bpf_iter_task_btf__destroy(skel);
+}
+
static void test_tcp4(void)
{
struct bpf_iter_tcp4 *skel;
@@ -331,7 +430,7 @@ static void test_overflow(bool test_e2big_overflow, bool ret1)
struct bpf_map_info map_info = {};
struct bpf_iter_test_kern4 *skel;
struct bpf_link *link;
- __u32 page_size;
+ __u32 iter_size;
char *buf;
skel = bpf_iter_test_kern4__open();
@@ -353,19 +452,19 @@ static void test_overflow(bool test_e2big_overflow, bool ret1)
"map_creation failed: %s\n", strerror(errno)))
goto free_map1;
- /* bpf_seq_printf kernel buffer is one page, so one map
+ /* bpf_seq_printf kernel buffer is 8 pages, so one map
* bpf_seq_write will mostly fill it, and the other map
* will partially fill and then trigger overflow and need
* bpf_seq_read restart.
*/
- page_size = sysconf(_SC_PAGE_SIZE);
+ iter_size = sysconf(_SC_PAGE_SIZE) << 3;
if (test_e2big_overflow) {
- skel->rodata->print_len = (page_size + 8) / 8;
- expected_read_len = 2 * (page_size + 8);
+ skel->rodata->print_len = (iter_size + 8) / 8;
+ expected_read_len = 2 * (iter_size + 8);
} else if (!ret1) {
- skel->rodata->print_len = (page_size - 8) / 8;
- expected_read_len = 2 * (page_size - 8);
+ skel->rodata->print_len = (iter_size - 8) / 8;
+ expected_read_len = 2 * (iter_size - 8);
} else {
skel->rodata->print_len = 1;
expected_read_len = 2 * 8;
@@ -936,6 +1035,8 @@ void test_bpf_iter(void)
test_task_stack();
if (test__start_subtest("task_file"))
test_task_file();
+ if (test__start_subtest("task_btf"))
+ test_task_btf();
if (test__start_subtest("tcp4"))
test_tcp4();
if (test__start_subtest("tcp6"))
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
index e9f2f12ba06b..e698ee6bb6c2 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
@@ -49,6 +49,7 @@ void test_bpf_verif_scale(void)
{ "test_verif_scale3.o", BPF_PROG_TYPE_SCHED_CLS },
{ "pyperf_global.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
+ { "pyperf_subprogs.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
/* full unroll by llvm */
{ "pyperf50.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
@@ -86,6 +87,9 @@ void test_bpf_verif_scale(void)
{ "strobemeta_nounroll1.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
{ "strobemeta_nounroll2.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
+ /* non-inlined subprogs */
+ { "strobemeta_subprogs.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
+
{ "test_sysctl_loop1.o", BPF_PROG_TYPE_CGROUP_SYSCTL },
{ "test_sysctl_loop2.o", BPF_PROG_TYPE_CGROUP_SYSCTL },
diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index c75fc6447186..93162484c2ca 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -24,40 +24,17 @@
#include "bpf_rlimit.h"
#include "bpf_util.h"
-#include "test_btf.h"
+#include "../test_btf.h"
+#include "test_progs.h"
#define MAX_INSNS 512
#define MAX_SUBPROGS 16
-static uint32_t pass_cnt;
-static uint32_t error_cnt;
-static uint32_t skip_cnt;
+static int duration = 0;
+static bool always_log;
-#define CHECK(condition, format...) ({ \
- int __ret = !!(condition); \
- if (__ret) { \
- fprintf(stderr, "%s:%d:FAIL ", __func__, __LINE__); \
- fprintf(stderr, format); \
- } \
- __ret; \
-})
-
-static int count_result(int err)
-{
- if (err)
- error_cnt++;
- else
- pass_cnt++;
-
- fprintf(stderr, "\n");
- return err;
-}
-
-static int __base_pr(enum libbpf_print_level level __attribute__((unused)),
- const char *format, va_list args)
-{
- return vfprintf(stderr, format, args);
-}
+#undef CHECK
+#define CHECK(condition, format...) _CHECK(condition, "check", duration, format)
#define BTF_END_RAW 0xdeadbeef
#define NAME_TBD 0xdeadb33f
@@ -69,21 +46,6 @@ static int __base_pr(enum libbpf_print_level level __attribute__((unused)),
#define MAX_NR_RAW_U32 1024
#define BTF_LOG_BUF_SIZE 65535
-static struct args {
- unsigned int raw_test_num;
- unsigned int file_test_num;
- unsigned int get_info_test_num;
- unsigned int info_raw_test_num;
- unsigned int dedup_test_num;
- bool raw_test;
- bool file_test;
- bool get_info_test;
- bool pprint_test;
- bool always_log;
- bool info_raw_test;
- bool dedup_test;
-} args;
-
static char btf_log_buf[BTF_LOG_BUF_SIZE];
static struct btf_header hdr_tmpl = {
@@ -3664,7 +3626,7 @@ done:
return raw_btf;
}
-static int do_test_raw(unsigned int test_num)
+static void do_test_raw(unsigned int test_num)
{
struct btf_raw_test *test = &raw_tests[test_num - 1];
struct bpf_create_map_attr create_attr = {};
@@ -3674,15 +3636,16 @@ static int do_test_raw(unsigned int test_num)
void *raw_btf;
int err;
- fprintf(stderr, "BTF raw test[%u] (%s): ", test_num, test->descr);
+ if (!test__start_subtest(test->descr))
+ return;
+
raw_btf = btf_raw_create(&hdr_tmpl,
test->raw_types,
test->str_sec,
test->str_sec_size,
&raw_btf_size, NULL);
-
if (!raw_btf)
- return -1;
+ return;
hdr = raw_btf;
@@ -3694,7 +3657,7 @@ static int do_test_raw(unsigned int test_num)
*btf_log_buf = '\0';
btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
btf_log_buf, BTF_LOG_BUF_SIZE,
- args.always_log);
+ always_log);
free(raw_btf);
err = ((btf_fd == -1) != test->btf_load_err);
@@ -3725,32 +3688,12 @@ static int do_test_raw(unsigned int test_num)
map_fd, test->map_create_err);
done:
- if (!err)
- fprintf(stderr, "OK");
-
- if (*btf_log_buf && (err || args.always_log))
+ if (*btf_log_buf && (err || always_log))
fprintf(stderr, "\n%s", btf_log_buf);
-
if (btf_fd != -1)
close(btf_fd);
if (map_fd != -1)
close(map_fd);
-
- return err;
-}
-
-static int test_raw(void)
-{
- unsigned int i;
- int err = 0;
-
- if (args.raw_test_num)
- return count_result(do_test_raw(args.raw_test_num));
-
- for (i = 1; i <= ARRAY_SIZE(raw_tests); i++)
- err |= count_result(do_test_raw(i));
-
- return err;
}
struct btf_get_info_test {
@@ -3814,11 +3757,6 @@ const struct btf_get_info_test get_info_tests[] = {
},
};
-static inline __u64 ptr_to_u64(const void *ptr)
-{
- return (__u64)(unsigned long)ptr;
-}
-
static int test_big_btf_info(unsigned int test_num)
{
const struct btf_get_info_test *test = &get_info_tests[test_num - 1];
@@ -3851,7 +3789,7 @@ static int test_big_btf_info(unsigned int test_num)
btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
btf_log_buf, BTF_LOG_BUF_SIZE,
- args.always_log);
+ always_log);
if (CHECK(btf_fd == -1, "errno:%d", errno)) {
err = -1;
goto done;
@@ -3892,7 +3830,7 @@ static int test_big_btf_info(unsigned int test_num)
fprintf(stderr, "OK");
done:
- if (*btf_log_buf && (err || args.always_log))
+ if (*btf_log_buf && (err || always_log))
fprintf(stderr, "\n%s", btf_log_buf);
free(raw_btf);
@@ -3939,7 +3877,7 @@ static int test_btf_id(unsigned int test_num)
btf_fd[0] = bpf_load_btf(raw_btf, raw_btf_size,
btf_log_buf, BTF_LOG_BUF_SIZE,
- args.always_log);
+ always_log);
if (CHECK(btf_fd[0] == -1, "errno:%d", errno)) {
err = -1;
goto done;
@@ -4024,7 +3962,7 @@ static int test_btf_id(unsigned int test_num)
fprintf(stderr, "OK");
done:
- if (*btf_log_buf && (err || args.always_log))
+ if (*btf_log_buf && (err || always_log))
fprintf(stderr, "\n%s", btf_log_buf);
free(raw_btf);
@@ -4039,7 +3977,7 @@ done:
return err;
}
-static int do_test_get_info(unsigned int test_num)
+static void do_test_get_info(unsigned int test_num)
{
const struct btf_get_info_test *test = &get_info_tests[test_num - 1];
unsigned int raw_btf_size, user_btf_size, expected_nbytes;
@@ -4048,11 +3986,14 @@ static int do_test_get_info(unsigned int test_num)
int btf_fd = -1, err, ret;
uint32_t info_len;
- fprintf(stderr, "BTF GET_INFO test[%u] (%s): ",
- test_num, test->descr);
+ if (!test__start_subtest(test->descr))
+ return;
- if (test->special_test)
- return test->special_test(test_num);
+ if (test->special_test) {
+ err = test->special_test(test_num);
+ if (CHECK(err, "failed: %d\n", err))
+ return;
+ }
raw_btf = btf_raw_create(&hdr_tmpl,
test->raw_types,
@@ -4061,7 +4002,7 @@ static int do_test_get_info(unsigned int test_num)
&raw_btf_size, NULL);
if (!raw_btf)
- return -1;
+ return;
*btf_log_buf = '\0';
@@ -4073,7 +4014,7 @@ static int do_test_get_info(unsigned int test_num)
btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
btf_log_buf, BTF_LOG_BUF_SIZE,
- args.always_log);
+ always_log);
if (CHECK(btf_fd == -1, "errno:%d", errno)) {
err = -1;
goto done;
@@ -4114,7 +4055,7 @@ static int do_test_get_info(unsigned int test_num)
fprintf(stderr, "OK");
done:
- if (*btf_log_buf && (err || args.always_log))
+ if (*btf_log_buf && (err || always_log))
fprintf(stderr, "\n%s", btf_log_buf);
free(raw_btf);
@@ -4122,22 +4063,6 @@ done:
if (btf_fd != -1)
close(btf_fd);
-
- return err;
-}
-
-static int test_get_info(void)
-{
- unsigned int i;
- int err = 0;
-
- if (args.get_info_test_num)
- return count_result(do_test_get_info(args.get_info_test_num));
-
- for (i = 1; i <= ARRAY_SIZE(get_info_tests); i++)
- err |= count_result(do_test_get_info(i));
-
- return err;
}
struct btf_file_test {
@@ -4151,7 +4076,7 @@ static struct btf_file_test file_tests[] = {
{ .file = "test_btf_nokv.o", .btf_kv_notfound = true, },
};
-static int do_test_file(unsigned int test_num)
+static void do_test_file(unsigned int test_num)
{
const struct btf_file_test *test = &file_tests[test_num - 1];
const char *expected_fnames[] = {"_dummy_tracepoint",
@@ -4169,17 +4094,17 @@ static int do_test_file(unsigned int test_num)
struct bpf_map *map;
int i, err, prog_fd;
- fprintf(stderr, "BTF libbpf test[%u] (%s): ", test_num,
- test->file);
+ if (!test__start_subtest(test->file))
+ return;
btf = btf__parse_elf(test->file, &btf_ext);
if (IS_ERR(btf)) {
if (PTR_ERR(btf) == -ENOENT) {
- fprintf(stderr, "SKIP. No ELF %s found", BTF_ELF_SEC);
- skip_cnt++;
- return 0;
+ printf("%s:SKIP: No ELF %s found", __func__, BTF_ELF_SEC);
+ test__skip();
+ return;
}
- return PTR_ERR(btf);
+ return;
}
btf__free(btf);
@@ -4188,7 +4113,7 @@ static int do_test_file(unsigned int test_num)
obj = bpf_object__open(test->file);
if (CHECK(IS_ERR(obj), "obj: %ld", PTR_ERR(obj)))
- return PTR_ERR(obj);
+ return;
prog = bpf_program__next(NULL, obj);
if (CHECK(!prog, "Cannot find bpf_prog")) {
@@ -4310,21 +4235,6 @@ skip:
done:
free(func_info);
bpf_object__close(obj);
- return err;
-}
-
-static int test_file(void)
-{
- unsigned int i;
- int err = 0;
-
- if (args.file_test_num)
- return count_result(do_test_file(args.file_test_num));
-
- for (i = 1; i <= ARRAY_SIZE(file_tests); i++)
- err |= count_result(do_test_file(i));
-
- return err;
}
const char *pprint_enum_str[] = {
@@ -4428,7 +4338,7 @@ static struct btf_raw_test pprint_test_template[] = {
.value_size = sizeof(struct pprint_mapv),
.key_type_id = 3, /* unsigned int */
.value_type_id = 16, /* struct pprint_mapv */
- .max_entries = 128 * 1024,
+ .max_entries = 128,
},
{
@@ -4493,7 +4403,7 @@ static struct btf_raw_test pprint_test_template[] = {
.value_size = sizeof(struct pprint_mapv),
.key_type_id = 3, /* unsigned int */
.value_type_id = 16, /* struct pprint_mapv */
- .max_entries = 128 * 1024,
+ .max_entries = 128,
},
{
@@ -4564,7 +4474,7 @@ static struct btf_raw_test pprint_test_template[] = {
.value_size = sizeof(struct pprint_mapv),
.key_type_id = 3, /* unsigned int */
.value_type_id = 16, /* struct pprint_mapv */
- .max_entries = 128 * 1024,
+ .max_entries = 128,
},
#ifdef __SIZEOF_INT128__
@@ -4591,7 +4501,7 @@ static struct btf_raw_test pprint_test_template[] = {
.value_size = sizeof(struct pprint_mapv_int128),
.key_type_id = 1,
.value_type_id = 4,
- .max_entries = 128 * 1024,
+ .max_entries = 128,
.mapv_kind = PPRINT_MAPV_KIND_INT128,
},
#endif
@@ -4790,7 +4700,7 @@ static int check_line(const char *expected_line, int nexpected_line,
}
-static int do_test_pprint(int test_num)
+static void do_test_pprint(int test_num)
{
const struct btf_raw_test *test = &pprint_test_template[test_num];
enum pprint_mapv_kind_t mapv_kind = test->mapv_kind;
@@ -4809,18 +4719,20 @@ static int do_test_pprint(int test_num)
uint8_t *raw_btf;
ssize_t nread;
- fprintf(stderr, "%s(#%d)......", test->descr, test_num);
+ if (!test__start_subtest(test->descr))
+ return;
+
raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types,
test->str_sec, test->str_sec_size,
&raw_btf_size, NULL);
if (!raw_btf)
- return -1;
+ return;
*btf_log_buf = '\0';
btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
btf_log_buf, BTF_LOG_BUF_SIZE,
- args.always_log);
+ always_log);
free(raw_btf);
if (CHECK(btf_fd == -1, "errno:%d", errno)) {
@@ -4971,7 +4883,7 @@ done:
free(mapv);
if (!err)
fprintf(stderr, "OK");
- if (*btf_log_buf && (err || args.always_log))
+ if (*btf_log_buf && (err || always_log))
fprintf(stderr, "\n%s", btf_log_buf);
if (btf_fd != -1)
close(btf_fd);
@@ -4981,14 +4893,11 @@ done:
fclose(pin_file);
unlink(pin_path);
free(line);
-
- return err;
}
-static int test_pprint(void)
+static void test_pprint(void)
{
unsigned int i;
- int err = 0;
/* test various maps with the first test template */
for (i = 0; i < ARRAY_SIZE(pprint_tests_meta); i++) {
@@ -4999,7 +4908,7 @@ static int test_pprint(void)
pprint_test_template[0].lossless_map = pprint_tests_meta[i].lossless_map;
pprint_test_template[0].percpu_map = pprint_tests_meta[i].percpu_map;
- err |= count_result(do_test_pprint(0));
+ do_test_pprint(0);
}
/* test rest test templates with the first map */
@@ -5010,10 +4919,8 @@ static int test_pprint(void)
pprint_test_template[i].ordered_map = pprint_tests_meta[0].ordered_map;
pprint_test_template[i].lossless_map = pprint_tests_meta[0].lossless_map;
pprint_test_template[i].percpu_map = pprint_tests_meta[0].percpu_map;
- err |= count_result(do_test_pprint(i));
+ do_test_pprint(i);
}
-
- return err;
}
#define BPF_LINE_INFO_ENC(insn_off, file_off, line_off, line_num, line_col) \
@@ -6178,7 +6085,7 @@ done:
return err;
}
-static int do_test_info_raw(unsigned int test_num)
+static void do_test_info_raw(unsigned int test_num)
{
const struct prog_info_raw_test *test = &info_raw_tests[test_num - 1];
unsigned int raw_btf_size, linfo_str_off, linfo_size;
@@ -6187,18 +6094,19 @@ static int do_test_info_raw(unsigned int test_num)
const char *ret_next_str;
union bpf_attr attr = {};
- fprintf(stderr, "BTF prog info raw test[%u] (%s): ", test_num, test->descr);
+ if (!test__start_subtest(test->descr))
+ return;
+
raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types,
test->str_sec, test->str_sec_size,
&raw_btf_size, &ret_next_str);
-
if (!raw_btf)
- return -1;
+ return;
*btf_log_buf = '\0';
btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
btf_log_buf, BTF_LOG_BUF_SIZE,
- args.always_log);
+ always_log);
free(raw_btf);
if (CHECK(btf_fd == -1, "invalid btf_fd errno:%d", errno)) {
@@ -6206,7 +6114,7 @@ static int do_test_info_raw(unsigned int test_num)
goto done;
}
- if (*btf_log_buf && args.always_log)
+ if (*btf_log_buf && always_log)
fprintf(stderr, "\n%s", btf_log_buf);
*btf_log_buf = '\0';
@@ -6261,10 +6169,7 @@ static int do_test_info_raw(unsigned int test_num)
goto done;
done:
- if (!err)
- fprintf(stderr, "OK");
-
- if (*btf_log_buf && (err || args.always_log))
+ if (*btf_log_buf && (err || always_log))
fprintf(stderr, "\n%s", btf_log_buf);
if (btf_fd != -1)
@@ -6274,22 +6179,6 @@ done:
if (!IS_ERR(patched_linfo))
free(patched_linfo);
-
- return err;
-}
-
-static int test_info_raw(void)
-{
- unsigned int i;
- int err = 0;
-
- if (args.info_raw_test_num)
- return count_result(do_test_info_raw(args.info_raw_test_num));
-
- for (i = 1; i <= ARRAY_SIZE(info_raw_tests); i++)
- err |= count_result(do_test_info_raw(i));
-
- return err;
}
struct btf_raw_data {
@@ -6754,7 +6643,7 @@ static void dump_btf_strings(const char *strs, __u32 len)
}
}
-static int do_test_dedup(unsigned int test_num)
+static void do_test_dedup(unsigned int test_num)
{
const struct btf_dedup_test *test = &dedup_tests[test_num - 1];
__u32 test_nr_types, expect_nr_types, test_btf_size, expect_btf_size;
@@ -6769,13 +6658,15 @@ static int do_test_dedup(unsigned int test_num)
void *raw_btf;
int err = 0, i;
- fprintf(stderr, "BTF dedup test[%u] (%s):", test_num, test->descr);
+ if (!test__start_subtest(test->descr))
+ return;
raw_btf = btf_raw_create(&hdr_tmpl, test->input.raw_types,
test->input.str_sec, test->input.str_sec_size,
&raw_btf_size, &ret_test_next_str);
if (!raw_btf)
- return -1;
+ return;
+
test_btf = btf__new((__u8 *)raw_btf, raw_btf_size);
free(raw_btf);
if (CHECK(IS_ERR(test_btf), "invalid test_btf errno:%ld",
@@ -6789,7 +6680,7 @@ static int do_test_dedup(unsigned int test_num)
test->expect.str_sec_size,
&raw_btf_size, &ret_expect_next_str);
if (!raw_btf)
- return -1;
+ return;
expect_btf = btf__new((__u8 *)raw_btf, raw_btf_size);
free(raw_btf);
if (CHECK(IS_ERR(expect_btf), "invalid expect_btf errno:%ld",
@@ -6894,174 +6785,27 @@ static int do_test_dedup(unsigned int test_num)
}
done:
- if (!err)
- fprintf(stderr, "OK");
if (!IS_ERR(test_btf))
btf__free(test_btf);
if (!IS_ERR(expect_btf))
btf__free(expect_btf);
-
- return err;
}
-static int test_dedup(void)
+void test_btf(void)
{
- unsigned int i;
- int err = 0;
+ int i;
- if (args.dedup_test_num)
- return count_result(do_test_dedup(args.dedup_test_num));
+ always_log = env.verbosity > VERBOSE_NONE;
+ for (i = 1; i <= ARRAY_SIZE(raw_tests); i++)
+ do_test_raw(i);
+ for (i = 1; i <= ARRAY_SIZE(get_info_tests); i++)
+ do_test_get_info(i);
+ for (i = 1; i <= ARRAY_SIZE(file_tests); i++)
+ do_test_file(i);
+ for (i = 1; i <= ARRAY_SIZE(info_raw_tests); i++)
+ do_test_info_raw(i);
for (i = 1; i <= ARRAY_SIZE(dedup_tests); i++)
- err |= count_result(do_test_dedup(i));
-
- return err;
-}
-
-static void usage(const char *cmd)
-{
- fprintf(stderr, "Usage: %s [-l] [[-r btf_raw_test_num (1 - %zu)] |\n"
- "\t[-g btf_get_info_test_num (1 - %zu)] |\n"
- "\t[-f btf_file_test_num (1 - %zu)] |\n"
- "\t[-k btf_prog_info_raw_test_num (1 - %zu)] |\n"
- "\t[-p (pretty print test)] |\n"
- "\t[-d btf_dedup_test_num (1 - %zu)]]\n",
- cmd, ARRAY_SIZE(raw_tests), ARRAY_SIZE(get_info_tests),
- ARRAY_SIZE(file_tests), ARRAY_SIZE(info_raw_tests),
- ARRAY_SIZE(dedup_tests));
-}
-
-static int parse_args(int argc, char **argv)
-{
- const char *optstr = "hlpk:f:r:g:d:";
- int opt;
-
- while ((opt = getopt(argc, argv, optstr)) != -1) {
- switch (opt) {
- case 'l':
- args.always_log = true;
- break;
- case 'f':
- args.file_test_num = atoi(optarg);
- args.file_test = true;
- break;
- case 'r':
- args.raw_test_num = atoi(optarg);
- args.raw_test = true;
- break;
- case 'g':
- args.get_info_test_num = atoi(optarg);
- args.get_info_test = true;
- break;
- case 'p':
- args.pprint_test = true;
- break;
- case 'k':
- args.info_raw_test_num = atoi(optarg);
- args.info_raw_test = true;
- break;
- case 'd':
- args.dedup_test_num = atoi(optarg);
- args.dedup_test = true;
- break;
- case 'h':
- usage(argv[0]);
- exit(0);
- default:
- usage(argv[0]);
- return -1;
- }
- }
-
- if (args.raw_test_num &&
- (args.raw_test_num < 1 ||
- args.raw_test_num > ARRAY_SIZE(raw_tests))) {
- fprintf(stderr, "BTF raw test number must be [1 - %zu]\n",
- ARRAY_SIZE(raw_tests));
- return -1;
- }
-
- if (args.file_test_num &&
- (args.file_test_num < 1 ||
- args.file_test_num > ARRAY_SIZE(file_tests))) {
- fprintf(stderr, "BTF file test number must be [1 - %zu]\n",
- ARRAY_SIZE(file_tests));
- return -1;
- }
-
- if (args.get_info_test_num &&
- (args.get_info_test_num < 1 ||
- args.get_info_test_num > ARRAY_SIZE(get_info_tests))) {
- fprintf(stderr, "BTF get info test number must be [1 - %zu]\n",
- ARRAY_SIZE(get_info_tests));
- return -1;
- }
-
- if (args.info_raw_test_num &&
- (args.info_raw_test_num < 1 ||
- args.info_raw_test_num > ARRAY_SIZE(info_raw_tests))) {
- fprintf(stderr, "BTF prog info raw test number must be [1 - %zu]\n",
- ARRAY_SIZE(info_raw_tests));
- return -1;
- }
-
- if (args.dedup_test_num &&
- (args.dedup_test_num < 1 ||
- args.dedup_test_num > ARRAY_SIZE(dedup_tests))) {
- fprintf(stderr, "BTF dedup test number must be [1 - %zu]\n",
- ARRAY_SIZE(dedup_tests));
- return -1;
- }
-
- return 0;
-}
-
-static void print_summary(void)
-{
- fprintf(stderr, "PASS:%u SKIP:%u FAIL:%u\n",
- pass_cnt - skip_cnt, skip_cnt, error_cnt);
-}
-
-int main(int argc, char **argv)
-{
- int err = 0;
-
- err = parse_args(argc, argv);
- if (err)
- return err;
-
- if (args.always_log)
- libbpf_set_print(__base_pr);
-
- if (args.raw_test)
- err |= test_raw();
-
- if (args.get_info_test)
- err |= test_get_info();
-
- if (args.file_test)
- err |= test_file();
-
- if (args.pprint_test)
- err |= test_pprint();
-
- if (args.info_raw_test)
- err |= test_info_raw();
-
- if (args.dedup_test)
- err |= test_dedup();
-
- if (args.raw_test || args.get_info_test || args.file_test ||
- args.pprint_test || args.info_raw_test || args.dedup_test)
- goto done;
-
- err |= test_raw();
- err |= test_get_info();
- err |= test_file();
- err |= test_info_raw();
- err |= test_dedup();
-
-done:
- print_summary();
- return err;
+ do_test_dedup(i);
+ test_pprint();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
index 39fb81d9daeb..c60091ee8a21 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
@@ -129,6 +129,109 @@ done:
return err;
}
+static char *dump_buf;
+static size_t dump_buf_sz;
+static FILE *dump_buf_file;
+
+void test_btf_dump_incremental(void)
+{
+ struct btf *btf = NULL;
+ struct btf_dump *d = NULL;
+ struct btf_dump_opts opts;
+ int id, err, i;
+
+ dump_buf_file = open_memstream(&dump_buf, &dump_buf_sz);
+ if (!ASSERT_OK_PTR(dump_buf_file, "dump_memstream"))
+ return;
+ btf = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf, "new_empty"))
+ goto err_out;
+ opts.ctx = dump_buf_file;
+ d = btf_dump__new(btf, NULL, &opts, btf_dump_printf);
+ if (!ASSERT_OK(libbpf_get_error(d), "btf_dump__new"))
+ goto err_out;
+
+ /* First, generate BTF corresponding to the following C code:
+ *
+ * enum { VAL = 1 };
+ *
+ * struct s { int x; };
+ *
+ */
+ id = btf__add_enum(btf, NULL, 4);
+ ASSERT_EQ(id, 1, "enum_id");
+ err = btf__add_enum_value(btf, "VAL", 1);
+ ASSERT_OK(err, "enum_val_ok");
+
+ id = btf__add_int(btf, "int", 4, BTF_INT_SIGNED);
+ ASSERT_EQ(id, 2, "int_id");
+
+ id = btf__add_struct(btf, "s", 4);
+ ASSERT_EQ(id, 3, "struct_id");
+ err = btf__add_field(btf, "x", 2, 0, 0);
+ ASSERT_OK(err, "field_ok");
+
+ for (i = 1; i <= btf__get_nr_types(btf); i++) {
+ err = btf_dump__dump_type(d, i);
+ ASSERT_OK(err, "dump_type_ok");
+ }
+
+ fflush(dump_buf_file);
+ dump_buf[dump_buf_sz] = 0; /* some libc implementations don't do this */
+ ASSERT_STREQ(dump_buf,
+"enum {\n"
+" VAL = 1,\n"
+"};\n"
+"\n"
+"struct s {\n"
+" int x;\n"
+"};\n\n", "c_dump1");
+
+ /* Now, after dumping original BTF, append another struct that embeds
+ * anonymous enum. It also has a name conflict with the first struct:
+ *
+ * struct s___2 {
+ * enum { VAL___2 = 1 } x;
+ * struct s s;
+ * };
+ *
+ * This will test that btf_dump'er maintains internal state properly.
+ * Note that VAL___2 enum value. It's because we've already emitted
+ * that enum as a global anonymous enum, so btf_dump will ensure that
+ * enum values don't conflict;
+ *
+ */
+ fseek(dump_buf_file, 0, SEEK_SET);
+
+ id = btf__add_struct(btf, "s", 4);
+ ASSERT_EQ(id, 4, "struct_id");
+ err = btf__add_field(btf, "x", 1, 0, 0);
+ ASSERT_OK(err, "field_ok");
+ err = btf__add_field(btf, "s", 3, 32, 0);
+ ASSERT_OK(err, "field_ok");
+
+ for (i = 1; i <= btf__get_nr_types(btf); i++) {
+ err = btf_dump__dump_type(d, i);
+ ASSERT_OK(err, "dump_type_ok");
+ }
+
+ fflush(dump_buf_file);
+ dump_buf[dump_buf_sz] = 0; /* some libc implementations don't do this */
+ ASSERT_STREQ(dump_buf,
+"struct s___2 {\n"
+" enum {\n"
+" VAL___2 = 1,\n"
+" } x;\n"
+" struct s s;\n"
+"};\n\n" , "c_dump1");
+
+err_out:
+ fclose(dump_buf_file);
+ free(dump_buf);
+ btf_dump__free(d);
+ btf__free(btf);
+}
+
void test_btf_dump() {
int i;
@@ -140,4 +243,6 @@ void test_btf_dump() {
test_btf_dump_case(i, &btf_dump_test_cases[i]);
}
+ if (test__start_subtest("btf_dump: incremental"))
+ test_btf_dump_incremental();
}
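
The btf_dump_printf callback referenced in the incremental test is not part of this hunk; a plausible sketch, given that opts.ctx is the memstream FILE pointer:

    static void btf_dump_printf(void *ctx, const char *fmt, va_list args)
    {
            /* forward formatted C dump output into the open_memstream buffer */
            vfprintf(ctx, fmt, args);
    }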
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_endian.c b/tools/testing/selftests/bpf/prog_tests/btf_endian.c
new file mode 100644
index 000000000000..8c52d72c876e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_endian.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#define _GNU_SOURCE
+#include <string.h>
+#include <byteswap.h>
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+static int duration = 0;
+
+void test_btf_endian() {
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ enum btf_endianness endian = BTF_LITTLE_ENDIAN;
+#elif __BYTE_ORDER == __BIG_ENDIAN
+ enum btf_endianness endian = BTF_BIG_ENDIAN;
+#else
+#error "Unrecognized __BYTE_ORDER"
+#endif
+ enum btf_endianness swap_endian = 1 - endian;
+ struct btf *btf = NULL, *swap_btf = NULL;
+ const void *raw_data, *swap_raw_data;
+ const struct btf_type *t;
+ const struct btf_header *hdr;
+ __u32 raw_sz, swap_raw_sz;
+ int var_id;
+
+ /* Load BTF in native endianness */
+ btf = btf__parse_elf("btf_dump_test_case_syntax.o", NULL);
+ if (!ASSERT_OK_PTR(btf, "parse_native_btf"))
+ goto err_out;
+
+ ASSERT_EQ(btf__endianness(btf), endian, "endian");
+ btf__set_endianness(btf, swap_endian);
+ ASSERT_EQ(btf__endianness(btf), swap_endian, "endian");
+
+ /* Get raw BTF data in non-native endianness... */
+ raw_data = btf__get_raw_data(btf, &raw_sz);
+ if (!ASSERT_OK_PTR(raw_data, "raw_data_inverted"))
+ goto err_out;
+
+ /* ...and open it as a new BTF instance */
+ swap_btf = btf__new(raw_data, raw_sz);
+ if (!ASSERT_OK_PTR(swap_btf, "parse_swap_btf"))
+ goto err_out;
+
+ ASSERT_EQ(btf__endianness(swap_btf), swap_endian, "endian");
+ ASSERT_EQ(btf__get_nr_types(swap_btf), btf__get_nr_types(btf), "nr_types");
+
+ swap_raw_data = btf__get_raw_data(swap_btf, &swap_raw_sz);
+ if (!ASSERT_OK_PTR(swap_raw_data, "swap_raw_data"))
+ goto err_out;
+
+ /* both raw data should be identical (with non-native endianness) */
+ ASSERT_OK(memcmp(raw_data, swap_raw_data, raw_sz), "mem_identical");
+
+ /* make sure that at least BTF header data is really swapped */
+ hdr = swap_raw_data;
+ ASSERT_EQ(bswap_16(hdr->magic), BTF_MAGIC, "btf_magic_swapped");
+ ASSERT_EQ(raw_sz, swap_raw_sz, "raw_sizes");
+
+ /* swap it back to native endianness */
+ btf__set_endianness(swap_btf, endian);
+ swap_raw_data = btf__get_raw_data(swap_btf, &swap_raw_sz);
+ if (!ASSERT_OK_PTR(swap_raw_data, "swap_raw_data"))
+ goto err_out;
+
+ /* now header should have native BTF_MAGIC */
+ hdr = swap_raw_data;
+ ASSERT_EQ(hdr->magic, BTF_MAGIC, "btf_magic_native");
+ ASSERT_EQ(raw_sz, swap_raw_sz, "raw_sizes");
+
+ /* now modify original BTF */
+ var_id = btf__add_var(btf, "some_var", BTF_VAR_GLOBAL_ALLOCATED, 1);
+ CHECK(var_id <= 0, "var_id", "failed %d\n", var_id);
+
+ btf__free(swap_btf);
+ swap_btf = NULL;
+
+ btf__set_endianness(btf, swap_endian);
+ raw_data = btf__get_raw_data(btf, &raw_sz);
+ if (!ASSERT_OK_PTR(raw_data, "raw_data_inverted"))
+ goto err_out;
+
+ /* and re-open swapped raw data again */
+ swap_btf = btf__new(raw_data, raw_sz);
+ if (!ASSERT_OK_PTR(swap_btf, "parse_swap_btf"))
+ goto err_out;
+
+ ASSERT_EQ(btf__endianness(swap_btf), swap_endian, "endian");
+ ASSERT_EQ(btf__get_nr_types(swap_btf), btf__get_nr_types(btf), "nr_types");
+
+ /* the type should appear as if it was stored in native endianness */
+ t = btf__type_by_id(swap_btf, var_id);
+ ASSERT_STREQ(btf__str_by_offset(swap_btf, t->name_off), "some_var", "var_name");
+ ASSERT_EQ(btf_var(t)->linkage, BTF_VAR_GLOBAL_ALLOCATED, "var_linkage");
+ ASSERT_EQ(t->type, 1, "var_type");
+
+err_out:
+ btf__free(btf);
+ btf__free(swap_btf);
+}
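
A short usage sketch of the endianness API this test exercises, e.g. producing big-endian raw BTF on a little-endian host ("prog.o" is a hypothetical object file; error handling elided):

    struct btf *btf = btf__parse_elf("prog.o", NULL);
    const void *raw;
    __u32 sz;

    btf__set_endianness(btf, BTF_BIG_ENDIAN);
    raw = btf__get_raw_data(btf, &sz); /* bytes are now big-endian */
    /* ... persist raw[0..sz) for the target ... */
    btf__free(btf);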
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c
index 6ccecbd39476..76ebe4c250f1 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c
@@ -53,12 +53,12 @@ static int kern_sync_rcu(void)
return err;
}
-void test_btf_map_in_map(void)
+static void test_lookup_update(void)
{
- int err, key = 0, val, i;
+ int map1_fd, map2_fd, map3_fd, map4_fd, map5_fd, map1_id, map2_id;
+ int outer_arr_fd, outer_hash_fd, outer_arr_dyn_fd;
struct test_btf_map_in_map *skel;
- int outer_arr_fd, outer_hash_fd;
- int fd, map1_fd, map2_fd, map1_id, map2_id;
+ int err, key = 0, val, i, fd;
skel = test_btf_map_in_map__open_and_load();
if (CHECK(!skel, "skel_open", "failed to open&load skeleton\n"))
@@ -70,32 +70,45 @@ void test_btf_map_in_map(void)
map1_fd = bpf_map__fd(skel->maps.inner_map1);
map2_fd = bpf_map__fd(skel->maps.inner_map2);
+ map3_fd = bpf_map__fd(skel->maps.inner_map3);
+ map4_fd = bpf_map__fd(skel->maps.inner_map4);
+ map5_fd = bpf_map__fd(skel->maps.inner_map5);
+ outer_arr_dyn_fd = bpf_map__fd(skel->maps.outer_arr_dyn);
outer_arr_fd = bpf_map__fd(skel->maps.outer_arr);
outer_hash_fd = bpf_map__fd(skel->maps.outer_hash);
- /* inner1 = input, inner2 = input + 1 */
- map1_fd = bpf_map__fd(skel->maps.inner_map1);
+ /* inner1 = input, inner2 = input + 1, inner3 = input + 2 */
bpf_map_update_elem(outer_arr_fd, &key, &map1_fd, 0);
- map2_fd = bpf_map__fd(skel->maps.inner_map2);
bpf_map_update_elem(outer_hash_fd, &key, &map2_fd, 0);
+ bpf_map_update_elem(outer_arr_dyn_fd, &key, &map3_fd, 0);
skel->bss->input = 1;
usleep(1);
-
bpf_map_lookup_elem(map1_fd, &key, &val);
CHECK(val != 1, "inner1", "got %d != exp %d\n", val, 1);
bpf_map_lookup_elem(map2_fd, &key, &val);
CHECK(val != 2, "inner2", "got %d != exp %d\n", val, 2);
+ bpf_map_lookup_elem(map3_fd, &key, &val);
+ CHECK(val != 3, "inner3", "got %d != exp %d\n", val, 3);
- /* inner1 = input + 1, inner2 = input */
+ /* inner2 = input, inner1 = input + 1, inner4 = input + 2 */
bpf_map_update_elem(outer_arr_fd, &key, &map2_fd, 0);
bpf_map_update_elem(outer_hash_fd, &key, &map1_fd, 0);
+ bpf_map_update_elem(outer_arr_dyn_fd, &key, &map4_fd, 0);
skel->bss->input = 3;
usleep(1);
-
bpf_map_lookup_elem(map1_fd, &key, &val);
CHECK(val != 4, "inner1", "got %d != exp %d\n", val, 4);
bpf_map_lookup_elem(map2_fd, &key, &val);
CHECK(val != 3, "inner2", "got %d != exp %d\n", val, 3);
+ bpf_map_lookup_elem(map4_fd, &key, &val);
+ CHECK(val != 5, "inner4", "got %d != exp %d\n", val, 5);
+
+ /* inner5 = input + 2 */
+ bpf_map_update_elem(outer_arr_dyn_fd, &key, &map5_fd, 0);
+ skel->bss->input = 5;
+ usleep(1);
+ bpf_map_lookup_elem(map5_fd, &key, &val);
+ CHECK(val != 7, "inner5", "got %d != exp %d\n", val, 7);
for (i = 0; i < 5; i++) {
val = i % 2 ? map1_fd : map2_fd;
@@ -106,7 +119,13 @@ void test_btf_map_in_map(void)
}
err = bpf_map_update_elem(outer_arr_fd, &key, &val, 0);
if (CHECK_FAIL(err)) {
- printf("failed to update hash_of_maps on iter #%d\n", i);
+ printf("failed to update array_of_maps on iter #%d\n", i);
+ goto cleanup;
+ }
+ val = i % 2 ? map4_fd : map5_fd;
+ err = bpf_map_update_elem(outer_arr_dyn_fd, &key, &val, 0);
+ if (CHECK_FAIL(err)) {
+ printf("failed to update array_of_maps (dyn) on iter #%d\n", i);
goto cleanup;
}
}
@@ -143,3 +162,36 @@ void test_btf_map_in_map(void)
cleanup:
test_btf_map_in_map__destroy(skel);
}
+
+static void test_diff_size(void)
+{
+ struct test_btf_map_in_map *skel;
+ int err, inner_map_fd, zero = 0;
+
+ skel = test_btf_map_in_map__open_and_load();
+ if (CHECK(!skel, "skel_open", "failed to open&load skeleton\n"))
+ return;
+
+ inner_map_fd = bpf_map__fd(skel->maps.sockarr_sz2);
+ err = bpf_map_update_elem(bpf_map__fd(skel->maps.outer_sockarr), &zero,
+ &inner_map_fd, 0);
+ CHECK(err, "outer_sockarr inner map size check",
+ "cannot use a different size inner_map\n");
+
+ inner_map_fd = bpf_map__fd(skel->maps.inner_map_sz2);
+ err = bpf_map_update_elem(bpf_map__fd(skel->maps.outer_arr), &zero,
+ &inner_map_fd, 0);
+ CHECK(!err, "outer_arr inner map size check",
+ "incorrectly updated with a different size inner_map\n");
+
+ test_btf_map_in_map__destroy(skel);
+}
+
+void test_btf_map_in_map(void)
+{
+ if (test__start_subtest("lookup_update"))
+ test_lookup_update();
+
+ if (test__start_subtest("diff_size"))
+ test_diff_size();
+}
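
For context, the BTF-defined outer/inner maps that test_lookup_update() manipulates are declared on the BPF side roughly like this (a hedged sketch; the real declarations live in progs/test_btf_map_in_map.c):

    struct inner_map {
            __uint(type, BPF_MAP_TYPE_ARRAY);
            __uint(max_entries, 1);
            __type(key, int);
            __type(value, int);
    } inner_map1 SEC(".maps");

    struct {
            __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
            __uint(max_entries, 3);
            __uint(key_size, sizeof(int));
            __uint(value_size, sizeof(int));
            __array(values, struct inner_map);
    } outer_arr SEC(".maps") = {
            /* statically pre-populate slot 0; the test swaps fds at runtime */
            .values = { [0] = &inner_map1 },
    };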
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c
new file mode 100644
index 000000000000..86ccf37e26b3
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#define _GNU_SOURCE
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <sched.h>
+#include <linux/compiler.h>
+#include <bpf/libbpf.h>
+
+#include "network_helpers.h"
+#include "test_progs.h"
+#include "test_btf_skc_cls_ingress.skel.h"
+
+static struct test_btf_skc_cls_ingress *skel;
+struct sockaddr_in6 srv_sa6;
+static __u32 duration;
+
+#define PROG_PIN_FILE "/sys/fs/bpf/btf_skc_cls_ingress"
+
+static int write_sysctl(const char *sysctl, const char *value)
+{
+ int fd, err, len;
+
+ fd = open(sysctl, O_WRONLY);
+ if (CHECK(fd == -1, "open sysctl", "open(%s): %s (%d)\n",
+ sysctl, strerror(errno), errno))
+ return -1;
+
+ len = strlen(value);
+ err = write(fd, value, len);
+ close(fd);
+ if (CHECK(err != len, "write sysctl",
+ "write(%s, %s, %d): err:%d %s (%d)\n",
+ sysctl, value, len, err, strerror(errno), errno))
+ return -1;
+
+ return 0;
+}
+
+static int prepare_netns(void)
+{
+ if (CHECK(unshare(CLONE_NEWNET), "create netns",
+ "unshare(CLONE_NEWNET): %s (%d)",
+ strerror(errno), errno))
+ return -1;
+
+ if (CHECK(system("ip link set dev lo up"),
+ "ip link set dev lo up", "failed\n"))
+ return -1;
+
+ if (CHECK(system("tc qdisc add dev lo clsact"),
+ "tc qdisc add dev lo clsact", "failed\n"))
+ return -1;
+
+ if (CHECK(system("tc filter add dev lo ingress bpf direct-action object-pinned " PROG_PIN_FILE),
+ "install tc cls-prog at ingress", "failed\n"))
+ return -1;
+
+ /* Ensure 20 bytes options (i.e. in total 40 bytes tcp header) for the
+ * bpf_tcp_gen_syncookie() helper.
+ */
+ if (write_sysctl("/proc/sys/net/ipv4/tcp_window_scaling", "1") ||
+ write_sysctl("/proc/sys/net/ipv4/tcp_timestamps", "1") ||
+ write_sysctl("/proc/sys/net/ipv4/tcp_sack", "1"))
+ return -1;
+
+ return 0;
+}
+
+static void reset_test(void)
+{
+ memset(&skel->bss->srv_sa6, 0, sizeof(skel->bss->srv_sa6));
+ skel->bss->listen_tp_sport = 0;
+ skel->bss->req_sk_sport = 0;
+ skel->bss->recv_cookie = 0;
+ skel->bss->gen_cookie = 0;
+ skel->bss->linum = 0;
+}
+
+static void print_err_line(void)
+{
+ if (skel->bss->linum)
+ printf("bpf prog error at line %u\n", skel->bss->linum);
+}
+
+static void test_conn(void)
+{
+ int listen_fd = -1, cli_fd = -1, err;
+ socklen_t addrlen = sizeof(srv_sa6);
+ int srv_port;
+
+ if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "1"))
+ return;
+
+ listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+ if (CHECK_FAIL(listen_fd == -1))
+ return;
+
+ err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen);
+ if (CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d\n", err,
+ errno))
+ goto done;
+ memcpy(&skel->bss->srv_sa6, &srv_sa6, sizeof(srv_sa6));
+ srv_port = ntohs(srv_sa6.sin6_port);
+
+ cli_fd = connect_to_fd(listen_fd, 0);
+ if (CHECK_FAIL(cli_fd == -1))
+ goto done;
+
+ if (CHECK(skel->bss->listen_tp_sport != srv_port ||
+ skel->bss->req_sk_sport != srv_port,
+ "Unexpected sk src port",
+ "listen_tp_sport:%u req_sk_sport:%u expected:%u\n",
+ skel->bss->listen_tp_sport, skel->bss->req_sk_sport,
+ srv_port))
+ goto done;
+
+ if (CHECK(skel->bss->gen_cookie || skel->bss->recv_cookie,
+ "Unexpected syncookie states",
+ "gen_cookie:%u recv_cookie:%u\n",
+ skel->bss->gen_cookie, skel->bss->recv_cookie))
+ goto done;
+
+ CHECK(skel->bss->linum, "bpf prog detected error", "at line %u\n",
+ skel->bss->linum);
+
+done:
+ if (listen_fd != -1)
+ close(listen_fd);
+ if (cli_fd != -1)
+ close(cli_fd);
+}
+
+static void test_syncookie(void)
+{
+ int listen_fd = -1, cli_fd = -1, err;
+ socklen_t addrlen = sizeof(srv_sa6);
+ int srv_port;
+
+ /* Enforce syncookie mode */
+ if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "2"))
+ return;
+
+ listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+ if (CHECK_FAIL(listen_fd == -1))
+ return;
+
+ err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen);
+ if (CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d\n", err,
+ errno))
+ goto done;
+ memcpy(&skel->bss->srv_sa6, &srv_sa6, sizeof(srv_sa6));
+ srv_port = ntohs(srv_sa6.sin6_port);
+
+ cli_fd = connect_to_fd(listen_fd, 0);
+ if (CHECK_FAIL(cli_fd == -1))
+ goto done;
+
+ if (CHECK(skel->bss->listen_tp_sport != srv_port,
+ "Unexpected tp src port",
+ "listen_tp_sport:%u expected:%u\n",
+ skel->bss->listen_tp_sport, srv_port))
+ goto done;
+
+ if (CHECK(skel->bss->req_sk_sport,
+ "Unexpected req_sk src port",
+ "req_sk_sport:%u expected:0\n",
+ skel->bss->req_sk_sport))
+ goto done;
+
+ if (CHECK(!skel->bss->gen_cookie ||
+ skel->bss->gen_cookie != skel->bss->recv_cookie,
+ "Unexpected syncookie states",
+ "gen_cookie:%u recv_cookie:%u\n",
+ skel->bss->gen_cookie, skel->bss->recv_cookie))
+ goto done;
+
+ CHECK(skel->bss->linum, "bpf prog detected error", "at line %u\n",
+ skel->bss->linum);
+
+done:
+ if (listen_fd != -1)
+ close(listen_fd);
+ if (cli_fd != -1)
+ close(cli_fd);
+}
+
+struct test {
+ const char *desc;
+ void (*run)(void);
+};
+
+#define DEF_TEST(name) { #name, test_##name }
+static struct test tests[] = {
+ DEF_TEST(conn),
+ DEF_TEST(syncookie),
+};
+
+void test_btf_skc_cls_ingress(void)
+{
+ int i, err;
+
+ skel = test_btf_skc_cls_ingress__open_and_load();
+ if (CHECK(!skel, "test_btf_skc_cls_ingress__open_and_load", "failed\n"))
+ return;
+
+ err = bpf_program__pin(skel->progs.cls_ingress, PROG_PIN_FILE);
+ if (CHECK(err, "bpf_program__pin",
+ "cannot pin bpf prog to %s. err:%d\n", PROG_PIN_FILE, err)) {
+ test_btf_skc_cls_ingress__destroy(skel);
+ return;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ if (!test__start_subtest(tests[i].desc))
+ continue;
+
+ if (prepare_netns())
+ break;
+
+ tests[i].run();
+
+ print_err_line();
+ reset_test();
+ }
+
+ bpf_program__unpin(skel->progs.cls_ingress, PROG_PIN_FILE);
+ test_btf_skc_cls_ingress__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_write.c b/tools/testing/selftests/bpf/prog_tests/btf_write.c
new file mode 100644
index 000000000000..314e1e7c36df
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_write.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+static int duration = 0;
+
+void test_btf_write() {
+ const struct btf_var_secinfo *vi;
+ const struct btf_type *t;
+ const struct btf_member *m;
+ const struct btf_enum *v;
+ const struct btf_param *p;
+ struct btf *btf;
+ int id, err, str_off;
+
+ btf = btf__new_empty();
+ if (CHECK(IS_ERR(btf), "new_empty", "failed: %ld\n", PTR_ERR(btf)))
+ return;
+
+ str_off = btf__find_str(btf, "int");
+ ASSERT_EQ(str_off, -ENOENT, "int_str_missing_off");
+
+ str_off = btf__add_str(btf, "int");
+ ASSERT_EQ(str_off, 1, "int_str_off");
+
+ str_off = btf__find_str(btf, "int");
+ ASSERT_EQ(str_off, 1, "int_str_found_off");
+
+ /* BTF_KIND_INT */
+ id = btf__add_int(btf, "int", 4, BTF_INT_SIGNED);
+ ASSERT_EQ(id, 1, "int_id");
+
+ t = btf__type_by_id(btf, 1);
+ /* should re-use previously added "int" string */
+ ASSERT_EQ(t->name_off, str_off, "int_name_off");
+ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "int", "int_name");
+ ASSERT_EQ(btf_kind(t), BTF_KIND_INT, "int_kind");
+ ASSERT_EQ(t->size, 4, "int_sz");
+ ASSERT_EQ(btf_int_encoding(t), BTF_INT_SIGNED, "int_enc");
+ ASSERT_EQ(btf_int_bits(t), 32, "int_bits");
+
+ /* invalid int size */
+ id = btf__add_int(btf, "bad sz int", 7, 0);
+ ASSERT_ERR(id, "int_bad_sz");
+ /* invalid encoding */
+ id = btf__add_int(btf, "bad enc int", 4, 123);
+ ASSERT_ERR(id, "int_bad_enc");
+ /* NULL name */
+ id = btf__add_int(btf, NULL, 4, 0);
+ ASSERT_ERR(id, "int_bad_null_name");
+ /* empty name */
+ id = btf__add_int(btf, "", 4, 0);
+ ASSERT_ERR(id, "int_bad_empty_name");
+
+ /* PTR/CONST/VOLATILE/RESTRICT */
+ id = btf__add_ptr(btf, 1);
+ ASSERT_EQ(id, 2, "ptr_id");
+ t = btf__type_by_id(btf, 2);
+ ASSERT_EQ(btf_kind(t), BTF_KIND_PTR, "ptr_kind");
+ ASSERT_EQ(t->type, 1, "ptr_type");
+
+ id = btf__add_const(btf, 5); /* points forward to restrict */
+ ASSERT_EQ(id, 3, "const_id");
+ t = btf__type_by_id(btf, 3);
+ ASSERT_EQ(btf_kind(t), BTF_KIND_CONST, "const_kind");
+ ASSERT_EQ(t->type, 5, "const_type");
+
+ id = btf__add_volatile(btf, 3);
+ ASSERT_EQ(id, 4, "volatile_id");
+ t = btf__type_by_id(btf, 4);
+ ASSERT_EQ(btf_kind(t), BTF_KIND_VOLATILE, "volatile_kind");
+ ASSERT_EQ(t->type, 3, "volatile_type");
+
+ id = btf__add_restrict(btf, 4);
+ ASSERT_EQ(id, 5, "restrict_id");
+ t = btf__type_by_id(btf, 5);
+ ASSERT_EQ(btf_kind(t), BTF_KIND_RESTRICT, "restrict_kind");
+ ASSERT_EQ(t->type, 4, "restrict_type");
+
+ /* ARRAY */
+ id = btf__add_array(btf, 1, 2, 10); /* int *[10] */
+ ASSERT_EQ(id, 6, "array_id");
+ t = btf__type_by_id(btf, 6);
+ ASSERT_EQ(btf_kind(t), BTF_KIND_ARRAY, "array_kind");
+ ASSERT_EQ(btf_array(t)->index_type, 1, "array_index_type");
+ ASSERT_EQ(btf_array(t)->type, 2, "array_elem_type");
+ ASSERT_EQ(btf_array(t)->nelems, 10, "array_nelems");
+
+ /* STRUCT */
+ err = btf__add_field(btf, "field", 1, 0, 0);
+ ASSERT_ERR(err, "no_struct_field");
+ id = btf__add_struct(btf, "s1", 8);
+ ASSERT_EQ(id, 7, "struct_id");
+ err = btf__add_field(btf, "f1", 1, 0, 0);
+ ASSERT_OK(err, "f1_res");
+ err = btf__add_field(btf, "f2", 1, 32, 16);
+ ASSERT_OK(err, "f2_res");
+
+ t = btf__type_by_id(btf, 7);
+ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "s1", "struct_name");
+ ASSERT_EQ(btf_kind(t), BTF_KIND_STRUCT, "struct_kind");
+ ASSERT_EQ(btf_vlen(t), 2, "struct_vlen");
+ ASSERT_EQ(btf_kflag(t), true, "struct_kflag");
+ ASSERT_EQ(t->size, 8, "struct_sz");
+ m = btf_members(t) + 0;
+ ASSERT_STREQ(btf__str_by_offset(btf, m->name_off), "f1", "f1_name");
+ ASSERT_EQ(m->type, 1, "f1_type");
+ ASSERT_EQ(btf_member_bit_offset(t, 0), 0, "f1_bit_off");
+ ASSERT_EQ(btf_member_bitfield_size(t, 0), 0, "f1_bit_sz");
+ m = btf_members(t) + 1;
+ ASSERT_STREQ(btf__str_by_offset(btf, m->name_off), "f2", "f2_name");
+ ASSERT_EQ(m->type, 1, "f2_type");
+ ASSERT_EQ(btf_member_bit_offset(t, 1), 32, "f2_bit_off");
+ ASSERT_EQ(btf_member_bitfield_size(t, 1), 16, "f2_bit_sz");
+
+ /* UNION */
+ id = btf__add_union(btf, "u1", 8);
+ ASSERT_EQ(id, 8, "union_id");
+
+ /* invalid, non-zero offset */
+ err = btf__add_field(btf, "field", 1, 1, 0);
+ ASSERT_ERR(err, "no_struct_field");
+
+ err = btf__add_field(btf, "f1", 1, 0, 16);
+ ASSERT_OK(err, "f1_res");
+
+ t = btf__type_by_id(btf, 8);
+ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "u1", "union_name");
+ ASSERT_EQ(btf_kind(t), BTF_KIND_UNION, "union_kind");
+ ASSERT_EQ(btf_vlen(t), 1, "union_vlen");
+ ASSERT_EQ(btf_kflag(t), true, "union_kflag");
+ ASSERT_EQ(t->size, 8, "union_sz");
+ m = btf_members(t) + 0;
+ ASSERT_STREQ(btf__str_by_offset(btf, m->name_off), "f1", "f1_name");
+ ASSERT_EQ(m->type, 1, "f1_type");
+ ASSERT_EQ(btf_member_bit_offset(t, 0), 0, "f1_bit_off");
+ ASSERT_EQ(btf_member_bitfield_size(t, 0), 16, "f1_bit_sz");
+
+ /* ENUM */
+ id = btf__add_enum(btf, "e1", 4);
+ ASSERT_EQ(id, 9, "enum_id");
+ err = btf__add_enum_value(btf, "v1", 1);
+ ASSERT_OK(err, "v1_res");
+ err = btf__add_enum_value(btf, "v2", 2);
+ ASSERT_OK(err, "v2_res");
+
+ t = btf__type_by_id(btf, 9);
+ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "e1", "enum_name");
+ ASSERT_EQ(btf_kind(t), BTF_KIND_ENUM, "enum_kind");
+ ASSERT_EQ(btf_vlen(t), 2, "enum_vlen");
+ ASSERT_EQ(t->size, 4, "enum_sz");
+ v = btf_enum(t) + 0;
+ ASSERT_STREQ(btf__str_by_offset(btf, v->name_off), "v1", "v1_name");
+ ASSERT_EQ(v->val, 1, "v1_val");
+ v = btf_enum(t) + 1;
+ ASSERT_STREQ(btf__str_by_offset(btf, v->name_off), "v2", "v2_name");
+ ASSERT_EQ(v->val, 2, "v2_val");
+
+ /* FWDs */
+ id = btf__add_fwd(btf, "struct_fwd", BTF_FWD_STRUCT);
+ ASSERT_EQ(id, 10, "struct_fwd_id");
+ t = btf__type_by_id(btf, 10);
+ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "struct_fwd", "fwd_name");
+ ASSERT_EQ(btf_kind(t), BTF_KIND_FWD, "fwd_kind");
+ ASSERT_EQ(btf_kflag(t), 0, "fwd_kflag");
+
+ id = btf__add_fwd(btf, "union_fwd", BTF_FWD_UNION);
+ ASSERT_EQ(id, 11, "union_fwd_id");
+ t = btf__type_by_id(btf, 11);
+ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "union_fwd", "fwd_name");
+ ASSERT_EQ(btf_kind(t), BTF_KIND_FWD, "fwd_kind");
+ ASSERT_EQ(btf_kflag(t), 1, "fwd_kflag");
+
+ id = btf__add_fwd(btf, "enum_fwd", BTF_FWD_ENUM);
+ ASSERT_EQ(id, 12, "enum_fwd_id");
+ t = btf__type_by_id(btf, 12);
+ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "enum_fwd", "fwd_name");
+ ASSERT_EQ(btf_kind(t), BTF_KIND_ENUM, "enum_fwd_kind");
+	ASSERT_EQ(btf_vlen(t), 0, "enum_fwd_vlen");
+ ASSERT_EQ(t->size, 4, "enum_fwd_sz");
+
+ /* TYPEDEF */
+ id = btf__add_typedef(btf, "typedef1", 1);
+	ASSERT_EQ(id, 13, "typedef_id");
+ t = btf__type_by_id(btf, 13);
+ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "typedef1", "typedef_name");
+ ASSERT_EQ(btf_kind(t), BTF_KIND_TYPEDEF, "typedef_kind");
+ ASSERT_EQ(t->type, 1, "typedef_type");
+
+ /* FUNC & FUNC_PROTO */
+ id = btf__add_func(btf, "func1", BTF_FUNC_GLOBAL, 15);
+ ASSERT_EQ(id, 14, "func_id");
+ t = btf__type_by_id(btf, 14);
+ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "func1", "func_name");
+ ASSERT_EQ(t->type, 15, "func_type");
+ ASSERT_EQ(btf_kind(t), BTF_KIND_FUNC, "func_kind");
+ ASSERT_EQ(btf_vlen(t), BTF_FUNC_GLOBAL, "func_vlen");
+
+ id = btf__add_func_proto(btf, 1);
+ ASSERT_EQ(id, 15, "func_proto_id");
+ err = btf__add_func_param(btf, "p1", 1);
+ ASSERT_OK(err, "p1_res");
+ err = btf__add_func_param(btf, "p2", 2);
+ ASSERT_OK(err, "p2_res");
+
+ t = btf__type_by_id(btf, 15);
+ ASSERT_EQ(btf_kind(t), BTF_KIND_FUNC_PROTO, "func_proto_kind");
+ ASSERT_EQ(btf_vlen(t), 2, "func_proto_vlen");
+ ASSERT_EQ(t->type, 1, "func_proto_ret_type");
+ p = btf_params(t) + 0;
+ ASSERT_STREQ(btf__str_by_offset(btf, p->name_off), "p1", "p1_name");
+ ASSERT_EQ(p->type, 1, "p1_type");
+ p = btf_params(t) + 1;
+ ASSERT_STREQ(btf__str_by_offset(btf, p->name_off), "p2", "p2_name");
+ ASSERT_EQ(p->type, 2, "p2_type");
+
+ /* VAR */
+ id = btf__add_var(btf, "var1", BTF_VAR_GLOBAL_ALLOCATED, 1);
+ ASSERT_EQ(id, 16, "var_id");
+ t = btf__type_by_id(btf, 16);
+ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "var1", "var_name");
+ ASSERT_EQ(btf_kind(t), BTF_KIND_VAR, "var_kind");
+ ASSERT_EQ(t->type, 1, "var_type");
+	ASSERT_EQ(btf_var(t)->linkage, BTF_VAR_GLOBAL_ALLOCATED, "var_linkage");
+
+	/* DATASEC */
+ id = btf__add_datasec(btf, "datasec1", 12);
+ ASSERT_EQ(id, 17, "datasec_id");
+ err = btf__add_datasec_var_info(btf, 1, 4, 8);
+ ASSERT_OK(err, "v1_res");
+
+ t = btf__type_by_id(btf, 17);
+ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "datasec1", "datasec_name");
+ ASSERT_EQ(t->size, 12, "datasec_sz");
+ ASSERT_EQ(btf_kind(t), BTF_KIND_DATASEC, "datasec_kind");
+ ASSERT_EQ(btf_vlen(t), 1, "datasec_vlen");
+ vi = btf_var_secinfos(t) + 0;
+ ASSERT_EQ(vi->type, 1, "v1_type");
+ ASSERT_EQ(vi->offset, 4, "v1_off");
+ ASSERT_EQ(vi->size, 8, "v1_sz");
+
+ btf__free(btf);
+}
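
The btf__add_*() calls exercised above append types to an in-memory BTF
object, handing out sequential type IDs starting at 1 (ID 0 is the implicit
void). A minimal standalone sketch of the same round trip, using only
libbpf's public <bpf/btf.h> API (error checks elided for brevity; real
callers should check every return value):

#include <stdio.h>
#include <bpf/btf.h>

int main(void)
{
	struct btf *btf = btf__new_empty();
	const struct btf_type *t;
	int int_id, ptr_id;

	int_id = btf__add_int(btf, "int", 4, BTF_INT_SIGNED); /* ID 1 */
	ptr_id = btf__add_ptr(btf, int_id);                   /* ID 2 */

	t = btf__type_by_id(btf, ptr_id);
	printf("kind=%d -> type=%u\n", btf_kind(t), t->type); /* PTR -> 1 */

	btf__free(btf);
	return 0;
}
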
diff --git a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
index f259085cca6a..9781d85cb223 100644
--- a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
@@ -12,10 +12,13 @@
#include "progs/test_cls_redirect.h"
#include "test_cls_redirect.skel.h"
+#include "test_cls_redirect_subprogs.skel.h"
#define ENCAP_IP INADDR_LOOPBACK
#define ENCAP_PORT (1234)
+static int duration = 0;
+
struct addr_port {
in_port_t port;
union {
@@ -361,30 +364,18 @@ static void close_fds(int *fds, int n)
close(fds[i]);
}
-void test_cls_redirect(void)
+static void test_cls_redirect_common(struct bpf_program *prog)
{
- struct test_cls_redirect *skel = NULL;
struct bpf_prog_test_run_attr tattr = {};
int families[] = { AF_INET, AF_INET6 };
struct sockaddr_storage ss;
struct sockaddr *addr;
socklen_t slen;
int i, j, err;
-
int servers[__NR_KIND][ARRAY_SIZE(families)] = {};
int conns[__NR_KIND][ARRAY_SIZE(families)] = {};
struct tuple tuples[__NR_KIND][ARRAY_SIZE(families)];
- skel = test_cls_redirect__open();
- if (CHECK_FAIL(!skel))
- return;
-
- skel->rodata->ENCAPSULATION_IP = htonl(ENCAP_IP);
- skel->rodata->ENCAPSULATION_PORT = htons(ENCAP_PORT);
-
- if (CHECK_FAIL(test_cls_redirect__load(skel)))
- goto cleanup;
-
addr = (struct sockaddr *)&ss;
for (i = 0; i < ARRAY_SIZE(families); i++) {
slen = prepare_addr(&ss, families[i]);
@@ -402,7 +393,7 @@ void test_cls_redirect(void)
goto cleanup;
}
- tattr.prog_fd = bpf_program__fd(skel->progs.cls_redirect);
+ tattr.prog_fd = bpf_program__fd(prog);
for (i = 0; i < ARRAY_SIZE(tests); i++) {
struct test_cfg *test = &tests[i];
@@ -450,7 +441,58 @@ void test_cls_redirect(void)
}
cleanup:
- test_cls_redirect__destroy(skel);
close_fds((int *)servers, sizeof(servers) / sizeof(servers[0][0]));
close_fds((int *)conns, sizeof(conns) / sizeof(conns[0][0]));
}
+
+static void test_cls_redirect_inlined(void)
+{
+ struct test_cls_redirect *skel;
+ int err;
+
+ skel = test_cls_redirect__open();
+ if (CHECK(!skel, "skel_open", "failed\n"))
+ return;
+
+ skel->rodata->ENCAPSULATION_IP = htonl(ENCAP_IP);
+ skel->rodata->ENCAPSULATION_PORT = htons(ENCAP_PORT);
+
+ err = test_cls_redirect__load(skel);
+ if (CHECK(err, "skel_load", "failed: %d\n", err))
+ goto cleanup;
+
+ test_cls_redirect_common(skel->progs.cls_redirect);
+
+cleanup:
+ test_cls_redirect__destroy(skel);
+}
+
+static void test_cls_redirect_subprogs(void)
+{
+ struct test_cls_redirect_subprogs *skel;
+ int err;
+
+ skel = test_cls_redirect_subprogs__open();
+ if (CHECK(!skel, "skel_open", "failed\n"))
+ return;
+
+ skel->rodata->ENCAPSULATION_IP = htonl(ENCAP_IP);
+ skel->rodata->ENCAPSULATION_PORT = htons(ENCAP_PORT);
+
+ err = test_cls_redirect_subprogs__load(skel);
+ if (CHECK(err, "skel_load", "failed: %d\n", err))
+ goto cleanup;
+
+ test_cls_redirect_common(skel->progs.cls_redirect);
+
+cleanup:
+ test_cls_redirect_subprogs__destroy(skel);
+}
+
+void test_cls_redirect(void)
+{
+ if (test__start_subtest("cls_redirect_inlined"))
+ test_cls_redirect_inlined();
+ if (test__start_subtest("cls_redirect_subprogs"))
+ test_cls_redirect_subprogs();
+}
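
Both flavors now funnel through test_cls_redirect_common(), with
test__start_subtest() gating each variant so the runner can select them
individually. A sketch of that gating pattern (function and subtest names
below are illustrative placeholders, not the test's real logic):

#include <test_progs.h>

static void run_variant_a(void) { /* open/load skeleton A, run shared body */ }
static void run_variant_b(void) { /* open/load skeleton B, run shared body */ }

void test_example(void)
{
	if (test__start_subtest("variant_a"))
		run_variant_a();
	if (test__start_subtest("variant_b"))
		run_variant_b();
}
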
diff --git a/tools/testing/selftests/bpf/prog_tests/core_autosize.c b/tools/testing/selftests/bpf/prog_tests/core_autosize.c
new file mode 100644
index 000000000000..981c251453d9
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/core_autosize.c
@@ -0,0 +1,225 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+/* real layout and sizes according to the test's (32-bit) BTF;
+ * needs to be defined before the skeleton is included */
+struct test_struct___real {
+ unsigned int ptr; /* can't use `void *`, it is always 8 byte in BPF target */
+ unsigned int val2;
+ unsigned long long val1;
+ unsigned short val3;
+ unsigned char val4;
+ unsigned char _pad;
+};
+
+#include "test_core_autosize.skel.h"
+
+static int duration = 0;
+
+static struct {
+ unsigned long long ptr_samesized;
+ unsigned long long val1_samesized;
+ unsigned long long val2_samesized;
+ unsigned long long val3_samesized;
+ unsigned long long val4_samesized;
+ struct test_struct___real output_samesized;
+
+ unsigned long long ptr_downsized;
+ unsigned long long val1_downsized;
+ unsigned long long val2_downsized;
+ unsigned long long val3_downsized;
+ unsigned long long val4_downsized;
+ struct test_struct___real output_downsized;
+
+ unsigned long long ptr_probed;
+ unsigned long long val1_probed;
+ unsigned long long val2_probed;
+ unsigned long long val3_probed;
+ unsigned long long val4_probed;
+
+ unsigned long long ptr_signed;
+ unsigned long long val1_signed;
+ unsigned long long val2_signed;
+ unsigned long long val3_signed;
+ unsigned long long val4_signed;
+ struct test_struct___real output_signed;
+} out;
+
+void test_core_autosize(void)
+{
+ char btf_file[] = "/tmp/core_autosize.btf.XXXXXX";
+ int err, fd = -1, zero = 0;
+ int char_id, short_id, int_id, long_long_id, void_ptr_id, id;
+ struct test_core_autosize* skel = NULL;
+ struct bpf_object_load_attr load_attr = {};
+ struct bpf_program *prog;
+ struct bpf_map *bss_map;
+ struct btf *btf = NULL;
+ size_t written;
+ const void *raw_data;
+ __u32 raw_sz;
+ FILE *f = NULL;
+
+ btf = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf, "empty_btf"))
+ return;
+ /* Emit the following struct with 32-bit pointer size:
+ *
+ * struct test_struct {
+ * void *ptr;
+ * unsigned long val2;
+ * unsigned long long val1;
+ * unsigned short val3;
+ * unsigned char val4;
+ * char: 8;
+ * };
+ *
+ * This struct is going to be used as the "kernel BTF" for this test.
+	 * It's equivalent memory-layout-wise to test_struct___real above.
+ */
+
+ /* force 32-bit pointer size */
+ btf__set_pointer_size(btf, 4);
+
+ char_id = btf__add_int(btf, "unsigned char", 1, 0);
+ ASSERT_EQ(char_id, 1, "char_id");
+ short_id = btf__add_int(btf, "unsigned short", 2, 0);
+ ASSERT_EQ(short_id, 2, "short_id");
+ /* "long unsigned int" of 4 byte size tells BTF that sizeof(void *) == 4 */
+ int_id = btf__add_int(btf, "long unsigned int", 4, 0);
+ ASSERT_EQ(int_id, 3, "int_id");
+ long_long_id = btf__add_int(btf, "unsigned long long", 8, 0);
+ ASSERT_EQ(long_long_id, 4, "long_long_id");
+ void_ptr_id = btf__add_ptr(btf, 0);
+ ASSERT_EQ(void_ptr_id, 5, "void_ptr_id");
+
+ id = btf__add_struct(btf, "test_struct", 20 /* bytes */);
+ ASSERT_EQ(id, 6, "struct_id");
+ err = btf__add_field(btf, "ptr", void_ptr_id, 0, 0);
+ err = err ?: btf__add_field(btf, "val2", int_id, 32, 0);
+ err = err ?: btf__add_field(btf, "val1", long_long_id, 64, 0);
+ err = err ?: btf__add_field(btf, "val3", short_id, 128, 0);
+ err = err ?: btf__add_field(btf, "val4", char_id, 144, 0);
+ ASSERT_OK(err, "struct_fields");
+
+ fd = mkstemp(btf_file);
+ if (CHECK(fd < 0, "btf_tmp", "failed to create file: %d\n", fd))
+ goto cleanup;
+ f = fdopen(fd, "w");
+ if (!ASSERT_OK_PTR(f, "btf_fdopen"))
+ goto cleanup;
+
+ raw_data = btf__get_raw_data(btf, &raw_sz);
+ if (!ASSERT_OK_PTR(raw_data, "raw_data"))
+ goto cleanup;
+ written = fwrite(raw_data, 1, raw_sz, f);
+ if (CHECK(written != raw_sz, "btf_write", "written: %zu, errno: %d\n", written, errno))
+ goto cleanup;
+ fflush(f);
+ fclose(f);
+ f = NULL;
+ close(fd);
+ fd = -1;
+
+ /* open and load BPF program with custom BTF as the kernel BTF */
+ skel = test_core_autosize__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ /* disable handle_signed() for now */
+ prog = bpf_object__find_program_by_name(skel->obj, "handle_signed");
+ if (!ASSERT_OK_PTR(prog, "prog_find"))
+ goto cleanup;
+ bpf_program__set_autoload(prog, false);
+
+ load_attr.obj = skel->obj;
+ load_attr.target_btf_path = btf_file;
+ err = bpf_object__load_xattr(&load_attr);
+ if (!ASSERT_OK(err, "prog_load"))
+ goto cleanup;
+
+ prog = bpf_object__find_program_by_name(skel->obj, "handle_samesize");
+ if (!ASSERT_OK_PTR(prog, "prog_find"))
+ goto cleanup;
+ skel->links.handle_samesize = bpf_program__attach(prog);
+ if (!ASSERT_OK_PTR(skel->links.handle_samesize, "prog_attach"))
+ goto cleanup;
+
+ prog = bpf_object__find_program_by_name(skel->obj, "handle_downsize");
+ if (!ASSERT_OK_PTR(prog, "prog_find"))
+ goto cleanup;
+ skel->links.handle_downsize = bpf_program__attach(prog);
+ if (!ASSERT_OK_PTR(skel->links.handle_downsize, "prog_attach"))
+ goto cleanup;
+
+ prog = bpf_object__find_program_by_name(skel->obj, "handle_probed");
+ if (!ASSERT_OK_PTR(prog, "prog_find"))
+ goto cleanup;
+ skel->links.handle_probed = bpf_program__attach(prog);
+ if (!ASSERT_OK_PTR(skel->links.handle_probed, "prog_attach"))
+ goto cleanup;
+
+ usleep(1);
+
+ bss_map = bpf_object__find_map_by_name(skel->obj, "test_cor.bss");
+ if (!ASSERT_OK_PTR(bss_map, "bss_map_find"))
+ goto cleanup;
+
+ err = bpf_map_lookup_elem(bpf_map__fd(bss_map), &zero, (void *)&out);
+ if (!ASSERT_OK(err, "bss_lookup"))
+ goto cleanup;
+
+ ASSERT_EQ(out.ptr_samesized, 0x01020304, "ptr_samesized");
+ ASSERT_EQ(out.val1_samesized, 0x1020304050607080, "val1_samesized");
+ ASSERT_EQ(out.val2_samesized, 0x0a0b0c0d, "val2_samesized");
+ ASSERT_EQ(out.val3_samesized, 0xfeed, "val3_samesized");
+ ASSERT_EQ(out.val4_samesized, 0xb9, "val4_samesized");
+ ASSERT_EQ(out.output_samesized.ptr, 0x01020304, "ptr_samesized");
+ ASSERT_EQ(out.output_samesized.val1, 0x1020304050607080, "val1_samesized");
+ ASSERT_EQ(out.output_samesized.val2, 0x0a0b0c0d, "val2_samesized");
+ ASSERT_EQ(out.output_samesized.val3, 0xfeed, "val3_samesized");
+ ASSERT_EQ(out.output_samesized.val4, 0xb9, "val4_samesized");
+
+ ASSERT_EQ(out.ptr_downsized, 0x01020304, "ptr_downsized");
+ ASSERT_EQ(out.val1_downsized, 0x1020304050607080, "val1_downsized");
+ ASSERT_EQ(out.val2_downsized, 0x0a0b0c0d, "val2_downsized");
+ ASSERT_EQ(out.val3_downsized, 0xfeed, "val3_downsized");
+ ASSERT_EQ(out.val4_downsized, 0xb9, "val4_downsized");
+ ASSERT_EQ(out.output_downsized.ptr, 0x01020304, "ptr_downsized");
+ ASSERT_EQ(out.output_downsized.val1, 0x1020304050607080, "val1_downsized");
+ ASSERT_EQ(out.output_downsized.val2, 0x0a0b0c0d, "val2_downsized");
+ ASSERT_EQ(out.output_downsized.val3, 0xfeed, "val3_downsized");
+ ASSERT_EQ(out.output_downsized.val4, 0xb9, "val4_downsized");
+
+ ASSERT_EQ(out.ptr_probed, 0x01020304, "ptr_probed");
+ ASSERT_EQ(out.val1_probed, 0x1020304050607080, "val1_probed");
+ ASSERT_EQ(out.val2_probed, 0x0a0b0c0d, "val2_probed");
+ ASSERT_EQ(out.val3_probed, 0xfeed, "val3_probed");
+ ASSERT_EQ(out.val4_probed, 0xb9, "val4_probed");
+
+ test_core_autosize__destroy(skel);
+ skel = NULL;
+
+ /* now re-load with handle_signed() enabled, it should fail loading */
+ skel = test_core_autosize__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ load_attr.obj = skel->obj;
+ load_attr.target_btf_path = btf_file;
+ err = bpf_object__load_xattr(&load_attr);
+ if (!ASSERT_ERR(err, "bad_prog_load"))
+ goto cleanup;
+
+cleanup:
+ if (f)
+ fclose(f);
+ if (fd >= 0)
+ close(fd);
+ remove(btf_file);
+ btf__free(btf);
+ test_core_autosize__destroy(skel);
+}
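
The test serializes its hand-built BTF with btf__get_raw_data() and points
bpf_object_load_attr.target_btf_path at the resulting file, so libbpf
performs CO-RE relocations against it instead of the running kernel's BTF.
A sketch of just the serialization step, assuming a writable path:

#include <stdio.h>
#include <bpf/btf.h>

static int write_btf_file(const struct btf *btf, const char *path)
{
	const void *data;
	__u32 sz;
	FILE *f;
	int err = 0;

	data = btf__get_raw_data(btf, &sz);
	if (!data)
		return -1;

	f = fopen(path, "wb");
	if (!f)
		return -1;
	if (fwrite(data, 1, sz, f) != sz)
		err = -1;
	fclose(f);
	return err;
}
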
diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index a54eafc5e4b3..30e40ff4b0d8 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -3,6 +3,9 @@
#include "progs/core_reloc_types.h"
#include <sys/mman.h>
#include <sys/syscall.h>
+#include <bpf/btf.h>
+
+static int duration = 0;
#define STRUCT_TO_CHAR_PTR(struct_name) (const char *)&(struct struct_name)
@@ -177,14 +180,13 @@
.fails = true, \
}
-#define EXISTENCE_CASE_COMMON(name) \
+#define FIELD_EXISTS_CASE_COMMON(name) \
.case_name = #name, \
.bpf_obj_file = "test_core_reloc_existence.o", \
- .btf_src_file = "btf__core_reloc_" #name ".o", \
- .relaxed_core_relocs = true
+ .btf_src_file = "btf__core_reloc_" #name ".o" \
-#define EXISTENCE_ERR_CASE(name) { \
- EXISTENCE_CASE_COMMON(name), \
+#define FIELD_EXISTS_ERR_CASE(name) { \
+ FIELD_EXISTS_CASE_COMMON(name), \
.fails = true, \
}
@@ -253,6 +255,61 @@
.fails = true, \
}
+#define TYPE_BASED_CASE_COMMON(name) \
+ .case_name = #name, \
+ .bpf_obj_file = "test_core_reloc_type_based.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".o" \
+
+#define TYPE_BASED_CASE(name, ...) { \
+ TYPE_BASED_CASE_COMMON(name), \
+ .output = STRUCT_TO_CHAR_PTR(core_reloc_type_based_output) \
+ __VA_ARGS__, \
+ .output_len = sizeof(struct core_reloc_type_based_output), \
+}
+
+#define TYPE_BASED_ERR_CASE(name) { \
+ TYPE_BASED_CASE_COMMON(name), \
+ .fails = true, \
+}
+
+#define TYPE_ID_CASE_COMMON(name) \
+ .case_name = #name, \
+ .bpf_obj_file = "test_core_reloc_type_id.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".o" \
+
+#define TYPE_ID_CASE(name, setup_fn) { \
+ TYPE_ID_CASE_COMMON(name), \
+ .output = STRUCT_TO_CHAR_PTR(core_reloc_type_id_output) {}, \
+ .output_len = sizeof(struct core_reloc_type_id_output), \
+ .setup = setup_fn, \
+}
+
+#define TYPE_ID_ERR_CASE(name) { \
+ TYPE_ID_CASE_COMMON(name), \
+ .fails = true, \
+}
+
+#define ENUMVAL_CASE_COMMON(name) \
+ .case_name = #name, \
+ .bpf_obj_file = "test_core_reloc_enumval.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".o" \
+
+#define ENUMVAL_CASE(name, ...) { \
+ ENUMVAL_CASE_COMMON(name), \
+ .output = STRUCT_TO_CHAR_PTR(core_reloc_enumval_output) \
+ __VA_ARGS__, \
+ .output_len = sizeof(struct core_reloc_enumval_output), \
+}
+
+#define ENUMVAL_ERR_CASE(name) { \
+ ENUMVAL_CASE_COMMON(name), \
+ .fails = true, \
+}
+
+struct core_reloc_test_case;
+
+typedef int (*setup_test_fn)(struct core_reloc_test_case *test);
+
struct core_reloc_test_case {
const char *case_name;
const char *bpf_obj_file;
@@ -264,8 +321,136 @@ struct core_reloc_test_case {
bool fails;
bool relaxed_core_relocs;
bool direct_raw_tp;
+ setup_test_fn setup;
};
+static int find_btf_type(const struct btf *btf, const char *name, __u32 kind)
+{
+ int id;
+
+ id = btf__find_by_name_kind(btf, name, kind);
+ if (CHECK(id <= 0, "find_type_id", "failed to find '%s', kind %d: %d\n", name, kind, id))
+ return -1;
+
+ return id;
+}
+
+static int setup_type_id_case_local(struct core_reloc_test_case *test)
+{
+ struct core_reloc_type_id_output *exp = (void *)test->output;
+ struct btf *local_btf = btf__parse(test->bpf_obj_file, NULL);
+ struct btf *targ_btf = btf__parse(test->btf_src_file, NULL);
+ const struct btf_type *t;
+ const char *name;
+ int i;
+
+ if (CHECK(IS_ERR(local_btf), "local_btf", "failed: %ld\n", PTR_ERR(local_btf)) ||
+ CHECK(IS_ERR(targ_btf), "targ_btf", "failed: %ld\n", PTR_ERR(targ_btf))) {
+ btf__free(local_btf);
+ btf__free(targ_btf);
+ return -EINVAL;
+ }
+
+ exp->local_anon_struct = -1;
+ exp->local_anon_union = -1;
+ exp->local_anon_enum = -1;
+ exp->local_anon_func_proto_ptr = -1;
+ exp->local_anon_void_ptr = -1;
+ exp->local_anon_arr = -1;
+
+	for (i = 1; i <= btf__get_nr_types(local_btf); i++) {
+ t = btf__type_by_id(local_btf, i);
+ /* we are interested only in anonymous types */
+ if (t->name_off)
+ continue;
+
+ if (btf_is_struct(t) && btf_vlen(t) &&
+ (name = btf__name_by_offset(local_btf, btf_members(t)[0].name_off)) &&
+ strcmp(name, "marker_field") == 0) {
+ exp->local_anon_struct = i;
+ } else if (btf_is_union(t) && btf_vlen(t) &&
+ (name = btf__name_by_offset(local_btf, btf_members(t)[0].name_off)) &&
+ strcmp(name, "marker_field") == 0) {
+ exp->local_anon_union = i;
+ } else if (btf_is_enum(t) && btf_vlen(t) &&
+ (name = btf__name_by_offset(local_btf, btf_enum(t)[0].name_off)) &&
+ strcmp(name, "MARKER_ENUM_VAL") == 0) {
+ exp->local_anon_enum = i;
+ } else if (btf_is_ptr(t) && (t = btf__type_by_id(local_btf, t->type))) {
+ if (btf_is_func_proto(t) && (t = btf__type_by_id(local_btf, t->type)) &&
+ btf_is_int(t) && (name = btf__name_by_offset(local_btf, t->name_off)) &&
+ strcmp(name, "_Bool") == 0) {
+ /* ptr -> func_proto -> _Bool */
+ exp->local_anon_func_proto_ptr = i;
+ } else if (btf_is_void(t)) {
+ /* ptr -> void */
+ exp->local_anon_void_ptr = i;
+ }
+ } else if (btf_is_array(t) && (t = btf__type_by_id(local_btf, btf_array(t)->type)) &&
+ btf_is_int(t) && (name = btf__name_by_offset(local_btf, t->name_off)) &&
+ strcmp(name, "_Bool") == 0) {
+ /* _Bool[] */
+ exp->local_anon_arr = i;
+ }
+ }
+
+ exp->local_struct = find_btf_type(local_btf, "a_struct", BTF_KIND_STRUCT);
+ exp->local_union = find_btf_type(local_btf, "a_union", BTF_KIND_UNION);
+ exp->local_enum = find_btf_type(local_btf, "an_enum", BTF_KIND_ENUM);
+ exp->local_int = find_btf_type(local_btf, "int", BTF_KIND_INT);
+ exp->local_struct_typedef = find_btf_type(local_btf, "named_struct_typedef", BTF_KIND_TYPEDEF);
+ exp->local_func_proto_typedef = find_btf_type(local_btf, "func_proto_typedef", BTF_KIND_TYPEDEF);
+ exp->local_arr_typedef = find_btf_type(local_btf, "arr_typedef", BTF_KIND_TYPEDEF);
+
+ btf__free(local_btf);
+ btf__free(targ_btf);
+ return 0;
+}
+
+static int setup_type_id_case_success(struct core_reloc_test_case *test)
+{
+ struct core_reloc_type_id_output *exp = (void *)test->output;
+	struct btf *targ_btf;
+ int err;
+
+ err = setup_type_id_case_local(test);
+ if (err)
+ return err;
+
+ targ_btf = btf__parse(test->btf_src_file, NULL);
+
+ exp->targ_struct = find_btf_type(targ_btf, "a_struct", BTF_KIND_STRUCT);
+ exp->targ_union = find_btf_type(targ_btf, "a_union", BTF_KIND_UNION);
+ exp->targ_enum = find_btf_type(targ_btf, "an_enum", BTF_KIND_ENUM);
+ exp->targ_int = find_btf_type(targ_btf, "int", BTF_KIND_INT);
+ exp->targ_struct_typedef = find_btf_type(targ_btf, "named_struct_typedef", BTF_KIND_TYPEDEF);
+ exp->targ_func_proto_typedef = find_btf_type(targ_btf, "func_proto_typedef", BTF_KIND_TYPEDEF);
+ exp->targ_arr_typedef = find_btf_type(targ_btf, "arr_typedef", BTF_KIND_TYPEDEF);
+
+ btf__free(targ_btf);
+ return 0;
+}
+
+static int setup_type_id_case_failure(struct core_reloc_test_case *test)
+{
+ struct core_reloc_type_id_output *exp = (void *)test->output;
+ int err;
+
+ err = setup_type_id_case_local(test);
+ if (err)
+ return err;
+
+ exp->targ_struct = 0;
+ exp->targ_union = 0;
+ exp->targ_enum = 0;
+ exp->targ_int = 0;
+ exp->targ_struct_typedef = 0;
+ exp->targ_func_proto_typedef = 0;
+ exp->targ_arr_typedef = 0;
+
+ return 0;
+}
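
find_btf_type() above is a thin wrapper over btf__find_by_name_kind(),
which returns a strictly positive type ID on success and a non-positive
error value otherwise; a minimal usage sketch:

#include <bpf/btf.h>

static int int_type_id(const struct btf *btf)
{
	int id = btf__find_by_name_kind(btf, "int", BTF_KIND_INT);

	return id > 0 ? id : -1; /* <= 0: not found or error */
}
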
+
static struct core_reloc_test_case test_cases[] = {
/* validate we can find kernel image and use its BTF for relocs */
{
@@ -364,7 +549,7 @@ static struct core_reloc_test_case test_cases[] = {
/* validate field existence checks */
{
- EXISTENCE_CASE_COMMON(existence),
+ FIELD_EXISTS_CASE_COMMON(existence),
.input = STRUCT_TO_CHAR_PTR(core_reloc_existence) {
.a = 1,
.b = 2,
@@ -388,7 +573,7 @@ static struct core_reloc_test_case test_cases[] = {
.output_len = sizeof(struct core_reloc_existence_output),
},
{
- EXISTENCE_CASE_COMMON(existence___minimal),
+ FIELD_EXISTS_CASE_COMMON(existence___minimal),
.input = STRUCT_TO_CHAR_PTR(core_reloc_existence___minimal) {
.a = 42,
},
@@ -408,12 +593,12 @@ static struct core_reloc_test_case test_cases[] = {
.output_len = sizeof(struct core_reloc_existence_output),
},
- EXISTENCE_ERR_CASE(existence__err_int_sz),
- EXISTENCE_ERR_CASE(existence__err_int_type),
- EXISTENCE_ERR_CASE(existence__err_int_kind),
- EXISTENCE_ERR_CASE(existence__err_arr_kind),
- EXISTENCE_ERR_CASE(existence__err_arr_value_type),
- EXISTENCE_ERR_CASE(existence__err_struct_type),
+ FIELD_EXISTS_ERR_CASE(existence__err_int_sz),
+ FIELD_EXISTS_ERR_CASE(existence__err_int_type),
+ FIELD_EXISTS_ERR_CASE(existence__err_int_kind),
+ FIELD_EXISTS_ERR_CASE(existence__err_arr_kind),
+ FIELD_EXISTS_ERR_CASE(existence__err_arr_value_type),
+ FIELD_EXISTS_ERR_CASE(existence__err_struct_type),
/* bitfield relocation checks */
BITFIELDS_CASE(bitfields, {
@@ -452,11 +637,117 @@ static struct core_reloc_test_case test_cases[] = {
/* size relocation checks */
SIZE_CASE(size),
SIZE_CASE(size___diff_sz),
+ SIZE_ERR_CASE(size___err_ambiguous),
+
+ /* validate type existence and size relocations */
+ TYPE_BASED_CASE(type_based, {
+ .struct_exists = 1,
+ .union_exists = 1,
+ .enum_exists = 1,
+ .typedef_named_struct_exists = 1,
+ .typedef_anon_struct_exists = 1,
+ .typedef_struct_ptr_exists = 1,
+ .typedef_int_exists = 1,
+ .typedef_enum_exists = 1,
+ .typedef_void_ptr_exists = 1,
+ .typedef_func_proto_exists = 1,
+ .typedef_arr_exists = 1,
+ .struct_sz = sizeof(struct a_struct),
+ .union_sz = sizeof(union a_union),
+ .enum_sz = sizeof(enum an_enum),
+ .typedef_named_struct_sz = sizeof(named_struct_typedef),
+ .typedef_anon_struct_sz = sizeof(anon_struct_typedef),
+ .typedef_struct_ptr_sz = sizeof(struct_ptr_typedef),
+ .typedef_int_sz = sizeof(int_typedef),
+ .typedef_enum_sz = sizeof(enum_typedef),
+ .typedef_void_ptr_sz = sizeof(void_ptr_typedef),
+ .typedef_func_proto_sz = sizeof(func_proto_typedef),
+ .typedef_arr_sz = sizeof(arr_typedef),
+ }),
+ TYPE_BASED_CASE(type_based___all_missing, {
+ /* all zeros */
+ }),
+ TYPE_BASED_CASE(type_based___diff_sz, {
+ .struct_exists = 1,
+ .union_exists = 1,
+ .enum_exists = 1,
+ .typedef_named_struct_exists = 1,
+ .typedef_anon_struct_exists = 1,
+ .typedef_struct_ptr_exists = 1,
+ .typedef_int_exists = 1,
+ .typedef_enum_exists = 1,
+ .typedef_void_ptr_exists = 1,
+ .typedef_func_proto_exists = 1,
+ .typedef_arr_exists = 1,
+ .struct_sz = sizeof(struct a_struct___diff_sz),
+ .union_sz = sizeof(union a_union___diff_sz),
+ .enum_sz = sizeof(enum an_enum___diff_sz),
+ .typedef_named_struct_sz = sizeof(named_struct_typedef___diff_sz),
+ .typedef_anon_struct_sz = sizeof(anon_struct_typedef___diff_sz),
+ .typedef_struct_ptr_sz = sizeof(struct_ptr_typedef___diff_sz),
+ .typedef_int_sz = sizeof(int_typedef___diff_sz),
+ .typedef_enum_sz = sizeof(enum_typedef___diff_sz),
+ .typedef_void_ptr_sz = sizeof(void_ptr_typedef___diff_sz),
+ .typedef_func_proto_sz = sizeof(func_proto_typedef___diff_sz),
+ .typedef_arr_sz = sizeof(arr_typedef___diff_sz),
+ }),
+ TYPE_BASED_CASE(type_based___incompat, {
+ .enum_exists = 1,
+ .enum_sz = sizeof(enum an_enum),
+ }),
+ TYPE_BASED_CASE(type_based___fn_wrong_args, {
+ .struct_exists = 1,
+ .struct_sz = sizeof(struct a_struct),
+ }),
+
+ /* BTF_TYPE_ID_LOCAL/BTF_TYPE_ID_TARGET tests */
+ TYPE_ID_CASE(type_id, setup_type_id_case_success),
+ TYPE_ID_CASE(type_id___missing_targets, setup_type_id_case_failure),
+
+ /* Enumerator value existence and value relocations */
+ ENUMVAL_CASE(enumval, {
+ .named_val1_exists = true,
+ .named_val2_exists = true,
+ .named_val3_exists = true,
+ .anon_val1_exists = true,
+ .anon_val2_exists = true,
+ .anon_val3_exists = true,
+ .named_val1 = 1,
+ .named_val2 = 2,
+ .anon_val1 = 0x10,
+ .anon_val2 = 0x20,
+ }),
+ ENUMVAL_CASE(enumval___diff, {
+ .named_val1_exists = true,
+ .named_val2_exists = true,
+ .named_val3_exists = true,
+ .anon_val1_exists = true,
+ .anon_val2_exists = true,
+ .anon_val3_exists = true,
+ .named_val1 = 101,
+ .named_val2 = 202,
+ .anon_val1 = 0x11,
+ .anon_val2 = 0x22,
+ }),
+ ENUMVAL_CASE(enumval___val3_missing, {
+ .named_val1_exists = true,
+ .named_val2_exists = true,
+ .named_val3_exists = false,
+ .anon_val1_exists = true,
+ .anon_val2_exists = true,
+ .anon_val3_exists = false,
+ .named_val1 = 111,
+ .named_val2 = 222,
+ .anon_val1 = 0x111,
+ .anon_val2 = 0x222,
+ }),
+ ENUMVAL_ERR_CASE(enumval___err_missing),
};
struct data {
char in[256];
char out[256];
+ bool skip;
uint64_t my_pid_tgid;
};
@@ -472,7 +763,7 @@ void test_core_reloc(void)
struct bpf_object_load_attr load_attr = {};
struct core_reloc_test_case *test_case;
const char *tp_name, *probe_name;
- int err, duration = 0, i, equal;
+ int err, i, equal;
struct bpf_link *link = NULL;
struct bpf_map *data_map;
struct bpf_program *prog;
@@ -488,11 +779,13 @@ void test_core_reloc(void)
if (!test__start_subtest(test_case->case_name))
continue;
- DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
- .relaxed_core_relocs = test_case->relaxed_core_relocs,
- );
+ if (test_case->setup) {
+ err = test_case->setup(test_case);
+ if (CHECK(err, "test_setup", "test #%d setup failed: %d\n", i, err))
+ continue;
+ }
- obj = bpf_object__open_file(test_case->bpf_obj_file, &opts);
+ obj = bpf_object__open_file(test_case->bpf_obj_file, NULL);
if (CHECK(IS_ERR(obj), "obj_open", "failed to open '%s': %ld\n",
test_case->bpf_obj_file, PTR_ERR(obj)))
continue;
@@ -515,15 +808,10 @@ void test_core_reloc(void)
load_attr.log_level = 0;
load_attr.target_btf_path = test_case->btf_src_file;
err = bpf_object__load_xattr(&load_attr);
- if (test_case->fails) {
- CHECK(!err, "obj_load_fail",
- "should fail to load prog '%s'\n", probe_name);
+ if (err) {
+ if (!test_case->fails)
+ CHECK(false, "obj_load", "failed to load prog '%s': %d\n", probe_name, err);
goto cleanup;
- } else {
- if (CHECK(err, "obj_load",
- "failed to load prog '%s': %d\n",
- probe_name, err))
- goto cleanup;
}
data_map = bpf_object__find_map_by_name(obj, "test_cor.bss");
@@ -551,6 +839,16 @@ void test_core_reloc(void)
/* trigger test run */
usleep(1);
+ if (data->skip) {
+ test__skip();
+ goto cleanup;
+ }
+
+ if (test_case->fails) {
+ CHECK(false, "obj_load_fail", "should fail to load prog '%s'\n", probe_name);
+ goto cleanup;
+ }
+
equal = memcmp(data->out, test_case->output,
test_case->output_len) == 0;
if (CHECK(!equal, "check_result",
diff --git a/tools/testing/selftests/bpf/prog_tests/d_path.c b/tools/testing/selftests/bpf/prog_tests/d_path.c
new file mode 100644
index 000000000000..0a577a248d34
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/d_path.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <test_progs.h>
+#include <sys/stat.h>
+#include <linux/sched.h>
+#include <sys/syscall.h>
+
+#define MAX_PATH_LEN 128
+#define MAX_FILES 7
+
+#include "test_d_path.skel.h"
+
+static int duration;
+
+static struct {
+ __u32 cnt;
+ char paths[MAX_FILES][MAX_PATH_LEN];
+} src;
+
+static int set_pathname(int fd, pid_t pid)
+{
+ char buf[MAX_PATH_LEN];
+
+ snprintf(buf, MAX_PATH_LEN, "/proc/%d/fd/%d", pid, fd);
+ return readlink(buf, src.paths[src.cnt++], MAX_PATH_LEN);
+}
+
+static int trigger_fstat_events(pid_t pid)
+{
+ int sockfd = -1, procfd = -1, devfd = -1;
+ int localfd = -1, indicatorfd = -1;
+ int pipefd[2] = { -1, -1 };
+ struct stat fileStat;
+ int ret = -1;
+
+ /* unmountable pseudo-filesystems */
+ if (CHECK(pipe(pipefd) < 0, "trigger", "pipe failed\n"))
+ return ret;
+ /* unmountable pseudo-filesystems */
+ sockfd = socket(AF_INET, SOCK_STREAM, 0);
+ if (CHECK(sockfd < 0, "trigger", "socket failed\n"))
+ goto out_close;
+ /* mountable pseudo-filesystems */
+ procfd = open("/proc/self/comm", O_RDONLY);
+ if (CHECK(procfd < 0, "trigger", "open /proc/self/comm failed\n"))
+ goto out_close;
+ devfd = open("/dev/urandom", O_RDONLY);
+ if (CHECK(devfd < 0, "trigger", "open /dev/urandom failed\n"))
+ goto out_close;
+ localfd = open("/tmp/d_path_loadgen.txt", O_CREAT | O_RDONLY, 0644);
+ if (CHECK(localfd < 0, "trigger", "open /tmp/d_path_loadgen.txt failed\n"))
+ goto out_close;
+ /* bpf_d_path will return path with (deleted) */
+ remove("/tmp/d_path_loadgen.txt");
+ indicatorfd = open("/tmp/", O_PATH);
+ if (CHECK(indicatorfd < 0, "trigger", "open /tmp/ failed\n"))
+ goto out_close;
+
+ ret = set_pathname(pipefd[0], pid);
+ if (CHECK(ret < 0, "trigger", "set_pathname failed for pipe[0]\n"))
+ goto out_close;
+ ret = set_pathname(pipefd[1], pid);
+ if (CHECK(ret < 0, "trigger", "set_pathname failed for pipe[1]\n"))
+ goto out_close;
+ ret = set_pathname(sockfd, pid);
+ if (CHECK(ret < 0, "trigger", "set_pathname failed for socket\n"))
+ goto out_close;
+ ret = set_pathname(procfd, pid);
+ if (CHECK(ret < 0, "trigger", "set_pathname failed for proc\n"))
+ goto out_close;
+ ret = set_pathname(devfd, pid);
+ if (CHECK(ret < 0, "trigger", "set_pathname failed for dev\n"))
+ goto out_close;
+ ret = set_pathname(localfd, pid);
+ if (CHECK(ret < 0, "trigger", "set_pathname failed for file\n"))
+ goto out_close;
+ ret = set_pathname(indicatorfd, pid);
+ if (CHECK(ret < 0, "trigger", "set_pathname failed for dir\n"))
+ goto out_close;
+
+ /* triggers vfs_getattr */
+ fstat(pipefd[0], &fileStat);
+ fstat(pipefd[1], &fileStat);
+ fstat(sockfd, &fileStat);
+ fstat(procfd, &fileStat);
+ fstat(devfd, &fileStat);
+ fstat(localfd, &fileStat);
+ fstat(indicatorfd, &fileStat);
+
+out_close:
+ /* triggers filp_close */
+ close(pipefd[0]);
+ close(pipefd[1]);
+ close(sockfd);
+ close(procfd);
+ close(devfd);
+ close(localfd);
+ close(indicatorfd);
+ return ret;
+}
+
+void test_d_path(void)
+{
+ struct test_d_path__bss *bss;
+ struct test_d_path *skel;
+ int err;
+
+ skel = test_d_path__open_and_load();
+ if (CHECK(!skel, "setup", "d_path skeleton failed\n"))
+ goto cleanup;
+
+ err = test_d_path__attach(skel);
+ if (CHECK(err, "setup", "attach failed: %d\n", err))
+ goto cleanup;
+
+ bss = skel->bss;
+ bss->my_pid = getpid();
+
+ err = trigger_fstat_events(bss->my_pid);
+ if (err < 0)
+ goto cleanup;
+
+ if (CHECK(!bss->called_stat,
+ "stat",
+ "trampoline for security_inode_getattr was not called\n"))
+ goto cleanup;
+
+ if (CHECK(!bss->called_close,
+ "close",
+ "trampoline for filp_close was not called\n"))
+ goto cleanup;
+
+ for (int i = 0; i < MAX_FILES; i++) {
+ CHECK(strncmp(src.paths[i], bss->paths_stat[i], MAX_PATH_LEN),
+ "check",
+ "failed to get stat path[%d]: %s vs %s\n",
+ i, src.paths[i], bss->paths_stat[i]);
+ CHECK(strncmp(src.paths[i], bss->paths_close[i], MAX_PATH_LEN),
+ "check",
+ "failed to get close path[%d]: %s vs %s\n",
+ i, src.paths[i], bss->paths_close[i]);
+ /* The d_path helper returns size plus NUL char, hence + 1 */
+ CHECK(bss->rets_stat[i] != strlen(bss->paths_stat[i]) + 1,
+ "check",
+ "failed to match stat return [%d]: %d vs %zd [%s]\n",
+ i, bss->rets_stat[i], strlen(bss->paths_stat[i]) + 1,
+ bss->paths_stat[i]);
+		CHECK(bss->rets_close[i] != strlen(bss->paths_close[i]) + 1,
+		      "check",
+		      "failed to match close return [%d]: %d vs %zd [%s]\n",
+		      i, bss->rets_close[i], strlen(bss->paths_close[i]) + 1,
+		      bss->paths_close[i]);
+ }
+
+cleanup:
+ test_d_path__destroy(skel);
+}
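
set_pathname() resolves an FD back to a path by readlink()ing
/proc/<pid>/fd/<fd>; note readlink(2) does not NUL-terminate, which the
test gets away with because its destination buffers sit in zero-initialized
static storage. A more defensive sketch of the same technique:

#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>

static int fd_to_path(pid_t pid, int fd, char *out, size_t sz)
{
	char link[64];
	ssize_t n;

	snprintf(link, sizeof(link), "/proc/%d/fd/%d", (int)pid, fd);
	n = readlink(link, out, sz - 1);
	if (n < 0)
		return -1;
	out[n] = '\0'; /* readlink() leaves the buffer unterminated */
	return 0;
}
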
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
index 197d0d217b56..5c0448910426 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
@@ -2,36 +2,79 @@
/* Copyright (c) 2019 Facebook */
#include <test_progs.h>
#include <network_helpers.h>
+#include <bpf/btf.h>
+
+typedef int (*test_cb)(struct bpf_object *obj);
+
+static int check_data_map(struct bpf_object *obj, int prog_cnt, bool reset)
+{
+ struct bpf_map *data_map = NULL, *map;
+ __u64 *result = NULL;
+ const int zero = 0;
+ __u32 duration = 0;
+ int ret = -1, i;
+
+ result = malloc((prog_cnt + 32 /* spare */) * sizeof(__u64));
+ if (CHECK(!result, "alloc_memory", "failed to alloc memory"))
+ return -ENOMEM;
+
+ bpf_object__for_each_map(map, obj)
+ if (bpf_map__is_internal(map)) {
+ data_map = map;
+ break;
+ }
+ if (CHECK(!data_map, "find_data_map", "data map not found\n"))
+ goto out;
+
+ ret = bpf_map_lookup_elem(bpf_map__fd(data_map), &zero, result);
+ if (CHECK(ret, "get_result",
+ "failed to get output data: %d\n", ret))
+ goto out;
+
+ for (i = 0; i < prog_cnt; i++) {
+ if (CHECK(result[i] != 1, "result",
+ "fexit_bpf2bpf result[%d] failed err %llu\n",
+ i, result[i]))
+ goto out;
+ result[i] = 0;
+ }
+ if (reset) {
+ ret = bpf_map_update_elem(bpf_map__fd(data_map), &zero, result, 0);
+ if (CHECK(ret, "reset_result", "failed to reset result\n"))
+ goto out;
+ }
+
+ ret = 0;
+out:
+ free(result);
+ return ret;
+}
static void test_fexit_bpf2bpf_common(const char *obj_file,
const char *target_obj_file,
int prog_cnt,
const char **prog_name,
- bool run_prog)
+ bool run_prog,
+ test_cb cb)
{
- struct bpf_object *obj = NULL, *pkt_obj;
- int err, pkt_fd, i;
- struct bpf_link **link = NULL;
+ struct bpf_object *obj = NULL, *tgt_obj;
struct bpf_program **prog = NULL;
+ struct bpf_link **link = NULL;
__u32 duration = 0, retval;
- struct bpf_map *data_map;
- const int zero = 0;
- __u64 *result = NULL;
+ int err, tgt_fd, i;
err = bpf_prog_load(target_obj_file, BPF_PROG_TYPE_UNSPEC,
- &pkt_obj, &pkt_fd);
+ &tgt_obj, &tgt_fd);
if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n",
target_obj_file, err, errno))
return;
DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
- .attach_prog_fd = pkt_fd,
+ .attach_prog_fd = tgt_fd,
);
link = calloc(sizeof(struct bpf_link *), prog_cnt);
prog = calloc(sizeof(struct bpf_program *), prog_cnt);
- result = malloc((prog_cnt + 32 /* spare */) * sizeof(__u64));
- if (CHECK(!link || !prog || !result, "alloc_memory",
- "failed to alloc memory"))
+ if (CHECK(!link || !prog, "alloc_memory", "failed to alloc memory"))
goto close_prog;
obj = bpf_object__open_file(obj_file, &opts);
@@ -53,39 +96,33 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
goto close_prog;
}
- if (!run_prog)
- goto close_prog;
+ if (cb) {
+ err = cb(obj);
+ if (err)
+ goto close_prog;
+ }
- data_map = bpf_object__find_map_by_name(obj, "fexit_bp.bss");
- if (CHECK(!data_map, "find_data_map", "data map not found\n"))
+ if (!run_prog)
goto close_prog;
- err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6),
+ err = bpf_prog_test_run(tgt_fd, 1, &pkt_v6, sizeof(pkt_v6),
NULL, NULL, &retval, &duration);
CHECK(err || retval, "ipv6",
"err %d errno %d retval %d duration %d\n",
err, errno, retval, duration);
- err = bpf_map_lookup_elem(bpf_map__fd(data_map), &zero, result);
- if (CHECK(err, "get_result",
- "failed to get output data: %d\n", err))
+ if (check_data_map(obj, prog_cnt, false))
goto close_prog;
- for (i = 0; i < prog_cnt; i++)
- if (CHECK(result[i] != 1, "result", "fexit_bpf2bpf failed err %llu\n",
- result[i]))
- goto close_prog;
-
close_prog:
for (i = 0; i < prog_cnt; i++)
if (!IS_ERR_OR_NULL(link[i]))
bpf_link__destroy(link[i]);
if (!IS_ERR_OR_NULL(obj))
bpf_object__close(obj);
- bpf_object__close(pkt_obj);
+ bpf_object__close(tgt_obj);
free(link);
free(prog);
- free(result);
}
static void test_target_no_callees(void)
@@ -96,7 +133,7 @@ static void test_target_no_callees(void)
test_fexit_bpf2bpf_common("./fexit_bpf2bpf_simple.o",
"./test_pkt_md_access.o",
ARRAY_SIZE(prog_name),
- prog_name, true);
+ prog_name, true, NULL);
}
static void test_target_yes_callees(void)
@@ -110,7 +147,7 @@ static void test_target_yes_callees(void)
test_fexit_bpf2bpf_common("./fexit_bpf2bpf.o",
"./test_pkt_access.o",
ARRAY_SIZE(prog_name),
- prog_name, true);
+ prog_name, true, NULL);
}
static void test_func_replace(void)
@@ -123,11 +160,12 @@ static void test_func_replace(void)
"freplace/get_skb_len",
"freplace/get_skb_ifindex",
"freplace/get_constant",
+ "freplace/test_pkt_write_access_subprog",
};
test_fexit_bpf2bpf_common("./fexit_bpf2bpf.o",
"./test_pkt_access.o",
ARRAY_SIZE(prog_name),
- prog_name, true);
+ prog_name, true, NULL);
}
static void test_func_replace_verify(void)
@@ -138,13 +176,198 @@ static void test_func_replace_verify(void)
test_fexit_bpf2bpf_common("./freplace_connect4.o",
"./connect4_prog.o",
ARRAY_SIZE(prog_name),
- prog_name, false);
+ prog_name, false, NULL);
+}
+
+static int test_second_attach(struct bpf_object *obj)
+{
+ const char *prog_name = "freplace/get_constant";
+ const char *tgt_name = prog_name + 9; /* cut off freplace/ */
+ const char *tgt_obj_file = "./test_pkt_access.o";
+ struct bpf_program *prog = NULL;
+ struct bpf_object *tgt_obj;
+ __u32 duration = 0, retval;
+ struct bpf_link *link;
+ int err = 0, tgt_fd;
+
+ prog = bpf_object__find_program_by_title(obj, prog_name);
+ if (CHECK(!prog, "find_prog", "prog %s not found\n", prog_name))
+ return -ENOENT;
+
+ err = bpf_prog_load(tgt_obj_file, BPF_PROG_TYPE_UNSPEC,
+ &tgt_obj, &tgt_fd);
+ if (CHECK(err, "second_prog_load", "file %s err %d errno %d\n",
+ tgt_obj_file, err, errno))
+ return err;
+
+ link = bpf_program__attach_freplace(prog, tgt_fd, tgt_name);
+ if (CHECK(IS_ERR(link), "second_link", "failed to attach second link prog_fd %d tgt_fd %d\n", bpf_program__fd(prog), tgt_fd))
+ goto out;
+
+ err = bpf_prog_test_run(tgt_fd, 1, &pkt_v6, sizeof(pkt_v6),
+ NULL, NULL, &retval, &duration);
+ if (CHECK(err || retval, "ipv6",
+ "err %d errno %d retval %d duration %d\n",
+ err, errno, retval, duration))
+ goto out;
+
+ err = check_data_map(obj, 1, true);
+ if (err)
+ goto out;
+
+out:
+ bpf_link__destroy(link);
+ bpf_object__close(tgt_obj);
+ return err;
+}
+
+static void test_func_replace_multi(void)
+{
+ const char *prog_name[] = {
+ "freplace/get_constant",
+ };
+ test_fexit_bpf2bpf_common("./freplace_get_constant.o",
+ "./test_pkt_access.o",
+ ARRAY_SIZE(prog_name),
+ prog_name, true, test_second_attach);
+}
+
+static void test_fmod_ret_freplace(void)
+{
+ struct bpf_object *freplace_obj = NULL, *pkt_obj, *fmod_obj = NULL;
+ const char *freplace_name = "./freplace_get_constant.o";
+ const char *fmod_ret_name = "./fmod_ret_freplace.o";
+ DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts);
+ const char *tgt_name = "./test_pkt_access.o";
+ struct bpf_link *freplace_link = NULL;
+ struct bpf_program *prog;
+ __u32 duration = 0;
+ int err, pkt_fd;
+
+ err = bpf_prog_load(tgt_name, BPF_PROG_TYPE_UNSPEC,
+ &pkt_obj, &pkt_fd);
+ /* the target prog should load fine */
+ if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n",
+ tgt_name, err, errno))
+ return;
+ opts.attach_prog_fd = pkt_fd;
+
+ freplace_obj = bpf_object__open_file(freplace_name, &opts);
+ if (CHECK(IS_ERR_OR_NULL(freplace_obj), "freplace_obj_open",
+ "failed to open %s: %ld\n", freplace_name,
+ PTR_ERR(freplace_obj)))
+ goto out;
+
+ err = bpf_object__load(freplace_obj);
+ if (CHECK(err, "freplace_obj_load", "err %d\n", err))
+ goto out;
+
+ prog = bpf_program__next(NULL, freplace_obj);
+ freplace_link = bpf_program__attach_trace(prog);
+ if (CHECK(IS_ERR(freplace_link), "freplace_attach_trace", "failed to link\n"))
+ goto out;
+
+ opts.attach_prog_fd = bpf_program__fd(prog);
+ fmod_obj = bpf_object__open_file(fmod_ret_name, &opts);
+ if (CHECK(IS_ERR_OR_NULL(fmod_obj), "fmod_obj_open",
+ "failed to open %s: %ld\n", fmod_ret_name,
+ PTR_ERR(fmod_obj)))
+ goto out;
+
+ err = bpf_object__load(fmod_obj);
+ if (CHECK(!err, "fmod_obj_load", "loading fmod_ret should fail\n"))
+ goto out;
+
+out:
+ bpf_link__destroy(freplace_link);
+ bpf_object__close(freplace_obj);
+ bpf_object__close(fmod_obj);
+ bpf_object__close(pkt_obj);
+}
+
+static void test_func_sockmap_update(void)
+{
+ const char *prog_name[] = {
+ "freplace/cls_redirect",
+ };
+ test_fexit_bpf2bpf_common("./freplace_cls_redirect.o",
+ "./test_cls_redirect.o",
+ ARRAY_SIZE(prog_name),
+ prog_name, false, NULL);
+}
+
+static void test_obj_load_failure_common(const char *obj_file,
+ const char *target_obj_file)
+{
+	/*
+	 * common helper that asserts the freplace prog fails to load
+	 * against the given target (invalid return code, incompatible map, ...)
+	 */
+ struct bpf_object *obj = NULL, *pkt_obj;
+ int err, pkt_fd;
+ __u32 duration = 0;
+
+ err = bpf_prog_load(target_obj_file, BPF_PROG_TYPE_UNSPEC,
+ &pkt_obj, &pkt_fd);
+ /* the target prog should load fine */
+ if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n",
+ target_obj_file, err, errno))
+ return;
+ DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
+ .attach_prog_fd = pkt_fd,
+ );
+
+ obj = bpf_object__open_file(obj_file, &opts);
+ if (CHECK(IS_ERR_OR_NULL(obj), "obj_open",
+ "failed to open %s: %ld\n", obj_file,
+ PTR_ERR(obj)))
+ goto close_prog;
+
+ /* It should fail to load the program */
+ err = bpf_object__load(obj);
+ if (CHECK(!err, "bpf_obj_load should fail", "err %d\n", err))
+ goto close_prog;
+
+close_prog:
+ if (!IS_ERR_OR_NULL(obj))
+ bpf_object__close(obj);
+ bpf_object__close(pkt_obj);
+}
+
+static void test_func_replace_return_code(void)
+{
+ /* test invalid return code in the replaced program */
+ test_obj_load_failure_common("./freplace_connect_v4_prog.o",
+ "./connect4_prog.o");
+}
+
+static void test_func_map_prog_compatibility(void)
+{
+ /* test with spin lock map value in the replaced program */
+ test_obj_load_failure_common("./freplace_attach_probe.o",
+ "./test_attach_probe.o");
}
void test_fexit_bpf2bpf(void)
{
- test_target_no_callees();
- test_target_yes_callees();
- test_func_replace();
- test_func_replace_verify();
+ if (test__start_subtest("target_no_callees"))
+ test_target_no_callees();
+ if (test__start_subtest("target_yes_callees"))
+ test_target_yes_callees();
+ if (test__start_subtest("func_replace"))
+ test_func_replace();
+ if (test__start_subtest("func_replace_verify"))
+ test_func_replace_verify();
+ if (test__start_subtest("func_sockmap_update"))
+ test_func_sockmap_update();
+ if (test__start_subtest("func_replace_return_code"))
+ test_func_replace_return_code();
+ if (test__start_subtest("func_map_prog_compatibility"))
+ test_func_map_prog_compatibility();
+ if (test__start_subtest("func_replace_multi"))
+ test_func_replace_multi();
+ if (test__start_subtest("fmod_ret_freplace"))
+ test_fmod_ret_freplace();
}
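
test_second_attach() exercises bpf_program__attach_freplace(), which lets
one loaded freplace program be attached to additional targets identified by
(target prog FD, function name). A sketch of that call in isolation (the FD
and function name below are illustrative):

#include <bpf/libbpf.h>

static int attach_to_second_target(struct bpf_program *freplace_prog,
				   int tgt_prog_fd)
{
	struct bpf_link *link;

	link = bpf_program__attach_freplace(freplace_prog, tgt_prog_fd,
					    "get_constant");
	if (libbpf_get_error(link))
		return -1;

	/* ... run traffic through the target, then detach ... */
	bpf_link__destroy(link);
	return 0;
}
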
diff --git a/tools/testing/selftests/bpf/prog_tests/global_data_init.c b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
index 3bdaa5a40744..ee46b11f1f9a 100644
--- a/tools/testing/selftests/bpf/prog_tests/global_data_init.c
+++ b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
@@ -12,7 +12,8 @@ void test_global_data_init(void)
size_t sz;
obj = bpf_object__open_file(file, NULL);
- if (CHECK_FAIL(!obj))
+ err = libbpf_get_error(obj);
+ if (CHECK_FAIL(err))
return;
map = bpf_object__find_map_by_name(obj, "test_glo.rodata");
diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms.c b/tools/testing/selftests/bpf/prog_tests/ksyms.c
index e3d6777226a8..b295969b263b 100644
--- a/tools/testing/selftests/bpf/prog_tests/ksyms.c
+++ b/tools/testing/selftests/bpf/prog_tests/ksyms.c
@@ -7,39 +7,28 @@
static int duration;
-static __u64 kallsyms_find(const char *sym)
-{
- char type, name[500];
- __u64 addr, res = 0;
- FILE *f;
-
- f = fopen("/proc/kallsyms", "r");
- if (CHECK(!f, "kallsyms_fopen", "failed to open: %d\n", errno))
- return 0;
-
- while (fscanf(f, "%llx %c %499s%*[^\n]\n", &addr, &type, name) > 0) {
- if (strcmp(name, sym) == 0) {
- res = addr;
- goto out;
- }
- }
-
- CHECK(false, "not_found", "symbol %s not found\n", sym);
-out:
- fclose(f);
- return res;
-}
-
void test_ksyms(void)
{
- __u64 link_fops_addr = kallsyms_find("bpf_link_fops");
const char *btf_path = "/sys/kernel/btf/vmlinux";
struct test_ksyms *skel;
struct test_ksyms__data *data;
+ __u64 link_fops_addr, per_cpu_start_addr;
struct stat st;
__u64 btf_size;
int err;
+ err = kallsyms_find("bpf_link_fops", &link_fops_addr);
+ if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno))
+ return;
+ if (CHECK(err == -ENOENT, "ksym_find", "symbol 'bpf_link_fops' not found\n"))
+ return;
+
+ err = kallsyms_find("__per_cpu_start", &per_cpu_start_addr);
+ if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno))
+ return;
+ if (CHECK(err == -ENOENT, "ksym_find", "symbol 'per_cpu_start' not found\n"))
+ return;
+
if (CHECK(stat(btf_path, &st), "stat_btf", "err %d\n", errno))
return;
btf_size = st.st_size;
@@ -63,8 +52,9 @@ void test_ksyms(void)
"got %llu, exp %llu\n", data->out__bpf_link_fops1, (__u64)0);
CHECK(data->out__btf_size != btf_size, "btf_size",
"got %llu, exp %llu\n", data->out__btf_size, btf_size);
- CHECK(data->out__per_cpu_start != 0, "__per_cpu_start",
- "got %llu, exp %llu\n", data->out__per_cpu_start, (__u64)0);
+ CHECK(data->out__per_cpu_start != per_cpu_start_addr, "__per_cpu_start",
+ "got %llu, exp %llu\n", data->out__per_cpu_start,
+ per_cpu_start_addr);
cleanup:
test_ksyms__destroy(skel);
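
The open-coded /proc/kallsyms scan removed above is replaced by a shared
kallsyms_find() helper, presumably moved into the selftests' common helper
code. Judging by the call sites, its contract is 0 on success, -EINVAL when
/proc/kallsyms cannot be opened, and -ENOENT when the symbol is absent,
roughly:

#include <errno.h>
#include <stdio.h>
#include <string.h>

int kallsyms_find(const char *sym, unsigned long long *addr)
{
	char type, name[500];
	unsigned long long value;
	int err = -ENOENT;
	FILE *f;

	f = fopen("/proc/kallsyms", "r");
	if (!f)
		return -EINVAL;

	while (fscanf(f, "%llx %c %499s%*[^\n]\n", &value, &type, name) > 0) {
		if (strcmp(name, sym) == 0) {
			*addr = value;
			err = 0;
			break;
		}
	}
	fclose(f);
	return err;
}
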
diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
new file mode 100644
index 000000000000..28e26bd3e0ca
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Google */
+
+#include <test_progs.h>
+#include <bpf/libbpf.h>
+#include <bpf/btf.h>
+#include "test_ksyms_btf.skel.h"
+
+static int duration;
+
+void test_ksyms_btf(void)
+{
+ __u64 runqueues_addr, bpf_prog_active_addr;
+ __u32 this_rq_cpu;
+ int this_bpf_prog_active;
+ struct test_ksyms_btf *skel = NULL;
+ struct test_ksyms_btf__data *data;
+ struct btf *btf;
+ int percpu_datasec;
+ int err;
+
+ err = kallsyms_find("runqueues", &runqueues_addr);
+ if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno))
+ return;
+ if (CHECK(err == -ENOENT, "ksym_find", "symbol 'runqueues' not found\n"))
+ return;
+
+ err = kallsyms_find("bpf_prog_active", &bpf_prog_active_addr);
+ if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno))
+ return;
+ if (CHECK(err == -ENOENT, "ksym_find", "symbol 'bpf_prog_active' not found\n"))
+ return;
+
+ btf = libbpf_find_kernel_btf();
+ if (CHECK(IS_ERR(btf), "btf_exists", "failed to load kernel BTF: %ld\n",
+ PTR_ERR(btf)))
+ return;
+
+ percpu_datasec = btf__find_by_name_kind(btf, ".data..percpu",
+ BTF_KIND_DATASEC);
+ if (percpu_datasec < 0) {
+ printf("%s:SKIP:no PERCPU DATASEC in kernel btf\n",
+ __func__);
+ test__skip();
+ goto cleanup;
+ }
+
+ skel = test_ksyms_btf__open_and_load();
+ if (CHECK(!skel, "skel_open", "failed to open and load skeleton\n"))
+ goto cleanup;
+
+ err = test_ksyms_btf__attach(skel);
+ if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+ goto cleanup;
+
+ /* trigger tracepoint */
+ usleep(1);
+
+ data = skel->data;
+ CHECK(data->out__runqueues_addr != runqueues_addr, "runqueues_addr",
+ "got %llu, exp %llu\n",
+ (unsigned long long)data->out__runqueues_addr,
+ (unsigned long long)runqueues_addr);
+ CHECK(data->out__bpf_prog_active_addr != bpf_prog_active_addr, "bpf_prog_active_addr",
+ "got %llu, exp %llu\n",
+ (unsigned long long)data->out__bpf_prog_active_addr,
+ (unsigned long long)bpf_prog_active_addr);
+
+ CHECK(data->out__rq_cpu == -1, "rq_cpu",
+ "got %u, exp != -1\n", data->out__rq_cpu);
+ CHECK(data->out__bpf_prog_active < 0, "bpf_prog_active",
+ "got %d, exp >= 0\n", data->out__bpf_prog_active);
+ CHECK(data->out__cpu_0_rq_cpu != 0, "cpu_rq(0)->cpu",
+ "got %u, exp 0\n", data->out__cpu_0_rq_cpu);
+
+ this_rq_cpu = data->out__this_rq_cpu;
+ CHECK(this_rq_cpu != data->out__rq_cpu, "this_rq_cpu",
+ "got %u, exp %u\n", this_rq_cpu, data->out__rq_cpu);
+
+ this_bpf_prog_active = data->out__this_bpf_prog_active;
+ CHECK(this_bpf_prog_active != data->out__bpf_prog_active, "this_bpf_prog_active",
+ "got %d, exp %d\n", this_bpf_prog_active,
+ data->out__bpf_prog_active);
+
+cleanup:
+ btf__free(btf);
+ test_ksyms_btf__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
index c2d373e294bb..8073105548ff 100644
--- a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
+++ b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
@@ -80,9 +80,8 @@ out:
void test_l4lb_all(void)
{
- const char *file1 = "./test_l4lb.o";
- const char *file2 = "./test_l4lb_noinline.o";
-
- test_l4lb(file1);
- test_l4lb(file2);
+ if (test__start_subtest("l4lb_inline"))
+ test_l4lb("test_l4lb.o");
+ if (test__start_subtest("l4lb_noinline"))
+ test_l4lb("test_l4lb_noinline.o");
}
diff --git a/tools/testing/selftests/bpf/prog_tests/metadata.c b/tools/testing/selftests/bpf/prog_tests/metadata.c
new file mode 100644
index 000000000000..2c53eade88e3
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/metadata.c
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include <test_progs.h>
+#include <cgroup_helpers.h>
+#include <network_helpers.h>
+
+#include "metadata_unused.skel.h"
+#include "metadata_used.skel.h"
+
+static int duration;
+
+static int prog_holds_map(int prog_fd, int map_fd)
+{
+ struct bpf_prog_info prog_info = {};
+ struct bpf_prog_info map_info = {};
+ __u32 prog_info_len;
+ __u32 map_info_len;
+ __u32 *map_ids;
+ int nr_maps;
+ int ret;
+ int i;
+
+ map_info_len = sizeof(map_info);
+ ret = bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len);
+ if (ret)
+ return -errno;
+
+ prog_info_len = sizeof(prog_info);
+ ret = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &prog_info_len);
+ if (ret)
+ return -errno;
+
+ map_ids = calloc(prog_info.nr_map_ids, sizeof(__u32));
+ if (!map_ids)
+ return -ENOMEM;
+
+ nr_maps = prog_info.nr_map_ids;
+ memset(&prog_info, 0, sizeof(prog_info));
+ prog_info.nr_map_ids = nr_maps;
+ prog_info.map_ids = ptr_to_u64(map_ids);
+ prog_info_len = sizeof(prog_info);
+
+ ret = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &prog_info_len);
+ if (ret) {
+ ret = -errno;
+ goto free_map_ids;
+ }
+
+ ret = -ENOENT;
+ for (i = 0; i < prog_info.nr_map_ids; i++) {
+ if (map_ids[i] == map_info.id) {
+ ret = 0;
+ break;
+ }
+ }
+
+free_map_ids:
+ free(map_ids);
+ return ret;
+}
+
+static void test_metadata_unused(void)
+{
+ struct metadata_unused *obj;
+ int err;
+
+ obj = metadata_unused__open_and_load();
+ if (CHECK(!obj, "skel-load", "errno %d", errno))
+ return;
+
+ err = prog_holds_map(bpf_program__fd(obj->progs.prog),
+ bpf_map__fd(obj->maps.rodata));
+ if (CHECK(err, "prog-holds-rodata", "errno: %d", err))
+		goto close_bpf_object;
+
+ /* Assert that we can access the metadata in skel and the values are
+ * what we expect.
+ */
+ if (CHECK(strncmp(obj->rodata->bpf_metadata_a, "foo",
+ sizeof(obj->rodata->bpf_metadata_a)),
+ "bpf_metadata_a", "expected \"foo\", value differ"))
+ goto close_bpf_object;
+ if (CHECK(obj->rodata->bpf_metadata_b != 1, "bpf_metadata_b",
+ "expected 1, got %d", obj->rodata->bpf_metadata_b))
+ goto close_bpf_object;
+
+ /* Assert that binding metadata map to prog again succeeds. */
+ err = bpf_prog_bind_map(bpf_program__fd(obj->progs.prog),
+ bpf_map__fd(obj->maps.rodata), NULL);
+ CHECK(err, "rebind_map", "errno %d, expected 0", errno);
+
+close_bpf_object:
+ metadata_unused__destroy(obj);
+}
+
+static void test_metadata_used(void)
+{
+ struct metadata_used *obj;
+ int err;
+
+ obj = metadata_used__open_and_load();
+ if (CHECK(!obj, "skel-load", "errno %d", errno))
+ return;
+
+ err = prog_holds_map(bpf_program__fd(obj->progs.prog),
+ bpf_map__fd(obj->maps.rodata));
+ if (CHECK(err, "prog-holds-rodata", "errno: %d", err))
+		goto close_bpf_object;
+
+ /* Assert that we can access the metadata in skel and the values are
+ * what we expect.
+ */
+ if (CHECK(strncmp(obj->rodata->bpf_metadata_a, "bar",
+ sizeof(obj->rodata->bpf_metadata_a)),
+ "metadata_a", "expected \"bar\", value differ"))
+ goto close_bpf_object;
+ if (CHECK(obj->rodata->bpf_metadata_b != 2, "metadata_b",
+ "expected 2, got %d", obj->rodata->bpf_metadata_b))
+ goto close_bpf_object;
+
+ /* Assert that binding metadata map to prog again succeeds. */
+ err = bpf_prog_bind_map(bpf_program__fd(obj->progs.prog),
+ bpf_map__fd(obj->maps.rodata), NULL);
+ CHECK(err, "rebind_map", "errno %d, expected 0", errno);
+
+close_bpf_object:
+ metadata_used__destroy(obj);
+}
+
+void test_metadata(void)
+{
+ if (test__start_subtest("unused"))
+ test_metadata_unused();
+
+ if (test__start_subtest("used"))
+ test_metadata_used();
+}
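
prog_holds_map() uses the two-call bpf_obj_get_info_by_fd() pattern (one
call to learn nr_map_ids, a second to fetch the ID array), and the subtests
then assert that bpf_prog_bind_map() succeeds even for an already-referenced
map. A sketch of the bind call itself (FDs assumed valid):

#include <bpf/bpf.h>

static int bind_metadata_map(int prog_fd, int rodata_map_fd)
{
	/* NULL opts: no bind flags are used here */
	return bpf_prog_bind_map(prog_fd, rodata_map_fd, NULL);
}
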
diff --git a/tools/testing/selftests/bpf/prog_tests/pe_preserve_elems.c b/tools/testing/selftests/bpf/prog_tests/pe_preserve_elems.c
new file mode 100644
index 000000000000..673d38395253
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/pe_preserve_elems.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2019 Facebook */
+#include <test_progs.h>
+#include <linux/bpf.h>
+#include "test_pe_preserve_elems.skel.h"
+
+static int duration;
+
+static void test_one_map(struct bpf_map *map, struct bpf_program *prog,
+ bool has_share_pe)
+{
+ int err, key = 0, pfd = -1, mfd = bpf_map__fd(map);
+ DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts);
+ struct perf_event_attr attr = {
+ .size = sizeof(struct perf_event_attr),
+ .type = PERF_TYPE_SOFTWARE,
+ .config = PERF_COUNT_SW_CPU_CLOCK,
+ };
+
+ pfd = syscall(__NR_perf_event_open, &attr, 0 /* pid */,
+		      -1 /* any cpu */, -1 /* group fd */, 0 /* flags */);
+ if (CHECK(pfd < 0, "perf_event_open", "failed\n"))
+ return;
+
+ err = bpf_map_update_elem(mfd, &key, &pfd, BPF_ANY);
+ close(pfd);
+ if (CHECK(err < 0, "bpf_map_update_elem", "failed\n"))
+ return;
+
+ err = bpf_prog_test_run_opts(bpf_program__fd(prog), &opts);
+ if (CHECK(err < 0, "bpf_prog_test_run_opts", "failed\n"))
+ return;
+ if (CHECK(opts.retval != 0, "bpf_perf_event_read_value",
+ "failed with %d\n", opts.retval))
+ return;
+
+ /* closing mfd, prog still holds a reference on map */
+ close(mfd);
+
+ err = bpf_prog_test_run_opts(bpf_program__fd(prog), &opts);
+ if (CHECK(err < 0, "bpf_prog_test_run_opts", "failed\n"))
+ return;
+
+ if (has_share_pe) {
+ CHECK(opts.retval != 0, "bpf_perf_event_read_value",
+ "failed with %d\n", opts.retval);
+ } else {
+ CHECK(opts.retval != -ENOENT, "bpf_perf_event_read_value",
+ "should have failed with %d, but got %d\n", -ENOENT,
+ opts.retval);
+ }
+}
+
+void test_pe_preserve_elems(void)
+{
+ struct test_pe_preserve_elems *skel;
+
+ skel = test_pe_preserve_elems__open_and_load();
+ if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+ return;
+
+ test_one_map(skel->maps.array_1, skel->progs.read_array_1, false);
+ test_one_map(skel->maps.array_2, skel->progs.read_array_2, true);
+
+ test_pe_preserve_elems__destroy(skel);
+}
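
test_one_map() drives each program with bpf_prog_test_run_opts() and inspects
opts.retval, so the programs are expected to return the error code of the
perf-event read. A hedged BPF-side sketch of what such a reader could look
like; the map definition and attach point are assumptions, not taken from the
actual test_pe_preserve_elems source:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(max_entries, 1);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(int));
} array_1 SEC(".maps");

SEC("raw_tp/sched_switch")	/* hypothetical attach point */
int read_array_1(void *ctx)
{
	struct bpf_perf_event_value val = {};

	/* 0 on success, -ENOENT once the element is gone and the map
	 * no longer pins the perf event */
	return bpf_perf_event_read_value(&array_1, 0, &val, sizeof(val));
}

char _license[] SEC("license") = "GPL";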
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
index c33ec180b3f2..ca9f0895ec84 100644
--- a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
+++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
@@ -7,6 +7,8 @@
#include "test_perf_buffer.skel.h"
#include "bpf/libbpf_internal.h"
+static int duration;
+
/* AddressSanitizer sometimes crashes due to data dereference below, due to
* this being mmap()'ed memory. Disable instrumentation with
* no_sanitize_address attribute
@@ -24,13 +26,31 @@ static void on_sample(void *ctx, int cpu, void *data, __u32 size)
CPU_SET(cpu, cpu_seen);
}
+int trigger_on_cpu(int cpu)
+{
+ cpu_set_t cpu_set;
+ int err;
+
+ CPU_ZERO(&cpu_set);
+ CPU_SET(cpu, &cpu_set);
+
+ err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
+ if (err && CHECK(err, "set_affinity", "cpu #%d, err %d\n", cpu, err))
+ return err;
+
+ usleep(1);
+
+ return 0;
+}
+
void test_perf_buffer(void)
{
- int err, on_len, nr_on_cpus = 0, nr_cpus, i, duration = 0;
+ int err, on_len, nr_on_cpus = 0, nr_cpus, i;
struct perf_buffer_opts pb_opts = {};
struct test_perf_buffer *skel;
- cpu_set_t cpu_set, cpu_seen;
+ cpu_set_t cpu_seen;
struct perf_buffer *pb;
+ int last_fd = -1, fd;
bool *online;
nr_cpus = libbpf_num_possible_cpus();
@@ -63,6 +83,9 @@ void test_perf_buffer(void)
if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
goto out_close;
+ CHECK(perf_buffer__epoll_fd(pb) < 0, "epoll_fd",
+ "bad fd: %d\n", perf_buffer__epoll_fd(pb));
+
/* trigger kprobe on every CPU */
CPU_ZERO(&cpu_seen);
for (i = 0; i < nr_cpus; i++) {
@@ -71,16 +94,8 @@ void test_perf_buffer(void)
continue;
}
- CPU_ZERO(&cpu_set);
- CPU_SET(i, &cpu_set);
-
- err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set),
- &cpu_set);
- if (err && CHECK(err, "set_affinity", "cpu #%d, err %d\n",
- i, err))
+ if (trigger_on_cpu(i))
goto out_close;
-
- usleep(1);
}
/* read perf buffer */
@@ -92,6 +107,34 @@ void test_perf_buffer(void)
"expect %d, seen %d\n", nr_on_cpus, CPU_COUNT(&cpu_seen)))
goto out_free_pb;
+ if (CHECK(perf_buffer__buffer_cnt(pb) != nr_cpus, "buf_cnt",
+ "got %zu, expected %d\n", perf_buffer__buffer_cnt(pb), nr_cpus))
+ goto out_close;
+
+ for (i = 0; i < nr_cpus; i++) {
+ if (i >= on_len || !online[i])
+ continue;
+
+ fd = perf_buffer__buffer_fd(pb, i);
+ CHECK(fd < 0 || last_fd == fd, "fd_check", "last fd %d == fd %d\n", last_fd, fd);
+ last_fd = fd;
+
+ err = perf_buffer__consume_buffer(pb, i);
+ if (CHECK(err, "drain_buf", "cpu %d, err %d\n", i, err))
+ goto out_close;
+
+ CPU_CLR(i, &cpu_seen);
+ if (trigger_on_cpu(i))
+ goto out_close;
+
+ err = perf_buffer__consume_buffer(pb, i);
+ if (CHECK(err, "consume_buf", "cpu %d, err %d\n", i, err))
+ goto out_close;
+
+ if (CHECK(!CPU_ISSET(i, &cpu_seen), "cpu_seen", "cpu %d not seen\n", i))
+ goto out_close;
+ }
+
out_free_pb:
perf_buffer__free(pb);
out_close:
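
The new hunks exercise libbpf's per-buffer accessors: perf_buffer__epoll_fd(),
perf_buffer__buffer_cnt(), perf_buffer__buffer_fd() and
perf_buffer__consume_buffer(). A minimal usage sketch, assuming pb is a
struct perf_buffer * already set up as in the test:

#include <errno.h>
#include <bpf/libbpf.h>

static int drain_one_ring(struct perf_buffer *pb, size_t buf_idx)
{
	if (buf_idx >= perf_buffer__buffer_cnt(pb))
		return -EINVAL;

	if (perf_buffer__buffer_fd(pb, buf_idx) < 0)
		return -ENOENT;

	/* run the sample callback for pending records on this ring only,
	 * leaving the other per-CPU rings untouched */
	return perf_buffer__consume_buffer(pb, buf_idx);
}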
diff --git a/tools/testing/selftests/bpf/prog_tests/pinning.c b/tools/testing/selftests/bpf/prog_tests/pinning.c
index 041952524c55..fcf54b3a1dd0 100644
--- a/tools/testing/selftests/bpf/prog_tests/pinning.c
+++ b/tools/testing/selftests/bpf/prog_tests/pinning.c
@@ -37,7 +37,7 @@ void test_pinning(void)
struct stat statbuf = {};
struct bpf_object *obj;
struct bpf_map *map;
- int err;
+ int err, map_fd;
DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
.pin_root_path = custpath,
);
@@ -213,6 +213,53 @@ void test_pinning(void)
if (CHECK(err, "stat custpinpath", "err %d errno %d\n", err, errno))
goto out;
+ /* remove the custom pin path to re-test it with reuse fd below */
+ err = unlink(custpinpath);
+ if (CHECK(err, "unlink custpinpath", "err %d errno %d\n", err, errno))
+ goto out;
+
+ err = rmdir(custpath);
+ if (CHECK(err, "rmdir custpindir", "err %d errno %d\n", err, errno))
+ goto out;
+
+ bpf_object__close(obj);
+
+ /* test pinning at custom path with reuse fd */
+ obj = bpf_object__open_file(file, NULL);
+ err = libbpf_get_error(obj);
+ if (CHECK(err, "default open", "err %d errno %d\n", err, errno)) {
+ obj = NULL;
+ goto out;
+ }
+
+ map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(__u32),
+ sizeof(__u64), 1, 0);
+ if (CHECK(map_fd < 0, "create pinmap manually", "fd %d\n", map_fd))
+ goto out;
+
+ map = bpf_object__find_map_by_name(obj, "pinmap");
+ if (CHECK(!map, "find map", "NULL map"))
+ goto close_map_fd;
+
+ err = bpf_map__reuse_fd(map, map_fd);
+ if (CHECK(err, "reuse pinmap fd", "err %d errno %d\n", err, errno))
+ goto close_map_fd;
+
+ err = bpf_map__set_pin_path(map, custpinpath);
+ if (CHECK(err, "set pin path", "err %d errno %d\n", err, errno))
+ goto close_map_fd;
+
+ err = bpf_object__load(obj);
+ if (CHECK(err, "custom load", "err %d errno %d\n", err, errno))
+ goto close_map_fd;
+
+ /* check that pinmap was pinned at the custom path */
+ err = stat(custpinpath, &statbuf);
+ if (CHECK(err, "stat custpinpath", "err %d errno %d\n", err, errno))
+ goto close_map_fd;
+
+close_map_fd:
+ close(map_fd);
out:
unlink(pinpath);
unlink(nopinpath);
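
The added pinning case follows a create-then-graft pattern: create the map
manually with bpf_create_map(), hand its fd to the object's map with
bpf_map__reuse_fd(), set a custom pin path, and let bpf_object__load() do the
pinning. A condensed sketch of the same flow (the pin path below is
illustrative):

#include <errno.h>
#include <unistd.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>

static int pin_with_reused_fd(struct bpf_object *obj)
{
	struct bpf_map *map;
	int map_fd, err;

	map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(__u32),
				sizeof(__u64), 1, 0);
	if (map_fd < 0)
		return map_fd;

	map = bpf_object__find_map_by_name(obj, "pinmap");
	if (!map) {
		close(map_fd);
		return -ENOENT;
	}

	err = bpf_map__reuse_fd(map, map_fd);
	if (!err)
		err = bpf_map__set_pin_path(map, "/sys/fs/bpf/custom/pinmap");
	if (!err)
		/* load() pins the reused map at the custom path */
		err = bpf_object__load(obj);

	close(map_fd);	/* reuse_fd dups the fd, so this copy can go */
	return err;
}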
diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c
new file mode 100644
index 000000000000..c5fb191874ac
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2019 Facebook */
+#include <test_progs.h>
+#include <linux/bpf.h>
+#include "bpf/libbpf_internal.h"
+#include "test_raw_tp_test_run.skel.h"
+
+static int duration;
+
+void test_raw_tp_test_run(void)
+{
+ struct bpf_prog_test_run_attr test_attr = {};
+ int comm_fd = -1, err, nr_online, i, prog_fd;
+ __u64 args[2] = {0x1234ULL, 0x5678ULL};
+ int expected_retval = 0x1234 + 0x5678;
+ struct test_raw_tp_test_run *skel;
+ char buf[] = "new_name";
+ bool *online = NULL;
+ DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .ctx_in = args,
+ .ctx_size_in = sizeof(args),
+ .flags = BPF_F_TEST_RUN_ON_CPU,
+ );
+
+ err = parse_cpu_mask_file("/sys/devices/system/cpu/online", &online,
+ &nr_online);
+ if (CHECK(err, "parse_cpu_mask_file", "err %d\n", err))
+ return;
+
+ skel = test_raw_tp_test_run__open_and_load();
+ if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+ goto cleanup;
+
+ err = test_raw_tp_test_run__attach(skel);
+ if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+ goto cleanup;
+
+ comm_fd = open("/proc/self/comm", O_WRONLY|O_TRUNC);
+ if (CHECK(comm_fd < 0, "open /proc/self/comm", "err %d\n", errno))
+ goto cleanup;
+
+ err = write(comm_fd, buf, sizeof(buf));
+ CHECK(err < 0, "task rename", "err %d", errno);
+
+ CHECK(skel->bss->count == 0, "check_count", "didn't increase\n");
+ CHECK(skel->data->on_cpu != 0xffffffff, "check_on_cpu", "got wrong value\n");
+
+ prog_fd = bpf_program__fd(skel->progs.rename);
+ test_attr.prog_fd = prog_fd;
+ test_attr.ctx_in = args;
+ test_attr.ctx_size_in = sizeof(__u64);
+
+ err = bpf_prog_test_run_xattr(&test_attr);
+ CHECK(err == 0, "test_run", "should fail for too small ctx\n");
+
+ test_attr.ctx_size_in = sizeof(args);
+ err = bpf_prog_test_run_xattr(&test_attr);
+ CHECK(err < 0, "test_run", "err %d\n", errno);
+ CHECK(test_attr.retval != expected_retval, "check_retval",
+ "expect 0x%x, got 0x%x\n", expected_retval, test_attr.retval);
+
+ for (i = 0; i < nr_online; i++) {
+ if (!online[i])
+ continue;
+
+ opts.cpu = i;
+ opts.retval = 0;
+ err = bpf_prog_test_run_opts(prog_fd, &opts);
+ CHECK(err < 0, "test_run_opts", "err %d\n", errno);
+ CHECK(skel->data->on_cpu != i, "check_on_cpu",
+ "expect %d got %d\n", i, skel->data->on_cpu);
+ CHECK(opts.retval != expected_retval,
+ "check_retval", "expect 0x%x, got 0x%x\n",
+ expected_retval, opts.retval);
+ }
+
+ /* invalid cpu ID should fail with ENXIO */
+ opts.cpu = 0xffffffff;
+ err = bpf_prog_test_run_opts(prog_fd, &opts);
+ CHECK(err != -1 || errno != ENXIO,
+ "test_run_opts_fail",
+ "should failed with ENXIO\n");
+
+ /* non-zero cpu w/o BPF_F_TEST_RUN_ON_CPU should fail with EINVAL */
+ opts.cpu = 1;
+ opts.flags = 0;
+ err = bpf_prog_test_run_opts(prog_fd, &opts);
+ CHECK(err != -1 || errno != EINVAL,
+ "test_run_opts_fail",
+ "should failed with EINVAL\n");
+
+cleanup:
+ close(comm_fd);
+ test_raw_tp_test_run__destroy(skel);
+ free(online);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
index fc0d7f4f02cf..ac1ee10cffd8 100644
--- a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
+++ b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
@@ -27,7 +27,7 @@ void test_reference_tracking(void)
const char *title;
/* Ignore .text sections */
- title = bpf_program__title(prog, false);
+ title = bpf_program__section_name(prog);
if (strstr(title, ".text") != NULL)
continue;
diff --git a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
index 3b127cab4864..6ace5e9efec1 100644
--- a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
+++ b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
@@ -28,6 +28,12 @@ struct symbol test_symbols[] = {
{ "func", BTF_KIND_FUNC, -1 },
};
+/* Align the .BTF_ids section to 4 bytes */
+asm (
+".pushsection " BTF_IDS_SECTION " ,\"a\"; \n"
+".balign 4, 0; \n"
+".popsection; \n");
+
BTF_ID_LIST(test_list_local)
BTF_ID_UNUSED
BTF_ID(typedef, S)
@@ -47,6 +53,15 @@ BTF_ID(struct, S)
BTF_ID(union, U)
BTF_ID(func, func)
+BTF_SET_START(test_set)
+BTF_ID(typedef, S)
+BTF_ID(typedef, T)
+BTF_ID(typedef, U)
+BTF_ID(struct, S)
+BTF_ID(union, U)
+BTF_ID(func, func)
+BTF_SET_END(test_set)
+
static int
__resolve_symbol(struct btf *btf, int type_id)
{
@@ -116,12 +131,40 @@ int test_resolve_btfids(void)
*/
for (j = 0; j < ARRAY_SIZE(test_lists); j++) {
test_list = test_lists[j];
- for (i = 0; i < ARRAY_SIZE(test_symbols) && !ret; i++) {
+ for (i = 0; i < ARRAY_SIZE(test_symbols); i++) {
ret = CHECK(test_list[i] != test_symbols[i].id,
"id_check",
"wrong ID for %s (%d != %d)\n",
test_symbols[i].name,
test_list[i], test_symbols[i].id);
+ if (ret)
+ return ret;
+ }
+ }
+
+ /* Check BTF_SET_START(test_set) IDs */
+ for (i = 0; i < test_set.cnt; i++) {
+ bool found = false;
+
+ for (j = 0; j < ARRAY_SIZE(test_symbols); j++) {
+ if (test_symbols[j].id != test_set.ids[i])
+ continue;
+ found = true;
+ break;
+ }
+
+ ret = CHECK(!found, "id_check",
+ "ID %d not found in test_symbols\n",
+ test_set.ids[i]);
+ if (ret)
+ break;
+
+ if (i > 0) {
+ ret = CHECK(test_set.ids[i - 1] > test_set.ids[i],
+ "sort_check",
+ "test_set is not sorted\n");
+ if (ret)
+ break;
}
}
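
The new assertions depend on resolve_btfids resolving and sorting the IDs of a
BTF set at build time; sort_check above verifies exactly that. For reference,
declaring a set is just the following (reusing the S and func symbols this
test already defines; the macros come from tools/include/linux/btf_ids.h):

#include <linux/btf_ids.h>

BTF_SET_START(my_set)
BTF_ID(struct, S)
BTF_ID(func, func)
BTF_SET_END(my_set)

/* after the build step, my_set.cnt and my_set.ids[] are filled in and
 * sorted, just like test_set in the loop above */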
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_assign.c b/tools/testing/selftests/bpf/prog_tests/sk_assign.c
index 47fa04adc147..3a469099f30d 100644
--- a/tools/testing/selftests/bpf/prog_tests/sk_assign.c
+++ b/tools/testing/selftests/bpf/prog_tests/sk_assign.c
@@ -49,7 +49,7 @@ configure_stack(void)
sprintf(tc_cmd, "%s %s %s %s", "tc filter add dev lo ingress bpf",
"direct-action object-file ./test_sk_assign.o",
"section classifier/sk_assign_test",
- (env.verbosity < VERBOSE_VERY) ? " 2>/dev/null" : "");
+ (env.verbosity < VERBOSE_VERY) ? " 2>/dev/null" : "verbose");
if (CHECK(system(tc_cmd), "BPF load failed;",
"run with -vv for more info\n"))
return false;
@@ -265,9 +265,10 @@ void test_sk_assign(void)
TEST("ipv6 udp port redir", AF_INET6, SOCK_DGRAM, false),
TEST("ipv6 udp addr redir", AF_INET6, SOCK_DGRAM, true),
};
- int server = -1;
+ __s64 server = -1;
int server_map;
int self_net;
+ int i;
self_net = open(NS_SELF, O_RDONLY);
if (CHECK_FAIL(self_net < 0)) {
@@ -286,7 +287,7 @@ void test_sk_assign(void)
goto cleanup;
}
- for (int i = 0; i < ARRAY_SIZE(tests) && !READ_ONCE(stop); i++) {
+ for (i = 0; i < ARRAY_SIZE(tests) && !READ_ONCE(stop); i++) {
struct test_sk_cfg *test = &tests[i];
const struct sockaddr *addr;
const int zero = 0;
diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c b/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c
new file mode 100644
index 000000000000..686b40f11a45
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <linux/btf.h>
+#include "netif_receive_skb.skel.h"
+
+/* Demonstrate that bpf_snprintf_btf succeeds and that various data types
+ * are formatted correctly.
+ */
+void test_snprintf_btf(void)
+{
+ struct netif_receive_skb *skel;
+ struct netif_receive_skb__bss *bss;
+ int err, duration = 0;
+
+ skel = netif_receive_skb__open();
+ if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+ return;
+
+ err = netif_receive_skb__load(skel);
+ if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err))
+ goto cleanup;
+
+ bss = skel->bss;
+
+ err = netif_receive_skb__attach(skel);
+ if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+ goto cleanup;
+
+ /* generate receive event */
+ err = system("ping -c 1 127.0.0.1 > /dev/null");
+ if (CHECK(err, "system", "ping failed: %d\n", err))
+ goto cleanup;
+
+ if (bss->skip) {
+ printf("%s:SKIP:no __builtin_btf_type_id\n", __func__);
+ test__skip();
+ goto cleanup;
+ }
+
+ /*
+ * Make sure the netif_receive_skb program was triggered, that it
+ * set the expected return values from the bpf_trace_printk()s,
+ * and that all subtests ran.
+ */
+ if (CHECK(bss->ret <= 0,
+ "bpf_snprintf_btf: got return value",
+ "ret <= 0 %ld test %d\n", bss->ret, bss->ran_subtests))
+ goto cleanup;
+
+ if (CHECK(bss->ran_subtests == 0, "check if subtests ran",
+ "no subtests ran, did BPF program run?"))
+ goto cleanup;
+
+ if (CHECK(bss->num_subtests != bss->ran_subtests,
+ "check all subtests ran",
+ "only ran %d of %d tests\n", bss->num_subtests,
+ bss->ran_subtests))
+ goto cleanup;
+
+cleanup:
+ netif_receive_skb__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sock_fields.c b/tools/testing/selftests/bpf/prog_tests/sock_fields.c
new file mode 100644
index 000000000000..af87118e748e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sock_fields.c
@@ -0,0 +1,382 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include <linux/compiler.h>
+
+#include "network_helpers.h"
+#include "cgroup_helpers.h"
+#include "test_progs.h"
+#include "bpf_rlimit.h"
+#include "test_sock_fields.skel.h"
+
+enum bpf_linum_array_idx {
+ EGRESS_LINUM_IDX,
+ INGRESS_LINUM_IDX,
+ __NR_BPF_LINUM_ARRAY_IDX,
+};
+
+struct bpf_spinlock_cnt {
+ struct bpf_spin_lock lock;
+ __u32 cnt;
+};
+
+#define PARENT_CGROUP "/test-bpf-sock-fields"
+#define CHILD_CGROUP "/test-bpf-sock-fields/child"
+#define DATA "Hello BPF!"
+#define DATA_LEN sizeof(DATA)
+
+static struct sockaddr_in6 srv_sa6, cli_sa6;
+static int sk_pkt_out_cnt10_fd;
+static struct test_sock_fields *skel;
+static int sk_pkt_out_cnt_fd;
+static __u64 parent_cg_id;
+static __u64 child_cg_id;
+static int linum_map_fd;
+static __u32 duration;
+
+static __u32 egress_linum_idx = EGRESS_LINUM_IDX;
+static __u32 ingress_linum_idx = INGRESS_LINUM_IDX;
+
+static void print_sk(const struct bpf_sock *sk, const char *prefix)
+{
+ char src_ip4[24], dst_ip4[24];
+ char src_ip6[64], dst_ip6[64];
+
+ inet_ntop(AF_INET, &sk->src_ip4, src_ip4, sizeof(src_ip4));
+ inet_ntop(AF_INET6, &sk->src_ip6, src_ip6, sizeof(src_ip6));
+ inet_ntop(AF_INET, &sk->dst_ip4, dst_ip4, sizeof(dst_ip4));
+ inet_ntop(AF_INET6, &sk->dst_ip6, dst_ip6, sizeof(dst_ip6));
+
+ printf("%s: state:%u bound_dev_if:%u family:%u type:%u protocol:%u mark:%u priority:%u "
+ "src_ip4:%x(%s) src_ip6:%x:%x:%x:%x(%s) src_port:%u "
+ "dst_ip4:%x(%s) dst_ip6:%x:%x:%x:%x(%s) dst_port:%u\n",
+ prefix,
+ sk->state, sk->bound_dev_if, sk->family, sk->type, sk->protocol,
+ sk->mark, sk->priority,
+ sk->src_ip4, src_ip4,
+ sk->src_ip6[0], sk->src_ip6[1], sk->src_ip6[2], sk->src_ip6[3],
+ src_ip6, sk->src_port,
+ sk->dst_ip4, dst_ip4,
+ sk->dst_ip6[0], sk->dst_ip6[1], sk->dst_ip6[2], sk->dst_ip6[3],
+ dst_ip6, ntohs(sk->dst_port));
+}
+
+static void print_tp(const struct bpf_tcp_sock *tp, const char *prefix)
+{
+ printf("%s: snd_cwnd:%u srtt_us:%u rtt_min:%u snd_ssthresh:%u rcv_nxt:%u "
+ "snd_nxt:%u snd:una:%u mss_cache:%u ecn_flags:%u "
+ "rate_delivered:%u rate_interval_us:%u packets_out:%u "
+ "retrans_out:%u total_retrans:%u segs_in:%u data_segs_in:%u "
+ "segs_out:%u data_segs_out:%u lost_out:%u sacked_out:%u "
+ "bytes_received:%llu bytes_acked:%llu\n",
+ prefix,
+ tp->snd_cwnd, tp->srtt_us, tp->rtt_min, tp->snd_ssthresh,
+ tp->rcv_nxt, tp->snd_nxt, tp->snd_una, tp->mss_cache,
+ tp->ecn_flags, tp->rate_delivered, tp->rate_interval_us,
+ tp->packets_out, tp->retrans_out, tp->total_retrans,
+ tp->segs_in, tp->data_segs_in, tp->segs_out,
+ tp->data_segs_out, tp->lost_out, tp->sacked_out,
+ tp->bytes_received, tp->bytes_acked);
+}
+
+static void check_result(void)
+{
+ struct bpf_tcp_sock srv_tp, cli_tp, listen_tp;
+ struct bpf_sock srv_sk, cli_sk, listen_sk;
+ __u32 ingress_linum, egress_linum;
+ int err;
+
+ err = bpf_map_lookup_elem(linum_map_fd, &egress_linum_idx,
+ &egress_linum);
+ CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)",
+ "err:%d errno:%d\n", err, errno);
+
+ err = bpf_map_lookup_elem(linum_map_fd, &ingress_linum_idx,
+ &ingress_linum);
+ CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)",
+ "err:%d errno:%d\n", err, errno);
+
+ memcpy(&srv_sk, &skel->bss->srv_sk, sizeof(srv_sk));
+ memcpy(&srv_tp, &skel->bss->srv_tp, sizeof(srv_tp));
+ memcpy(&cli_sk, &skel->bss->cli_sk, sizeof(cli_sk));
+ memcpy(&cli_tp, &skel->bss->cli_tp, sizeof(cli_tp));
+ memcpy(&listen_sk, &skel->bss->listen_sk, sizeof(listen_sk));
+ memcpy(&listen_tp, &skel->bss->listen_tp, sizeof(listen_tp));
+
+ print_sk(&listen_sk, "listen_sk");
+ print_sk(&srv_sk, "srv_sk");
+ print_sk(&cli_sk, "cli_sk");
+ print_tp(&listen_tp, "listen_tp");
+ print_tp(&srv_tp, "srv_tp");
+ print_tp(&cli_tp, "cli_tp");
+
+ CHECK(listen_sk.state != 10 || /* 10 == TCP_LISTEN */
+ listen_sk.family != AF_INET6 ||
+ listen_sk.protocol != IPPROTO_TCP ||
+ memcmp(listen_sk.src_ip6, &in6addr_loopback,
+ sizeof(listen_sk.src_ip6)) ||
+ listen_sk.dst_ip6[0] || listen_sk.dst_ip6[1] ||
+ listen_sk.dst_ip6[2] || listen_sk.dst_ip6[3] ||
+ listen_sk.src_port != ntohs(srv_sa6.sin6_port) ||
+ listen_sk.dst_port,
+ "listen_sk",
+ "Unexpected. Check listen_sk output. ingress_linum:%u\n",
+ ingress_linum);
+
+ CHECK(srv_sk.state == 10 ||
+ !srv_sk.state ||
+ srv_sk.family != AF_INET6 ||
+ srv_sk.protocol != IPPROTO_TCP ||
+ memcmp(srv_sk.src_ip6, &in6addr_loopback,
+ sizeof(srv_sk.src_ip6)) ||
+ memcmp(srv_sk.dst_ip6, &in6addr_loopback,
+ sizeof(srv_sk.dst_ip6)) ||
+ srv_sk.src_port != ntohs(srv_sa6.sin6_port) ||
+ srv_sk.dst_port != cli_sa6.sin6_port,
+ "srv_sk", "Unexpected. Check srv_sk output. egress_linum:%u\n",
+ egress_linum);
+
+ CHECK(!skel->bss->lsndtime, "srv_tp", "Unexpected lsndtime:0\n");
+
+ CHECK(cli_sk.state == 10 ||
+ !cli_sk.state ||
+ cli_sk.family != AF_INET6 ||
+ cli_sk.protocol != IPPROTO_TCP ||
+ memcmp(cli_sk.src_ip6, &in6addr_loopback,
+ sizeof(cli_sk.src_ip6)) ||
+ memcmp(cli_sk.dst_ip6, &in6addr_loopback,
+ sizeof(cli_sk.dst_ip6)) ||
+ cli_sk.src_port != ntohs(cli_sa6.sin6_port) ||
+ cli_sk.dst_port != srv_sa6.sin6_port,
+ "cli_sk", "Unexpected. Check cli_sk output. egress_linum:%u\n",
+ egress_linum);
+
+ CHECK(listen_tp.data_segs_out ||
+ listen_tp.data_segs_in ||
+ listen_tp.total_retrans ||
+ listen_tp.bytes_acked,
+ "listen_tp",
+ "Unexpected. Check listen_tp output. ingress_linum:%u\n",
+ ingress_linum);
+
+ CHECK(srv_tp.data_segs_out != 2 ||
+ srv_tp.data_segs_in ||
+ srv_tp.snd_cwnd != 10 ||
+ srv_tp.total_retrans ||
+ srv_tp.bytes_acked < 2 * DATA_LEN,
+ "srv_tp", "Unexpected. Check srv_tp output. egress_linum:%u\n",
+ egress_linum);
+
+ CHECK(cli_tp.data_segs_out ||
+ cli_tp.data_segs_in != 2 ||
+ cli_tp.snd_cwnd != 10 ||
+ cli_tp.total_retrans ||
+ cli_tp.bytes_received < 2 * DATA_LEN,
+ "cli_tp", "Unexpected. Check cli_tp output. egress_linum:%u\n",
+ egress_linum);
+
+ CHECK(skel->bss->parent_cg_id != parent_cg_id,
+ "parent_cg_id", "%zu != %zu\n",
+ (size_t)skel->bss->parent_cg_id, (size_t)parent_cg_id);
+
+ CHECK(skel->bss->child_cg_id != child_cg_id,
+ "child_cg_id", "%zu != %zu\n",
+ (size_t)skel->bss->child_cg_id, (size_t)child_cg_id);
+}
+
+static void check_sk_pkt_out_cnt(int accept_fd, int cli_fd)
+{
+ struct bpf_spinlock_cnt pkt_out_cnt = {}, pkt_out_cnt10 = {};
+ int err;
+
+ pkt_out_cnt.cnt = ~0;
+ pkt_out_cnt10.cnt = ~0;
+ err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &accept_fd, &pkt_out_cnt);
+ if (!err)
+ err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &accept_fd,
+ &pkt_out_cnt10);
+
+ /* The bpf prog only counts for a fullsock, and the passive
+ * connection does not become a fullsock until the 3WHS has
+ * finished, so the bpf prog only counted two data packets
+ * out.
+ */
+ CHECK(err || pkt_out_cnt.cnt < 0xeB9F + 2 ||
+ pkt_out_cnt10.cnt < 0xeB9F + 20,
+ "bpf_map_lookup_elem(sk_pkt_out_cnt, &accept_fd)",
+ "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u\n",
+ err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt);
+
+ pkt_out_cnt.cnt = ~0;
+ pkt_out_cnt10.cnt = ~0;
+ err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &cli_fd, &pkt_out_cnt);
+ if (!err)
+ err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &cli_fd,
+ &pkt_out_cnt10);
+ /* The active connection is a fullsock from the beginning:
+ * 1 SYN and 1 ACK during the 3WHS,
+ * 2 ACKs on the data packets.
+ *
+ * The bpf prog initialized the counter to 0xeB9F.
+ */
+ CHECK(err || pkt_out_cnt.cnt < 0xeB9F + 4 ||
+ pkt_out_cnt10.cnt < 0xeB9F + 40,
+ "bpf_map_lookup_elem(sk_pkt_out_cnt, &cli_fd)",
+ "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u\n",
+ err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt);
+}
+
+static int init_sk_storage(int sk_fd, __u32 pkt_out_cnt)
+{
+ struct bpf_spinlock_cnt scnt = {};
+ int err;
+
+ scnt.cnt = pkt_out_cnt;
+ err = bpf_map_update_elem(sk_pkt_out_cnt_fd, &sk_fd, &scnt,
+ BPF_NOEXIST);
+ if (CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt_fd)",
+ "err:%d errno:%d\n", err, errno))
+ return err;
+
+ err = bpf_map_update_elem(sk_pkt_out_cnt10_fd, &sk_fd, &scnt,
+ BPF_NOEXIST);
+ if (CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt10_fd)",
+ "err:%d errno:%d\n", err, errno))
+ return err;
+
+ return 0;
+}
+
+static void test(void)
+{
+ int listen_fd = -1, cli_fd = -1, accept_fd = -1, err, i;
+ socklen_t addrlen = sizeof(struct sockaddr_in6);
+ char buf[DATA_LEN];
+
+ /* Prepare listen_fd */
+ listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+ /* start_server() has logged the error details */
+ if (CHECK_FAIL(listen_fd == -1))
+ goto done;
+
+ err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen);
+ if (CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d\n", err,
+ errno))
+ goto done;
+ memcpy(&skel->bss->srv_sa6, &srv_sa6, sizeof(srv_sa6));
+
+ cli_fd = connect_to_fd(listen_fd, 0);
+ if (CHECK_FAIL(cli_fd == -1))
+ goto done;
+
+ err = getsockname(cli_fd, (struct sockaddr *)&cli_sa6, &addrlen);
+ if (CHECK(err, "getsockname(cli_fd)", "err:%d errno:%d\n",
+ err, errno))
+ goto done;
+
+ accept_fd = accept(listen_fd, NULL, NULL);
+ if (CHECK(accept_fd == -1, "accept(listen_fd)",
+ "accept_fd:%d errno:%d\n",
+ accept_fd, errno))
+ goto done;
+
+ if (init_sk_storage(accept_fd, 0xeB9F))
+ goto done;
+
+ for (i = 0; i < 2; i++) {
+ /* Send some data from accept_fd to cli_fd.
+ * MSG_EOR to stop kernel from coalescing two pkts.
+ */
+ err = send(accept_fd, DATA, DATA_LEN, MSG_EOR);
+ if (CHECK(err != DATA_LEN, "send(accept_fd)",
+ "err:%d errno:%d\n", err, errno))
+ goto done;
+
+ err = recv(cli_fd, buf, DATA_LEN, 0);
+ if (CHECK(err != DATA_LEN, "recv(cli_fd)", "err:%d errno:%d\n",
+ err, errno))
+ goto done;
+ }
+
+ shutdown(cli_fd, SHUT_WR);
+ err = recv(accept_fd, buf, 1, 0);
+ if (CHECK(err, "recv(accept_fd) for fin", "err:%d errno:%d\n",
+ err, errno))
+ goto done;
+ shutdown(accept_fd, SHUT_WR);
+ err = recv(cli_fd, buf, 1, 0);
+ if (CHECK(err, "recv(cli_fd) for fin", "err:%d errno:%d\n",
+ err, errno))
+ goto done;
+ check_sk_pkt_out_cnt(accept_fd, cli_fd);
+ check_result();
+
+done:
+ if (accept_fd != -1)
+ close(accept_fd);
+ if (cli_fd != -1)
+ close(cli_fd);
+ if (listen_fd != -1)
+ close(listen_fd);
+}
+
+void test_sock_fields(void)
+{
+ struct bpf_link *egress_link = NULL, *ingress_link = NULL;
+ int parent_cg_fd = -1, child_cg_fd = -1;
+
+ /* Create a cgroup, get fd, and join it */
+ parent_cg_fd = test__join_cgroup(PARENT_CGROUP);
+ if (CHECK_FAIL(parent_cg_fd < 0))
+ return;
+ parent_cg_id = get_cgroup_id(PARENT_CGROUP);
+ if (CHECK_FAIL(!parent_cg_id))
+ goto done;
+
+ child_cg_fd = test__join_cgroup(CHILD_CGROUP);
+ if (CHECK_FAIL(child_cg_fd < 0))
+ goto done;
+ child_cg_id = get_cgroup_id(CHILD_CGROUP);
+ if (CHECK_FAIL(!child_cg_id))
+ goto done;
+
+ skel = test_sock_fields__open_and_load();
+ if (CHECK(!skel, "test_sock_fields__open_and_load", "failed\n"))
+ goto done;
+
+ egress_link = bpf_program__attach_cgroup(skel->progs.egress_read_sock_fields,
+ child_cg_fd);
+ if (CHECK(IS_ERR(egress_link), "attach_cgroup(egress)", "err:%ld\n",
+ PTR_ERR(egress_link)))
+ goto done;
+
+ ingress_link = bpf_program__attach_cgroup(skel->progs.ingress_read_sock_fields,
+ child_cg_fd);
+ if (CHECK(IS_ERR(ingress_link), "attach_cgroup(ingress)", "err:%ld\n",
+ PTR_ERR(ingress_link)))
+ goto done;
+
+ linum_map_fd = bpf_map__fd(skel->maps.linum_map);
+ sk_pkt_out_cnt_fd = bpf_map__fd(skel->maps.sk_pkt_out_cnt);
+ sk_pkt_out_cnt10_fd = bpf_map__fd(skel->maps.sk_pkt_out_cnt10);
+
+ test();
+
+done:
+ bpf_link__destroy(egress_link);
+ bpf_link__destroy(ingress_link);
+ test_sock_fields__destroy(skel);
+ if (child_cg_fd != -1)
+ close(child_cg_fd);
+ if (parent_cg_fd != -1)
+ close(parent_cg_fd);
+}
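
One detail worth noting: check_sk_pkt_out_cnt() looks up the
BPF_MAP_TYPE_SK_STORAGE maps from user space with a socket fd as the key; the
kernel resolves the fd to the socket and returns that socket's storage. A
one-function sketch, reusing struct bpf_spinlock_cnt from the test above:

#include <bpf/bpf.h>

static int read_sk_cnt(int storage_map_fd, int sk_fd,
		       struct bpf_spinlock_cnt *out)
{
	/* fails with ENOENT if the socket has no storage in this map */
	return bpf_map_lookup_elem(storage_map_fd, &sk_fd, out);
}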
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index 96e7b7f84c65..85f73261fab0 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -4,6 +4,9 @@
#include "test_progs.h"
#include "test_skmsg_load_helpers.skel.h"
+#include "test_sockmap_update.skel.h"
+#include "test_sockmap_invalid_update.skel.h"
+#include "bpf_iter_sockmap.skel.h"
#define TCP_REPAIR 19 /* TCP sock is under repair right now */
@@ -45,6 +48,37 @@ error:
return -1;
}
+static void compare_cookies(struct bpf_map *src, struct bpf_map *dst)
+{
+ __u32 i, max_entries = bpf_map__max_entries(src);
+ int err, duration = 0, src_fd, dst_fd;
+
+ src_fd = bpf_map__fd(src);
+ dst_fd = bpf_map__fd(dst);
+
+ for (i = 0; i < max_entries; i++) {
+ __u64 src_cookie, dst_cookie;
+
+ err = bpf_map_lookup_elem(src_fd, &i, &src_cookie);
+ if (err && errno == ENOENT) {
+ err = bpf_map_lookup_elem(dst_fd, &i, &dst_cookie);
+ CHECK(!err, "map_lookup_elem(dst)", "element %u not deleted\n", i);
+ CHECK(err && errno != ENOENT, "map_lookup_elem(dst)", "%s\n",
+ strerror(errno));
+ continue;
+ }
+ if (CHECK(err, "lookup_elem(src)", "%s\n", strerror(errno)))
+ continue;
+
+ err = bpf_map_lookup_elem(dst_fd, &i, &dst_cookie);
+ if (CHECK(err, "lookup_elem(dst)", "%s\n", strerror(errno)))
+ continue;
+
+ CHECK(dst_cookie != src_cookie, "cookie mismatch",
+ "%llu != %llu (pos %u)\n", dst_cookie, src_cookie, i);
+ }
+}
+
/* Create a map, populate it with one socket, and free the map. */
static void test_sockmap_create_update_free(enum bpf_map_type map_type)
{
@@ -101,6 +135,151 @@ out:
test_skmsg_load_helpers__destroy(skel);
}
+static void test_sockmap_update(enum bpf_map_type map_type)
+{
+ struct bpf_prog_test_run_attr tattr;
+ int err, prog, src, duration = 0;
+ struct test_sockmap_update *skel;
+ struct bpf_map *dst_map;
+ const __u32 zero = 0;
+ char dummy[14] = {0};
+ __s64 sk;
+
+ sk = connected_socket_v4();
+ if (CHECK(sk == -1, "connected_socket_v4", "cannot connect\n"))
+ return;
+
+ skel = test_sockmap_update__open_and_load();
+ if (CHECK(!skel, "open_and_load", "cannot load skeleton\n"))
+ goto close_sk;
+
+ prog = bpf_program__fd(skel->progs.copy_sock_map);
+ src = bpf_map__fd(skel->maps.src);
+ if (map_type == BPF_MAP_TYPE_SOCKMAP)
+ dst_map = skel->maps.dst_sock_map;
+ else
+ dst_map = skel->maps.dst_sock_hash;
+
+ err = bpf_map_update_elem(src, &zero, &sk, BPF_NOEXIST);
+ if (CHECK(err, "update_elem(src)", "errno=%u\n", errno))
+ goto out;
+
+ tattr = (struct bpf_prog_test_run_attr){
+ .prog_fd = prog,
+ .repeat = 1,
+ .data_in = dummy,
+ .data_size_in = sizeof(dummy),
+ };
+
+ err = bpf_prog_test_run_xattr(&tattr);
+ if (CHECK_ATTR(err || !tattr.retval, "bpf_prog_test_run",
+ "errno=%u retval=%u\n", errno, tattr.retval))
+ goto out;
+
+ compare_cookies(skel->maps.src, dst_map);
+
+out:
+ test_sockmap_update__destroy(skel);
+close_sk:
+ close(sk);
+}
+
+static void test_sockmap_invalid_update(void)
+{
+ struct test_sockmap_invalid_update *skel;
+ int duration = 0;
+
+ skel = test_sockmap_invalid_update__open_and_load();
+ if (CHECK(skel, "open_and_load", "verifier accepted map_update\n"))
+ test_sockmap_invalid_update__destroy(skel);
+}
+
+static void test_sockmap_copy(enum bpf_map_type map_type)
+{
+ DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ int err, len, src_fd, iter_fd, duration = 0;
+ union bpf_iter_link_info linfo = {};
+ __u32 i, num_sockets, num_elems;
+ struct bpf_iter_sockmap *skel;
+ __s64 *sock_fd = NULL;
+ struct bpf_link *link;
+ struct bpf_map *src;
+ char buf[64];
+
+ skel = bpf_iter_sockmap__open_and_load();
+ if (CHECK(!skel, "bpf_iter_sockmap__open_and_load", "skeleton open_and_load failed\n"))
+ return;
+
+ if (map_type == BPF_MAP_TYPE_SOCKMAP) {
+ src = skel->maps.sockmap;
+ num_elems = bpf_map__max_entries(src);
+ num_sockets = num_elems - 1;
+ } else {
+ src = skel->maps.sockhash;
+ num_elems = bpf_map__max_entries(src) - 1;
+ num_sockets = num_elems;
+ }
+
+ sock_fd = calloc(num_sockets, sizeof(*sock_fd));
+ if (CHECK(!sock_fd, "calloc(sock_fd)", "failed to allocate\n"))
+ goto out;
+
+ for (i = 0; i < num_sockets; i++)
+ sock_fd[i] = -1;
+
+ src_fd = bpf_map__fd(src);
+
+ for (i = 0; i < num_sockets; i++) {
+ sock_fd[i] = connected_socket_v4();
+ if (CHECK(sock_fd[i] == -1, "connected_socket_v4", "cannot connect\n"))
+ goto out;
+
+ err = bpf_map_update_elem(src_fd, &i, &sock_fd[i], BPF_NOEXIST);
+ if (CHECK(err, "map_update", "failed: %s\n", strerror(errno)))
+ goto out;
+ }
+
+ linfo.map.map_fd = src_fd;
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+ link = bpf_program__attach_iter(skel->progs.copy, &opts);
+ if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ goto out;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+ goto free_link;
+
+ /* drain the iterator; reading it runs the copy program */
+ while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
+ ;
+ if (CHECK(len < 0, "read", "failed: %s\n", strerror(errno)))
+ goto close_iter;
+
+ /* test results */
+ if (CHECK(skel->bss->elems != num_elems, "elems", "got %u expected %u\n",
+ skel->bss->elems, num_elems))
+ goto close_iter;
+
+ if (CHECK(skel->bss->socks != num_sockets, "socks", "got %u expected %u\n",
+ skel->bss->socks, num_sockets))
+ goto close_iter;
+
+ compare_cookies(src, skel->maps.dst);
+
+close_iter:
+ close(iter_fd);
+free_link:
+ bpf_link__destroy(link);
+out:
+ for (i = 0; sock_fd && i < num_sockets; i++)
+ if (sock_fd[i] >= 0)
+ close(sock_fd[i]);
+ if (sock_fd)
+ free(sock_fd);
+ bpf_iter_sockmap__destroy(skel);
+}
+
void test_sockmap_basic(void)
{
if (test__start_subtest("sockmap create_update_free"))
@@ -111,4 +290,14 @@ void test_sockmap_basic(void)
test_skmsg_helpers(BPF_MAP_TYPE_SOCKMAP);
if (test__start_subtest("sockhash sk_msg load helpers"))
test_skmsg_helpers(BPF_MAP_TYPE_SOCKHASH);
+ if (test__start_subtest("sockmap update"))
+ test_sockmap_update(BPF_MAP_TYPE_SOCKMAP);
+ if (test__start_subtest("sockhash update"))
+ test_sockmap_update(BPF_MAP_TYPE_SOCKHASH);
+ if (test__start_subtest("sockmap update in unsafe context"))
+ test_sockmap_invalid_update();
+ if (test__start_subtest("sockmap copy"))
+ test_sockmap_copy(BPF_MAP_TYPE_SOCKMAP);
+ if (test__start_subtest("sockhash copy"))
+ test_sockmap_copy(BPF_MAP_TYPE_SOCKHASH);
}
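
The update tests insert a connected socket into the source sockmap from user
space by passing the socket fd as the map value; they keep the fd in an
__s64, presumably to match the 64-bit slot from which user-space lookups
return the socket cookie. A hedged sketch of that insert:

#include <bpf/bpf.h>

static int sockmap_insert(int map_fd, __u32 idx, int sock_fd)
{
	__s64 value = sock_fd;	/* 64-bit slot, as in the tests above */

	return bpf_map_update_elem(map_fd, &idx, &value, BPF_NOEXIST);
}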
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
index 5f54c6aec7f0..b25c9c45c148 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
@@ -45,9 +45,9 @@ static int getsetsockopt(void)
goto err;
}
- if (*(int *)big_buf != 0x08) {
+ if (*big_buf != 0x08) {
log_err("Unexpected getsockopt(IP_TOS) optval 0x%x != 0x08",
- *(int *)big_buf);
+ (int)*big_buf);
goto err;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/subprogs.c b/tools/testing/selftests/bpf/prog_tests/subprogs.c
new file mode 100644
index 000000000000..a00abf58c037
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/subprogs.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <test_progs.h>
+#include <time.h>
+#include "test_subprogs.skel.h"
+
+static int duration;
+
+void test_subprogs(void)
+{
+ struct test_subprogs *skel;
+ int err;
+
+ skel = test_subprogs__open_and_load();
+ if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+ return;
+
+ err = test_subprogs__attach(skel);
+ if (CHECK(err, "skel_attach", "failed to attach skeleton: %d\n", err))
+ goto cleanup;
+
+ usleep(1);
+
+ CHECK(skel->bss->res1 != 12, "res1", "got %d, exp %d\n", skel->bss->res1, 12);
+ CHECK(skel->bss->res2 != 17, "res2", "got %d, exp %d\n", skel->bss->res2, 17);
+ CHECK(skel->bss->res3 != 19, "res3", "got %d, exp %d\n", skel->bss->res3, 19);
+ CHECK(skel->bss->res4 != 36, "res4", "got %d, exp %d\n", skel->bss->res4, 36);
+
+cleanup:
+ test_subprogs__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
index bb8fe646dd9f..ee27d68d2a1c 100644
--- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c
+++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include <network_helpers.h>
/* test_tailcall_1 checks basic functionality by patching multiple locations
* in a single program for a single tail call slot with nop->jmp, jmp->nop
@@ -472,6 +473,329 @@ out:
bpf_object__close(obj);
}
+/* test_tailcall_bpf2bpf_1 makes sure that tailcalls work correctly in
+ * combination with BPF subprograms
+ */
+static void test_tailcall_bpf2bpf_1(void)
+{
+ int err, map_fd, prog_fd, main_fd, i;
+ struct bpf_map *prog_array;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ __u32 retval, duration;
+ char prog_name[32];
+
+ err = bpf_prog_load("tailcall_bpf2bpf1.o", BPF_PROG_TYPE_SCHED_CLS,
+ &obj, &prog_fd);
+ if (CHECK_FAIL(err))
+ return;
+
+ prog = bpf_object__find_program_by_title(obj, "classifier");
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ main_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(main_fd < 0))
+ goto out;
+
+ prog_array = bpf_object__find_map_by_name(obj, "jmp_table");
+ if (CHECK_FAIL(!prog_array))
+ goto out;
+
+ map_fd = bpf_map__fd(prog_array);
+ if (CHECK_FAIL(map_fd < 0))
+ goto out;
+
+ /* nop -> jmp */
+ for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
+
+ prog = bpf_object__find_program_by_title(obj, prog_name);
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ prog_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(prog_fd < 0))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+ if (CHECK_FAIL(err))
+ goto out;
+ }
+
+ err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
+ 0, &retval, &duration);
+ CHECK(err || retval != 1, "tailcall",
+ "err %d errno %d retval %d\n", err, errno, retval);
+
+ /* jmp -> nop, call subprog that will do tailcall */
+ i = 1;
+ err = bpf_map_delete_elem(map_fd, &i);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
+ 0, &retval, &duration);
+ CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n",
+ err, errno, retval);
+
+ /* make sure that subprog can access ctx and entry prog that
+ * called this subprog can properly return
+ */
+ i = 0;
+ err = bpf_map_delete_elem(map_fd, &i);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
+ 0, &retval, &duration);
+ CHECK(err || retval != sizeof(pkt_v4) * 2,
+ "tailcall", "err %d errno %d retval %d\n",
+ err, errno, retval);
+out:
+ bpf_object__close(obj);
+}
+
+/* test_tailcall_bpf2bpf_2 checks that the count value of the tail call limit
+ * enforcement matches expectations when the tailcall is preceded by a
+ * bpf2bpf call.
+ */
+static void test_tailcall_bpf2bpf_2(void)
+{
+ int err, map_fd, prog_fd, main_fd, data_fd, i, val;
+ struct bpf_map *prog_array, *data_map;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ __u32 retval, duration;
+ char buff[128] = {};
+
+ err = bpf_prog_load("tailcall_bpf2bpf2.o", BPF_PROG_TYPE_SCHED_CLS,
+ &obj, &prog_fd);
+ if (CHECK_FAIL(err))
+ return;
+
+ prog = bpf_object__find_program_by_title(obj, "classifier");
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ main_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(main_fd < 0))
+ goto out;
+
+ prog_array = bpf_object__find_map_by_name(obj, "jmp_table");
+ if (CHECK_FAIL(!prog_array))
+ goto out;
+
+ map_fd = bpf_map__fd(prog_array);
+ if (CHECK_FAIL(map_fd < 0))
+ goto out;
+
+ prog = bpf_object__find_program_by_title(obj, "classifier/0");
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ prog_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(prog_fd < 0))
+ goto out;
+
+ i = 0;
+ err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != 1, "tailcall", "err %d errno %d retval %d\n",
+ err, errno, retval);
+
+ data_map = bpf_object__find_map_by_name(obj, "tailcall.bss");
+ if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map)))
+ goto out;
+
+ data_fd = bpf_map__fd(data_map);
+ if (CHECK_FAIL(data_fd < 0))
+ goto out;
+
+ i = 0;
+ err = bpf_map_lookup_elem(data_fd, &i, &val);
+ CHECK(err || val != 33, "tailcall count", "err %d errno %d count %d\n",
+ err, errno, val);
+
+ i = 0;
+ err = bpf_map_delete_elem(map_fd, &i);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n",
+ err, errno, retval);
+out:
+ bpf_object__close(obj);
+}
+
+/* test_tailcall_bpf2bpf_3 checks that a non-trivial amount of stack (up to
+ * 256 bytes) can be used within bpf subprograms that have tailcalls
+ * in them
+ */
+static void test_tailcall_bpf2bpf_3(void)
+{
+ int err, map_fd, prog_fd, main_fd, i;
+ struct bpf_map *prog_array;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ __u32 retval, duration;
+ char prog_name[32];
+
+ err = bpf_prog_load("tailcall_bpf2bpf3.o", BPF_PROG_TYPE_SCHED_CLS,
+ &obj, &prog_fd);
+ if (CHECK_FAIL(err))
+ return;
+
+ prog = bpf_object__find_program_by_title(obj, "classifier");
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ main_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(main_fd < 0))
+ goto out;
+
+ prog_array = bpf_object__find_map_by_name(obj, "jmp_table");
+ if (CHECK_FAIL(!prog_array))
+ goto out;
+
+ map_fd = bpf_map__fd(prog_array);
+ if (CHECK_FAIL(map_fd < 0))
+ goto out;
+
+ for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
+
+ prog = bpf_object__find_program_by_title(obj, prog_name);
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ prog_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(prog_fd < 0))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+ if (CHECK_FAIL(err))
+ goto out;
+ }
+
+ err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != sizeof(pkt_v4) * 3,
+ "tailcall", "err %d errno %d retval %d\n",
+ err, errno, retval);
+
+ i = 1;
+ err = bpf_map_delete_elem(map_fd, &i);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != sizeof(pkt_v4),
+ "tailcall", "err %d errno %d retval %d\n",
+ err, errno, retval);
+
+ i = 0;
+ err = bpf_map_delete_elem(map_fd, &i);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != sizeof(pkt_v4) * 2,
+ "tailcall", "err %d errno %d retval %d\n",
+ err, errno, retval);
+out:
+ bpf_object__close(obj);
+}
+
+/* test_tailcall_bpf2bpf_4 checks that the tailcall counter is correctly
+ * preserved across tailcalls combined with bpf2bpf calls. To verify that
+ * the tailcall counter behaves correctly, the bpf program goes through the
+ * following flow:
+ *
+ * entry -> entry_subprog -> tailcall0 -> bpf_func0 -> subprog0 ->
+ * -> tailcall1 -> bpf_func1 -> subprog1 -> tailcall2 -> bpf_func2 ->
+ * subprog2 [here bump global counter] --------^
+ *
+ * We go through the first two tailcalls and start counting from subprog2,
+ * where the loop begins. At the end of the test, the global counter must
+ * equal 31, because the tailcall counter includes the first two tailcalls
+ * whereas the global counter is only incremented in the loop shown in the
+ * flow above.
+ */
+static void test_tailcall_bpf2bpf_4(void)
+{
+ int err, map_fd, prog_fd, main_fd, data_fd, i, val;
+ struct bpf_map *prog_array, *data_map;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ __u32 retval, duration;
+ char prog_name[32];
+
+ err = bpf_prog_load("tailcall_bpf2bpf4.o", BPF_PROG_TYPE_SCHED_CLS,
+ &obj, &prog_fd);
+ if (CHECK_FAIL(err))
+ return;
+
+ prog = bpf_object__find_program_by_title(obj, "classifier");
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ main_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(main_fd < 0))
+ goto out;
+
+ prog_array = bpf_object__find_map_by_name(obj, "jmp_table");
+ if (CHECK_FAIL(!prog_array))
+ goto out;
+
+ map_fd = bpf_map__fd(prog_array);
+ if (CHECK_FAIL(map_fd < 0))
+ goto out;
+
+ for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
+
+ prog = bpf_object__find_program_by_title(obj, prog_name);
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ prog_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(prog_fd < 0))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+ if (CHECK_FAIL(err))
+ goto out;
+ }
+
+ err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != sizeof(pkt_v4) * 3, "tailcall", "err %d errno %d retval %d\n",
+ err, errno, retval);
+
+ data_map = bpf_object__find_map_by_name(obj, "tailcall.bss");
+ if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map)))
+ goto out;
+
+ data_fd = bpf_map__fd(data_map);
+ if (CHECK_FAIL(data_fd < 0))
+ goto out;
+
+ i = 0;
+ err = bpf_map_lookup_elem(data_fd, &i, &val);
+ CHECK(err || val != 31, "tailcall count", "err %d errno %d count %d\n",
+ err, errno, val);
+
+out:
+ bpf_object__close(obj);
+}
+
void test_tailcalls(void)
{
if (test__start_subtest("tailcall_1"))
@@ -484,4 +808,12 @@ void test_tailcalls(void)
test_tailcall_4();
if (test__start_subtest("tailcall_5"))
test_tailcall_5();
+ if (test__start_subtest("tailcall_bpf2bpf_1"))
+ test_tailcall_bpf2bpf_1();
+ if (test__start_subtest("tailcall_bpf2bpf_2"))
+ test_tailcall_bpf2bpf_2();
+ if (test__start_subtest("tailcall_bpf2bpf_3"))
+ test_tailcall_bpf2bpf_3();
+ if (test__start_subtest("tailcall_bpf2bpf_4"))
+ test_tailcall_bpf2bpf_4();
}
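
All four bpf2bpf tests load object files (tailcall_bpf2bpf1.o through
tailcall_bpf2bpf4.o) whose sources are not part of this hunk. A hedged
BPF-side sketch of the shape they exercise, a tail call issued from inside a
static subprogram, following the "jmp_table" map and "classifier" section
conventions the loader code above expects; the body is illustrative, not the
actual object source:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
	__uint(max_entries, 2);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");

static __noinline int subprog_tail(struct __sk_buff *skb)
{
	/* the per-chain tail call counter is shared across bpf2bpf
	 * frames, which is what test_tailcall_bpf2bpf_2/_4 observe */
	bpf_tail_call(skb, &jmp_table, 0);
	return 1;
}

SEC("classifier")
int entry(struct __sk_buff *skb)
{
	return subprog_tail(skb);
}

char __license[] SEC("license") = "GPL";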
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
new file mode 100644
index 000000000000..c85174cdcb77
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
@@ -0,0 +1,610 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <linux/compiler.h>
+
+#include "test_progs.h"
+#include "cgroup_helpers.h"
+#include "network_helpers.h"
+#include "test_tcp_hdr_options.h"
+#include "test_tcp_hdr_options.skel.h"
+#include "test_misc_tcp_hdr_options.skel.h"
+
+#define LO_ADDR6 "::1"
+#define CG_NAME "/tcpbpf-hdr-opt-test"
+
+struct bpf_test_option exp_passive_estab_in;
+struct bpf_test_option exp_active_estab_in;
+struct bpf_test_option exp_passive_fin_in;
+struct bpf_test_option exp_active_fin_in;
+struct hdr_stg exp_passive_hdr_stg;
+struct hdr_stg exp_active_hdr_stg = { .active = true, };
+
+static struct test_misc_tcp_hdr_options *misc_skel;
+static struct test_tcp_hdr_options *skel;
+static int lport_linum_map_fd;
+static int hdr_stg_map_fd;
+static __u32 duration;
+static int cg_fd;
+
+struct sk_fds {
+ int srv_fd;
+ int passive_fd;
+ int active_fd;
+ int passive_lport;
+ int active_lport;
+};
+
+static int create_netns(void)
+{
+ if (CHECK(unshare(CLONE_NEWNET), "create netns",
+ "unshare(CLONE_NEWNET): %s (%d)",
+ strerror(errno), errno))
+ return -1;
+
+ if (CHECK(system("ip link set dev lo up"), "run ip cmd",
+ "failed to bring lo link up\n"))
+ return -1;
+
+ return 0;
+}
+
+static int write_sysctl(const char *sysctl, const char *value)
+{
+ int fd, err, len;
+
+ fd = open(sysctl, O_WRONLY);
+ if (CHECK(fd == -1, "open sysctl", "open(%s): %s (%d)\n",
+ sysctl, strerror(errno), errno))
+ return -1;
+
+ len = strlen(value);
+ err = write(fd, value, len);
+ close(fd);
+ if (CHECK(err != len, "write sysctl",
+ "write(%s, %s): err:%d %s (%d)\n",
+ sysctl, value, err, strerror(errno), errno))
+ return -1;
+
+ return 0;
+}
+
+static void print_hdr_stg(const struct hdr_stg *hdr_stg, const char *prefix)
+{
+ fprintf(stderr, "%s{active:%u, resend_syn:%u, syncookie:%u, fastopen:%u}\n",
+ prefix ? : "", hdr_stg->active, hdr_stg->resend_syn,
+ hdr_stg->syncookie, hdr_stg->fastopen);
+}
+
+static void print_option(const struct bpf_test_option *opt, const char *prefix)
+{
+ fprintf(stderr, "%s{flags:0x%x, max_delack_ms:%u, rand:0x%x}\n",
+ prefix ? : "", opt->flags, opt->max_delack_ms, opt->rand);
+}
+
+static void sk_fds_close(struct sk_fds *sk_fds)
+{
+ close(sk_fds->srv_fd);
+ close(sk_fds->passive_fd);
+ close(sk_fds->active_fd);
+}
+
+static int sk_fds_shutdown(struct sk_fds *sk_fds)
+{
+ int ret, abyte;
+
+ shutdown(sk_fds->active_fd, SHUT_WR);
+ ret = read(sk_fds->passive_fd, &abyte, sizeof(abyte));
+ if (CHECK(ret != 0, "read-after-shutdown(passive_fd):",
+ "ret:%d %s (%d)\n",
+ ret, strerror(errno), errno))
+ return -1;
+
+ shutdown(sk_fds->passive_fd, SHUT_WR);
+ ret = read(sk_fds->active_fd, &abyte, sizeof(abyte));
+ if (CHECK(ret != 0, "read-after-shutdown(active_fd):",
+ "ret:%d %s (%d)\n",
+ ret, strerror(errno), errno))
+ return -1;
+
+ return 0;
+}
+
+static int sk_fds_connect(struct sk_fds *sk_fds, bool fast_open)
+{
+ const char fast[] = "FAST!!!";
+ struct sockaddr_in6 addr6;
+ socklen_t len;
+
+ sk_fds->srv_fd = start_server(AF_INET6, SOCK_STREAM, LO_ADDR6, 0, 0);
+ if (CHECK(sk_fds->srv_fd == -1, "start_server", "%s (%d)\n",
+ strerror(errno), errno))
+ goto error;
+
+ if (fast_open)
+ sk_fds->active_fd = fastopen_connect(sk_fds->srv_fd, fast,
+ sizeof(fast), 0);
+ else
+ sk_fds->active_fd = connect_to_fd(sk_fds->srv_fd, 0);
+
+ if (CHECK_FAIL(sk_fds->active_fd == -1)) {
+ close(sk_fds->srv_fd);
+ goto error;
+ }
+
+ len = sizeof(addr6);
+ if (CHECK(getsockname(sk_fds->srv_fd, (struct sockaddr *)&addr6,
+ &len), "getsockname(srv_fd)", "%s (%d)\n",
+ strerror(errno), errno))
+ goto error_close;
+ sk_fds->passive_lport = ntohs(addr6.sin6_port);
+
+ len = sizeof(addr6);
+ if (CHECK(getsockname(sk_fds->active_fd, (struct sockaddr *)&addr6,
+ &len), "getsockname(active_fd)", "%s (%d)\n",
+ strerror(errno), errno))
+ goto error_close;
+ sk_fds->active_lport = ntohs(addr6.sin6_port);
+
+ sk_fds->passive_fd = accept(sk_fds->srv_fd, NULL, 0);
+ if (CHECK(sk_fds->passive_fd == -1, "accept(srv_fd)", "%s (%d)\n",
+ strerror(errno), errno))
+ goto error_close;
+
+ if (fast_open) {
+ char bytes_in[sizeof(fast)];
+ int ret;
+
+ ret = read(sk_fds->passive_fd, bytes_in, sizeof(bytes_in));
+ if (CHECK(ret != sizeof(fast), "read fastopen syn data",
+ "expected=%lu actual=%d\n", sizeof(fast), ret)) {
+ close(sk_fds->passive_fd);
+ goto error_close;
+ }
+ }
+
+ return 0;
+
+error_close:
+ close(sk_fds->active_fd);
+ close(sk_fds->srv_fd);
+
+error:
+ memset(sk_fds, -1, sizeof(*sk_fds));
+ return -1;
+}
+
+static int check_hdr_opt(const struct bpf_test_option *exp,
+ const struct bpf_test_option *act,
+ const char *hdr_desc)
+{
+ if (CHECK(memcmp(exp, act, sizeof(*exp)),
+ "expected-vs-actual", "unexpected %s\n", hdr_desc)) {
+ print_option(exp, "expected: ");
+ print_option(act, " actual: ");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int check_hdr_stg(const struct hdr_stg *exp, int fd,
+ const char *stg_desc)
+{
+ struct hdr_stg act;
+
+ if (CHECK(bpf_map_lookup_elem(hdr_stg_map_fd, &fd, &act),
+ "map_lookup(hdr_stg_map_fd)", "%s %s (%d)\n",
+ stg_desc, strerror(errno), errno))
+ return -1;
+
+ if (CHECK(memcmp(exp, &act, sizeof(*exp)),
+ "expected-vs-actual", "unexpected %s\n", stg_desc)) {
+ print_hdr_stg(exp, "expected: ");
+ print_hdr_stg(&act, " actual: ");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int check_error_linum(const struct sk_fds *sk_fds)
+{
+ unsigned int nr_errors = 0;
+ struct linum_err linum_err;
+ int lport;
+
+ lport = sk_fds->passive_lport;
+ if (!bpf_map_lookup_elem(lport_linum_map_fd, &lport, &linum_err)) {
+ fprintf(stderr,
+ "bpf prog error out at lport:passive(%d), linum:%u err:%d\n",
+ lport, linum_err.linum, linum_err.err);
+ nr_errors++;
+ }
+
+ lport = sk_fds->active_lport;
+ if (!bpf_map_lookup_elem(lport_linum_map_fd, &lport, &linum_err)) {
+ fprintf(stderr,
+ "bpf prog error out at lport:active(%d), linum:%u err:%d\n",
+ lport, linum_err.linum, linum_err.err);
+ nr_errors++;
+ }
+
+ return nr_errors;
+}
+
+static void check_hdr_and_close_fds(struct sk_fds *sk_fds)
+{
+ const __u32 expected_inherit_cb_flags =
+ BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG |
+ BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG |
+ BPF_SOCK_OPS_STATE_CB_FLAG;
+
+ if (sk_fds_shutdown(sk_fds))
+ goto check_linum;
+
+ if (CHECK(expected_inherit_cb_flags != skel->bss->inherit_cb_flags,
+ "Unexpected inherit_cb_flags", "0x%x != 0x%x\n",
+ skel->bss->inherit_cb_flags, expected_inherit_cb_flags))
+ goto check_linum;
+
+ if (check_hdr_stg(&exp_passive_hdr_stg, sk_fds->passive_fd,
+ "passive_hdr_stg"))
+ goto check_linum;
+
+ if (check_hdr_stg(&exp_active_hdr_stg, sk_fds->active_fd,
+ "active_hdr_stg"))
+ goto check_linum;
+
+ if (check_hdr_opt(&exp_passive_estab_in, &skel->bss->passive_estab_in,
+ "passive_estab_in"))
+ goto check_linum;
+
+ if (check_hdr_opt(&exp_active_estab_in, &skel->bss->active_estab_in,
+ "active_estab_in"))
+ goto check_linum;
+
+ if (check_hdr_opt(&exp_passive_fin_in, &skel->bss->passive_fin_in,
+ "passive_fin_in"))
+ goto check_linum;
+
+ check_hdr_opt(&exp_active_fin_in, &skel->bss->active_fin_in,
+ "active_fin_in");
+
+check_linum:
+ CHECK_FAIL(check_error_linum(sk_fds));
+ sk_fds_close(sk_fds);
+}
+
+static void prepare_out(void)
+{
+ skel->bss->active_syn_out = exp_passive_estab_in;
+ skel->bss->passive_synack_out = exp_active_estab_in;
+
+ skel->bss->active_fin_out = exp_passive_fin_in;
+ skel->bss->passive_fin_out = exp_active_fin_in;
+}
+
+static void reset_test(void)
+{
+ size_t optsize = sizeof(struct bpf_test_option);
+ int lport, err;
+
+ memset(&skel->bss->passive_synack_out, 0, optsize);
+ memset(&skel->bss->passive_fin_out, 0, optsize);
+
+ memset(&skel->bss->passive_estab_in, 0, optsize);
+ memset(&skel->bss->passive_fin_in, 0, optsize);
+
+ memset(&skel->bss->active_syn_out, 0, optsize);
+ memset(&skel->bss->active_fin_out, 0, optsize);
+
+ memset(&skel->bss->active_estab_in, 0, optsize);
+ memset(&skel->bss->active_fin_in, 0, optsize);
+
+ skel->bss->inherit_cb_flags = 0;
+
+ skel->data->test_kind = TCPOPT_EXP;
+ skel->data->test_magic = 0xeB9F;
+
+ memset(&exp_passive_estab_in, 0, optsize);
+ memset(&exp_active_estab_in, 0, optsize);
+ memset(&exp_passive_fin_in, 0, optsize);
+ memset(&exp_active_fin_in, 0, optsize);
+
+ memset(&exp_passive_hdr_stg, 0, sizeof(exp_passive_hdr_stg));
+ memset(&exp_active_hdr_stg, 0, sizeof(exp_active_hdr_stg));
+ exp_active_hdr_stg.active = true;
+
+ err = bpf_map_get_next_key(lport_linum_map_fd, NULL, &lport);
+ while (!err) {
+ bpf_map_delete_elem(lport_linum_map_fd, &lport);
+ err = bpf_map_get_next_key(lport_linum_map_fd, &lport, &lport);
+ }
+}
+
+static void fastopen_estab(void)
+{
+ struct bpf_link *link;
+ struct sk_fds sk_fds;
+
+ hdr_stg_map_fd = bpf_map__fd(skel->maps.hdr_stg_map);
+ lport_linum_map_fd = bpf_map__fd(skel->maps.lport_linum_map);
+
+ exp_passive_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS;
+ exp_passive_estab_in.rand = 0xfa;
+ exp_passive_estab_in.max_delack_ms = 11;
+
+ exp_active_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS;
+ exp_active_estab_in.rand = 0xce;
+ exp_active_estab_in.max_delack_ms = 22;
+
+ exp_passive_hdr_stg.fastopen = true;
+
+ prepare_out();
+
+ /* Allow fastopen without fastopen cookie */
+ if (write_sysctl("/proc/sys/net/ipv4/tcp_fastopen", "1543"))
+ return;
+
+ link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
+ if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n",
+ PTR_ERR(link)))
+ return;
+
+ if (sk_fds_connect(&sk_fds, true)) {
+ bpf_link__destroy(link);
+ return;
+ }
+
+ check_hdr_and_close_fds(&sk_fds);
+ bpf_link__destroy(link);
+}
+
+static void syncookie_estab(void)
+{
+ struct bpf_link *link;
+ struct sk_fds sk_fds;
+
+ hdr_stg_map_fd = bpf_map__fd(skel->maps.hdr_stg_map);
+ lport_linum_map_fd = bpf_map__fd(skel->maps.lport_linum_map);
+
+ exp_passive_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS;
+ exp_passive_estab_in.rand = 0xfa;
+ exp_passive_estab_in.max_delack_ms = 11;
+
+ exp_active_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS |
+ OPTION_F_RESEND;
+ exp_active_estab_in.rand = 0xce;
+ exp_active_estab_in.max_delack_ms = 22;
+
+ exp_passive_hdr_stg.syncookie = true;
+ exp_active_hdr_stg.resend_syn = true;
+
+ prepare_out();
+
+ /* Clear the RESEND to ensure the bpf prog can learn
+ * want_cookie and set the RESEND by itself.
+ */
+ skel->bss->passive_synack_out.flags &= ~OPTION_F_RESEND;
+
+ /* Enforce syncookie mode */
+ if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "2"))
+ return;
+
+ link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
+ if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n",
+ PTR_ERR(link)))
+ return;
+
+ if (sk_fds_connect(&sk_fds, false)) {
+ bpf_link__destroy(link);
+ return;
+ }
+
+ check_hdr_and_close_fds(&sk_fds);
+ bpf_link__destroy(link);
+}
+
+static void fin(void)
+{
+ struct bpf_link *link;
+ struct sk_fds sk_fds;
+
+ hdr_stg_map_fd = bpf_map__fd(skel->maps.hdr_stg_map);
+ lport_linum_map_fd = bpf_map__fd(skel->maps.lport_linum_map);
+
+ exp_passive_fin_in.flags = OPTION_F_RAND;
+ exp_passive_fin_in.rand = 0xfa;
+
+ exp_active_fin_in.flags = OPTION_F_RAND;
+ exp_active_fin_in.rand = 0xce;
+
+ prepare_out();
+
+ if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "1"))
+ return;
+
+ link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
+ if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n",
+ PTR_ERR(link)))
+ return;
+
+ if (sk_fds_connect(&sk_fds, false)) {
+ bpf_link__destroy(link);
+ return;
+ }
+
+ check_hdr_and_close_fds(&sk_fds);
+ bpf_link__destroy(link);
+}
+
+static void __simple_estab(bool exprm)
+{
+ struct bpf_link *link;
+ struct sk_fds sk_fds;
+
+ hdr_stg_map_fd = bpf_map__fd(skel->maps.hdr_stg_map);
+ lport_linum_map_fd = bpf_map__fd(skel->maps.lport_linum_map);
+
+ exp_passive_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS;
+ exp_passive_estab_in.rand = 0xfa;
+ exp_passive_estab_in.max_delack_ms = 11;
+
+ exp_active_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS;
+ exp_active_estab_in.rand = 0xce;
+ exp_active_estab_in.max_delack_ms = 22;
+
+ prepare_out();
+
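+ /* Use an unassigned, non-experimental option kind, which needs no magic */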
+ if (!exprm) {
+ skel->data->test_kind = 0xB9;
+ skel->data->test_magic = 0;
+ }
+
+ if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "1"))
+ return;
+
+ link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
+ if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n",
+ PTR_ERR(link)))
+ return;
+
+ if (sk_fds_connect(&sk_fds, false)) {
+ bpf_link__destroy(link);
+ return;
+ }
+
+ check_hdr_and_close_fds(&sk_fds);
+ bpf_link__destroy(link);
+}
+
+static void no_exprm_estab(void)
+{
+ __simple_estab(false);
+}
+
+static void simple_estab(void)
+{
+ __simple_estab(true);
+}
+
+static void misc(void)
+{
+ const char send_msg[] = "MISC!!!";
+ char recv_msg[sizeof(send_msg)];
+ const unsigned int nr_data = 2;
+ struct bpf_link *link;
+ struct sk_fds sk_fds;
+ int i, ret;
+
+ lport_linum_map_fd = bpf_map__fd(misc_skel->maps.lport_linum_map);
+
+ if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "1"))
+ return;
+
+ link = bpf_program__attach_cgroup(misc_skel->progs.misc_estab, cg_fd);
+ if (CHECK(IS_ERR(link), "attach_cgroup(misc_estab)", "err: %ld\n",
+ PTR_ERR(link)))
+ return;
+
+ if (sk_fds_connect(&sk_fds, false)) {
+ bpf_link__destroy(link);
+ return;
+ }
+
+ for (i = 0; i < nr_data; i++) {
+ /* MSG_EOR to ensure skb will not be combined */
+ ret = send(sk_fds.active_fd, send_msg, sizeof(send_msg),
+ MSG_EOR);
+ if (CHECK(ret != sizeof(send_msg), "send(msg)", "ret:%d\n",
+ ret))
+ goto check_linum;
+
+ ret = read(sk_fds.passive_fd, recv_msg, sizeof(recv_msg));
+ if (CHECK(ret != sizeof(send_msg), "read(msg)", "ret:%d\n",
+ ret))
+ goto check_linum;
+ }
+
+ if (sk_fds_shutdown(&sk_fds))
+ goto check_linum;
+
+ CHECK(misc_skel->bss->nr_syn != 1, "unexpected nr_syn",
+ "expected (1) != actual (%u)\n",
+ misc_skel->bss->nr_syn);
+
+ CHECK(misc_skel->bss->nr_data != nr_data, "unexpected nr_data",
+ "expected (%u) != actual (%u)\n",
+ nr_data, misc_skel->bss->nr_data);
+
+ /* The last ACK may have been delayed, so it is either 1 or 2. */
+ CHECK(misc_skel->bss->nr_pure_ack != 1 &&
+ misc_skel->bss->nr_pure_ack != 2,
+ "unexpected nr_pure_ack",
+ "expected (1 or 2) != actual (%u)\n",
+ misc_skel->bss->nr_pure_ack);
+
+ CHECK(misc_skel->bss->nr_fin != 1, "unexpected nr_fin",
+ "expected (1) != actual (%u)\n",
+ misc_skel->bss->nr_fin);
+
+check_linum:
+ CHECK_FAIL(check_error_linum(&sk_fds));
+ sk_fds_close(&sk_fds);
+ bpf_link__destroy(link);
+}
+
+struct test {
+ const char *desc;
+ void (*run)(void);
+};
+
+#define DEF_TEST(name) { #name, name }
+static struct test tests[] = {
+ DEF_TEST(simple_estab),
+ DEF_TEST(no_exprm_estab),
+ DEF_TEST(syncookie_estab),
+ DEF_TEST(fastopen_estab),
+ DEF_TEST(fin),
+ DEF_TEST(misc),
+};
+
+void test_tcp_hdr_options(void)
+{
+ int i;
+
+ skel = test_tcp_hdr_options__open_and_load();
+ if (CHECK(!skel, "open and load skel", "failed\n"))
+ return;
+
+ misc_skel = test_misc_tcp_hdr_options__open_and_load();
+ if (CHECK(!misc_skel, "open and load misc test skel", "failed\n"))
+ goto skel_destroy;
+
+ cg_fd = test__join_cgroup(CG_NAME);
+ if (CHECK_FAIL(cg_fd < 0))
+ goto skel_destroy;
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ if (!test__start_subtest(tests[i].desc))
+ continue;
+
+ if (create_netns())
+ break;
+
+ tests[i].run();
+
+ reset_test();
+ }
+
+ close(cg_fd);
+skel_destroy:
+ test_misc_tcp_hdr_options__destroy(misc_skel);
+ test_tcp_hdr_options__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpffs.c b/tools/testing/selftests/bpf/prog_tests/test_bpffs.c
new file mode 100644
index 000000000000..172c999e523c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_bpffs.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <test_progs.h>
+
+#define TDIR "/sys/kernel/debug"
+
+static int read_iter(char *file)
+{
+ /* 1024 bytes should be enough to see the "iter" marker in one read */
+ char buf[1024];
+ int fd, len;
+
+ fd = open(file, 0);
+ if (fd < 0)
+ return -1;
+ while ((len = read(fd, buf, sizeof(buf) - 1)) > 0) {
+ buf[len] = '\0'; /* NUL-terminate, strstr() expects a string */
+ if (strstr(buf, "iter")) {
+ close(fd);
+ return 0;
+ }
+ }
+ close(fd);
+ return -1;
+}
+
+static int fn(void)
+{
+ int err, duration = 0;
+
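+ /* Work in a private mount namespace so the mounts below don't leak out */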
+ err = unshare(CLONE_NEWNS);
+ if (CHECK(err, "unshare", "failed: %d\n", errno))
+ goto out;
+
+ err = mount("", "/", "", MS_REC | MS_PRIVATE, NULL);
+ if (CHECK(err, "mount /", "failed: %d\n", errno))
+ goto out;
+
+ err = umount(TDIR);
+ if (CHECK(err, "umount " TDIR, "failed: %d\n", errno))
+ goto out;
+
+ err = mount("none", TDIR, "tmpfs", 0, NULL);
+ if (CHECK(err, "mount", "mount root failed: %d\n", errno))
+ goto out;
+
+ err = mkdir(TDIR "/fs1", 0777);
+ if (CHECK(err, "mkdir "TDIR"/fs1", "failed: %d\n", errno))
+ goto out;
+ err = mkdir(TDIR "/fs2", 0777);
+ if (CHECK(err, "mkdir "TDIR"/fs2", "failed: %d\n", errno))
+ goto out;
+
+ err = mount("bpf", TDIR "/fs1", "bpf", 0, NULL);
+ if (CHECK(err, "mount bpffs "TDIR"/fs1", "failed: %d\n", errno))
+ goto out;
+ err = mount("bpf", TDIR "/fs2", "bpf", 0, NULL);
+ if (CHECK(err, "mount bpffs " TDIR "/fs2", "failed: %d\n", errno))
+ goto out;
+
+ err = read_iter(TDIR "/fs1/maps.debug");
+ if (CHECK(err, "reading " TDIR "/fs1/maps.debug", "failed\n"))
+ goto out;
+ err = read_iter(TDIR "/fs2/progs.debug");
+ if (CHECK(err, "reading " TDIR "/fs2/progs.debug", "failed\n"))
+ goto out;
+out:
+ umount(TDIR "/fs1");
+ umount(TDIR "/fs2");
+ rmdir(TDIR "/fs1");
+ rmdir(TDIR "/fs2");
+ umount(TDIR);
+ exit(err);
+}
+
+void test_test_bpffs(void)
+{
+ int err, duration = 0, status = 0;
+ pid_t pid;
+
+ pid = fork();
+ if (CHECK(pid == -1, "fork", "fork failed: %d\n", errno))
+ return;
+ if (pid == 0)
+ fn();
+ err = waitpid(pid, &status, 0);
+ if (CHECK(err == -1 && errno != ECHILD, "waitpid", "failed: %d\n", errno))
+ return;
+ if (CHECK(WEXITSTATUS(status), "bpffs test", "failed: %d\n", WEXITSTATUS(status)))
+ return;
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
new file mode 100644
index 000000000000..91cd6f357246
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (C) 2020 Google LLC.
+ */
+
+#include <test_progs.h>
+#include <linux/limits.h>
+
+#include "local_storage.skel.h"
+#include "network_helpers.h"
+
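+/* mkstemp() exercises the file_open LSM hook; unlink() exercises inode_unlink */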
+static int create_and_unlink_file(void)
+{
+ char fname[PATH_MAX] = "/tmp/fileXXXXXX";
+ int fd;
+
+ fd = mkstemp(fname);
+ if (fd < 0)
+ return fd;
+
+ close(fd);
+ unlink(fname);
+ return 0;
+}
+
+void test_test_local_storage(void)
+{
+ struct local_storage *skel = NULL;
+ int err, duration = 0, serv_sk = -1;
+
+ skel = local_storage__open_and_load();
+ if (CHECK(!skel, "skel_load", "lsm skeleton failed\n"))
+ goto close_prog;
+
+ err = local_storage__attach(skel);
+ if (CHECK(err, "attach", "lsm attach failed: %d\n", err))
+ goto close_prog;
+
+ skel->bss->monitored_pid = getpid();
+
+ err = create_and_unlink_file();
+ if (CHECK(err < 0, "exec_cmd", "err %d errno %d\n", err, errno))
+ goto close_prog;
+
+ CHECK(skel->data->inode_storage_result != 0, "inode_storage_result",
+ "inode_local_storage not set\n");
+
+ serv_sk = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0);
+ if (CHECK(serv_sk < 0, "start_server", "failed to start server\n"))
+ goto close_prog;
+
+ CHECK(skel->data->sk_storage_result != 0, "sk_storage_result",
+ "sk_local_storage not set\n");
+
+ close(serv_sk);
+
+close_prog:
+ local_storage__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_lsm.c b/tools/testing/selftests/bpf/prog_tests/test_lsm.c
index b17eb2045c1d..6ab29226c99b 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_lsm.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_lsm.c
@@ -10,6 +10,7 @@
#include <unistd.h>
#include <malloc.h>
#include <stdlib.h>
+#include <sys/syscall.h>
#include "lsm.skel.h"
@@ -55,6 +56,7 @@ void test_test_lsm(void)
{
struct lsm *skel = NULL;
int err, duration = 0;
+ int buf = 1234;
skel = lsm__open_and_load();
if (CHECK(!skel, "skel_load", "lsm skeleton failed\n"))
@@ -81,6 +83,14 @@ void test_test_lsm(void)
CHECK(skel->bss->mprotect_count != 1, "mprotect_count",
"mprotect_count = %d\n", skel->bss->mprotect_count);
+ syscall(__NR_setdomainname, &buf, -2L);
+ syscall(__NR_setdomainname, 0, -3L);
+ syscall(__NR_setdomainname, ~0L, -4L);
+
+ CHECK(skel->bss->copy_test != 3, "copy_test",
+ "copy_test = %d\n", skel->bss->copy_test);
+
close_prog:
lsm__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_overhead.c b/tools/testing/selftests/bpf/prog_tests/test_overhead.c
index 2702df2b2343..9966685866fd 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_overhead.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_overhead.c
@@ -61,10 +61,9 @@ void test_test_overhead(void)
const char *raw_tp_name = "raw_tp/task_rename";
const char *fentry_name = "fentry/__set_task_comm";
const char *fexit_name = "fexit/__set_task_comm";
- const char *fmodret_name = "fmod_ret/__set_task_comm";
const char *kprobe_func = "__set_task_comm";
struct bpf_program *kprobe_prog, *kretprobe_prog, *raw_tp_prog;
- struct bpf_program *fentry_prog, *fexit_prog, *fmodret_prog;
+ struct bpf_program *fentry_prog, *fexit_prog;
struct bpf_object *obj;
struct bpf_link *link;
int err, duration = 0;
@@ -97,11 +96,6 @@ void test_test_overhead(void)
if (CHECK(!fexit_prog, "find_probe",
"prog '%s' not found\n", fexit_name))
goto cleanup;
- fmodret_prog = bpf_object__find_program_by_title(obj, fmodret_name);
- if (CHECK(!fmodret_prog, "find_probe",
- "prog '%s' not found\n", fmodret_name))
- goto cleanup;
-
err = bpf_object__load(obj);
if (CHECK(err, "obj_load", "err %d\n", err))
goto cleanup;
@@ -148,12 +142,6 @@ void test_test_overhead(void)
test_run("fexit");
bpf_link__destroy(link);
- /* attach fmod_ret */
- link = bpf_program__attach_trace(fmodret_prog);
- if (CHECK(IS_ERR(link), "attach fmod_ret", "err %ld\n", PTR_ERR(link)))
- goto cleanup;
- test_run("fmod_ret");
- bpf_link__destroy(link);
cleanup:
prctl(PR_SET_NAME, comm, 0L, 0L, 0L);
bpf_object__close(obj);
diff --git a/tools/testing/selftests/bpf/prog_tests/test_profiler.c b/tools/testing/selftests/bpf/prog_tests/test_profiler.c
new file mode 100644
index 000000000000..4ca275101ee0
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_profiler.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <test_progs.h>
+#include "progs/profiler.h"
+#include "profiler1.skel.h"
+#include "profiler2.skel.h"
+#include "profiler3.skel.h"
+
+static int sanity_run(struct bpf_program *prog)
+{
+ struct bpf_prog_test_run_attr test_attr = {};
+ __u64 args[] = {1, 2, 3};
+ __u32 duration = 0;
+ int err, prog_fd;
+
+ prog_fd = bpf_program__fd(prog);
+ test_attr.prog_fd = prog_fd;
+ test_attr.ctx_in = args;
+ test_attr.ctx_size_in = sizeof(args);
+ err = bpf_prog_test_run_xattr(&test_attr);
+ if (CHECK(err || test_attr.retval, "test_run",
+ "err %d errno %d retval %d duration %d\n",
+ err, errno, test_attr.retval, duration))
+ return -1;
+ return 0;
+}
+
+void test_test_profiler(void)
+{
+ struct profiler1 *profiler1_skel = NULL;
+ struct profiler2 *profiler2_skel = NULL;
+ struct profiler3 *profiler3_skel = NULL;
+ __u32 duration = 0;
+ int err;
+
+ profiler1_skel = profiler1__open_and_load();
+ if (CHECK(!profiler1_skel, "profiler1_skel_load", "profiler1 skeleton failed\n"))
+ goto cleanup;
+
+ err = profiler1__attach(profiler1_skel);
+ if (CHECK(err, "profiler1_attach", "profiler1 attach failed: %d\n", err))
+ goto cleanup;
+
+ if (sanity_run(profiler1_skel->progs.raw_tracepoint__sched_process_exec))
+ goto cleanup;
+
+ profiler2_skel = profiler2__open_and_load();
+ if (CHECK(!profiler2_skel, "profiler2_skel_load", "profiler2 skeleton failed\n"))
+ goto cleanup;
+
+ err = profiler2__attach(profiler2_skel);
+ if (CHECK(err, "profiler2_attach", "profiler2 attach failed: %d\n", err))
+ goto cleanup;
+
+ if (sanity_run(profiler2_skel->progs.raw_tracepoint__sched_process_exec))
+ goto cleanup;
+
+ profiler3_skel = profiler3__open_and_load();
+ if (CHECK(!profiler3_skel, "profiler3_skel_load", "profiler3 skeleton failed\n"))
+ goto cleanup;
+
+ err = profiler3__attach(profiler3_skel);
+ if (CHECK(err, "profiler3_attach", "profiler3 attach failed: %d\n", err))
+ goto cleanup;
+
+ if (sanity_run(profiler3_skel->progs.raw_tracepoint__sched_process_exec))
+ goto cleanup;
+cleanup:
+ profiler1__destroy(profiler1_skel);
+ profiler2__destroy(profiler2_skel);
+ profiler3__destroy(profiler3_skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/trace_ext.c b/tools/testing/selftests/bpf/prog_tests/trace_ext.c
new file mode 100644
index 000000000000..924441d4362d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/trace_ext.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <test_progs.h>
+#include <network_helpers.h>
+#include <sys/stat.h>
+#include <linux/sched.h>
+#include <sys/syscall.h>
+
+#include "test_pkt_md_access.skel.h"
+#include "test_trace_ext.skel.h"
+#include "test_trace_ext_tracing.skel.h"
+
+static __u32 duration;
+
+void test_trace_ext(void)
+{
+ struct test_pkt_md_access *skel_pkt = NULL;
+ struct test_trace_ext_tracing *skel_trace = NULL;
+ struct test_trace_ext_tracing__bss *bss_trace;
+ struct test_trace_ext *skel_ext = NULL;
+ struct test_trace_ext__bss *bss_ext;
+ int err, pkt_fd, ext_fd;
+ struct bpf_program *prog;
+ char buf[100];
+ __u32 retval;
+ __u64 len;
+
+ /* open/load/attach test_pkt_md_access */
+ skel_pkt = test_pkt_md_access__open_and_load();
+ if (CHECK(!skel_pkt, "setup", "classifier/test_pkt_md_access open failed\n"))
+ goto cleanup;
+
+ err = test_pkt_md_access__attach(skel_pkt);
+ if (CHECK(err, "setup", "classifier/test_pkt_md_access attach failed: %d\n", err))
+ goto cleanup;
+
+ prog = skel_pkt->progs.test_pkt_md_access;
+ pkt_fd = bpf_program__fd(prog);
+
+ /* open extension */
+ skel_ext = test_trace_ext__open();
+ if (CHECK(!skel_ext, "setup", "freplace/test_pkt_md_access open failed\n"))
+ goto cleanup;
+
+ /* set extension's attach target - test_pkt_md_access */
+ prog = skel_ext->progs.test_pkt_md_access_new;
+ bpf_program__set_attach_target(prog, pkt_fd, "test_pkt_md_access");
+
+ /* load/attach extension */
+ err = test_trace_ext__load(skel_ext);
+ if (CHECK(err, "setup", "freplace/test_pkt_md_access load failed\n")) {
+ libbpf_strerror(err, buf, sizeof(buf));
+ fprintf(stderr, "%s\n", buf);
+ goto cleanup;
+ }
+
+ err = test_trace_ext__attach(skel_ext);
+ if (CHECK(err, "setup", "freplace/test_pkt_md_access attach failed: %d\n", err))
+ goto cleanup;
+
+ prog = skel_ext->progs.test_pkt_md_access_new;
+ ext_fd = bpf_program__fd(prog);
+
+ /* open tracing */
+ skel_trace = test_trace_ext_tracing__open();
+ if (CHECK(!skel_trace, "setup", "tracing/test_pkt_md_access_new open failed\n"))
+ goto cleanup;
+
+ /* set tracing's attach target - fentry */
+ prog = skel_trace->progs.fentry;
+ bpf_program__set_attach_target(prog, ext_fd, "test_pkt_md_access_new");
+
+ /* set tracing's attach target - fexit */
+ prog = skel_trace->progs.fexit;
+ bpf_program__set_attach_target(prog, ext_fd, "test_pkt_md_access_new");
+
+ /* load/attach tracing */
+ err = test_trace_ext_tracing__load(skel_trace);
+ if (CHECK(err, "setup", "tracing/test_pkt_md_access_new load failed\n")) {
+ libbpf_strerror(err, buf, sizeof(buf));
+ fprintf(stderr, "%s\n", buf);
+ goto cleanup;
+ }
+
+ err = test_trace_ext_tracing__attach(skel_trace);
+ if (CHECK(err, "setup", "tracing/test_pkt_md_access_new attach failed: %d\n", err))
+ goto cleanup;
+
+ /* trigger the test */
+ err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4),
+ NULL, NULL, &retval, &duration);
+ CHECK(err || retval, "run", "err %d errno %d retval %d\n", err, errno, retval);
+
+ bss_ext = skel_ext->bss;
+ bss_trace = skel_trace->bss;
+
+ len = bss_ext->ext_called;
+
+ CHECK(bss_ext->ext_called == 0,
+ "check", "failed to trigger freplace/test_pkt_md_access\n");
+ CHECK(bss_trace->fentry_called != len,
+ "check", "failed to trigger fentry/test_pkt_md_access_new\n");
+ CHECK(bss_trace->fexit_called != len,
+ "check", "failed to trigger fexit/test_pkt_md_access_new\n");
+
+cleanup:
+ test_trace_ext_tracing__destroy(skel_trace);
+ test_trace_ext__destroy(skel_ext);
+ test_pkt_md_access__destroy(skel_pkt);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c
index f284f72158ef..0281095de266 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c
@@ -1,11 +1,12 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
#include <network_helpers.h>
+#include "test_xdp_noinline.skel.h"
void test_xdp_noinline(void)
{
- const char *file = "./test_xdp_noinline.o";
unsigned int nr_cpus = bpf_num_possible_cpus();
+ struct test_xdp_noinline *skel;
struct vip key = {.protocol = 6};
struct vip_meta {
__u32 flags;
@@ -24,59 +25,43 @@ void test_xdp_noinline(void)
__u8 flags;
} real_def = {.dst = MAGIC_VAL};
__u32 ch_key = 11, real_num = 3;
- __u32 duration, retval, size;
- int err, i, prog_fd, map_fd;
+ __u32 duration = 0, retval, size;
+ int err, i;
__u64 bytes = 0, pkts = 0;
- struct bpf_object *obj;
char buf[128];
u32 *magic = (u32 *)buf;
- err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
- if (CHECK_FAIL(err))
+ skel = test_xdp_noinline__open_and_load();
+ if (CHECK(!skel, "skel_open_and_load", "failed\n"))
return;
- map_fd = bpf_find_map(__func__, obj, "vip_map");
- if (map_fd < 0)
- goto out;
- bpf_map_update_elem(map_fd, &key, &value, 0);
+ bpf_map_update_elem(bpf_map__fd(skel->maps.vip_map), &key, &value, 0);
+ bpf_map_update_elem(bpf_map__fd(skel->maps.ch_rings), &ch_key, &real_num, 0);
+ bpf_map_update_elem(bpf_map__fd(skel->maps.reals), &real_num, &real_def, 0);
- map_fd = bpf_find_map(__func__, obj, "ch_rings");
- if (map_fd < 0)
- goto out;
- bpf_map_update_elem(map_fd, &ch_key, &real_num, 0);
-
- map_fd = bpf_find_map(__func__, obj, "reals");
- if (map_fd < 0)
- goto out;
- bpf_map_update_elem(map_fd, &real_num, &real_def, 0);
-
- err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4),
+ err = bpf_prog_test_run(bpf_program__fd(skel->progs.balancer_ingress_v4),
+ NUM_ITER, &pkt_v4, sizeof(pkt_v4),
buf, &size, &retval, &duration);
CHECK(err || retval != 1 || size != 54 ||
*magic != MAGIC_VAL, "ipv4",
"err %d errno %d retval %d size %d magic %x\n",
err, errno, retval, size, *magic);
- err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6),
+ err = bpf_prog_test_run(bpf_program__fd(skel->progs.balancer_ingress_v6),
+ NUM_ITER, &pkt_v6, sizeof(pkt_v6),
buf, &size, &retval, &duration);
CHECK(err || retval != 1 || size != 74 ||
*magic != MAGIC_VAL, "ipv6",
"err %d errno %d retval %d size %d magic %x\n",
err, errno, retval, size, *magic);
- map_fd = bpf_find_map(__func__, obj, "stats");
- if (map_fd < 0)
- goto out;
- bpf_map_lookup_elem(map_fd, &stats_key, stats);
+ bpf_map_lookup_elem(bpf_map__fd(skel->maps.stats), &stats_key, stats);
for (i = 0; i < nr_cpus; i++) {
bytes += stats[i].bytes;
pkts += stats[i].pkts;
}
- if (CHECK_FAIL(bytes != MAGIC_BYTES * NUM_ITER * 2 ||
- pkts != NUM_ITER * 2)) {
- printf("test_xdp_noinline:FAIL:stats %lld %lld\n",
- bytes, pkts);
- }
-out:
- bpf_object__close(obj);
+ CHECK(bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2,
+ "stats", "bytes %lld pkts %lld\n",
+ (unsigned long long)bytes, (unsigned long long)pkts);
+ test_xdp_noinline__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/progs/bpf_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cubic.c
index ef574087f1e1..6939bfd8690f 100644
--- a/tools/testing/selftests/bpf/progs/bpf_cubic.c
+++ b/tools/testing/selftests/bpf/progs/bpf_cubic.c
@@ -15,6 +15,8 @@
*/
#include <linux/bpf.h>
+#include <linux/stddef.h>
+#include <linux/tcp.h>
#include "bpf_tcp_helpers.h"
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp.c b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
index 3fb4260570b1..4dc1a967776a 100644
--- a/tools/testing/selftests/bpf/progs/bpf_dctcp.c
+++ b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
@@ -9,6 +9,8 @@
#include <stddef.h>
#include <linux/bpf.h>
#include <linux/types.h>
+#include <linux/stddef.h>
+#include <linux/tcp.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "bpf_tcp_helpers.h"
diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c
index de6de9221518..5a65f6b51377 100644
--- a/tools/testing/selftests/bpf/progs/bpf_flow.c
+++ b/tools/testing/selftests/bpf/progs/bpf_flow.c
@@ -118,18 +118,18 @@ static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto)
switch (proto) {
case bpf_htons(ETH_P_IP):
- bpf_tail_call(skb, &jmp_table, IP);
+ bpf_tail_call_static(skb, &jmp_table, IP);
break;
case bpf_htons(ETH_P_IPV6):
- bpf_tail_call(skb, &jmp_table, IPV6);
+ bpf_tail_call_static(skb, &jmp_table, IPV6);
break;
case bpf_htons(ETH_P_MPLS_MC):
case bpf_htons(ETH_P_MPLS_UC):
- bpf_tail_call(skb, &jmp_table, MPLS);
+ bpf_tail_call_static(skb, &jmp_table, MPLS);
break;
case bpf_htons(ETH_P_8021Q):
case bpf_htons(ETH_P_8021AD):
- bpf_tail_call(skb, &jmp_table, VLAN);
+ bpf_tail_call_static(skb, &jmp_table, VLAN);
break;
default:
/* Protocol not supported */
@@ -246,10 +246,10 @@ static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr)
switch (nexthdr) {
case IPPROTO_HOPOPTS:
case IPPROTO_DSTOPTS:
- bpf_tail_call(skb, &jmp_table, IPV6OP);
+ bpf_tail_call_static(skb, &jmp_table, IPV6OP);
break;
case IPPROTO_FRAGMENT:
- bpf_tail_call(skb, &jmp_table, IPV6FR);
+ bpf_tail_call_static(skb, &jmp_table, IPV6FR);
break;
default:
return parse_ip_proto(skb, nexthdr);
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h
index c196280df90d..6a1255465fd6 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter.h
+++ b/tools/testing/selftests/bpf/progs/bpf_iter.h
@@ -13,6 +13,12 @@
#define udp6_sock udp6_sock___not_used
#define bpf_iter__bpf_map_elem bpf_iter__bpf_map_elem___not_used
#define bpf_iter__bpf_sk_storage_map bpf_iter__bpf_sk_storage_map___not_used
+#define bpf_iter__sockmap bpf_iter__sockmap___not_used
+#define btf_ptr btf_ptr___not_used
+#define BTF_F_COMPACT BTF_F_COMPACT___not_used
+#define BTF_F_NONAME BTF_F_NONAME___not_used
+#define BTF_F_PTR_RAW BTF_F_PTR_RAW___not_used
+#define BTF_F_ZERO BTF_F_ZERO___not_used
#include "vmlinux.h"
#undef bpf_iter_meta
#undef bpf_iter__bpf_map
@@ -26,6 +32,12 @@
#undef udp6_sock
#undef bpf_iter__bpf_map_elem
#undef bpf_iter__bpf_sk_storage_map
+#undef bpf_iter__sockmap
+#undef btf_ptr
+#undef BTF_F_COMPACT
+#undef BTF_F_NONAME
+#undef BTF_F_PTR_RAW
+#undef BTF_F_ZERO
struct bpf_iter_meta {
struct seq_file *seq;
@@ -96,3 +108,23 @@ struct bpf_iter__bpf_sk_storage_map {
struct sock *sk;
void *value;
};
+
+struct bpf_iter__sockmap {
+ struct bpf_iter_meta *meta;
+ struct bpf_map *map;
+ void *key;
+ struct sock *sk;
+};
+
+struct btf_ptr {
+ void *ptr;
+ __u32 type_id;
+ __u32 flags;
+};
+
+enum {
+ BTF_F_COMPACT = (1ULL << 0),
+ BTF_F_NONAME = (1ULL << 1),
+ BTF_F_PTR_RAW = (1ULL << 2),
+ BTF_F_ZERO = (1ULL << 3),
+};
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c b/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c
new file mode 100644
index 000000000000..f3af0e30cead
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Cloudflare */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <errno.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 64);
+ __type(key, __u32);
+ __type(value, __u64);
+} sockmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKHASH);
+ __uint(max_entries, 64);
+ __type(key, __u32);
+ __type(value, __u64);
+} sockhash SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKHASH);
+ __uint(max_entries, 64);
+ __type(key, __u32);
+ __type(value, __u64);
+} dst SEC(".maps");
+
+__u32 elems = 0;
+__u32 socks = 0;
+
+SEC("iter/sockmap")
+int copy(struct bpf_iter__sockmap *ctx)
+{
+ struct sock *sk = ctx->sk;
+ __u32 tmp, *key = ctx->key;
+ int ret;
+
+ if (!key)
+ return 0;
+
+ elems++;
+
+ /* We need a temporary buffer on the stack, since the verifier doesn't
+ * let us use the pointer from the context as an argument to the helper.
+ */
+ tmp = *key;
+
+ if (sk) {
+ socks++;
+ return bpf_map_update_elem(&dst, &tmp, sk, 0) != 0;
+ }
+
+ ret = bpf_map_delete_elem(&dst, &tmp);
+ return ret && ret != -ENOENT;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c
new file mode 100644
index 000000000000..a1ddc36f13ec
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020, Oracle and/or its affiliates. */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+#include <errno.h>
+
+char _license[] SEC("license") = "GPL";
+
+long tasks = 0;
+long seq_err = 0;
+bool skip = false;
+
+SEC("iter/task")
+int dump_task_struct(struct bpf_iter__task *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct task_struct *task = ctx->task;
+ static struct btf_ptr ptr = { };
+ long ret;
+
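+ /* bpf_core_type_id_kernel() needs __builtin_btf_type_id(); otherwise skip */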
+#if __has_builtin(__builtin_btf_type_id)
+ ptr.type_id = bpf_core_type_id_kernel(struct task_struct);
+ ptr.ptr = task;
+
+ if (ctx->meta->seq_num == 0)
+ BPF_SEQ_PRINTF(seq, "Raw BTF task\n");
+
+ ret = bpf_seq_printf_btf(seq, &ptr, sizeof(ptr), 0);
+ switch (ret) {
+ case 0:
+ tasks++;
+ break;
+ case -ERANGE:
+ /* NULL task or task->fs, don't count it as an error. */
+ break;
+ case -E2BIG:
+ return 1;
+ default:
+ seq_err = ret;
+ break;
+ }
+#else
+ skip = true;
+#endif
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
index 8b787baa2654..b2f7c7c5f952 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
@@ -6,6 +6,9 @@
char _license[] SEC("license") = "GPL";
+int count = 0;
+int tgid = 0;
+
SEC("iter/task_file")
int dump_task_file(struct bpf_iter__task_file *ctx)
{
@@ -17,8 +20,13 @@ int dump_task_file(struct bpf_iter__task_file *ctx)
if (task == (void *)0 || file == (void *)0)
return 0;
- if (ctx->meta->seq_num == 0)
+ if (ctx->meta->seq_num == 0) {
+ count = 0;
BPF_SEQ_PRINTF(seq, " tgid gid fd file\n");
+ }
+
+ if (tgid == task->tgid && task->tgid != task->pid)
+ count++;
BPF_SEQ_PRINTF(seq, "%8d %8d %8d %lx\n", task->tgid, task->pid, fd,
(long)file->f_op);
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval.c
new file mode 100644
index 000000000000..48e62f3f074f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_enumval x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___diff.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___diff.c
new file mode 100644
index 000000000000..53e5e5a76888
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___diff.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_enumval___diff x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___err_missing.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___err_missing.c
new file mode 100644
index 000000000000..d024fb2ac06e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___err_missing.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_enumval___err_missing x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___val3_missing.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___val3_missing.c
new file mode 100644
index 000000000000..9de6595d250c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___val3_missing.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_enumval___val3_missing x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_size___err_ambiguous.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_size___err_ambiguous.c
new file mode 100644
index 000000000000..f3e9904df9c2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_size___err_ambiguous.c
@@ -0,0 +1,4 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_size___err_ambiguous1 x,
+ struct core_reloc_size___err_ambiguous2 y) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based.c
new file mode 100644
index 000000000000..fc3f69e58c71
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_type_based x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___all_missing.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___all_missing.c
new file mode 100644
index 000000000000..51511648b4ec
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___all_missing.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_type_based___all_missing x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff_sz.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff_sz.c
new file mode 100644
index 000000000000..67db3dceb279
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff_sz.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_type_based___diff_sz x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___fn_wrong_args.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___fn_wrong_args.c
new file mode 100644
index 000000000000..b357fc65431d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___fn_wrong_args.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_type_based___fn_wrong_args x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___incompat.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___incompat.c
new file mode 100644
index 000000000000..8ddf20d33d9e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___incompat.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_type_based___incompat x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id.c
new file mode 100644
index 000000000000..abbe5bddcefd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_type_id x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id___missing_targets.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id___missing_targets.c
new file mode 100644
index 000000000000..24e7caf4f013
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id___missing_targets.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_type_id___missing_targets x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf_ptr.h b/tools/testing/selftests/bpf/progs/btf_ptr.h
new file mode 100644
index 000000000000..c3c9797c67db
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf_ptr.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2020, Oracle and/or its affiliates. */
+/* "undefine" structs in vmlinux.h, because we "override" them below */
+#define btf_ptr btf_ptr___not_used
+#define BTF_F_COMPACT BTF_F_COMPACT___not_used
+#define BTF_F_NONAME BTF_F_NONAME___not_used
+#define BTF_F_PTR_RAW BTF_F_PTR_RAW___not_used
+#define BTF_F_ZERO BTF_F_ZERO___not_used
+#include "vmlinux.h"
+#undef btf_ptr
+#undef BTF_F_COMPACT
+#undef BTF_F_NONAME
+#undef BTF_F_PTR_RAW
+#undef BTF_F_ZERO
+
+struct btf_ptr {
+ void *ptr;
+ __u32 type_id;
+ __u32 flags;
+};
+
+enum {
+ BTF_F_COMPACT = (1ULL << 0),
+ BTF_F_NONAME = (1ULL << 1),
+ BTF_F_PTR_RAW = (1ULL << 2),
+ BTF_F_ZERO = (1ULL << 3),
+};
diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c
index b1b2773c0b9d..a943d394fd3a 100644
--- a/tools/testing/selftests/bpf/progs/connect4_prog.c
+++ b/tools/testing/selftests/bpf/progs/connect4_prog.c
@@ -23,6 +23,10 @@
#define TCP_CA_NAME_MAX 16
#endif
+#ifndef TCP_NOTSENT_LOWAT
+#define TCP_NOTSENT_LOWAT 25
+#endif
+
#ifndef IFNAMSIZ
#define IFNAMSIZ 16
#endif
@@ -128,6 +132,19 @@ static __inline int set_keepalive(struct bpf_sock_addr *ctx)
return 0;
}
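+/* Verify that TCP_NOTSENT_LOWAT can be set via bpf_setsockopt() on stream sockets */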
+static __inline int set_notsent_lowat(struct bpf_sock_addr *ctx)
+{
+ int lowat = 65535;
+
+ if (ctx->type == SOCK_STREAM) {
+ if (bpf_setsockopt(ctx, SOL_TCP, TCP_NOTSENT_LOWAT, &lowat, sizeof(lowat)))
+ return 1;
+ }
+
+ return 0;
+}
+
SEC("cgroup/connect4")
int connect_v4_prog(struct bpf_sock_addr *ctx)
{
@@ -148,6 +164,9 @@ int connect_v4_prog(struct bpf_sock_addr *ctx)
if (set_keepalive(ctx))
return 0;
+ if (set_notsent_lowat(ctx))
+ return 0;
+
if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
return 0;
else if (ctx->type == SOCK_STREAM)
diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h
index 69139ed66216..e6e616cb7bc9 100644
--- a/tools/testing/selftests/bpf/progs/core_reloc_types.h
+++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h
@@ -652,7 +652,7 @@ struct core_reloc_misc_extensible {
};
/*
- * EXISTENCE
+ * FIELD EXISTENCE
*/
struct core_reloc_existence_output {
int a_exists;
@@ -809,3 +809,353 @@ struct core_reloc_size___diff_sz {
void *ptr_field;
enum { OTHER_VALUE = 0xFFFFFFFFFFFFFFFF } enum_field;
};
+
+/* Error case: two candidates have a field (int_field) at the same offset,
+ * but the final relocation values differ: size 4 vs size 1
+ */
+struct core_reloc_size___err_ambiguous1 {
+ /* int at offset 0 */
+ int int_field;
+
+ struct { int x; } struct_field;
+ union { int x; } union_field;
+ int arr_field[4];
+ void *ptr_field;
+ enum { VALUE___1 = 123 } enum_field;
+};
+
+struct core_reloc_size___err_ambiguous2 {
+ /* char at offset 0 */
+ char int_field;
+
+ struct { int x; } struct_field;
+ union { int x; } union_field;
+ int arr_field[4];
+ void *ptr_field;
+ enum { VALUE___2 = 123 } enum_field;
+};
+
+/*
+ * TYPE EXISTENCE & SIZE
+ */
+struct core_reloc_type_based_output {
+ bool struct_exists;
+ bool union_exists;
+ bool enum_exists;
+ bool typedef_named_struct_exists;
+ bool typedef_anon_struct_exists;
+ bool typedef_struct_ptr_exists;
+ bool typedef_int_exists;
+ bool typedef_enum_exists;
+ bool typedef_void_ptr_exists;
+ bool typedef_func_proto_exists;
+ bool typedef_arr_exists;
+
+ int struct_sz;
+ int union_sz;
+ int enum_sz;
+ int typedef_named_struct_sz;
+ int typedef_anon_struct_sz;
+ int typedef_struct_ptr_sz;
+ int typedef_int_sz;
+ int typedef_enum_sz;
+ int typedef_void_ptr_sz;
+ int typedef_func_proto_sz;
+ int typedef_arr_sz;
+};
+
+struct a_struct {
+ int x;
+};
+
+union a_union {
+ int y;
+ int z;
+};
+
+typedef struct a_struct named_struct_typedef;
+
+typedef struct { int x, y, z; } anon_struct_typedef;
+
+typedef struct {
+ int a, b, c;
+} *struct_ptr_typedef;
+
+enum an_enum {
+ AN_ENUM_VAL1 = 1,
+ AN_ENUM_VAL2 = 2,
+ AN_ENUM_VAL3 = 3,
+};
+
+typedef int int_typedef;
+
+typedef enum { TYPEDEF_ENUM_VAL1, TYPEDEF_ENUM_VAL2 } enum_typedef;
+
+typedef void *void_ptr_typedef;
+
+typedef int (*func_proto_typedef)(long);
+
+typedef char arr_typedef[20];
+
+struct core_reloc_type_based {
+ struct a_struct f1;
+ union a_union f2;
+ enum an_enum f3;
+ named_struct_typedef f4;
+ anon_struct_typedef f5;
+ struct_ptr_typedef f6;
+ int_typedef f7;
+ enum_typedef f8;
+ void_ptr_typedef f9;
+ func_proto_typedef f10;
+ arr_typedef f11;
+};
+
+/* no types in target */
+struct core_reloc_type_based___all_missing {
+};
+
+/* different type sizes, extra modifiers, anon vs named enums, etc */
+struct a_struct___diff_sz {
+ long x;
+ int y;
+ char z;
+};
+
+union a_union___diff_sz {
+ char yy;
+ char zz;
+};
+
+typedef struct a_struct___diff_sz named_struct_typedef___diff_sz;
+
+typedef struct { long xx, yy, zzz; } anon_struct_typedef___diff_sz;
+
+typedef struct {
+ char aa[1], bb[2], cc[3];
+} *struct_ptr_typedef___diff_sz;
+
+enum an_enum___diff_sz {
+ AN_ENUM_VAL1___diff_sz = 0x123412341234,
+ AN_ENUM_VAL2___diff_sz = 2,
+};
+
+typedef unsigned long int_typedef___diff_sz;
+
+typedef enum an_enum___diff_sz enum_typedef___diff_sz;
+
+typedef const void * const void_ptr_typedef___diff_sz;
+
+typedef int_typedef___diff_sz (*func_proto_typedef___diff_sz)(char);
+
+typedef int arr_typedef___diff_sz[2];
+
+struct core_reloc_type_based___diff_sz {
+ struct a_struct___diff_sz f1;
+ union a_union___diff_sz f2;
+ enum an_enum___diff_sz f3;
+ named_struct_typedef___diff_sz f4;
+ anon_struct_typedef___diff_sz f5;
+ struct_ptr_typedef___diff_sz f6;
+ int_typedef___diff_sz f7;
+ enum_typedef___diff_sz f8;
+ void_ptr_typedef___diff_sz f9;
+ func_proto_typedef___diff_sz f10;
+ arr_typedef___diff_sz f11;
+};
+
+/* incompatibilities between target and local types */
+union a_struct___incompat { /* union instead of struct */
+ int x;
+};
+
+struct a_union___incompat { /* struct instead of union */
+ int y;
+ int z;
+};
+
+/* typedef to union, not to struct */
+typedef union a_struct___incompat named_struct_typedef___incompat;
+
+/* typedef to void pointer, instead of struct */
+typedef void *anon_struct_typedef___incompat;
+
+/* extra pointer indirection */
+typedef struct {
+ int a, b, c;
+} **struct_ptr_typedef___incompat;
+
+/* typedef of a struct with int, instead of int */
+typedef struct { int x; } int_typedef___incompat;
+
+/* typedef to func_proto, instead of enum */
+typedef int (*enum_typedef___incompat)(void);
+
+/* pointer to char instead of void */
+typedef char *void_ptr_typedef___incompat;
+
+/* void return type instead of int */
+typedef void (*func_proto_typedef___incompat)(long);
+
+/* multi-dimensional array instead of a single-dimensional */
+typedef int arr_typedef___incompat[20][2];
+
+struct core_reloc_type_based___incompat {
+ union a_struct___incompat f1;
+ struct a_union___incompat f2;
+ /* the only valid one is enum, to check that something still succeeds */
+ enum an_enum f3;
+ named_struct_typedef___incompat f4;
+ anon_struct_typedef___incompat f5;
+ struct_ptr_typedef___incompat f6;
+ int_typedef___incompat f7;
+ enum_typedef___incompat f8;
+ void_ptr_typedef___incompat f9;
+ func_proto_typedef___incompat f10;
+ arr_typedef___incompat f11;
+};
+
+/* func_proto with incompatible signature */
+typedef void (*func_proto_typedef___fn_wrong_ret1)(long);
+typedef int * (*func_proto_typedef___fn_wrong_ret2)(long);
+typedef struct { int x; } int_struct_typedef;
+typedef int_struct_typedef (*func_proto_typedef___fn_wrong_ret3)(long);
+typedef int (*func_proto_typedef___fn_wrong_arg)(void *);
+typedef int (*func_proto_typedef___fn_wrong_arg_cnt1)(long, long);
+typedef int (*func_proto_typedef___fn_wrong_arg_cnt2)(void);
+
+struct core_reloc_type_based___fn_wrong_args {
+ /* one valid type to make sure relos still work */
+ struct a_struct f1;
+ func_proto_typedef___fn_wrong_ret1 f2;
+ func_proto_typedef___fn_wrong_ret2 f3;
+ func_proto_typedef___fn_wrong_ret3 f4;
+ func_proto_typedef___fn_wrong_arg f5;
+ func_proto_typedef___fn_wrong_arg_cnt1 f6;
+ func_proto_typedef___fn_wrong_arg_cnt2 f7;
+};
+
+/*
+ * TYPE ID MAPPING (LOCAL AND TARGET)
+ */
+struct core_reloc_type_id_output {
+ int local_anon_struct;
+ int local_anon_union;
+ int local_anon_enum;
+ int local_anon_func_proto_ptr;
+ int local_anon_void_ptr;
+ int local_anon_arr;
+
+ int local_struct;
+ int local_union;
+ int local_enum;
+ int local_int;
+ int local_struct_typedef;
+ int local_func_proto_typedef;
+ int local_arr_typedef;
+
+ int targ_struct;
+ int targ_union;
+ int targ_enum;
+ int targ_int;
+ int targ_struct_typedef;
+ int targ_func_proto_typedef;
+ int targ_arr_typedef;
+};
+
+struct core_reloc_type_id {
+ struct a_struct f1;
+ union a_union f2;
+ enum an_enum f3;
+ named_struct_typedef f4;
+ func_proto_typedef f5;
+ arr_typedef f6;
+};
+
+struct core_reloc_type_id___missing_targets {
+ /* nothing */
+};
+
+/*
+ * ENUMERATOR VALUE EXISTENCE AND VALUE RELOCATION
+ */
+struct core_reloc_enumval_output {
+ bool named_val1_exists;
+ bool named_val2_exists;
+ bool named_val3_exists;
+ bool anon_val1_exists;
+ bool anon_val2_exists;
+ bool anon_val3_exists;
+
+ int named_val1;
+ int named_val2;
+ int anon_val1;
+ int anon_val2;
+};
+
+enum named_enum {
+ NAMED_ENUM_VAL1 = 1,
+ NAMED_ENUM_VAL2 = 2,
+ NAMED_ENUM_VAL3 = 3,
+};
+
+typedef enum {
+ ANON_ENUM_VAL1 = 0x10,
+ ANON_ENUM_VAL2 = 0x20,
+ ANON_ENUM_VAL3 = 0x30,
+} anon_enum;
+
+struct core_reloc_enumval {
+ enum named_enum f1;
+ anon_enum f2;
+};
+
+/* differing enumerator values */
+enum named_enum___diff {
+ NAMED_ENUM_VAL1___diff = 101,
+ NAMED_ENUM_VAL2___diff = 202,
+ NAMED_ENUM_VAL3___diff = 303,
+};
+
+typedef enum {
+ ANON_ENUM_VAL1___diff = 0x11,
+ ANON_ENUM_VAL2___diff = 0x22,
+ ANON_ENUM_VAL3___diff = 0x33,
+} anon_enum___diff;
+
+struct core_reloc_enumval___diff {
+ enum named_enum___diff f1;
+ anon_enum___diff f2;
+};
+
+/* missing (optional) third enum value */
+enum named_enum___val3_missing {
+ NAMED_ENUM_VAL1___val3_missing = 111,
+ NAMED_ENUM_VAL2___val3_missing = 222,
+};
+
+typedef enum {
+ ANON_ENUM_VAL1___val3_missing = 0x111,
+ ANON_ENUM_VAL2___val3_missing = 0x222,
+} anon_enum___val3_missing;
+
+struct core_reloc_enumval___val3_missing {
+ enum named_enum___val3_missing f1;
+ anon_enum___val3_missing f2;
+};
+
+/* missing (mandatory) second enum value, should fail */
+enum named_enum___err_missing {
+ NAMED_ENUM_VAL1___err_missing = 1,
+ NAMED_ENUM_VAL3___err_missing = 3,
+};
+
+typedef enum {
+ ANON_ENUM_VAL1___err_missing = 0x111,
+ ANON_ENUM_VAL3___err_missing = 0x222,
+} anon_enum___err_missing;
+
+struct core_reloc_enumval___err_missing {
+ enum named_enum___err_missing f1;
+ anon_enum___err_missing f2;
+};
diff --git a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c
index 98e1efe14549..49a84a3a2306 100644
--- a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c
@@ -1,8 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <linux/stddef.h>
+#include <linux/if_ether.h>
#include <linux/ipv6.h>
#include <linux/bpf.h>
+#include <linux/tcp.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#include <bpf/bpf_tracing.h>
@@ -151,4 +153,29 @@ int new_get_constant(long val)
test_get_constant = 1;
return test_get_constant; /* original get_constant() returns val - 122 */
}
+
+__u64 test_pkt_write_access_subprog = 0;
+SEC("freplace/test_pkt_write_access_subprog")
+int new_test_pkt_write_access_subprog(struct __sk_buff *skb, __u32 off)
+{
+ void *data = (void *)(long)skb->data;
+ void *data_end = (void *)(long)skb->data_end;
+ struct tcphdr *tcp;
+
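+ /* Bound off so the verifier can prove the packet access below is safe */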
+ if (off > sizeof(struct ethhdr) + sizeof(struct ipv6hdr))
+ return -1;
+
+ tcp = data + off;
+ if (tcp + 1 > data_end)
+ return -1;
+
+ /* make modifications to the packet data */
+ tcp->check++;
+ tcp->syn = 0;
+
+ test_pkt_write_access_subprog = 1;
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/fmod_ret_freplace.c b/tools/testing/selftests/bpf/progs/fmod_ret_freplace.c
new file mode 100644
index 000000000000..c8943ccee6c0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/fmod_ret_freplace.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+volatile __u64 test_fmod_ret = 0;
+SEC("fmod_ret/security_new_get_constant")
+int BPF_PROG(fmod_ret_test, long val, int ret)
+{
+ test_fmod_ret = 1;
+ return 120;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/freplace_attach_probe.c b/tools/testing/selftests/bpf/progs/freplace_attach_probe.c
new file mode 100644
index 000000000000..bb2a77c5b62b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/freplace_attach_probe.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/ptrace.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#define VAR_NUM 2
+
+struct hmap_elem {
+ struct bpf_spin_lock lock;
+ int var[VAR_NUM];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, struct hmap_elem);
+} hash_map SEC(".maps");
+
+SEC("freplace/handle_kprobe")
+int new_handle_kprobe(struct pt_regs *ctx)
+{
+ struct hmap_elem zero = {}, *val;
+ int key = 0;
+
+ val = bpf_map_lookup_elem(&hash_map, &key);
+ if (!val)
+ return 1;
+ /* spin_lock in hash map */
+ bpf_spin_lock(&val->lock);
+ val->var[0] = 99;
+ bpf_spin_unlock(&val->lock);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/freplace_cls_redirect.c b/tools/testing/selftests/bpf/progs/freplace_cls_redirect.c
new file mode 100644
index 000000000000..68a5a9db928a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/freplace_cls_redirect.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <linux/pkt_cls.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+
+struct bpf_map_def SEC("maps") sock_map = {
+ .type = BPF_MAP_TYPE_SOCKMAP,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 2,
+};
+
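+/* Minimal replacement: look up and re-insert a sockmap entry, then pass or drop */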
+SEC("freplace/cls_redirect")
+int freplace_cls_redirect_test(struct __sk_buff *skb)
+{
+ int ret = 0;
+ const int zero = 0;
+ struct bpf_sock *sk;
+
+ sk = bpf_map_lookup_elem(&sock_map, &zero);
+ if (!sk)
+ return TC_ACT_SHOT;
+
+ ret = bpf_map_update_elem(&sock_map, &zero, sk, 0);
+ bpf_sk_release(sk);
+
+ return ret == 0 ? TC_ACT_OK : TC_ACT_SHOT;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c b/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c
new file mode 100644
index 000000000000..544e5ac90461
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/stddef.h>
+#include <linux/ipv6.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <sys/socket.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+SEC("freplace/connect_v4_prog")
+int new_connect_v4_prog(struct bpf_sock_addr *ctx)
+{
+ // return a value that's outside the valid range
+ return 255;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/freplace_get_constant.c b/tools/testing/selftests/bpf/progs/freplace_get_constant.c
new file mode 100644
index 000000000000..705e4b64dfc2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/freplace_get_constant.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+volatile __u64 test_get_constant = 0;
+SEC("freplace/get_constant")
+int security_new_get_constant(long val)
+{
+ if (val != 123)
+ return 0;
+ test_get_constant = 1;
+ return test_get_constant; /* original get_constant() returns val - 122 */
+}
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/local_storage.c b/tools/testing/selftests/bpf/progs/local_storage.c
new file mode 100644
index 000000000000..0758ba229ae0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/local_storage.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include <errno.h>
+#include <linux/bpf.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+#define DUMMY_STORAGE_VALUE 0xdeadbeef
+
+int monitored_pid = 0;
+int inode_storage_result = -1;
+int sk_storage_result = -1;
+
+struct dummy_storage {
+ __u32 value;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_INODE_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct dummy_storage);
+} inode_storage_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC | BPF_F_CLONE);
+ __type(key, int);
+ __type(value, struct dummy_storage);
+} sk_storage_map SEC(".maps");
+
+/* TODO Use vmlinux.h once BTF pruning for embedded types is fixed.
+ */
+struct sock {} __attribute__((preserve_access_index));
+struct sockaddr {} __attribute__((preserve_access_index));
+struct socket {
+ struct sock *sk;
+} __attribute__((preserve_access_index));
+
+struct inode {} __attribute__((preserve_access_index));
+struct dentry {
+ struct inode *d_inode;
+} __attribute__((preserve_access_index));
+struct file {
+ struct inode *f_inode;
+} __attribute__((preserve_access_index));
+
+
+SEC("lsm/inode_unlink")
+int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim)
+{
+ __u32 pid = bpf_get_current_pid_tgid() >> 32;
+ struct dummy_storage *storage;
+
+ if (pid != monitored_pid)
+ return 0;
+
+ storage = bpf_inode_storage_get(&inode_storage_map, victim->d_inode, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!storage)
+ return 0;
+
+ /* file_open should have stored DUMMY_STORAGE_VALUE for this inode */
+ if (storage->value != DUMMY_STORAGE_VALUE) {
+ inode_storage_result = -1;
+ return 0;
+ }
+
+ inode_storage_result =
+ bpf_inode_storage_delete(&inode_storage_map, victim->d_inode);
+
+ return 0;
+}
+
+SEC("lsm/socket_bind")
+int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address,
+ int addrlen)
+{
+ __u32 pid = bpf_get_current_pid_tgid() >> 32;
+ struct dummy_storage *storage;
+
+ if (pid != monitored_pid)
+ return 0;
+
+ storage = bpf_sk_storage_get(&sk_storage_map, sock->sk, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ if (!storage)
+ return 0;
+
+ /* socket_post_create should have stored DUMMY_STORAGE_VALUE */
+ if (storage->value != DUMMY_STORAGE_VALUE) {
+ sk_storage_result = -1;
+ return 0;
+ }
+
+ sk_storage_result = bpf_sk_storage_delete(&sk_storage_map, sock->sk);
+ return 0;
+}
+
+SEC("lsm/socket_post_create")
+int BPF_PROG(socket_post_create, struct socket *sock, int family, int type,
+ int protocol, int kern)
+{
+ __u32 pid = bpf_get_current_pid_tgid() >> 32;
+ struct dummy_storage *storage;
+
+ if (pid != monitored_pid)
+ return 0;
+
+ storage = bpf_sk_storage_get(&sk_storage_map, sock->sk, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ if (!storage)
+ return 0;
+
+ storage->value = DUMMY_STORAGE_VALUE;
+
+ return 0;
+}
+
+SEC("lsm/file_open")
+int BPF_PROG(file_open, struct file *file)
+{
+ __u32 pid = bpf_get_current_pid_tgid() >> 32;
+ struct dummy_storage *storage;
+
+ if (pid != monitored_pid)
+ return 0;
+
+ if (!file->f_inode)
+ return 0;
+
+ storage = bpf_inode_storage_get(&inode_storage_map, file->f_inode, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!storage)
+ return 0;
+
+ storage->value = DUMMY_STORAGE_VALUE;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/lsm.c b/tools/testing/selftests/bpf/progs/lsm.c
index b4598d4bc4f7..ff4d343b94b5 100644
--- a/tools/testing/selftests/bpf/progs/lsm.c
+++ b/tools/testing/selftests/bpf/progs/lsm.c
@@ -9,6 +9,27 @@
#include <bpf/bpf_tracing.h>
#include <errno.h>
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} array SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} hash SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LRU_HASH);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} lru_hash SEC(".maps");
+
char _license[] SEC("license") = "GPL";
int monitored_pid = 0;
@@ -36,13 +57,56 @@ int BPF_PROG(test_int_hook, struct vm_area_struct *vma,
return ret;
}
-SEC("lsm/bprm_committed_creds")
+SEC("lsm.s/bprm_committed_creds")
int BPF_PROG(test_void_hook, struct linux_binprm *bprm)
{
__u32 pid = bpf_get_current_pid_tgid() >> 32;
+ char args[64];
+ __u32 key = 0;
+ __u64 *value;
if (monitored_pid == pid)
bprm_count++;
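+ /* Read arg_start through two pointer chains: bprm->vma->vm_mm and bprm->mm */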
+ bpf_copy_from_user(args, sizeof(args), (void *)bprm->vma->vm_mm->arg_start);
+ bpf_copy_from_user(args, sizeof(args), (void *)bprm->mm->arg_start);
+
+ value = bpf_map_lookup_elem(&array, &key);
+ if (value)
+ *value = 0;
+ value = bpf_map_lookup_elem(&hash, &key);
+ if (value)
+ *value = 0;
+ value = bpf_map_lookup_elem(&lru_hash, &key);
+ if (value)
+ *value = 0;
+
+ return 0;
+}
+SEC("lsm/task_free") /* lsm/ is ok, lsm.s/ fails */
+int BPF_PROG(test_task_free, struct task_struct *task)
+{
+ return 0;
+}
+
+int copy_test = 0;
+
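+/* The userspace side passes a negative len that encodes which outcome to expect */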
+SEC("fentry.s/__x64_sys_setdomainname")
+int BPF_PROG(test_sys_setdomainname, struct pt_regs *regs)
+{
+ void *ptr = (void *)PT_REGS_PARM1(regs);
+ int len = PT_REGS_PARM2(regs);
+ int buf = 0;
+ long ret;
+
+ ret = bpf_copy_from_user(&buf, sizeof(buf), ptr);
+ if (len == -2 && ret == 0 && buf == 1234)
+ copy_test++;
+ if (len == -3 && ret == -EFAULT)
+ copy_test++;
+ if (len == -4 && ret == -EFAULT)
+ copy_test++;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
index 473665cac67e..c325405751e2 100644
--- a/tools/testing/selftests/bpf/progs/map_ptr_kern.c
+++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
@@ -82,6 +82,15 @@ static inline int check_default(struct bpf_map *indirect,
return 1;
}
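+/* __noinline keeps this as a separate subprog, so map pointers cross a call */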
+static __noinline int
+check_default_noinline(struct bpf_map *indirect, struct bpf_map *direct)
+{
+ VERIFY(check(indirect, direct, sizeof(__u32), sizeof(__u32),
+ MAX_ENTRIES));
+ return 1;
+}
+
typedef struct {
int counter;
} atomic_t;
@@ -107,7 +115,7 @@ static inline int check_hash(void)
struct bpf_map *map = (struct bpf_map *)&m_hash;
int i;
- VERIFY(check_default(&hash->map, map));
+ VERIFY(check_default_noinline(&hash->map, map));
VERIFY(hash->n_buckets == MAX_ENTRIES);
VERIFY(hash->elem_size == 64);
@@ -589,7 +597,7 @@ static inline int check_stack(void)
return 1;
}
-struct bpf_sk_storage_map {
+struct bpf_local_storage_map {
struct bpf_map map;
} __attribute__((preserve_access_index));
@@ -602,8 +610,8 @@ struct {
static inline int check_sk_storage(void)
{
- struct bpf_sk_storage_map *sk_storage =
- (struct bpf_sk_storage_map *)&m_sk_storage;
+ struct bpf_local_storage_map *sk_storage =
+ (struct bpf_local_storage_map *)&m_sk_storage;
struct bpf_map *map = (struct bpf_map *)&m_sk_storage;
VERIFY(check(&sk_storage->map, map, sizeof(__u32), sizeof(__u32), 0));
diff --git a/tools/testing/selftests/bpf/progs/metadata_unused.c b/tools/testing/selftests/bpf/progs/metadata_unused.c
new file mode 100644
index 000000000000..672a0d19f8d0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/metadata_unused.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
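+/* Variables named bpf_metadata_* are surfaced as program metadata (e.g. by bpftool) */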
+volatile const char bpf_metadata_a[] SEC(".rodata") = "foo";
+volatile const int bpf_metadata_b SEC(".rodata") = 1;
+
+SEC("cgroup_skb/egress")
+int prog(struct xdp_md *ctx)
+{
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/metadata_used.c b/tools/testing/selftests/bpf/progs/metadata_used.c
new file mode 100644
index 000000000000..b7198e65383d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/metadata_used.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+volatile const char bpf_metadata_a[] SEC(".rodata") = "bar";
+volatile const int bpf_metadata_b SEC(".rodata") = 2;
+
+SEC("cgroup_skb/egress")
+int prog(struct xdp_md *ctx)
+{
+ return bpf_metadata_b ? 1 : 0;
+}
+
+char _license[] SEC("license") = "GPL";
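metadata_unused.c and metadata_used.c exercise the loader convention this series adds: read-only variables whose names begin with bpf_metadata_ are surfaced as key/value program metadata, and the _unused variant checks that the .rodata map is still created (and the values still retrievable) when the program never references them. The convention in minimal form, as a sketch; a bpftool recent enough to print metadata in `bpftool prog show` output is assumed:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* The "bpf_metadata_" name prefix is the whole contract. */
volatile const char bpf_metadata_version[] SEC(".rodata") = "v1.2.3";

SEC("cgroup_skb/egress")
int prog(struct __sk_buff *skb)
{
	return 1;	/* allow */
}

char _license[] SEC("license") = "GPL";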
diff --git a/tools/testing/selftests/bpf/progs/netif_receive_skb.c b/tools/testing/selftests/bpf/progs/netif_receive_skb.c
new file mode 100644
index 000000000000..6b670039ea67
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/netif_receive_skb.c
@@ -0,0 +1,249 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020, Oracle and/or its affiliates. */
+
+#include "btf_ptr.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+#include <errno.h>
+
+long ret = 0;
+int num_subtests = 0;
+int ran_subtests = 0;
+bool skip = false;
+
+#define STRSIZE 2048
+#define EXPECTED_STRSIZE 256
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, char[STRSIZE]);
+} strdata SEC(".maps");
+
+static int __strncmp(const void *m1, const void *m2, size_t len)
+{
+ const unsigned char *s1 = m1;
+ const unsigned char *s2 = m2;
+ int i, delta = 0;
+
+ for (i = 0; i < len; i++) {
+ delta = s1[i] - s2[i];
+ if (delta || s1[i] == 0 || s2[i] == 0)
+ break;
+ }
+ return delta;
+}
+
+#if __has_builtin(__builtin_btf_type_id)
+#define TEST_BTF(_str, _type, _flags, _expected, ...) \
+ do { \
+ static const char _expectedval[EXPECTED_STRSIZE] = \
+ _expected; \
+ static const char _ptrtype[64] = #_type; \
+ __u64 _hflags = _flags | BTF_F_COMPACT; \
+ static _type _ptrdata = __VA_ARGS__; \
+ static struct btf_ptr _ptr = { }; \
+ int _cmp; \
+ \
+ ++num_subtests; \
+ if (ret < 0) \
+ break; \
+ ++ran_subtests; \
+ _ptr.ptr = &_ptrdata; \
+ _ptr.type_id = bpf_core_type_id_kernel(_type); \
+ if (_ptr.type_id <= 0) { \
+ ret = -EINVAL; \
+ break; \
+ } \
+ ret = bpf_snprintf_btf(_str, STRSIZE, \
+ &_ptr, sizeof(_ptr), _hflags); \
+ if (ret) \
+ break; \
+ _cmp = __strncmp(_str, _expectedval, EXPECTED_STRSIZE); \
+ if (_cmp != 0) { \
+ bpf_printk("(%d) got %s", _cmp, _str); \
+ bpf_printk("(%d) expected %s", _cmp, \
+ _expectedval); \
+ ret = -EBADMSG; \
+ break; \
+ } \
+ } while (0)
+#endif
+
+/* Use where expected data string matches its stringified declaration */
+#define TEST_BTF_C(_str, _type, _flags, ...) \
+ TEST_BTF(_str, _type, _flags, "(" #_type ")" #__VA_ARGS__, \
+ __VA_ARGS__)
+
+/* TRACE_EVENT(netif_receive_skb,
+ * TP_PROTO(struct sk_buff *skb),
+ */
+SEC("tp_btf/netif_receive_skb")
+int BPF_PROG(trace_netif_receive_skb, struct sk_buff *skb)
+{
+ static __u64 flags[] = { 0, BTF_F_COMPACT, BTF_F_ZERO, BTF_F_PTR_RAW,
+ BTF_F_NONAME, BTF_F_COMPACT | BTF_F_ZERO |
+ BTF_F_PTR_RAW | BTF_F_NONAME };
+ static struct btf_ptr p = { };
+ __u32 key = 0;
+ int i, __ret;
+ char *str;
+
+#if __has_builtin(__builtin_btf_type_id)
+ str = bpf_map_lookup_elem(&strdata, &key);
+ if (!str)
+ return 0;
+
+ /* Ensure we can write skb string representation */
+ p.type_id = bpf_core_type_id_kernel(struct sk_buff);
+ p.ptr = skb;
+ for (i = 0; i < ARRAY_SIZE(flags); i++) {
+ ++num_subtests;
+		ret = bpf_snprintf_btf(str, STRSIZE, &p, sizeof(p), flags[i]);
+ if (ret < 0)
+ bpf_printk("returned %d when writing skb", ret);
+ ++ran_subtests;
+ }
+
+ /* Check invalid ptr value */
+ p.ptr = 0;
+ __ret = bpf_snprintf_btf(str, STRSIZE, &p, sizeof(p), 0);
+ if (__ret >= 0) {
+ bpf_printk("printing NULL should generate error, got (%d)",
+ __ret);
+ ret = -ERANGE;
+ }
+
+ /* Verify type display for various types. */
+
+ /* simple int */
+ TEST_BTF_C(str, int, 0, 1234);
+ TEST_BTF(str, int, BTF_F_NONAME, "1234", 1234);
+ /* zero value should be printed at toplevel */
+ TEST_BTF(str, int, 0, "(int)0", 0);
+ TEST_BTF(str, int, BTF_F_NONAME, "0", 0);
+ TEST_BTF(str, int, BTF_F_ZERO, "(int)0", 0);
+ TEST_BTF(str, int, BTF_F_NONAME | BTF_F_ZERO, "0", 0);
+ TEST_BTF_C(str, int, 0, -4567);
+ TEST_BTF(str, int, BTF_F_NONAME, "-4567", -4567);
+
+ /* simple char */
+ TEST_BTF_C(str, char, 0, 100);
+ TEST_BTF(str, char, BTF_F_NONAME, "100", 100);
+ /* zero value should be printed at toplevel */
+ TEST_BTF(str, char, 0, "(char)0", 0);
+ TEST_BTF(str, char, BTF_F_NONAME, "0", 0);
+ TEST_BTF(str, char, BTF_F_ZERO, "(char)0", 0);
+ TEST_BTF(str, char, BTF_F_NONAME | BTF_F_ZERO, "0", 0);
+
+ /* simple typedef */
+ TEST_BTF_C(str, uint64_t, 0, 100);
+ TEST_BTF(str, u64, BTF_F_NONAME, "1", 1);
+ /* zero value should be printed at toplevel */
+ TEST_BTF(str, u64, 0, "(u64)0", 0);
+ TEST_BTF(str, u64, BTF_F_NONAME, "0", 0);
+ TEST_BTF(str, u64, BTF_F_ZERO, "(u64)0", 0);
+ TEST_BTF(str, u64, BTF_F_NONAME|BTF_F_ZERO, "0", 0);
+
+ /* typedef struct */
+ TEST_BTF_C(str, atomic_t, 0, {.counter = (int)1,});
+ TEST_BTF(str, atomic_t, BTF_F_NONAME, "{1,}", {.counter = 1,});
+ /* typedef with 0 value should be printed at toplevel */
+ TEST_BTF(str, atomic_t, 0, "(atomic_t){}", {.counter = 0,});
+ TEST_BTF(str, atomic_t, BTF_F_NONAME, "{}", {.counter = 0,});
+ TEST_BTF(str, atomic_t, BTF_F_ZERO, "(atomic_t){.counter = (int)0,}",
+ {.counter = 0,});
+ TEST_BTF(str, atomic_t, BTF_F_NONAME|BTF_F_ZERO,
+ "{0,}", {.counter = 0,});
+
+ /* enum where enum value does (and does not) exist */
+ TEST_BTF_C(str, enum bpf_cmd, 0, BPF_MAP_CREATE);
+ TEST_BTF(str, enum bpf_cmd, 0, "(enum bpf_cmd)BPF_MAP_CREATE", 0);
+ TEST_BTF(str, enum bpf_cmd, BTF_F_NONAME, "BPF_MAP_CREATE",
+ BPF_MAP_CREATE);
+ TEST_BTF(str, enum bpf_cmd, BTF_F_NONAME|BTF_F_ZERO,
+ "BPF_MAP_CREATE", 0);
+
+ TEST_BTF(str, enum bpf_cmd, BTF_F_ZERO, "(enum bpf_cmd)BPF_MAP_CREATE",
+ BPF_MAP_CREATE);
+ TEST_BTF(str, enum bpf_cmd, BTF_F_NONAME|BTF_F_ZERO,
+ "BPF_MAP_CREATE", BPF_MAP_CREATE);
+ TEST_BTF_C(str, enum bpf_cmd, 0, 2000);
+ TEST_BTF(str, enum bpf_cmd, BTF_F_NONAME, "2000", 2000);
+
+ /* simple struct */
+ TEST_BTF_C(str, struct btf_enum, 0,
+ {.name_off = (__u32)3,.val = (__s32)-1,});
+ TEST_BTF(str, struct btf_enum, BTF_F_NONAME, "{3,-1,}",
+ { .name_off = 3, .val = -1,});
+ TEST_BTF(str, struct btf_enum, BTF_F_NONAME, "{-1,}",
+ { .name_off = 0, .val = -1,});
+ TEST_BTF(str, struct btf_enum, BTF_F_NONAME|BTF_F_ZERO, "{0,-1,}",
+ { .name_off = 0, .val = -1,});
+ /* empty struct should be printed */
+ TEST_BTF(str, struct btf_enum, 0, "(struct btf_enum){}",
+ { .name_off = 0, .val = 0,});
+ TEST_BTF(str, struct btf_enum, BTF_F_NONAME, "{}",
+ { .name_off = 0, .val = 0,});
+ TEST_BTF(str, struct btf_enum, BTF_F_ZERO,
+ "(struct btf_enum){.name_off = (__u32)0,.val = (__s32)0,}",
+ { .name_off = 0, .val = 0,});
+
+ /* struct with pointers */
+ TEST_BTF(str, struct list_head, BTF_F_PTR_RAW,
+ "(struct list_head){.next = (struct list_head *)0x0000000000000001,}",
+ { .next = (struct list_head *)1 });
+ /* NULL pointer should not be displayed */
+ TEST_BTF(str, struct list_head, BTF_F_PTR_RAW,
+ "(struct list_head){}",
+ { .next = (struct list_head *)0 });
+
+ /* struct with char array */
+ TEST_BTF(str, struct bpf_prog_info, 0,
+ "(struct bpf_prog_info){.name = (char[])['f','o','o',],}",
+ { .name = "foo",});
+ TEST_BTF(str, struct bpf_prog_info, BTF_F_NONAME,
+ "{['f','o','o',],}",
+ {.name = "foo",});
+ /* leading null char means do not display string */
+ TEST_BTF(str, struct bpf_prog_info, 0,
+ "(struct bpf_prog_info){}",
+ {.name = {'\0', 'f', 'o', 'o'}});
+ /* handle non-printable characters */
+ TEST_BTF(str, struct bpf_prog_info, 0,
+ "(struct bpf_prog_info){.name = (char[])[1,2,3,],}",
+ { .name = {1, 2, 3, 0}});
+
+ /* struct with non-char array */
+ TEST_BTF(str, struct __sk_buff, 0,
+ "(struct __sk_buff){.cb = (__u32[])[1,2,3,4,5,],}",
+ { .cb = {1, 2, 3, 4, 5,},});
+ TEST_BTF(str, struct __sk_buff, BTF_F_NONAME,
+ "{[1,2,3,4,5,],}",
+ { .cb = { 1, 2, 3, 4, 5},});
+	/* For non-char arrays, show non-zero values only */
+ TEST_BTF(str, struct __sk_buff, 0,
+ "(struct __sk_buff){.cb = (__u32[])[1,],}",
+ { .cb = { 0, 0, 1, 0, 0},});
+
+ /* struct with bitfields */
+ TEST_BTF_C(str, struct bpf_insn, 0,
+ {.code = (__u8)1,.dst_reg = (__u8)0x2,.src_reg = (__u8)0x3,.off = (__s16)4,.imm = (__s32)5,});
+ TEST_BTF(str, struct bpf_insn, BTF_F_NONAME, "{1,0x2,0x3,4,5,}",
+ {.code = 1, .dst_reg = 0x2, .src_reg = 0x3, .off = 4,
+ .imm = 5,});
+#else
+ skip = true;
+#endif
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
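This test drives the new bpf_snprintf_btf() helper, which renders an arbitrary kernel object as C-like text from its BTF description: struct btf_ptr carries the object pointer plus its BTF type id, and the BTF_F_COMPACT / BTF_F_NONAME / BTF_F_ZERO / BTF_F_PTR_RAW flags tune the rendering, as the TEST_BTF cases above enumerate. Outside the harness the helper reduces to something like this sketch (assuming vmlinux.h supplies struct btf_ptr and the flag enum; the selftest carries its own btf_ptr.h for older headers):

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>

char buf[2048];		/* .bss scratch; fine for a demo, racy across CPUs */

SEC("tp_btf/sched_switch")
int BPF_PROG(dump_next_task, bool preempt, struct task_struct *prev,
	     struct task_struct *next)
{
	struct btf_ptr p = {
		.ptr = next,
		.type_id = bpf_core_type_id_kernel(struct task_struct),
	};

	/* Render the task_struct as "(struct task_struct){...}" text. */
	if (bpf_snprintf_btf(buf, sizeof(buf), &p, sizeof(p),
			     BTF_F_COMPACT) >= 0)
		bpf_printk("%s", buf);
	return 0;
}

char _license[] SEC("license") = "GPL";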
diff --git a/tools/testing/selftests/bpf/progs/profiler.h b/tools/testing/selftests/bpf/progs/profiler.h
new file mode 100644
index 000000000000..3bac4fdd4bdf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/profiler.h
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#pragma once
+
+#define TASK_COMM_LEN 16
+#define MAX_ANCESTORS 4
+#define MAX_PATH 256
+#define KILL_TARGET_LEN 64
+#define CTL_MAXNAME 10
+#define MAX_ARGS_LEN 4096
+#define MAX_FILENAME_LEN 512
+#define MAX_ENVIRON_LEN 8192
+#define MAX_PATH_DEPTH 32
+#define MAX_FILEPATH_LENGTH (MAX_PATH_DEPTH * MAX_PATH)
+#define MAX_CGROUPS_PATH_DEPTH 8
+
+#define MAX_METADATA_PAYLOAD_LEN TASK_COMM_LEN
+
+#define MAX_CGROUP_PAYLOAD_LEN \
+ (MAX_PATH * 2 + (MAX_PATH * MAX_CGROUPS_PATH_DEPTH))
+
+#define MAX_CAP_PAYLOAD_LEN (MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN)
+
+#define MAX_SYSCTL_PAYLOAD_LEN \
+ (MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN + CTL_MAXNAME + MAX_PATH)
+
+#define MAX_KILL_PAYLOAD_LEN \
+ (MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN + TASK_COMM_LEN + \
+ KILL_TARGET_LEN)
+
+#define MAX_EXEC_PAYLOAD_LEN \
+ (MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN + MAX_FILENAME_LEN + \
+ MAX_ARGS_LEN + MAX_ENVIRON_LEN)
+
+#define MAX_FILEMOD_PAYLOAD_LEN \
+ (MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN + MAX_FILEPATH_LENGTH + \
+ MAX_FILEPATH_LENGTH)
+
+enum data_type {
+ INVALID_EVENT,
+ EXEC_EVENT,
+ FORK_EVENT,
+ KILL_EVENT,
+ SYSCTL_EVENT,
+ FILEMOD_EVENT,
+ MAX_DATA_TYPE_EVENT
+};
+
+enum filemod_type {
+ FMOD_OPEN,
+ FMOD_LINK,
+ FMOD_SYMLINK,
+};
+
+struct ancestors_data_t {
+ pid_t ancestor_pids[MAX_ANCESTORS];
+ uint32_t ancestor_exec_ids[MAX_ANCESTORS];
+ uint64_t ancestor_start_times[MAX_ANCESTORS];
+ uint32_t num_ancestors;
+};
+
+struct var_metadata_t {
+ enum data_type type;
+ pid_t pid;
+ uint32_t exec_id;
+ uid_t uid;
+ gid_t gid;
+ uint64_t start_time;
+ uint32_t cpu_id;
+ uint64_t bpf_stats_num_perf_events;
+ uint64_t bpf_stats_start_ktime_ns;
+ uint8_t comm_length;
+};
+
+struct cgroup_data_t {
+ ino_t cgroup_root_inode;
+ ino_t cgroup_proc_inode;
+ uint64_t cgroup_root_mtime;
+ uint64_t cgroup_proc_mtime;
+ uint16_t cgroup_root_length;
+ uint16_t cgroup_proc_length;
+ uint16_t cgroup_full_length;
+ int cgroup_full_path_root_pos;
+};
+
+struct var_sysctl_data_t {
+ struct var_metadata_t meta;
+ struct cgroup_data_t cgroup_data;
+ struct ancestors_data_t ancestors_info;
+ uint8_t sysctl_val_length;
+ uint16_t sysctl_path_length;
+ char payload[MAX_SYSCTL_PAYLOAD_LEN];
+};
+
+struct var_kill_data_t {
+ struct var_metadata_t meta;
+ struct cgroup_data_t cgroup_data;
+ struct ancestors_data_t ancestors_info;
+ pid_t kill_target_pid;
+ int kill_sig;
+ uint32_t kill_count;
+ uint64_t last_kill_time;
+ uint8_t kill_target_name_length;
+ uint8_t kill_target_cgroup_proc_length;
+ char payload[MAX_KILL_PAYLOAD_LEN];
+ size_t payload_length;
+};
+
+struct var_exec_data_t {
+ struct var_metadata_t meta;
+ struct cgroup_data_t cgroup_data;
+ pid_t parent_pid;
+ uint32_t parent_exec_id;
+ uid_t parent_uid;
+ uint64_t parent_start_time;
+ uint16_t bin_path_length;
+ uint16_t cmdline_length;
+ uint16_t environment_length;
+ char payload[MAX_EXEC_PAYLOAD_LEN];
+};
+
+struct var_fork_data_t {
+ struct var_metadata_t meta;
+ pid_t parent_pid;
+ uint32_t parent_exec_id;
+ uint64_t parent_start_time;
+ char payload[MAX_METADATA_PAYLOAD_LEN];
+};
+
+struct var_filemod_data_t {
+ struct var_metadata_t meta;
+ struct cgroup_data_t cgroup_data;
+ enum filemod_type fmod_type;
+ unsigned int dst_flags;
+ uint32_t src_device_id;
+ uint32_t dst_device_id;
+ ino_t src_inode;
+ ino_t dst_inode;
+ uint16_t src_filepath_length;
+ uint16_t dst_filepath_length;
+ char payload[MAX_FILEMOD_PAYLOAD_LEN];
+};
+
+struct profiler_config_struct {
+ bool fetch_cgroups_from_bpf;
+ ino_t cgroup_fs_inode;
+ ino_t cgroup_login_session_inode;
+ uint64_t kill_signals_mask;
+ ino_t inode_filter;
+ uint32_t stale_info_secs;
+ bool use_variable_buffers;
+ bool read_environ_from_exec;
+ bool enable_cgroup_v1_resolver;
+};
+
+struct bpf_func_stats_data {
+ uint64_t time_elapsed_ns;
+ uint64_t num_executions;
+ uint64_t num_perf_events;
+};
+
+struct bpf_func_stats_ctx {
+ uint64_t start_time_ns;
+ struct bpf_func_stats_data* bpf_func_stats_data_val;
+};
+
+enum bpf_function_id {
+ profiler_bpf_proc_sys_write,
+ profiler_bpf_sched_process_exec,
+ profiler_bpf_sched_process_exit,
+ profiler_bpf_sys_enter_kill,
+ profiler_bpf_do_filp_open_ret,
+ profiler_bpf_sched_process_fork,
+ profiler_bpf_vfs_link,
+ profiler_bpf_vfs_symlink,
+ profiler_bpf_max_function_id
+};
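The payload budgets above compose arithmetically: with MAX_PATH = 256 and MAX_CGROUPS_PATH_DEPTH = 8, MAX_CGROUP_PAYLOAD_LEN is 256*2 + 256*8 = 2560 bytes, and so MAX_SYSCTL_PAYLOAD_LEN is 16 + 2560 + 10 + 256 = 2842 bytes. A compile-time sanity check one could keep next to the defines (a sketch, not part of the header):

_Static_assert(MAX_CGROUP_PAYLOAD_LEN == 2560,
	       "MAX_PATH * 2 + MAX_PATH * MAX_CGROUPS_PATH_DEPTH");
_Static_assert(MAX_SYSCTL_PAYLOAD_LEN == 2842,
	       "metadata + cgroup + CTL_MAXNAME + MAX_PATH");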
diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h
new file mode 100644
index 000000000000..00578311a423
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/profiler.inc.h
@@ -0,0 +1,969 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <vmlinux.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#include "profiler.h"
+
+#ifndef NULL
+#define NULL 0
+#endif
+
+#define O_WRONLY 00000001
+#define O_RDWR 00000002
+#define O_DIRECTORY 00200000
+#define __O_TMPFILE 020000000
+#define O_TMPFILE (__O_TMPFILE | O_DIRECTORY)
+#define MAX_ERRNO 4095
+#define S_IFMT 00170000
+#define S_IFSOCK 0140000
+#define S_IFLNK 0120000
+#define S_IFREG 0100000
+#define S_IFBLK 0060000
+#define S_IFDIR 0040000
+#define S_IFCHR 0020000
+#define S_IFIFO 0010000
+#define S_ISUID 0004000
+#define S_ISGID 0002000
+#define S_ISVTX 0001000
+#define S_ISLNK(m) (((m)&S_IFMT) == S_IFLNK)
+#define S_ISDIR(m) (((m)&S_IFMT) == S_IFDIR)
+#define S_ISCHR(m) (((m)&S_IFMT) == S_IFCHR)
+#define S_ISBLK(m) (((m)&S_IFMT) == S_IFBLK)
+#define S_ISFIFO(m) (((m)&S_IFMT) == S_IFIFO)
+#define S_ISSOCK(m) (((m)&S_IFMT) == S_IFSOCK)
+#define IS_ERR_VALUE(x) ((unsigned long)(void*)(x) >= (unsigned long)-MAX_ERRNO)
+
+#define KILL_DATA_ARRAY_SIZE 8
+
+struct var_kill_data_arr_t {
+ struct var_kill_data_t array[KILL_DATA_ARRAY_SIZE];
+};
+
+union any_profiler_data_t {
+ struct var_exec_data_t var_exec;
+ struct var_kill_data_t var_kill;
+ struct var_sysctl_data_t var_sysctl;
+ struct var_filemod_data_t var_filemod;
+ struct var_fork_data_t var_fork;
+ struct var_kill_data_arr_t var_kill_data_arr;
+};
+
+volatile struct profiler_config_struct bpf_config = {};
+
+#define FETCH_CGROUPS_FROM_BPF (bpf_config.fetch_cgroups_from_bpf)
+#define CGROUP_FS_INODE (bpf_config.cgroup_fs_inode)
+#define CGROUP_LOGIN_SESSION_INODE \
+ (bpf_config.cgroup_login_session_inode)
+#define KILL_SIGNALS (bpf_config.kill_signals_mask)
+#define STALE_INFO (bpf_config.stale_info_secs)
+#define INODE_FILTER (bpf_config.inode_filter)
+#define READ_ENVIRON_FROM_EXEC (bpf_config.read_environ_from_exec)
+#define ENABLE_CGROUP_V1_RESOLVER (bpf_config.enable_cgroup_v1_resolver)
+
+struct kernfs_iattrs___52 {
+ struct iattr ia_iattr;
+};
+
+struct kernfs_node___52 {
+ union /* kernfs_node_id */ {
+ struct {
+ u32 ino;
+ u32 generation;
+ };
+ u64 id;
+ } id;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, u32);
+ __type(value, union any_profiler_data_t);
+} data_heap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+} events SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, KILL_DATA_ARRAY_SIZE);
+ __type(key, u32);
+ __type(value, struct var_kill_data_arr_t);
+} var_tpid_to_data SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, profiler_bpf_max_function_id);
+ __type(key, u32);
+ __type(value, struct bpf_func_stats_data);
+} bpf_func_stats SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u32);
+ __type(value, bool);
+ __uint(max_entries, 16);
+} allowed_devices SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u64);
+ __type(value, bool);
+ __uint(max_entries, 1024);
+} allowed_file_inodes SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u64);
+ __type(value, bool);
+ __uint(max_entries, 1024);
+} allowed_directory_inodes SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u32);
+ __type(value, bool);
+ __uint(max_entries, 16);
+} disallowed_exec_inodes SEC(".maps");
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
+#endif
+
+static INLINE bool IS_ERR(const void* ptr)
+{
+ return IS_ERR_VALUE((unsigned long)ptr);
+}
+
+static INLINE u32 get_userspace_pid()
+{
+ return bpf_get_current_pid_tgid() >> 32;
+}
+
+static INLINE bool is_init_process(u32 tgid)
+{
+ return tgid == 1 || tgid == 0;
+}
+
+static INLINE unsigned long
+probe_read_lim(void* dst, void* src, unsigned long len, unsigned long max)
+{
+ len = len < max ? len : max;
+ if (len > 1) {
+ if (bpf_probe_read(dst, len, src))
+ return 0;
+ } else if (len == 1) {
+ if (bpf_probe_read(dst, 1, src))
+ return 0;
+ }
+ return len;
+}
+
+static INLINE int get_var_spid_index(struct var_kill_data_arr_t* arr_struct,
+ int spid)
+{
+#ifdef UNROLL
+#pragma unroll
+#endif
+ for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
+ if (arr_struct->array[i].meta.pid == spid)
+ return i;
+ return -1;
+}
+
+static INLINE void populate_ancestors(struct task_struct* task,
+ struct ancestors_data_t* ancestors_data)
+{
+ struct task_struct* parent = task;
+ u32 num_ancestors, ppid;
+
+ ancestors_data->num_ancestors = 0;
+#ifdef UNROLL
+#pragma unroll
+#endif
+ for (num_ancestors = 0; num_ancestors < MAX_ANCESTORS; num_ancestors++) {
+ parent = BPF_CORE_READ(parent, real_parent);
+ if (parent == NULL)
+ break;
+ ppid = BPF_CORE_READ(parent, tgid);
+ if (is_init_process(ppid))
+ break;
+ ancestors_data->ancestor_pids[num_ancestors] = ppid;
+ ancestors_data->ancestor_exec_ids[num_ancestors] =
+ BPF_CORE_READ(parent, self_exec_id);
+ ancestors_data->ancestor_start_times[num_ancestors] =
+ BPF_CORE_READ(parent, start_time);
+ ancestors_data->num_ancestors = num_ancestors;
+ }
+}
+
+static INLINE void* read_full_cgroup_path(struct kernfs_node* cgroup_node,
+ struct kernfs_node* cgroup_root_node,
+ void* payload,
+ int* root_pos)
+{
+ void* payload_start = payload;
+ size_t filepart_length;
+
+#ifdef UNROLL
+#pragma unroll
+#endif
+ for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) {
+ filepart_length =
+ bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(cgroup_node, name));
+ if (!cgroup_node)
+ return payload;
+ if (cgroup_node == cgroup_root_node)
+ *root_pos = payload - payload_start;
+ if (filepart_length <= MAX_PATH) {
+ barrier_var(filepart_length);
+ payload += filepart_length;
+ }
+ cgroup_node = BPF_CORE_READ(cgroup_node, parent);
+ }
+ return payload;
+}
+
+static ino_t get_inode_from_kernfs(struct kernfs_node* node)
+{
+ struct kernfs_node___52* node52 = (void*)node;
+
+ if (bpf_core_field_exists(node52->id.ino)) {
+ barrier_var(node52);
+ return BPF_CORE_READ(node52, id.ino);
+ } else {
+ barrier_var(node);
+ return (u64)BPF_CORE_READ(node, id);
+ }
+}
+
+int pids_cgrp_id = 1;
+
+static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data,
+ struct task_struct* task,
+ void* payload)
+{
+ struct kernfs_node* root_kernfs =
+ BPF_CORE_READ(task, nsproxy, cgroup_ns, root_cset, dfl_cgrp, kn);
+ struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);
+
+ if (ENABLE_CGROUP_V1_RESOLVER) {
+#ifdef UNROLL
+#pragma unroll
+#endif
+ for (int i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+ struct cgroup_subsys_state* subsys =
+ BPF_CORE_READ(task, cgroups, subsys[i]);
+ if (subsys != NULL) {
+ int subsys_id = BPF_CORE_READ(subsys, ss, id);
+ if (subsys_id == pids_cgrp_id) {
+ proc_kernfs = BPF_CORE_READ(subsys, cgroup, kn);
+ root_kernfs = BPF_CORE_READ(subsys, ss, root, kf_root, kn);
+ break;
+ }
+ }
+ }
+ }
+
+ cgroup_data->cgroup_root_inode = get_inode_from_kernfs(root_kernfs);
+ cgroup_data->cgroup_proc_inode = get_inode_from_kernfs(proc_kernfs);
+
+ if (bpf_core_field_exists(root_kernfs->iattr->ia_mtime)) {
+ cgroup_data->cgroup_root_mtime =
+ BPF_CORE_READ(root_kernfs, iattr, ia_mtime.tv_nsec);
+ cgroup_data->cgroup_proc_mtime =
+ BPF_CORE_READ(proc_kernfs, iattr, ia_mtime.tv_nsec);
+ } else {
+ struct kernfs_iattrs___52* root_iattr =
+ (struct kernfs_iattrs___52*)BPF_CORE_READ(root_kernfs, iattr);
+ cgroup_data->cgroup_root_mtime =
+ BPF_CORE_READ(root_iattr, ia_iattr.ia_mtime.tv_nsec);
+
+ struct kernfs_iattrs___52* proc_iattr =
+ (struct kernfs_iattrs___52*)BPF_CORE_READ(proc_kernfs, iattr);
+ cgroup_data->cgroup_proc_mtime =
+ BPF_CORE_READ(proc_iattr, ia_iattr.ia_mtime.tv_nsec);
+ }
+
+ cgroup_data->cgroup_root_length = 0;
+ cgroup_data->cgroup_proc_length = 0;
+ cgroup_data->cgroup_full_length = 0;
+
+ size_t cgroup_root_length =
+ bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(root_kernfs, name));
+ barrier_var(cgroup_root_length);
+ if (cgroup_root_length <= MAX_PATH) {
+ barrier_var(cgroup_root_length);
+ cgroup_data->cgroup_root_length = cgroup_root_length;
+ payload += cgroup_root_length;
+ }
+
+ size_t cgroup_proc_length =
+ bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(proc_kernfs, name));
+ barrier_var(cgroup_proc_length);
+ if (cgroup_proc_length <= MAX_PATH) {
+ barrier_var(cgroup_proc_length);
+ cgroup_data->cgroup_proc_length = cgroup_proc_length;
+ payload += cgroup_proc_length;
+ }
+
+ if (FETCH_CGROUPS_FROM_BPF) {
+ cgroup_data->cgroup_full_path_root_pos = -1;
+ void* payload_end_pos = read_full_cgroup_path(proc_kernfs, root_kernfs, payload,
+ &cgroup_data->cgroup_full_path_root_pos);
+ cgroup_data->cgroup_full_length = payload_end_pos - payload;
+ payload = payload_end_pos;
+ }
+
+ return (void*)payload;
+}
+
+static INLINE void* populate_var_metadata(struct var_metadata_t* metadata,
+ struct task_struct* task,
+ u32 pid, void* payload)
+{
+ u64 uid_gid = bpf_get_current_uid_gid();
+
+ metadata->uid = (u32)uid_gid;
+ metadata->gid = uid_gid >> 32;
+ metadata->pid = pid;
+ metadata->exec_id = BPF_CORE_READ(task, self_exec_id);
+ metadata->start_time = BPF_CORE_READ(task, start_time);
+ metadata->comm_length = 0;
+
+ size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm);
+ barrier_var(comm_length);
+ if (comm_length <= TASK_COMM_LEN) {
+ barrier_var(comm_length);
+ metadata->comm_length = comm_length;
+ payload += comm_length;
+ }
+
+ return (void*)payload;
+}
+
+static INLINE struct var_kill_data_t*
+get_var_kill_data(struct pt_regs* ctx, int spid, int tpid, int sig)
+{
+ int zero = 0;
+ struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero);
+
+ if (kill_data == NULL)
+ return NULL;
+ struct task_struct* task = (struct task_struct*)bpf_get_current_task();
+
+ void* payload = populate_var_metadata(&kill_data->meta, task, spid, kill_data->payload);
+ payload = populate_cgroup_info(&kill_data->cgroup_data, task, payload);
+ size_t payload_length = payload - (void*)kill_data->payload;
+ kill_data->payload_length = payload_length;
+ populate_ancestors(task, &kill_data->ancestors_info);
+ kill_data->meta.type = KILL_EVENT;
+ kill_data->kill_target_pid = tpid;
+ kill_data->kill_sig = sig;
+ kill_data->kill_count = 1;
+ kill_data->last_kill_time = bpf_ktime_get_ns();
+ return kill_data;
+}
+
+static INLINE int trace_var_sys_kill(void* ctx, int tpid, int sig)
+{
+ if ((KILL_SIGNALS & (1ULL << sig)) == 0)
+ return 0;
+
+ u32 spid = get_userspace_pid();
+ struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid);
+
+ if (arr_struct == NULL) {
+ struct var_kill_data_t* kill_data = get_var_kill_data(ctx, spid, tpid, sig);
+ int zero = 0;
+
+ if (kill_data == NULL)
+ return 0;
+ arr_struct = bpf_map_lookup_elem(&data_heap, &zero);
+ if (arr_struct == NULL)
+ return 0;
+ bpf_probe_read(&arr_struct->array[0], sizeof(arr_struct->array[0]), kill_data);
+ } else {
+ int index = get_var_spid_index(arr_struct, spid);
+
+ if (index == -1) {
+ struct var_kill_data_t* kill_data =
+ get_var_kill_data(ctx, spid, tpid, sig);
+ if (kill_data == NULL)
+ return 0;
+#ifdef UNROLL
+#pragma unroll
+#endif
+ for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
+ if (arr_struct->array[i].meta.pid == 0) {
+ bpf_probe_read(&arr_struct->array[i],
+ sizeof(arr_struct->array[i]), kill_data);
+ bpf_map_update_elem(&var_tpid_to_data, &tpid,
+ arr_struct, 0);
+
+ return 0;
+ }
+ return 0;
+ }
+
+ struct var_kill_data_t* kill_data = &arr_struct->array[index];
+
+ u64 delta_sec =
+ (bpf_ktime_get_ns() - kill_data->last_kill_time) / 1000000000;
+
+ if (delta_sec < STALE_INFO) {
+ kill_data->kill_count++;
+ kill_data->last_kill_time = bpf_ktime_get_ns();
+ bpf_probe_read(&arr_struct->array[index],
+ sizeof(arr_struct->array[index]),
+ kill_data);
+ } else {
+ struct var_kill_data_t* kill_data =
+ get_var_kill_data(ctx, spid, tpid, sig);
+ if (kill_data == NULL)
+ return 0;
+ bpf_probe_read(&arr_struct->array[index],
+ sizeof(arr_struct->array[index]),
+ kill_data);
+ }
+ }
+ bpf_map_update_elem(&var_tpid_to_data, &tpid, arr_struct, 0);
+ return 0;
+}
+
+static INLINE void bpf_stats_enter(struct bpf_func_stats_ctx* bpf_stat_ctx,
+ enum bpf_function_id func_id)
+{
+ int func_id_key = func_id;
+
+ bpf_stat_ctx->start_time_ns = bpf_ktime_get_ns();
+ bpf_stat_ctx->bpf_func_stats_data_val =
+ bpf_map_lookup_elem(&bpf_func_stats, &func_id_key);
+ if (bpf_stat_ctx->bpf_func_stats_data_val)
+ bpf_stat_ctx->bpf_func_stats_data_val->num_executions++;
+}
+
+static INLINE void bpf_stats_exit(struct bpf_func_stats_ctx* bpf_stat_ctx)
+{
+ if (bpf_stat_ctx->bpf_func_stats_data_val)
+ bpf_stat_ctx->bpf_func_stats_data_val->time_elapsed_ns +=
+ bpf_ktime_get_ns() - bpf_stat_ctx->start_time_ns;
+}
+
+static INLINE void
+bpf_stats_pre_submit_var_perf_event(struct bpf_func_stats_ctx* bpf_stat_ctx,
+ struct var_metadata_t* meta)
+{
+ if (bpf_stat_ctx->bpf_func_stats_data_val) {
+ bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events++;
+ meta->bpf_stats_num_perf_events =
+ bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events;
+ }
+ meta->bpf_stats_start_ktime_ns = bpf_stat_ctx->start_time_ns;
+ meta->cpu_id = bpf_get_smp_processor_id();
+}
+
+static INLINE size_t
+read_absolute_file_path_from_dentry(struct dentry* filp_dentry, void* payload)
+{
+ size_t length = 0;
+ size_t filepart_length;
+ struct dentry* parent_dentry;
+
+#ifdef UNROLL
+#pragma unroll
+#endif
+ for (int i = 0; i < MAX_PATH_DEPTH; i++) {
+ filepart_length = bpf_probe_read_str(payload, MAX_PATH,
+ BPF_CORE_READ(filp_dentry, d_name.name));
+ barrier_var(filepart_length);
+ if (filepart_length > MAX_PATH)
+ break;
+ barrier_var(filepart_length);
+ payload += filepart_length;
+ length += filepart_length;
+
+ parent_dentry = BPF_CORE_READ(filp_dentry, d_parent);
+ if (filp_dentry == parent_dentry)
+ break;
+ filp_dentry = parent_dentry;
+ }
+
+ return length;
+}
+
+static INLINE bool
+is_ancestor_in_allowed_inodes(struct dentry* filp_dentry)
+{
+ struct dentry* parent_dentry;
+#ifdef UNROLL
+#pragma unroll
+#endif
+ for (int i = 0; i < MAX_PATH_DEPTH; i++) {
+ u64 dir_ino = BPF_CORE_READ(filp_dentry, d_inode, i_ino);
+ bool* allowed_dir = bpf_map_lookup_elem(&allowed_directory_inodes, &dir_ino);
+
+ if (allowed_dir != NULL)
+ return true;
+ parent_dentry = BPF_CORE_READ(filp_dentry, d_parent);
+ if (filp_dentry == parent_dentry)
+ break;
+ filp_dentry = parent_dentry;
+ }
+ return false;
+}
+
+static INLINE bool is_dentry_allowed_for_filemod(struct dentry* file_dentry,
+ u32* device_id,
+ u64* file_ino)
+{
+ u32 dev_id = BPF_CORE_READ(file_dentry, d_sb, s_dev);
+ *device_id = dev_id;
+ bool* allowed_device = bpf_map_lookup_elem(&allowed_devices, &dev_id);
+
+ if (allowed_device == NULL)
+ return false;
+
+ u64 ino = BPF_CORE_READ(file_dentry, d_inode, i_ino);
+ *file_ino = ino;
+ bool* allowed_file = bpf_map_lookup_elem(&allowed_file_inodes, &ino);
+
+ if (allowed_file == NULL)
+ if (!is_ancestor_in_allowed_inodes(BPF_CORE_READ(file_dentry, d_parent)))
+ return false;
+ return true;
+}
+
+SEC("kprobe/proc_sys_write")
+ssize_t BPF_KPROBE(kprobe__proc_sys_write,
+ struct file* filp, const char* buf,
+ size_t count, loff_t* ppos)
+{
+ struct bpf_func_stats_ctx stats_ctx;
+ bpf_stats_enter(&stats_ctx, profiler_bpf_proc_sys_write);
+
+ u32 pid = get_userspace_pid();
+ int zero = 0;
+ struct var_sysctl_data_t* sysctl_data =
+ bpf_map_lookup_elem(&data_heap, &zero);
+ if (!sysctl_data)
+ goto out;
+
+ struct task_struct* task = (struct task_struct*)bpf_get_current_task();
+ sysctl_data->meta.type = SYSCTL_EVENT;
+ void* payload = populate_var_metadata(&sysctl_data->meta, task, pid, sysctl_data->payload);
+ payload = populate_cgroup_info(&sysctl_data->cgroup_data, task, payload);
+
+ populate_ancestors(task, &sysctl_data->ancestors_info);
+
+ sysctl_data->sysctl_val_length = 0;
+ sysctl_data->sysctl_path_length = 0;
+
+ size_t sysctl_val_length = bpf_probe_read_str(payload, CTL_MAXNAME, buf);
+ barrier_var(sysctl_val_length);
+ if (sysctl_val_length <= CTL_MAXNAME) {
+ barrier_var(sysctl_val_length);
+ sysctl_data->sysctl_val_length = sysctl_val_length;
+ payload += sysctl_val_length;
+ }
+
+ size_t sysctl_path_length = bpf_probe_read_str(payload, MAX_PATH,
+ BPF_CORE_READ(filp, f_path.dentry, d_name.name));
+ barrier_var(sysctl_path_length);
+ if (sysctl_path_length <= MAX_PATH) {
+ barrier_var(sysctl_path_length);
+ sysctl_data->sysctl_path_length = sysctl_path_length;
+ payload += sysctl_path_length;
+ }
+
+ bpf_stats_pre_submit_var_perf_event(&stats_ctx, &sysctl_data->meta);
+ unsigned long data_len = payload - (void*)sysctl_data;
+ data_len = data_len > sizeof(struct var_sysctl_data_t)
+ ? sizeof(struct var_sysctl_data_t)
+ : data_len;
+ bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, sysctl_data, data_len);
+out:
+ bpf_stats_exit(&stats_ctx);
+ return 0;
+}
+
+SEC("tracepoint/syscalls/sys_enter_kill")
+int tracepoint__syscalls__sys_enter_kill(struct trace_event_raw_sys_enter* ctx)
+{
+ struct bpf_func_stats_ctx stats_ctx;
+
+ bpf_stats_enter(&stats_ctx, profiler_bpf_sys_enter_kill);
+ int pid = ctx->args[0];
+ int sig = ctx->args[1];
+ int ret = trace_var_sys_kill(ctx, pid, sig);
+ bpf_stats_exit(&stats_ctx);
+ return ret;
+}
+
+SEC("raw_tracepoint/sched_process_exit")
+int raw_tracepoint__sched_process_exit(void* ctx)
+{
+ int zero = 0;
+ struct bpf_func_stats_ctx stats_ctx;
+ bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exit);
+
+ u32 tpid = get_userspace_pid();
+
+ struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid);
+ struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero);
+
+ if (arr_struct == NULL || kill_data == NULL)
+ goto out;
+
+ struct task_struct* task = (struct task_struct*)bpf_get_current_task();
+ struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);
+
+#ifdef UNROLL
+#pragma unroll
+#endif
+ for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) {
+ struct var_kill_data_t* past_kill_data = &arr_struct->array[i];
+
+ if (past_kill_data != NULL && past_kill_data->kill_target_pid == tpid) {
+ bpf_probe_read(kill_data, sizeof(*past_kill_data), past_kill_data);
+ void* payload = kill_data->payload;
+ size_t offset = kill_data->payload_length;
+ if (offset >= MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN)
+ return 0;
+ payload += offset;
+
+ kill_data->kill_target_name_length = 0;
+ kill_data->kill_target_cgroup_proc_length = 0;
+
+ size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm);
+ barrier_var(comm_length);
+ if (comm_length <= TASK_COMM_LEN) {
+ barrier_var(comm_length);
+ kill_data->kill_target_name_length = comm_length;
+ payload += comm_length;
+ }
+
+ size_t cgroup_proc_length = bpf_probe_read_str(payload, KILL_TARGET_LEN,
+ BPF_CORE_READ(proc_kernfs, name));
+ barrier_var(cgroup_proc_length);
+ if (cgroup_proc_length <= KILL_TARGET_LEN) {
+ barrier_var(cgroup_proc_length);
+ kill_data->kill_target_cgroup_proc_length = cgroup_proc_length;
+ payload += cgroup_proc_length;
+ }
+
+ bpf_stats_pre_submit_var_perf_event(&stats_ctx, &kill_data->meta);
+ unsigned long data_len = (void*)payload - (void*)kill_data;
+ data_len = data_len > sizeof(struct var_kill_data_t)
+ ? sizeof(struct var_kill_data_t)
+ : data_len;
+ bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, kill_data, data_len);
+ }
+ }
+ bpf_map_delete_elem(&var_tpid_to_data, &tpid);
+out:
+ bpf_stats_exit(&stats_ctx);
+ return 0;
+}
+
+SEC("raw_tracepoint/sched_process_exec")
+int raw_tracepoint__sched_process_exec(struct bpf_raw_tracepoint_args* ctx)
+{
+ struct bpf_func_stats_ctx stats_ctx;
+ bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exec);
+
+ struct linux_binprm* bprm = (struct linux_binprm*)ctx->args[2];
+ u64 inode = BPF_CORE_READ(bprm, file, f_inode, i_ino);
+
+ bool* should_filter_binprm = bpf_map_lookup_elem(&disallowed_exec_inodes, &inode);
+ if (should_filter_binprm != NULL)
+ goto out;
+
+ int zero = 0;
+ struct var_exec_data_t* proc_exec_data = bpf_map_lookup_elem(&data_heap, &zero);
+ if (!proc_exec_data)
+ goto out;
+
+ if (INODE_FILTER && inode != INODE_FILTER)
+ return 0;
+
+ u32 pid = get_userspace_pid();
+ struct task_struct* task = (struct task_struct*)bpf_get_current_task();
+
+ proc_exec_data->meta.type = EXEC_EVENT;
+ proc_exec_data->bin_path_length = 0;
+ proc_exec_data->cmdline_length = 0;
+ proc_exec_data->environment_length = 0;
+ void* payload = populate_var_metadata(&proc_exec_data->meta, task, pid,
+ proc_exec_data->payload);
+ payload = populate_cgroup_info(&proc_exec_data->cgroup_data, task, payload);
+
+ struct task_struct* parent_task = BPF_CORE_READ(task, real_parent);
+ proc_exec_data->parent_pid = BPF_CORE_READ(parent_task, tgid);
+ proc_exec_data->parent_uid = BPF_CORE_READ(parent_task, real_cred, uid.val);
+ proc_exec_data->parent_exec_id = BPF_CORE_READ(parent_task, self_exec_id);
+ proc_exec_data->parent_start_time = BPF_CORE_READ(parent_task, start_time);
+
+ const char* filename = BPF_CORE_READ(bprm, filename);
+ size_t bin_path_length = bpf_probe_read_str(payload, MAX_FILENAME_LEN, filename);
+ barrier_var(bin_path_length);
+ if (bin_path_length <= MAX_FILENAME_LEN) {
+ barrier_var(bin_path_length);
+ proc_exec_data->bin_path_length = bin_path_length;
+ payload += bin_path_length;
+ }
+
+ void* arg_start = (void*)BPF_CORE_READ(task, mm, arg_start);
+ void* arg_end = (void*)BPF_CORE_READ(task, mm, arg_end);
+ unsigned int cmdline_length = probe_read_lim(payload, arg_start,
+ arg_end - arg_start, MAX_ARGS_LEN);
+
+ if (cmdline_length <= MAX_ARGS_LEN) {
+ barrier_var(cmdline_length);
+ proc_exec_data->cmdline_length = cmdline_length;
+ payload += cmdline_length;
+ }
+
+ if (READ_ENVIRON_FROM_EXEC) {
+ void* env_start = (void*)BPF_CORE_READ(task, mm, env_start);
+ void* env_end = (void*)BPF_CORE_READ(task, mm, env_end);
+ unsigned long env_len = probe_read_lim(payload, env_start,
+ env_end - env_start, MAX_ENVIRON_LEN);
+		if (env_len <= MAX_ENVIRON_LEN) {
+ proc_exec_data->environment_length = env_len;
+ payload += env_len;
+ }
+ }
+
+ bpf_stats_pre_submit_var_perf_event(&stats_ctx, &proc_exec_data->meta);
+ unsigned long data_len = payload - (void*)proc_exec_data;
+ data_len = data_len > sizeof(struct var_exec_data_t)
+ ? sizeof(struct var_exec_data_t)
+ : data_len;
+ bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, proc_exec_data, data_len);
+out:
+ bpf_stats_exit(&stats_ctx);
+ return 0;
+}
+
+SEC("kretprobe/do_filp_open")
+int kprobe_ret__do_filp_open(struct pt_regs* ctx)
+{
+ struct bpf_func_stats_ctx stats_ctx;
+ bpf_stats_enter(&stats_ctx, profiler_bpf_do_filp_open_ret);
+
+ struct file* filp = (struct file*)PT_REGS_RC_CORE(ctx);
+
+ if (filp == NULL || IS_ERR(filp))
+ goto out;
+ unsigned int flags = BPF_CORE_READ(filp, f_flags);
+ if ((flags & (O_RDWR | O_WRONLY)) == 0)
+ goto out;
+ if ((flags & O_TMPFILE) > 0)
+ goto out;
+ struct inode* file_inode = BPF_CORE_READ(filp, f_inode);
+ umode_t mode = BPF_CORE_READ(file_inode, i_mode);
+ if (S_ISDIR(mode) || S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode) ||
+ S_ISSOCK(mode))
+ goto out;
+
+ struct dentry* filp_dentry = BPF_CORE_READ(filp, f_path.dentry);
+ u32 device_id = 0;
+ u64 file_ino = 0;
+ if (!is_dentry_allowed_for_filemod(filp_dentry, &device_id, &file_ino))
+ goto out;
+
+ int zero = 0;
+ struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
+ if (!filemod_data)
+ goto out;
+
+ u32 pid = get_userspace_pid();
+ struct task_struct* task = (struct task_struct*)bpf_get_current_task();
+
+ filemod_data->meta.type = FILEMOD_EVENT;
+ filemod_data->fmod_type = FMOD_OPEN;
+ filemod_data->dst_flags = flags;
+ filemod_data->src_inode = 0;
+ filemod_data->dst_inode = file_ino;
+ filemod_data->src_device_id = 0;
+ filemod_data->dst_device_id = device_id;
+ filemod_data->src_filepath_length = 0;
+ filemod_data->dst_filepath_length = 0;
+
+ void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
+ filemod_data->payload);
+ payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);
+
+ size_t len = read_absolute_file_path_from_dentry(filp_dentry, payload);
+ barrier_var(len);
+ if (len <= MAX_FILEPATH_LENGTH) {
+ barrier_var(len);
+ payload += len;
+ filemod_data->dst_filepath_length = len;
+ }
+ bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
+ unsigned long data_len = payload - (void*)filemod_data;
+ data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
+ bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
+out:
+ bpf_stats_exit(&stats_ctx);
+ return 0;
+}
+
+SEC("kprobe/vfs_link")
+int BPF_KPROBE(kprobe__vfs_link,
+ struct dentry* old_dentry, struct inode* dir,
+ struct dentry* new_dentry, struct inode** delegated_inode)
+{
+ struct bpf_func_stats_ctx stats_ctx;
+ bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_link);
+
+ u32 src_device_id = 0;
+ u64 src_file_ino = 0;
+ u32 dst_device_id = 0;
+ u64 dst_file_ino = 0;
+ if (!is_dentry_allowed_for_filemod(old_dentry, &src_device_id, &src_file_ino) &&
+ !is_dentry_allowed_for_filemod(new_dentry, &dst_device_id, &dst_file_ino))
+ goto out;
+
+ int zero = 0;
+ struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
+ if (!filemod_data)
+ goto out;
+
+ u32 pid = get_userspace_pid();
+ struct task_struct* task = (struct task_struct*)bpf_get_current_task();
+
+ filemod_data->meta.type = FILEMOD_EVENT;
+ filemod_data->fmod_type = FMOD_LINK;
+ filemod_data->dst_flags = 0;
+ filemod_data->src_inode = src_file_ino;
+ filemod_data->dst_inode = dst_file_ino;
+ filemod_data->src_device_id = src_device_id;
+ filemod_data->dst_device_id = dst_device_id;
+ filemod_data->src_filepath_length = 0;
+ filemod_data->dst_filepath_length = 0;
+
+ void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
+ filemod_data->payload);
+ payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);
+
+ size_t len = read_absolute_file_path_from_dentry(old_dentry, payload);
+ barrier_var(len);
+ if (len <= MAX_FILEPATH_LENGTH) {
+ barrier_var(len);
+ payload += len;
+ filemod_data->src_filepath_length = len;
+ }
+
+ len = read_absolute_file_path_from_dentry(new_dentry, payload);
+ barrier_var(len);
+ if (len <= MAX_FILEPATH_LENGTH) {
+ barrier_var(len);
+ payload += len;
+ filemod_data->dst_filepath_length = len;
+ }
+
+ bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
+ unsigned long data_len = payload - (void*)filemod_data;
+ data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
+ bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
+out:
+ bpf_stats_exit(&stats_ctx);
+ return 0;
+}
+
+SEC("kprobe/vfs_symlink")
+int BPF_KPROBE(kprobe__vfs_symlink, struct inode* dir, struct dentry* dentry,
+ const char* oldname)
+{
+ struct bpf_func_stats_ctx stats_ctx;
+ bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_symlink);
+
+ u32 dst_device_id = 0;
+ u64 dst_file_ino = 0;
+ if (!is_dentry_allowed_for_filemod(dentry, &dst_device_id, &dst_file_ino))
+ goto out;
+
+ int zero = 0;
+ struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
+ if (!filemod_data)
+ goto out;
+
+ u32 pid = get_userspace_pid();
+ struct task_struct* task = (struct task_struct*)bpf_get_current_task();
+
+ filemod_data->meta.type = FILEMOD_EVENT;
+ filemod_data->fmod_type = FMOD_SYMLINK;
+ filemod_data->dst_flags = 0;
+ filemod_data->src_inode = 0;
+ filemod_data->dst_inode = dst_file_ino;
+ filemod_data->src_device_id = 0;
+ filemod_data->dst_device_id = dst_device_id;
+ filemod_data->src_filepath_length = 0;
+ filemod_data->dst_filepath_length = 0;
+
+ void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
+ filemod_data->payload);
+ payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);
+
+ size_t len = bpf_probe_read_str(payload, MAX_FILEPATH_LENGTH, oldname);
+ barrier_var(len);
+ if (len <= MAX_FILEPATH_LENGTH) {
+ barrier_var(len);
+ payload += len;
+ filemod_data->src_filepath_length = len;
+ }
+ len = read_absolute_file_path_from_dentry(dentry, payload);
+ barrier_var(len);
+ if (len <= MAX_FILEPATH_LENGTH) {
+ barrier_var(len);
+ payload += len;
+ filemod_data->dst_filepath_length = len;
+ }
+ bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
+ unsigned long data_len = payload - (void*)filemod_data;
+ data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
+ bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
+out:
+ bpf_stats_exit(&stats_ctx);
+ return 0;
+}
+
+SEC("raw_tracepoint/sched_process_fork")
+int raw_tracepoint__sched_process_fork(struct bpf_raw_tracepoint_args* ctx)
+{
+ struct bpf_func_stats_ctx stats_ctx;
+ bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_fork);
+
+ int zero = 0;
+ struct var_fork_data_t* fork_data = bpf_map_lookup_elem(&data_heap, &zero);
+ if (!fork_data)
+ goto out;
+
+ struct task_struct* parent = (struct task_struct*)ctx->args[0];
+ struct task_struct* child = (struct task_struct*)ctx->args[1];
+ fork_data->meta.type = FORK_EVENT;
+
+ void* payload = populate_var_metadata(&fork_data->meta, child,
+ BPF_CORE_READ(child, pid), fork_data->payload);
+ fork_data->parent_pid = BPF_CORE_READ(parent, pid);
+ fork_data->parent_exec_id = BPF_CORE_READ(parent, self_exec_id);
+ fork_data->parent_start_time = BPF_CORE_READ(parent, start_time);
+ bpf_stats_pre_submit_var_perf_event(&stats_ctx, &fork_data->meta);
+
+ unsigned long data_len = payload - (void*)fork_data;
+ data_len = data_len > sizeof(*fork_data) ? sizeof(*fork_data) : data_len;
+ bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, fork_data, data_len);
+out:
+ bpf_stats_exit(&stats_ctx);
+ return 0;
+}
+char _license[] SEC("license") = "GPL";
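A load-bearing idiom throughout profiler.inc.h is the single-entry per-CPU array data_heap used as scratch memory: BPF stacks are capped at 512 bytes, so every event record, up to the multi-kilobyte union any_profiler_data_t, is assembled in map storage and then shipped out through the perf event array. The pattern stripped to its core (a sketch, not the profiler):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct big_event {
	char payload[4096];	/* far beyond the 512-byte stack limit */
};

struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(max_entries, 1);
	__type(key, __u32);
	__type(value, struct big_event);
} scratch SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(int));
} events SEC(".maps");

SEC("raw_tp/sched_process_fork")
int on_fork(void *ctx)
{
	__u32 zero = 0;
	struct big_event *e = bpf_map_lookup_elem(&scratch, &zero);

	if (!e)
		return 0;
	e->payload[0] = 'x';	/* build the record in map memory... */
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
			      e, sizeof(*e));	/* ...then emit it */
	return 0;
}

char _license[] SEC("license") = "GPL";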
diff --git a/tools/testing/selftests/bpf/progs/profiler1.c b/tools/testing/selftests/bpf/progs/profiler1.c
new file mode 100644
index 000000000000..4df9088bfc00
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/profiler1.c
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var))
+#define UNROLL
+#define INLINE __always_inline
+#include "profiler.inc.h"
diff --git a/tools/testing/selftests/bpf/progs/profiler2.c b/tools/testing/selftests/bpf/progs/profiler2.c
new file mode 100644
index 000000000000..0f32a3cbf556
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/profiler2.c
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#define barrier_var(var) /**/
+/* undef #define UNROLL */
+#define INLINE /**/
+#include "profiler.inc.h"
diff --git a/tools/testing/selftests/bpf/progs/profiler3.c b/tools/testing/selftests/bpf/progs/profiler3.c
new file mode 100644
index 000000000000..6249fc31ccb0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/profiler3.c
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#define barrier_var(var) /**/
+#define UNROLL
+#define INLINE __noinline
+#include "profiler.inc.h"
diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h
index cc615b82b56e..2fb7adafb6b6 100644
--- a/tools/testing/selftests/bpf/progs/pyperf.h
+++ b/tools/testing/selftests/bpf/progs/pyperf.h
@@ -67,7 +67,12 @@ typedef struct {
void* co_name; // PyCodeObject.co_name
} FrameData;
-static __always_inline void *get_thread_state(void *tls_base, PidData *pidData)
+#ifdef SUBPROGS
+__noinline
+#else
+__always_inline
+#endif
+static void *get_thread_state(void *tls_base, PidData *pidData)
{
void* thread_state;
int key;
@@ -155,7 +160,9 @@ struct {
} stackmap SEC(".maps");
#ifdef GLOBAL_FUNC
-__attribute__((noinline))
+__noinline
+#elif defined(SUBPROGS)
+static __noinline
#else
static __always_inline
#endif
diff --git a/tools/testing/selftests/bpf/progs/pyperf_subprogs.c b/tools/testing/selftests/bpf/progs/pyperf_subprogs.c
new file mode 100644
index 000000000000..60e27a7f0cca
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/pyperf_subprogs.c
@@ -0,0 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#define STACK_MAX_LEN 50
+#define SUBPROGS
+#include "pyperf.h"
diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h
index ad61b722a9de..7de534f38c3f 100644
--- a/tools/testing/selftests/bpf/progs/strobemeta.h
+++ b/tools/testing/selftests/bpf/progs/strobemeta.h
@@ -266,8 +266,12 @@ struct tls_index {
uint64_t offset;
};
-static __always_inline void *calc_location(struct strobe_value_loc *loc,
- void *tls_base)
+#ifdef SUBPROGS
+__noinline
+#else
+__always_inline
+#endif
+static void *calc_location(struct strobe_value_loc *loc, void *tls_base)
{
/*
* tls_mode value is:
@@ -327,10 +331,15 @@ static __always_inline void *calc_location(struct strobe_value_loc *loc,
: NULL;
}
-static __always_inline void read_int_var(struct strobemeta_cfg *cfg,
- size_t idx, void *tls_base,
- struct strobe_value_generic *value,
- struct strobemeta_payload *data)
+#ifdef SUBPROGS
+__noinline
+#else
+__always_inline
+#endif
+static void read_int_var(struct strobemeta_cfg *cfg,
+ size_t idx, void *tls_base,
+ struct strobe_value_generic *value,
+ struct strobemeta_payload *data)
{
void *location = calc_location(&cfg->int_locs[idx], tls_base);
if (!location)
@@ -440,8 +449,13 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
* read_strobe_meta returns NULL, if no metadata was read; otherwise returns
* pointer to *right after* payload ends
*/
-static __always_inline void *read_strobe_meta(struct task_struct *task,
- struct strobemeta_payload *data)
+#ifdef SUBPROGS
+__noinline
+#else
+__always_inline
+#endif
+static void *read_strobe_meta(struct task_struct *task,
+ struct strobemeta_payload *data)
{
pid_t pid = bpf_get_current_pid_tgid() >> 32;
struct strobe_value_generic value = {0};
diff --git a/tools/testing/selftests/bpf/progs/strobemeta_subprogs.c b/tools/testing/selftests/bpf/progs/strobemeta_subprogs.c
new file mode 100644
index 000000000000..b6c01f8fc559
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/strobemeta_subprogs.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+// Copyright (c) 2019 Facebook
+
+#define STROBE_MAX_INTS 2
+#define STROBE_MAX_STRS 25
+#define STROBE_MAX_MAPS 13
+#define STROBE_MAX_MAP_ENTRIES 20
+#define NO_UNROLL
+#define SUBPROGS
+#include "strobemeta.h"
diff --git a/tools/testing/selftests/bpf/progs/tailcall1.c b/tools/testing/selftests/bpf/progs/tailcall1.c
index 1f407e65ae52..7115bcefbe8a 100644
--- a/tools/testing/selftests/bpf/progs/tailcall1.c
+++ b/tools/testing/selftests/bpf/progs/tailcall1.c
@@ -26,20 +26,20 @@ int entry(struct __sk_buff *skb)
/* Multiple locations to make sure we patch
* all of them.
*/
- bpf_tail_call(skb, &jmp_table, 0);
- bpf_tail_call(skb, &jmp_table, 0);
- bpf_tail_call(skb, &jmp_table, 0);
- bpf_tail_call(skb, &jmp_table, 0);
-
- bpf_tail_call(skb, &jmp_table, 1);
- bpf_tail_call(skb, &jmp_table, 1);
- bpf_tail_call(skb, &jmp_table, 1);
- bpf_tail_call(skb, &jmp_table, 1);
-
- bpf_tail_call(skb, &jmp_table, 2);
- bpf_tail_call(skb, &jmp_table, 2);
- bpf_tail_call(skb, &jmp_table, 2);
- bpf_tail_call(skb, &jmp_table, 2);
+ bpf_tail_call_static(skb, &jmp_table, 0);
+ bpf_tail_call_static(skb, &jmp_table, 0);
+ bpf_tail_call_static(skb, &jmp_table, 0);
+ bpf_tail_call_static(skb, &jmp_table, 0);
+
+ bpf_tail_call_static(skb, &jmp_table, 1);
+ bpf_tail_call_static(skb, &jmp_table, 1);
+ bpf_tail_call_static(skb, &jmp_table, 1);
+ bpf_tail_call_static(skb, &jmp_table, 1);
+
+ bpf_tail_call_static(skb, &jmp_table, 2);
+ bpf_tail_call_static(skb, &jmp_table, 2);
+ bpf_tail_call_static(skb, &jmp_table, 2);
+ bpf_tail_call_static(skb, &jmp_table, 2);
return 3;
}
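Every tailcall test migrates from bpf_tail_call() to bpf_tail_call_static(), which this series adds to bpf_helpers.h: it demands a compile-time-constant slot and pins the three arguments into r1-r3 with inline asm, so the verifier and JIT can see the constant index and patch the call into a direct jump. Roughly (abridged; bpf_helpers.h carries the authoritative definition):

static __always_inline void
bpf_tail_call_static(void *ctx, const void *map, const __u32 slot)
{
	if (!__builtin_constant_p(slot))
		__bpf_unreachable();	/* fails the build on variable slots */

	/* Force ctx/map/slot into r1-r3; "call 12" is BPF_FUNC_tail_call. */
	asm volatile("r1 = %[ctx]\n\t"
		     "r2 = %[map]\n\t"
		     "r3 = %[slot]\n\t"
		     "call 12"
		     :: [ctx]"r"(ctx), [map]"r"(map), [slot]"i"(slot)
		     : "r0", "r1", "r2", "r3", "r4", "r5");
}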
diff --git a/tools/testing/selftests/bpf/progs/tailcall2.c b/tools/testing/selftests/bpf/progs/tailcall2.c
index a093e739cf0e..0431e4fe7efd 100644
--- a/tools/testing/selftests/bpf/progs/tailcall2.c
+++ b/tools/testing/selftests/bpf/progs/tailcall2.c
@@ -13,14 +13,14 @@ struct {
SEC("classifier/0")
int bpf_func_0(struct __sk_buff *skb)
{
- bpf_tail_call(skb, &jmp_table, 1);
+ bpf_tail_call_static(skb, &jmp_table, 1);
return 0;
}
SEC("classifier/1")
int bpf_func_1(struct __sk_buff *skb)
{
- bpf_tail_call(skb, &jmp_table, 2);
+ bpf_tail_call_static(skb, &jmp_table, 2);
return 1;
}
@@ -33,25 +33,25 @@ int bpf_func_2(struct __sk_buff *skb)
SEC("classifier/3")
int bpf_func_3(struct __sk_buff *skb)
{
- bpf_tail_call(skb, &jmp_table, 4);
+ bpf_tail_call_static(skb, &jmp_table, 4);
return 3;
}
SEC("classifier/4")
int bpf_func_4(struct __sk_buff *skb)
{
- bpf_tail_call(skb, &jmp_table, 3);
+ bpf_tail_call_static(skb, &jmp_table, 3);
return 4;
}
SEC("classifier")
int entry(struct __sk_buff *skb)
{
- bpf_tail_call(skb, &jmp_table, 0);
+ bpf_tail_call_static(skb, &jmp_table, 0);
/* Check multi-prog update. */
- bpf_tail_call(skb, &jmp_table, 2);
+ bpf_tail_call_static(skb, &jmp_table, 2);
/* Check tail call limit. */
- bpf_tail_call(skb, &jmp_table, 3);
+ bpf_tail_call_static(skb, &jmp_table, 3);
return 3;
}
diff --git a/tools/testing/selftests/bpf/progs/tailcall3.c b/tools/testing/selftests/bpf/progs/tailcall3.c
index cabda877cf0a..739dc2a51e74 100644
--- a/tools/testing/selftests/bpf/progs/tailcall3.c
+++ b/tools/testing/selftests/bpf/progs/tailcall3.c
@@ -16,14 +16,14 @@ SEC("classifier/0")
int bpf_func_0(struct __sk_buff *skb)
{
count++;
- bpf_tail_call(skb, &jmp_table, 0);
+ bpf_tail_call_static(skb, &jmp_table, 0);
return 1;
}
SEC("classifier")
int entry(struct __sk_buff *skb)
{
- bpf_tail_call(skb, &jmp_table, 0);
+ bpf_tail_call_static(skb, &jmp_table, 0);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c
new file mode 100644
index 000000000000..0103f3dd9f02
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 2);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+#define TAIL_FUNC(x) \
+ SEC("classifier/" #x) \
+ int bpf_func_##x(struct __sk_buff *skb) \
+ { \
+ return x; \
+ }
+TAIL_FUNC(0)
+TAIL_FUNC(1)
+
+static __noinline
+int subprog_tail(struct __sk_buff *skb)
+{
+ bpf_tail_call_static(skb, &jmp_table, 0);
+
+ return skb->len * 2;
+}
+
+SEC("classifier")
+int entry(struct __sk_buff *skb)
+{
+ bpf_tail_call_static(skb, &jmp_table, 1);
+
+ return subprog_tail(skb);
+}
+
+char __license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c
new file mode 100644
index 000000000000..7b1c04183824
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_legacy.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+static __noinline
+int subprog_tail(struct __sk_buff *skb)
+{
+ if (load_byte(skb, 0))
+ bpf_tail_call_static(skb, &jmp_table, 1);
+ else
+ bpf_tail_call_static(skb, &jmp_table, 0);
+ return 1;
+}
+
+static volatile int count;
+
+SEC("classifier/0")
+int bpf_func_0(struct __sk_buff *skb)
+{
+ count++;
+ return subprog_tail(skb);
+}
+
+SEC("classifier")
+int entry(struct __sk_buff *skb)
+{
+ bpf_tail_call_static(skb, &jmp_table, 0);
+
+ return 0;
+}
+
+char __license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c
new file mode 100644
index 000000000000..0d5482bea6c9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_legacy.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 2);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+__noinline
+int subprog_tail2(struct __sk_buff *skb)
+{
+ volatile char arr[64] = {};
+
+ if (load_word(skb, 0) || load_half(skb, 0))
+ bpf_tail_call_static(skb, &jmp_table, 10);
+ else
+ bpf_tail_call_static(skb, &jmp_table, 1);
+
+ return skb->len;
+}
+
+static __noinline
+int subprog_tail(struct __sk_buff *skb)
+{
+ volatile char arr[64] = {};
+
+ bpf_tail_call_static(skb, &jmp_table, 0);
+
+ return skb->len * 2;
+}
+
+SEC("classifier/0")
+int bpf_func_0(struct __sk_buff *skb)
+{
+ volatile char arr[128] = {};
+
+ return subprog_tail2(skb);
+}
+
+SEC("classifier/1")
+int bpf_func_1(struct __sk_buff *skb)
+{
+ volatile char arr[128] = {};
+
+ return skb->len * 3;
+}
+
+SEC("classifier")
+int entry(struct __sk_buff *skb)
+{
+ volatile char arr[128] = {};
+
+ return subprog_tail(skb);
+}
+
+char __license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c
new file mode 100644
index 000000000000..9a1b166b7fbe
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 3);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+static volatile int count;
+
+__noinline
+int subprog_tail_2(struct __sk_buff *skb)
+{
+ bpf_tail_call_static(skb, &jmp_table, 2);
+ return skb->len * 3;
+}
+
+__noinline
+int subprog_tail_1(struct __sk_buff *skb)
+{
+ bpf_tail_call_static(skb, &jmp_table, 1);
+ return skb->len * 2;
+}
+
+__noinline
+int subprog_tail(struct __sk_buff *skb)
+{
+ bpf_tail_call_static(skb, &jmp_table, 0);
+ return skb->len;
+}
+
+SEC("classifier/1")
+int bpf_func_1(struct __sk_buff *skb)
+{
+ return subprog_tail_2(skb);
+}
+
+SEC("classifier/2")
+int bpf_func_2(struct __sk_buff *skb)
+{
+ count++;
+ return subprog_tail_2(skb);
+}
+
+SEC("classifier/0")
+int bpf_func_0(struct __sk_buff *skb)
+{
+ return subprog_tail_1(skb);
+}
+
+SEC("classifier")
+int entry(struct __sk_buff *skb)
+{
+ return subprog_tail(skb);
+}
+
+char __license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
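The tailcall_bpf2bpf* programs cover the new ability (x86-64 JIT) to mix BPF subprograms with tail calls; the volatile stack arrays in variant 3 probe the tightened accounting, since combining the two caps each frame at 256 bytes to bound worst-case stack usage across a full tail-call chain. Stripped down, the shape being tested is (a sketch):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
	__uint(max_entries, 1);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u32));
} progs SEC(".maps");

/* A tail call issued from *inside* a subprogram replaces the whole
 * program, not just this frame.
 */
static __noinline int tail_from_subprog(struct __sk_buff *skb)
{
	bpf_tail_call_static(skb, &progs, 0);
	return skb->len;	/* reached only if slot 0 is empty */
}

SEC("classifier")
int entry(struct __sk_buff *skb)
{
	return tail_from_subprog(skb);
}

char _license[] SEC("license") = "GPL";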
diff --git a/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c b/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c
index e5093796be97..c1e0c8c7c55f 100644
--- a/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c
+++ b/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c
@@ -11,6 +11,13 @@ struct inner_map {
} inner_map1 SEC(".maps"),
inner_map2 SEC(".maps");
+struct inner_map_sz2 {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 2);
+ __type(key, int);
+ __type(value, int);
+} inner_map_sz2 SEC(".maps");
+
struct outer_arr {
__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
__uint(max_entries, 3);
@@ -34,6 +41,43 @@ struct outer_arr {
.values = { (void *)&inner_map1, 0, (void *)&inner_map2 },
};
+struct inner_map_sz3 {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(map_flags, BPF_F_INNER_MAP);
+ __uint(max_entries, 3);
+ __type(key, int);
+ __type(value, int);
+} inner_map3 SEC(".maps"),
+ inner_map4 SEC(".maps");
+
+struct inner_map_sz4 {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(map_flags, BPF_F_INNER_MAP);
+ __uint(max_entries, 5);
+ __type(key, int);
+ __type(value, int);
+} inner_map5 SEC(".maps");
+
+struct outer_arr_dyn {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 3);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+ __array(values, struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(map_flags, BPF_F_INNER_MAP);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+ });
+} outer_arr_dyn SEC(".maps") = {
+ .values = {
+ [0] = (void *)&inner_map3,
+ [1] = (void *)&inner_map4,
+ [2] = (void *)&inner_map5,
+ },
+};
+
struct outer_hash {
__uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
__uint(max_entries, 5);
@@ -50,6 +94,30 @@ struct outer_hash {
},
};
+struct sockarr_sz1 {
+ __uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+} sockarr_sz1 SEC(".maps");
+
+struct sockarr_sz2 {
+ __uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
+ __uint(max_entries, 2);
+ __type(key, int);
+ __type(value, int);
+} sockarr_sz2 SEC(".maps");
+
+struct outer_sockarr_sz1 {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+ __array(values, struct sockarr_sz1);
+} outer_sockarr SEC(".maps") = {
+ .values = { (void *)&sockarr_sz1 },
+};
+
int input = 0;
SEC("raw_tp/sys_enter")
@@ -70,6 +138,12 @@ int handle__sys_enter(void *ctx)
val = input + 1;
bpf_map_update_elem(inner_map, &key, &val, 0);
+ inner_map = bpf_map_lookup_elem(&outer_arr_dyn, &key);
+ if (!inner_map)
+ return 1;
+ val = input + 2;
+ bpf_map_update_elem(inner_map, &key, &val, 0);
+
return 0;
}
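
The BPF_F_INNER_MAP flag introduced above is what lets outer_arr_dyn hold
inner arrays of different max_entries (1, 3, and 5): it trades away the
inlined array-lookup fast path so the inner map's size no longer has to
match the declared template. A minimal user-space sketch of swapping a
differently sized inner map into a slot at runtime; obtaining the
bpf_object is assumed, and the map names follow the declarations above:

#include <unistd.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>

/* Replace slot 0 of outer_arr_dyn with a fresh 16-entry inner array.
 * Without BPF_F_INNER_MAP on the new map, the kernel would reject it
 * for not matching the template's max_entries.
 */
static int swap_inner(struct bpf_object *obj)
{
	int outer_fd, inner_fd, key = 0, err;

	outer_fd = bpf_object__find_map_fd_by_name(obj, "outer_arr_dyn");
	if (outer_fd < 0)
		return outer_fd;

	inner_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(int),
				  sizeof(int), 16, BPF_F_INNER_MAP);
	if (inner_fd < 0)
		return inner_fd;

	err = bpf_map_update_elem(outer_fd, &key, &inner_fd, BPF_ANY);
	close(inner_fd);	/* the outer map holds its own reference */
	return err;
}
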
diff --git a/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c b/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c
new file mode 100644
index 000000000000..9a6b85dd52d2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c
@@ -0,0 +1,174 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <string.h>
+#include <errno.h>
+#include <netinet/in.h>
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/if_ether.h>
+#include <linux/pkt_cls.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include "bpf_tcp_helpers.h"
+
+struct sockaddr_in6 srv_sa6 = {};
+__u16 listen_tp_sport = 0;
+__u16 req_sk_sport = 0;
+__u32 recv_cookie = 0;
+__u32 gen_cookie = 0;
+__u32 linum = 0;
+
+#define LOG() ({ if (!linum) linum = __LINE__; })
+
+static void test_syncookie_helper(struct ipv6hdr *ip6h, struct tcphdr *th,
+ struct tcp_sock *tp,
+ struct __sk_buff *skb)
+{
+ if (th->syn) {
+ __s64 mss_cookie;
+ void *data_end;
+
+ data_end = (void *)(long)(skb->data_end);
+
+ if (th->doff * 4 != 40) {
+ LOG();
+ return;
+ }
+
+ if ((void *)th + 40 > data_end) {
+ LOG();
+ return;
+ }
+
+ mss_cookie = bpf_tcp_gen_syncookie(tp, ip6h, sizeof(*ip6h),
+ th, 40);
+ if (mss_cookie < 0) {
+ if (mss_cookie != -ENOENT)
+ LOG();
+ } else {
+ gen_cookie = (__u32)mss_cookie;
+ }
+ } else if (gen_cookie) {
+ /* It was in cookie mode */
+ int ret = bpf_tcp_check_syncookie(tp, ip6h, sizeof(*ip6h),
+ th, sizeof(*th));
+
+ if (ret < 0) {
+ if (ret != -ENOENT)
+ LOG();
+ } else {
+ recv_cookie = bpf_ntohl(th->ack_seq) - 1;
+ }
+ }
+}
+
+static int handle_ip6_tcp(struct ipv6hdr *ip6h, struct __sk_buff *skb)
+{
+ struct bpf_sock_tuple *tuple;
+ struct bpf_sock *bpf_skc;
+ unsigned int tuple_len;
+ struct tcphdr *th;
+ void *data_end;
+
+ data_end = (void *)(long)(skb->data_end);
+
+ th = (struct tcphdr *)(ip6h + 1);
+ if (th + 1 > data_end)
+ return TC_ACT_OK;
+
+ /* Is this the test traffic? */
+ if (th->dest != srv_sa6.sin6_port)
+ return TC_ACT_OK;
+
+ tuple_len = sizeof(tuple->ipv6);
+ tuple = (struct bpf_sock_tuple *)&ip6h->saddr;
+ if ((void *)tuple + tuple_len > data_end) {
+ LOG();
+ return TC_ACT_OK;
+ }
+
+ bpf_skc = bpf_skc_lookup_tcp(skb, tuple, tuple_len,
+ BPF_F_CURRENT_NETNS, 0);
+ if (!bpf_skc) {
+ LOG();
+ return TC_ACT_OK;
+ }
+
+ if (bpf_skc->state == BPF_TCP_NEW_SYN_RECV) {
+ struct request_sock *req_sk;
+
+ req_sk = (struct request_sock *)bpf_skc_to_tcp_request_sock(bpf_skc);
+ if (!req_sk) {
+ LOG();
+ goto release;
+ }
+
+ if (bpf_sk_assign(skb, req_sk, 0)) {
+ LOG();
+ goto release;
+ }
+
+ req_sk_sport = req_sk->__req_common.skc_num;
+
+ bpf_sk_release(req_sk);
+ return TC_ACT_OK;
+ } else if (bpf_skc->state == BPF_TCP_LISTEN) {
+ struct tcp_sock *tp;
+
+ tp = bpf_skc_to_tcp_sock(bpf_skc);
+ if (!tp) {
+ LOG();
+ goto release;
+ }
+
+ if (bpf_sk_assign(skb, tp, 0)) {
+ LOG();
+ goto release;
+ }
+
+ listen_tp_sport = tp->inet_conn.icsk_inet.sk.__sk_common.skc_num;
+
+ test_syncookie_helper(ip6h, th, tp, skb);
+ bpf_sk_release(tp);
+ return TC_ACT_OK;
+ }
+
+ if (bpf_sk_assign(skb, bpf_skc, 0))
+ LOG();
+
+release:
+ bpf_sk_release(bpf_skc);
+ return TC_ACT_OK;
+}
+
+SEC("classifier/ingress")
+int cls_ingress(struct __sk_buff *skb)
+{
+ struct ipv6hdr *ip6h;
+ struct ethhdr *eth;
+ void *data_end;
+
+ data_end = (void *)(long)(skb->data_end);
+
+ eth = (struct ethhdr *)(long)(skb->data);
+ if (eth + 1 > data_end)
+ return TC_ACT_OK;
+
+ if (eth->h_proto != bpf_htons(ETH_P_IPV6))
+ return TC_ACT_OK;
+
+ ip6h = (struct ipv6hdr *)(eth + 1);
+ if (ip6h + 1 > data_end)
+ return TC_ACT_OK;
+
+ if (ip6h->nexthdr == IPPROTO_TCP)
+ return handle_ip6_tcp(ip6h, skb);
+
+ return TC_ACT_OK;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.c b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
index f0b72e86bee5..c9f8464996ea 100644
--- a/tools/testing/selftests/bpf/progs/test_cls_redirect.c
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
@@ -22,6 +22,12 @@
#include "test_cls_redirect.h"
+#ifdef SUBPROGS
+#define INLINING __noinline
+#else
+#define INLINING __always_inline
+#endif
+
#define offsetofend(TYPE, MEMBER) \
(offsetof(TYPE, MEMBER) + sizeof((((TYPE *)0)->MEMBER)))
@@ -125,7 +131,7 @@ typedef struct buf {
uint8_t *const tail;
} buf_t;
-static size_t buf_off(const buf_t *buf)
+static __always_inline size_t buf_off(const buf_t *buf)
{
/* Clang seems to optimize constructs like
* a - b + c
@@ -145,7 +151,7 @@ static size_t buf_off(const buf_t *buf)
return off;
}
-static bool buf_copy(buf_t *buf, void *dst, size_t len)
+static __always_inline bool buf_copy(buf_t *buf, void *dst, size_t len)
{
if (bpf_skb_load_bytes(buf->skb, buf_off(buf), dst, len)) {
return false;
@@ -155,7 +161,7 @@ static bool buf_copy(buf_t *buf, void *dst, size_t len)
return true;
}
-static bool buf_skip(buf_t *buf, const size_t len)
+static __always_inline bool buf_skip(buf_t *buf, const size_t len)
{
/* Check whether off + len is valid in the non-linear part. */
if (buf_off(buf) + len > buf->skb->len) {
@@ -173,7 +179,7 @@ static bool buf_skip(buf_t *buf, const size_t len)
* If scratch is not NULL, the function will attempt to load non-linear
* data via bpf_skb_load_bytes. On success, scratch is returned.
*/
-static void *buf_assign(buf_t *buf, const size_t len, void *scratch)
+static __always_inline void *buf_assign(buf_t *buf, const size_t len, void *scratch)
{
if (buf->head + len > buf->tail) {
if (scratch == NULL) {
@@ -188,7 +194,7 @@ static void *buf_assign(buf_t *buf, const size_t len, void *scratch)
return ptr;
}
-static bool pkt_skip_ipv4_options(buf_t *buf, const struct iphdr *ipv4)
+static INLINING bool pkt_skip_ipv4_options(buf_t *buf, const struct iphdr *ipv4)
{
if (ipv4->ihl <= 5) {
return true;
@@ -197,13 +203,13 @@ static bool pkt_skip_ipv4_options(buf_t *buf, const struct iphdr *ipv4)
return buf_skip(buf, (ipv4->ihl - 5) * 4);
}
-static bool ipv4_is_fragment(const struct iphdr *ip)
+static INLINING bool ipv4_is_fragment(const struct iphdr *ip)
{
uint16_t frag_off = ip->frag_off & bpf_htons(IP_OFFSET_MASK);
return (ip->frag_off & bpf_htons(IP_MF)) != 0 || frag_off > 0;
}
-static struct iphdr *pkt_parse_ipv4(buf_t *pkt, struct iphdr *scratch)
+static __always_inline struct iphdr *pkt_parse_ipv4(buf_t *pkt, struct iphdr *scratch)
{
struct iphdr *ipv4 = buf_assign(pkt, sizeof(*ipv4), scratch);
if (ipv4 == NULL) {
@@ -222,7 +228,7 @@ static struct iphdr *pkt_parse_ipv4(buf_t *pkt, struct iphdr *scratch)
}
/* Parse the L4 ports from a packet, assuming a layout like TCP or UDP. */
-static bool pkt_parse_icmp_l4_ports(buf_t *pkt, flow_ports_t *ports)
+static INLINING bool pkt_parse_icmp_l4_ports(buf_t *pkt, flow_ports_t *ports)
{
if (!buf_copy(pkt, ports, sizeof(*ports))) {
return false;
@@ -237,7 +243,7 @@ static bool pkt_parse_icmp_l4_ports(buf_t *pkt, flow_ports_t *ports)
return true;
}
-static uint16_t pkt_checksum_fold(uint32_t csum)
+static INLINING uint16_t pkt_checksum_fold(uint32_t csum)
{
/* The highest reasonable value for an IPv4 header
* checksum requires two folds, so we just do that always.
@@ -247,7 +253,7 @@ static uint16_t pkt_checksum_fold(uint32_t csum)
return (uint16_t)~csum;
}
-static void pkt_ipv4_checksum(struct iphdr *iph)
+static INLINING void pkt_ipv4_checksum(struct iphdr *iph)
{
iph->check = 0;
@@ -268,10 +274,11 @@ static void pkt_ipv4_checksum(struct iphdr *iph)
iph->check = pkt_checksum_fold(acc);
}
-static bool pkt_skip_ipv6_extension_headers(buf_t *pkt,
- const struct ipv6hdr *ipv6,
- uint8_t *upper_proto,
- bool *is_fragment)
+static INLINING
+bool pkt_skip_ipv6_extension_headers(buf_t *pkt,
+ const struct ipv6hdr *ipv6,
+ uint8_t *upper_proto,
+ bool *is_fragment)
{
/* We understand five extension headers.
* https://tools.ietf.org/html/rfc8200#section-4.1 states that all
@@ -336,7 +343,7 @@ static bool pkt_skip_ipv6_extension_headers(buf_t *pkt,
* scratch is allocated on the stack. However, this usage should be safe since
* it's the caller's stack after all.
*/
-static inline __attribute__((__always_inline__)) struct ipv6hdr *
+static __always_inline struct ipv6hdr *
pkt_parse_ipv6(buf_t *pkt, struct ipv6hdr *scratch, uint8_t *proto,
bool *is_fragment)
{
@@ -354,20 +361,20 @@ pkt_parse_ipv6(buf_t *pkt, struct ipv6hdr *scratch, uint8_t *proto,
/* Global metrics, per CPU
*/
-struct bpf_map_def metrics_map SEC("maps") = {
- .type = BPF_MAP_TYPE_PERCPU_ARRAY,
- .key_size = sizeof(unsigned int),
- .value_size = sizeof(metrics_t),
- .max_entries = 1,
-};
-
-static metrics_t *get_global_metrics(void)
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, unsigned int);
+ __type(value, metrics_t);
+} metrics_map SEC(".maps");
+
+static INLINING metrics_t *get_global_metrics(void)
{
uint64_t key = 0;
return bpf_map_lookup_elem(&metrics_map, &key);
}
-static ret_t accept_locally(struct __sk_buff *skb, encap_headers_t *encap)
+static INLINING ret_t accept_locally(struct __sk_buff *skb, encap_headers_t *encap)
{
const int payload_off =
sizeof(*encap) +
@@ -388,8 +395,8 @@ static ret_t accept_locally(struct __sk_buff *skb, encap_headers_t *encap)
return bpf_redirect(skb->ifindex, BPF_F_INGRESS);
}
-static ret_t forward_with_gre(struct __sk_buff *skb, encap_headers_t *encap,
- struct in_addr *next_hop, metrics_t *metrics)
+static INLINING ret_t forward_with_gre(struct __sk_buff *skb, encap_headers_t *encap,
+ struct in_addr *next_hop, metrics_t *metrics)
{
metrics->forwarded_packets_total_gre++;
@@ -509,8 +516,8 @@ static ret_t forward_with_gre(struct __sk_buff *skb, encap_headers_t *encap,
return bpf_redirect(skb->ifindex, 0);
}
-static ret_t forward_to_next_hop(struct __sk_buff *skb, encap_headers_t *encap,
- struct in_addr *next_hop, metrics_t *metrics)
+static INLINING ret_t forward_to_next_hop(struct __sk_buff *skb, encap_headers_t *encap,
+ struct in_addr *next_hop, metrics_t *metrics)
{
/* swap L2 addresses */
/* This assumes that packets are received from a router.
@@ -546,7 +553,7 @@ static ret_t forward_to_next_hop(struct __sk_buff *skb, encap_headers_t *encap,
return bpf_redirect(skb->ifindex, 0);
}
-static ret_t skip_next_hops(buf_t *pkt, int n)
+static INLINING ret_t skip_next_hops(buf_t *pkt, int n)
{
switch (n) {
case 1:
@@ -566,8 +573,8 @@ static ret_t skip_next_hops(buf_t *pkt, int n)
* pkt is positioned just after the variable length GLB header
* iff the call is successful.
*/
-static ret_t get_next_hop(buf_t *pkt, encap_headers_t *encap,
- struct in_addr *next_hop)
+static INLINING ret_t get_next_hop(buf_t *pkt, encap_headers_t *encap,
+ struct in_addr *next_hop)
{
if (encap->unigue.next_hop > encap->unigue.hop_count) {
return TC_ACT_SHOT;
@@ -601,8 +608,8 @@ static ret_t get_next_hop(buf_t *pkt, encap_headers_t *encap,
* return value, and calling code works while still being "generic" to
* IPv4 and IPv6.
*/
-static uint64_t fill_tuple(struct bpf_sock_tuple *tuple, void *iph,
- uint64_t iphlen, uint16_t sport, uint16_t dport)
+static INLINING uint64_t fill_tuple(struct bpf_sock_tuple *tuple, void *iph,
+ uint64_t iphlen, uint16_t sport, uint16_t dport)
{
switch (iphlen) {
case sizeof(struct iphdr): {
@@ -630,9 +637,9 @@ static uint64_t fill_tuple(struct bpf_sock_tuple *tuple, void *iph,
}
}
-static verdict_t classify_tcp(struct __sk_buff *skb,
- struct bpf_sock_tuple *tuple, uint64_t tuplen,
- void *iph, struct tcphdr *tcp)
+static INLINING verdict_t classify_tcp(struct __sk_buff *skb,
+ struct bpf_sock_tuple *tuple, uint64_t tuplen,
+ void *iph, struct tcphdr *tcp)
{
struct bpf_sock *sk =
bpf_skc_lookup_tcp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0);
@@ -663,8 +670,8 @@ static verdict_t classify_tcp(struct __sk_buff *skb,
return UNKNOWN;
}
-static verdict_t classify_udp(struct __sk_buff *skb,
- struct bpf_sock_tuple *tuple, uint64_t tuplen)
+static INLINING verdict_t classify_udp(struct __sk_buff *skb,
+ struct bpf_sock_tuple *tuple, uint64_t tuplen)
{
struct bpf_sock *sk =
bpf_sk_lookup_udp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0);
@@ -681,9 +688,9 @@ static verdict_t classify_udp(struct __sk_buff *skb,
return UNKNOWN;
}
-static verdict_t classify_icmp(struct __sk_buff *skb, uint8_t proto,
- struct bpf_sock_tuple *tuple, uint64_t tuplen,
- metrics_t *metrics)
+static INLINING verdict_t classify_icmp(struct __sk_buff *skb, uint8_t proto,
+ struct bpf_sock_tuple *tuple, uint64_t tuplen,
+ metrics_t *metrics)
{
switch (proto) {
case IPPROTO_TCP:
@@ -698,7 +705,7 @@ static verdict_t classify_icmp(struct __sk_buff *skb, uint8_t proto,
}
}
-static verdict_t process_icmpv4(buf_t *pkt, metrics_t *metrics)
+static INLINING verdict_t process_icmpv4(buf_t *pkt, metrics_t *metrics)
{
struct icmphdr icmp;
if (!buf_copy(pkt, &icmp, sizeof(icmp))) {
@@ -745,7 +752,7 @@ static verdict_t process_icmpv4(buf_t *pkt, metrics_t *metrics)
sizeof(tuple.ipv4), metrics);
}
-static verdict_t process_icmpv6(buf_t *pkt, metrics_t *metrics)
+static INLINING verdict_t process_icmpv6(buf_t *pkt, metrics_t *metrics)
{
struct icmp6hdr icmp6;
if (!buf_copy(pkt, &icmp6, sizeof(icmp6))) {
@@ -797,8 +804,8 @@ static verdict_t process_icmpv6(buf_t *pkt, metrics_t *metrics)
metrics);
}
-static verdict_t process_tcp(buf_t *pkt, void *iph, uint64_t iphlen,
- metrics_t *metrics)
+static INLINING verdict_t process_tcp(buf_t *pkt, void *iph, uint64_t iphlen,
+ metrics_t *metrics)
{
metrics->l4_protocol_packets_total_tcp++;
@@ -819,8 +826,8 @@ static verdict_t process_tcp(buf_t *pkt, void *iph, uint64_t iphlen,
return classify_tcp(pkt->skb, &tuple, tuplen, iph, tcp);
}
-static verdict_t process_udp(buf_t *pkt, void *iph, uint64_t iphlen,
- metrics_t *metrics)
+static INLINING verdict_t process_udp(buf_t *pkt, void *iph, uint64_t iphlen,
+ metrics_t *metrics)
{
metrics->l4_protocol_packets_total_udp++;
@@ -837,7 +844,7 @@ static verdict_t process_udp(buf_t *pkt, void *iph, uint64_t iphlen,
return classify_udp(pkt->skb, &tuple, tuplen);
}
-static verdict_t process_ipv4(buf_t *pkt, metrics_t *metrics)
+static INLINING verdict_t process_ipv4(buf_t *pkt, metrics_t *metrics)
{
metrics->l3_protocol_packets_total_ipv4++;
@@ -874,7 +881,7 @@ static verdict_t process_ipv4(buf_t *pkt, metrics_t *metrics)
}
}
-static verdict_t process_ipv6(buf_t *pkt, metrics_t *metrics)
+static INLINING verdict_t process_ipv6(buf_t *pkt, metrics_t *metrics)
{
metrics->l3_protocol_packets_total_ipv6++;
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect_subprogs.c b/tools/testing/selftests/bpf/progs/test_cls_redirect_subprogs.c
new file mode 100644
index 000000000000..eed26b70e3a2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect_subprogs.c
@@ -0,0 +1,2 @@
+#define SUBPROGS
+#include "test_cls_redirect.c"
diff --git a/tools/testing/selftests/bpf/progs/test_core_autosize.c b/tools/testing/selftests/bpf/progs/test_core_autosize.c
new file mode 100644
index 000000000000..44f5aa2e8956
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_autosize.c
@@ -0,0 +1,172 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+char _license[] SEC("license") = "GPL";
+
+/* fields of exactly the same size */
+struct test_struct___samesize {
+ void *ptr;
+ unsigned long long val1;
+ unsigned int val2;
+ unsigned short val3;
+ unsigned char val4;
+} __attribute__((preserve_access_index));
+
+/* unsigned fields that have to be downsized by libbpf */
+struct test_struct___downsize {
+ void *ptr;
+ unsigned long val1;
+ unsigned long val2;
+ unsigned long val3;
+ unsigned long val4;
+ /* total sz: 40 */
+} __attribute__((preserve_access_index));
+
+/* fields with signed integers of wrong size, should be rejected */
+struct test_struct___signed {
+ void *ptr;
+ long val1;
+ long val2;
+ long val3;
+ long val4;
+} __attribute__((preserve_access_index));
+
+/* real layout and sizes according to test's (32-bit) BTF */
+struct test_struct___real {
+ unsigned int ptr; /* can't use `void *`, it is always 8 bytes in the BPF target */
+ unsigned int val2;
+ unsigned long long val1;
+ unsigned short val3;
+ unsigned char val4;
+ unsigned char _pad;
+ /* total sz: 20 */
+};
+
+struct test_struct___real input = {
+ .ptr = 0x01020304,
+ .val1 = 0x1020304050607080,
+ .val2 = 0x0a0b0c0d,
+ .val3 = 0xfeed,
+ .val4 = 0xb9,
+ ._pad = 0xff, /* make sure no accidental zeros are present */
+};
+
+unsigned long long ptr_samesized = 0;
+unsigned long long val1_samesized = 0;
+unsigned long long val2_samesized = 0;
+unsigned long long val3_samesized = 0;
+unsigned long long val4_samesized = 0;
+struct test_struct___real output_samesized = {};
+
+unsigned long long ptr_downsized = 0;
+unsigned long long val1_downsized = 0;
+unsigned long long val2_downsized = 0;
+unsigned long long val3_downsized = 0;
+unsigned long long val4_downsized = 0;
+struct test_struct___real output_downsized = {};
+
+unsigned long long ptr_probed = 0;
+unsigned long long val1_probed = 0;
+unsigned long long val2_probed = 0;
+unsigned long long val3_probed = 0;
+unsigned long long val4_probed = 0;
+
+unsigned long long ptr_signed = 0;
+unsigned long long val1_signed = 0;
+unsigned long long val2_signed = 0;
+unsigned long long val3_signed = 0;
+unsigned long long val4_signed = 0;
+struct test_struct___real output_signed = {};
+
+SEC("raw_tp/sys_exit")
+int handle_samesize(void *ctx)
+{
+ struct test_struct___samesize *in = (void *)&input;
+ struct test_struct___samesize *out = (void *)&output_samesized;
+
+ ptr_samesized = (unsigned long long)in->ptr;
+ val1_samesized = in->val1;
+ val2_samesized = in->val2;
+ val3_samesized = in->val3;
+ val4_samesized = in->val4;
+
+ out->ptr = in->ptr;
+ out->val1 = in->val1;
+ out->val2 = in->val2;
+ out->val3 = in->val3;
+ out->val4 = in->val4;
+
+ return 0;
+}
+
+SEC("raw_tp/sys_exit")
+int handle_downsize(void *ctx)
+{
+ struct test_struct___downsize *in = (void *)&input;
+ struct test_struct___downsize *out = (void *)&output_downsized;
+
+ ptr_downsized = (unsigned long long)in->ptr;
+ val1_downsized = in->val1;
+ val2_downsized = in->val2;
+ val3_downsized = in->val3;
+ val4_downsized = in->val4;
+
+ out->ptr = in->ptr;
+ out->val1 = in->val1;
+ out->val2 = in->val2;
+ out->val3 = in->val3;
+ out->val4 = in->val4;
+
+ return 0;
+}
+
+SEC("raw_tp/sys_enter")
+int handle_probed(void *ctx)
+{
+ struct test_struct___downsize *in = (void *)&input;
+ __u64 tmp;
+
+ tmp = 0;
+ bpf_core_read(&tmp, bpf_core_field_size(in->ptr), &in->ptr);
+ ptr_probed = tmp;
+
+ tmp = 0;
+ bpf_core_read(&tmp, bpf_core_field_size(in->val1), &in->val1);
+ val1_probed = tmp;
+
+ tmp = 0;
+ bpf_core_read(&tmp, bpf_core_field_size(in->val2), &in->val2);
+ val2_probed = tmp;
+
+ tmp = 0;
+ bpf_core_read(&tmp, bpf_core_field_size(in->val3), &in->val3);
+ val3_probed = tmp;
+
+ tmp = 0;
+ bpf_core_read(&tmp, bpf_core_field_size(in->val4), &in->val4);
+ val4_probed = tmp;
+
+ return 0;
+}
+
+SEC("raw_tp/sys_enter")
+int handle_signed(void *ctx)
+{
+ struct test_struct___signed *in = (void *)&input;
+ struct test_struct___signed *out = (void *)&output_signed;
+
+ val2_signed = in->val2;
+ val3_signed = in->val3;
+ val4_signed = in->val4;
+
+ out->val2 = in->val2;
+ out->val3 = in->val3;
+ out->val4 = in->val4;
+
+ return 0;
+}
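
handle_probed() above is the escape hatch for fields a direct load cannot
express: bpf_core_read() paired with bpf_core_field_size() reads exactly
as many bytes as the target BTF says the field occupies. A condensed
sketch of that pattern; struct sample and its field are hypothetical, and
the stand-in source address is contrived for brevity:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>

/* hypothetical local view; the target BTF may size `val` differently */
struct sample {
	unsigned long val;
} __attribute__((preserve_access_index));

unsigned long long probed = 0;

SEC("raw_tp/sys_enter")
int probe_sized(void *ctx)
{
	struct sample *in = (void *)&probed;	/* contrived source */
	unsigned long long tmp = 0;

	/* bpf_core_field_size() relocates to the field's size in the
	 * target BTF, so this never reads more bytes than the running
	 * kernel's layout actually has.
	 */
	bpf_core_read(&tmp, bpf_core_field_size(in->val), &in->val);
	probed = tmp;
	return 0;
}

char _license[] SEC("license") = "GPL";
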
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_enumval.c b/tools/testing/selftests/bpf/progs/test_core_reloc_enumval.c
new file mode 100644
index 000000000000..e7ef3dada2bf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_enumval.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ char in[256];
+ char out[256];
+ bool skip;
+} data = {};
+
+enum named_enum {
+ NAMED_ENUM_VAL1 = 1,
+ NAMED_ENUM_VAL2 = 2,
+ NAMED_ENUM_VAL3 = 3,
+};
+
+typedef enum {
+ ANON_ENUM_VAL1 = 0x10,
+ ANON_ENUM_VAL2 = 0x20,
+ ANON_ENUM_VAL3 = 0x30,
+} anon_enum;
+
+struct core_reloc_enumval_output {
+ bool named_val1_exists;
+ bool named_val2_exists;
+ bool named_val3_exists;
+ bool anon_val1_exists;
+ bool anon_val2_exists;
+ bool anon_val3_exists;
+
+ int named_val1;
+ int named_val2;
+ int anon_val1;
+ int anon_val2;
+};
+
+SEC("raw_tracepoint/sys_enter")
+int test_core_enumval(void *ctx)
+{
+#if __has_builtin(__builtin_preserve_enum_value)
+ struct core_reloc_enumval_output *out = (void *)&data.out;
+ enum named_enum named = 0;
+ anon_enum anon = 0;
+
+ out->named_val1_exists = bpf_core_enum_value_exists(named, NAMED_ENUM_VAL1);
+ out->named_val2_exists = bpf_core_enum_value_exists(enum named_enum, NAMED_ENUM_VAL2);
+ out->named_val3_exists = bpf_core_enum_value_exists(enum named_enum, NAMED_ENUM_VAL3);
+
+ out->anon_val1_exists = bpf_core_enum_value_exists(anon, ANON_ENUM_VAL1);
+ out->anon_val2_exists = bpf_core_enum_value_exists(anon_enum, ANON_ENUM_VAL2);
+ out->anon_val3_exists = bpf_core_enum_value_exists(anon_enum, ANON_ENUM_VAL3);
+
+ out->named_val1 = bpf_core_enum_value(named, NAMED_ENUM_VAL1);
+ out->named_val2 = bpf_core_enum_value(named, NAMED_ENUM_VAL2);
+ /* NAMED_ENUM_VAL3 value is optional */
+
+ out->anon_val1 = bpf_core_enum_value(anon, ANON_ENUM_VAL1);
+ out->anon_val2 = bpf_core_enum_value(anon, ANON_ENUM_VAL2);
+ /* ANON_ENUM_VAL3 value is optional */
+#else
+ data.skip = true;
+#endif
+
+ return 0;
+}
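
Outside this selftest, the typical use of these primitives is tolerating
kernel enums whose values drift between versions: relocate the value from
the running kernel's BTF instead of trusting the compile-time constant. A
small sketch along those lines; the ___local flavor mirrors the kernel's
enum pid_type and deliberately carries a wrong value so the relocation
has to correct it:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>

/* local mirror; 999 is intentionally wrong */
enum pid_type___local {
	PIDTYPE_TGID = 999,
};

int tgid_val = -1;

SEC("raw_tp/sys_enter")
int pick_enum_val(void *ctx)
{
#if __has_builtin(__builtin_preserve_enum_value)
	/* both helpers relocate against the running kernel's BTF */
	if (bpf_core_enum_value_exists(enum pid_type___local, PIDTYPE_TGID))
		tgid_val = bpf_core_enum_value(enum pid_type___local,
					       PIDTYPE_TGID);
#endif
	return 0;
}

char _license[] SEC("license") = "GPL";
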
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c
index aba928fd60d3..145028b52ad8 100644
--- a/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c
@@ -3,6 +3,7 @@
#include <linux/bpf.h>
#include <stdint.h>
+#include <stdbool.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
@@ -11,6 +12,7 @@ char _license[] SEC("license") = "GPL";
struct {
char in[256];
char out[256];
+ bool skip;
uint64_t my_pid_tgid;
} data = {};
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c b/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c
new file mode 100644
index 000000000000..fb60f8195c53
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ char in[256];
+ char out[256];
+ bool skip;
+} data = {};
+
+struct a_struct {
+ int x;
+};
+
+union a_union {
+ int y;
+ int z;
+};
+
+typedef struct a_struct named_struct_typedef;
+
+typedef struct { int x, y, z; } anon_struct_typedef;
+
+typedef struct {
+ int a, b, c;
+} *struct_ptr_typedef;
+
+enum an_enum {
+ AN_ENUM_VAL1 = 1,
+ AN_ENUM_VAL2 = 2,
+ AN_ENUM_VAL3 = 3,
+};
+
+typedef int int_typedef;
+
+typedef enum { TYPEDEF_ENUM_VAL1, TYPEDEF_ENUM_VAL2 } enum_typedef;
+
+typedef void *void_ptr_typedef;
+
+typedef int (*func_proto_typedef)(long);
+
+typedef char arr_typedef[20];
+
+struct core_reloc_type_based_output {
+ bool struct_exists;
+ bool union_exists;
+ bool enum_exists;
+ bool typedef_named_struct_exists;
+ bool typedef_anon_struct_exists;
+ bool typedef_struct_ptr_exists;
+ bool typedef_int_exists;
+ bool typedef_enum_exists;
+ bool typedef_void_ptr_exists;
+ bool typedef_func_proto_exists;
+ bool typedef_arr_exists;
+
+ int struct_sz;
+ int union_sz;
+ int enum_sz;
+ int typedef_named_struct_sz;
+ int typedef_anon_struct_sz;
+ int typedef_struct_ptr_sz;
+ int typedef_int_sz;
+ int typedef_enum_sz;
+ int typedef_void_ptr_sz;
+ int typedef_func_proto_sz;
+ int typedef_arr_sz;
+};
+
+SEC("raw_tracepoint/sys_enter")
+int test_core_type_based(void *ctx)
+{
+#if __has_builtin(__builtin_preserve_type_info)
+ struct core_reloc_type_based_output *out = (void *)&data.out;
+
+ out->struct_exists = bpf_core_type_exists(struct a_struct);
+ out->union_exists = bpf_core_type_exists(union a_union);
+ out->enum_exists = bpf_core_type_exists(enum an_enum);
+ out->typedef_named_struct_exists = bpf_core_type_exists(named_struct_typedef);
+ out->typedef_anon_struct_exists = bpf_core_type_exists(anon_struct_typedef);
+ out->typedef_struct_ptr_exists = bpf_core_type_exists(struct_ptr_typedef);
+ out->typedef_int_exists = bpf_core_type_exists(int_typedef);
+ out->typedef_enum_exists = bpf_core_type_exists(enum_typedef);
+ out->typedef_void_ptr_exists = bpf_core_type_exists(void_ptr_typedef);
+ out->typedef_func_proto_exists = bpf_core_type_exists(func_proto_typedef);
+ out->typedef_arr_exists = bpf_core_type_exists(arr_typedef);
+
+ out->struct_sz = bpf_core_type_size(struct a_struct);
+ out->union_sz = bpf_core_type_size(union a_union);
+ out->enum_sz = bpf_core_type_size(enum an_enum);
+ out->typedef_named_struct_sz = bpf_core_type_size(named_struct_typedef);
+ out->typedef_anon_struct_sz = bpf_core_type_size(anon_struct_typedef);
+ out->typedef_struct_ptr_sz = bpf_core_type_size(struct_ptr_typedef);
+ out->typedef_int_sz = bpf_core_type_size(int_typedef);
+ out->typedef_enum_sz = bpf_core_type_size(enum_typedef);
+ out->typedef_void_ptr_sz = bpf_core_type_size(void_ptr_typedef);
+ out->typedef_func_proto_sz = bpf_core_type_size(func_proto_typedef);
+ out->typedef_arr_sz = bpf_core_type_size(arr_typedef);
+#else
+ data.skip = true;
+#endif
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c b/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c
new file mode 100644
index 000000000000..22aba3f6e344
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ char in[256];
+ char out[256];
+ bool skip;
+} data = {};
+
+/* some types are shared with test_core_reloc_type_based.c */
+struct a_struct {
+ int x;
+};
+
+union a_union {
+ int y;
+ int z;
+};
+
+enum an_enum {
+ AN_ENUM_VAL1 = 1,
+ AN_ENUM_VAL2 = 2,
+ AN_ENUM_VAL3 = 3,
+};
+
+typedef struct a_struct named_struct_typedef;
+
+typedef int (*func_proto_typedef)(long);
+
+typedef char arr_typedef[20];
+
+struct core_reloc_type_id_output {
+ int local_anon_struct;
+ int local_anon_union;
+ int local_anon_enum;
+ int local_anon_func_proto_ptr;
+ int local_anon_void_ptr;
+ int local_anon_arr;
+
+ int local_struct;
+ int local_union;
+ int local_enum;
+ int local_int;
+ int local_struct_typedef;
+ int local_func_proto_typedef;
+ int local_arr_typedef;
+
+ int targ_struct;
+ int targ_union;
+ int targ_enum;
+ int targ_int;
+ int targ_struct_typedef;
+ int targ_func_proto_typedef;
+ int targ_arr_typedef;
+};
+
+/* preserve the types even if Clang doesn't support the built-in */
+struct a_struct t1 = {};
+union a_union t2 = {};
+enum an_enum t3 = 0;
+named_struct_typedef t4 = {};
+func_proto_typedef t5 = 0;
+arr_typedef t6 = {};
+
+SEC("raw_tracepoint/sys_enter")
+int test_core_type_id(void *ctx)
+{
+ /* We use __builtin_btf_type_id() in this test, but up until the time
+ * __builtin_preserve_type_info() was added it contained a bug that
+ * would make this test fail. The bug was fixed ([0]) with the addition
+ * of __builtin_preserve_type_info(), though, so that's what we use to
+ * detect whether this test has to be executed, however strange that
+ * might look.
+ *
+ * [0] https://reviews.llvm.org/D85174
+ */
+#if __has_builtin(__builtin_preserve_type_info)
+ struct core_reloc_type_id_output *out = (void *)&data.out;
+
+ out->local_anon_struct = bpf_core_type_id_local(struct { int marker_field; });
+ out->local_anon_union = bpf_core_type_id_local(union { int marker_field; });
+ out->local_anon_enum = bpf_core_type_id_local(enum { MARKER_ENUM_VAL = 123 });
+ out->local_anon_func_proto_ptr = bpf_core_type_id_local(_Bool(*)(int));
+ out->local_anon_void_ptr = bpf_core_type_id_local(void *);
+ out->local_anon_arr = bpf_core_type_id_local(_Bool[47]);
+
+ out->local_struct = bpf_core_type_id_local(struct a_struct);
+ out->local_union = bpf_core_type_id_local(union a_union);
+ out->local_enum = bpf_core_type_id_local(enum an_enum);
+ out->local_int = bpf_core_type_id_local(int);
+ out->local_struct_typedef = bpf_core_type_id_local(named_struct_typedef);
+ out->local_func_proto_typedef = bpf_core_type_id_local(func_proto_typedef);
+ out->local_arr_typedef = bpf_core_type_id_local(arr_typedef);
+
+ out->targ_struct = bpf_core_type_id_kernel(struct a_struct);
+ out->targ_union = bpf_core_type_id_kernel(union a_union);
+ out->targ_enum = bpf_core_type_id_kernel(enum an_enum);
+ out->targ_int = bpf_core_type_id_kernel(int);
+ out->targ_struct_typedef = bpf_core_type_id_kernel(named_struct_typedef);
+ out->targ_func_proto_typedef = bpf_core_type_id_kernel(func_proto_typedef);
+ out->targ_arr_typedef = bpf_core_type_id_kernel(arr_typedef);
+#else
+ data.skip = true;
+#endif
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_d_path.c b/tools/testing/selftests/bpf/progs/test_d_path.c
new file mode 100644
index 000000000000..84e1f883f97b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_d_path.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#define MAX_PATH_LEN 128
+#define MAX_FILES 7
+
+pid_t my_pid = 0;
+__u32 cnt_stat = 0;
+__u32 cnt_close = 0;
+char paths_stat[MAX_FILES][MAX_PATH_LEN] = {};
+char paths_close[MAX_FILES][MAX_PATH_LEN] = {};
+int rets_stat[MAX_FILES] = {};
+int rets_close[MAX_FILES] = {};
+
+int called_stat = 0;
+int called_close = 0;
+
+SEC("fentry/security_inode_getattr")
+int BPF_PROG(prog_stat, struct path *path, struct kstat *stat,
+ __u32 request_mask, unsigned int query_flags)
+{
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+ __u32 cnt = cnt_stat;
+ int ret;
+
+ called_stat = 1;
+
+ if (pid != my_pid)
+ return 0;
+
+ if (cnt >= MAX_FILES)
+ return 0;
+ ret = bpf_d_path(path, paths_stat[cnt], MAX_PATH_LEN);
+
+ rets_stat[cnt] = ret;
+ cnt_stat++;
+ return 0;
+}
+
+SEC("fentry/filp_close")
+int BPF_PROG(prog_close, struct file *file, void *id)
+{
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+ __u32 cnt = cnt_close;
+ int ret;
+
+ called_close = 1;
+
+ if (pid != my_pid)
+ return 0;
+
+ if (cnt >= MAX_FILES)
+ return 0;
+ ret = bpf_d_path(&file->f_path,
+ paths_close[cnt], MAX_PATH_LEN);
+
+ rets_close[cnt] = ret;
+ cnt_close++;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
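
The globals above (my_pid, the path buffers, the counters) are meant to
be driven from user space: pin my_pid to the test process, attach, then
issue stat() and close() so both fentry hooks fire. A sketch of that
driver, assuming the bpftool-generated skeleton header test_d_path.skel.h
and its conventional <name>__open_and_load()/__attach()/__destroy()
entry points:

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/stat.h>
#include "test_d_path.skel.h"

int main(void)
{
	struct test_d_path *skel;
	struct stat st;
	int fd, err = 1;

	skel = test_d_path__open_and_load();
	if (!skel)
		return 1;

	skel->bss->my_pid = getpid();	/* only trace this process */
	if (test_d_path__attach(skel))
		goto out;

	/* trigger security_inode_getattr and filp_close */
	if (stat("/tmp", &st))
		goto out;
	fd = open("/tmp", O_RDONLY);
	if (fd < 0)
		goto out;
	close(fd);

	printf("stat paths: %u, close paths: %u\n",
	       skel->bss->cnt_stat, skel->bss->cnt_close);
	err = 0;
out:
	test_d_path__destroy(skel);
	return err;
}
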
diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_btf.c b/tools/testing/selftests/bpf/progs/test_ksyms_btf.c
new file mode 100644
index 000000000000..bb8ea9270f29
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ksyms_btf.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Google */
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+
+__u64 out__runqueues_addr = -1;
+__u64 out__bpf_prog_active_addr = -1;
+
+__u32 out__rq_cpu = -1; /* percpu struct fields */
+int out__bpf_prog_active = -1; /* percpu int */
+
+__u32 out__this_rq_cpu = -1;
+int out__this_bpf_prog_active = -1;
+
+__u32 out__cpu_0_rq_cpu = -1; /* cpu_rq(0)->cpu */
+
+extern const struct rq runqueues __ksym; /* struct type global var. */
+extern const int bpf_prog_active __ksym; /* int type global var. */
+
+SEC("raw_tp/sys_enter")
+int handler(const void *ctx)
+{
+ struct rq *rq;
+ int *active;
+ __u32 cpu;
+
+ out__runqueues_addr = (__u64)&runqueues;
+ out__bpf_prog_active_addr = (__u64)&bpf_prog_active;
+
+ cpu = bpf_get_smp_processor_id();
+
+ /* test bpf_per_cpu_ptr() */
+ rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, cpu);
+ if (rq)
+ out__rq_cpu = rq->cpu;
+ active = (int *)bpf_per_cpu_ptr(&bpf_prog_active, cpu);
+ if (active)
+ out__bpf_prog_active = *active;
+
+ rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, 0);
+ if (rq) /* should always be valid, but we can't skip the check. */
+ out__cpu_0_rq_cpu = rq->cpu;
+
+ /* test bpf_this_cpu_ptr */
+ rq = (struct rq *)bpf_this_cpu_ptr(&runqueues);
+ out__this_rq_cpu = rq->cpu;
+ active = (int *)bpf_this_cpu_ptr(&bpf_prog_active);
+ out__this_bpf_prog_active = *active;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
index 28351936a438..b9e2753f4f91 100644
--- a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
+++ b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
@@ -17,9 +17,7 @@
#include "test_iptunnel_common.h"
#include <bpf/bpf_endian.h>
-int _version SEC("version") = 1;
-
-static __u32 rol32(__u32 word, unsigned int shift)
+static __always_inline __u32 rol32(__u32 word, unsigned int shift)
{
return (word << shift) | (word >> ((-shift) & 31));
}
@@ -52,7 +50,7 @@ static __u32 rol32(__u32 word, unsigned int shift)
typedef unsigned int u32;
-static u32 jhash(const void *key, u32 length, u32 initval)
+static __noinline u32 jhash(const void *key, u32 length, u32 initval)
{
u32 a, b, c;
const unsigned char *k = key;
@@ -88,7 +86,7 @@ static u32 jhash(const void *key, u32 length, u32 initval)
return c;
}
-static u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
+static __noinline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
{
a += initval;
b += initval;
@@ -97,7 +95,7 @@ static u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
return c;
}
-static u32 jhash_2words(u32 a, u32 b, u32 initval)
+static __noinline u32 jhash_2words(u32 a, u32 b, u32 initval)
{
return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
}
@@ -200,8 +198,7 @@ struct {
__type(value, struct ctl_value);
} ctl_array SEC(".maps");
-static __u32 get_packet_hash(struct packet_description *pckt,
- bool ipv6)
+static __noinline __u32 get_packet_hash(struct packet_description *pckt, bool ipv6)
{
if (ipv6)
return jhash_2words(jhash(pckt->srcv6, 16, MAX_VIPS),
@@ -210,10 +207,10 @@ static __u32 get_packet_hash(struct packet_description *pckt,
return jhash_2words(pckt->src, pckt->ports, CH_RINGS_SIZE);
}
-static bool get_packet_dst(struct real_definition **real,
- struct packet_description *pckt,
- struct vip_meta *vip_info,
- bool is_ipv6)
+static __noinline bool get_packet_dst(struct real_definition **real,
+ struct packet_description *pckt,
+ struct vip_meta *vip_info,
+ bool is_ipv6)
{
__u32 hash = get_packet_hash(pckt, is_ipv6);
__u32 key = RING_SIZE * vip_info->vip_num + hash % RING_SIZE;
@@ -233,8 +230,8 @@ static bool get_packet_dst(struct real_definition **real,
return true;
}
-static int parse_icmpv6(void *data, void *data_end, __u64 off,
- struct packet_description *pckt)
+static __noinline int parse_icmpv6(void *data, void *data_end, __u64 off,
+ struct packet_description *pckt)
{
struct icmp6hdr *icmp_hdr;
struct ipv6hdr *ip6h;
@@ -255,8 +252,8 @@ static int parse_icmpv6(void *data, void *data_end, __u64 off,
return TC_ACT_UNSPEC;
}
-static int parse_icmp(void *data, void *data_end, __u64 off,
- struct packet_description *pckt)
+static __noinline int parse_icmp(void *data, void *data_end, __u64 off,
+ struct packet_description *pckt)
{
struct icmphdr *icmp_hdr;
struct iphdr *iph;
@@ -280,8 +277,8 @@ static int parse_icmp(void *data, void *data_end, __u64 off,
return TC_ACT_UNSPEC;
}
-static bool parse_udp(void *data, __u64 off, void *data_end,
- struct packet_description *pckt)
+static __noinline bool parse_udp(void *data, __u64 off, void *data_end,
+ struct packet_description *pckt)
{
struct udphdr *udp;
udp = data + off;
@@ -299,8 +296,8 @@ static bool parse_udp(void *data, __u64 off, void *data_end,
return true;
}
-static bool parse_tcp(void *data, __u64 off, void *data_end,
- struct packet_description *pckt)
+static __noinline bool parse_tcp(void *data, __u64 off, void *data_end,
+ struct packet_description *pckt)
{
struct tcphdr *tcp;
@@ -321,8 +318,8 @@ static bool parse_tcp(void *data, __u64 off, void *data_end,
return true;
}
-static int process_packet(void *data, __u64 off, void *data_end,
- bool is_ipv6, struct __sk_buff *skb)
+static __noinline int process_packet(void *data, __u64 off, void *data_end,
+ bool is_ipv6, struct __sk_buff *skb)
{
void *pkt_start = (void *)(long)skb->data;
struct packet_description pckt = {};
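
The conversion above is mechanical but deliberate: dropping
__always_inline in favor of __noinline turns each helper into a real
BPF-to-BPF call, which is exactly what this selftest exercises; jhash()
in particular is large enough that flattening it into every call site
bloats the program. The idiom in isolation, as a sketch:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* __noinline keeps this helper a separate BPF function, so the
 * generated code performs an actual call instead of duplicating
 * the body at each call site.
 */
static __noinline __u32 add_len(struct __sk_buff *skb, __u32 base)
{
	return base + skb->len;
}

SEC("classifier")
int prog(struct __sk_buff *skb)
{
	return add_len(skb, 1);
}

char _license[] SEC("license") = "GPL";
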
diff --git a/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c
new file mode 100644
index 000000000000..6077a025092c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c
@@ -0,0 +1,325 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <stddef.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/socket.h>
+#include <linux/bpf.h>
+#include <linux/types.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#define BPF_PROG_TEST_TCP_HDR_OPTIONS
+#include "test_tcp_hdr_options.h"
+
+__u16 last_addr16_n = __bpf_htons(1);
+__u16 active_lport_n = 0;
+__u16 active_lport_h = 0;
+__u16 passive_lport_n = 0;
+__u16 passive_lport_h = 0;
+
+/* options received at passive side */
+unsigned int nr_pure_ack = 0;
+unsigned int nr_data = 0;
+unsigned int nr_syn = 0;
+unsigned int nr_fin = 0;
+
+/* Check the header received from the active side */
+static int __check_active_hdr_in(struct bpf_sock_ops *skops, bool check_syn)
+{
+ union {
+ struct tcphdr th;
+ struct ipv6hdr ip6;
+ struct tcp_exprm_opt exprm_opt;
+ struct tcp_opt reg_opt;
+ __u8 data[100]; /* IPv6 (40) + Max TCP hdr (60) */
+ } hdr = {};
+ __u64 load_flags = check_syn ? BPF_LOAD_HDR_OPT_TCP_SYN : 0;
+ struct tcphdr *pth;
+ int ret;
+
+ hdr.reg_opt.kind = 0xB9;
+
+ /* The option is 4 bytes long instead of 2 bytes */
+ ret = bpf_load_hdr_opt(skops, &hdr.reg_opt, 2, load_flags);
+ if (ret != -ENOSPC)
+ RET_CG_ERR(ret);
+
+ /* Test searching magic with regular kind */
+ hdr.reg_opt.len = 4;
+ ret = bpf_load_hdr_opt(skops, &hdr.reg_opt, sizeof(hdr.reg_opt),
+ load_flags);
+ if (ret != -EINVAL)
+ RET_CG_ERR(ret);
+
+ hdr.reg_opt.len = 0;
+ ret = bpf_load_hdr_opt(skops, &hdr.reg_opt, sizeof(hdr.reg_opt),
+ load_flags);
+ if (ret != 4 || hdr.reg_opt.len != 4 || hdr.reg_opt.kind != 0xB9 ||
+ hdr.reg_opt.data[0] != 0xfa || hdr.reg_opt.data[1] != 0xce)
+ RET_CG_ERR(ret);
+
+ /* Test searching experimental option with invalid kind length */
+ hdr.exprm_opt.kind = TCPOPT_EXP;
+ hdr.exprm_opt.len = 5;
+ hdr.exprm_opt.magic = 0;
+ ret = bpf_load_hdr_opt(skops, &hdr.exprm_opt, sizeof(hdr.exprm_opt),
+ load_flags);
+ if (ret != -EINVAL)
+ RET_CG_ERR(ret);
+
+ /* Test searching experimental option with 0 magic value */
+ hdr.exprm_opt.len = 4;
+ ret = bpf_load_hdr_opt(skops, &hdr.exprm_opt, sizeof(hdr.exprm_opt),
+ load_flags);
+ if (ret != -ENOMSG)
+ RET_CG_ERR(ret);
+
+ hdr.exprm_opt.magic = __bpf_htons(0xeB9F);
+ ret = bpf_load_hdr_opt(skops, &hdr.exprm_opt, sizeof(hdr.exprm_opt),
+ load_flags);
+ if (ret != 4 || hdr.exprm_opt.len != 4 ||
+ hdr.exprm_opt.kind != TCPOPT_EXP ||
+ hdr.exprm_opt.magic != __bpf_htons(0xeB9F))
+ RET_CG_ERR(ret);
+
+ if (!check_syn)
+ return CG_OK;
+
+ /* Test loading from skops->syn_skb if sk_state == TCP_NEW_SYN_RECV
+ *
+ * Test loading from tp->saved_syn for other sk_state.
+ */
+ ret = bpf_getsockopt(skops, SOL_TCP, TCP_BPF_SYN_IP, &hdr.ip6,
+ sizeof(hdr.ip6));
+ if (ret != -ENOSPC)
+ RET_CG_ERR(ret);
+
+ if (hdr.ip6.saddr.s6_addr16[7] != last_addr16_n ||
+ hdr.ip6.daddr.s6_addr16[7] != last_addr16_n)
+ RET_CG_ERR(0);
+
+ ret = bpf_getsockopt(skops, SOL_TCP, TCP_BPF_SYN_IP, &hdr, sizeof(hdr));
+ if (ret < 0)
+ RET_CG_ERR(ret);
+
+ pth = (struct tcphdr *)(&hdr.ip6 + 1);
+ if (pth->dest != passive_lport_n || pth->source != active_lport_n)
+ RET_CG_ERR(0);
+
+ ret = bpf_getsockopt(skops, SOL_TCP, TCP_BPF_SYN, &hdr, sizeof(hdr));
+ if (ret < 0)
+ RET_CG_ERR(ret);
+
+ if (hdr.th.dest != passive_lport_n || hdr.th.source != active_lport_n)
+ RET_CG_ERR(0);
+
+ return CG_OK;
+}
+
+static int check_active_syn_in(struct bpf_sock_ops *skops)
+{
+ return __check_active_hdr_in(skops, true);
+}
+
+static int check_active_hdr_in(struct bpf_sock_ops *skops)
+{
+ struct tcphdr *th;
+
+ if (__check_active_hdr_in(skops, false) == CG_ERR)
+ return CG_ERR;
+
+ th = skops->skb_data;
+ if (th + 1 > skops->skb_data_end)
+ RET_CG_ERR(0);
+
+ if (tcp_hdrlen(th) < skops->skb_len)
+ nr_data++;
+
+ if (th->fin)
+ nr_fin++;
+
+ if (th->ack && !th->fin && tcp_hdrlen(th) == skops->skb_len)
+ nr_pure_ack++;
+
+ return CG_OK;
+}
+
+static int active_opt_len(struct bpf_sock_ops *skops)
+{
+ int err;
+
+ /* Reserve more than enough to allow the -EEXIST test in
+ * write_active_opt().
+ */
+ err = bpf_reserve_hdr_opt(skops, 12, 0);
+ if (err)
+ RET_CG_ERR(err);
+
+ return CG_OK;
+}
+
+static int write_active_opt(struct bpf_sock_ops *skops)
+{
+ struct tcp_exprm_opt exprm_opt = {};
+ struct tcp_opt win_scale_opt = {};
+ struct tcp_opt reg_opt = {};
+ struct tcphdr *th;
+ int err, ret;
+
+ exprm_opt.kind = TCPOPT_EXP;
+ exprm_opt.len = 4;
+ exprm_opt.magic = __bpf_htons(0xeB9F);
+
+ reg_opt.kind = 0xB9;
+ reg_opt.len = 4;
+ reg_opt.data[0] = 0xfa;
+ reg_opt.data[1] = 0xce;
+
+ win_scale_opt.kind = TCPOPT_WINDOW;
+
+ err = bpf_store_hdr_opt(skops, &exprm_opt, sizeof(exprm_opt), 0);
+ if (err)
+ RET_CG_ERR(err);
+
+ /* Store the same exprm option */
+ err = bpf_store_hdr_opt(skops, &exprm_opt, sizeof(exprm_opt), 0);
+ if (err != -EEXIST)
+ RET_CG_ERR(err);
+
+ err = bpf_store_hdr_opt(skops, &reg_opt, sizeof(reg_opt), 0);
+ if (err)
+ RET_CG_ERR(err);
+ err = bpf_store_hdr_opt(skops, &reg_opt, sizeof(reg_opt), 0);
+ if (err != -EEXIST)
+ RET_CG_ERR(err);
+
+ /* Check the option has been written and can be searched */
+ ret = bpf_load_hdr_opt(skops, &exprm_opt, sizeof(exprm_opt), 0);
+ if (ret != 4 || exprm_opt.len != 4 || exprm_opt.kind != TCPOPT_EXP ||
+ exprm_opt.magic != __bpf_htons(0xeB9F))
+ RET_CG_ERR(ret);
+
+ reg_opt.len = 0;
+ ret = bpf_load_hdr_opt(skops, &reg_opt, sizeof(reg_opt), 0);
+ if (ret != 4 || reg_opt.len != 4 || reg_opt.kind != 0xB9 ||
+ reg_opt.data[0] != 0xfa || reg_opt.data[1] != 0xce)
+ RET_CG_ERR(ret);
+
+ th = skops->skb_data;
+ if (th + 1 > skops->skb_data_end)
+ RET_CG_ERR(0);
+
+ if (th->syn) {
+ active_lport_h = skops->local_port;
+ active_lport_n = th->source;
+
+ /* Search for the win scale option written by the kernel
+ * in the SYN packet.
+ */
+ ret = bpf_load_hdr_opt(skops, &win_scale_opt,
+ sizeof(win_scale_opt), 0);
+ if (ret != 3 || win_scale_opt.len != 3 ||
+ win_scale_opt.kind != TCPOPT_WINDOW)
+ RET_CG_ERR(ret);
+
+ /* Write the win scale option that the kernel
+ * has already written.
+ */
+ err = bpf_store_hdr_opt(skops, &win_scale_opt,
+ sizeof(win_scale_opt), 0);
+ if (err != -EEXIST)
+ RET_CG_ERR(err);
+ }
+
+ return CG_OK;
+}
+
+static int handle_hdr_opt_len(struct bpf_sock_ops *skops)
+{
+ __u8 tcp_flags = skops_tcp_flags(skops);
+
+ if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK)
+ /* Check the SYN from bpf_sock_ops_kern->syn_skb */
+ return check_active_syn_in(skops);
+
+ /* Passive side should have cleared the write hdr cb by now */
+ if (skops->local_port == passive_lport_h)
+ RET_CG_ERR(0);
+
+ return active_opt_len(skops);
+}
+
+static int handle_write_hdr_opt(struct bpf_sock_ops *skops)
+{
+ if (skops->local_port == passive_lport_h)
+ RET_CG_ERR(0);
+
+ return write_active_opt(skops);
+}
+
+static int handle_parse_hdr(struct bpf_sock_ops *skops)
+{
+ /* Passive side is not writing any non-standard/unknown
+ * option, so the active side should never be called.
+ */
+ if (skops->local_port == active_lport_h)
+ RET_CG_ERR(0);
+
+ return check_active_hdr_in(skops);
+}
+
+static int handle_passive_estab(struct bpf_sock_ops *skops)
+{
+ int err;
+
+ /* No more write hdr cb */
+ bpf_sock_ops_cb_flags_set(skops,
+ skops->bpf_sock_ops_cb_flags &
+ ~BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG);
+
+ /* Recheck the SYN but check the tp->saved_syn this time */
+ err = check_active_syn_in(skops);
+ if (err == CG_ERR)
+ return err;
+
+ nr_syn++;
+
+ /* The ACK also carries a header option written by the active side */
+ return check_active_hdr_in(skops);
+}
+
+SEC("sockops/misc_estab")
+int misc_estab(struct bpf_sock_ops *skops)
+{
+ int true_val = 1;
+
+ switch (skops->op) {
+ case BPF_SOCK_OPS_TCP_LISTEN_CB:
+ passive_lport_h = skops->local_port;
+ passive_lport_n = __bpf_htons(passive_lport_h);
+ bpf_setsockopt(skops, SOL_TCP, TCP_SAVE_SYN,
+ &true_val, sizeof(true_val));
+ set_hdr_cb_flags(skops, 0);
+ break;
+ case BPF_SOCK_OPS_TCP_CONNECT_CB:
+ set_hdr_cb_flags(skops, 0);
+ break;
+ case BPF_SOCK_OPS_PARSE_HDR_OPT_CB:
+ return handle_parse_hdr(skops);
+ case BPF_SOCK_OPS_HDR_OPT_LEN_CB:
+ return handle_hdr_opt_len(skops);
+ case BPF_SOCK_OPS_WRITE_HDR_OPT_CB:
+ return handle_write_hdr_opt(skops);
+ case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+ return handle_passive_estab(skops);
+ }
+
+ return CG_OK;
+}
+
+char _license[] SEC("license") = "GPL";
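
The program above exercises most of the header-option helper surface, but
the core flow is just two callbacks: reserve option space in
BPF_SOCK_OPS_HDR_OPT_LEN_CB, then write it in
BPF_SOCK_OPS_WRITE_HDR_OPT_CB. A stripped-down sketch; struct my_tcp_opt
is a local stand-in for the test header's tcp_opt, and it assumes
BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG was enabled earlier, as
set_hdr_cb_flags() does above:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct my_tcp_opt {
	__u8 kind;
	__u8 len;
	__u8 data[2];
} __attribute__((packed));

SEC("sockops")
int minimal_hdr_opt(struct bpf_sock_ops *skops)
{
	struct my_tcp_opt opt = {
		.kind = 0xB9,	/* unassigned kind, test use only */
		.len = 4,
		.data = { 0xfa, 0xce },
	};

	switch (skops->op) {
	case BPF_SOCK_OPS_HDR_OPT_LEN_CB:
		/* ask the kernel to keep 4 bytes of option space free */
		bpf_reserve_hdr_opt(skops, sizeof(opt), 0);
		break;
	case BPF_SOCK_OPS_WRITE_HDR_OPT_CB:
		/* fill the reserved space; kind/len are validated */
		bpf_store_hdr_opt(skops, &opt, sizeof(opt), 0);
		break;
	}
	return 1;
}

char _license[] SEC("license") = "GPL";
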
diff --git a/tools/testing/selftests/bpf/progs/test_overhead.c b/tools/testing/selftests/bpf/progs/test_overhead.c
index 42403d088abc..abb7344b531f 100644
--- a/tools/testing/selftests/bpf/progs/test_overhead.c
+++ b/tools/testing/selftests/bpf/progs/test_overhead.c
@@ -39,10 +39,4 @@ int BPF_PROG(prog5, struct task_struct *tsk, const char *buf, bool exec)
return 0;
}
-SEC("fmod_ret/__set_task_comm")
-int BPF_PROG(prog6, struct task_struct *tsk, const char *buf, bool exec)
-{
- return !tsk;
-}
-
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_pe_preserve_elems.c b/tools/testing/selftests/bpf/progs/test_pe_preserve_elems.c
new file mode 100644
index 000000000000..fb22de7c365d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_pe_preserve_elems.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+} array_1 SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+ __uint(map_flags, BPF_F_PRESERVE_ELEMS);
+} array_2 SEC(".maps");
+
+SEC("raw_tp/sched_switch")
+int BPF_PROG(read_array_1)
+{
+ struct bpf_perf_event_value val;
+
+ return bpf_perf_event_read_value(&array_1, 0, &val, sizeof(val));
+}
+
+SEC("raw_tp/task_rename")
+int BPF_PROG(read_array_2)
+{
+ struct bpf_perf_event_value val;
+
+ return bpf_perf_event_read_value(&array_2, 0, &val, sizeof(val));
+}
+
+char LICENSE[] SEC("license") = "GPL";
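
What BPF_F_PRESERVE_ELEMS changes: without it, a perf event array drops
its elements once the last user-space fd to the map is released, even
while a loaded program still references the map; with the flag, the
elements stay. A user-space sketch of installing an event so the
difference between array_1 and array_2 can be observed; the map fds are
assumed to come from libbpf:

#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>
#include <bpf/bpf.h>

/* Install a software perf event at key 0. After every user-space fd
 * to the map is closed, the element survives in array_2 (created with
 * BPF_F_PRESERVE_ELEMS) but is removed from array_1, which is what
 * the two read_array programs above can distinguish.
 */
static int install_event(int map_fd)
{
	struct perf_event_attr attr = {
		.type = PERF_TYPE_SOFTWARE,
		.size = sizeof(attr),
		.config = PERF_COUNT_SW_CPU_CLOCK,
	};
	int key = 0, pe_fd, err;

	pe_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
			0 /* cpu */, -1 /* group fd */, 0 /* flags */);
	if (pe_fd < 0)
		return pe_fd;

	err = bpf_map_update_elem(map_fd, &key, &pe_fd, BPF_ANY);
	close(pe_fd);	/* the map element keeps its own reference */
	return err;
}
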
diff --git a/tools/testing/selftests/bpf/progs/test_pkt_access.c b/tools/testing/selftests/bpf/progs/test_pkt_access.c
index e72eba4a93d2..852051064507 100644
--- a/tools/testing/selftests/bpf/progs/test_pkt_access.c
+++ b/tools/testing/selftests/bpf/progs/test_pkt_access.c
@@ -79,6 +79,24 @@ int get_skb_ifindex(int val, struct __sk_buff *skb, int var)
return skb->ifindex * val * var;
}
+__attribute__ ((noinline))
+int test_pkt_write_access_subprog(struct __sk_buff *skb, __u32 off)
+{
+ void *data = (void *)(long)skb->data;
+ void *data_end = (void *)(long)skb->data_end;
+ struct tcphdr *tcp = NULL;
+
+ if (off > sizeof(struct ethhdr) + sizeof(struct ipv6hdr))
+ return -1;
+
+ tcp = data + off;
+ if (tcp + 1 > data_end)
+ return -1;
+ /* make modification to the packet data */
+ tcp->check++;
+ return 0;
+}
+
SEC("classifier/test_pkt_access")
int test_pkt_access(struct __sk_buff *skb)
{
@@ -117,6 +135,8 @@ int test_pkt_access(struct __sk_buff *skb)
if (test_pkt_access_subprog3(3, skb) != skb->len * 3 * skb->ifindex)
return TC_ACT_SHOT;
if (tcp) {
+ if (test_pkt_write_access_subprog(skb, (void *)tcp - data))
+ return TC_ACT_SHOT;
if (((void *)(tcp) + 20) > data_end || proto != 6)
return TC_ACT_SHOT;
barrier(); /* to force ordering of checks */
diff --git a/tools/testing/selftests/bpf/progs/test_raw_tp_test_run.c b/tools/testing/selftests/bpf/progs/test_raw_tp_test_run.c
new file mode 100644
index 000000000000..4c63cc87b9d0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_raw_tp_test_run.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+__u32 count = 0;
+__u32 on_cpu = 0xffffffff;
+
+SEC("raw_tp/task_rename")
+int BPF_PROG(rename, struct task_struct *task, char *comm)
+{
+ count++;
+ if ((__u64) task == 0x1234ULL && (__u64) comm == 0x5678ULL) {
+ on_cpu = bpf_get_smp_processor_id();
+ return (long)task + (long)comm;
+ }
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
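
The 0x1234/0x5678 constants are matched from user space: test-running a
raw tracepoint program passes a caller-supplied array of __u64 args as
the context, and BPF_F_TEST_RUN_ON_CPU pins the run to a chosen CPU so
on_cpu can be verified. A sketch of that driver side; it assumes libbpf's
bpf_test_run_opts/bpf_prog_test_run_opts from the same patch series and a
prog_fd for the program above:

#include <linux/bpf.h>
#include <bpf/bpf.h>

static int run_on_cpu0(int prog_fd)
{
	struct {
		__u64 args[2];	/* seen as (task, comm) by BPF_PROG */
	} ctx = {
		.args = { 0x1234ULL, 0x5678ULL },
	};
	DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
		.ctx_in = &ctx,
		.ctx_size_in = sizeof(ctx),
		.flags = BPF_F_TEST_RUN_ON_CPU,
		.cpu = 0,
	);

	return bpf_prog_test_run_opts(prog_fd, &opts);
}
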
diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup.c b/tools/testing/selftests/bpf/progs/test_sk_lookup.c
index bbf8296f4d66..1032b292af5b 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_lookup.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_lookup.c
@@ -19,6 +19,17 @@
#define IP6(aaaa, bbbb, cccc, dddd) \
{ bpf_htonl(aaaa), bpf_htonl(bbbb), bpf_htonl(cccc), bpf_htonl(dddd) }
+/* Macros for least-significant byte and word accesses. */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define LSE_INDEX(index, size) (index)
+#else
+#define LSE_INDEX(index, size) ((size) - (index) - 1)
+#endif
+#define LSB(value, index) \
+ (((__u8 *)&(value))[LSE_INDEX((index), sizeof(value))])
+#define LSW(value, index) \
+ (((__u16 *)&(value))[LSE_INDEX((index), sizeof(value) / 2)])
+
#define MAX_SOCKS 32
struct {
@@ -369,171 +380,146 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx)
{
struct bpf_sock *sk;
int err, family;
- __u16 *half;
- __u8 *byte;
bool v4;
v4 = (ctx->family == AF_INET);
/* Narrow loads from family field */
- byte = (__u8 *)&ctx->family;
- half = (__u16 *)&ctx->family;
- if (byte[0] != (v4 ? AF_INET : AF_INET6) ||
- byte[1] != 0 || byte[2] != 0 || byte[3] != 0)
+ if (LSB(ctx->family, 0) != (v4 ? AF_INET : AF_INET6) ||
+ LSB(ctx->family, 1) != 0 || LSB(ctx->family, 2) != 0 || LSB(ctx->family, 3) != 0)
return SK_DROP;
- if (half[0] != (v4 ? AF_INET : AF_INET6))
+ if (LSW(ctx->family, 0) != (v4 ? AF_INET : AF_INET6))
return SK_DROP;
- byte = (__u8 *)&ctx->protocol;
- if (byte[0] != IPPROTO_TCP ||
- byte[1] != 0 || byte[2] != 0 || byte[3] != 0)
+ /* Narrow loads from protocol field */
+ if (LSB(ctx->protocol, 0) != IPPROTO_TCP ||
+ LSB(ctx->protocol, 1) != 0 || LSB(ctx->protocol, 2) != 0 || LSB(ctx->protocol, 3) != 0)
return SK_DROP;
- half = (__u16 *)&ctx->protocol;
- if (half[0] != IPPROTO_TCP)
+ if (LSW(ctx->protocol, 0) != IPPROTO_TCP)
return SK_DROP;
/* Narrow loads from remote_port field. Expect non-0 value. */
- byte = (__u8 *)&ctx->remote_port;
- if (byte[0] == 0 && byte[1] == 0 && byte[2] == 0 && byte[3] == 0)
+ if (LSB(ctx->remote_port, 0) == 0 && LSB(ctx->remote_port, 1) == 0 &&
+ LSB(ctx->remote_port, 2) == 0 && LSB(ctx->remote_port, 3) == 0)
return SK_DROP;
- half = (__u16 *)&ctx->remote_port;
- if (half[0] == 0)
+ if (LSW(ctx->remote_port, 0) == 0)
return SK_DROP;
/* Narrow loads from local_port field. Expect DST_PORT. */
- byte = (__u8 *)&ctx->local_port;
- if (byte[0] != ((DST_PORT >> 0) & 0xff) ||
- byte[1] != ((DST_PORT >> 8) & 0xff) ||
- byte[2] != 0 || byte[3] != 0)
+ if (LSB(ctx->local_port, 0) != ((DST_PORT >> 0) & 0xff) ||
+ LSB(ctx->local_port, 1) != ((DST_PORT >> 8) & 0xff) ||
+ LSB(ctx->local_port, 2) != 0 || LSB(ctx->local_port, 3) != 0)
return SK_DROP;
- half = (__u16 *)&ctx->local_port;
- if (half[0] != DST_PORT)
+ if (LSW(ctx->local_port, 0) != DST_PORT)
return SK_DROP;
/* Narrow loads from IPv4 fields */
if (v4) {
/* Expect non-0.0.0.0 in remote_ip4 */
- byte = (__u8 *)&ctx->remote_ip4;
- if (byte[0] == 0 && byte[1] == 0 &&
- byte[2] == 0 && byte[3] == 0)
+ if (LSB(ctx->remote_ip4, 0) == 0 && LSB(ctx->remote_ip4, 1) == 0 &&
+ LSB(ctx->remote_ip4, 2) == 0 && LSB(ctx->remote_ip4, 3) == 0)
return SK_DROP;
- half = (__u16 *)&ctx->remote_ip4;
- if (half[0] == 0 && half[1] == 0)
+ if (LSW(ctx->remote_ip4, 0) == 0 && LSW(ctx->remote_ip4, 1) == 0)
return SK_DROP;
/* Expect DST_IP4 in local_ip4 */
- byte = (__u8 *)&ctx->local_ip4;
- if (byte[0] != ((DST_IP4 >> 0) & 0xff) ||
- byte[1] != ((DST_IP4 >> 8) & 0xff) ||
- byte[2] != ((DST_IP4 >> 16) & 0xff) ||
- byte[3] != ((DST_IP4 >> 24) & 0xff))
+ if (LSB(ctx->local_ip4, 0) != ((DST_IP4 >> 0) & 0xff) ||
+ LSB(ctx->local_ip4, 1) != ((DST_IP4 >> 8) & 0xff) ||
+ LSB(ctx->local_ip4, 2) != ((DST_IP4 >> 16) & 0xff) ||
+ LSB(ctx->local_ip4, 3) != ((DST_IP4 >> 24) & 0xff))
return SK_DROP;
- half = (__u16 *)&ctx->local_ip4;
- if (half[0] != ((DST_IP4 >> 0) & 0xffff) ||
- half[1] != ((DST_IP4 >> 16) & 0xffff))
+ if (LSW(ctx->local_ip4, 0) != ((DST_IP4 >> 0) & 0xffff) ||
+ LSW(ctx->local_ip4, 1) != ((DST_IP4 >> 16) & 0xffff))
return SK_DROP;
} else {
/* Expect 0.0.0.0 IPs when family != AF_INET */
- byte = (__u8 *)&ctx->remote_ip4;
- if (byte[0] != 0 || byte[1] != 0 &&
- byte[2] != 0 || byte[3] != 0)
+ if (LSB(ctx->remote_ip4, 0) != 0 || LSB(ctx->remote_ip4, 1) != 0 ||
+ LSB(ctx->remote_ip4, 2) != 0 || LSB(ctx->remote_ip4, 3) != 0)
return SK_DROP;
- half = (__u16 *)&ctx->remote_ip4;
- if (half[0] != 0 || half[1] != 0)
+ if (LSW(ctx->remote_ip4, 0) != 0 || LSW(ctx->remote_ip4, 1) != 0)
return SK_DROP;
- byte = (__u8 *)&ctx->local_ip4;
- if (byte[0] != 0 || byte[1] != 0 &&
- byte[2] != 0 || byte[3] != 0)
+ if (LSB(ctx->local_ip4, 0) != 0 || LSB(ctx->local_ip4, 1) != 0 ||
+ LSB(ctx->local_ip4, 2) != 0 || LSB(ctx->local_ip4, 3) != 0)
return SK_DROP;
- half = (__u16 *)&ctx->local_ip4;
- if (half[0] != 0 || half[1] != 0)
+ if (LSW(ctx->local_ip4, 0) != 0 || LSW(ctx->local_ip4, 1) != 0)
return SK_DROP;
}
/* Narrow loads from IPv6 fields */
if (!v4) {
- /* Expenct non-:: IP in remote_ip6 */
- byte = (__u8 *)&ctx->remote_ip6;
- if (byte[0] == 0 && byte[1] == 0 &&
- byte[2] == 0 && byte[3] == 0 &&
- byte[4] == 0 && byte[5] == 0 &&
- byte[6] == 0 && byte[7] == 0 &&
- byte[8] == 0 && byte[9] == 0 &&
- byte[10] == 0 && byte[11] == 0 &&
- byte[12] == 0 && byte[13] == 0 &&
- byte[14] == 0 && byte[15] == 0)
+ /* Expect non-:: IP in remote_ip6 */
+ if (LSB(ctx->remote_ip6[0], 0) == 0 && LSB(ctx->remote_ip6[0], 1) == 0 &&
+ LSB(ctx->remote_ip6[0], 2) == 0 && LSB(ctx->remote_ip6[0], 3) == 0 &&
+ LSB(ctx->remote_ip6[1], 0) == 0 && LSB(ctx->remote_ip6[1], 1) == 0 &&
+ LSB(ctx->remote_ip6[1], 2) == 0 && LSB(ctx->remote_ip6[1], 3) == 0 &&
+ LSB(ctx->remote_ip6[2], 0) == 0 && LSB(ctx->remote_ip6[2], 1) == 0 &&
+ LSB(ctx->remote_ip6[2], 2) == 0 && LSB(ctx->remote_ip6[2], 3) == 0 &&
+ LSB(ctx->remote_ip6[3], 0) == 0 && LSB(ctx->remote_ip6[3], 1) == 0 &&
+ LSB(ctx->remote_ip6[3], 2) == 0 && LSB(ctx->remote_ip6[3], 3) == 0)
return SK_DROP;
- half = (__u16 *)&ctx->remote_ip6;
- if (half[0] == 0 && half[1] == 0 &&
- half[2] == 0 && half[3] == 0 &&
- half[4] == 0 && half[5] == 0 &&
- half[6] == 0 && half[7] == 0)
+ if (LSW(ctx->remote_ip6[0], 0) == 0 && LSW(ctx->remote_ip6[0], 1) == 0 &&
+ LSW(ctx->remote_ip6[1], 0) == 0 && LSW(ctx->remote_ip6[1], 1) == 0 &&
+ LSW(ctx->remote_ip6[2], 0) == 0 && LSW(ctx->remote_ip6[2], 1) == 0 &&
+ LSW(ctx->remote_ip6[3], 0) == 0 && LSW(ctx->remote_ip6[3], 1) == 0)
return SK_DROP;
-
/* Expect DST_IP6 in local_ip6 */
- byte = (__u8 *)&ctx->local_ip6;
- if (byte[0] != ((DST_IP6[0] >> 0) & 0xff) ||
- byte[1] != ((DST_IP6[0] >> 8) & 0xff) ||
- byte[2] != ((DST_IP6[0] >> 16) & 0xff) ||
- byte[3] != ((DST_IP6[0] >> 24) & 0xff) ||
- byte[4] != ((DST_IP6[1] >> 0) & 0xff) ||
- byte[5] != ((DST_IP6[1] >> 8) & 0xff) ||
- byte[6] != ((DST_IP6[1] >> 16) & 0xff) ||
- byte[7] != ((DST_IP6[1] >> 24) & 0xff) ||
- byte[8] != ((DST_IP6[2] >> 0) & 0xff) ||
- byte[9] != ((DST_IP6[2] >> 8) & 0xff) ||
- byte[10] != ((DST_IP6[2] >> 16) & 0xff) ||
- byte[11] != ((DST_IP6[2] >> 24) & 0xff) ||
- byte[12] != ((DST_IP6[3] >> 0) & 0xff) ||
- byte[13] != ((DST_IP6[3] >> 8) & 0xff) ||
- byte[14] != ((DST_IP6[3] >> 16) & 0xff) ||
- byte[15] != ((DST_IP6[3] >> 24) & 0xff))
+ if (LSB(ctx->local_ip6[0], 0) != ((DST_IP6[0] >> 0) & 0xff) ||
+ LSB(ctx->local_ip6[0], 1) != ((DST_IP6[0] >> 8) & 0xff) ||
+ LSB(ctx->local_ip6[0], 2) != ((DST_IP6[0] >> 16) & 0xff) ||
+ LSB(ctx->local_ip6[0], 3) != ((DST_IP6[0] >> 24) & 0xff) ||
+ LSB(ctx->local_ip6[1], 0) != ((DST_IP6[1] >> 0) & 0xff) ||
+ LSB(ctx->local_ip6[1], 1) != ((DST_IP6[1] >> 8) & 0xff) ||
+ LSB(ctx->local_ip6[1], 2) != ((DST_IP6[1] >> 16) & 0xff) ||
+ LSB(ctx->local_ip6[1], 3) != ((DST_IP6[1] >> 24) & 0xff) ||
+ LSB(ctx->local_ip6[2], 0) != ((DST_IP6[2] >> 0) & 0xff) ||
+ LSB(ctx->local_ip6[2], 1) != ((DST_IP6[2] >> 8) & 0xff) ||
+ LSB(ctx->local_ip6[2], 2) != ((DST_IP6[2] >> 16) & 0xff) ||
+ LSB(ctx->local_ip6[2], 3) != ((DST_IP6[2] >> 24) & 0xff) ||
+ LSB(ctx->local_ip6[3], 0) != ((DST_IP6[3] >> 0) & 0xff) ||
+ LSB(ctx->local_ip6[3], 1) != ((DST_IP6[3] >> 8) & 0xff) ||
+ LSB(ctx->local_ip6[3], 2) != ((DST_IP6[3] >> 16) & 0xff) ||
+ LSB(ctx->local_ip6[3], 3) != ((DST_IP6[3] >> 24) & 0xff))
return SK_DROP;
- half = (__u16 *)&ctx->local_ip6;
- if (half[0] != ((DST_IP6[0] >> 0) & 0xffff) ||
- half[1] != ((DST_IP6[0] >> 16) & 0xffff) ||
- half[2] != ((DST_IP6[1] >> 0) & 0xffff) ||
- half[3] != ((DST_IP6[1] >> 16) & 0xffff) ||
- half[4] != ((DST_IP6[2] >> 0) & 0xffff) ||
- half[5] != ((DST_IP6[2] >> 16) & 0xffff) ||
- half[6] != ((DST_IP6[3] >> 0) & 0xffff) ||
- half[7] != ((DST_IP6[3] >> 16) & 0xffff))
+ if (LSW(ctx->local_ip6[0], 0) != ((DST_IP6[0] >> 0) & 0xffff) ||
+ LSW(ctx->local_ip6[0], 1) != ((DST_IP6[0] >> 16) & 0xffff) ||
+ LSW(ctx->local_ip6[1], 0) != ((DST_IP6[1] >> 0) & 0xffff) ||
+ LSW(ctx->local_ip6[1], 1) != ((DST_IP6[1] >> 16) & 0xffff) ||
+ LSW(ctx->local_ip6[2], 0) != ((DST_IP6[2] >> 0) & 0xffff) ||
+ LSW(ctx->local_ip6[2], 1) != ((DST_IP6[2] >> 16) & 0xffff) ||
+ LSW(ctx->local_ip6[3], 0) != ((DST_IP6[3] >> 0) & 0xffff) ||
+ LSW(ctx->local_ip6[3], 1) != ((DST_IP6[3] >> 16) & 0xffff))
return SK_DROP;
} else {
/* Expect :: IPs when family != AF_INET6 */
- byte = (__u8 *)&ctx->remote_ip6;
- if (byte[0] != 0 || byte[1] != 0 ||
- byte[2] != 0 || byte[3] != 0 ||
- byte[4] != 0 || byte[5] != 0 ||
- byte[6] != 0 || byte[7] != 0 ||
- byte[8] != 0 || byte[9] != 0 ||
- byte[10] != 0 || byte[11] != 0 ||
- byte[12] != 0 || byte[13] != 0 ||
- byte[14] != 0 || byte[15] != 0)
+ if (LSB(ctx->remote_ip6[0], 0) != 0 || LSB(ctx->remote_ip6[0], 1) != 0 ||
+ LSB(ctx->remote_ip6[0], 2) != 0 || LSB(ctx->remote_ip6[0], 3) != 0 ||
+ LSB(ctx->remote_ip6[1], 0) != 0 || LSB(ctx->remote_ip6[1], 1) != 0 ||
+ LSB(ctx->remote_ip6[1], 2) != 0 || LSB(ctx->remote_ip6[1], 3) != 0 ||
+ LSB(ctx->remote_ip6[2], 0) != 0 || LSB(ctx->remote_ip6[2], 1) != 0 ||
+ LSB(ctx->remote_ip6[2], 2) != 0 || LSB(ctx->remote_ip6[2], 3) != 0 ||
+ LSB(ctx->remote_ip6[3], 0) != 0 || LSB(ctx->remote_ip6[3], 1) != 0 ||
+ LSB(ctx->remote_ip6[3], 2) != 0 || LSB(ctx->remote_ip6[3], 3) != 0)
return SK_DROP;
- half = (__u16 *)&ctx->remote_ip6;
- if (half[0] != 0 || half[1] != 0 ||
- half[2] != 0 || half[3] != 0 ||
- half[4] != 0 || half[5] != 0 ||
- half[6] != 0 || half[7] != 0)
+ if (LSW(ctx->remote_ip6[0], 0) != 0 || LSW(ctx->remote_ip6[0], 1) != 0 ||
+ LSW(ctx->remote_ip6[1], 0) != 0 || LSW(ctx->remote_ip6[1], 1) != 0 ||
+ LSW(ctx->remote_ip6[2], 0) != 0 || LSW(ctx->remote_ip6[2], 1) != 0 ||
+ LSW(ctx->remote_ip6[3], 0) != 0 || LSW(ctx->remote_ip6[3], 1) != 0)
return SK_DROP;
- byte = (__u8 *)&ctx->local_ip6;
- if (byte[0] != 0 || byte[1] != 0 ||
- byte[2] != 0 || byte[3] != 0 ||
- byte[4] != 0 || byte[5] != 0 ||
- byte[6] != 0 || byte[7] != 0 ||
- byte[8] != 0 || byte[9] != 0 ||
- byte[10] != 0 || byte[11] != 0 ||
- byte[12] != 0 || byte[13] != 0 ||
- byte[14] != 0 || byte[15] != 0)
+ if (LSB(ctx->local_ip6[0], 0) != 0 || LSB(ctx->local_ip6[0], 1) != 0 ||
+ LSB(ctx->local_ip6[0], 2) != 0 || LSB(ctx->local_ip6[0], 3) != 0 ||
+ LSB(ctx->local_ip6[1], 0) != 0 || LSB(ctx->local_ip6[1], 1) != 0 ||
+ LSB(ctx->local_ip6[1], 2) != 0 || LSB(ctx->local_ip6[1], 3) != 0 ||
+ LSB(ctx->local_ip6[2], 0) != 0 || LSB(ctx->local_ip6[2], 1) != 0 ||
+ LSB(ctx->local_ip6[2], 2) != 0 || LSB(ctx->local_ip6[2], 3) != 0 ||
+ LSB(ctx->local_ip6[3], 0) != 0 || LSB(ctx->local_ip6[3], 1) != 0 ||
+ LSB(ctx->local_ip6[3], 2) != 0 || LSB(ctx->local_ip6[3], 3) != 0)
return SK_DROP;
- half = (__u16 *)&ctx->local_ip6;
- if (half[0] != 0 || half[1] != 0 ||
- half[2] != 0 || half[3] != 0 ||
- half[4] != 0 || half[5] != 0 ||
- half[6] != 0 || half[7] != 0)
+ if (LSW(ctx->local_ip6[0], 0) != 0 || LSW(ctx->local_ip6[0], 1) != 0 ||
+ LSW(ctx->local_ip6[1], 0) != 0 || LSW(ctx->local_ip6[1], 1) != 0 ||
+ LSW(ctx->local_ip6[2], 0) != 0 || LSW(ctx->local_ip6[2], 1) != 0 ||
+ LSW(ctx->local_ip6[3], 0) != 0 || LSW(ctx->local_ip6[3], 1) != 0)
return SK_DROP;
}
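
The LSB()/LSW() helpers used throughout this hunk replace the open-coded __u8/__u16 pointer casts so that each access compiles to a single narrow load at a fixed byte or halfword offset. They are presumably defined earlier in the same selftest along these lines (a sketch of the assumed definitions, not a verbatim quote from the tree):

    /* Assumed helper macros: extract byte/halfword `index` of a field
     * so the verifier sees a narrow context load.
     */
    #define LSB(value, index)	\
    	(((__u8 *)&(value))[(index)] & 0xff)
    #define LSW(value, index)	\
    	(((__u16 *)&(value))[(index)] & 0xffff)
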
diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c b/tools/testing/selftests/bpf/progs/test_sock_fields.c
index 9bcaa37f476a..81b57b9aaaea 100644
--- a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_sock_fields.c
@@ -7,19 +7,7 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
-
-enum bpf_addr_array_idx {
- ADDR_SRV_IDX,
- ADDR_CLI_IDX,
- __NR_BPF_ADDR_ARRAY_IDX,
-};
-
-enum bpf_result_array_idx {
- EGRESS_SRV_IDX,
- EGRESS_CLI_IDX,
- INGRESS_LISTEN_IDX,
- __NR_BPF_RESULT_ARRAY_IDX,
-};
+#include "bpf_tcp_helpers.h"
enum bpf_linum_array_idx {
EGRESS_LINUM_IDX,
@@ -29,27 +17,6 @@ enum bpf_linum_array_idx {
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
- __uint(max_entries, __NR_BPF_ADDR_ARRAY_IDX);
- __type(key, __u32);
- __type(value, struct sockaddr_in6);
-} addr_map SEC(".maps");
-
-struct {
- __uint(type, BPF_MAP_TYPE_ARRAY);
- __uint(max_entries, __NR_BPF_RESULT_ARRAY_IDX);
- __type(key, __u32);
- __type(value, struct bpf_sock);
-} sock_result_map SEC(".maps");
-
-struct {
- __uint(type, BPF_MAP_TYPE_ARRAY);
- __uint(max_entries, __NR_BPF_RESULT_ARRAY_IDX);
- __type(key, __u32);
- __type(value, struct bpf_tcp_sock);
-} tcp_sock_result_map SEC(".maps");
-
-struct {
- __uint(type, BPF_MAP_TYPE_ARRAY);
__uint(max_entries, __NR_BPF_LINUM_ARRAY_IDX);
__type(key, __u32);
__type(value, __u32);
@@ -74,6 +41,17 @@ struct {
__type(value, struct bpf_spinlock_cnt);
} sk_pkt_out_cnt10 SEC(".maps");
+struct bpf_tcp_sock listen_tp = {};
+struct sockaddr_in6 srv_sa6 = {};
+struct bpf_tcp_sock cli_tp = {};
+struct bpf_tcp_sock srv_tp = {};
+struct bpf_sock listen_sk = {};
+struct bpf_sock srv_sk = {};
+struct bpf_sock cli_sk = {};
+__u64 parent_cg_id = 0;
+__u64 child_cg_id = 0;
+__u64 lsndtime = 0;
+
static bool is_loopback6(__u32 *a6)
{
return !a6[0] && !a6[1] && !a6[2] && a6[3] == bpf_htonl(1);
@@ -130,62 +108,86 @@ static void tpcpy(struct bpf_tcp_sock *dst,
dst->bytes_acked = src->bytes_acked;
}
-#define RETURN { \
+/* Always return CG_OK so that no pkt will be filtered out */
+#define CG_OK 1
+
+#define RET_LOG() ({ \
linum = __LINE__; \
- bpf_map_update_elem(&linum_map, &linum_idx, &linum, 0); \
- return 1; \
-}
+ bpf_map_update_elem(&linum_map, &linum_idx, &linum, BPF_NOEXIST); \
+ return CG_OK; \
+})
SEC("cgroup_skb/egress")
int egress_read_sock_fields(struct __sk_buff *skb)
{
struct bpf_spinlock_cnt cli_cnt_init = { .lock = 0, .cnt = 0xeB9F };
- __u32 srv_idx = ADDR_SRV_IDX, cli_idx = ADDR_CLI_IDX, result_idx;
struct bpf_spinlock_cnt *pkt_out_cnt, *pkt_out_cnt10;
- struct sockaddr_in6 *srv_sa6, *cli_sa6;
struct bpf_tcp_sock *tp, *tp_ret;
struct bpf_sock *sk, *sk_ret;
__u32 linum, linum_idx;
+ struct tcp_sock *ktp;
linum_idx = EGRESS_LINUM_IDX;
sk = skb->sk;
- if (!sk || sk->state == 10)
- RETURN;
+ if (!sk)
+ RET_LOG();
+ /* Not the testing egress traffic, or a TCP_LISTEN (10) socket,
+ * whose fields are copied at the ingress side instead.
+ */
+ if (sk->family != AF_INET6 || !is_loopback6(sk->src_ip6) ||
+ sk->state == 10)
+ return CG_OK;
+
+ if (sk->src_port == bpf_ntohs(srv_sa6.sin6_port)) {
+ /* Server socket */
+ sk_ret = &srv_sk;
+ tp_ret = &srv_tp;
+ } else if (sk->dst_port == srv_sa6.sin6_port) {
+ /* Client socket */
+ sk_ret = &cli_sk;
+ tp_ret = &cli_tp;
+ } else {
+ /* Not the testing egress traffic */
+ return CG_OK;
+ }
+
+ /* It must be a fullsock for cgroup_skb/egress prog */
sk = bpf_sk_fullsock(sk);
- if (!sk || sk->family != AF_INET6 || sk->protocol != IPPROTO_TCP ||
- !is_loopback6(sk->src_ip6))
- RETURN;
+ if (!sk)
+ RET_LOG();
+
+ /* Not the testing egress traffic */
+ if (sk->protocol != IPPROTO_TCP)
+ return CG_OK;
tp = bpf_tcp_sock(sk);
if (!tp)
- RETURN;
+ RET_LOG();
- srv_sa6 = bpf_map_lookup_elem(&addr_map, &srv_idx);
- cli_sa6 = bpf_map_lookup_elem(&addr_map, &cli_idx);
- if (!srv_sa6 || !cli_sa6)
- RETURN;
+ skcpy(sk_ret, sk);
+ tpcpy(tp_ret, tp);
- if (sk->src_port == bpf_ntohs(srv_sa6->sin6_port))
- result_idx = EGRESS_SRV_IDX;
- else if (sk->src_port == bpf_ntohs(cli_sa6->sin6_port))
- result_idx = EGRESS_CLI_IDX;
- else
- RETURN;
+ if (sk_ret == &srv_sk) {
+ ktp = bpf_skc_to_tcp_sock(sk);
- sk_ret = bpf_map_lookup_elem(&sock_result_map, &result_idx);
- tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &result_idx);
- if (!sk_ret || !tp_ret)
- RETURN;
+ if (!ktp)
+ RET_LOG();
- skcpy(sk_ret, sk);
- tpcpy(tp_ret, tp);
+ lsndtime = ktp->lsndtime;
+
+ child_cg_id = bpf_sk_cgroup_id(ktp);
+ if (!child_cg_id)
+ RET_LOG();
+
+ parent_cg_id = bpf_sk_ancestor_cgroup_id(ktp, 2);
+ if (!parent_cg_id)
+ RET_LOG();
- if (result_idx == EGRESS_SRV_IDX) {
/* The userspace has created it for srv sk */
- pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, sk, 0, 0);
- pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10, sk,
+ pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, ktp, 0, 0);
+ pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10, ktp,
0, 0);
} else {
pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, sk,
@@ -197,7 +199,7 @@ int egress_read_sock_fields(struct __sk_buff *skb)
}
if (!pkt_out_cnt || !pkt_out_cnt10)
- RETURN;
+ RET_LOG();
/* Even both cnt and cnt10 have lock defined in their BTF,
* intentionally one cnt takes lock while one does not
@@ -208,48 +210,44 @@ int egress_read_sock_fields(struct __sk_buff *skb)
pkt_out_cnt10->cnt += 10;
bpf_spin_unlock(&pkt_out_cnt10->lock);
- RETURN;
+ return CG_OK;
}
SEC("cgroup_skb/ingress")
int ingress_read_sock_fields(struct __sk_buff *skb)
{
- __u32 srv_idx = ADDR_SRV_IDX, result_idx = INGRESS_LISTEN_IDX;
- struct bpf_tcp_sock *tp, *tp_ret;
- struct bpf_sock *sk, *sk_ret;
- struct sockaddr_in6 *srv_sa6;
+ struct bpf_tcp_sock *tp;
__u32 linum, linum_idx;
+ struct bpf_sock *sk;
linum_idx = INGRESS_LINUM_IDX;
sk = skb->sk;
- if (!sk || sk->family != AF_INET6 || !is_loopback6(sk->src_ip6))
- RETURN;
+ if (!sk)
+ RET_LOG();
- srv_sa6 = bpf_map_lookup_elem(&addr_map, &srv_idx);
- if (!srv_sa6 || sk->src_port != bpf_ntohs(srv_sa6->sin6_port))
- RETURN;
+ /* Not the testing ingress traffic to the server */
+ if (sk->family != AF_INET6 || !is_loopback6(sk->src_ip6) ||
+ sk->src_port != bpf_ntohs(srv_sa6.sin6_port))
+ return CG_OK;
- if (sk->state != 10 && sk->state != 12)
- RETURN;
+ /* Only interested in TCP_LISTEN */
+ if (sk->state != 10)
+ return CG_OK;
- sk = bpf_get_listener_sock(sk);
+ /* It must be a fullsock for cgroup_skb/ingress prog */
+ sk = bpf_sk_fullsock(sk);
if (!sk)
- RETURN;
+ RET_LOG();
tp = bpf_tcp_sock(sk);
if (!tp)
- RETURN;
-
- sk_ret = bpf_map_lookup_elem(&sock_result_map, &result_idx);
- tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &result_idx);
- if (!sk_ret || !tp_ret)
- RETURN;
+ RET_LOG();
- skcpy(sk_ret, sk);
- tpcpy(tp_ret, tp);
+ skcpy(&listen_sk, sk);
+ tpcpy(&listen_tp, tp);
- RETURN;
+ return CG_OK;
}
char _license[] SEC("license") = "GPL";
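
With the result maps replaced by plain global variables (srv_sk, srv_tp, listen_tp, and friends), the userspace half of this test can read the results directly through the generated skeleton instead of doing bpf_map_lookup_elem() round-trips. A minimal sketch, assuming a test_sock_fields.skel.h skeleton is generated for this object:

    #include "test_sock_fields.skel.h"	/* assumed generated skeleton */

    struct test_sock_fields *skel;

    skel = test_sock_fields__open_and_load();
    if (!skel)
    	return;
    /* ... attach the cgroup_skb programs and drive traffic ... */
    /* zero-initialized globals land in .bss and are mmap-ed by the skeleton */
    printf("srv snd_cwnd: %u\n", skel->bss->srv_tp.snd_cwnd);
    test_sock_fields__destroy(skel);
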
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_invalid_update.c b/tools/testing/selftests/bpf/progs/test_sockmap_invalid_update.c
new file mode 100644
index 000000000000..02a59e220cbc
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_invalid_update.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Cloudflare
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} map SEC(".maps");
+
+SEC("sockops")
+int bpf_sockmap(struct bpf_sock_ops *skops)
+{
+ __u32 key = 0;
+
+ if (skops->sk)
+ bpf_map_update_elem(&map, &key, skops->sk, 0);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
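
This program presumably exists to be rejected: inserting a sockops program's own skops->sk into a sockmap from that context was disallowed at the time, so the test's success condition is a failed load. A sketch of the expected userspace check (names assumed):

    struct bpf_object *obj;
    int err;

    obj = bpf_object__open_file("test_sockmap_invalid_update.o", NULL);
    if (libbpf_get_error(obj))
    	return;
    /* load is expected to FAIL; a successful load would be the bug */
    err = bpf_object__load(obj);
    if (!err)
    	fprintf(stderr, "unexpected successful load\n");
    bpf_object__close(obj);
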
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
index 3dca4c2e2418..1858435de7aa 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
@@ -131,39 +131,55 @@ int bpf_prog2(struct __sk_buff *skb)
}
-SEC("sk_skb3")
-int bpf_prog3(struct __sk_buff *skb)
+static inline void bpf_write_pass(struct __sk_buff *skb, int offset)
{
- const int one = 1;
- int err, *f, ret = SK_PASS;
+ int err = bpf_skb_pull_data(skb, 6 + offset);
void *data_end;
char *c;
- err = bpf_skb_pull_data(skb, 19);
if (err)
- goto tls_out;
+ return;
c = (char *)(long)skb->data;
data_end = (void *)(long)skb->data_end;
- if (c + 18 < data_end)
- memcpy(&c[13], "PASS", 4);
+ if (c + 5 + offset < data_end)
+ memcpy(c + offset, "PASS", 4);
+}
+
+SEC("sk_skb3")
+int bpf_prog3(struct __sk_buff *skb)
+{
+ int err, *f, ret = SK_PASS;
+ const int one = 1;
+
f = bpf_map_lookup_elem(&sock_skb_opts, &one);
if (f && *f) {
__u64 flags = 0;
ret = 0;
flags = *f;
+
+ err = bpf_skb_adjust_room(skb, -13, 0, 0);
+ if (err)
+ return SK_DROP;
+ err = bpf_skb_adjust_room(skb, 4, 0, 0);
+ if (err)
+ return SK_DROP;
+ bpf_write_pass(skb, 0);
#ifdef SOCKMAP
return bpf_sk_redirect_map(skb, &tls_sock_map, ret, flags);
#else
return bpf_sk_redirect_hash(skb, &tls_sock_map, &ret, flags);
#endif
}
-
f = bpf_map_lookup_elem(&sock_skb_opts, &one);
if (f && *f)
ret = SK_DROP;
+ err = bpf_skb_adjust_room(skb, 4, 0, 0);
+ if (err)
+ return SK_DROP;
+ bpf_write_pass(skb, 13);
tls_out:
return ret;
}
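
On the adjust_room arithmetic above: in the redirect path the payload presumably arrives behind a 13-byte TLS record prefix (5-byte record header plus 8-byte explicit nonce, an assumption about the test traffic), so the -13 strips it and the +4 makes room to stamp "PASS" at offset 0, while the fall-through path keeps the prefix and writes just past it via bpf_write_pass(skb, 13):

    /* sketch of the redirect-path transformation (layout assumed):
     *   before:  | 13-byte record prefix | payload ... |
     *   -13:     | payload ... |
     *   +4:      | 4 free bytes | payload ... |  -> "PASS" written at 0
     */
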
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_update.c b/tools/testing/selftests/bpf/progs/test_sockmap_update.c
new file mode 100644
index 000000000000..9d0c9f28cab2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_update.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Cloudflare
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} src SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} dst_sock_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKHASH);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} dst_sock_hash SEC(".maps");
+
+SEC("classifier/copy_sock_map")
+int copy_sock_map(void *ctx)
+{
+ struct bpf_sock *sk;
+ bool failed = false;
+ __u32 key = 0;
+
+ sk = bpf_map_lookup_elem(&src, &key);
+ if (!sk)
+ return SK_DROP;
+
+ if (bpf_map_update_elem(&dst_sock_map, &key, sk, 0))
+ failed = true;
+
+ if (bpf_map_update_elem(&dst_sock_hash, &key, sk, 0))
+ failed = true;
+
+ bpf_sk_release(sk);
+ return failed ? SK_DROP : SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_subprogs.c b/tools/testing/selftests/bpf/progs/test_subprogs.c
new file mode 100644
index 000000000000..d3c5673c0218
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_subprogs.c
@@ -0,0 +1,103 @@
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+const char LICENSE[] SEC("license") = "GPL";
+
+__noinline int sub1(int x)
+{
+ return x + 1;
+}
+
+static __noinline int sub5(int v);
+
+__noinline int sub2(int y)
+{
+ return sub5(y + 2);
+}
+
+static __noinline int sub3(int z)
+{
+ return z + 3 + sub1(4);
+}
+
+static __noinline int sub4(int w)
+{
+ return w + sub3(5) + sub1(6);
+}
+
+/* sub5() is an identity function, just to test weirder function layouts and
+ * call patterns
+ */
+static __noinline int sub5(int v)
+{
+ return sub1(v) - 1; /* compensates sub1()'s + 1 */
+}
+
+/* unfortunately the verifier rejects `struct task_struct *t` as an unknown
+ * pointer type, so we need to accept the pointer as an integer and then
+ * cast it inside the function
+ */
+__noinline int get_task_tgid(uintptr_t t)
+{
+ /* this ensures that CO-RE relocs work in multi-subprogs .text */
+ return BPF_CORE_READ((struct task_struct *)(void *)t, tgid);
+}
+
+int res1 = 0;
+int res2 = 0;
+int res3 = 0;
+int res4 = 0;
+
+SEC("raw_tp/sys_enter")
+int prog1(void *ctx)
+{
+ /* perform some CO-RE relocations to ensure they work with multi-prog
+ * sections correctly
+ */
+ struct task_struct *t = (void *)bpf_get_current_task();
+
+ if (!BPF_CORE_READ(t, pid) || !get_task_tgid((uintptr_t)t))
+ return 1;
+
+ res1 = sub1(1) + sub3(2); /* (1 + 1) + (2 + 3 + (4 + 1)) = 12 */
+ return 0;
+}
+
+SEC("raw_tp/sys_exit")
+int prog2(void *ctx)
+{
+ struct task_struct *t = (void *)bpf_get_current_task();
+
+ if (!BPF_CORE_READ(t, pid) || !get_task_tgid((uintptr_t)t))
+ return 1;
+
+ res2 = sub2(3) + sub3(4); /* (3 + 2) + (4 + 3 + (4 + 1)) = 17 */
+ return 0;
+}
+
+/* prog3 has the same section name as prog1 */
+SEC("raw_tp/sys_enter")
+int prog3(void *ctx)
+{
+ struct task_struct *t = (void *)bpf_get_current_task();
+
+ if (!BPF_CORE_READ(t, pid) || !get_task_tgid((uintptr_t)t))
+ return 1;
+
+ res3 = sub3(5) + 6; /* (5 + 3 + (4 + 1)) + 6 = 19 */
+ return 0;
+}
+
+/* prog4 has the same section name as prog2 */
+SEC("raw_tp/sys_exit")
+int prog4(void *ctx)
+{
+ struct task_struct *t = (void *)bpf_get_current_task();
+
+ if (!BPF_CORE_READ(t, pid) || !get_task_tgid((uintptr_t)t))
+ return 1;
+
+ res4 = sub4(7) + sub1(8); /* (7 + (5 + 3 + (4 + 1)) + (6 + 1)) + (8 + 1) = 36 */
+ return 0;
+}
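
Since all four programs hang off the sys_enter/sys_exit raw tracepoints, any syscall triggers them, and the userspace half only needs to check the result globals. A sketch, assuming a generated test_subprogs.skel.h:

    struct test_subprogs *skel = test_subprogs__open_and_load();

    if (!skel || test_subprogs__attach(skel))
    	goto out;
    usleep(1);	/* any syscall fires the raw_tp programs */
    /* expected values per the inline comments above */
    if (skel->bss->res1 != 12 || skel->bss->res2 != 17 ||
        skel->bss->res3 != 19 || skel->bss->res4 != 36)
    	fprintf(stderr, "unexpected subprog results\n");
out:
    test_subprogs__destroy(skel);
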
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
index 458b0d69133e..553a282d816a 100644
--- a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
@@ -18,11 +18,11 @@
#define MAX_ULONG_STR_LEN 7
#define MAX_VALUE_STR_LEN (TCP_MEM_LOOPS * MAX_ULONG_STR_LEN)
+const char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string";
static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx)
{
- volatile char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string";
unsigned char i;
- char name[64];
+ char name[sizeof(tcp_mem_name)];
int ret;
memset(name, 0, sizeof(name));
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
index b2e6f9b0894d..2b64bc563a12 100644
--- a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
@@ -18,11 +18,11 @@
#define MAX_ULONG_STR_LEN 7
#define MAX_VALUE_STR_LEN (TCP_MEM_LOOPS * MAX_ULONG_STR_LEN)
+const char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string_to_stress_byte_loop";
static __attribute__((noinline)) int is_tcp_mem(struct bpf_sysctl *ctx)
{
- volatile char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string_to_stress_byte_loop";
unsigned char i;
- char name[64];
+ char name[sizeof(tcp_mem_name)];
int ret;
memset(name, 0, sizeof(name));
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
index 50525235380e..5489823c83fc 100644
--- a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
@@ -19,11 +19,11 @@
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
+const char tcp_mem_name[] = "net/ipv4/tcp_mem";
static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx)
{
- char tcp_mem_name[] = "net/ipv4/tcp_mem";
unsigned char i;
- char name[64];
+ char name[sizeof(tcp_mem_name)];
int ret;
memset(name, 0, sizeof(name));
diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh.c b/tools/testing/selftests/bpf/progs/test_tc_neigh.c
new file mode 100644
index 000000000000..fe182616b112
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tc_neigh.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdint.h>
+#include <stdbool.h>
+
+#include <linux/bpf.h>
+#include <linux/stddef.h>
+#include <linux/pkt_cls.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#ifndef ctx_ptr
+# define ctx_ptr(field) (void *)(long)(field)
+#endif
+
+#define ip4_src 0xac100164 /* 172.16.1.100 */
+#define ip4_dst 0xac100264 /* 172.16.2.100 */
+
+#define ip6_src { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+ 0x00, 0x01, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe }
+#define ip6_dst { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+ 0x00, 0x02, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe }
+
+#ifndef v6_equal
+# define v6_equal(a, b) (a.s6_addr32[0] == b.s6_addr32[0] && \
+ a.s6_addr32[1] == b.s6_addr32[1] && \
+ a.s6_addr32[2] == b.s6_addr32[2] && \
+ a.s6_addr32[3] == b.s6_addr32[3])
+#endif
+
+enum {
+ dev_src,
+ dev_dst,
+};
+
+struct bpf_map_def SEC("maps") ifindex_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 2,
+};
+
+static __always_inline bool is_remote_ep_v4(struct __sk_buff *skb,
+ __be32 addr)
+{
+ void *data_end = ctx_ptr(skb->data_end);
+ void *data = ctx_ptr(skb->data);
+ struct iphdr *ip4h;
+
+ if (data + sizeof(struct ethhdr) > data_end)
+ return false;
+
+ ip4h = (struct iphdr *)(data + sizeof(struct ethhdr));
+ if ((void *)(ip4h + 1) > data_end)
+ return false;
+
+ return ip4h->daddr == addr;
+}
+
+static __always_inline bool is_remote_ep_v6(struct __sk_buff *skb,
+ struct in6_addr addr)
+{
+ void *data_end = ctx_ptr(skb->data_end);
+ void *data = ctx_ptr(skb->data);
+ struct ipv6hdr *ip6h;
+
+ if (data + sizeof(struct ethhdr) > data_end)
+ return false;
+
+ ip6h = (struct ipv6hdr *)(data + sizeof(struct ethhdr));
+ if ((void *)(ip6h + 1) > data_end)
+ return false;
+
+ return v6_equal(ip6h->daddr, addr);
+}
+
+static __always_inline int get_dev_ifindex(int which)
+{
+ int *ifindex = bpf_map_lookup_elem(&ifindex_map, &which);
+
+ return ifindex ? *ifindex : 0;
+}
+
+SEC("chk_egress") int tc_chk(struct __sk_buff *skb)
+{
+ void *data_end = ctx_ptr(skb->data_end);
+ void *data = ctx_ptr(skb->data);
+ __u32 *raw = data;
+
+ if (data + sizeof(struct ethhdr) > data_end)
+ return TC_ACT_SHOT;
+
+ return !raw[0] && !raw[1] && !raw[2] ? TC_ACT_SHOT : TC_ACT_OK;
+}
+
+SEC("dst_ingress") int tc_dst(struct __sk_buff *skb)
+{
+ __u8 zero[ETH_ALEN * 2];
+ bool redirect = false;
+
+ switch (skb->protocol) {
+ case __bpf_constant_htons(ETH_P_IP):
+ redirect = is_remote_ep_v4(skb, __bpf_constant_htonl(ip4_src));
+ break;
+ case __bpf_constant_htons(ETH_P_IPV6):
+ redirect = is_remote_ep_v6(skb, (struct in6_addr)ip6_src);
+ break;
+ }
+
+ if (!redirect)
+ return TC_ACT_OK;
+
+ __builtin_memset(&zero, 0, sizeof(zero));
+ if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0)
+ return TC_ACT_SHOT;
+
+ return bpf_redirect_neigh(get_dev_ifindex(dev_src), 0);
+}
+
+SEC("src_ingress") int tc_src(struct __sk_buff *skb)
+{
+ __u8 zero[ETH_ALEN * 2];
+ bool redirect = false;
+
+ switch (skb->protocol) {
+ case __bpf_constant_htons(ETH_P_IP):
+ redirect = is_remote_ep_v4(skb, __bpf_constant_htonl(ip4_dst));
+ break;
+ case __bpf_constant_htons(ETH_P_IPV6):
+ redirect = is_remote_ep_v6(skb, (struct in6_addr)ip6_dst);
+ break;
+ }
+
+ if (!redirect)
+ return TC_ACT_OK;
+
+ __builtin_memset(&zero, 0, sizeof(zero));
+ if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0)
+ return TC_ACT_SHOT;
+
+ return bpf_redirect_neigh(get_dev_ifindex(dev_dst), 0);
+}
+
+char __license[] SEC("license") = "GPL";
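
The ifindex_map has to be populated from userspace before any redirect can work. A sketch of that setup, with the veth device names being assumptions:

    #include <net/if.h>	/* if_nametoindex() */

    int key, ifindex;
    int map_fd = bpf_object__find_map_fd_by_name(obj, "ifindex_map");

    key = dev_src;
    ifindex = if_nametoindex("veth_src");	/* assumed device name */
    bpf_map_update_elem(map_fd, &key, &ifindex, BPF_ANY);
    key = dev_dst;
    ifindex = if_nametoindex("veth_dst");	/* assumed device name */
    bpf_map_update_elem(map_fd, &key, &ifindex, BPF_ANY);
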
diff --git a/tools/testing/selftests/bpf/progs/test_tc_peer.c b/tools/testing/selftests/bpf/progs/test_tc_peer.c
new file mode 100644
index 000000000000..fc84a7685aa2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tc_peer.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdint.h>
+#include <stdbool.h>
+
+#include <linux/bpf.h>
+#include <linux/stddef.h>
+#include <linux/pkt_cls.h>
+
+#include <bpf/bpf_helpers.h>
+
+enum {
+ dev_src,
+ dev_dst,
+};
+
+struct bpf_map_def SEC("maps") ifindex_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 2,
+};
+
+static __always_inline int get_dev_ifindex(int which)
+{
+ int *ifindex = bpf_map_lookup_elem(&ifindex_map, &which);
+
+ return ifindex ? *ifindex : 0;
+}
+
+SEC("chk_egress") int tc_chk(struct __sk_buff *skb)
+{
+ return TC_ACT_SHOT;
+}
+
+SEC("dst_ingress") int tc_dst(struct __sk_buff *skb)
+{
+ return bpf_redirect_peer(get_dev_ifindex(dev_src), 0);
+}
+
+SEC("src_ingress") int tc_src(struct __sk_buff *skb)
+{
+ return bpf_redirect_peer(get_dev_ifindex(dev_dst), 0);
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c b/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c
new file mode 100644
index 000000000000..678bd0fad29e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c
@@ -0,0 +1,626 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <stddef.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <linux/tcp.h>
+#include <linux/socket.h>
+#include <linux/bpf.h>
+#include <linux/types.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#define BPF_PROG_TEST_TCP_HDR_OPTIONS
+#include "test_tcp_hdr_options.h"
+
+#ifndef sizeof_field
+#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
+#endif
+
+__u8 test_kind = TCPOPT_EXP;
+__u16 test_magic = 0xeB9F;
+__u32 inherit_cb_flags = 0;
+
+struct bpf_test_option passive_synack_out = {};
+struct bpf_test_option passive_fin_out = {};
+
+struct bpf_test_option passive_estab_in = {};
+struct bpf_test_option passive_fin_in = {};
+
+struct bpf_test_option active_syn_out = {};
+struct bpf_test_option active_fin_out = {};
+
+struct bpf_test_option active_estab_in = {};
+struct bpf_test_option active_fin_in = {};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct hdr_stg);
+} hdr_stg_map SEC(".maps");
+
+static bool skops_want_cookie(const struct bpf_sock_ops *skops)
+{
+ return skops->args[0] == BPF_WRITE_HDR_TCP_SYNACK_COOKIE;
+}
+
+static bool skops_current_mss(const struct bpf_sock_ops *skops)
+{
+ return skops->args[0] == BPF_WRITE_HDR_TCP_CURRENT_MSS;
+}
+
+static __u8 option_total_len(__u8 flags)
+{
+ __u8 i, len = 1; /* +1 for flags */
+
+ if (!flags)
+ return 0;
+
+ /* RESEND bit does not use a byte */
+ for (i = OPTION_RESEND + 1; i < __NR_OPTION_FLAGS; i++)
+ len += !!TEST_OPTION_FLAGS(flags, i);
+
+ if (test_kind == TCPOPT_EXP)
+ return len + TCP_BPF_EXPOPT_BASE_LEN;
+ else
+ return len + 2; /* +1 kind, +1 kind-len */
+}
+
+static void write_test_option(const struct bpf_test_option *test_opt,
+ __u8 *data)
+{
+ __u8 offset = 0;
+
+ data[offset++] = test_opt->flags;
+ if (TEST_OPTION_FLAGS(test_opt->flags, OPTION_MAX_DELACK_MS))
+ data[offset++] = test_opt->max_delack_ms;
+
+ if (TEST_OPTION_FLAGS(test_opt->flags, OPTION_RAND))
+ data[offset++] = test_opt->rand;
+}
+
+static int store_option(struct bpf_sock_ops *skops,
+ const struct bpf_test_option *test_opt)
+{
+ union {
+ struct tcp_exprm_opt exprm;
+ struct tcp_opt regular;
+ } write_opt;
+ int err;
+
+ if (test_kind == TCPOPT_EXP) {
+ write_opt.exprm.kind = TCPOPT_EXP;
+ write_opt.exprm.len = option_total_len(test_opt->flags);
+ write_opt.exprm.magic = __bpf_htons(test_magic);
+ write_opt.exprm.data32 = 0;
+ write_test_option(test_opt, write_opt.exprm.data);
+ err = bpf_store_hdr_opt(skops, &write_opt.exprm,
+ sizeof(write_opt.exprm), 0);
+ } else {
+ write_opt.regular.kind = test_kind;
+ write_opt.regular.len = option_total_len(test_opt->flags);
+ write_opt.regular.data32 = 0;
+ write_test_option(test_opt, write_opt.regular.data);
+ err = bpf_store_hdr_opt(skops, &write_opt.regular,
+ sizeof(write_opt.regular), 0);
+ }
+
+ if (err)
+ RET_CG_ERR(err);
+
+ return CG_OK;
+}
+
+static int parse_test_option(struct bpf_test_option *opt, const __u8 *start)
+{
+ opt->flags = *start++;
+
+ if (TEST_OPTION_FLAGS(opt->flags, OPTION_MAX_DELACK_MS))
+ opt->max_delack_ms = *start++;
+
+ if (TEST_OPTION_FLAGS(opt->flags, OPTION_RAND))
+ opt->rand = *start++;
+
+ return 0;
+}
+
+static int load_option(struct bpf_sock_ops *skops,
+ struct bpf_test_option *test_opt, bool from_syn)
+{
+ union {
+ struct tcp_exprm_opt exprm;
+ struct tcp_opt regular;
+ } search_opt;
+ int ret, load_flags = from_syn ? BPF_LOAD_HDR_OPT_TCP_SYN : 0;
+
+ if (test_kind == TCPOPT_EXP) {
+ search_opt.exprm.kind = TCPOPT_EXP;
+ search_opt.exprm.len = 4;
+ search_opt.exprm.magic = __bpf_htons(test_magic);
+ search_opt.exprm.data32 = 0;
+ ret = bpf_load_hdr_opt(skops, &search_opt.exprm,
+ sizeof(search_opt.exprm), load_flags);
+ if (ret < 0)
+ return ret;
+ return parse_test_option(test_opt, search_opt.exprm.data);
+ } else {
+ search_opt.regular.kind = test_kind;
+ search_opt.regular.len = 0;
+ search_opt.regular.data32 = 0;
+ ret = bpf_load_hdr_opt(skops, &search_opt.regular,
+ sizeof(search_opt.regular), load_flags);
+ if (ret < 0)
+ return ret;
+ return parse_test_option(test_opt, search_opt.regular.data);
+ }
+}
+
+static int synack_opt_len(struct bpf_sock_ops *skops)
+{
+ struct bpf_test_option test_opt = {};
+ __u8 optlen;
+ int err;
+
+ if (!passive_synack_out.flags)
+ return CG_OK;
+
+ err = load_option(skops, &test_opt, true);
+
+ /* bpf_test_option is not found */
+ if (err == -ENOMSG)
+ return CG_OK;
+
+ if (err)
+ RET_CG_ERR(err);
+
+ optlen = option_total_len(passive_synack_out.flags);
+ if (optlen) {
+ err = bpf_reserve_hdr_opt(skops, optlen, 0);
+ if (err)
+ RET_CG_ERR(err);
+ }
+
+ return CG_OK;
+}
+
+static int write_synack_opt(struct bpf_sock_ops *skops)
+{
+ struct bpf_test_option opt;
+
+ if (!passive_synack_out.flags)
+ /* We should not even be called since no header
+ * space has been reserved.
+ */
+ RET_CG_ERR(0);
+
+ opt = passive_synack_out;
+ if (skops_want_cookie(skops))
+ SET_OPTION_FLAGS(opt.flags, OPTION_RESEND);
+
+ return store_option(skops, &opt);
+}
+
+static int syn_opt_len(struct bpf_sock_ops *skops)
+{
+ __u8 optlen;
+ int err;
+
+ if (!active_syn_out.flags)
+ return CG_OK;
+
+ optlen = option_total_len(active_syn_out.flags);
+ if (optlen) {
+ err = bpf_reserve_hdr_opt(skops, optlen, 0);
+ if (err)
+ RET_CG_ERR(err);
+ }
+
+ return CG_OK;
+}
+
+static int write_syn_opt(struct bpf_sock_ops *skops)
+{
+ if (!active_syn_out.flags)
+ RET_CG_ERR(0);
+
+ return store_option(skops, &active_syn_out);
+}
+
+static int fin_opt_len(struct bpf_sock_ops *skops)
+{
+ struct bpf_test_option *opt;
+ struct hdr_stg *hdr_stg;
+ __u8 optlen;
+ int err;
+
+ if (!skops->sk)
+ RET_CG_ERR(0);
+
+ hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
+ if (!hdr_stg)
+ RET_CG_ERR(0);
+
+ if (hdr_stg->active)
+ opt = &active_fin_out;
+ else
+ opt = &passive_fin_out;
+
+ optlen = option_total_len(opt->flags);
+ if (optlen) {
+ err = bpf_reserve_hdr_opt(skops, optlen, 0);
+ if (err)
+ RET_CG_ERR(err);
+ }
+
+ return CG_OK;
+}
+
+static int write_fin_opt(struct bpf_sock_ops *skops)
+{
+ struct bpf_test_option *opt;
+ struct hdr_stg *hdr_stg;
+
+ if (!skops->sk)
+ RET_CG_ERR(0);
+
+ hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
+ if (!hdr_stg)
+ RET_CG_ERR(0);
+
+ if (hdr_stg->active)
+ opt = &active_fin_out;
+ else
+ opt = &passive_fin_out;
+
+ if (!opt->flags)
+ RET_CG_ERR(0);
+
+ return store_option(skops, opt);
+}
+
+static int resend_in_ack(struct bpf_sock_ops *skops)
+{
+ struct hdr_stg *hdr_stg;
+
+ if (!skops->sk)
+ return -1;
+
+ hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
+ if (!hdr_stg)
+ return -1;
+
+ return !!hdr_stg->resend_syn;
+}
+
+static int nodata_opt_len(struct bpf_sock_ops *skops)
+{
+ int resend;
+
+ resend = resend_in_ack(skops);
+ if (resend < 0)
+ RET_CG_ERR(0);
+
+ if (resend)
+ return syn_opt_len(skops);
+
+ return CG_OK;
+}
+
+static int write_nodata_opt(struct bpf_sock_ops *skops)
+{
+ int resend;
+
+ resend = resend_in_ack(skops);
+ if (resend < 0)
+ RET_CG_ERR(0);
+
+ if (resend)
+ return write_syn_opt(skops);
+
+ return CG_OK;
+}
+
+static int data_opt_len(struct bpf_sock_ops *skops)
+{
+ /* Same as the nodata version. Mostly to show
+ * an example usage on skops->skb_len.
+ */
+ return nodata_opt_len(skops);
+}
+
+static int write_data_opt(struct bpf_sock_ops *skops)
+{
+ return write_nodata_opt(skops);
+}
+
+static int current_mss_opt_len(struct bpf_sock_ops *skops)
+{
+ /* Reserve maximum that may be needed */
+ int err;
+
+ err = bpf_reserve_hdr_opt(skops, option_total_len(OPTION_MASK), 0);
+ if (err)
+ RET_CG_ERR(err);
+
+ return CG_OK;
+}
+
+static int handle_hdr_opt_len(struct bpf_sock_ops *skops)
+{
+ __u8 tcp_flags = skops_tcp_flags(skops);
+
+ if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK)
+ return synack_opt_len(skops);
+
+ if (tcp_flags & TCPHDR_SYN)
+ return syn_opt_len(skops);
+
+ if (tcp_flags & TCPHDR_FIN)
+ return fin_opt_len(skops);
+
+ if (skops_current_mss(skops))
+ /* The kernel is calculating the MSS */
+ return current_mss_opt_len(skops);
+
+ if (skops->skb_len)
+ return data_opt_len(skops);
+
+ return nodata_opt_len(skops);
+}
+
+static int handle_write_hdr_opt(struct bpf_sock_ops *skops)
+{
+ __u8 tcp_flags = skops_tcp_flags(skops);
+ struct tcphdr *th;
+
+ if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK)
+ return write_synack_opt(skops);
+
+ if (tcp_flags & TCPHDR_SYN)
+ return write_syn_opt(skops);
+
+ if (tcp_flags & TCPHDR_FIN)
+ return write_fin_opt(skops);
+
+ th = skops->skb_data;
+ if (th + 1 > skops->skb_data_end)
+ RET_CG_ERR(0);
+
+ if (skops->skb_len > tcp_hdrlen(th))
+ return write_data_opt(skops);
+
+ return write_nodata_opt(skops);
+}
+
+static int set_delack_max(struct bpf_sock_ops *skops, __u8 max_delack_ms)
+{
+ __u32 max_delack_us = max_delack_ms * 1000;
+
+ return bpf_setsockopt(skops, SOL_TCP, TCP_BPF_DELACK_MAX,
+ &max_delack_us, sizeof(max_delack_us));
+}
+
+static int set_rto_min(struct bpf_sock_ops *skops, __u8 peer_max_delack_ms)
+{
+ __u32 min_rto_us = peer_max_delack_ms * 1000;
+
+ return bpf_setsockopt(skops, SOL_TCP, TCP_BPF_RTO_MIN, &min_rto_us,
+ sizeof(min_rto_us));
+}
+
+static int handle_active_estab(struct bpf_sock_ops *skops)
+{
+ struct hdr_stg init_stg = {
+ .active = true,
+ };
+ int err;
+
+ err = load_option(skops, &active_estab_in, false);
+ if (err && err != -ENOMSG)
+ RET_CG_ERR(err);
+
+ init_stg.resend_syn = TEST_OPTION_FLAGS(active_estab_in.flags,
+ OPTION_RESEND);
+ if (!skops->sk || !bpf_sk_storage_get(&hdr_stg_map, skops->sk,
+ &init_stg,
+ BPF_SK_STORAGE_GET_F_CREATE))
+ RET_CG_ERR(0);
+
+ if (init_stg.resend_syn)
+ /* Don't clear the write_hdr cb now because
+ * the ACK may get lost and retransmit may
+ * be needed.
+ *
+ * PARSE_ALL_HDR cb flag is set to learn if this
+ * resend_syn option has been received by the peer.
+ *
+ * The header option will be resent until a valid
+ * packet is received at handle_parse_hdr()
+ * and all hdr cb flags will be cleared in
+ * handle_parse_hdr().
+ */
+ set_parse_all_hdr_cb_flags(skops);
+ else if (!active_fin_out.flags)
+ /* No options will be written from now */
+ clear_hdr_cb_flags(skops);
+
+ if (active_syn_out.max_delack_ms) {
+ err = set_delack_max(skops, active_syn_out.max_delack_ms);
+ if (err)
+ RET_CG_ERR(err);
+ }
+
+ if (active_estab_in.max_delack_ms) {
+ err = set_rto_min(skops, active_estab_in.max_delack_ms);
+ if (err)
+ RET_CG_ERR(err);
+ }
+
+ return CG_OK;
+}
+
+static int handle_passive_estab(struct bpf_sock_ops *skops)
+{
+ struct hdr_stg init_stg = {};
+ struct tcphdr *th;
+ int err;
+
+ inherit_cb_flags = skops->bpf_sock_ops_cb_flags;
+
+ err = load_option(skops, &passive_estab_in, true);
+ if (err == -ENOENT) {
+ /* saved_syn is not found. It was in syncookie mode.
+ * We have asked the active side to resend the options
+ * in ACK, so try to find the bpf_test_option from ACK now.
+ */
+ err = load_option(skops, &passive_estab_in, false);
+ init_stg.syncookie = true;
+ }
+
+ /* ENOMSG: The bpf_test_option is not found, which is fine.
+ * Bail out now for all other errors.
+ */
+ if (err && err != -ENOMSG)
+ RET_CG_ERR(err);
+
+ th = skops->skb_data;
+ if (th + 1 > skops->skb_data_end)
+ RET_CG_ERR(0);
+
+ if (th->syn) {
+ /* Fastopen */
+
+ /* Cannot clear cb_flags to stop write_hdr cb.
+ * synack is not sent yet for fast open.
+ * Even if it was, the synack may need to be retransmitted.
+ *
+ * PARSE_ALL_HDR cb flag is set to learn
+ * if synack has reached the peer.
+ * All cb_flags will be cleared in handle_parse_hdr().
+ */
+ set_parse_all_hdr_cb_flags(skops);
+ init_stg.fastopen = true;
+ } else if (!passive_fin_out.flags) {
+ /* No options will be written from now */
+ clear_hdr_cb_flags(skops);
+ }
+
+ if (!skops->sk ||
+ !bpf_sk_storage_get(&hdr_stg_map, skops->sk, &init_stg,
+ BPF_SK_STORAGE_GET_F_CREATE))
+ RET_CG_ERR(0);
+
+ if (passive_synack_out.max_delack_ms) {
+ err = set_delack_max(skops, passive_synack_out.max_delack_ms);
+ if (err)
+ RET_CG_ERR(err);
+ }
+
+ if (passive_estab_in.max_delack_ms) {
+ err = set_rto_min(skops, passive_estab_in.max_delack_ms);
+ if (err)
+ RET_CG_ERR(err);
+ }
+
+ return CG_OK;
+}
+
+static int handle_parse_hdr(struct bpf_sock_ops *skops)
+{
+ struct hdr_stg *hdr_stg;
+ struct tcphdr *th;
+
+ if (!skops->sk)
+ RET_CG_ERR(0);
+
+ th = skops->skb_data;
+ if (th + 1 > skops->skb_data_end)
+ RET_CG_ERR(0);
+
+ hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
+ if (!hdr_stg)
+ RET_CG_ERR(0);
+
+ if (hdr_stg->resend_syn || hdr_stg->fastopen)
+ /* The PARSE_ALL_HDR cb flag was turned on
+ * to ensure that the previously written
+ * options have reached the peer.
+ * Those previously written option includes:
+ * - Active side: resend_syn in ACK during syncookie
+ * or
+ * - Passive side: SYNACK during fastopen
+ *
+ * A valid packet has been received here after
+ * the 3WHS, so the PARSE_ALL_HDR cb flag
+ * can be cleared now.
+ */
+ clear_parse_all_hdr_cb_flags(skops);
+
+ if (hdr_stg->resend_syn && !active_fin_out.flags)
+ /* Active side resent the syn option in ACK
+ * because the server was in syncookie mode.
+ * A valid packet has been received, so
+ * clear header cb flags if there is no
+ * more option to send.
+ */
+ clear_hdr_cb_flags(skops);
+
+ if (hdr_stg->fastopen && !passive_fin_out.flags)
+ /* Passive side was in fastopen.
+ * A valid packet has been received, so
+ * the SYNACK has reached the peer.
+ * Clear header cb flags if there is no more
+ * option to send.
+ */
+ clear_hdr_cb_flags(skops);
+
+ if (th->fin) {
+ struct bpf_test_option *fin_opt;
+ int err;
+
+ if (hdr_stg->active)
+ fin_opt = &active_fin_in;
+ else
+ fin_opt = &passive_fin_in;
+
+ err = load_option(skops, fin_opt, false);
+ if (err && err != -ENOMSG)
+ RET_CG_ERR(err);
+ }
+
+ return CG_OK;
+}
+
+SEC("sockops/estab")
+int estab(struct bpf_sock_ops *skops)
+{
+ int true_val = 1;
+
+ switch (skops->op) {
+ case BPF_SOCK_OPS_TCP_LISTEN_CB:
+ bpf_setsockopt(skops, SOL_TCP, TCP_SAVE_SYN,
+ &true_val, sizeof(true_val));
+ set_hdr_cb_flags(skops, BPF_SOCK_OPS_STATE_CB_FLAG);
+ break;
+ case BPF_SOCK_OPS_TCP_CONNECT_CB:
+ set_hdr_cb_flags(skops, 0);
+ break;
+ case BPF_SOCK_OPS_PARSE_HDR_OPT_CB:
+ return handle_parse_hdr(skops);
+ case BPF_SOCK_OPS_HDR_OPT_LEN_CB:
+ return handle_hdr_opt_len(skops);
+ case BPF_SOCK_OPS_WRITE_HDR_OPT_CB:
+ return handle_write_hdr_opt(skops);
+ case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+ return handle_passive_estab(skops);
+ case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+ return handle_active_estab(skops);
+ }
+
+ return CG_OK;
+}
+
+char _license[] SEC("license") = "GPL";
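
To make option_total_len() concrete: with test_kind == TCPOPT_EXP and flags carrying OPTION_MAX_DELACK_MS plus OPTION_RAND (both from test_tcp_hdr_options.h, not shown here), len starts at 1 for the flags byte and gains one byte per non-RESEND flag, so len = 3; adding TCP_BPF_EXPOPT_BASE_LEN (4: kind, length, and the 2-byte magic) yields a 7-byte option on the wire. A sketch of the assumed layout written by store_option() in that case:

    /* kind=254 | len=7 | magic (2 bytes) | flags | max_delack_ms | rand */
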
diff --git a/tools/testing/selftests/bpf/progs/test_trace_ext.c b/tools/testing/selftests/bpf/progs/test_trace_ext.c
new file mode 100644
index 000000000000..d19a634d0e78
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_trace_ext.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#include <linux/bpf.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_tracing.h>
+
+__u64 ext_called = 0;
+
+SEC("freplace/test_pkt_md_access")
+int test_pkt_md_access_new(struct __sk_buff *skb)
+{
+ ext_called = skb->len;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
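
An freplace program needs an explicit attach target before load; the userspace side presumably wires it to the already-loaded test_pkt_md_access program along these lines (a sketch; pkt_fd and the lookup-by-title call are assumptions about the test harness):

    prog = bpf_object__find_program_by_title(obj, "freplace/test_pkt_md_access");
    /* pkt_fd: fd of the already-loaded test_pkt_md_access program */
    err = bpf_program__set_attach_target(prog, pkt_fd, "test_pkt_md_access");
    if (!err)
    	err = bpf_object__load(obj);
    if (!err)
    	link = bpf_program__attach_trace(prog);
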
diff --git a/tools/testing/selftests/bpf/progs/test_trace_ext_tracing.c b/tools/testing/selftests/bpf/progs/test_trace_ext_tracing.c
new file mode 100644
index 000000000000..52f3baf98f20
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_trace_ext_tracing.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+__u64 fentry_called = 0;
+
+SEC("fentry/test_pkt_md_access_new")
+int BPF_PROG(fentry, struct sk_buff *skb)
+{
+ fentry_called = skb->len;
+ return 0;
+}
+
+__u64 fexit_called = 0;
+
+SEC("fexit/test_pkt_md_access_new")
+int BPF_PROG(fexit, struct sk_buff *skb)
+{
+ fexit_called = skb->len;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_vmlinux.c b/tools/testing/selftests/bpf/progs/test_vmlinux.c
index 29fa09d6a6c6..e9dfa0313d1b 100644
--- a/tools/testing/selftests/bpf/progs/test_vmlinux.c
+++ b/tools/testing/selftests/bpf/progs/test_vmlinux.c
@@ -19,12 +19,14 @@ SEC("tp/syscalls/sys_enter_nanosleep")
int handle__tp(struct trace_event_raw_sys_enter *args)
{
struct __kernel_timespec *ts;
+ long tv_nsec;
if (args->id != __NR_nanosleep)
return 0;
ts = (void *)args->args[0];
- if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC)
+ if (bpf_probe_read_user(&tv_nsec, sizeof(ts->tv_nsec), &ts->tv_nsec) ||
+ tv_nsec != MY_TV_NSEC)
return 0;
tp_called = true;
@@ -35,12 +37,14 @@ SEC("raw_tp/sys_enter")
int BPF_PROG(handle__raw_tp, struct pt_regs *regs, long id)
{
struct __kernel_timespec *ts;
+ long tv_nsec;
if (id != __NR_nanosleep)
return 0;
ts = (void *)PT_REGS_PARM1_CORE(regs);
- if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC)
+ if (bpf_probe_read_user(&tv_nsec, sizeof(ts->tv_nsec), &ts->tv_nsec) ||
+ tv_nsec != MY_TV_NSEC)
return 0;
raw_tp_called = true;
@@ -51,12 +55,14 @@ SEC("tp_btf/sys_enter")
int BPF_PROG(handle__tp_btf, struct pt_regs *regs, long id)
{
struct __kernel_timespec *ts;
+ long tv_nsec;
if (id != __NR_nanosleep)
return 0;
ts = (void *)PT_REGS_PARM1_CORE(regs);
- if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC)
+ if (bpf_probe_read_user(&tv_nsec, sizeof(ts->tv_nsec), &ts->tv_nsec) ||
+ tv_nsec != MY_TV_NSEC)
return 0;
tp_btf_called = true;
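
The switch away from BPF_CORE_READ() in the three hunks above is because ts comes from a syscall argument and points at user memory: BPF_CORE_READ() reads through bpf_probe_read_kernel() underneath, which can fail or return stale data for user addresses on some configurations, while bpf_probe_read_user() is the accessor meant for that address space.
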
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
index 8beecec166d9..3a67921f62b5 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
@@ -16,7 +16,7 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
-static __u32 rol32(__u32 word, unsigned int shift)
+static __always_inline __u32 rol32(__u32 word, unsigned int shift)
{
return (word << shift) | (word >> ((-shift) & 31));
}
@@ -49,7 +49,7 @@ static __u32 rol32(__u32 word, unsigned int shift)
typedef unsigned int u32;
-static __attribute__ ((noinline))
+static __noinline
u32 jhash(const void *key, u32 length, u32 initval)
{
u32 a, b, c;
@@ -86,7 +86,7 @@ u32 jhash(const void *key, u32 length, u32 initval)
return c;
}
-__attribute__ ((noinline))
+__noinline
u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
{
a += initval;
@@ -96,7 +96,7 @@ u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
return c;
}
-__attribute__ ((noinline))
+__noinline
u32 jhash_2words(u32 a, u32 b, u32 initval)
{
return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
@@ -213,7 +213,7 @@ struct eth_hdr {
unsigned short eth_proto;
};
-static inline __u64 calc_offset(bool is_ipv6, bool is_icmp)
+static __noinline __u64 calc_offset(bool is_ipv6, bool is_icmp)
{
__u64 off = sizeof(struct eth_hdr);
if (is_ipv6) {
@@ -797,8 +797,8 @@ out:
return XDP_DROP;
}
-__attribute__ ((section("xdp-test"), used))
-int balancer_ingress(struct xdp_md *ctx)
+SEC("xdp-test-v4")
+int balancer_ingress_v4(struct xdp_md *ctx)
{
void *data = (void *)(long)ctx->data;
void *data_end = (void *)(long)ctx->data_end;
@@ -812,11 +812,27 @@ int balancer_ingress(struct xdp_md *ctx)
eth_proto = bpf_ntohs(eth->eth_proto);
if (eth_proto == ETH_P_IP)
return process_packet(data, nh_off, data_end, 0, ctx);
- else if (eth_proto == ETH_P_IPV6)
+ else
+ return XDP_DROP;
+}
+
+SEC("xdp-test-v6")
+int balancer_ingress_v6(struct xdp_md *ctx)
+{
+ void *data = (void *)(long)ctx->data;
+ void *data_end = (void *)(long)ctx->data_end;
+ struct eth_hdr *eth = data;
+ __u32 eth_proto;
+ __u32 nh_off;
+
+ nh_off = sizeof(struct eth_hdr);
+ if (data + nh_off > data_end)
+ return XDP_DROP;
+ eth_proto = bpf_ntohs(eth->eth_proto);
+ if (eth_proto == ETH_P_IPV6)
return process_packet(data, nh_off, data_end, 1, ctx);
else
return XDP_DROP;
}
-char _license[] __attribute__ ((section("license"), used)) = "GPL";
-int _version __attribute__ ((section("version"), used)) = 1;
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c
index 8b36b6640e7e..9a4d09590b3d 100644
--- a/tools/testing/selftests/bpf/progs/trigger_bench.c
+++ b/tools/testing/selftests/bpf/progs/trigger_bench.c
@@ -39,6 +39,13 @@ int bench_trigger_fentry(void *ctx)
return 0;
}
+SEC("fentry.s/__x64_sys_getpgid")
+int bench_trigger_fentry_sleep(void *ctx)
+{
+ __sync_add_and_fetch(&hits, 1);
+ return 0;
+}
+
SEC("fmod_ret/__x64_sys_getpgid")
int bench_trigger_fmodret(void *ctx)
{
diff --git a/tools/testing/selftests/bpf/test_bpftool_build.sh b/tools/testing/selftests/bpf/test_bpftool_build.sh
index ac349a5cea7e..2db3c60e1e61 100755
--- a/tools/testing/selftests/bpf/test_bpftool_build.sh
+++ b/tools/testing/selftests/bpf/test_bpftool_build.sh
@@ -85,6 +85,23 @@ make_with_tmpdir() {
echo
}
+make_doc_and_clean() {
+ echo -e "\$PWD: $PWD"
+ echo -e "command: make -s $* doc >/dev/null"
+ RST2MAN_OPTS="--exit-status=1" make $J -s $* doc
+ if [ $? -ne 0 ] ; then
+ ERROR=1
+ printf "FAILURE: Errors or warnings when building documentation\n"
+ fi
+ (
+ if [ $# -ge 1 ] ; then
+ cd ${@: -1}
+ fi
+ make -s doc-clean
+ )
+ echo
+}
+
echo "Trying to build bpftool"
echo -e "... through kbuild\n"
@@ -145,3 +162,7 @@ make_and_clean
make_with_tmpdir OUTPUT
make_with_tmpdir O
+
+echo -e "Checking documentation build\n"
+# From tools/bpf/bpftool
+make_doc_and_clean
diff --git a/tools/testing/selftests/bpf/test_bpftool_metadata.sh b/tools/testing/selftests/bpf/test_bpftool_metadata.sh
new file mode 100755
index 000000000000..1bf81b49457a
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_bpftool_metadata.sh
@@ -0,0 +1,82 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+TESTNAME=bpftool_metadata
+BPF_FS=$(awk '$3 == "bpf" {print $2; exit}' /proc/mounts)
+BPF_DIR=$BPF_FS/test_$TESTNAME
+
+_cleanup()
+{
+ set +e
+ rm -rf $BPF_DIR 2> /dev/null
+}
+
+cleanup_skip()
+{
+ echo "selftests: $TESTNAME [SKIP]"
+ _cleanup
+
+ exit $ksft_skip
+}
+
+cleanup()
+{
+ if [ "$?" = 0 ]; then
+ echo "selftests: $TESTNAME [PASS]"
+ else
+ echo "selftests: $TESTNAME [FAILED]"
+ fi
+ _cleanup
+}
+
+if [ $(id -u) -ne 0 ]; then
+ echo "selftests: $TESTNAME [SKIP] Need root privileges"
+ exit $ksft_skip
+fi
+
+if [ -z "$BPF_FS" ]; then
+ echo "selftests: $TESTNAME [SKIP] Could not run test without bpffs mounted"
+ exit $ksft_skip
+fi
+
+if ! bpftool version > /dev/null 2>&1; then
+ echo "selftests: $TESTNAME [SKIP] Could not run test without bpftool"
+ exit $ksft_skip
+fi
+
+set -e
+
+trap cleanup_skip EXIT
+
+mkdir $BPF_DIR
+
+trap cleanup EXIT
+
+bpftool prog load metadata_unused.o $BPF_DIR/unused
+
+METADATA_PLAIN="$(bpftool prog)"
+echo "$METADATA_PLAIN" | grep 'a = "foo"' > /dev/null
+echo "$METADATA_PLAIN" | grep 'b = 1' > /dev/null
+
+bpftool prog --json | grep '"metadata":{"a":"foo","b":1}' > /dev/null
+
+bpftool map | grep 'metadata.rodata' > /dev/null
+
+rm $BPF_DIR/unused
+
+bpftool prog load metadata_used.o $BPF_DIR/used
+
+METADATA_PLAIN="$(bpftool prog)"
+echo "$METADATA_PLAIN" | grep 'a = "bar"' > /dev/null
+echo "$METADATA_PLAIN" | grep 'b = 2' > /dev/null
+
+bpftool prog --json | grep '"metadata":{"a":"bar","b":2}' > /dev/null
+
+bpftool map | grep 'metadata.rodata' > /dev/null
+
+rm $BPF_DIR/used
+
+exit 0
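
For reference, the metadata this script greps for is presumably declared in the test objects as plain read-only globals carrying the bpf_metadata_ prefix, which bpftool strips when printing. A sketch of what metadata_used.o might contain (an assumption; the real progs/metadata_used.c may differ):

    volatile const char bpf_metadata_a[] SEC(".rodata") = "bar";	/* assumed */
    volatile const int bpf_metadata_b SEC(".rodata") = 2;		/* assumed */
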
diff --git a/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c b/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c
index ed253f252cd0..ec53b1ef90d2 100644
--- a/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c
+++ b/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c
@@ -156,4 +156,5 @@ cleanup:
bpf_object__close(obj);
}
}
+ return 0;
}
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index dbb820dde138..238f5f61189e 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -130,6 +130,69 @@ extern int test__join_cgroup(const char *path);
#define CHECK_ATTR(condition, tag, format...) \
_CHECK(condition, tag, tattr.duration, format)
+#define ASSERT_EQ(actual, expected, name) ({ \
+ static int duration = 0; \
+ typeof(actual) ___act = (actual); \
+ typeof(expected) ___exp = (expected); \
+ bool ___ok = ___act == ___exp; \
+ CHECK(!___ok, (name), \
+ "unexpected %s: actual %lld != expected %lld\n", \
+ (name), (long long)(___act), (long long)(___exp)); \
+ ___ok; \
+})
+
+#define ASSERT_STREQ(actual, expected, name) ({ \
+ static int duration = 0; \
+ const char *___act = actual; \
+ const char *___exp = expected; \
+ bool ___ok = strcmp(___act, ___exp) == 0; \
+ CHECK(!___ok, (name), \
+ "unexpected %s: actual '%s' != expected '%s'\n", \
+ (name), ___act, ___exp); \
+ ___ok; \
+})
+
+#define ASSERT_OK(res, name) ({ \
+ static int duration = 0; \
+ long long ___res = (res); \
+ bool ___ok = ___res == 0; \
+ CHECK(!___ok, (name), "unexpected error: %lld\n", ___res); \
+ ___ok; \
+})
+
+#define ASSERT_ERR(res, name) ({ \
+ static int duration = 0; \
+ long long ___res = (res); \
+ bool ___ok = ___res < 0; \
+ CHECK(!___ok, (name), "unexpected success: %lld\n", ___res); \
+ ___ok; \
+})
+
+#define ASSERT_NULL(ptr, name) ({ \
+ static int duration = 0; \
+ const void *___res = (ptr); \
+ bool ___ok = !___res; \
+ CHECK(!___ok, (name), "unexpected pointer: %p\n", ___res); \
+ ___ok; \
+})
+
+#define ASSERT_OK_PTR(ptr, name) ({ \
+ static int duration = 0; \
+ const void *___res = (ptr); \
+ bool ___ok = !IS_ERR_OR_NULL(___res); \
+ CHECK(!___ok, (name), \
+ "unexpected error: %ld\n", PTR_ERR(___res)); \
+ ___ok; \
+})
+
+#define ASSERT_ERR_PTR(ptr, name) ({ \
+ static int duration = 0; \
+ const void *___res = (ptr); \
+ bool ___ok = IS_ERR(___res); \
+ CHECK(!___ok, (name), "unexpected pointer: %p\n", ___res); \
+ ___ok; \
+})
+
static inline __u64 ptr_to_u64(const void *ptr)
{
return (__u64) (unsigned long) ptr;
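
Unlike the bare CHECK() macro, the new ASSERT_*() macros above evaluate to the boolean result, so a test can log and branch in one expression. A hypothetical use (skeleton and test names are illustrative only):

    skel = my_test__open_and_load();	/* hypothetical skeleton */
    if (!ASSERT_OK_PTR(skel, "skel_open"))
    	return;
    if (!ASSERT_OK(my_test__attach(skel), "attach"))
    	goto cleanup;
    ASSERT_EQ(skel->bss->res1, 12, "res1");
cleanup:
    my_test__destroy(skel);
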
diff --git a/tools/testing/selftests/bpf/test_sock_fields.c b/tools/testing/selftests/bpf/test_sock_fields.c
deleted file mode 100644
index 6c9f269c396d..000000000000
--- a/tools/testing/selftests/bpf/test_sock_fields.c
+++ /dev/null
@@ -1,482 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright (c) 2019 Facebook */
-
-#include <sys/socket.h>
-#include <sys/epoll.h>
-#include <netinet/in.h>
-#include <arpa/inet.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-
-#include "cgroup_helpers.h"
-#include "bpf_rlimit.h"
-
-enum bpf_addr_array_idx {
- ADDR_SRV_IDX,
- ADDR_CLI_IDX,
- __NR_BPF_ADDR_ARRAY_IDX,
-};
-
-enum bpf_result_array_idx {
- EGRESS_SRV_IDX,
- EGRESS_CLI_IDX,
- INGRESS_LISTEN_IDX,
- __NR_BPF_RESULT_ARRAY_IDX,
-};
-
-enum bpf_linum_array_idx {
- EGRESS_LINUM_IDX,
- INGRESS_LINUM_IDX,
- __NR_BPF_LINUM_ARRAY_IDX,
-};
-
-struct bpf_spinlock_cnt {
- struct bpf_spin_lock lock;
- __u32 cnt;
-};
-
-#define CHECK(condition, tag, format...) ({ \
- int __ret = !!(condition); \
- if (__ret) { \
- printf("%s(%d):FAIL:%s ", __func__, __LINE__, tag); \
- printf(format); \
- printf("\n"); \
- exit(-1); \
- } \
-})
-
-#define TEST_CGROUP "/test-bpf-sock-fields"
-#define DATA "Hello BPF!"
-#define DATA_LEN sizeof(DATA)
-
-static struct sockaddr_in6 srv_sa6, cli_sa6;
-static int sk_pkt_out_cnt10_fd;
-static int sk_pkt_out_cnt_fd;
-static int linum_map_fd;
-static int addr_map_fd;
-static int tp_map_fd;
-static int sk_map_fd;
-
-static __u32 addr_srv_idx = ADDR_SRV_IDX;
-static __u32 addr_cli_idx = ADDR_CLI_IDX;
-
-static __u32 egress_srv_idx = EGRESS_SRV_IDX;
-static __u32 egress_cli_idx = EGRESS_CLI_IDX;
-static __u32 ingress_listen_idx = INGRESS_LISTEN_IDX;
-
-static __u32 egress_linum_idx = EGRESS_LINUM_IDX;
-static __u32 ingress_linum_idx = INGRESS_LINUM_IDX;
-
-static void init_loopback6(struct sockaddr_in6 *sa6)
-{
- memset(sa6, 0, sizeof(*sa6));
- sa6->sin6_family = AF_INET6;
- sa6->sin6_addr = in6addr_loopback;
-}
-
-static void print_sk(const struct bpf_sock *sk)
-{
- char src_ip4[24], dst_ip4[24];
- char src_ip6[64], dst_ip6[64];
-
- inet_ntop(AF_INET, &sk->src_ip4, src_ip4, sizeof(src_ip4));
- inet_ntop(AF_INET6, &sk->src_ip6, src_ip6, sizeof(src_ip6));
- inet_ntop(AF_INET, &sk->dst_ip4, dst_ip4, sizeof(dst_ip4));
- inet_ntop(AF_INET6, &sk->dst_ip6, dst_ip6, sizeof(dst_ip6));
-
- printf("state:%u bound_dev_if:%u family:%u type:%u protocol:%u mark:%u priority:%u "
- "src_ip4:%x(%s) src_ip6:%x:%x:%x:%x(%s) src_port:%u "
- "dst_ip4:%x(%s) dst_ip6:%x:%x:%x:%x(%s) dst_port:%u\n",
- sk->state, sk->bound_dev_if, sk->family, sk->type, sk->protocol,
- sk->mark, sk->priority,
- sk->src_ip4, src_ip4,
- sk->src_ip6[0], sk->src_ip6[1], sk->src_ip6[2], sk->src_ip6[3],
- src_ip6, sk->src_port,
- sk->dst_ip4, dst_ip4,
- sk->dst_ip6[0], sk->dst_ip6[1], sk->dst_ip6[2], sk->dst_ip6[3],
- dst_ip6, ntohs(sk->dst_port));
-}
-
-static void print_tp(const struct bpf_tcp_sock *tp)
-{
- printf("snd_cwnd:%u srtt_us:%u rtt_min:%u snd_ssthresh:%u rcv_nxt:%u "
- "snd_nxt:%u snd:una:%u mss_cache:%u ecn_flags:%u "
- "rate_delivered:%u rate_interval_us:%u packets_out:%u "
- "retrans_out:%u total_retrans:%u segs_in:%u data_segs_in:%u "
- "segs_out:%u data_segs_out:%u lost_out:%u sacked_out:%u "
- "bytes_received:%llu bytes_acked:%llu\n",
- tp->snd_cwnd, tp->srtt_us, tp->rtt_min, tp->snd_ssthresh,
- tp->rcv_nxt, tp->snd_nxt, tp->snd_una, tp->mss_cache,
- tp->ecn_flags, tp->rate_delivered, tp->rate_interval_us,
- tp->packets_out, tp->retrans_out, tp->total_retrans,
- tp->segs_in, tp->data_segs_in, tp->segs_out,
- tp->data_segs_out, tp->lost_out, tp->sacked_out,
- tp->bytes_received, tp->bytes_acked);
-}
-
-static void check_result(void)
-{
- struct bpf_tcp_sock srv_tp, cli_tp, listen_tp;
- struct bpf_sock srv_sk, cli_sk, listen_sk;
- __u32 ingress_linum, egress_linum;
- int err;
-
- err = bpf_map_lookup_elem(linum_map_fd, &egress_linum_idx,
- &egress_linum);
- CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)",
- "err:%d errno:%d", err, errno);
-
- err = bpf_map_lookup_elem(linum_map_fd, &ingress_linum_idx,
- &ingress_linum);
- CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)",
- "err:%d errno:%d", err, errno);
-
- err = bpf_map_lookup_elem(sk_map_fd, &egress_srv_idx, &srv_sk);
- CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &egress_srv_idx)",
- "err:%d errno:%d", err, errno);
- err = bpf_map_lookup_elem(tp_map_fd, &egress_srv_idx, &srv_tp);
- CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &egress_srv_idx)",
- "err:%d errno:%d", err, errno);
-
- err = bpf_map_lookup_elem(sk_map_fd, &egress_cli_idx, &cli_sk);
- CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &egress_cli_idx)",
- "err:%d errno:%d", err, errno);
- err = bpf_map_lookup_elem(tp_map_fd, &egress_cli_idx, &cli_tp);
- CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &egress_cli_idx)",
- "err:%d errno:%d", err, errno);
-
- err = bpf_map_lookup_elem(sk_map_fd, &ingress_listen_idx, &listen_sk);
- CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &ingress_listen_idx)",
- "err:%d errno:%d", err, errno);
- err = bpf_map_lookup_elem(tp_map_fd, &ingress_listen_idx, &listen_tp);
- CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &ingress_listen_idx)",
- "err:%d errno:%d", err, errno);
-
- printf("listen_sk: ");
- print_sk(&listen_sk);
- printf("\n");
-
- printf("srv_sk: ");
- print_sk(&srv_sk);
- printf("\n");
-
- printf("cli_sk: ");
- print_sk(&cli_sk);
- printf("\n");
-
- printf("listen_tp: ");
- print_tp(&listen_tp);
- printf("\n");
-
- printf("srv_tp: ");
- print_tp(&srv_tp);
- printf("\n");
-
- printf("cli_tp: ");
- print_tp(&cli_tp);
- printf("\n");
-
- CHECK(listen_sk.state != 10 ||
- listen_sk.family != AF_INET6 ||
- listen_sk.protocol != IPPROTO_TCP ||
- memcmp(listen_sk.src_ip6, &in6addr_loopback,
- sizeof(listen_sk.src_ip6)) ||
- listen_sk.dst_ip6[0] || listen_sk.dst_ip6[1] ||
- listen_sk.dst_ip6[2] || listen_sk.dst_ip6[3] ||
- listen_sk.src_port != ntohs(srv_sa6.sin6_port) ||
- listen_sk.dst_port,
- "Unexpected listen_sk",
- "Check listen_sk output. ingress_linum:%u",
- ingress_linum);
-
- CHECK(srv_sk.state == 10 ||
- !srv_sk.state ||
- srv_sk.family != AF_INET6 ||
- srv_sk.protocol != IPPROTO_TCP ||
- memcmp(srv_sk.src_ip6, &in6addr_loopback,
- sizeof(srv_sk.src_ip6)) ||
- memcmp(srv_sk.dst_ip6, &in6addr_loopback,
- sizeof(srv_sk.dst_ip6)) ||
- srv_sk.src_port != ntohs(srv_sa6.sin6_port) ||
- srv_sk.dst_port != cli_sa6.sin6_port,
- "Unexpected srv_sk", "Check srv_sk output. egress_linum:%u",
- egress_linum);
-
- CHECK(cli_sk.state == 10 ||
- !cli_sk.state ||
- cli_sk.family != AF_INET6 ||
- cli_sk.protocol != IPPROTO_TCP ||
- memcmp(cli_sk.src_ip6, &in6addr_loopback,
- sizeof(cli_sk.src_ip6)) ||
- memcmp(cli_sk.dst_ip6, &in6addr_loopback,
- sizeof(cli_sk.dst_ip6)) ||
- cli_sk.src_port != ntohs(cli_sa6.sin6_port) ||
- cli_sk.dst_port != srv_sa6.sin6_port,
- "Unexpected cli_sk", "Check cli_sk output. egress_linum:%u",
- egress_linum);
-
- CHECK(listen_tp.data_segs_out ||
- listen_tp.data_segs_in ||
- listen_tp.total_retrans ||
- listen_tp.bytes_acked,
- "Unexpected listen_tp", "Check listen_tp output. ingress_linum:%u",
- ingress_linum);
-
- CHECK(srv_tp.data_segs_out != 2 ||
- srv_tp.data_segs_in ||
- srv_tp.snd_cwnd != 10 ||
- srv_tp.total_retrans ||
- srv_tp.bytes_acked != 2 * DATA_LEN,
- "Unexpected srv_tp", "Check srv_tp output. egress_linum:%u",
- egress_linum);
-
- CHECK(cli_tp.data_segs_out ||
- cli_tp.data_segs_in != 2 ||
- cli_tp.snd_cwnd != 10 ||
- cli_tp.total_retrans ||
- cli_tp.bytes_received != 2 * DATA_LEN,
- "Unexpected cli_tp", "Check cli_tp output. egress_linum:%u",
- egress_linum);
-}
-
-static void check_sk_pkt_out_cnt(int accept_fd, int cli_fd)
-{
- struct bpf_spinlock_cnt pkt_out_cnt = {}, pkt_out_cnt10 = {};
- int err;
-
- pkt_out_cnt.cnt = ~0;
- pkt_out_cnt10.cnt = ~0;
- err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &accept_fd, &pkt_out_cnt);
- if (!err)
- err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &accept_fd,
- &pkt_out_cnt10);
-
-	/* The bpf prog only counts for fullsocks, and the
-	 * passive connection does not become a fullsock until
-	 * the 3WHS has finished.
-	 * The bpf prog only counted two data packets out, but we
-	 * specially init accept_fd's pkt_out_cnt to 2 in
-	 * init_sk_storage(). Hence, 4 here.
-	 */
- CHECK(err || pkt_out_cnt.cnt != 4 || pkt_out_cnt10.cnt != 40,
- "bpf_map_lookup_elem(sk_pkt_out_cnt, &accept_fd)",
- "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u",
- err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt);
-
- pkt_out_cnt.cnt = ~0;
- pkt_out_cnt10.cnt = ~0;
- err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &cli_fd, &pkt_out_cnt);
- if (!err)
- err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &cli_fd,
- &pkt_out_cnt10);
-	/* The active connection is a fullsock from the beginning:
-	 * 1 SYN and 1 ACK during the 3WHS, plus
-	 * 2 ACKs on the data packets.
-	 *
-	 * The bpf_prog initialized it to 0xeB9F.
-	 */
- CHECK(err || pkt_out_cnt.cnt != 0xeB9F + 4 ||
- pkt_out_cnt10.cnt != 0xeB9F + 40,
- "bpf_map_lookup_elem(sk_pkt_out_cnt, &cli_fd)",
- "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u",
- err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt);
-}
-
-static void init_sk_storage(int sk_fd, __u32 pkt_out_cnt)
-{
- struct bpf_spinlock_cnt scnt = {};
- int err;
-
- scnt.cnt = pkt_out_cnt;
- err = bpf_map_update_elem(sk_pkt_out_cnt_fd, &sk_fd, &scnt,
- BPF_NOEXIST);
- CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt_fd)",
- "err:%d errno:%d", err, errno);
-
- scnt.cnt *= 10;
- err = bpf_map_update_elem(sk_pkt_out_cnt10_fd, &sk_fd, &scnt,
- BPF_NOEXIST);
- CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt10_fd)",
- "err:%d errno:%d", err, errno);
-}
-
-static void test(void)
-{
- int listen_fd, cli_fd, accept_fd, epfd, err;
- struct epoll_event ev;
- socklen_t addrlen;
- int i;
-
- addrlen = sizeof(struct sockaddr_in6);
- ev.events = EPOLLIN;
-
- epfd = epoll_create(1);
- CHECK(epfd == -1, "epoll_create()", "epfd:%d errno:%d", epfd, errno);
-
- /* Prepare listen_fd */
- listen_fd = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0);
- CHECK(listen_fd == -1, "socket()", "listen_fd:%d errno:%d",
- listen_fd, errno);
-
- init_loopback6(&srv_sa6);
- err = bind(listen_fd, (struct sockaddr *)&srv_sa6, sizeof(srv_sa6));
- CHECK(err, "bind(listen_fd)", "err:%d errno:%d", err, errno);
-
- err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen);
- CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d", err, errno);
-
- err = listen(listen_fd, 1);
- CHECK(err, "listen(listen_fd)", "err:%d errno:%d", err, errno);
-
- /* Prepare cli_fd */
- cli_fd = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0);
- CHECK(cli_fd == -1, "socket()", "cli_fd:%d errno:%d", cli_fd, errno);
-
- init_loopback6(&cli_sa6);
- err = bind(cli_fd, (struct sockaddr *)&cli_sa6, sizeof(cli_sa6));
- CHECK(err, "bind(cli_fd)", "err:%d errno:%d", err, errno);
-
- err = getsockname(cli_fd, (struct sockaddr *)&cli_sa6, &addrlen);
- CHECK(err, "getsockname(cli_fd)", "err:%d errno:%d",
- err, errno);
-
- /* Update addr_map with srv_sa6 and cli_sa6 */
- err = bpf_map_update_elem(addr_map_fd, &addr_srv_idx, &srv_sa6, 0);
- CHECK(err, "map_update", "err:%d errno:%d", err, errno);
-
- err = bpf_map_update_elem(addr_map_fd, &addr_cli_idx, &cli_sa6, 0);
- CHECK(err, "map_update", "err:%d errno:%d", err, errno);
-
- /* Connect from cli_sa6 to srv_sa6 */
- err = connect(cli_fd, (struct sockaddr *)&srv_sa6, addrlen);
- printf("srv_sa6.sin6_port:%u cli_sa6.sin6_port:%u\n\n",
- ntohs(srv_sa6.sin6_port), ntohs(cli_sa6.sin6_port));
- CHECK(err && errno != EINPROGRESS,
- "connect(cli_fd)", "err:%d errno:%d", err, errno);
-
- ev.data.fd = listen_fd;
- err = epoll_ctl(epfd, EPOLL_CTL_ADD, listen_fd, &ev);
- CHECK(err, "epoll_ctl(EPOLL_CTL_ADD, listen_fd)", "err:%d errno:%d",
- err, errno);
-
- /* Accept the connection */
- /* Have some timeout in accept(listen_fd). Just in case. */
- err = epoll_wait(epfd, &ev, 1, 1000);
- CHECK(err != 1 || ev.data.fd != listen_fd,
- "epoll_wait(listen_fd)",
- "err:%d errno:%d ev.data.fd:%d listen_fd:%d",
- err, errno, ev.data.fd, listen_fd);
-
- accept_fd = accept(listen_fd, NULL, NULL);
- CHECK(accept_fd == -1, "accept(listen_fd)", "accept_fd:%d errno:%d",
- accept_fd, errno);
- close(listen_fd);
-
- ev.data.fd = cli_fd;
- err = epoll_ctl(epfd, EPOLL_CTL_ADD, cli_fd, &ev);
- CHECK(err, "epoll_ctl(EPOLL_CTL_ADD, cli_fd)", "err:%d errno:%d",
- err, errno);
-
- init_sk_storage(accept_fd, 2);
-
- for (i = 0; i < 2; i++) {
- /* Send some data from accept_fd to cli_fd */
- err = send(accept_fd, DATA, DATA_LEN, 0);
- CHECK(err != DATA_LEN, "send(accept_fd)", "err:%d errno:%d",
- err, errno);
-
- /* Have some timeout in recv(cli_fd). Just in case. */
- err = epoll_wait(epfd, &ev, 1, 1000);
- CHECK(err != 1 || ev.data.fd != cli_fd,
- "epoll_wait(cli_fd)", "err:%d errno:%d ev.data.fd:%d cli_fd:%d",
- err, errno, ev.data.fd, cli_fd);
-
- err = recv(cli_fd, NULL, 0, MSG_TRUNC);
- CHECK(err, "recv(cli_fd)", "err:%d errno:%d", err, errno);
- }
-
- check_sk_pkt_out_cnt(accept_fd, cli_fd);
-
- close(epfd);
- close(accept_fd);
- close(cli_fd);
-
- check_result();
-}
-
-int main(int argc, char **argv)
-{
- struct bpf_prog_load_attr attr = {
- .file = "test_sock_fields_kern.o",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .prog_flags = BPF_F_TEST_RND_HI32,
- };
- int cgroup_fd, egress_fd, ingress_fd, err;
- struct bpf_program *ingress_prog;
- struct bpf_object *obj;
- struct bpf_map *map;
-
- /* Create a cgroup, get fd, and join it */
- cgroup_fd = cgroup_setup_and_join(TEST_CGROUP);
- CHECK(cgroup_fd < 0, "cgroup_setup_and_join()",
- "cgroup_fd:%d errno:%d", cgroup_fd, errno);
- atexit(cleanup_cgroup_environment);
-
- err = bpf_prog_load_xattr(&attr, &obj, &egress_fd);
- CHECK(err, "bpf_prog_load_xattr()", "err:%d", err);
-
- ingress_prog = bpf_object__find_program_by_title(obj,
- "cgroup_skb/ingress");
- CHECK(!ingress_prog,
- "bpf_object__find_program_by_title(cgroup_skb/ingress)",
- "not found");
- ingress_fd = bpf_program__fd(ingress_prog);
-
- err = bpf_prog_attach(egress_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0);
-	CHECK(err == -1, "bpf_prog_attach(BPF_CGROUP_INET_EGRESS)",
-	      "err:%d errno:%d", err, errno);
-
- err = bpf_prog_attach(ingress_fd, cgroup_fd,
- BPF_CGROUP_INET_INGRESS, 0);
-	CHECK(err == -1, "bpf_prog_attach(BPF_CGROUP_INET_INGRESS)",
-	      "err:%d errno:%d", err, errno);
- close(cgroup_fd);
-
- map = bpf_object__find_map_by_name(obj, "addr_map");
- CHECK(!map, "cannot find addr_map", "(null)");
- addr_map_fd = bpf_map__fd(map);
-
- map = bpf_object__find_map_by_name(obj, "sock_result_map");
- CHECK(!map, "cannot find sock_result_map", "(null)");
- sk_map_fd = bpf_map__fd(map);
-
- map = bpf_object__find_map_by_name(obj, "tcp_sock_result_map");
- CHECK(!map, "cannot find tcp_sock_result_map", "(null)");
- tp_map_fd = bpf_map__fd(map);
-
- map = bpf_object__find_map_by_name(obj, "linum_map");
- CHECK(!map, "cannot find linum_map", "(null)");
- linum_map_fd = bpf_map__fd(map);
-
- map = bpf_object__find_map_by_name(obj, "sk_pkt_out_cnt");
- CHECK(!map, "cannot find sk_pkt_out_cnt", "(null)");
- sk_pkt_out_cnt_fd = bpf_map__fd(map);
-
- map = bpf_object__find_map_by_name(obj, "sk_pkt_out_cnt10");
- CHECK(!map, "cannot find sk_pkt_out_cnt10", "(null)");
- sk_pkt_out_cnt10_fd = bpf_map__fd(map);
-
- test();
-
- bpf_object__close(obj);
- cleanup_cgroup_environment();
-
- printf("PASS\n");
-
- return 0;
-}
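
The test() flow above (nonblocking sockets, a connect() that may return
EINPROGRESS, and an epoll-bounded accept()) is a reusable pattern. A minimal
sketch with error checks trimmed; names are illustrative only:

#define _GNU_SOURCE
#include <sys/epoll.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <unistd.h>

static int connect_loopback6(void)
{
	struct sockaddr_in6 srv = { .sin6_family = AF_INET6,
				    .sin6_addr = in6addr_loopback };
	struct epoll_event ev = { .events = EPOLLIN };
	socklen_t alen = sizeof(srv);
	int lfd, cfd, afd, epfd;

	lfd = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0);
	bind(lfd, (struct sockaddr *)&srv, sizeof(srv));
	getsockname(lfd, (struct sockaddr *)&srv, &alen); /* learn the port */
	listen(lfd, 1);

	cfd = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0);
	connect(cfd, (struct sockaddr *)&srv, alen); /* EINPROGRESS is fine */

	epfd = epoll_create(1);
	ev.data.fd = lfd;
	epoll_ctl(epfd, EPOLL_CTL_ADD, lfd, &ev);
	epoll_wait(epfd, &ev, 1, 1000); /* bounded wait, as in test() */
	afd = accept(lfd, NULL, NULL);

	close(lfd);
	close(epfd);
	close(cfd); /* the real test keeps the client fd open for data */
	return afd;
}
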
diff --git a/tools/testing/selftests/bpf/test_socket_cookie.c b/tools/testing/selftests/bpf/test_socket_cookie.c
index 154a8fd2a48d..ca7ca87e91aa 100644
--- a/tools/testing/selftests/bpf/test_socket_cookie.c
+++ b/tools/testing/selftests/bpf/test_socket_cookie.c
@@ -151,7 +151,7 @@ static int run_test(int cgfd)
}
bpf_object__for_each_program(prog, pobj) {
- prog_name = bpf_program__title(prog, /*needs_copy*/ false);
+ prog_name = bpf_program__section_name(prog);
if (libbpf_attach_type_by_name(prog_name, &attach_type))
goto err;
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index 9b6fb00dc7a0..0fa1e421c3d7 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -86,6 +86,7 @@ int txmsg_ktls_skb_redir;
int ktls;
int peek_flag;
int skb_use_parser;
+int txmsg_omit_skb_parser;
static const struct option long_options[] = {
{"help", no_argument, NULL, 'h' },
@@ -111,6 +112,7 @@ static const struct option long_options[] = {
{"txmsg_redir_skb", no_argument, &txmsg_redir_skb, 1 },
{"ktls", no_argument, &ktls, 1 },
{"peek", no_argument, &peek_flag, 1 },
+ {"txmsg_omit_skb_parser", no_argument, &txmsg_omit_skb_parser, 1},
{"whitelist", required_argument, NULL, 'n' },
{"blacklist", required_argument, NULL, 'b' },
{0, 0, NULL, 0 }
@@ -175,6 +177,7 @@ static void test_reset(void)
txmsg_apply = txmsg_cork = 0;
txmsg_ingress = txmsg_redir_skb = 0;
txmsg_ktls_skb = txmsg_ktls_skb_drop = txmsg_ktls_skb_redir = 0;
+ txmsg_omit_skb_parser = 0;
skb_use_parser = 0;
}
@@ -518,28 +521,13 @@ static int msg_verify_data(struct msghdr *msg, int size, int chunk_sz)
if (i == 0 && txmsg_ktls_skb) {
if (msg->msg_iov[i].iov_len < 4)
return -EIO;
- if (txmsg_ktls_skb_redir) {
- if (memcmp(&d[13], "PASS", 4) != 0) {
- fprintf(stderr,
- "detected redirect ktls_skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n", i, 0, d[13], d[14], d[15], d[16]);
- return -EIO;
- }
- d[13] = 0;
- d[14] = 1;
- d[15] = 2;
- d[16] = 3;
- j = 13;
- } else if (txmsg_ktls_skb) {
- if (memcmp(d, "PASS", 4) != 0) {
- fprintf(stderr,
- "detected ktls_skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n", i, 0, d[0], d[1], d[2], d[3]);
- return -EIO;
- }
- d[0] = 0;
- d[1] = 1;
- d[2] = 2;
- d[3] = 3;
+ if (memcmp(d, "PASS", 4) != 0) {
+ fprintf(stderr,
+ "detected skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n",
+ i, 0, d[0], d[1], d[2], d[3]);
+ return -EIO;
}
+ j = 4; /* advance index past PASS header */
}
for (; j < msg->msg_iov[i].iov_len && size; j++) {
@@ -927,13 +915,15 @@ static int run_options(struct sockmap_options *options, int cg_fd, int test)
goto run;
/* Attach programs to sockmap */
- err = bpf_prog_attach(prog_fd[0], map_fd[0],
- BPF_SK_SKB_STREAM_PARSER, 0);
- if (err) {
- fprintf(stderr,
- "ERROR: bpf_prog_attach (sockmap %i->%i): %d (%s)\n",
- prog_fd[0], map_fd[0], err, strerror(errno));
- return err;
+ if (!txmsg_omit_skb_parser) {
+ err = bpf_prog_attach(prog_fd[0], map_fd[0],
+ BPF_SK_SKB_STREAM_PARSER, 0);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_prog_attach (sockmap %i->%i): %d (%s)\n",
+ prog_fd[0], map_fd[0], err, strerror(errno));
+ return err;
+ }
}
err = bpf_prog_attach(prog_fd[1], map_fd[0],
@@ -946,13 +936,15 @@ static int run_options(struct sockmap_options *options, int cg_fd, int test)
/* Attach programs to TLS sockmap */
if (txmsg_ktls_skb) {
- err = bpf_prog_attach(prog_fd[0], map_fd[8],
- BPF_SK_SKB_STREAM_PARSER, 0);
- if (err) {
- fprintf(stderr,
- "ERROR: bpf_prog_attach (TLS sockmap %i->%i): %d (%s)\n",
- prog_fd[0], map_fd[8], err, strerror(errno));
- return err;
+ if (!txmsg_omit_skb_parser) {
+ err = bpf_prog_attach(prog_fd[0], map_fd[8],
+ BPF_SK_SKB_STREAM_PARSER, 0);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_prog_attach (TLS sockmap %i->%i): %d (%s)\n",
+ prog_fd[0], map_fd[8], err, strerror(errno));
+ return err;
+ }
}
err = bpf_prog_attach(prog_fd[2], map_fd[8],
@@ -1480,12 +1472,29 @@ static void test_txmsg_skb(int cgrp, struct sockmap_options *opt)
txmsg_ktls_skb_drop = 0;
txmsg_ktls_skb_redir = 1;
test_exec(cgrp, opt);
+ txmsg_ktls_skb_redir = 0;
+
+ /* Tests that omit skb_parser */
+ txmsg_omit_skb_parser = 1;
+ ktls = 0;
+ txmsg_ktls_skb = 0;
+ test_exec(cgrp, opt);
+
+ txmsg_ktls_skb_drop = 1;
+ test_exec(cgrp, opt);
+ txmsg_ktls_skb_drop = 0;
+
+ txmsg_ktls_skb_redir = 1;
+ test_exec(cgrp, opt);
+
+ ktls = 1;
+ test_exec(cgrp, opt);
+ txmsg_omit_skb_parser = 0;
opt->data_test = data;
ktls = k;
}
-
/* Test cork with hung data. This tests poor usage patterns where
* cork can leave data on the ring if user program is buggy and
* doesn't flush them somehow. They do take some time however
diff --git a/tools/testing/selftests/bpf/test_tc_redirect.sh b/tools/testing/selftests/bpf/test_tc_redirect.sh
new file mode 100755
index 000000000000..6d7482562140
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tc_redirect.sh
@@ -0,0 +1,204 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link
+# between src and dst. The fwd netns has a veth link to each of src and dst. The
+# client is in src and the server in dst. The test installs a TC BPF program on
+# each host-facing veth in fwd which calls into i) bpf_redirect_neigh() to
+# populate the neighbor address and redirect, or ii) bpf_redirect_peer() to
+# switch namespaces from the ingress side; it also installs a checker prog on
+# the egress side to drop unexpected traffic.
+
+if [[ $EUID -ne 0 ]]; then
+ echo "This script must be run as root"
+ echo "FAIL"
+ exit 1
+fi
+
+# check that needed tools are present
+command -v nc >/dev/null 2>&1 || \
+ { echo >&2 "nc is not available"; exit 1; }
+command -v dd >/dev/null 2>&1 || \
+ { echo >&2 "dd is not available"; exit 1; }
+command -v timeout >/dev/null 2>&1 || \
+ { echo >&2 "timeout is not available"; exit 1; }
+command -v ping >/dev/null 2>&1 || \
+ { echo >&2 "ping is not available"; exit 1; }
+command -v ping6 >/dev/null 2>&1 || \
+ { echo >&2 "ping6 is not available"; exit 1; }
+command -v perl >/dev/null 2>&1 || \
+ { echo >&2 "perl is not available"; exit 1; }
+command -v jq >/dev/null 2>&1 || \
+ { echo >&2 "jq is not available"; exit 1; }
+command -v bpftool >/dev/null 2>&1 || \
+ { echo >&2 "bpftool is not available"; exit 1; }
+
+readonly GREEN='\033[0;92m'
+readonly RED='\033[0;31m'
+readonly NC='\033[0m' # No Color
+
+readonly PING_ARG="-c 3 -w 10 -q"
+
+readonly TIMEOUT=10
+
+readonly NS_SRC="ns-src-$(mktemp -u XXXXXX)"
+readonly NS_FWD="ns-fwd-$(mktemp -u XXXXXX)"
+readonly NS_DST="ns-dst-$(mktemp -u XXXXXX)"
+
+readonly IP4_SRC="172.16.1.100"
+readonly IP4_DST="172.16.2.100"
+
+readonly IP6_SRC="::1:dead:beef:cafe"
+readonly IP6_DST="::2:dead:beef:cafe"
+
+readonly IP4_SLL="169.254.0.1"
+readonly IP4_DLL="169.254.0.2"
+readonly IP4_NET="169.254.0.0"
+
+netns_cleanup()
+{
+ ip netns del ${NS_SRC}
+ ip netns del ${NS_FWD}
+ ip netns del ${NS_DST}
+}
+
+netns_setup()
+{
+ ip netns add "${NS_SRC}"
+ ip netns add "${NS_FWD}"
+ ip netns add "${NS_DST}"
+
+ ip link add veth_src type veth peer name veth_src_fwd
+ ip link add veth_dst type veth peer name veth_dst_fwd
+
+ ip link set veth_src netns ${NS_SRC}
+ ip link set veth_src_fwd netns ${NS_FWD}
+
+ ip link set veth_dst netns ${NS_DST}
+ ip link set veth_dst_fwd netns ${NS_FWD}
+
+ ip -netns ${NS_SRC} addr add ${IP4_SRC}/32 dev veth_src
+ ip -netns ${NS_DST} addr add ${IP4_DST}/32 dev veth_dst
+
+	# The fwd netns automatically gets a v6 LL address / routes, but it
+	# also needs a v4 one in order to start ARP probing. The IP4_NET
+	# route is added to the endpoints so that ARP processing will reply.
+
+ ip -netns ${NS_FWD} addr add ${IP4_SLL}/32 dev veth_src_fwd
+ ip -netns ${NS_FWD} addr add ${IP4_DLL}/32 dev veth_dst_fwd
+
+ ip -netns ${NS_SRC} addr add ${IP6_SRC}/128 dev veth_src nodad
+ ip -netns ${NS_DST} addr add ${IP6_DST}/128 dev veth_dst nodad
+
+ ip -netns ${NS_SRC} link set dev veth_src up
+ ip -netns ${NS_FWD} link set dev veth_src_fwd up
+
+ ip -netns ${NS_DST} link set dev veth_dst up
+ ip -netns ${NS_FWD} link set dev veth_dst_fwd up
+
+ ip -netns ${NS_SRC} route add ${IP4_DST}/32 dev veth_src scope global
+ ip -netns ${NS_SRC} route add ${IP4_NET}/16 dev veth_src scope global
+ ip -netns ${NS_FWD} route add ${IP4_SRC}/32 dev veth_src_fwd scope global
+
+ ip -netns ${NS_SRC} route add ${IP6_DST}/128 dev veth_src scope global
+ ip -netns ${NS_FWD} route add ${IP6_SRC}/128 dev veth_src_fwd scope global
+
+ ip -netns ${NS_DST} route add ${IP4_SRC}/32 dev veth_dst scope global
+ ip -netns ${NS_DST} route add ${IP4_NET}/16 dev veth_dst scope global
+ ip -netns ${NS_FWD} route add ${IP4_DST}/32 dev veth_dst_fwd scope global
+
+ ip -netns ${NS_DST} route add ${IP6_SRC}/128 dev veth_dst scope global
+ ip -netns ${NS_FWD} route add ${IP6_DST}/128 dev veth_dst_fwd scope global
+
+ fmac_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/address)
+ fmac_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/address)
+
+ ip -netns ${NS_SRC} neigh add ${IP4_DST} dev veth_src lladdr $fmac_src
+ ip -netns ${NS_DST} neigh add ${IP4_SRC} dev veth_dst lladdr $fmac_dst
+
+ ip -netns ${NS_SRC} neigh add ${IP6_DST} dev veth_src lladdr $fmac_src
+ ip -netns ${NS_DST} neigh add ${IP6_SRC} dev veth_dst lladdr $fmac_dst
+}
+
+netns_test_connectivity()
+{
+ set +e
+
+ ip netns exec ${NS_DST} bash -c "nc -4 -l -p 9004 &"
+ ip netns exec ${NS_DST} bash -c "nc -6 -l -p 9006 &"
+
+ TEST="TCPv4 connectivity test"
+ ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP4_DST}/9004"
+ if [ $? -ne 0 ]; then
+ echo -e "${TEST}: ${RED}FAIL${NC}"
+ exit 1
+ fi
+ echo -e "${TEST}: ${GREEN}PASS${NC}"
+
+ TEST="TCPv6 connectivity test"
+ ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP6_DST}/9006"
+ if [ $? -ne 0 ]; then
+ echo -e "${TEST}: ${RED}FAIL${NC}"
+ exit 1
+ fi
+ echo -e "${TEST}: ${GREEN}PASS${NC}"
+
+ TEST="ICMPv4 connectivity test"
+ ip netns exec ${NS_SRC} ping $PING_ARG ${IP4_DST}
+ if [ $? -ne 0 ]; then
+ echo -e "${TEST}: ${RED}FAIL${NC}"
+ exit 1
+ fi
+ echo -e "${TEST}: ${GREEN}PASS${NC}"
+
+ TEST="ICMPv6 connectivity test"
+ ip netns exec ${NS_SRC} ping6 $PING_ARG ${IP6_DST}
+ if [ $? -ne 0 ]; then
+ echo -e "${TEST}: ${RED}FAIL${NC}"
+ exit 1
+ fi
+ echo -e "${TEST}: ${GREEN}PASS${NC}"
+
+ set -e
+}
+
+hex_mem_str()
+{
+ perl -e 'print join(" ", unpack("(H2)8", pack("L", @ARGV)))' $1
+}
+
+netns_setup_bpf()
+{
+ local obj=$1
+
+ ip netns exec ${NS_FWD} tc qdisc add dev veth_src_fwd clsact
+ ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd ingress bpf da obj $obj sec src_ingress
+ ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd egress bpf da obj $obj sec chk_egress
+
+ ip netns exec ${NS_FWD} tc qdisc add dev veth_dst_fwd clsact
+ ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd ingress bpf da obj $obj sec dst_ingress
+ ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd egress bpf da obj $obj sec chk_egress
+
+ veth_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/ifindex)
+ veth_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/ifindex)
+
+ progs=$(ip netns exec ${NS_FWD} bpftool net --json | jq -r '.[] | .tc | map(.id) | .[]')
+ for prog in $progs; do
+ map=$(bpftool prog show id $prog --json | jq -r '.map_ids | .? | .[]')
+ if [ ! -z "$map" ]; then
+ bpftool map update id $map key hex $(hex_mem_str 0) value hex $(hex_mem_str $veth_src)
+ bpftool map update id $map key hex $(hex_mem_str 1) value hex $(hex_mem_str $veth_dst)
+ fi
+ done
+}
+
+trap netns_cleanup EXIT
+set -e
+
+netns_setup
+netns_setup_bpf test_tc_neigh.o
+netns_test_connectivity
+netns_cleanup
+netns_setup
+netns_setup_bpf test_tc_peer.o
+netns_test_connectivity
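
The script expects test_tc_neigh.o / test_tc_peer.o to expose src_ingress,
dst_ingress and chk_egress sections plus a two-entry ifindex map that it
patches via bpftool. A hedged sketch of the bpf_redirect_peer() flavor; the
real programs live under progs/ and may differ in names and details:

// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

/* assumed map layout: key 0 = veth_src_fwd, key 1 = veth_dst_fwd */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 2);
	__type(key, __u32);
	__type(value, __u32);
} ifindex_map SEC(".maps");

SEC("src_ingress")
int tc_src(struct __sk_buff *skb)
{
	__u32 key = 1;
	__u32 *ifindex = bpf_map_lookup_elem(&ifindex_map, &key);

	if (!ifindex)
		return TC_ACT_SHOT;
	/* hop straight into the peer device's netns, bypassing fwd's stack */
	return bpf_redirect_peer(*ifindex, 0);
}

SEC("chk_egress")
int tc_chk(struct __sk_buff *skb)
{
	/* anything still traversing fwd's egress path is unexpected */
	return TC_ACT_SHOT;
}

char __license[] SEC("license") = "GPL";
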
diff --git a/tools/testing/selftests/bpf/test_tcp_hdr_options.h b/tools/testing/selftests/bpf/test_tcp_hdr_options.h
new file mode 100644
index 000000000000..6118e3ab61fc
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tcp_hdr_options.h
@@ -0,0 +1,152 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2020 Facebook */
+
+#ifndef _TEST_TCP_HDR_OPTIONS_H
+#define _TEST_TCP_HDR_OPTIONS_H
+
+struct bpf_test_option {
+ __u8 flags;
+ __u8 max_delack_ms;
+ __u8 rand;
+} __attribute__((packed));
+
+enum {
+ OPTION_RESEND,
+ OPTION_MAX_DELACK_MS,
+ OPTION_RAND,
+ __NR_OPTION_FLAGS,
+};
+
+#define OPTION_F_RESEND (1 << OPTION_RESEND)
+#define OPTION_F_MAX_DELACK_MS (1 << OPTION_MAX_DELACK_MS)
+#define OPTION_F_RAND (1 << OPTION_RAND)
+#define OPTION_MASK ((1 << __NR_OPTION_FLAGS) - 1)
+
+#define TEST_OPTION_FLAGS(flags, option) (1 & ((flags) >> (option)))
+#define SET_OPTION_FLAGS(flags, option) ((flags) |= (1 << (option)))
+
+/* Store in bpf_sk_storage */
+struct hdr_stg {
+ bool active;
+ bool resend_syn; /* active side only */
+ bool syncookie; /* passive side only */
+ bool fastopen; /* passive side only */
+};
+
+struct linum_err {
+ unsigned int linum;
+ int err;
+};
+
+#define TCPHDR_FIN 0x01
+#define TCPHDR_SYN 0x02
+#define TCPHDR_RST 0x04
+#define TCPHDR_PSH 0x08
+#define TCPHDR_ACK 0x10
+#define TCPHDR_URG 0x20
+#define TCPHDR_ECE 0x40
+#define TCPHDR_CWR 0x80
+#define TCPHDR_SYNACK (TCPHDR_SYN | TCPHDR_ACK)
+
+#define TCPOPT_EOL 0
+#define TCPOPT_NOP 1
+#define TCPOPT_WINDOW 3
+#define TCPOPT_EXP 254
+
+#define TCP_BPF_EXPOPT_BASE_LEN 4
+#define MAX_TCP_HDR_LEN 60
+#define MAX_TCP_OPTION_SPACE 40
+
+#ifdef BPF_PROG_TEST_TCP_HDR_OPTIONS
+
+#define CG_OK 1
+#define CG_ERR 0
+
+#ifndef SOL_TCP
+#define SOL_TCP 6
+#endif
+
+struct tcp_exprm_opt {
+ __u8 kind;
+ __u8 len;
+ __u16 magic;
+ union {
+ __u8 data[4];
+ __u32 data32;
+ };
+} __attribute__((packed));
+
+struct tcp_opt {
+ __u8 kind;
+ __u8 len;
+ union {
+ __u8 data[4];
+ __u32 data32;
+ };
+} __attribute__((packed));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 2);
+ __type(key, int);
+ __type(value, struct linum_err);
+} lport_linum_map SEC(".maps");
+
+static inline unsigned int tcp_hdrlen(const struct tcphdr *th)
+{
+ return th->doff << 2;
+}
+
+static inline __u8 skops_tcp_flags(const struct bpf_sock_ops *skops)
+{
+ return skops->skb_tcp_flags;
+}
+
+static inline void clear_hdr_cb_flags(struct bpf_sock_ops *skops)
+{
+ bpf_sock_ops_cb_flags_set(skops,
+ skops->bpf_sock_ops_cb_flags &
+ ~(BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG |
+ BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG));
+}
+
+static inline void set_hdr_cb_flags(struct bpf_sock_ops *skops, __u32 extra)
+{
+ bpf_sock_ops_cb_flags_set(skops,
+ skops->bpf_sock_ops_cb_flags |
+ BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG |
+ BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG |
+ extra);
+}
+static inline void
+clear_parse_all_hdr_cb_flags(struct bpf_sock_ops *skops)
+{
+ bpf_sock_ops_cb_flags_set(skops,
+ skops->bpf_sock_ops_cb_flags &
+ ~BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG);
+}
+
+static inline void
+set_parse_all_hdr_cb_flags(struct bpf_sock_ops *skops)
+{
+ bpf_sock_ops_cb_flags_set(skops,
+ skops->bpf_sock_ops_cb_flags |
+ BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG);
+}
+
+#define RET_CG_ERR(__err) ({ \
+ struct linum_err __linum_err; \
+ int __lport; \
+ \
+ __linum_err.linum = __LINE__; \
+ __linum_err.err = __err; \
+ __lport = skops->local_port; \
+ bpf_map_update_elem(&lport_linum_map, &__lport, &__linum_err, BPF_NOEXIST); \
+ clear_hdr_cb_flags(skops); \
+ clear_parse_all_hdr_cb_flags(skops); \
+ return CG_ERR; \
+})
+
+#endif /* BPF_PROG_TEST_TCP_HDR_OPTIONS */
+
+#endif /* _TEST_TCP_HDR_OPTIONS_H */
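
For reference, the option-flag macros above compose as plain bit operations;
a minimal sketch, assuming this header is included (pick_flags() is
hypothetical):

static __u8 pick_flags(int resend, int rand)
{
	__u8 flags = 0;

	if (resend)
		SET_OPTION_FLAGS(flags, OPTION_RESEND);
	if (rand)
		SET_OPTION_FLAGS(flags, OPTION_RAND);

	/* TEST_OPTION_FLAGS(flags, OPTION_RAND) now yields 1 iff rand */
	return flags & OPTION_MASK;
}
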
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 78a6bae56ea6..9be395d9dc64 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -114,6 +114,7 @@ struct bpf_test {
bpf_testdata_struct_t retvals[MAX_TEST_RUNS];
};
enum bpf_attach_type expected_attach_type;
+ const char *kfunc;
};
/* Note we want this to be 64 bit aligned so that the end of our array is
@@ -984,8 +985,24 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
attr.log_level = 4;
attr.prog_flags = pflags;
+ if (prog_type == BPF_PROG_TYPE_TRACING && test->kfunc) {
+ attr.attach_btf_id = libbpf_find_vmlinux_btf_id(test->kfunc,
+ attr.expected_attach_type);
+ if (attr.attach_btf_id < 0) {
+ printf("FAIL\nFailed to find BTF ID for '%s'!\n",
+ test->kfunc);
+ (*errors)++;
+ return;
+ }
+ }
+
fd_prog = bpf_load_program_xattr(&attr, bpf_vlog, sizeof(bpf_vlog));
- if (fd_prog < 0 && !bpf_probe_prog_type(prog_type, 0)) {
+
+	/* BPF_PROG_TYPE_TRACING requires more setup and
+	 * bpf_probe_prog_type won't give a correct answer
+	 */
+ if (fd_prog < 0 && prog_type != BPF_PROG_TYPE_TRACING &&
+ !bpf_probe_prog_type(prog_type, 0)) {
printf("SKIP (unsupported program type %d)\n", prog_type);
skips++;
goto close_fds;
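
A verifier test exercising this path sets .prog_type to BPF_PROG_TYPE_TRACING
together with the new .kfunc field; a minimal hedged shape (the d_path entries
added below are the real in-tree users):

{
	"tracing prog with kfunc target (sketch only)",
	.insns = {
		BPF_MOV64_IMM(BPF_REG_0, 0),
		BPF_EXIT_INSN(),
	},
	.result = ACCEPT,
	.prog_type = BPF_PROG_TYPE_TRACING,
	.expected_attach_type = BPF_TRACE_FENTRY,
	.kfunc = "dentry_open", /* resolved via libbpf_find_vmlinux_btf_id() */
},
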
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index 4d0e913bbb22..1bbd1d9830c8 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -90,6 +90,33 @@ long ksym_get_addr(const char *name)
return 0;
}
+/* Open /proc/kallsyms and read symbol addresses on the fly. Without caching
+ * all symbols, this is faster than load + find.
+ */
+int kallsyms_find(const char *sym, unsigned long long *addr)
+{
+ char type, name[500];
+ unsigned long long value;
+ int err = 0;
+ FILE *f;
+
+ f = fopen("/proc/kallsyms", "r");
+ if (!f)
+ return -EINVAL;
+
+ while (fscanf(f, "%llx %c %499s%*[^\n]\n", &value, &type, name) > 0) {
+ if (strcmp(name, sym) == 0) {
+ *addr = value;
+ goto out;
+ }
+ }
+ err = -ENOENT;
+
+out:
+ fclose(f);
+ return err;
+}
+
void read_trace_pipe(void)
{
int trace_fd;
diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h
index 25ef597dd03f..f62fdef9e589 100644
--- a/tools/testing/selftests/bpf/trace_helpers.h
+++ b/tools/testing/selftests/bpf/trace_helpers.h
@@ -12,6 +12,10 @@ struct ksym {
int load_kallsyms(void);
struct ksym *ksym_search(long key);
long ksym_get_addr(const char *name);
+
+/* Open /proc/kallsyms and find addresses on the fly; faster than load + search. */
+int kallsyms_find(const char *sym, unsigned long long *addr);
+
void read_trace_pipe(void);
#endif
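
A short usage sketch for the new helper (the caller is hypothetical):

#include <stdio.h>
#include "trace_helpers.h"

static void resolve_one(void)
{
	unsigned long long addr;

	if (!kallsyms_find("schedule", &addr))
		printf("schedule is at 0x%llx\n", addr);
	/* -EINVAL: /proc/kallsyms unreadable; -ENOENT: symbol not found */
}
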
diff --git a/tools/testing/selftests/bpf/verifier/and.c b/tools/testing/selftests/bpf/verifier/and.c
index d781bc86e100..ca8fdb1b3f01 100644
--- a/tools/testing/selftests/bpf/verifier/and.c
+++ b/tools/testing/selftests/bpf/verifier/and.c
@@ -48,3 +48,19 @@
.result = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
+{
+ "check known subreg with unknown reg",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 32),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFFFF1234),
+	/* Upper bits are unknown but the AND above masks out the 1, zeroing the lower bits */
+ BPF_JMP32_IMM(BPF_JLT, BPF_REG_0, 1, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 512),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 0
+},
diff --git a/tools/testing/selftests/bpf/verifier/basic.c b/tools/testing/selftests/bpf/verifier/basic.c
index b8d18642653a..de84f0d57082 100644
--- a/tools/testing/selftests/bpf/verifier/basic.c
+++ b/tools/testing/selftests/bpf/verifier/basic.c
@@ -2,7 +2,7 @@
"empty prog",
.insns = {
},
- .errstr = "unknown opcode 00",
+ .errstr = "last insn is not an exit or jmp",
.result = REJECT,
},
{
diff --git a/tools/testing/selftests/bpf/verifier/bounds.c b/tools/testing/selftests/bpf/verifier/bounds.c
index 4d6645f2874c..dac40de3f868 100644
--- a/tools/testing/selftests/bpf/verifier/bounds.c
+++ b/tools/testing/selftests/bpf/verifier/bounds.c
@@ -557,3 +557,149 @@
.result = ACCEPT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
+{
+ "bounds check for reg = 0, reg xor 1",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_ALU64_IMM(BPF_XOR, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_8b = { 3 },
+ .result = ACCEPT,
+},
+{
+ "bounds check for reg32 = 0, reg32 xor 1",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV32_IMM(BPF_REG_1, 0),
+ BPF_ALU32_IMM(BPF_XOR, BPF_REG_1, 1),
+ BPF_JMP32_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_8b = { 3 },
+ .result = ACCEPT,
+},
+{
+ "bounds check for reg = 2, reg xor 3",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_1, 2),
+ BPF_ALU64_IMM(BPF_XOR, BPF_REG_1, 3),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_8b = { 3 },
+ .result = ACCEPT,
+},
+{
+ "bounds check for reg = any, reg xor 3",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+ BPF_ALU64_IMM(BPF_XOR, BPF_REG_1, 3),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_8b = { 3 },
+ .result = REJECT,
+ .errstr = "invalid access to map value",
+ .errstr_unpriv = "invalid access to map value",
+},
+{
+ "bounds check for reg32 = any, reg32 xor 3",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+ BPF_ALU32_IMM(BPF_XOR, BPF_REG_1, 3),
+ BPF_JMP32_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_8b = { 3 },
+ .result = REJECT,
+ .errstr = "invalid access to map value",
+ .errstr_unpriv = "invalid access to map value",
+},
+{
+ "bounds check for reg > 0, reg xor 3",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JLE, BPF_REG_1, 0, 3),
+ BPF_ALU64_IMM(BPF_XOR, BPF_REG_1, 3),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_8b = { 3 },
+ .result = ACCEPT,
+},
+{
+ "bounds check for reg32 > 0, reg32 xor 3",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+ BPF_JMP32_IMM(BPF_JLE, BPF_REG_1, 0, 3),
+ BPF_ALU32_IMM(BPF_XOR, BPF_REG_1, 3),
+ BPF_JMP32_IMM(BPF_JGE, BPF_REG_1, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_8b = { 3 },
+ .result = ACCEPT,
+},
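
In C terms, the accepted XOR cases above reduce to constant folding; a rough
analogue (sketch only, with val standing in for an 8-byte map value):

static long xor_bounds(unsigned long *val)
{
	unsigned long r1 = 0;

	r1 ^= 1;          /* the verifier must track r1 == 1 */
	if (r1 != 0)
		return 0; /* always taken */
	return val[1];    /* dead code: 8 bytes past the map value */
}
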
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index 94258c6b5235..c4f5d909e58a 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -647,13 +647,14 @@
.result = REJECT,
},
{
- "calls: ld_abs with changing ctx data in callee",
+ "calls: subprog call with ld_abs in main prog",
.insns = {
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
BPF_LD_ABS(BPF_B, 0),
BPF_LD_ABS(BPF_H, 0),
BPF_LD_ABS(BPF_W, 0),
BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5),
BPF_MOV64_REG(BPF_REG_6, BPF_REG_7),
BPF_LD_ABS(BPF_B, 0),
@@ -666,8 +667,7 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "BPF_LD_[ABS|IND] instructions cannot be mixed",
- .result = REJECT,
+ .result = ACCEPT,
},
{
"calls: two calls with bad fallthrough",
diff --git a/tools/testing/selftests/bpf/verifier/d_path.c b/tools/testing/selftests/bpf/verifier/d_path.c
new file mode 100644
index 000000000000..b988396379a7
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/d_path.c
@@ -0,0 +1,37 @@
+{
+ "d_path accept",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_MOV64_IMM(BPF_REG_6, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 0),
+ BPF_LD_IMM64(BPF_REG_3, 8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_d_path),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACING,
+ .expected_attach_type = BPF_TRACE_FENTRY,
+ .kfunc = "dentry_open",
+},
+{
+ "d_path reject",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_MOV64_IMM(BPF_REG_6, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 0),
+ BPF_LD_IMM64(BPF_REG_3, 8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_d_path),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "helper call is not allowed in probe",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACING,
+ .expected_attach_type = BPF_TRACE_FENTRY,
+ .kfunc = "d_path",
+},
diff --git a/tools/testing/selftests/bpf/verifier/direct_packet_access.c b/tools/testing/selftests/bpf/verifier/direct_packet_access.c
index 2c5fbe7bcd27..ae72536603fe 100644
--- a/tools/testing/selftests/bpf/verifier/direct_packet_access.c
+++ b/tools/testing/selftests/bpf/verifier/direct_packet_access.c
@@ -529,7 +529,7 @@
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
- .errstr = "invalid access to packet, off=0 size=8, R5(id=1,off=0,r=0)",
+ .errstr = "invalid access to packet, off=0 size=8, R5(id=2,off=0,r=0)",
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
diff --git a/tools/testing/selftests/bpf/verifier/ld_imm64.c b/tools/testing/selftests/bpf/verifier/ld_imm64.c
index 3856dba733e9..f9297900cea6 100644
--- a/tools/testing/selftests/bpf/verifier/ld_imm64.c
+++ b/tools/testing/selftests/bpf/verifier/ld_imm64.c
@@ -51,14 +51,6 @@
.result = REJECT,
},
{
- "test5 ld_imm64",
- .insns = {
- BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0),
- },
- .errstr = "invalid bpf_ld_imm64 insn",
- .result = REJECT,
-},
-{
"test6 ld_imm64",
.insns = {
BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0),
diff --git a/tools/testing/selftests/bpf/verifier/map_ptr.c b/tools/testing/selftests/bpf/verifier/map_ptr.c
index b52209db8250..637f9293bda8 100644
--- a/tools/testing/selftests/bpf/verifier/map_ptr.c
+++ b/tools/testing/selftests/bpf/verifier/map_ptr.c
@@ -60,3 +60,35 @@
.result = ACCEPT,
.retval = 1,
},
+{
+ "bpf_map_ptr: r = 0, map_ptr = map_ptr + r",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_16b = { 4 },
+ .result = ACCEPT,
+},
+{
+ "bpf_map_ptr: r = 0, r = r + map_ptr",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_LD_MAP_FD(BPF_REG_0, 0),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_16b = { 4 },
+ .result = ACCEPT,
+},
diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c
index 056e0273bf12..006b5bd99c08 100644
--- a/tools/testing/selftests/bpf/verifier/ref_tracking.c
+++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c
@@ -854,3 +854,50 @@
.errstr = "Unreleased reference",
.result = REJECT,
},
+{
+ "reference tracking: bpf_sk_release(btf_tcp_sock)",
+ .insns = {
+ BPF_SK_LOOKUP(sk_lookup_tcp),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_EMIT_CALL(BPF_FUNC_skc_to_tcp_sock),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "unknown func",
+},
+{
+ "reference tracking: use ptr from bpf_skc_to_tcp_sock() after release",
+ .insns = {
+ BPF_SK_LOOKUP(sk_lookup_tcp),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_EMIT_CALL(BPF_FUNC_skc_to_tcp_sock),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_7, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "invalid mem access",
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "unknown func",
+},
diff --git a/tools/testing/selftests/bpf/verifier/regalloc.c b/tools/testing/selftests/bpf/verifier/regalloc.c
new file mode 100644
index 000000000000..4ad7e05de706
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/regalloc.c
@@ -0,0 +1,269 @@
+{
+ "regalloc basic",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 20, 4),
+ BPF_JMP_IMM(BPF_JSLT, BPF_REG_2, 0, 3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_48b = { 4 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+ "regalloc negative",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 24, 4),
+ BPF_JMP_IMM(BPF_JSLT, BPF_REG_2, 0, 3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_7, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_48b = { 4 },
+ .result = REJECT,
+ .errstr = "invalid access to map value, value_size=48 off=48 size=1",
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+ "regalloc src_reg mark",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 20, 5),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_JMP_REG(BPF_JSGE, BPF_REG_3, BPF_REG_2, 3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_48b = { 4 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+ "regalloc src_reg negative",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 22, 5),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_JMP_REG(BPF_JSGE, BPF_REG_3, BPF_REG_2, 3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_48b = { 4 },
+ .result = REJECT,
+ .errstr = "invalid access to map value, value_size=48 off=44 size=8",
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+ "regalloc and spill",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 20, 7),
+ /* r0 has upper bound that should propagate into r2 */
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -8), /* spill r2 */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_2, 0), /* clear r0 and r2 */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_10, -8), /* fill r3 */
+ BPF_JMP_REG(BPF_JSGE, BPF_REG_0, BPF_REG_3, 2),
+ /* r3 has lower and upper bounds */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_3),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_48b = { 4 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+ "regalloc and spill negative",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 48, 7),
+ /* r0 has upper bound that should propagate into r2 */
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -8), /* spill r2 */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_2, 0), /* clear r0 and r2 */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_10, -8), /* fill r3 */
+ BPF_JMP_REG(BPF_JSGE, BPF_REG_0, BPF_REG_3, 2),
+ /* r3 has lower and upper bounds */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_3),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_48b = { 4 },
+ .result = REJECT,
+ .errstr = "invalid access to map value, value_size=48 off=48 size=8",
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+ "regalloc three regs",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 12, 5),
+ BPF_JMP_IMM(BPF_JSLT, BPF_REG_2, 0, 4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_4),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_48b = { 4 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+ "regalloc after call",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_9, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_8, 20, 4),
+ BPF_JMP_IMM(BPF_JSLT, BPF_REG_9, 0, 3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_8),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_9),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_48b = { 4 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+ "regalloc in callee",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_7),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 20, 5),
+ BPF_JMP_IMM(BPF_JSLT, BPF_REG_2, 0, 4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_1),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_48b = { 4 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+ "regalloc, spill, JEQ",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), /* spill r0 */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 0),
+ /* The verifier will walk the rest twice with r0 == 0 and r0 == map_value */
+ BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 20, 0),
+ /* The verifier will walk the rest two more times with r0 == 20 and r0 == unknown */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_10, -8), /* fill r3 with map_value */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0, 1), /* skip ldx if map_value == NULL */
+ /* Buggy verifier will think that r3 == 20 here */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), /* read from map_value */
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_48b = { 4 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
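
The spill cases above hinge on register bounds surviving a round trip through
the stack. A rough C-level analogue (sketch only; get_random() is hypothetical
and val stands in for a 48-byte map value):

extern long get_random(void);

static long spill_bounds(const char *val)
{
	long r2 = get_random(), r3;

	if (r2 > 20)
		return 0; /* from here on, r2 <= 20 */
	r3 = r2;          /* models the spill to stack and fill into r3 */
	if (r3 <= 0)
		return 0; /* r3 now in (0, 20] */
	return val[r3];   /* in bounds for a 48-byte value */
}
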
diff --git a/tools/testing/selftests/clone3/clone3.c b/tools/testing/selftests/clone3/clone3.c
index b7e6dec36173..42be3b925830 100644
--- a/tools/testing/selftests/clone3/clone3.c
+++ b/tools/testing/selftests/clone3/clone3.c
@@ -20,13 +20,6 @@
#include "../kselftest.h"
#include "clone3_selftests.h"
-/*
- * Different sizes of struct clone_args
- */
-#ifndef CLONE3_ARGS_SIZE_V0
-#define CLONE3_ARGS_SIZE_V0 64
-#endif
-
enum test_mode {
CLONE3_ARGS_NO_TEST,
CLONE3_ARGS_ALL_0,
@@ -38,13 +31,13 @@ enum test_mode {
static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode)
{
- struct clone_args args = {
+ struct __clone_args args = {
.flags = flags,
.exit_signal = SIGCHLD,
};
struct clone_args_extended {
- struct clone_args args;
+ struct __clone_args args;
__aligned_u64 excess_space[2];
} args_ext;
@@ -52,11 +45,11 @@ static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode)
int status;
memset(&args_ext, 0, sizeof(args_ext));
- if (size > sizeof(struct clone_args))
+ if (size > sizeof(struct __clone_args))
args_ext.excess_space[1] = 1;
if (size == 0)
- size = sizeof(struct clone_args);
+ size = sizeof(struct __clone_args);
switch (test_mode) {
case CLONE3_ARGS_ALL_0:
@@ -77,9 +70,9 @@ static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode)
break;
}
- memcpy(&args_ext.args, &args, sizeof(struct clone_args));
+ memcpy(&args_ext.args, &args, sizeof(struct __clone_args));
- pid = sys_clone3((struct clone_args *)&args_ext, size);
+ pid = sys_clone3((struct __clone_args *)&args_ext, size);
if (pid < 0) {
ksft_print_msg("%s - Failed to create new process\n",
strerror(errno));
@@ -144,14 +137,14 @@ int main(int argc, char *argv[])
else
ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n");
- /* Do a clone3() with CLONE3_ARGS_SIZE_V0. */
- test_clone3(0, CLONE3_ARGS_SIZE_V0, 0, CLONE3_ARGS_NO_TEST);
+ /* Do a clone3() with CLONE_ARGS_SIZE_VER0. */
+ test_clone3(0, CLONE_ARGS_SIZE_VER0, 0, CLONE3_ARGS_NO_TEST);
- /* Do a clone3() with CLONE3_ARGS_SIZE_V0 - 8 */
- test_clone3(0, CLONE3_ARGS_SIZE_V0 - 8, -EINVAL, CLONE3_ARGS_NO_TEST);
+ /* Do a clone3() with CLONE_ARGS_SIZE_VER0 - 8 */
+ test_clone3(0, CLONE_ARGS_SIZE_VER0 - 8, -EINVAL, CLONE3_ARGS_NO_TEST);
/* Do a clone3() with sizeof(struct clone_args) + 8 */
- test_clone3(0, sizeof(struct clone_args) + 8, 0, CLONE3_ARGS_NO_TEST);
+ test_clone3(0, sizeof(struct __clone_args) + 8, 0, CLONE3_ARGS_NO_TEST);
/* Do a clone3() with exit_signal having highest 32 bits non-zero */
test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_BIG);
@@ -165,31 +158,31 @@ int main(int argc, char *argv[])
/* Do a clone3() with NSIG < exit_signal < CSIG */
test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_NSIG);
- test_clone3(0, sizeof(struct clone_args) + 8, 0, CLONE3_ARGS_ALL_0);
+ test_clone3(0, sizeof(struct __clone_args) + 8, 0, CLONE3_ARGS_ALL_0);
- test_clone3(0, sizeof(struct clone_args) + 16, -E2BIG,
+ test_clone3(0, sizeof(struct __clone_args) + 16, -E2BIG,
CLONE3_ARGS_ALL_0);
- test_clone3(0, sizeof(struct clone_args) * 2, -E2BIG,
+ test_clone3(0, sizeof(struct __clone_args) * 2, -E2BIG,
CLONE3_ARGS_ALL_0);
/* Do a clone3() with > page size */
test_clone3(0, getpagesize() + 8, -E2BIG, CLONE3_ARGS_NO_TEST);
- /* Do a clone3() with CLONE3_ARGS_SIZE_V0 in a new PID NS. */
+ /* Do a clone3() with CLONE_ARGS_SIZE_VER0 in a new PID NS. */
if (uid == 0)
- test_clone3(CLONE_NEWPID, CLONE3_ARGS_SIZE_V0, 0,
+ test_clone3(CLONE_NEWPID, CLONE_ARGS_SIZE_VER0, 0,
CLONE3_ARGS_NO_TEST);
else
ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n");
- /* Do a clone3() with CLONE3_ARGS_SIZE_V0 - 8 in a new PID NS */
- test_clone3(CLONE_NEWPID, CLONE3_ARGS_SIZE_V0 - 8, -EINVAL,
+ /* Do a clone3() with CLONE_ARGS_SIZE_VER0 - 8 in a new PID NS */
+ test_clone3(CLONE_NEWPID, CLONE_ARGS_SIZE_VER0 - 8, -EINVAL,
CLONE3_ARGS_NO_TEST);
/* Do a clone3() with sizeof(struct clone_args) + 8 in a new PID NS */
if (uid == 0)
- test_clone3(CLONE_NEWPID, sizeof(struct clone_args) + 8, 0,
+ test_clone3(CLONE_NEWPID, sizeof(struct __clone_args) + 8, 0,
CLONE3_ARGS_NO_TEST);
else
ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n");
diff --git a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c
index 9562425aa0a9..55bd387ce7ec 100644
--- a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c
+++ b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c
@@ -44,13 +44,13 @@ static int call_clone3_set_tid(struct __test_metadata *_metadata,
int status;
pid_t pid = -1;
- struct clone_args args = {
+ struct __clone_args args = {
.exit_signal = SIGCHLD,
.set_tid = ptr_to_u64(set_tid),
.set_tid_size = set_tid_size,
};
- pid = sys_clone3(&args, sizeof(struct clone_args));
+ pid = sys_clone3(&args, sizeof(args));
if (pid < 0) {
TH_LOG("%s - Failed to create new process", strerror(errno));
return -errno;
diff --git a/tools/testing/selftests/clone3/clone3_clear_sighand.c b/tools/testing/selftests/clone3/clone3_clear_sighand.c
index db5fc9c5edcf..47a8c0fc3676 100644
--- a/tools/testing/selftests/clone3/clone3_clear_sighand.c
+++ b/tools/testing/selftests/clone3/clone3_clear_sighand.c
@@ -47,7 +47,7 @@ static void test_clone3_clear_sighand(void)
{
int ret;
pid_t pid;
- struct clone_args args = {};
+ struct __clone_args args = {};
struct sigaction act;
/*
diff --git a/tools/testing/selftests/clone3/clone3_selftests.h b/tools/testing/selftests/clone3/clone3_selftests.h
index 91c1a78ddb39..e81ffaaee02b 100644
--- a/tools/testing/selftests/clone3/clone3_selftests.h
+++ b/tools/testing/selftests/clone3/clone3_selftests.h
@@ -19,13 +19,11 @@
#define CLONE_INTO_CGROUP 0x200000000ULL /* Clone into a specific cgroup given the right permissions. */
#endif
-#ifndef CLONE_ARGS_SIZE_VER0
-#define CLONE_ARGS_SIZE_VER0 64
-#endif
-
#ifndef __NR_clone3
#define __NR_clone3 -1
-struct clone_args {
+#endif
+
+struct __clone_args {
__aligned_u64 flags;
__aligned_u64 pidfd;
__aligned_u64 child_tid;
@@ -34,15 +32,21 @@ struct clone_args {
__aligned_u64 stack;
__aligned_u64 stack_size;
__aligned_u64 tls;
-#define CLONE_ARGS_SIZE_VER1 80
+#ifndef CLONE_ARGS_SIZE_VER0
+#define CLONE_ARGS_SIZE_VER0 64 /* sizeof first published struct */
+#endif
__aligned_u64 set_tid;
__aligned_u64 set_tid_size;
-#define CLONE_ARGS_SIZE_VER2 88
+#ifndef CLONE_ARGS_SIZE_VER1
+#define CLONE_ARGS_SIZE_VER1 80 /* sizeof second published struct */
+#endif
__aligned_u64 cgroup;
+#ifndef CLONE_ARGS_SIZE_VER2
+#define CLONE_ARGS_SIZE_VER2 88 /* sizeof third published struct */
+#endif
};
-#endif /* __NR_clone3 */
-static pid_t sys_clone3(struct clone_args *args, size_t size)
+static pid_t sys_clone3(struct __clone_args *args, size_t size)
{
fflush(stdout);
fflush(stderr);
@@ -52,7 +56,7 @@ static pid_t sys_clone3(struct clone_args *args, size_t size)
static inline void test_clone3_supported(void)
{
pid_t pid;
- struct clone_args args = {};
+ struct __clone_args args = {};
if (__NR_clone3 < 0)
ksft_exit_skip("clone3() syscall is not supported\n");
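
The header above keeps each CLONE_ARGS_SIZE_VERx macro next to the last
member of the corresponding published revision, so the constants can no
longer drift from the layout. A minimal sketch (not part of the patch) of
calling clone3() with only the v0 subset, assuming a libc that defines
__NR_clone3; the kernel treats fields beyond the passed size as zero:

    #include <linux/types.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <signal.h>

    /* First published revision: 8 x __aligned_u64 == 64 bytes (VER0). */
    struct clone_args_v0 {
            __aligned_u64 flags;
            __aligned_u64 pidfd;
            __aligned_u64 child_tid;
            __aligned_u64 parent_tid;
            __aligned_u64 exit_signal;
            __aligned_u64 stack;
            __aligned_u64 stack_size;
            __aligned_u64 tls;
    };

    static pid_t clone3_v0(void)
    {
            struct clone_args_v0 args = { .exit_signal = SIGCHLD };

            return syscall(__NR_clone3, &args, sizeof(args));
    }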
diff --git a/tools/testing/selftests/clone3/clone3_set_tid.c b/tools/testing/selftests/clone3/clone3_set_tid.c
index 5831c1082d6d..0229e9ebb995 100644
--- a/tools/testing/selftests/clone3/clone3_set_tid.c
+++ b/tools/testing/selftests/clone3/clone3_set_tid.c
@@ -46,14 +46,14 @@ static int call_clone3_set_tid(pid_t *set_tid,
int status;
pid_t pid = -1;
- struct clone_args args = {
+ struct __clone_args args = {
.flags = flags,
.exit_signal = SIGCHLD,
.set_tid = ptr_to_u64(set_tid),
.set_tid_size = set_tid_size,
};
- pid = sys_clone3(&args, sizeof(struct clone_args));
+ pid = sys_clone3(&args, sizeof(args));
if (pid < 0) {
ksft_print_msg("%s - Failed to create new process\n",
strerror(errno));
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh
index 47edf099a17e..508a702f0021 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh
@@ -207,7 +207,7 @@ __rate_test()
RET=0
- devlink trap policer set $DEVLINK_DEV policer $id rate 1000 burst 16
+ devlink trap policer set $DEVLINK_DEV policer $id rate 1000 burst 512
devlink trap group set $DEVLINK_DEV group l3_drops policer $id
# Send packets at highest possible rate and make sure they are dropped
@@ -220,8 +220,8 @@ __rate_test()
rate=$(trap_rate_get)
pct=$((100 * (rate - 1000) / 1000))
- ((-5 <= pct && pct <= 5))
- check_err $? "Expected rate 1000 pps, got $rate pps, which is $pct% off. Required accuracy is +-5%"
+ ((-10 <= pct && pct <= 10))
+ check_err $? "Expected rate 1000 pps, got $rate pps, which is $pct% off. Required accuracy is +-10%"
log_info "Expected rate 1000 pps, measured rate $rate pps"
drop_rate=$(policer_drop_rate_get $id)
@@ -288,35 +288,12 @@ __burst_test()
RET=0
- devlink trap policer set $DEVLINK_DEV policer $id rate 1000 burst 32
+ devlink trap policer set $DEVLINK_DEV policer $id rate 1000 burst 512
devlink trap group set $DEVLINK_DEV group l3_drops policer $id
- # Send a burst of 64 packets and make sure that about 32 are received
- # and the rest are dropped by the policer
- log_info "=== Tx burst size: 64, Policer burst size: 32 pps ==="
-
- t0_rx=$(devlink_trap_rx_packets_get blackhole_route)
- t0_drop=$(devlink_trap_policer_rx_dropped_get $id)
-
- start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac -c 64
-
- t1_rx=$(devlink_trap_rx_packets_get blackhole_route)
- t1_drop=$(devlink_trap_policer_rx_dropped_get $id)
-
- rx=$((t1_rx - t0_rx))
- pct=$((100 * (rx - 32) / 32))
- ((-20 <= pct && pct <= 20))
- check_err $? "Expected burst size of 32 packets, got $rx packets, which is $pct% off. Required accuracy is +-20%"
- log_info "Expected burst size of 32 packets, measured burst size of $rx packets"
-
- drop=$((t1_drop - t0_drop))
- (( drop > 0 ))
- check_err $? "Expected non-zero policer drops, got 0"
- log_info "Measured policer drops of $drop packets"
-
# Send a burst of 16 packets and make sure that 16 are received
# and that none are dropped by the policer
- log_info "=== Tx burst size: 16, Policer burst size: 32 pps ==="
+ log_info "=== Tx burst size: 16, Policer burst size: 512 ==="
t0_rx=$(devlink_trap_rx_packets_get blackhole_route)
t0_drop=$(devlink_trap_policer_rx_dropped_get $id)
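
The rate check above folds the measurement into a signed percentage and
widens the accepted window to +-10%. The same tolerance idiom as a
reusable helper (a hypothetical sketch, not in the patch):

    # Succeeds when rate is within tol percent of target (integer pps).
    rate_within_pct()
    {
            local rate=$1; shift
            local target=$1; shift
            local tol=$1; shift

            local pct=$((100 * (rate - target) / target))
            ((-tol <= pct && pct <= tol))
    }

    rate_within_pct 1080 1000 10   # succeeds: +8%
    rate_within_pct 1150 1000 10   # fails: +15%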
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh
index 6d1790b5de7a..e9f8718af979 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh
@@ -147,17 +147,26 @@ switch_create()
# Make sure that ingress quotas are smaller than egress so that there is
# room for both streams of traffic to be admitted to shared buffer.
+ devlink_pool_size_thtype_save 0
devlink_pool_size_thtype_set 0 dynamic 10000000
+ devlink_pool_size_thtype_save 4
devlink_pool_size_thtype_set 4 dynamic 10000000
+ devlink_port_pool_th_save $swp1 0
devlink_port_pool_th_set $swp1 0 6
+ devlink_tc_bind_pool_th_save $swp1 1 ingress
devlink_tc_bind_pool_th_set $swp1 1 ingress 0 6
+ devlink_port_pool_th_save $swp2 0
devlink_port_pool_th_set $swp2 0 6
+ devlink_tc_bind_pool_th_save $swp2 2 ingress
devlink_tc_bind_pool_th_set $swp2 2 ingress 0 6
+ devlink_tc_bind_pool_th_save $swp3 1 egress
devlink_tc_bind_pool_th_set $swp3 1 egress 4 7
+ devlink_tc_bind_pool_th_save $swp3 2 egress
devlink_tc_bind_pool_th_set $swp3 2 egress 4 7
+ devlink_port_pool_th_save $swp3 4
devlink_port_pool_th_set $swp3 4 7
}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh
new file mode 100755
index 000000000000..27de3d9ed08e
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh
@@ -0,0 +1,379 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+ test_defaults
+ test_dcb_ets
+ test_mtu
+ test_pfc
+ test_int_buf
+ test_tc_priomap
+ test_tc_mtu
+ test_tc_sizes
+ test_tc_int_buf
+"
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=0
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+source qos_lib.sh
+
+swp=$NETIF_NO_CABLE
+
+cleanup()
+{
+ pre_cleanup
+}
+
+get_prio_pg()
+{
+ __mlnx_qos -i $swp | sed -n '/^PFC/,/^[^[:space:]]/p' |
+ grep buffer | sed 's/ \+/ /g' | cut -d' ' -f 2-
+}
+
+get_prio_pfc()
+{
+ __mlnx_qos -i $swp | sed -n '/^PFC/,/^[^[:space:]]/p' |
+ grep enabled | sed 's/ \+/ /g' | cut -d' ' -f 2-
+}
+
+get_prio_tc()
+{
+ __mlnx_qos -i $swp | sed -n '/^tc/,$p' |
+ awk '/^tc/ { TC = $2 }
+ /priority:/ { PRIO[$2]=TC }
+ END {
+ for (i in PRIO)
+ printf("%d ", PRIO[i])
+ }'
+}
+
+get_buf_size()
+{
+ local idx=$1; shift
+
+ __mlnx_qos -i $swp | grep Receive | sed 's/.*: //' | cut -d, -f $((idx + 1))
+}
+
+get_tot_size()
+{
+ __mlnx_qos -i $swp | grep Receive | sed 's/.*total_size=//'
+}
+
+check_prio_pg()
+{
+ local expect=$1; shift
+
+ local current=$(get_prio_pg)
+ test "$current" = "$expect"
+ check_err $? "prio2buffer is '$current', expected '$expect'"
+}
+
+check_prio_pfc()
+{
+ local expect=$1; shift
+
+ local current=$(get_prio_pfc)
+ test "$current" = "$expect"
+ check_err $? "prio PFC is '$current', expected '$expect'"
+}
+
+check_prio_tc()
+{
+ local expect=$1; shift
+
+ local current=$(get_prio_tc)
+ test "$current" = "$expect"
+ check_err $? "prio_tc is '$current', expected '$expect'"
+}
+
+__check_buf_size()
+{
+ local idx=$1; shift
+ local expr=$1; shift
+ local what=$1; shift
+
+ local current=$(get_buf_size $idx)
+ ((current $expr))
+ check_err $? "${what}buffer $idx size is '$current', expected '$expr'"
+ echo $current
+}
+
+check_buf_size()
+{
+ __check_buf_size "$@" > /dev/null
+}
+
+test_defaults()
+{
+ RET=0
+
+ check_prio_pg "0 0 0 0 0 0 0 0 "
+ check_prio_tc "0 0 0 0 0 0 0 0 "
+ check_prio_pfc "0 0 0 0 0 0 0 0 "
+
+ log_test "Default headroom configuration"
+}
+
+test_dcb_ets()
+{
+ RET=0
+
+ __mlnx_qos -i $swp --prio_tc=0,2,4,6,1,3,5,7 > /dev/null
+
+ check_prio_pg "0 2 4 6 1 3 5 7 "
+ check_prio_tc "0 2 4 6 1 3 5 7 "
+ check_prio_pfc "0 0 0 0 0 0 0 0 "
+
+ __mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null
+
+ check_prio_pg "0 0 0 0 0 0 0 0 "
+ check_prio_tc "0 0 0 0 0 0 0 0 "
+
+ __mlnx_qos -i $swp --prio2buffer=1,3,5,7,0,2,4,6 &> /dev/null
+ check_fail $? "prio2buffer accepted in DCB mode"
+
+ log_test "Configuring headroom through ETS"
+}
+
+test_mtu()
+{
+ local what=$1; shift
+ local buf0size_2
+ local buf0size
+
+ RET=0
+ buf0size=$(__check_buf_size 0 "> 0")
+
+ mtu_set $swp 3000
+ buf0size_2=$(__check_buf_size 0 "> $buf0size" "MTU 3000: ")
+ mtu_restore $swp
+
+ mtu_set $swp 6000
+ check_buf_size 0 "> $buf0size_2" "MTU 6000: "
+ mtu_restore $swp
+
+ check_buf_size 0 "== $buf0size"
+
+ log_test "${what}MTU impacts buffer size"
+}
+
+test_tc_mtu()
+{
+	# In TC mode, the MTU still determines the minimum size below which a
+	# buffer is not permitted to shrink.
+
+ tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
+ test_mtu "TC: "
+ tc qdisc delete dev $swp root
+}
+
+test_pfc()
+{
+ RET=0
+
+ __mlnx_qos -i $swp --prio_tc=0,0,0,0,0,1,2,3 > /dev/null
+
+ local buf0size=$(get_buf_size 0)
+ local buf1size=$(get_buf_size 1)
+ local buf2size=$(get_buf_size 2)
+ local buf3size=$(get_buf_size 3)
+ check_buf_size 0 "> 0"
+ check_buf_size 1 "> 0"
+ check_buf_size 2 "> 0"
+ check_buf_size 3 "> 0"
+ check_buf_size 4 "== 0"
+ check_buf_size 5 "== 0"
+ check_buf_size 6 "== 0"
+ check_buf_size 7 "== 0"
+
+ log_test "Buffer size sans PFC"
+
+ RET=0
+
+ __mlnx_qos -i $swp --pfc=0,0,0,0,0,1,1,1 --cable_len=0 > /dev/null
+
+ check_prio_pg "0 0 0 0 0 1 2 3 "
+ check_prio_pfc "0 0 0 0 0 1 1 1 "
+ check_buf_size 0 "== $buf0size"
+ check_buf_size 1 "> $buf1size"
+ check_buf_size 2 "> $buf2size"
+ check_buf_size 3 "> $buf3size"
+
+ local buf1size=$(get_buf_size 1)
+ check_buf_size 2 "== $buf1size"
+ check_buf_size 3 "== $buf1size"
+
+ log_test "PFC: Cable length 0"
+
+ RET=0
+
+ __mlnx_qos -i $swp --pfc=0,0,0,0,0,1,1,1 --cable_len=1000 > /dev/null
+
+ check_buf_size 0 "== $buf0size"
+ check_buf_size 1 "> $buf1size"
+ check_buf_size 2 "> $buf1size"
+ check_buf_size 3 "> $buf1size"
+
+ log_test "PFC: Cable length 1000"
+
+ RET=0
+
+ __mlnx_qos -i $swp --pfc=0,0,0,0,0,0,0,0 --cable_len=0 > /dev/null
+ __mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null
+
+ check_prio_pg "0 0 0 0 0 0 0 0 "
+ check_prio_tc "0 0 0 0 0 0 0 0 "
+ check_buf_size 0 "> 0"
+ check_buf_size 1 "== 0"
+ check_buf_size 2 "== 0"
+ check_buf_size 3 "== 0"
+ check_buf_size 4 "== 0"
+ check_buf_size 5 "== 0"
+ check_buf_size 6 "== 0"
+ check_buf_size 7 "== 0"
+
+ log_test "PFC: Restore defaults"
+}
+
+test_tc_priomap()
+{
+ RET=0
+
+ __mlnx_qos -i $swp --prio_tc=0,1,2,3,4,5,6,7 > /dev/null
+ check_prio_pg "0 1 2 3 4 5 6 7 "
+
+ tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
+ check_prio_pg "0 0 0 0 0 0 0 0 "
+
+ __mlnx_qos -i $swp --prio2buffer=1,3,5,7,0,2,4,6 > /dev/null
+ check_prio_pg "1 3 5 7 0 2 4 6 "
+
+ tc qdisc delete dev $swp root
+ check_prio_pg "0 1 2 3 4 5 6 7 "
+
+ # Clean up.
+ tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
+ __mlnx_qos -i $swp --prio2buffer=0,0,0,0,0,0,0,0 > /dev/null
+ tc qdisc delete dev $swp root
+ __mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null
+
+ log_test "TC: priomap"
+}
+
+test_tc_sizes()
+{
+ local cell_size=$(devlink_cell_size_get)
+ local size=$((cell_size * 1000))
+
+ RET=0
+
+ __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 &> /dev/null
+ check_fail $? "buffer_size should fail before qdisc is added"
+
+ tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
+
+ __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null
+ check_err $? "buffer_size should pass after qdisc is added"
+ check_buf_size 0 "== $size" "set size: "
+
+ mtu_set $swp 6000
+ check_buf_size 0 "== $size" "set MTU: "
+ mtu_restore $swp
+
+ __mlnx_qos -i $swp --buffer_size=0,0,0,0,0,0,0,0 > /dev/null
+
+ # After replacing the qdisc for the same kind, buffer_size still has to
+ # work.
+ tc qdisc replace dev $swp root handle 1: bfifo limit 1M
+
+ __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null
+ check_buf_size 0 "== $size" "post replace, set size: "
+
+ __mlnx_qos -i $swp --buffer_size=0,0,0,0,0,0,0,0 > /dev/null
+
+ # Likewise after replacing for a different kind.
+ tc qdisc replace dev $swp root handle 2: prio bands 8
+
+ __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null
+ check_buf_size 0 "== $size" "post replace different kind, set size: "
+
+ tc qdisc delete dev $swp root
+
+ __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 &> /dev/null
+ check_fail $? "buffer_size should fail after qdisc is deleted"
+
+ log_test "TC: buffer size"
+}
+
+test_int_buf()
+{
+ local what=$1; shift
+
+ RET=0
+
+ local buf0size=$(get_buf_size 0)
+ local tot_size=$(get_tot_size)
+
+ # Size of internal buffer and buffer 9.
+ local dsize=$((tot_size - buf0size))
+
+ tc qdisc add dev $swp clsact
+ tc filter add dev $swp egress matchall skip_sw action mirred egress mirror dev $swp
+
+ local buf0size_2=$(get_buf_size 0)
+ local tot_size_2=$(get_tot_size)
+ local dsize_2=$((tot_size_2 - buf0size_2))
+
+	# Enabling egress SPAN should have added to the "invisible" buffer configuration.
+ ((dsize_2 > dsize))
+ check_err $? "Invisible buffers account for '$dsize_2', expected '> $dsize'"
+
+ mtu_set $swp 3000
+
+ local buf0size_3=$(get_buf_size 0)
+ local tot_size_3=$(get_tot_size)
+ local dsize_3=$((tot_size_3 - buf0size_3))
+
+	# An MTU change might resize buffer 0, which will show up in the total,
+	# but the hidden buffers should stay the same size.
+ ((dsize_3 == dsize_2))
+ check_err $? "MTU change: Invisible buffers account for '$dsize_3', expected '== $dsize_2'"
+
+ mtu_restore $swp
+ tc qdisc del dev $swp clsact
+
+ # After SPAN removal, hidden buffers should be back to the original sizes.
+ local buf0size_4=$(get_buf_size 0)
+ local tot_size_4=$(get_tot_size)
+ local dsize_4=$((tot_size_4 - buf0size_4))
+ ((dsize_4 == dsize))
+ check_err $? "SPAN removed: Invisible buffers account for '$dsize_4', expected '== $dsize'"
+
+ log_test "${what}internal buffer size"
+}
+
+test_tc_int_buf()
+{
+ local cell_size=$(devlink_cell_size_get)
+ local size=$((cell_size * 1000))
+
+ tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
+ test_int_buf "TC: "
+
+ __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null
+ test_int_buf "TC+buffsize: "
+
+ __mlnx_qos -i $swp --buffer_size=0,0,0,0,0,0,0,0 > /dev/null
+ tc qdisc delete dev $swp root
+}
+
+trap cleanup EXIT
+
+bail_on_lldpad
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
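
check_buf_size and __check_buf_size above splice a caller-supplied
operator string straight into a bash arithmetic test, which is what lets
call sites write checks such as 'check_buf_size 0 "> $buf0size"'. The
expansion in isolation:

    current=98304; expr="> 65536"
    ((current $expr)) && echo "holds"   # evaluates as ((98304 > 65536))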
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh
index faa51012cdac..0bf76f13c030 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh
@@ -82,3 +82,17 @@ bail_on_lldpad()
fi
fi
}
+
+__mlnx_qos()
+{
+ local err
+
+ mlnx_qos "$@" 2>/dev/null
+ err=$?
+
+ if ((err)); then
+ echo "Error ($err) in mlnx_qos $@" >/dev/stderr
+ fi
+
+ return $err
+}
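
The wrapper keeps mlnx_qos's own chatter off the test output while still
propagating the exit code, so call sites can pair it with the forwarding
library's check_err/check_fail. Typical use (a sketch):

    __mlnx_qos -i $swp --pfc=0,1,0,0,0,0,0,0 > /dev/null
    check_err $? "Failed to enable PFC on prio 1"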
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
index b025daea062d..8f164c80e215 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
@@ -145,12 +145,17 @@ switch_create()
# Make sure that ingress quotas are smaller than egress so that there is
# room for both streams of traffic to be admitted to shared buffer.
+ devlink_port_pool_th_save $swp1 0
devlink_port_pool_th_set $swp1 0 5
+ devlink_tc_bind_pool_th_save $swp1 0 ingress
devlink_tc_bind_pool_th_set $swp1 0 ingress 0 5
+ devlink_port_pool_th_save $swp2 0
devlink_port_pool_th_set $swp2 0 5
+ devlink_tc_bind_pool_th_save $swp2 1 ingress
devlink_tc_bind_pool_th_set $swp2 1 ingress 0 5
+ devlink_port_pool_th_save $swp3 4
devlink_port_pool_th_set $swp3 4 12
}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh
new file mode 100755
index 000000000000..4d900bc1f76c
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh
@@ -0,0 +1,403 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test injects a 10-MB burst of traffic with VLAN tag and 802.1p priority
+# of 1. This stream is consistently prioritized as priority 1, is put to PG
+# buffer 1, and scheduled at TC 1.
+#
+# - the stream first ingresses through $swp1, where it is forwarded to $swp3
+#
+# - then it ingresses through $swp4. Here it is put to a lossless buffer and put
+# to a small pool ("PFC pool"). The traffic is forwarded to $swp2, which is
+# shaped, and thus the PFC pool eventually fills, therefore the headroom
+# fills, and $swp3 is paused.
+#
+# - since $swp3 now can't send traffic, the traffic ingressing $swp1 is kept at
+# a pool ("overflow pool"). The overflow pool needs to be large enough to
+# contain the whole burst.
+#
+# - eventually the PFC pool gets some traffic out, headroom therefore gets some
+# traffic to the pool, and $swp3 is unpaused again. This way the traffic is
+# gradually forwarded from the overflow pool, through the PFC pool, out of
+# $swp2, and eventually to $h2.
+#
+# - if PFC works, all lossless flow packets that ingress through $swp1 should
+# also be seen ingressing $h2. If it doesn't, there will be drops due to
+# discrepancy between the speeds of $swp1 and $h2.
+#
+# - it should all play out relatively quickly, so that SLL and HLL will not
+# cause drops.
+#
+# +-----------------------+
+# | H1 |
+# | + $h1.111 |
+# | | 192.0.2.33/28 |
+# | | |
+# | + $h1 |
+# +---|-------------------+ +--------------------+
+# | | |
+# +---|----------------------|--------------------|---------------------------+
+# | + $swp1 $swp3 + + $swp4 |
+# | | iPOOL1 iPOOL0 | | iPOOL2 |
+# | | ePOOL4 ePOOL5 | | ePOOL4 |
+# | | 1Gbps | | 1Gbps |
+# | | PFC:enabled=1 | | PFC:enabled=1 |
+# | +-|----------------------|-+ +-|------------------------+ |
+# | | + $swp1.111 $swp3.111 + | | + $swp4.111 | |
+# | | | | | |
+# | | BR1 | | BR2 | |
+# | | | | | |
+# | | | | + $swp2.111 | |
+# | +--------------------------+ +---------|----------------+ |
+# | | |
+# | iPOOL0: 500KB dynamic | |
+# | iPOOL1: 10MB static | |
+# | iPOOL2: 1MB static + $swp2 |
+# | ePOOL4: 500KB dynamic | iPOOL0 |
+# | ePOOL5: 10MB static | ePOOL6 |
+# | ePOOL6: "infinite" static | 200Mbps shaper |
+# +-------------------------------------------------------|-------------------+
+# |
+# +---|-------------------+
+# | + $h2 H2 |
+# | | |
+# | + $h2.111 |
+# | 192.0.2.34/28 |
+# +-----------------------+
+#
+# iPOOL0+ePOOL4 is a helper pool for control traffic etc.
+# iPOOL1+ePOOL5 are overflow pools.
+# iPOOL2+ePOOL6 are PFC pools.
+
+ALL_TESTS="
+ ping_ipv4
+ test_qos_pfc
+"
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=6
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+source qos_lib.sh
+
+_1KB=1000
+_100KB=$((100 * _1KB))
+_500KB=$((500 * _1KB))
+_1MB=$((1000 * _1KB))
+_10MB=$((10 * _1MB))
+
+h1_create()
+{
+ simple_if_init $h1
+ mtu_set $h1 10000
+
+ vlan_create $h1 111 v$h1 192.0.2.33/28
+}
+
+h1_destroy()
+{
+ vlan_destroy $h1 111
+
+ mtu_restore $h1
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ simple_if_init $h2
+ mtu_set $h2 10000
+
+ vlan_create $h2 111 v$h2 192.0.2.34/28
+}
+
+h2_destroy()
+{
+ vlan_destroy $h2 111
+
+ mtu_restore $h2
+ simple_if_fini $h2
+}
+
+switch_create()
+{
+ # pools
+ # -----
+
+ devlink_pool_size_thtype_save 0
+ devlink_pool_size_thtype_save 4
+ devlink_pool_size_thtype_save 1
+ devlink_pool_size_thtype_save 5
+ devlink_pool_size_thtype_save 2
+ devlink_pool_size_thtype_save 6
+
+ devlink_port_pool_th_save $swp1 1
+ devlink_port_pool_th_save $swp2 6
+ devlink_port_pool_th_save $swp3 5
+ devlink_port_pool_th_save $swp4 2
+
+ devlink_tc_bind_pool_th_save $swp1 1 ingress
+ devlink_tc_bind_pool_th_save $swp2 1 egress
+ devlink_tc_bind_pool_th_save $swp3 1 egress
+ devlink_tc_bind_pool_th_save $swp4 1 ingress
+
+ # Control traffic pools. Just reduce the size. Keep them dynamic so that
+ # we don't need to change all the uninteresting quotas.
+ devlink_pool_size_thtype_set 0 dynamic $_500KB
+ devlink_pool_size_thtype_set 4 dynamic $_500KB
+
+ # Overflow pools.
+ devlink_pool_size_thtype_set 1 static $_10MB
+ devlink_pool_size_thtype_set 5 static $_10MB
+
+	# PFC pools. As per the writ, the size of the egress PFC pool should be
+	# infinite, but actually it just needs to be large enough to not matter
+	# in practice, so reuse the 10MB limit.
+ devlink_pool_size_thtype_set 2 static $_1MB
+ devlink_pool_size_thtype_set 6 static $_10MB
+
+ # $swp1
+ # -----
+
+ ip link set dev $swp1 up
+ mtu_set $swp1 10000
+ vlan_create $swp1 111
+ ip link set dev $swp1.111 type vlan ingress-qos-map 0:0 1:1
+
+ devlink_port_pool_th_set $swp1 1 $_10MB
+ devlink_tc_bind_pool_th_set $swp1 1 ingress 1 $_10MB
+
+ # Configure qdisc so that we can configure PG and therefore pool
+ # assignment.
+ tc qdisc replace dev $swp1 root handle 1: \
+ ets bands 8 strict 8 priomap 7 6
+ __mlnx_qos -i $swp1 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null
+
+ # $swp2
+ # -----
+
+ ip link set dev $swp2 up
+ mtu_set $swp2 10000
+ vlan_create $swp2 111
+ ip link set dev $swp2.111 type vlan egress-qos-map 0:0 1:1
+
+ devlink_port_pool_th_set $swp2 6 $_10MB
+ devlink_tc_bind_pool_th_set $swp2 1 egress 6 $_10MB
+
+ # prio 0->TC0 (band 7), 1->TC1 (band 6). TC1 is shaped.
+ tc qdisc replace dev $swp2 root handle 1: \
+ ets bands 8 strict 8 priomap 7 6
+ tc qdisc replace dev $swp2 parent 1:7 handle 17: \
+ tbf rate 200Mbit burst 131072 limit 1M
+
+ # $swp3
+ # -----
+
+ ip link set dev $swp3 up
+ mtu_set $swp3 10000
+ vlan_create $swp3 111
+ ip link set dev $swp3.111 type vlan egress-qos-map 0:0 1:1
+
+ devlink_port_pool_th_set $swp3 5 $_10MB
+ devlink_tc_bind_pool_th_set $swp3 1 egress 5 $_10MB
+
+ # prio 0->TC0 (band 7), 1->TC1 (band 6)
+ tc qdisc replace dev $swp3 root handle 1: \
+ ets bands 8 strict 8 priomap 7 6
+
+ # Need to enable PFC so that PAUSE takes effect. Therefore need to put
+ # the lossless prio into a buffer of its own. Don't bother with buffer
+ # sizes though, there is not going to be any pressure in the "backward"
+ # direction.
+ __mlnx_qos -i $swp3 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null
+ __mlnx_qos -i $swp3 --pfc=0,1,0,0,0,0,0,0 >/dev/null
+
+ # $swp4
+ # -----
+
+ ip link set dev $swp4 up
+ mtu_set $swp4 10000
+ vlan_create $swp4 111
+ ip link set dev $swp4.111 type vlan ingress-qos-map 0:0 1:1
+
+ devlink_port_pool_th_set $swp4 2 $_1MB
+ devlink_tc_bind_pool_th_set $swp4 1 ingress 2 $_1MB
+
+ # Configure qdisc so that we can hand-tune headroom.
+ tc qdisc replace dev $swp4 root handle 1: \
+ ets bands 8 strict 8 priomap 7 6
+ __mlnx_qos -i $swp4 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null
+ __mlnx_qos -i $swp4 --pfc=0,1,0,0,0,0,0,0 >/dev/null
+	# PG0 will get autoconfigured to Xoff. Give PG1 an arbitrary 100K, which
+	# (minus 2*MTU) leaves about 80K of delay provision.
+ __mlnx_qos -i $swp3 --buffer_size=0,$_100KB,0,0,0,0,0,0 >/dev/null
+
+ # bridges
+ # -------
+
+ ip link add name br1 type bridge vlan_filtering 0
+ ip link set dev $swp1.111 master br1
+ ip link set dev $swp3.111 master br1
+ ip link set dev br1 up
+
+ ip link add name br2 type bridge vlan_filtering 0
+ ip link set dev $swp2.111 master br2
+ ip link set dev $swp4.111 master br2
+ ip link set dev br2 up
+}
+
+switch_destroy()
+{
+ # Do this first so that we can reset the limits to values that are only
+ # valid for the original static / dynamic setting.
+ devlink_pool_size_thtype_restore 6
+ devlink_pool_size_thtype_restore 5
+ devlink_pool_size_thtype_restore 4
+ devlink_pool_size_thtype_restore 2
+ devlink_pool_size_thtype_restore 1
+ devlink_pool_size_thtype_restore 0
+
+ # bridges
+ # -------
+
+ ip link set dev br2 down
+ ip link set dev $swp4.111 nomaster
+ ip link set dev $swp2.111 nomaster
+ ip link del dev br2
+
+ ip link set dev br1 down
+ ip link set dev $swp3.111 nomaster
+ ip link set dev $swp1.111 nomaster
+ ip link del dev br1
+
+ # $swp4
+ # -----
+
+ __mlnx_qos -i $swp4 --buffer_size=0,0,0,0,0,0,0,0 >/dev/null
+ __mlnx_qos -i $swp4 --pfc=0,0,0,0,0,0,0,0 >/dev/null
+ __mlnx_qos -i $swp4 --prio2buffer=0,0,0,0,0,0,0,0 >/dev/null
+ tc qdisc del dev $swp4 root
+
+ devlink_tc_bind_pool_th_restore $swp4 1 ingress
+ devlink_port_pool_th_restore $swp4 2
+
+ vlan_destroy $swp4 111
+ mtu_restore $swp4
+ ip link set dev $swp4 down
+
+ # $swp3
+ # -----
+
+ __mlnx_qos -i $swp3 --pfc=0,0,0,0,0,0,0,0 >/dev/null
+ __mlnx_qos -i $swp3 --prio2buffer=0,0,0,0,0,0,0,0 >/dev/null
+ tc qdisc del dev $swp3 root
+
+ devlink_tc_bind_pool_th_restore $swp3 1 egress
+ devlink_port_pool_th_restore $swp3 5
+
+ vlan_destroy $swp3 111
+ mtu_restore $swp3
+ ip link set dev $swp3 down
+
+ # $swp2
+ # -----
+
+ tc qdisc del dev $swp2 parent 1:7
+ tc qdisc del dev $swp2 root
+
+ devlink_tc_bind_pool_th_restore $swp2 1 egress
+ devlink_port_pool_th_restore $swp2 6
+
+ vlan_destroy $swp2 111
+ mtu_restore $swp2
+ ip link set dev $swp2 down
+
+ # $swp1
+ # -----
+
+ __mlnx_qos -i $swp1 --prio2buffer=0,0,0,0,0,0,0,0 >/dev/null
+ tc qdisc del dev $swp1 root
+
+ devlink_tc_bind_pool_th_restore $swp1 1 ingress
+ devlink_port_pool_th_restore $swp1 1
+
+ vlan_destroy $swp1 111
+ mtu_restore $swp1
+ ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ swp4=${NETIFS[p6]}
+
+ h2mac=$(mac_get $h2)
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.34
+}
+
+test_qos_pfc()
+{
+ RET=0
+
+ # 10M pool, each packet is 8K of payload + headers
+ local pkts=$((_10MB / 8050))
+ local size=$((pkts * 8050))
+ local in0=$(ethtool_stats_get $swp1 rx_octets_prio_1)
+ local out0=$(ethtool_stats_get $swp2 tx_octets_prio_1)
+
+ $MZ $h1 -p 8000 -Q 1:111 -A 192.0.2.33 -B 192.0.2.34 \
+ -a own -b $h2mac -c $pkts -t udp -q
+ sleep 2
+
+ local in1=$(ethtool_stats_get $swp1 rx_octets_prio_1)
+ local out1=$(ethtool_stats_get $swp2 tx_octets_prio_1)
+
+ local din=$((in1 - in0))
+ local dout=$((out1 - out0))
+
+ local pct_in=$((din * 100 / size))
+
+ ((pct_in > 95 && pct_in < 105))
+ check_err $? "Relative ingress out of expected bounds, $pct_in% should be 100%"
+
+ ((dout == din))
+ check_err $? "$((din - dout)) bytes out of $din ingressed got lost"
+
+ log_test "PFC"
+}
+
+trap cleanup EXIT
+
+bail_on_lldpad
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
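
The worked numbers behind the sizes used above, as a sketch (8050 bytes
is the per-packet payload-plus-headers figure from test_qos_pfc):

    mtu=10000
    pg1=$((100 * 1000))                 # PG1 quota from --buffer_size
    delay=$((pg1 - 2 * mtu))            # ~80K of delay provision
    pkts=$((10 * 1000 * 1000 / 8050))   # ~1242 packets fill the 10MB pool
    echo "delay=$delay pkts=$pkts"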
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh
index 94c37124a840..af64bc9ea8ab 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh
@@ -27,11 +27,17 @@ switch_create()
# amount of traffic that is admitted to the shared buffers. This makes
# sure that there is always enough traffic of all types to select from
# for the DWRR process.
+ devlink_port_pool_th_save $swp1 0
devlink_port_pool_th_set $swp1 0 12
+ devlink_tc_bind_pool_th_save $swp1 0 ingress
devlink_tc_bind_pool_th_set $swp1 0 ingress 0 12
+ devlink_port_pool_th_save $swp2 4
devlink_port_pool_th_set $swp2 4 12
+ devlink_tc_bind_pool_th_save $swp2 7 egress
devlink_tc_bind_pool_th_set $swp2 7 egress 4 5
+ devlink_tc_bind_pool_th_save $swp2 6 egress
devlink_tc_bind_pool_th_set $swp2 6 egress 4 5
+ devlink_tc_bind_pool_th_save $swp2 5 egress
devlink_tc_bind_pool_th_set $swp2 5 egress 4 5
# Note: sch_ets_core.sh uses VLAN ingress-qos-map to assign packet
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
index 517297a14ecf..b0cb1aaffdda 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
@@ -208,6 +208,7 @@ switch_create()
ip link set dev br2_11 up
local size=$(devlink_pool_size_thtype 0 | cut -d' ' -f 1)
+ devlink_port_pool_th_save $swp3 8
devlink_port_pool_th_set $swp3 8 $size
}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh
index 4b96561c462f..3e3e06ea5703 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh
@@ -24,6 +24,13 @@ tc_police_switch_destroy()
simple_if_fini $swp1
}
+tc_police_addr()
+{
+ local num=$1; shift
+
+ printf "2001:db8:1::%x" $num
+}
+
tc_police_rules_create()
{
local count=$1; shift
@@ -34,8 +41,9 @@ tc_police_rules_create()
for ((i = 0; i < count; ++i)); do
cat >> $TC_POLICE_BATCH_FILE <<-EOF
filter add dev $swp1 ingress \
- prot ip \
- flower skip_sw \
+ prot ipv6 \
+ pref 1000 \
+ flower skip_sw dst_ip $(tc_police_addr $i) \
action police rate 10mbit burst 100k \
conform-exceed drop/ok
EOF
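
tc_police_addr gives every generated filter a distinct IPv6 destination,
so each rule lands in its own TCAM entry rather than being rejected as a
duplicate. For example:

    tc_police_addr 10; echo    # -> 2001:db8:1::a
    tc_police_addr 255; echo   # -> 2001:db8:1::ff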
diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
index de4b32fc4223..40909c254365 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
@@ -23,6 +23,27 @@ fw_flash_test()
devlink dev flash $DL_HANDLE file dummy
check_err $? "Failed to flash with status updates on"
+ devlink dev flash $DL_HANDLE file dummy component fw.mgmt
+ check_err $? "Failed to flash with component attribute"
+
+ devlink dev flash $DL_HANDLE file dummy overwrite settings
+ check_fail $? "Flash with overwrite settings should be rejected"
+
+ echo "1"> $DEBUGFS_DIR/fw_update_overwrite_mask
+ check_err $? "Failed to change allowed overwrite mask"
+
+ devlink dev flash $DL_HANDLE file dummy overwrite settings
+ check_err $? "Failed to flash with settings overwrite enabled"
+
+ devlink dev flash $DL_HANDLE file dummy overwrite identifiers
+ check_fail $? "Flash with overwrite settings should be identifiers"
+
+ echo "3"> $DEBUGFS_DIR/fw_update_overwrite_mask
+ check_err $? "Failed to change allowed overwrite mask"
+
+ devlink dev flash $DL_HANDLE file dummy overwrite identifiers overwrite settings
+ check_err $? "Failed to flash with settings and identifiers overwrite enabled"
+
echo "n"> $DEBUGFS_DIR/fw_update_status
check_err $? "Failed to disable status updates"
diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh
new file mode 100755
index 000000000000..25c896b9e2eb
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh
@@ -0,0 +1,108 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+
+NSIM_ID=$((RANDOM % 1024))
+NSIM_DEV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_ID
+NSIM_DEV_DFS=/sys/kernel/debug/netdevsim/netdevsim$NSIM_ID/ports/0
+NSIM_NETDEV=
+num_passes=0
+num_errors=0
+
+function cleanup_nsim {
+ if [ -e $NSIM_DEV_SYS ]; then
+ echo $NSIM_ID > /sys/bus/netdevsim/del_device
+ fi
+}
+
+function cleanup {
+ cleanup_nsim
+}
+
+trap cleanup EXIT
+
+function get_netdev_name {
+ local -n old=$1
+
+ new=$(ls /sys/class/net)
+
+ for netdev in $new; do
+ for check in $old; do
+ [ $netdev == $check ] && break
+ done
+
+ if [ $netdev != $check ]; then
+ echo $netdev
+ break
+ fi
+ done
+}
+
+function check {
+ local code=$1
+ local str=$2
+ local exp_str=$3
+
+ if [ $code -ne 0 ]; then
+ ((num_errors++))
+ return
+ fi
+
+ if [ "$str" != "$exp_str" ]; then
+ echo -e "Expected: '$exp_str', got '$str'"
+ ((num_errors++))
+ return
+ fi
+
+ ((num_passes++))
+}
+
+# Bail if ethtool is too old
+if ! ethtool -h | grep include-stat > /dev/null 2>&1; then
+ echo "SKIP: No --include-statistics support in ethtool"
+ exit 4
+fi
+
+# Make a netdevsim
+old_netdevs=$(ls /sys/class/net)
+
+modprobe netdevsim
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+
+NSIM_NETDEV=`get_netdev_name old_netdevs`
+
+set -o pipefail
+
+echo n > $NSIM_DEV_DFS/ethtool/pause/report_stats_tx
+echo n > $NSIM_DEV_DFS/ethtool/pause/report_stats_rx
+
+s=$(ethtool --json -a $NSIM_NETDEV | jq '.[].statistics')
+check $? "$s" "null"
+
+s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics')
+check $? "$s" "{}"
+
+echo y > $NSIM_DEV_DFS/ethtool/pause/report_stats_tx
+
+s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics | length')
+check $? "$s" "1"
+
+s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics.tx_pause_frames')
+check $? "$s" "2"
+
+echo y > $NSIM_DEV_DFS/ethtool/pause/report_stats_rx
+
+s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics | length')
+check $? "$s" "2"
+
+s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics.rx_pause_frames')
+check $? "$s" "1"
+s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics.tx_pause_frames')
+check $? "$s" "2"
+
+if [ $num_errors -eq 0 ]; then
+ echo "PASSED all $((num_passes)) checks"
+ exit 0
+else
+ echo "FAILED $num_errors/$((num_errors+num_passes)) checks"
+ exit 1
+fi
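
The jq paths above index into the JSON that ethtool emits for pause
parameters: an array with one object per interface, whose "statistics"
member holds the counters. A self-contained demo of the extraction:

    echo '[{"statistics":{"tx_pause_frames":2,"rx_pause_frames":1}}]' |
            jq '.[].statistics.tx_pause_frames'   # -> 2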
diff --git a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
index ba1d53b9f815..1b08e042cf94 100644..100755
--- a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
@@ -7,6 +7,7 @@ NSIM_DEV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_ID
NSIM_DEV_DFS=/sys/kernel/debug/netdevsim/netdevsim$NSIM_ID
NSIM_NETDEV=
HAS_ETHTOOL=
+STATIC_ENTRIES=
EXIT_STATUS=0
num_cases=0
num_errors=0
@@ -193,6 +194,21 @@ function check_tables {
sleep 0.02
((retries--))
done
+
+ if [ -n "$HAS_ETHTOOL" -a -n "${STATIC_ENTRIES[0]}" ]; then
+ fail=0
+ for i in "${!STATIC_ENTRIES[@]}"; do
+ pp_expected=`pre_ethtool ${STATIC_ENTRIES[i]}`
+ cnt=$(ethtool --show-tunnels $NSIM_NETDEV | grep -c "$pp_expected")
+ if [ $cnt -ne 1 ]; then
+ err_cnt "ethtool static entry: $pfx - $msg"
+ echo " check_table: ethtool does not contain '$pp_expected'"
+ ethtool --show-tunnels $NSIM_NETDEV
+ fail=1
+ fi
+ done
+ [ $fail == 0 ] && pass_cnt
+ fi
}
function print_table {
@@ -775,6 +791,157 @@ for port in 0 1; do
exp1=( 0 0 0 0 )
done
+cleanup_nsim
+
+# shared port tables
+pfx="table sharing"
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+
+echo 0 > $NSIM_DEV_DFS/udp_ports_open_only
+echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
+echo 1 > $NSIM_DEV_DFS/udp_ports_shared
+
+old_netdevs=$(ls /sys/class/net)
+echo 1 > $NSIM_DEV_SYS/new_port
+NSIM_NETDEV=`get_netdev_name old_netdevs`
+old_netdevs=$(ls /sys/class/net)
+echo 2 > $NSIM_DEV_SYS/new_port
+NSIM_NETDEV2=`get_netdev_name old_netdevs`
+
+msg="VxLAN v4 devices"
+exp0=( `mke 4789 1` 0 0 0 )
+exp1=( 0 0 0 0 )
+new_vxlan vxlan0 4789 $NSIM_NETDEV
+new_vxlan vxlan1 4789 $NSIM_NETDEV2
+
+msg="VxLAN v4 devices go down"
+exp0=( 0 0 0 0 )
+ifconfig vxlan1 down
+ifconfig vxlan0 down
+check_tables
+
+for ifc in vxlan0 vxlan1; do
+ ifconfig $ifc up
+done
+
+msg="VxLAN v6 device"
+exp0=( `mke 4789 1` `mke 4790 1` 0 0 )
+new_vxlan vxlanC 4790 $NSIM_NETDEV 6
+
+msg="Geneve device"
+exp1=( `mke 6081 2` 0 0 0 )
+new_geneve gnv0 6081
+
+msg="NIC device goes down"
+ifconfig $NSIM_NETDEV down
+check_tables
+
+msg="NIC device goes up again"
+ifconfig $NSIM_NETDEV up
+check_tables
+
+for i in `seq 2`; do
+ msg="turn feature off - 1, rep $i"
+ ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload off
+ check_tables
+
+ msg="turn feature off - 2, rep $i"
+ exp0=( 0 0 0 0 )
+ exp1=( 0 0 0 0 )
+ ethtool -K $NSIM_NETDEV2 rx-udp_tunnel-port-offload off
+ check_tables
+
+ msg="turn feature on - 1, rep $i"
+ exp0=( `mke 4789 1` `mke 4790 1` 0 0 )
+ exp1=( `mke 6081 2` 0 0 0 )
+ ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload on
+ check_tables
+
+ msg="turn feature on - 2, rep $i"
+ ethtool -K $NSIM_NETDEV2 rx-udp_tunnel-port-offload on
+ check_tables
+done
+
+msg="tunnels destroyed 1"
+cleanup_tuns
+exp0=( 0 0 0 0 )
+exp1=( 0 0 0 0 )
+check_tables
+
+overflow_table0 "overflow NIC table"
+
+msg="re-add a port"
+
+echo 2 > $NSIM_DEV_SYS/del_port
+echo 2 > $NSIM_DEV_SYS/new_port
+check_tables
+
+msg="replace VxLAN in overflow table"
+exp0=( `mke 10000 1` `mke 10004 1` `mke 10002 1` `mke 10003 1` )
+del_dev vxlan1
+
+msg="vacate VxLAN in overflow table"
+exp0=( `mke 10000 1` `mke 10004 1` 0 `mke 10003 1` )
+del_dev vxlan2
+
+echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset
+check_tables
+
+msg="tunnels destroyed 2"
+cleanup_tuns
+exp0=( 0 0 0 0 )
+exp1=( 0 0 0 0 )
+check_tables
+
+echo 1 > $NSIM_DEV_SYS/del_port
+echo 2 > $NSIM_DEV_SYS/del_port
+
+cleanup_nsim
+
+# Static IANA port
+pfx="static IANA vxlan"
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+
+echo 1 > $NSIM_DEV_DFS/udp_ports_static_iana_vxlan
+STATIC_ENTRIES=( `mke 4789 1` )
+
+port=1
+old_netdevs=$(ls /sys/class/net)
+echo $port > $NSIM_DEV_SYS/new_port
+NSIM_NETDEV=`get_netdev_name old_netdevs`
+
+msg="check empty"
+exp0=( 0 0 0 0 )
+exp1=( 0 0 0 0 )
+check_tables
+
+msg="add on static port"
+new_vxlan vxlan0 4789 $NSIM_NETDEV
+new_vxlan vxlan1 4789 $NSIM_NETDEV
+
+msg="add on different port"
+exp0=( `mke 4790 1` 0 0 0 )
+new_vxlan vxlan2 4790 $NSIM_NETDEV
+
+cleanup_tuns
+
+msg="tunnels destroyed"
+exp0=( 0 0 0 0 )
+exp1=( 0 0 0 0 )
+check_tables
+
+msg="different type"
+new_geneve gnv0 4789
+
+cleanup_tuns
+cleanup_nsim
+
+# END
+
modprobe -r netdevsim
if [ $num_errors -eq 0 ]; then
diff --git a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
new file mode 100755
index 000000000000..beee0d5646a6
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
@@ -0,0 +1,316 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright 2020 NXP Semiconductors
+
+WAIT_TIME=1
+NUM_NETIFS=4
+lib_dir=$(dirname $0)/../../../net/forwarding
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+
+require_command tcpdump
+
+#
+# +---------------------------------------------+
+# | DUT ports Generator ports |
+# | +--------+ +--------+ +--------+ +--------+ |
+# | | | | | | | | | |
+# | | eth0 | | eth1 | | eth2 | | eth3 | |
+# | | | | | | | | | |
+# +-+--------+-+--------+-+--------+-+--------+-+
+# | | | |
+# | | | |
+# | +-----------+ |
+# | |
+# +--------------------------------+
+
+eth0=${NETIFS[p1]}
+eth1=${NETIFS[p2]}
+eth2=${NETIFS[p3]}
+eth3=${NETIFS[p4]}
+
+eth0_mac="de:ad:be:ef:00:00"
+eth1_mac="de:ad:be:ef:00:01"
+eth2_mac="de:ad:be:ef:00:02"
+eth3_mac="de:ad:be:ef:00:03"
+
+# Helpers to map a VCAP IS1 and VCAP IS2 lookup and policy to a chain number
+# used by the kernel driver. The numbers are:
+# VCAP IS1 lookup 0: 10000
+# VCAP IS1 lookup 1: 11000
+# VCAP IS1 lookup 2: 12000
+# VCAP IS2 lookup 0 policy 0: 20000
+# VCAP IS2 lookup 0 policy 1: 20001
+# VCAP IS2 lookup 0 policy 255: 20255
+# VCAP IS2 lookup 1 policy 0: 21000
+# VCAP IS2 lookup 1 policy 1: 21001
+# VCAP IS2 lookup 1 policy 255: 21255
+IS1()
+{
+ local lookup=$1
+
+ echo $((10000 + 1000 * lookup))
+}
+
+IS2()
+{
+ local lookup=$1
+ local pag=$2
+
+ echo $((20000 + 1000 * lookup + pag))
+}
+
+ES0()
+{
+ echo 0
+}
+
+# The Ocelot switches have a fixed ingress pipeline composed of:
+#
+# +----------------------------------------------+ +-----------------------------------------+
+# | VCAP IS1 | | VCAP IS2 |
+# | | | |
+# | +----------+ +----------+ +----------+ | | +----------+ +----------+ |
+# | | Lookup 0 | | Lookup 1 | | Lookup 2 | | --+------> PAG 0: | Lookup 0 | -> | Lookup 1 | |
+# | +----------+ -> +----------+ -> +----------+ | | | +----------+ +----------+ |
+# | |key&action| |key&action| |key&action| | | | |key&action| |key&action| |
+# | |key&action| |key&action| |key&action| | | | | .. | | .. | |
+# | | .. | | .. | | .. | | | | +----------+ +----------+ |
+# | +----------+ +----------+ +----------+ | | | |
+# | selects PAG | | | +----------+ +----------+ |
+# +----------------------------------------------+ +------> PAG 1: | Lookup 0 | -> | Lookup 1 | |
+# | | +----------+ +----------+ |
+# | | |key&action| |key&action| |
+# | | | .. | | .. | |
+# | | +----------+ +----------+ |
+# | | ... |
+# | | |
+# | | +----------+ +----------+ |
+# +----> PAG 254: | Lookup 0 | -> | Lookup 1 | |
+# | | +----------+ +----------+ |
+# | | |key&action| |key&action| |
+# | | | .. | | .. | |
+# | | +----------+ +----------+ |
+# | | |
+# | | +----------+ +----------+ |
+# +----> PAG 255: | Lookup 0 | -> | Lookup 1 | |
+# | +----------+ +----------+ |
+# | |key&action| |key&action| |
+# | | .. | | .. | |
+# | +----------+ +----------+ |
+# +-----------------------------------------+
+#
+# Both the VCAP IS1 (Ingress Stage 1) and IS2 (Ingress Stage 2) are indexed
+# (looked up) multiple times: IS1 3 times, and IS2 2 times. Each filter
+# (key and action pair) can be configured to only match during the first, or
+# second, etc, lookup.
+#
+# During one TCAM lookup, the filter processing stops at the first entry that
+# matches, then the pipeline jumps to the next lookup.
+# The driver maps each individual lookup of each individual ingress TCAM to a
+# separate chain number. For correct rule offloading, it is mandatory that each
+# filter installed in one TCAM is terminated by a non-optional GOTO action to
+# the next lookup from the fixed pipeline.
+#
+# A chain can only be used if there is a GOTO action correctly set up from the
+# prior lookup in the processing pipeline. Setting up all chains is not
+# mandatory.
+
+# NOTE: VCAP IS1 currently uses only S1_NORMAL half keys and VCAP IS2
+# dynamically chooses between MAC_ETYPE, ARP, IP4_TCP_UDP, IP4_OTHER, which are
+# all half keys as well.
+
+create_tcam_skeleton()
+{
+ local eth=$1
+
+ tc qdisc add dev $eth clsact
+
+ # VCAP IS1 is the Ingress Classification TCAM and can offload the
+ # following actions:
+ # - skbedit priority
+ # - vlan pop
+ # - vlan modify
+ # - goto (only in lookup 2, the last IS1 lookup)
+ tc filter add dev $eth ingress chain 0 pref 49152 flower \
+ skip_sw action goto chain $(IS1 0)
+ tc filter add dev $eth ingress chain $(IS1 0) pref 49152 \
+ flower skip_sw action goto chain $(IS1 1)
+ tc filter add dev $eth ingress chain $(IS1 1) pref 49152 \
+ flower skip_sw action goto chain $(IS1 2)
+ tc filter add dev $eth ingress chain $(IS1 2) pref 49152 \
+ flower skip_sw action goto chain $(IS2 0 0)
+
+ # VCAP IS2 is the Security Enforcement ingress TCAM and can offload the
+ # following actions:
+ # - trap
+ # - drop
+ # - police
+ # The two VCAP IS2 lookups can be segmented into up to 256 groups of
+ # rules, called Policies. A Policy is selected through the Policy
+ # Association Group (PAG) action of VCAP IS1 (which is the
+ # GOTO offload).
+ tc filter add dev $eth ingress chain $(IS2 0 0) pref 49152 \
+ flower skip_sw action goto chain $(IS2 1 0)
+}
+
+setup_prepare()
+{
+ create_tcam_skeleton $eth0
+
+ ip link add br0 type bridge
+ ip link set $eth0 master br0
+ ip link set $eth1 master br0
+ ip link set br0 up
+
+ ip link add link $eth3 name $eth3.100 type vlan id 100
+ ip link set $eth3.100 up
+
+ ip link add link $eth3 name $eth3.200 type vlan id 200
+ ip link set $eth3.200 up
+
+ tc filter add dev $eth0 ingress chain $(IS1 1) pref 1 \
+ protocol 802.1Q flower skip_sw vlan_id 100 \
+ action vlan pop \
+ action goto chain $(IS1 2)
+
+ tc filter add dev $eth0 egress chain $(ES0) pref 1 \
+ flower skip_sw indev $eth1 \
+ action vlan push protocol 802.1Q id 100
+
+ tc filter add dev $eth0 ingress chain $(IS1 0) pref 2 \
+ protocol ipv4 flower skip_sw src_ip 10.1.1.2 \
+ action skbedit priority 7 \
+ action goto chain $(IS1 1)
+
+ tc filter add dev $eth0 ingress chain $(IS2 0 0) pref 1 \
+ protocol ipv4 flower skip_sw ip_proto udp dst_port 5201 \
+ action police rate 50mbit burst 64k \
+ action goto chain $(IS2 1 0)
+}
+
+cleanup()
+{
+ ip link del $eth3.200
+ ip link del $eth3.100
+ tc qdisc del dev $eth0 clsact
+ ip link del br0
+}
+
+test_vlan_pop()
+{
+ printf "Testing VLAN pop.. "
+
+ tcpdump_start $eth2
+
+ # Work around Mausezahn VLAN builder bug
+ # (https://github.com/netsniff-ng/netsniff-ng/issues/225) by using
+ # an 8021q upper
+ $MZ $eth3.100 -q -c 1 -p 64 -a $eth3_mac -b $eth2_mac -t ip
+
+ sleep 1
+
+ tcpdump_stop
+
+ if tcpdump_show | grep -q "$eth3_mac > $eth2_mac, ethertype IPv4"; then
+ echo "OK"
+ else
+ echo "FAIL"
+ fi
+
+ tcpdump_cleanup
+}
+
+test_vlan_push()
+{
+ printf "Testing VLAN push.. "
+
+ tcpdump_start $eth3.100
+
+ $MZ $eth2 -q -c 1 -p 64 -a $eth2_mac -b $eth3_mac -t ip
+
+ sleep 1
+
+ tcpdump_stop
+
+ if tcpdump_show | grep -q "$eth2_mac > $eth3_mac"; then
+ echo "OK"
+ else
+ echo "FAIL"
+ fi
+
+ tcpdump_cleanup
+}
+
+test_vlan_modify()
+{
+ printf "Testing VLAN modification.. "
+
+ ip link set br0 type bridge vlan_filtering 1
+ bridge vlan add dev $eth0 vid 200
+ bridge vlan add dev $eth0 vid 300
+ bridge vlan add dev $eth1 vid 300
+
+ tc filter add dev $eth0 ingress chain $(IS1 2) pref 3 \
+ protocol 802.1Q flower skip_sw vlan_id 200 \
+ action vlan modify id 300 \
+ action goto chain $(IS2 0 0)
+
+ tcpdump_start $eth2
+
+ $MZ $eth3.200 -q -c 1 -p 64 -a $eth3_mac -b $eth2_mac -t ip
+
+ sleep 1
+
+ tcpdump_stop
+
+ if tcpdump_show | grep -q "$eth3_mac > $eth2_mac, .* vlan 300"; then
+ echo "OK"
+ else
+ echo "FAIL"
+ fi
+
+ tcpdump_cleanup
+
+ tc filter del dev $eth0 ingress chain $(IS1 2) pref 3
+
+ bridge vlan del dev $eth0 vid 200
+ bridge vlan del dev $eth0 vid 300
+ bridge vlan del dev $eth1 vid 300
+ ip link set br0 type bridge vlan_filtering 0
+}
+
+test_skbedit_priority()
+{
+ local num_pkts=100
+
+ printf "Testing frame prioritization.. "
+
+ before=$(ethtool_stats_get $eth0 'rx_green_prio_7')
+
+ $MZ $eth3 -q -c $num_pkts -p 64 -a $eth3_mac -b $eth2_mac -t ip -A 10.1.1.2
+
+ after=$(ethtool_stats_get $eth0 'rx_green_prio_7')
+
+ if [ $((after - before)) = $num_pkts ]; then
+ echo "OK"
+ else
+ echo "FAIL"
+ fi
+}
+
+trap cleanup EXIT
+
+ALL_TESTS="
+ test_vlan_pop
+ test_vlan_push
+ test_vlan_modify
+ test_skbedit_priority
+"
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
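
The IS1/IS2 helpers encode (TCAM, lookup, PAG) into the driver's chain
numbering from the table above, for example:

    echo $(IS1 2)       # -> 12000 (IS1 lookup 2)
    echo $(IS2 1 255)   # -> 21255 (IS2 lookup 1, policy 255)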
diff --git a/tools/testing/selftests/exec/.gitignore b/tools/testing/selftests/exec/.gitignore
index 344a99c6da1b..9e2f00343f15 100644
--- a/tools/testing/selftests/exec/.gitignore
+++ b/tools/testing/selftests/exec/.gitignore
@@ -7,6 +7,7 @@ execveat.moved
execveat.path.ephemeral
execveat.ephemeral
execveat.denatured
+/load_address_*
/recursion-depth
xxxxxxxx*
pipe
diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile
index 0a13b110c1e6..cf69b2fcce59 100644
--- a/tools/testing/selftests/exec/Makefile
+++ b/tools/testing/selftests/exec/Makefile
@@ -4,7 +4,7 @@ CFLAGS += -Wno-nonnull
CFLAGS += -D_GNU_SOURCE
TEST_PROGS := binfmt_script non-regular
-TEST_GEN_PROGS := execveat
+TEST_GEN_PROGS := execveat load_address_4096 load_address_2097152 load_address_16777216
TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir pipe
# Makefile is a run-time dependency, since it's accessed by the execveat test
TEST_FILES := Makefile
@@ -27,4 +27,9 @@ $(OUTPUT)/execveat.symlink: $(OUTPUT)/execveat
$(OUTPUT)/execveat.denatured: $(OUTPUT)/execveat
cp $< $@
chmod -x $@
-
+$(OUTPUT)/load_address_4096: load_address.c
+ $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000 -pie $< -o $@
+$(OUTPUT)/load_address_2097152: load_address.c
+ $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x200000 -pie $< -o $@
+$(OUTPUT)/load_address_16777216: load_address.c
+ $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000000 -pie $< -o $@
diff --git a/tools/testing/selftests/exec/load_address.c b/tools/testing/selftests/exec/load_address.c
new file mode 100644
index 000000000000..d487c2f6a615
--- /dev/null
+++ b/tools/testing/selftests/exec/load_address.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <link.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+struct Statistics {
+ unsigned long long load_address;
+ unsigned long long alignment;
+};
+
+int ExtractStatistics(struct dl_phdr_info *info, size_t size, void *data)
+{
+ struct Statistics *stats = (struct Statistics *) data;
+ int i;
+
+ if (info->dlpi_name != NULL && info->dlpi_name[0] != '\0') {
+		// Ignore headers from objects other than the executable itself.
+ return 2;
+ }
+
+ stats->load_address = (unsigned long long) info->dlpi_addr;
+ stats->alignment = 0;
+
+ for (i = 0; i < info->dlpi_phnum; i++) {
+ if (info->dlpi_phdr[i].p_type != PT_LOAD)
+ continue;
+
+ if (info->dlpi_phdr[i].p_align > stats->alignment)
+ stats->alignment = info->dlpi_phdr[i].p_align;
+ }
+
+ return 1; // Terminate dl_iterate_phdr.
+}
+
+int main(int argc, char **argv)
+{
+ struct Statistics extracted;
+ unsigned long long misalign;
+ int ret;
+
+ ret = dl_iterate_phdr(ExtractStatistics, &extracted);
+ if (ret != 1) {
+ fprintf(stderr, "FAILED\n");
+ return 1;
+ }
+
+ if (extracted.alignment == 0) {
+ fprintf(stderr, "No alignment found\n");
+ return 1;
+ } else if (extracted.alignment & (extracted.alignment - 1)) {
+ fprintf(stderr, "Alignment is not a power of 2\n");
+ return 1;
+ }
+
+ misalign = extracted.load_address & (extracted.alignment - 1);
+ if (misalign) {
+ printf("alignment = %llu, load_address = %llu\n",
+ extracted.alignment, extracted.load_address);
+ fprintf(stderr, "FAILED\n");
+ return 1;
+ }
+
+ fprintf(stderr, "PASS\n");
+ return 0;
+}
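
The misalignment test relies on the usual power-of-two identity:
addr & (align - 1) equals addr modulo align. A small sketch with made-up
addresses, in the same spirit as the checks above:

    #include <assert.h>

    int main(void)
    {
            unsigned long long align = 0x200000;  /* the 2 MiB variant */

            assert((align & (align - 1)) == 0);                  /* power of two */
            assert((0x7f4a00200000ULL & (align - 1)) == 0);      /* aligned */
            assert((0x7f4a00201000ULL & (align - 1)) == 0x1000); /* 4K off */
            return 0;
    }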
diff --git a/tools/testing/selftests/firmware/.gitignore b/tools/testing/selftests/firmware/.gitignore
new file mode 100644
index 000000000000..62abc92a94c4
--- /dev/null
+++ b/tools/testing/selftests/firmware/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+fw_namespace
diff --git a/tools/testing/selftests/firmware/fw_filesystem.sh b/tools/testing/selftests/firmware/fw_filesystem.sh
index fcc281373b4d..c2a2a100114b 100755
--- a/tools/testing/selftests/firmware/fw_filesystem.sh
+++ b/tools/testing/selftests/firmware/fw_filesystem.sh
@@ -149,6 +149,26 @@ config_unset_into_buf()
echo 0 > $DIR/config_into_buf
}
+config_set_buf_size()
+{
+ echo $1 > $DIR/config_buf_size
+}
+
+config_set_file_offset()
+{
+ echo $1 > $DIR/config_file_offset
+}
+
+config_set_partial()
+{
+ echo 1 > $DIR/config_partial
+}
+
+config_unset_partial()
+{
+ echo 0 > $DIR/config_partial
+}
+
config_set_sync_direct()
{
echo 1 > $DIR/config_sync_direct
@@ -207,6 +227,35 @@ read_firmwares()
done
}
+read_partial_firmwares()
+{
+ if [ "$(cat $DIR/config_into_buf)" == "1" ]; then
+ fwfile="${FW_INTO_BUF}"
+ else
+ fwfile="${FW}"
+ fi
+
+ if [ "$1" = "xzonly" ]; then
+ fwfile="${fwfile}-orig"
+ fi
+
+ # Strip fwfile down to match partial offset and length
+ partial_data="$(cat $fwfile)"
+ partial_data="${partial_data:$2:$3}"
+
+ for i in $(seq 0 3); do
+ config_set_read_fw_idx $i
+
+ read_firmware="$(cat $DIR/read_firmware)"
+
+ # Verify the contents are what we expect.
+ if [ $read_firmware != $partial_data ]; then
+ echo "request #$i: partial firmware was not loaded" >&2
+ exit 1
+ fi
+ done
+}
+
read_firmwares_expect_nofile()
{
for i in $(seq 0 3); do
@@ -242,6 +291,21 @@ test_batched_request_firmware_into_buf_nofile()
echo "OK"
}
+test_request_partial_firmware_into_buf_nofile()
+{
+ echo -n "Test request_partial_firmware_into_buf() off=$1 size=$2 nofile: "
+ config_reset
+ config_set_name nope-test-firmware.bin
+ config_set_into_buf
+ config_set_partial
+ config_set_buf_size $2
+ config_set_file_offset $1
+ config_trigger_sync
+ read_firmwares_expect_nofile
+ release_all_firmware
+ echo "OK"
+}
+
test_batched_request_firmware_direct_nofile()
{
echo -n "Batched request_firmware_direct() nofile try #$1: "
@@ -356,6 +420,21 @@ test_request_firmware_nowait_custom()
echo "OK"
}
+test_request_partial_firmware_into_buf()
+{
+ echo -n "Test request_partial_firmware_into_buf() off=$1 size=$2: "
+ config_reset
+ config_set_name $TEST_FIRMWARE_INTO_BUF_FILENAME
+ config_set_into_buf
+ config_set_partial
+ config_set_buf_size $2
+ config_set_file_offset $1
+ config_trigger_sync
+ read_partial_firmwares normal $1 $2
+ release_all_firmware
+ echo "OK"
+}
+
# Only continue if batched request triggers are present on the
# test-firmware driver
test_config_present
@@ -383,6 +462,12 @@ for i in $(seq 1 5); do
test_request_firmware_nowait_custom $i normal
done
+# Partial loads cannot use fallback, so do not repeat tests.
+test_request_partial_firmware_into_buf 0 10
+test_request_partial_firmware_into_buf 0 5
+test_request_partial_firmware_into_buf 1 6
+test_request_partial_firmware_into_buf 2 10
+
# Test for file not found, errors are expected, the failure would be
# a hung task, which would require a hard reset.
echo
@@ -407,6 +492,12 @@ for i in $(seq 1 5); do
test_request_firmware_nowait_custom_nofile $i
done
+# Partial loads cannot use fallback, so do not repeat tests.
+test_request_partial_firmware_into_buf_nofile 0 10
+test_request_partial_firmware_into_buf_nofile 0 5
+test_request_partial_firmware_into_buf_nofile 1 6
+test_request_partial_firmware_into_buf_nofile 2 10
+
test "$HAS_FW_LOADER_COMPRESS" != "yes" && exit 0
# test with both files present
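
read_partial_firmwares builds its expected data with bash substring
expansion, ${data:offset:length}, mirroring the offset/size pairs passed
above. For the off=1 size=6 case:

    data="ABCDEFGHIJ"
    echo "${data:1:6}"   # -> BCDEFG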
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc
index 68550f97d3c3..3bcd4c3624ee 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc
@@ -6,7 +6,7 @@
echo 0 > events/enable
echo > dynamic_events
-PLACE=_do_fork
+PLACE=kernel_clone
echo "p:myevent1 $PLACE" >> dynamic_events
echo "r:myevent2 $PLACE" >> dynamic_events
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc b/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc
index c969be9eb7de..438961971b7e 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc
@@ -6,7 +6,7 @@
echo 0 > events/enable
echo > dynamic_events
-PLACE=_do_fork
+PLACE=kernel_clone
setup_events() {
echo "p:myevent1 $PLACE" >> dynamic_events
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc b/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc
index 16d543eaac88..a8603bd23e0d 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc
@@ -6,7 +6,7 @@
echo 0 > events/enable
echo > dynamic_events
-PLACE=_do_fork
+PLACE=kernel_clone
setup_events() {
echo "p:myevent1 $PLACE" >> dynamic_events
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc
index 0f41e441c203..98305d76bd04 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc
@@ -4,9 +4,9 @@
# requires: set_ftrace_filter
# flags: instance
-echo _do_fork:stacktrace >> set_ftrace_filter
+echo kernel_clone:stacktrace >> set_ftrace_filter
-grep -q "_do_fork:stacktrace:unlimited" set_ftrace_filter
+grep -q "kernel_clone:stacktrace:unlimited" set_ftrace_filter
(echo "forked"; sleep 1)
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc b/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc
index eba858c21815..9737cd0578a7 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc
@@ -3,7 +3,7 @@
# description: Kprobe dynamic event - adding and removing
# requires: kprobe_events
-echo p:myevent _do_fork > kprobe_events
+echo p:myevent kernel_clone > kprobe_events
grep myevent kprobe_events
test -d events/kprobes/myevent
echo > kprobe_events
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc b/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc
index d10bf4f05bc8..f9a40af76888 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc
@@ -3,7 +3,7 @@
# description: Kprobe dynamic event - busy event check
# requires: kprobe_events
-echo p:myevent _do_fork > kprobe_events
+echo p:myevent kernel_clone > kprobe_events
test -d events/kprobes/myevent
echo 1 > events/kprobes/myevent/enable
echo > kprobe_events && exit_fail # this must fail
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc
index 61f2ac441aec..eb543d3cfe5f 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc
@@ -3,13 +3,13 @@
# description: Kprobe dynamic event with arguments
# requires: kprobe_events
-echo 'p:testprobe _do_fork $stack $stack0 +0($stack)' > kprobe_events
+echo 'p:testprobe kernel_clone $stack $stack0 +0($stack)' > kprobe_events
grep testprobe kprobe_events | grep -q 'arg1=\$stack arg2=\$stack0 arg3=+0(\$stack)'
test -d events/kprobes/testprobe
echo 1 > events/kprobes/testprobe/enable
( echo "forked")
-grep testprobe trace | grep '_do_fork' | \
+grep testprobe trace | grep 'kernel_clone' | \
grep -q 'arg1=0x[[:xdigit:]]* arg2=0x[[:xdigit:]]* arg3=0x[[:xdigit:]]*$'
echo 0 > events/kprobes/testprobe/enable
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc
index 05aaeed6987f..4e5b63be51c9 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc
@@ -5,7 +5,7 @@
grep -A1 "fetcharg:" README | grep -q "\$comm" || exit_unsupported # this is too old
-echo 'p:testprobe _do_fork comm=$comm ' > kprobe_events
+echo 'p:testprobe kernel_clone comm=$comm ' > kprobe_events
grep testprobe kprobe_events | grep -q 'comm=$comm'
test -d events/kprobes/testprobe
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
index b5fa05443b39..a1d70588ab21 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
@@ -30,13 +30,13 @@ esac
: "Test get argument (1)"
echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):string" > kprobe_events
echo 1 > events/kprobes/testprobe/enable
-echo "p:test _do_fork" >> kprobe_events
+echo "p:test kernel_clone" >> kprobe_events
grep -qe "testprobe.* arg1=\"test\"" trace
echo 0 > events/kprobes/testprobe/enable
: "Test get argument (2)"
echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):string arg2=+0(${ARG1}):string" > kprobe_events
echo 1 > events/kprobes/testprobe/enable
-echo "p:test _do_fork" >> kprobe_events
+echo "p:test kernel_clone" >> kprobe_events
grep -qe "testprobe.* arg1=\"test\" arg2=\"test\"" trace
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc
index b8c75a3d003c..bd25dd0ba0d0 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc
@@ -14,12 +14,12 @@ elif ! grep "$SYMBOL\$" /proc/kallsyms; then
fi
: "Test get basic types symbol argument"
-echo "p:testprobe_u _do_fork arg1=@linux_proc_banner:u64 arg2=@linux_proc_banner:u32 arg3=@linux_proc_banner:u16 arg4=@linux_proc_banner:u8" > kprobe_events
-echo "p:testprobe_s _do_fork arg1=@linux_proc_banner:s64 arg2=@linux_proc_banner:s32 arg3=@linux_proc_banner:s16 arg4=@linux_proc_banner:s8" >> kprobe_events
+echo "p:testprobe_u kernel_clone arg1=@linux_proc_banner:u64 arg2=@linux_proc_banner:u32 arg3=@linux_proc_banner:u16 arg4=@linux_proc_banner:u8" > kprobe_events
+echo "p:testprobe_s kernel_clone arg1=@linux_proc_banner:s64 arg2=@linux_proc_banner:s32 arg3=@linux_proc_banner:s16 arg4=@linux_proc_banner:s8" >> kprobe_events
if grep -q "x8/16/32/64" README; then
- echo "p:testprobe_x _do_fork arg1=@linux_proc_banner:x64 arg2=@linux_proc_banner:x32 arg3=@linux_proc_banner:x16 arg4=@linux_proc_banner:x8" >> kprobe_events
+ echo "p:testprobe_x kernel_clone arg1=@linux_proc_banner:x64 arg2=@linux_proc_banner:x32 arg3=@linux_proc_banner:x16 arg4=@linux_proc_banner:x8" >> kprobe_events
fi
-echo "p:testprobe_bf _do_fork arg1=@linux_proc_banner:b8@4/32" >> kprobe_events
+echo "p:testprobe_bf kernel_clone arg1=@linux_proc_banner:b8@4/32" >> kprobe_events
echo 1 > events/kprobes/enable
(echo "forked")
echo 0 > events/kprobes/enable
@@ -27,7 +27,7 @@ grep "testprobe_[usx]:.* arg1=.* arg2=.* arg3=.* arg4=.*" trace
grep "testprobe_bf:.* arg1=.*" trace
: "Test get string symbol argument"
-echo "p:testprobe_str _do_fork arg1=@linux_proc_banner:string" > kprobe_events
+echo "p:testprobe_str kernel_clone arg1=@linux_proc_banner:string" > kprobe_events
echo 1 > events/kprobes/enable
(echo "forked")
echo 0 > events/kprobes/enable
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc
index 0610e0b5587c..91fcce1c241c 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc
@@ -4,7 +4,7 @@
# requires: kprobe_events "x8/16/32/64":README
gen_event() { # Bitsize
- echo "p:testprobe _do_fork \$stack0:s$1 \$stack0:u$1 \$stack0:x$1 \$stack0:b4@4/$1"
+ echo "p:testprobe kernel_clone \$stack0:s$1 \$stack0:u$1 \$stack0:x$1 \$stack0:b4@4/$1"
}
check_types() { # s-type u-type x-type bf-type width
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc
index 81d8b58c03bc..0d179094191f 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc
@@ -5,29 +5,29 @@
# prepare
echo nop > current_tracer
-echo _do_fork > set_ftrace_filter
-echo 'p:testprobe _do_fork' > kprobe_events
+echo kernel_clone > set_ftrace_filter
+echo 'p:testprobe kernel_clone' > kprobe_events
# kprobe on / ftrace off
echo 1 > events/kprobes/testprobe/enable
echo > trace
( echo "forked")
grep testprobe trace
-! grep '_do_fork <-' trace
+! grep 'kernel_clone <-' trace
# kprobe on / ftrace on
echo function > current_tracer
echo > trace
( echo "forked")
grep testprobe trace
-grep '_do_fork <-' trace
+grep 'kernel_clone <-' trace
# kprobe off / ftrace on
echo 0 > events/kprobes/testprobe/enable
echo > trace
( echo "forked")
! grep testprobe trace
-grep '_do_fork <-' trace
+grep 'kernel_clone <-' trace
# kprobe on / ftrace on
echo 1 > events/kprobes/testprobe/enable
@@ -35,11 +35,11 @@ echo function > current_tracer
echo > trace
( echo "forked")
grep testprobe trace
-grep '_do_fork <-' trace
+grep 'kernel_clone <-' trace
# kprobe on / ftrace off
echo nop > current_tracer
echo > trace
( echo "forked")
grep testprobe trace
-! grep '_do_fork <-' trace
+! grep 'kernel_clone <-' trace
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc
index 366b7e1b6718..45d90b6c763d 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc
@@ -4,7 +4,7 @@
# requires: kprobe_events "Create/append/":README
# Choose 2 symbols for target
-SYM1=_do_fork
+SYM1=kernel_clone
SYM2=do_exit
EVENT_NAME=kprobes/testevent
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
index b4d834675e59..1b5550ef8a9b 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
@@ -86,15 +86,21 @@ esac
# multiprobe errors
if grep -q "Create/append/" README && grep -q "imm-value" README; then
-echo 'p:kprobes/testevent _do_fork' > kprobe_events
+echo 'p:kprobes/testevent kernel_clone' > kprobe_events
check_error '^r:kprobes/testevent do_exit' # DIFF_PROBE_TYPE
# Explicitly use printf "%s" to not interpret \1
-printf "%s" 'p:kprobes/testevent _do_fork abcd=\1' > kprobe_events
-check_error 'p:kprobes/testevent _do_fork ^bcd=\1' # DIFF_ARG_TYPE
-check_error 'p:kprobes/testevent _do_fork ^abcd=\1:u8' # DIFF_ARG_TYPE
-check_error 'p:kprobes/testevent _do_fork ^abcd=\"foo"' # DIFF_ARG_TYPE
-check_error '^p:kprobes/testevent _do_fork abcd=\1' # SAME_PROBE
+printf "%s" 'p:kprobes/testevent kernel_clone abcd=\1' > kprobe_events
+check_error 'p:kprobes/testevent kernel_clone ^bcd=\1' # DIFF_ARG_TYPE
+check_error 'p:kprobes/testevent kernel_clone ^abcd=\1:u8' # DIFF_ARG_TYPE
+check_error 'p:kprobes/testevent kernel_clone ^abcd=\"foo"' # DIFF_ARG_TYPE
+check_error '^p:kprobes/testevent kernel_clone abcd=\1' # SAME_PROBE
+fi
+
+# %return suffix errors
+if grep -q "place (kretprobe): .*%return.*" README; then
+check_error 'p vfs_read^%hoge' # BAD_ADDR_SUFFIX
+check_error 'p ^vfs_read+10%return' # BAD_RETPROBE
fi
exit 0
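For reference, the rule these new checks pin down: a %return suffix turns a probe into a return probe, but only on a bare symbol; an offset or an unknown suffix is rejected. Illustrated with vfs_read, as in the checks above:

  echo 'p vfs_read%return' >> kprobe_events     # accepted, equivalent to 'r vfs_read'
  echo 'p vfs_read+10%return' >> kprobe_events  # rejected, BAD_RETPROBE
  echo 'p vfs_read%hoge' >> kprobe_events       # rejected, BAD_ADDR_SUFFIX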
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc
index 523fde6d1aa5..7ae492c204a4 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc
@@ -4,14 +4,14 @@
# requires: kprobe_events
# Add new kretprobe event
-echo 'r:testprobe2 _do_fork $retval' > kprobe_events
+echo 'r:testprobe2 kernel_clone $retval' > kprobe_events
grep testprobe2 kprobe_events | grep -q 'arg1=\$retval'
test -d events/kprobes/testprobe2
echo 1 > events/kprobes/testprobe2/enable
( echo "forked")
-cat trace | grep testprobe2 | grep -q '<- _do_fork'
+cat trace | grep testprobe2 | grep -q '<- kernel_clone'
echo 0 > events/kprobes/testprobe2/enable
echo '-:testprobe2' >> kprobe_events
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_return_suffix.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_return_suffix.tc
new file mode 100644
index 000000000000..f07bd15cc033
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_return_suffix.tc
@@ -0,0 +1,21 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kretprobe %%return suffix test
+# requires: kprobe_events '<symbol>[+<offset>]%return':README
+
+# Test for kretprobe by "r"
+echo 'r:myprobeaccept vfs_read' > kprobe_events
+RESULT1=`cat kprobe_events`
+
+# Test for kretprobe by "%return"
+echo 'p:myprobeaccept vfs_read%return' > kprobe_events
+RESULT2=`cat kprobe_events`
+
+if [ "$RESULT1" != "$RESULT2" ]; then
+ echo "Error: %return suffix didn't make a return probe."
+ echo "r-command: $RESULT1"
+ echo "%return: $RESULT2"
+ exit_fail
+fi
+
+echo > kprobe_events
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc
index ff6c44adc8a0..c4093fc1a773 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc
@@ -4,7 +4,7 @@
# requires: kprobe_events
! grep -q 'myevent' kprobe_profile
-echo p:myevent _do_fork > kprobe_events
+echo p:myevent kernel_clone > kprobe_events
grep -q 'myevent[[:space:]]*0[[:space:]]*0$' kprobe_profile
echo 1 > events/kprobes/myevent/enable
( echo "forked" )
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc
index 7b5b60c3c5a2..f5e3f9e4a01f 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc
@@ -17,4 +17,10 @@ check_error 'p /bin/sh:10(10)^a' # BAD_REFCNT_SUFFIX
check_error 'p /bin/sh:10 ^@+ab' # BAD_FILE_OFFS
check_error 'p /bin/sh:10 ^@symbol' # SYM_ON_UPROBE
+# %return suffix error
+if grep -q "place (uprobe): .*%return.*" README; then
+check_error 'p /bin/sh:10^%hoge' # BAD_ADDR_SUFFIX
+check_error 'p /bin/sh:10(10)^%return' # BAD_REFCNT_SUFFIX
+fi
+
exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc
index 7449a4b8f1f9..9098f1e7433f 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc
@@ -25,12 +25,12 @@ echo 'wakeup_latency u64 lat pid_t pid' >> synthetic_events
echo 'hist:keys=pid:ts1=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger
echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts1:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid) if next_comm=="ping"' > events/sched/sched_switch/trigger
-echo 'waking+wakeup_latency u64 lat; pid_t pid' >> synthetic_events
-echo 'hist:keys=pid,lat:sort=pid,lat:ww_lat=$waking_lat+$wakeup_lat:onmatch(synthetic.wakeup_latency).waking+wakeup_latency($ww_lat,pid)' >> events/synthetic/wakeup_latency/trigger
-echo 'hist:keys=pid,lat:sort=pid,lat' >> events/synthetic/waking+wakeup_latency/trigger
+echo 'waking_plus_wakeup_latency u64 lat; pid_t pid' >> synthetic_events
+echo 'hist:keys=pid,lat:sort=pid,lat:ww_lat=$waking_lat+$wakeup_lat:onmatch(synthetic.wakeup_latency).waking_plus_wakeup_latency($ww_lat,pid)' >> events/synthetic/wakeup_latency/trigger
+echo 'hist:keys=pid,lat:sort=pid,lat' >> events/synthetic/waking_plus_wakeup_latency/trigger
ping $LOCALHOST -c 3
-if ! grep -q "pid:" events/synthetic/waking+wakeup_latency/hist; then
+if ! grep -q "pid:" events/synthetic/waking_plus_wakeup_latency/hist; then
fail "Failed to create combined histogram"
fi
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc
new file mode 100644
index 000000000000..3d65c856eca3
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc
@@ -0,0 +1,31 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test inter-event histogram trigger trace action with dynamic string param
+# requires: set_event synthetic_events events/sched/sched_process_exec/hist "char name[]' >> synthetic_events":README
+
+fail() { #msg
+ echo $1
+ exit_fail
+}
+
+echo "Test create synthetic event"
+
+echo 'ping_test_latency u64 lat; char filename[]' > synthetic_events
+if [ ! -d events/synthetic/ping_test_latency ]; then
+ fail "Failed to create ping_test_latency synthetic event"
+fi
+
+echo "Test create histogram for synthetic event using trace action and dynamic strings"
+echo "Test histogram dynamic string variables,simple expression support and trace action"
+
+echo 'hist:key=pid:filenamevar=filename:ts0=common_timestamp.usecs' > events/sched/sched_process_exec/trigger
+echo 'hist:key=pid:lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_process_exec).ping_test_latency($lat,$filenamevar) if comm == "ping"' > events/sched/sched_process_exit/trigger
+echo 'hist:keys=filename,lat:sort=filename,lat' > events/synthetic/ping_test_latency/trigger
+
+ping $LOCALHOST -c 5
+
+if ! grep -q "ping" events/synthetic/ping_test_latency/hist; then
+ fail "Failed to create dynamic string trace action inter-event histogram"
+fi
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc
new file mode 100644
index 000000000000..ada594fe16cb
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc
@@ -0,0 +1,19 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test synthetic_events syntax parser errors
+# requires: synthetic_events error_log
+
+check_error() { # command-with-error-pos-by-^
+ ftrace_errlog_check 'synthetic_events' "$1" 'synthetic_events'
+}
+
+check_error 'myevent ^chr arg' # INVALID_TYPE
+check_error 'myevent ^char str[];; int v' # INVALID_TYPE
+check_error 'myevent char ^str]; int v' # INVALID_NAME
+check_error 'myevent char ^str;[]' # INVALID_NAME
+check_error 'myevent ^char str[; int v' # INVALID_TYPE
+check_error '^mye;vent char str[]' # BAD_NAME
+check_error 'myevent char str[]; ^int' # INVALID_FIELD
+check_error '^myevent' # INCOMPLETE_CMD
+
+exit 0
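For contrast with the error cases, a definition the parser accepts, using the same "type name" field syntax as the dynstring test earlier in this series:

  echo 'myevent char str[]; int v' > synthetic_events   # dynamic string field plus an int field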
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index 4f78e4805633..f19804df244c 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -971,6 +971,11 @@ void __run_test(struct __fixture_metadata *f,
ksft_print_msg(" RUN %s%s%s.%s ...\n",
f->name, variant->name[0] ? "." : "", variant->name, t->name);
+
+ /* Make sure output buffers are flushed before fork */
+ fflush(stdout);
+ fflush(stderr);
+
t->pid = fork();
if (t->pid < 0) {
ksft_print_msg("ERROR SPAWNING TEST CHILD\n");
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index 7a17ea815736..30848ca36555 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -47,9 +47,9 @@ ARCH ?= $(SUBARCH)
khdr:
ifndef KSFT_KHDR_INSTALL_DONE
ifeq (1,$(DEFAULT_INSTALL_HDR_PATH))
- make --no-builtin-rules ARCH=$(ARCH) -C $(top_srcdir) headers_install
+ $(MAKE) --no-builtin-rules ARCH=$(ARCH) -C $(top_srcdir) headers_install
else
- make --no-builtin-rules INSTALL_HDR_PATH=$$OUTPUT/usr \
+ $(MAKE) --no-builtin-rules INSTALL_HDR_PATH=$$OUTPUT/usr \
ARCH=$(ARCH) -C $(top_srcdir) headers_install
endif
endif
@@ -107,9 +107,8 @@ endif
emit_tests:
for TEST in $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS); do \
BASENAME_TEST=`basename $$TEST`; \
- echo " \\"; \
- echo -n " \"$$BASENAME_TEST\""; \
- done; \
+ echo "$(COLLECTION):$$BASENAME_TEST"; \
+ done
# define if isn't already. It is undefined in make O= case.
ifeq ($(RM),)
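The rewritten emit_tests rule prints one collection:test pair per line instead of the old quoted, backslash-continued list. Assuming COLLECTION=net, the output would look like:

  net:fib_nexthops.sh
  net:drop_monitor_tests.sh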
diff --git a/tools/testing/selftests/livepatch/functions.sh b/tools/testing/selftests/livepatch/functions.sh
index 1aba83c87ad3..846c7ed71556 100644
--- a/tools/testing/selftests/livepatch/functions.sh
+++ b/tools/testing/selftests/livepatch/functions.sh
@@ -278,7 +278,7 @@ function check_result {
# help differentiate repeated testing runs. Remove them with a
# post-comparison sed filter.
- result=$(dmesg | comm -13 "$SAVED_DMESG" - | \
+ result=$(dmesg | comm --nocheck-order -13 "$SAVED_DMESG" - | \
grep -e 'livepatch:' -e 'test_klp' | \
grep -v '\(tainting\|taints\) kernel' | \
sed 's/^\[[ 0-9.]*\] //')
diff --git a/tools/testing/selftests/lkdtm/run.sh b/tools/testing/selftests/lkdtm/run.sh
index 8383eb89d88a..bb7a1775307b 100755
--- a/tools/testing/selftests/lkdtm/run.sh
+++ b/tools/testing/selftests/lkdtm/run.sh
@@ -82,7 +82,7 @@ dmesg > "$DMESG"
($SHELL -c 'cat <(echo '"$test"') >'"$TRIGGER" 2>/dev/null) || true
# Record and dump the results
-dmesg | diff --changed-group-format='%>' --unchanged-group-format='' "$DMESG" - > "$LOG" || true
+dmesg | comm --nocheck-order -13 "$DMESG" - > "$LOG" || true
cat "$LOG"
# Check for expected output
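Both the livepatch and lkdtm hunks above switch to the same idiom for isolating log lines produced by a test: comm -13 keeps only lines unique to its second input, and --nocheck-order is required because dmesg output is not lexically sorted. In outline:

  dmesg > saved.log                               # snapshot before the test
  # ... run the test ...
  dmesg | comm --nocheck-order -13 saved.log -    # only lines added since the snapshot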
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 742c499328b2..61ae899cfc17 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -1,4 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
+ipsec
msg_zerocopy
socket
psock_fanout
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 9491bbaa0831..ef352477cac6 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -19,6 +19,8 @@ TEST_PROGS += txtimestamp.sh
TEST_PROGS += vrf-xfrm-tests.sh
TEST_PROGS += rxtimestamp.sh
TEST_PROGS += devlink_port_split.py
+TEST_PROGS += drop_monitor_tests.sh
+TEST_PROGS += vrf_route_leaking.sh
TEST_PROGS_EXTENDED := in_netns.sh
TEST_GEN_FILES = socket nettest
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
@@ -29,6 +31,7 @@ TEST_GEN_FILES += tcp_fastopen_backup_key
TEST_GEN_FILES += fin_ack_lat
TEST_GEN_FILES += reuseaddr_ports_exhausted
TEST_GEN_FILES += hwtstamp_config rxtimestamp timestamping txtimestamp
+TEST_GEN_FILES += ipsec
TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 3b42c06b5985..43649242adc0 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -24,10 +24,12 @@ CONFIG_IP_NF_NAT=m
CONFIG_NF_TABLES=m
CONFIG_NF_TABLES_IPV6=y
CONFIG_NF_TABLES_IPV4=y
-CONFIG_NFT_CHAIN_NAT_IPV6=m
-CONFIG_NFT_CHAIN_NAT_IPV4=m
+CONFIG_NFT_NAT=m
CONFIG_NET_SCH_FQ=m
CONFIG_NET_SCH_ETF=m
CONFIG_NET_SCH_NETEM=y
CONFIG_TEST_BLACKHOLE_DEV=m
CONFIG_KALLSYMS=y
+CONFIG_TRACEPOINTS=y
+CONFIG_NET_DROP_MONITOR=m
+CONFIG_NETDEVSIM=m
diff --git a/tools/testing/selftests/net/drop_monitor_tests.sh b/tools/testing/selftests/net/drop_monitor_tests.sh
new file mode 100755
index 000000000000..b7650e30d18b
--- /dev/null
+++ b/tools/testing/selftests/net/drop_monitor_tests.sh
@@ -0,0 +1,215 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is for checking drop monitor functionality.
+
+ret=0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# all tests in this script. Can be overridden with -t option
+TESTS="
+ sw_drops
+ hw_drops
+"
+
+IP="ip -netns ns1"
+TC="tc -netns ns1"
+DEVLINK="devlink -N ns1"
+NS_EXEC="ip netns exec ns1"
+NETDEVSIM_PATH=/sys/bus/netdevsim/
+DEV_ADDR=1337
+DEV=netdevsim${DEV_ADDR}
+DEVLINK_DEV=netdevsim/${DEV}
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ printf " TEST: %-60s [ OK ]\n" "${msg}"
+ nsuccess=$((nsuccess+1))
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf " TEST: %-60s [FAIL]\n" "${msg}"
+ fi
+}
+
+setup()
+{
+ modprobe netdevsim &> /dev/null
+
+ set -e
+ ip netns add ns1
+ $IP link add dummy10 up type dummy
+
+ $NS_EXEC echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device
+ udevadm settle
+ local netdev=$($NS_EXEC ls ${NETDEVSIM_PATH}/devices/${DEV}/net/)
+ $IP link set dev $netdev up
+
+ set +e
+}
+
+cleanup()
+{
+ $NS_EXEC echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device
+ ip netns del ns1
+}
+
+sw_drops_test()
+{
+ echo
+ echo "Software drops test"
+
+ setup
+
+ local dir=$(mktemp -d)
+
+ $TC qdisc add dev dummy10 clsact
+ $TC filter add dev dummy10 egress pref 1 handle 101 proto ip \
+ flower dst_ip 192.0.2.10 action drop
+
+ $NS_EXEC mausezahn dummy10 -a 00:11:22:33:44:55 -b 00:aa:bb:cc:dd:ee \
+ -A 192.0.2.1 -B 192.0.2.10 -t udp sp=12345,dp=54321 -c 0 -q \
+ -d 100msec &
+ timeout 5 dwdump -o sw -w ${dir}/packets.pcap
+ (( $(tshark -r ${dir}/packets.pcap \
+ -Y 'ip.dst == 192.0.2.10' 2> /dev/null | wc -l) != 0))
+ log_test $? 0 "Capturing active software drops"
+
+ rm ${dir}/packets.pcap
+
+ { kill %% && wait %%; } 2>/dev/null
+ timeout 5 dwdump -o sw -w ${dir}/packets.pcap
+ (( $(tshark -r ${dir}/packets.pcap \
+ -Y 'ip.dst == 192.0.2.10' 2> /dev/null | wc -l) == 0))
+ log_test $? 0 "Capturing inactive software drops"
+
+ rm -r $dir
+
+ cleanup
+}
+
+hw_drops_test()
+{
+ echo
+ echo "Hardware drops test"
+
+ setup
+
+ local dir=$(mktemp -d)
+
+ $DEVLINK trap set $DEVLINK_DEV trap blackhole_route action trap
+ timeout 5 dwdump -o hw -w ${dir}/packets.pcap
+ (( $(tshark -r ${dir}/packets.pcap \
+ -Y 'net_dm.hw_trap_name== blackhole_route' 2> /dev/null \
+ | wc -l) != 0))
+ log_test $? 0 "Capturing active hardware drops"
+
+ rm ${dir}/packets.pcap
+
+ $DEVLINK trap set $DEVLINK_DEV trap blackhole_route action drop
+ timeout 5 dwdump -o hw -w ${dir}/packets.pcap
+ (( $(tshark -r ${dir}/packets.pcap \
+ -Y 'net_dm.hw_trap_name== blackhole_route' 2> /dev/null \
+ | wc -l) == 0))
+ log_test $? 0 "Capturing inactive hardware drops"
+
+ rm -r $dir
+
+ cleanup
+}
+
+################################################################################
+# usage
+
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+
+ -t <test> Test(s) to run (default: all)
+ (options: $TESTS)
+EOF
+}
+
+################################################################################
+# main
+
+while getopts ":t:h" opt; do
+ case $opt in
+ t) TESTS=$OPTARG;;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit $ksft_skip;
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v devlink)" ]; then
+ echo "SKIP: Could not run test without devlink tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v tshark)" ]; then
+ echo "SKIP: Could not run test without tshark tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v dwdump)" ]; then
+ echo "SKIP: Could not run test without dwdump tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v udevadm)" ]; then
+ echo "SKIP: Could not run test without udevadm tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v timeout)" ]; then
+ echo "SKIP: Could not run test without timeout tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v mausezahn)" ]; then
+ echo "SKIP: Could not run test without mausezahn tool"
+ exit $ksft_skip
+fi
+
+tshark -G fields 2> /dev/null | grep -q net_dm
+if [ $? -ne 0 ]; then
+ echo "SKIP: tshark too old, missing net_dm dissector"
+ exit $ksft_skip
+fi
+
+# start clean
+cleanup &> /dev/null
+
+for t in $TESTS
+do
+ case $t in
+ sw_drops|sw) sw_drops_test;;
+ hw_drops|hw) hw_drops_test;;
+
+ help) echo "Test names: $TESTS"; exit 0;;
+ esac
+done
+
+if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+fi
+
+exit $ret
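Per the usage() text above, the script runs both tests by default and takes -t to narrow the selection:

  ./drop_monitor_tests.sh               # run sw_drops and hw_drops
  ./drop_monitor_tests.sh -t sw_drops   # software drops only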
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index 22dc2f3d428b..eb693a3b7b4a 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -411,9 +411,16 @@ ipv6_fdb_grp_fcnal()
run_cmd "$IP -6 ro add 2001:db8:101::1/128 nhid 103"
log_test $? 2 "Route add with fdb nexthop group"
+ run_cmd "$IP nexthop del id 61"
+ run_cmd "$BRIDGE fdb get to 02:02:00:00:00:13 dev vx10 self"
+ log_test $? 0 "Fdb entry after deleting a single nexthop"
+
run_cmd "$IP nexthop del id 102"
log_test $? 0 "Fdb nexthop delete"
+ run_cmd "$BRIDGE fdb get to 02:02:00:00:00:13 dev vx10 self"
+ log_test $? 254 "Fdb entry after deleting a nexthop group"
+
$IP link del dev vx10
}
@@ -484,9 +491,16 @@ ipv4_fdb_grp_fcnal()
run_cmd "$IP ro add 172.16.0.0/22 nhid 103"
log_test $? 2 "Route add with fdb nexthop group"
+ run_cmd "$IP nexthop del id 12"
+ run_cmd "$BRIDGE fdb get to 02:02:00:00:00:13 dev vx10 self"
+ log_test $? 0 "Fdb entry after deleting a single nexthop"
+
run_cmd "$IP nexthop del id 102"
log_test $? 0 "Fdb nexthop delete"
+ run_cmd "$BRIDGE fdb get to 02:02:00:00:00:13 dev vx10 self"
+ log_test $? 254 "Fdb entry after deleting a nexthop group"
+
$IP link del dev vx10
}
@@ -739,6 +753,36 @@ ipv6_fcnal_runtime()
run_cmd "$IP nexthop replace id 81 via 172.16.1.1 dev veth1"
log_test $? 2 "Nexthop replace of group entry - v6 route, v4 nexthop"
+ run_cmd "$IP nexthop add id 86 via 2001:db8:92::2 dev veth3"
+ run_cmd "$IP nexthop add id 87 via 172.16.1.1 dev veth1"
+ run_cmd "$IP nexthop add id 88 via 172.16.1.1 dev veth1"
+ run_cmd "$IP nexthop add id 124 group 86/87/88"
+ run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124"
+ log_test $? 2 "IPv6 route can not have a group with v4 and v6 gateways"
+
+ run_cmd "$IP nexthop del id 88"
+ run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124"
+ log_test $? 2 "IPv6 route can not have a group with v4 and v6 gateways"
+
+ run_cmd "$IP nexthop del id 87"
+ run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124"
+ log_test $? 0 "IPv6 route using a group after removing v4 gateways"
+
+ run_cmd "$IP ro delete 2001:db8:101::1/128"
+ run_cmd "$IP nexthop add id 87 via 172.16.1.1 dev veth1"
+ run_cmd "$IP nexthop add id 88 via 172.16.1.1 dev veth1"
+ run_cmd "$IP nexthop replace id 124 group 86/87/88"
+ run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124"
+ log_test $? 2 "IPv6 route can not have a group with v4 and v6 gateways"
+
+ run_cmd "$IP nexthop replace id 88 via 2001:db8:92::2 dev veth3"
+ run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124"
+ log_test $? 2 "IPv6 route can not have a group with v4 and v6 gateways"
+
+ run_cmd "$IP nexthop replace id 87 via 2001:db8:92::2 dev veth3"
+ run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124"
+ log_test $? 0 "IPv6 route using a group after replacing v4 gateways"
+
$IP nexthop flush >/dev/null 2>&1
#
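The new cases encode one rule: an IPv6 route may only use a nexthop group whose gateways are all IPv6. Condensed to plain iproute2, with the same addresses as the test:

  ip nexthop add id 86 via 2001:db8:92::2 dev veth3
  ip nexthop add id 87 via 172.16.1.1 dev veth1
  ip nexthop add id 124 group 86/87
  ip route replace 2001:db8:101::1/128 nhid 124   # rejected: group mixes v4 and v6 gateways
  ip nexthop replace id 87 via 2001:db8:92::2 dev veth3
  ip route replace 2001:db8:101::1/128 nhid 124   # accepted: all gateways are now v6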
diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh
index 75fe24bcb9cd..9c12c4fd3afc 100644
--- a/tools/testing/selftests/net/forwarding/devlink_lib.sh
+++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh
@@ -5,7 +5,7 @@
# Defines
if [[ ! -v DEVLINK_DEV ]]; then
- DEVLINK_DEV=$(devlink port show "${NETIFS[p1]}" -j \
+ DEVLINK_DEV=$(devlink port show "${NETIFS[p1]:-$NETIF_NO_CABLE}" -j \
| jq -r '.port | keys[]' | cut -d/ -f-2)
if [ -z "$DEVLINK_DEV" ]; then
echo "SKIP: ${NETIFS[p1]} has no devlink device registered for it"
@@ -117,6 +117,12 @@ devlink_reload()
declare -A DEVLINK_ORIG
+# Changing a pool type from static to dynamic causes its threshold values to be
+# reinterpreted. They therefore need to be saved before the pool type is
+# changed, and set up again afterwards. Instead of saving the current state
+# implicitly in the _set call, provide separate functions for all three
+# primitives: save, set, and restore.
+
devlink_port_pool_threshold()
{
local port=$1; shift
@@ -126,14 +132,21 @@ devlink_port_pool_threshold()
| jq '.port_pool."'"$port"'"[].threshold'
}
-devlink_port_pool_th_set()
+devlink_port_pool_th_save()
{
local port=$1; shift
local pool=$1; shift
- local th=$1; shift
local key="port_pool($port,$pool).threshold"
DEVLINK_ORIG[$key]=$(devlink_port_pool_threshold $port $pool)
+}
+
+devlink_port_pool_th_set()
+{
+ local port=$1; shift
+ local pool=$1; shift
+ local th=$1; shift
+
devlink sb port pool set $port pool $pool th $th
}
@@ -142,8 +155,13 @@ devlink_port_pool_th_restore()
local port=$1; shift
local pool=$1; shift
local key="port_pool($port,$pool).threshold"
+ local -a orig=(${DEVLINK_ORIG[$key]})
- devlink sb port pool set $port pool $pool th ${DEVLINK_ORIG[$key]}
+ if [[ -z $orig ]]; then
+ echo "WARNING: Mismatched devlink_port_pool_th_restore"
+ else
+ devlink sb port pool set $port pool $pool th $orig
+ fi
}
devlink_pool_size_thtype()
@@ -154,14 +172,20 @@ devlink_pool_size_thtype()
| jq -r '.pool[][] | (.size, .thtype)'
}
+devlink_pool_size_thtype_save()
+{
+ local pool=$1; shift
+ local key="pool($pool).size_thtype"
+
+ DEVLINK_ORIG[$key]=$(devlink_pool_size_thtype $pool)
+}
+
devlink_pool_size_thtype_set()
{
local pool=$1; shift
local thtype=$1; shift
local size=$1; shift
- local key="pool($pool).size_thtype"
- DEVLINK_ORIG[$key]=$(devlink_pool_size_thtype $pool)
devlink sb pool set "$DEVLINK_DEV" pool $pool size $size thtype $thtype
}
@@ -171,8 +195,12 @@ devlink_pool_size_thtype_restore()
local key="pool($pool).size_thtype"
local -a orig=(${DEVLINK_ORIG[$key]})
- devlink sb pool set "$DEVLINK_DEV" pool $pool \
- size ${orig[0]} thtype ${orig[1]}
+ if [[ -z ${orig[0]} ]]; then
+ echo "WARNING: Mismatched devlink_pool_size_thtype_restore"
+ else
+ devlink sb pool set "$DEVLINK_DEV" pool $pool \
+ size ${orig[0]} thtype ${orig[1]}
+ fi
}
devlink_tc_bind_pool_th()
@@ -185,6 +213,16 @@ devlink_tc_bind_pool_th()
| jq -r '.tc_bind[][] | (.pool, .threshold)'
}
+devlink_tc_bind_pool_th_save()
+{
+ local port=$1; shift
+ local tc=$1; shift
+ local dir=$1; shift
+ local key="tc_bind($port,$dir,$tc).pool_th"
+
+ DEVLINK_ORIG[$key]=$(devlink_tc_bind_pool_th $port $tc $dir)
+}
+
devlink_tc_bind_pool_th_set()
{
local port=$1; shift
@@ -192,9 +230,7 @@ devlink_tc_bind_pool_th_set()
local dir=$1; shift
local pool=$1; shift
local th=$1; shift
- local key="tc_bind($port,$dir,$tc).pool_th"
- DEVLINK_ORIG[$key]=$(devlink_tc_bind_pool_th $port $tc $dir)
devlink sb tc bind set $port tc $tc type $dir pool $pool th $th
}
@@ -206,8 +242,12 @@ devlink_tc_bind_pool_th_restore()
local key="tc_bind($port,$dir,$tc).pool_th"
local -a orig=(${DEVLINK_ORIG[$key]})
- devlink sb tc bind set $port tc $tc type $dir \
- pool ${orig[0]} th ${orig[1]}
+ if [[ -z ${orig[0]} ]]; then
+ echo "WARNING: Mismatched devlink_tc_bind_pool_th_restore"
+ else
+ devlink sb tc bind set $port tc $tc type $dir \
+ pool ${orig[0]} th ${orig[1]}
+ fi
}
devlink_traps_num_get()
@@ -509,3 +549,9 @@ devlink_cpu_port_get()
echo "$DEVLINK_DEV/$cpu_dl_port_num"
}
+
+devlink_cell_size_get()
+{
+ devlink sb pool show "$DEVLINK_DEV" pool 0 -j \
+ | jq '.pool[][].cell_size'
+}
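A sketch of the caller pattern the save/set/restore split enables; the port and pool numbers here are hypothetical:

  devlink_port_pool_th_save $swp1 0
  devlink_pool_size_thtype_save 0
  devlink_pool_size_thtype_set 0 dynamic $POOL_SIZE   # thresholds get reinterpreted here
  devlink_port_pool_th_set $swp1 0 $NEW_TH            # so the new threshold is applied after
  # ... test body ...
  devlink_port_pool_th_restore $swp1 0
  devlink_pool_size_thtype_restore 0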
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 977fc2b326a2..927f9ba49e08 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -1227,3 +1227,46 @@ stop_traffic()
# Suppress noise from killing mausezahn.
{ kill %% && wait %%; } 2>/dev/null
}
+
+tcpdump_start()
+{
+ local if_name=$1; shift
+ local ns=$1; shift
+
+ capfile=$(mktemp)
+ capout=$(mktemp)
+
+ if [ -z $ns ]; then
+ ns_cmd=""
+ else
+ ns_cmd="ip netns exec ${ns}"
+ fi
+
+ if [ -z $SUDO_USER ] ; then
+ capuser=""
+ else
+ capuser="-Z $SUDO_USER"
+ fi
+
+ $ns_cmd tcpdump -e -n -Q in -i $if_name \
+ -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 &
+ cappid=$!
+
+ sleep 1
+}
+
+tcpdump_stop()
+{
+ $ns_cmd kill $cappid
+ sleep 1
+}
+
+tcpdump_cleanup()
+{
+ rm $capfile $capout
+}
+
+tcpdump_show()
+{
+ tcpdump -e -n -r $capfile 2>&1
+}
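A hedged sketch of how a forwarding test might use the new capture helpers; the interface, namespace, and expected pattern are hypothetical:

  tcpdump_start $h1 ns1                  # capture ingress frames on $h1 inside ns1
  # ... generate traffic ...
  tcpdump_stop
  tcpdump_show | grep -q "<expected frame>"
  check_err $? "expected frame not captured"
  tcpdump_cleanup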
diff --git a/tools/testing/selftests/net/forwarding/mirror_lib.sh b/tools/testing/selftests/net/forwarding/mirror_lib.sh
index c33bfd7ba214..13db1cb50e57 100644
--- a/tools/testing/selftests/net/forwarding/mirror_lib.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_lib.sh
@@ -31,7 +31,7 @@ mirror_test()
local t0=$(tc_rule_stats_get $dev $pref)
$MZ $vrf_name ${sip:+-A $sip} -B $dip -a own -b bc -q \
- -c 10 -d 100ms -t icmp type=8
+ -c 10 -d 100msec -t icmp type=8
sleep 0.5
local t1=$(tc_rule_stats_get $dev $pref)
local delta=$((t1 - t0))
diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c
new file mode 100644
index 000000000000..17ced7d6ce25
--- /dev/null
+++ b/tools/testing/selftests/net/ipsec.c
@@ -0,0 +1,2195 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ipsec.c - Check xfrm on veth inside a net-ns.
+ * Copyright (c) 2018 Dmitry Safonov
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <asm/types.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/limits.h>
+#include <linux/netlink.h>
+#include <linux/random.h>
+#include <linux/rtnetlink.h>
+#include <linux/veth.h>
+#include <linux/xfrm.h>
+#include <netinet/in.h>
+#include <net/if.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "../kselftest.h"
+
+#define printk(fmt, ...) \
+ ksft_print_msg("%d[%u] " fmt "\n", getpid(), __LINE__, ##__VA_ARGS__)
+
+#define pr_err(fmt, ...) printk(fmt ": %m", ##__VA_ARGS__)
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
+
+#define IPV4_STR_SZ 16 /* xxx.xxx.xxx.xxx is longest + \0 */
+#define MAX_PAYLOAD 2048
+#define XFRM_ALGO_KEY_BUF_SIZE 512
+#define MAX_PROCESSES (1 << 14) /* /16 mask divided by /30 subnets */
+#define INADDR_A ((in_addr_t) 0x0a000000) /* 10.0.0.0 */
+#define INADDR_B ((in_addr_t) 0xc0a80000) /* 192.168.0.0 */
+
+/* /30 mask for one veth connection */
+#define PREFIX_LEN 30
+#define child_ip(nr) (4*(nr) + 1)
+#define grchild_ip(nr) (4*(nr) + 2)
+
+#define VETH_FMT "ktst-%d"
+#define VETH_LEN 12
+
+static int nsfd_parent = -1;
+static int nsfd_childa = -1;
+static int nsfd_childb = -1;
+static long page_size;
+
+/*
+ * ksft_cnt is static in kselftest, so it isn't shared with children.
+ * We have to send each test result back to the parent and count it there.
+ * results_fd is a pipe with test feedback from children.
+ */
+static int results_fd[2];
+
+const unsigned int ping_delay_nsec = 50 * 1000 * 1000;
+const unsigned int ping_timeout = 300;
+const unsigned int ping_count = 100;
+const unsigned int ping_success = 80;
+
+static void randomize_buffer(void *buf, size_t buflen)
+{
+ int *p = (int *)buf;
+ size_t words = buflen / sizeof(int);
+ size_t leftover = buflen % sizeof(int);
+
+ if (!buflen)
+ return;
+
+ while (words--)
+ *p++ = rand();
+
+ if (leftover) {
+ int tmp = rand();
+
+ memcpy(buf + buflen - leftover, &tmp, leftover);
+ }
+
+ return;
+}
+
+static int unshare_open(void)
+{
+ const char *netns_path = "/proc/self/ns/net";
+ int fd;
+
+ if (unshare(CLONE_NEWNET) != 0) {
+ pr_err("unshare()");
+ return -1;
+ }
+
+ fd = open(netns_path, O_RDONLY);
+ if (fd <= 0) {
+ pr_err("open(%s)", netns_path);
+ return -1;
+ }
+
+ return fd;
+}
+
+static int switch_ns(int fd)
+{
+ if (setns(fd, CLONE_NEWNET)) {
+ pr_err("setns()");
+ return -1;
+ }
+ return 0;
+}
+
+/*
+ * Run the test inside a new parent net namespace so that there is less
+ * to clean up on the error path.
+ */
+static int init_namespaces(void)
+{
+ nsfd_parent = unshare_open();
+ if (nsfd_parent <= 0)
+ return -1;
+
+ nsfd_childa = unshare_open();
+ if (nsfd_childa <= 0)
+ return -1;
+
+ if (switch_ns(nsfd_parent))
+ return -1;
+
+ nsfd_childb = unshare_open();
+ if (nsfd_childb <= 0)
+ return -1;
+
+ if (switch_ns(nsfd_parent))
+ return -1;
+ return 0;
+}
+
+static int netlink_sock(int *sock, uint32_t *seq_nr, int proto)
+{
+ if (*sock > 0) {
+ (*seq_nr)++;
+ return 0;
+ }
+
+ *sock = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, proto);
+ if (*sock <= 0) {
+ pr_err("socket(AF_NETLINK)");
+ return -1;
+ }
+
+ randomize_buffer(seq_nr, sizeof(*seq_nr));
+
+ return 0;
+}
+
+static inline struct rtattr *rtattr_hdr(struct nlmsghdr *nh)
+{
+ return (struct rtattr *)((char *)(nh) + RTA_ALIGN((nh)->nlmsg_len));
+}
+
+static int rtattr_pack(struct nlmsghdr *nh, size_t req_sz,
+ unsigned short rta_type, const void *payload, size_t size)
+{
+ /* NLMSG_ALIGNTO == RTA_ALIGNTO, nlmsg_len already aligned */
+ struct rtattr *attr = rtattr_hdr(nh);
+ size_t nl_size = RTA_ALIGN(nh->nlmsg_len) + RTA_LENGTH(size);
+
+ if (req_sz < nl_size) {
+ printk("req buf is too small: %zu < %zu", req_sz, nl_size);
+ return -1;
+ }
+ nh->nlmsg_len = nl_size;
+
+ attr->rta_len = RTA_LENGTH(size);
+ attr->rta_type = rta_type;
+ memcpy(RTA_DATA(attr), payload, size);
+
+ return 0;
+}
+
+static struct rtattr *_rtattr_begin(struct nlmsghdr *nh, size_t req_sz,
+ unsigned short rta_type, const void *payload, size_t size)
+{
+ struct rtattr *ret = rtattr_hdr(nh);
+
+ if (rtattr_pack(nh, req_sz, rta_type, payload, size))
+ return 0;
+
+ return ret;
+}
+
+static inline struct rtattr *rtattr_begin(struct nlmsghdr *nh, size_t req_sz,
+ unsigned short rta_type)
+{
+ return _rtattr_begin(nh, req_sz, rta_type, 0, 0);
+}
+
+static inline void rtattr_end(struct nlmsghdr *nh, struct rtattr *attr)
+{
+ char *nlmsg_end = (char *)nh + nh->nlmsg_len;
+
+ attr->rta_len = nlmsg_end - (char *)attr;
+}
+
+static int veth_pack_peerb(struct nlmsghdr *nh, size_t req_sz,
+ const char *peer, int ns)
+{
+ struct ifinfomsg pi;
+ struct rtattr *peer_attr;
+
+ memset(&pi, 0, sizeof(pi));
+ pi.ifi_family = AF_UNSPEC;
+ pi.ifi_change = 0xFFFFFFFF;
+
+ peer_attr = _rtattr_begin(nh, req_sz, VETH_INFO_PEER, &pi, sizeof(pi));
+ if (!peer_attr)
+ return -1;
+
+ if (rtattr_pack(nh, req_sz, IFLA_IFNAME, peer, strlen(peer)))
+ return -1;
+
+ if (rtattr_pack(nh, req_sz, IFLA_NET_NS_FD, &ns, sizeof(ns)))
+ return -1;
+
+ rtattr_end(nh, peer_attr);
+
+ return 0;
+}
+
+static int netlink_check_answer(int sock)
+{
+ struct nlmsgerror {
+ struct nlmsghdr hdr;
+ int error;
+ struct nlmsghdr orig_msg;
+ } answer;
+
+ if (recv(sock, &answer, sizeof(answer), 0) < 0) {
+ pr_err("recv()");
+ return -1;
+ } else if (answer.hdr.nlmsg_type != NLMSG_ERROR) {
+ printk("expected NLMSG_ERROR, got %d", (int)answer.hdr.nlmsg_type);
+ return -1;
+ } else if (answer.error) {
+ printk("NLMSG_ERROR: %d: %s",
+ answer.error, strerror(-answer.error));
+ return answer.error;
+ }
+
+ return 0;
+}
+
+static int veth_add(int sock, uint32_t seq, const char *peera, int ns_a,
+ const char *peerb, int ns_b)
+{
+ uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE;
+ struct {
+ struct nlmsghdr nh;
+ struct ifinfomsg info;
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+ const char veth_type[] = "veth";
+ struct rtattr *link_info, *info_data;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info));
+ req.nh.nlmsg_type = RTM_NEWLINK;
+ req.nh.nlmsg_flags = flags;
+ req.nh.nlmsg_seq = seq;
+ req.info.ifi_family = AF_UNSPEC;
+ req.info.ifi_change = 0xFFFFFFFF;
+
+ if (rtattr_pack(&req.nh, sizeof(req), IFLA_IFNAME, peera, strlen(peera)))
+ return -1;
+
+ if (rtattr_pack(&req.nh, sizeof(req), IFLA_NET_NS_FD, &ns_a, sizeof(ns_a)))
+ return -1;
+
+ link_info = rtattr_begin(&req.nh, sizeof(req), IFLA_LINKINFO);
+ if (!link_info)
+ return -1;
+
+ if (rtattr_pack(&req.nh, sizeof(req), IFLA_INFO_KIND, veth_type, sizeof(veth_type)))
+ return -1;
+
+ info_data = rtattr_begin(&req.nh, sizeof(req), IFLA_INFO_DATA);
+ if (!info_data)
+ return -1;
+
+ if (veth_pack_peerb(&req.nh, sizeof(req), peerb, ns_b))
+ return -1;
+
+ rtattr_end(&req.nh, info_data);
+ rtattr_end(&req.nh, link_info);
+
+ if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ return -1;
+ }
+ return netlink_check_answer(sock);
+}
+
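Roughly the iproute2 equivalent of what veth_add() assembles by hand over RTM_NEWLINK with nested IFLA_LINKINFO/IFLA_INFO_DATA/VETH_INFO_PEER attributes; the namespace names here are illustrative:

  ip link add ktst-0 netns nsa type veth peer name ktst-0 netns nsb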
+static int ip4_addr_set(int sock, uint32_t seq, const char *intf,
+ struct in_addr addr, uint8_t prefix)
+{
+ uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE;
+ struct {
+ struct nlmsghdr nh;
+ struct ifaddrmsg info;
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info));
+ req.nh.nlmsg_type = RTM_NEWADDR;
+ req.nh.nlmsg_flags = flags;
+ req.nh.nlmsg_seq = seq;
+ req.info.ifa_family = AF_INET;
+ req.info.ifa_prefixlen = prefix;
+ req.info.ifa_index = if_nametoindex(intf);
+
+#ifdef DEBUG
+ {
+ char addr_str[IPV4_STR_SZ] = {};
+
+ strncpy(addr_str, inet_ntoa(addr), IPV4_STR_SZ - 1);
+
+ printk("ip addr set %s", addr_str);
+ }
+#endif
+
+ if (rtattr_pack(&req.nh, sizeof(req), IFA_LOCAL, &addr, sizeof(addr)))
+ return -1;
+
+ if (rtattr_pack(&req.nh, sizeof(req), IFA_ADDRESS, &addr, sizeof(addr)))
+ return -1;
+
+ if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ return -1;
+ }
+ return netlink_check_answer(sock);
+}
+
+static int link_set_up(int sock, uint32_t seq, const char *intf)
+{
+ struct {
+ struct nlmsghdr nh;
+ struct ifinfomsg info;
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info));
+ req.nh.nlmsg_type = RTM_NEWLINK;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_seq = seq;
+ req.info.ifi_family = AF_UNSPEC;
+ req.info.ifi_change = 0xFFFFFFFF;
+ req.info.ifi_index = if_nametoindex(intf);
+ req.info.ifi_flags = IFF_UP;
+ req.info.ifi_change = IFF_UP;
+
+ if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ return -1;
+ }
+ return netlink_check_answer(sock);
+}
+
+static int ip4_route_set(int sock, uint32_t seq, const char *intf,
+ struct in_addr src, struct in_addr dst)
+{
+ struct {
+ struct nlmsghdr nh;
+ struct rtmsg rt;
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+ unsigned int index = if_nametoindex(intf);
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.rt));
+ req.nh.nlmsg_type = RTM_NEWROUTE;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE;
+ req.nh.nlmsg_seq = seq;
+ req.rt.rtm_family = AF_INET;
+ req.rt.rtm_dst_len = 32;
+ req.rt.rtm_table = RT_TABLE_MAIN;
+ req.rt.rtm_protocol = RTPROT_BOOT;
+ req.rt.rtm_scope = RT_SCOPE_LINK;
+ req.rt.rtm_type = RTN_UNICAST;
+
+ if (rtattr_pack(&req.nh, sizeof(req), RTA_DST, &dst, sizeof(dst)))
+ return -1;
+
+ if (rtattr_pack(&req.nh, sizeof(req), RTA_PREFSRC, &src, sizeof(src)))
+ return -1;
+
+ if (rtattr_pack(&req.nh, sizeof(req), RTA_OIF, &index, sizeof(index)))
+ return -1;
+
+ if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ return -1;
+ }
+
+ return netlink_check_answer(sock);
+}
+
+static int tunnel_set_route(int route_sock, uint32_t *route_seq, char *veth,
+ struct in_addr tunsrc, struct in_addr tundst)
+{
+ if (ip4_addr_set(route_sock, (*route_seq)++, "lo",
+ tunsrc, PREFIX_LEN)) {
+ printk("Failed to set ipv4 addr");
+ return -1;
+ }
+
+ if (ip4_route_set(route_sock, (*route_seq)++, veth, tunsrc, tundst)) {
+ printk("Failed to set ipv4 route");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int init_child(int nsfd, char *veth, unsigned int src, unsigned int dst)
+{
+ struct in_addr intsrc = inet_makeaddr(INADDR_B, src);
+ struct in_addr tunsrc = inet_makeaddr(INADDR_A, src);
+ struct in_addr tundst = inet_makeaddr(INADDR_A, dst);
+ int route_sock = -1, ret = -1;
+ uint32_t route_seq;
+
+ if (switch_ns(nsfd))
+ return -1;
+
+ if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE)) {
+ printk("Failed to open netlink route socket in child");
+ return -1;
+ }
+
+ if (ip4_addr_set(route_sock, route_seq++, veth, intsrc, PREFIX_LEN)) {
+ printk("Failed to set ipv4 addr");
+ goto err;
+ }
+
+ if (link_set_up(route_sock, route_seq++, veth)) {
+ printk("Failed to bring up %s", veth);
+ goto err;
+ }
+
+ if (tunnel_set_route(route_sock, &route_seq, veth, tunsrc, tundst)) {
+ printk("Failed to add tunnel route on %s", veth);
+ goto err;
+ }
+ ret = 0;
+
+err:
+ close(route_sock);
+ return ret;
+}
+
+#define ALGO_LEN 64
+enum desc_type {
+ CREATE_TUNNEL = 0,
+ ALLOCATE_SPI,
+ MONITOR_ACQUIRE,
+ EXPIRE_STATE,
+ EXPIRE_POLICY,
+};
+const char *desc_name[] = {
+ "create tunnel",
+ "alloc spi",
+ "monitor acquire",
+ "expire state",
+ "expire policy"
+};
+struct xfrm_desc {
+ enum desc_type type;
+ uint8_t proto;
+ char a_algo[ALGO_LEN];
+ char e_algo[ALGO_LEN];
+ char c_algo[ALGO_LEN];
+ char ae_algo[ALGO_LEN];
+ unsigned int icv_len;
+ /* unsigned key_len; */
+};
+
+enum msg_type {
+ MSG_ACK = 0,
+ MSG_EXIT,
+ MSG_PING,
+ MSG_XFRM_PREPARE,
+ MSG_XFRM_ADD,
+ MSG_XFRM_DEL,
+ MSG_XFRM_CLEANUP,
+};
+
+struct test_desc {
+ enum msg_type type;
+ union {
+ struct {
+ in_addr_t reply_ip;
+ unsigned int port;
+ } ping;
+ struct xfrm_desc xfrm_desc;
+ } body;
+};
+
+struct test_result {
+ struct xfrm_desc desc;
+ unsigned int res;
+};
+
+static void write_test_result(unsigned int res, struct xfrm_desc *d)
+{
+ struct test_result tr = {};
+ ssize_t ret;
+
+ tr.desc = *d;
+ tr.res = res;
+
+ ret = write(results_fd[1], &tr, sizeof(tr));
+ if (ret != sizeof(tr))
+ pr_err("Failed to write the result in pipe %zd", ret);
+}
+
+static void write_msg(int fd, struct test_desc *msg, bool exit_of_fail)
+{
+ ssize_t bytes = write(fd, msg, sizeof(*msg));
+
+ /* Make sure that write/read is atomic to a pipe */
+ BUILD_BUG_ON(sizeof(struct test_desc) > PIPE_BUF);
+
+ if (bytes < 0) {
+ pr_err("write()");
+ if (exit_of_fail)
+ exit(KSFT_FAIL);
+ }
+ if (bytes != sizeof(*msg)) {
+ pr_err("sent part of the message %zd/%zu", bytes, sizeof(*msg));
+ if (exit_of_fail)
+ exit(KSFT_FAIL);
+ }
+}
+
+static void read_msg(int fd, struct test_desc *msg, bool exit_of_fail)
+{
+ ssize_t bytes = read(fd, msg, sizeof(*msg));
+
+ if (bytes < 0) {
+ pr_err("read()");
+ if (exit_of_fail)
+ exit(KSFT_FAIL);
+ }
+ if (bytes != sizeof(*msg)) {
+ pr_err("got incomplete message %zd/%zu", bytes, sizeof(*msg));
+ if (exit_of_fail)
+ exit(KSFT_FAIL);
+ }
+}
+
+static int udp_ping_init(struct in_addr listen_ip, unsigned int u_timeout,
+ unsigned int *server_port, int sock[2])
+{
+ struct sockaddr_in server;
+ struct timeval t = { .tv_sec = 0, .tv_usec = u_timeout };
+ socklen_t s_len = sizeof(server);
+
+ sock[0] = socket(AF_INET, SOCK_DGRAM, 0);
+ if (sock[0] < 0) {
+ pr_err("socket()");
+ return -1;
+ }
+
+ server.sin_family = AF_INET;
+ server.sin_port = 0;
+ memcpy(&server.sin_addr.s_addr, &listen_ip, sizeof(struct in_addr));
+
+ if (bind(sock[0], (struct sockaddr *)&server, s_len)) {
+ pr_err("bind()");
+ goto err_close_server;
+ }
+
+ if (getsockname(sock[0], (struct sockaddr *)&server, &s_len)) {
+ pr_err("getsockname()");
+ goto err_close_server;
+ }
+
+ *server_port = ntohs(server.sin_port);
+
+ if (setsockopt(sock[0], SOL_SOCKET, SO_RCVTIMEO, (const char *)&t, sizeof t)) {
+ pr_err("setsockopt()");
+ goto err_close_server;
+ }
+
+ sock[1] = socket(AF_INET, SOCK_DGRAM, 0);
+ if (sock[1] < 0) {
+ pr_err("socket()");
+ goto err_close_server;
+ }
+
+ return 0;
+
+err_close_server:
+ close(sock[0]);
+ return -1;
+}
+
+static int udp_ping_send(int sock[2], in_addr_t dest_ip, unsigned int port,
+ char *buf, size_t buf_len)
+{
+ struct sockaddr_in server;
+ const struct sockaddr *dest_addr = (struct sockaddr *)&server;
+ char sock_buf[buf_len];
+ ssize_t r_bytes, s_bytes;
+
+ server.sin_family = AF_INET;
+ server.sin_port = htons(port);
+ server.sin_addr.s_addr = dest_ip;
+
+ s_bytes = sendto(sock[1], buf, buf_len, 0, dest_addr, sizeof(server));
+ if (s_bytes < 0) {
+ pr_err("sendto()");
+ return -1;
+ } else if (s_bytes != buf_len) {
+ printk("send part of the message: %zd/%zu", s_bytes, sizeof(server));
+ return -1;
+ }
+
+ r_bytes = recv(sock[0], sock_buf, buf_len, 0);
+ if (r_bytes < 0) {
+ if (errno != EAGAIN)
+ pr_err("recv()");
+ return -1;
+ } else if (r_bytes == 0) { /* EOF */
+ printk("EOF on reply to ping");
+ return -1;
+ } else if (r_bytes != buf_len || memcmp(buf, sock_buf, buf_len)) {
+ printk("ping reply packet is corrupted %zd/%zu", r_bytes, buf_len);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int udp_ping_reply(int sock[2], in_addr_t dest_ip, unsigned int port,
+ char *buf, size_t buf_len)
+{
+ struct sockaddr_in server;
+ const struct sockaddr *dest_addr = (struct sockaddr *)&server;
+ char sock_buf[buf_len];
+ ssize_t r_bytes, s_bytes;
+
+ server.sin_family = AF_INET;
+ server.sin_port = htons(port);
+ server.sin_addr.s_addr = dest_ip;
+
+ r_bytes = recv(sock[0], sock_buf, buf_len, 0);
+ if (r_bytes < 0) {
+ if (errno != EAGAIN)
+ pr_err("recv()");
+ return -1;
+ }
+ if (r_bytes == 0) { /* EOF */
+ printk("EOF on reply to ping");
+ return -1;
+ }
+ if (r_bytes != buf_len || memcmp(buf, sock_buf, buf_len)) {
+ printk("ping reply packet is corrupted %zd/%zu", r_bytes, buf_len);
+ return -1;
+ }
+
+ s_bytes = sendto(sock[1], buf, buf_len, 0, dest_addr, sizeof(server));
+ if (s_bytes < 0) {
+ pr_err("sendto()");
+ return -1;
+ } else if (s_bytes != buf_len) {
+ printk("send part of the message: %zd/%zu", s_bytes, sizeof(server));
+ return -1;
+ }
+
+ return 0;
+}
+
+typedef int (*ping_f)(int sock[2], in_addr_t dest_ip, unsigned int port,
+ char *buf, size_t buf_len);
+static int do_ping(int cmd_fd, char *buf, size_t buf_len, struct in_addr from,
+ bool init_side, int d_port, in_addr_t to, ping_f func)
+{
+ struct test_desc msg;
+ unsigned int s_port, i, ping_succeeded = 0;
+ int ping_sock[2];
+ char to_str[IPV4_STR_SZ] = {}, from_str[IPV4_STR_SZ] = {};
+
+ if (udp_ping_init(from, ping_timeout, &s_port, ping_sock)) {
+ printk("Failed to init ping");
+ return -1;
+ }
+
+ memset(&msg, 0, sizeof(msg));
+ msg.type = MSG_PING;
+ msg.body.ping.port = s_port;
+ memcpy(&msg.body.ping.reply_ip, &from, sizeof(from));
+
+ write_msg(cmd_fd, &msg, 0);
+ if (init_side) {
+ /* The other end sends ip to ping */
+ read_msg(cmd_fd, &msg, 0);
+ if (msg.type != MSG_PING)
+ return -1;
+ to = msg.body.ping.reply_ip;
+ d_port = msg.body.ping.port;
+ }
+
+ for (i = 0; i < ping_count ; i++) {
+ struct timespec sleep_time = {
+ .tv_sec = 0,
+ .tv_nsec = ping_delay_nsec,
+ };
+
+ ping_succeeded += !func(ping_sock, to, d_port, buf, page_size);
+ nanosleep(&sleep_time, 0);
+ }
+
+ close(ping_sock[0]);
+ close(ping_sock[1]);
+
+ strncpy(to_str, inet_ntoa(*(struct in_addr *)&to), IPV4_STR_SZ - 1);
+ strncpy(from_str, inet_ntoa(from), IPV4_STR_SZ - 1);
+
+ if (ping_succeeded < ping_success) {
+ printk("ping (%s) %s->%s failed %u/%u times",
+ init_side ? "send" : "reply", from_str, to_str,
+ ping_count - ping_succeeded, ping_count);
+ return -1;
+ }
+
+#ifdef DEBUG
+ printk("ping (%s) %s->%s succeeded %u/%u times",
+ init_side ? "send" : "reply", from_str, to_str,
+ ping_succeeded, ping_count);
+#endif
+
+ return 0;
+}
+
+static int xfrm_fill_key(char *name, char *buf,
+ size_t buf_len, unsigned int *key_len)
+{
+ /* TODO: use set/map instead */
+ if (strncmp(name, "digest_null", ALGO_LEN) == 0)
+ *key_len = 0;
+ else if (strncmp(name, "ecb(cipher_null)", ALGO_LEN) == 0)
+ *key_len = 0;
+ else if (strncmp(name, "cbc(des)", ALGO_LEN) == 0)
+ *key_len = 64;
+ else if (strncmp(name, "hmac(md5)", ALGO_LEN) == 0)
+ *key_len = 128;
+ else if (strncmp(name, "cmac(aes)", ALGO_LEN) == 0)
+ *key_len = 128;
+ else if (strncmp(name, "xcbc(aes)", ALGO_LEN) == 0)
+ *key_len = 128;
+ else if (strncmp(name, "cbc(cast5)", ALGO_LEN) == 0)
+ *key_len = 128;
+ else if (strncmp(name, "cbc(serpent)", ALGO_LEN) == 0)
+ *key_len = 128;
+ else if (strncmp(name, "hmac(sha1)", ALGO_LEN) == 0)
+ *key_len = 160;
+ else if (strncmp(name, "hmac(rmd160)", ALGO_LEN) == 0)
+ *key_len = 160;
+ else if (strncmp(name, "cbc(des3_ede)", ALGO_LEN) == 0)
+ *key_len = 192;
+ else if (strncmp(name, "hmac(sha256)", ALGO_LEN) == 0)
+ *key_len = 256;
+ else if (strncmp(name, "cbc(aes)", ALGO_LEN) == 0)
+ *key_len = 256;
+ else if (strncmp(name, "cbc(camellia)", ALGO_LEN) == 0)
+ *key_len = 256;
+ else if (strncmp(name, "cbc(twofish)", ALGO_LEN) == 0)
+ *key_len = 256;
+ else if (strncmp(name, "rfc3686(ctr(aes))", ALGO_LEN) == 0)
+ *key_len = 288;
+ else if (strncmp(name, "hmac(sha384)", ALGO_LEN) == 0)
+ *key_len = 384;
+ else if (strncmp(name, "cbc(blowfish)", ALGO_LEN) == 0)
+ *key_len = 448;
+ else if (strncmp(name, "hmac(sha512)", ALGO_LEN) == 0)
+ *key_len = 512;
+ else if (strncmp(name, "rfc4106(gcm(aes))-128", ALGO_LEN) == 0)
+ *key_len = 160;
+ else if (strncmp(name, "rfc4543(gcm(aes))-128", ALGO_LEN) == 0)
+ *key_len = 160;
+ else if (strncmp(name, "rfc4309(ccm(aes))-128", ALGO_LEN) == 0)
+ *key_len = 152;
+ else if (strncmp(name, "rfc4106(gcm(aes))-192", ALGO_LEN) == 0)
+ *key_len = 224;
+ else if (strncmp(name, "rfc4543(gcm(aes))-192", ALGO_LEN) == 0)
+ *key_len = 224;
+ else if (strncmp(name, "rfc4309(ccm(aes))-192", ALGO_LEN) == 0)
+ *key_len = 216;
+ else if (strncmp(name, "rfc4106(gcm(aes))-256", ALGO_LEN) == 0)
+ *key_len = 288;
+ else if (strncmp(name, "rfc4543(gcm(aes))-256", ALGO_LEN) == 0)
+ *key_len = 288;
+ else if (strncmp(name, "rfc4309(ccm(aes))-256", ALGO_LEN) == 0)
+ *key_len = 280;
+ else if (strncmp(name, "rfc7539(chacha20,poly1305)-128", ALGO_LEN) == 0)
+ *key_len = 0;
+
+ if (*key_len > buf_len) {
+ printk("Can't pack a key - too big for buffer");
+ return -1;
+ }
+
+ randomize_buffer(buf, *key_len);
+
+ return 0;
+}
+
+static int xfrm_state_pack_algo(struct nlmsghdr *nh, size_t req_sz,
+ struct xfrm_desc *desc)
+{
+ struct {
+ union {
+ struct xfrm_algo alg;
+ struct xfrm_algo_aead aead;
+ struct xfrm_algo_auth auth;
+ } u;
+ char buf[XFRM_ALGO_KEY_BUF_SIZE];
+ } alg = {};
+ size_t alen, elen, clen, aelen;
+ unsigned short type;
+
+ alen = strlen(desc->a_algo);
+ elen = strlen(desc->e_algo);
+ clen = strlen(desc->c_algo);
+ aelen = strlen(desc->ae_algo);
+
+ /* Verify desc */
+ switch (desc->proto) {
+ case IPPROTO_AH:
+ if (!alen || elen || clen || aelen) {
+ printk("BUG: buggy ah desc");
+ return -1;
+ }
+ strncpy(alg.u.alg.alg_name, desc->a_algo, ALGO_LEN - 1);
+ if (xfrm_fill_key(desc->a_algo, alg.u.alg.alg_key,
+ sizeof(alg.buf), &alg.u.alg.alg_key_len))
+ return -1;
+ type = XFRMA_ALG_AUTH;
+ break;
+ case IPPROTO_COMP:
+ if (!clen || elen || alen || aelen) {
+ printk("BUG: buggy comp desc");
+ return -1;
+ }
+ strncpy(alg.u.alg.alg_name, desc->c_algo, ALGO_LEN - 1);
+ if (xfrm_fill_key(desc->c_algo, alg.u.alg.alg_key,
+ sizeof(alg.buf), &alg.u.alg.alg_key_len))
+ return -1;
+ type = XFRMA_ALG_COMP;
+ break;
+ case IPPROTO_ESP:
+ if (!((alen && elen) ^ aelen) || clen) {
+ printk("BUG: buggy esp desc");
+ return -1;
+ }
+ if (aelen) {
+ alg.u.aead.alg_icv_len = desc->icv_len;
+ strncpy(alg.u.aead.alg_name, desc->ae_algo, ALGO_LEN - 1);
+ if (xfrm_fill_key(desc->ae_algo, alg.u.aead.alg_key,
+ sizeof(alg.buf), &alg.u.aead.alg_key_len))
+ return -1;
+ type = XFRMA_ALG_AEAD;
+ } else {
+ strncpy(alg.u.alg.alg_name, desc->e_algo, ALGO_LEN - 1);
+ type = XFRMA_ALG_CRYPT;
+ if (xfrm_fill_key(desc->e_algo, alg.u.alg.alg_key,
+ sizeof(alg.buf), &alg.u.alg.alg_key_len))
+ return -1;
+ if (rtattr_pack(nh, req_sz, type, &alg, sizeof(alg)))
+ return -1;
+
+ strncpy(alg.u.alg.alg_name, desc->a_algo, ALGO_LEN - 1);
+ type = XFRMA_ALG_AUTH;
+ if (xfrm_fill_key(desc->a_algo, alg.u.alg.alg_key,
+ sizeof(alg.buf), &alg.u.alg.alg_key_len))
+ return -1;
+ }
+ break;
+ default:
+ printk("BUG: unknown proto in desc");
+ return -1;
+ }
+
+ if (rtattr_pack(nh, req_sz, type, &alg, sizeof(alg)))
+ return -1;
+
+ return 0;
+}
+
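+/* Derive the SPI from the host part of the source address, so that
+ * every tunnel in the test gets its own predictable SPI.
+ */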
+static inline uint32_t gen_spi(struct in_addr src)
+{
+ return htonl(inet_lnaof(src));
+}
+
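+/* Add a tunnel-mode SA (XFRM_MSG_NEWSA) with unlimited lifetimes. */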
+static int xfrm_state_add(int xfrm_sock, uint32_t seq, uint32_t spi,
+ struct in_addr src, struct in_addr dst,
+ struct xfrm_desc *desc)
+{
+ struct {
+ struct nlmsghdr nh;
+ struct xfrm_usersa_info info;
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info));
+ req.nh.nlmsg_type = XFRM_MSG_NEWSA;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_seq = seq;
+
+ /* Fill selector. */
+ memcpy(&req.info.sel.daddr, &dst, sizeof(dst));
+ memcpy(&req.info.sel.saddr, &src, sizeof(src));
+ req.info.sel.family = AF_INET;
+ req.info.sel.prefixlen_d = PREFIX_LEN;
+ req.info.sel.prefixlen_s = PREFIX_LEN;
+
+ /* Fill id */
+ memcpy(&req.info.id.daddr, &dst, sizeof(dst));
+ /* Note: zero-spi cannot be deleted */
+ req.info.id.spi = spi;
+ req.info.id.proto = desc->proto;
+
+ memcpy(&req.info.saddr, &src, sizeof(src));
+
+ /* Fill lifetime_cfg */
+ req.info.lft.soft_byte_limit = XFRM_INF;
+ req.info.lft.hard_byte_limit = XFRM_INF;
+ req.info.lft.soft_packet_limit = XFRM_INF;
+ req.info.lft.hard_packet_limit = XFRM_INF;
+
+ req.info.family = AF_INET;
+ req.info.mode = XFRM_MODE_TUNNEL;
+
+ if (xfrm_state_pack_algo(&req.nh, sizeof(req), desc))
+ return -1;
+
+ if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ return -1;
+ }
+
+ return netlink_check_answer(xfrm_sock);
+}
+
+static bool xfrm_usersa_found(struct xfrm_usersa_info *info, uint32_t spi,
+ struct in_addr src, struct in_addr dst,
+ struct xfrm_desc *desc)
+{
+ if (memcmp(&info->sel.daddr, &dst, sizeof(dst)))
+ return false;
+
+ if (memcmp(&info->sel.saddr, &src, sizeof(src)))
+ return false;
+
+ if (info->sel.family != AF_INET ||
+ info->sel.prefixlen_d != PREFIX_LEN ||
+ info->sel.prefixlen_s != PREFIX_LEN)
+ return false;
+
+ if (info->id.spi != spi || info->id.proto != desc->proto)
+ return false;
+
+ if (memcmp(&info->id.daddr, &dst, sizeof(dst)))
+ return false;
+
+ if (memcmp(&info->saddr, &src, sizeof(src)))
+ return false;
+
+ if (info->lft.soft_byte_limit != XFRM_INF ||
+ info->lft.hard_byte_limit != XFRM_INF ||
+ info->lft.soft_packet_limit != XFRM_INF ||
+ info->lft.hard_packet_limit != XFRM_INF)
+ return false;
+
+ if (info->family != AF_INET || info->mode != XFRM_MODE_TUNNEL)
+ return false;
+
+ /* XXX: check xfrm algo, see xfrm_state_pack_algo(). */
+
+ return true;
+}
+
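+/* Dump SAs with XFRM_MSG_GETSA/NLM_F_DUMP and verify that the state
+ * added above is present with the expected parameters.
+ */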
+static int xfrm_state_check(int xfrm_sock, uint32_t seq, uint32_t spi,
+ struct in_addr src, struct in_addr dst,
+ struct xfrm_desc *desc)
+{
+ struct {
+ struct nlmsghdr nh;
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+ struct {
+ struct nlmsghdr nh;
+ union {
+ struct xfrm_usersa_info info;
+ int error;
+ };
+ char attrbuf[MAX_PAYLOAD];
+ } answer;
+ struct xfrm_address_filter filter = {};
+ bool found = false;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(0);
+ req.nh.nlmsg_type = XFRM_MSG_GETSA;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+ req.nh.nlmsg_seq = seq;
+
+ /*
+ * Add dump filter by source address as there may be other tunnels
+ * in this netns (if tests run in parallel).
+ */
+ filter.family = AF_INET;
+ filter.splen = 0x1f; /* 0xffffffff mask see addr_match() */
+ memcpy(&filter.saddr, &src, sizeof(src));
+ if (rtattr_pack(&req.nh, sizeof(req), XFRMA_ADDRESS_FILTER,
+ &filter, sizeof(filter)))
+ return -1;
+
+ if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ return -1;
+ }
+
+ while (1) {
+ if (recv(xfrm_sock, &answer, sizeof(answer), 0) < 0) {
+ pr_err("recv()");
+ return -1;
+ }
+ if (answer.nh.nlmsg_type == NLMSG_ERROR) {
+ printk("NLMSG_ERROR: %d: %s",
+ answer.error, strerror(-answer.error));
+ return -1;
+ } else if (answer.nh.nlmsg_type == NLMSG_DONE) {
+ if (found)
+ return 0;
+ printk("didn't find allocated xfrm state in dump");
+ return -1;
+ } else if (answer.nh.nlmsg_type == XFRM_MSG_NEWSA) {
+ if (xfrm_usersa_found(&answer.info, spi, src, dst, desc))
+ found = true;
+ }
+ }
+}
+
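+/* Install SAs for both directions and verify them via a dump. */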
+static int xfrm_set(int xfrm_sock, uint32_t *seq,
+ struct in_addr src, struct in_addr dst,
+ struct in_addr tunsrc, struct in_addr tundst,
+ struct xfrm_desc *desc)
+{
+ int err;
+
+ err = xfrm_state_add(xfrm_sock, (*seq)++, gen_spi(src), src, dst, desc);
+ if (err) {
+ printk("Failed to add xfrm state");
+ return -1;
+ }
+
+ err = xfrm_state_add(xfrm_sock, (*seq)++, gen_spi(src), dst, src, desc);
+ if (err) {
+ printk("Failed to add xfrm state");
+ return -1;
+ }
+
+ /* Check dumps for XFRM_MSG_GETSA */
+ err = xfrm_state_check(xfrm_sock, (*seq)++, gen_spi(src), src, dst, desc);
+ err |= xfrm_state_check(xfrm_sock, (*seq)++, gen_spi(src), dst, src, desc);
+ if (err) {
+ printk("Failed to check xfrm state");
+ return -1;
+ }
+
+ return 0;
+}
+
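+/* Install an IN or OUT tunnel-mode policy (XFRM_MSG_NEWPOLICY) whose
+ * template accepts any auth/crypt/comp algorithm.
+ */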
+static int xfrm_policy_add(int xfrm_sock, uint32_t seq, uint32_t spi,
+ struct in_addr src, struct in_addr dst, uint8_t dir,
+ struct in_addr tunsrc, struct in_addr tundst, uint8_t proto)
+{
+ struct {
+ struct nlmsghdr nh;
+ struct xfrm_userpolicy_info info;
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+ struct xfrm_user_tmpl tmpl;
+
+ memset(&req, 0, sizeof(req));
+ memset(&tmpl, 0, sizeof(tmpl));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info));
+ req.nh.nlmsg_type = XFRM_MSG_NEWPOLICY;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_seq = seq;
+
+ /* Fill selector. */
+ memcpy(&req.info.sel.daddr, &dst, sizeof(dst));
+ memcpy(&req.info.sel.saddr, &src, sizeof(src));
+ req.info.sel.family = AF_INET;
+ req.info.sel.prefixlen_d = PREFIX_LEN;
+ req.info.sel.prefixlen_s = PREFIX_LEN;
+
+ /* Fill lifetime_cfg */
+ req.info.lft.soft_byte_limit = XFRM_INF;
+ req.info.lft.hard_byte_limit = XFRM_INF;
+ req.info.lft.soft_packet_limit = XFRM_INF;
+ req.info.lft.hard_packet_limit = XFRM_INF;
+
+ req.info.dir = dir;
+
+ /* Fill tmpl */
+ memcpy(&tmpl.id.daddr, &dst, sizeof(dst));
+ /* Note: zero-spi cannot be deleted */
+ tmpl.id.spi = spi;
+ tmpl.id.proto = proto;
+ tmpl.family = AF_INET;
+ memcpy(&tmpl.saddr, &src, sizeof(src));
+ tmpl.mode = XFRM_MODE_TUNNEL;
+ tmpl.aalgos = (~(uint32_t)0);
+ tmpl.ealgos = (~(uint32_t)0);
+ tmpl.calgos = (~(uint32_t)0);
+
+ if (rtattr_pack(&req.nh, sizeof(req), XFRMA_TMPL, &tmpl, sizeof(tmpl)))
+ return -1;
+
+ if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ return -1;
+ }
+
+ return netlink_check_answer(xfrm_sock);
+}
+
+static int xfrm_prepare(int xfrm_sock, uint32_t *seq,
+ struct in_addr src, struct in_addr dst,
+ struct in_addr tunsrc, struct in_addr tundst, uint8_t proto)
+{
+ if (xfrm_policy_add(xfrm_sock, (*seq)++, gen_spi(src), src, dst,
+ XFRM_POLICY_OUT, tunsrc, tundst, proto)) {
+ printk("Failed to add xfrm policy");
+ return -1;
+ }
+
+ if (xfrm_policy_add(xfrm_sock, (*seq)++, gen_spi(src), dst, src,
+ XFRM_POLICY_IN, tunsrc, tundst, proto)) {
+ printk("Failed to add xfrm policy");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int xfrm_policy_del(int xfrm_sock, uint32_t seq,
+ struct in_addr src, struct in_addr dst, uint8_t dir,
+ struct in_addr tunsrc, struct in_addr tundst)
+{
+ struct {
+ struct nlmsghdr nh;
+ struct xfrm_userpolicy_id id;
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.id));
+ req.nh.nlmsg_type = XFRM_MSG_DELPOLICY;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_seq = seq;
+
+ /* Fill id */
+ memcpy(&req.id.sel.daddr, &dst, sizeof(dst));
+ memcpy(&req.id.sel.saddr, &src, sizeof(src));
+ req.id.sel.family = AF_INET;
+ req.id.sel.prefixlen_d = PREFIX_LEN;
+ req.id.sel.prefixlen_s = PREFIX_LEN;
+ req.id.dir = dir;
+
+ if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ return -1;
+ }
+
+ return netlink_check_answer(xfrm_sock);
+}
+
+static int xfrm_cleanup(int xfrm_sock, uint32_t *seq,
+ struct in_addr src, struct in_addr dst,
+ struct in_addr tunsrc, struct in_addr tundst)
+{
+ if (xfrm_policy_del(xfrm_sock, (*seq)++, src, dst,
+ XFRM_POLICY_OUT, tunsrc, tundst)) {
+ printk("Failed to add xfrm policy");
+ return -1;
+ }
+
+ if (xfrm_policy_del(xfrm_sock, (*seq)++, dst, src,
+ XFRM_POLICY_IN, tunsrc, tundst)) {
+ printk("Failed to add xfrm policy");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int xfrm_state_del(int xfrm_sock, uint32_t seq, uint32_t spi,
+ struct in_addr src, struct in_addr dst, uint8_t proto)
+{
+ struct {
+ struct nlmsghdr nh;
+ struct xfrm_usersa_id id;
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+ xfrm_address_t saddr = {};
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.id));
+ req.nh.nlmsg_type = XFRM_MSG_DELSA;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_seq = seq;
+
+ memcpy(&req.id.daddr, &dst, sizeof(dst));
+ req.id.family = AF_INET;
+ req.id.proto = proto;
+ /* Note: zero-spi cannot be deleted */
+ req.id.spi = spi;
+
+ memcpy(&saddr, &src, sizeof(src));
+ if (rtattr_pack(&req.nh, sizeof(req), XFRMA_SRCADDR, &saddr, sizeof(saddr)))
+ return -1;
+
+ if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ return -1;
+ }
+
+ return netlink_check_answer(xfrm_sock);
+}
+
+static int xfrm_delete(int xfrm_sock, uint32_t *seq,
+ struct in_addr src, struct in_addr dst,
+ struct in_addr tunsrc, struct in_addr tundst, uint8_t proto)
+{
+ if (xfrm_state_del(xfrm_sock, (*seq)++, gen_spi(src), src, dst, proto)) {
+ printk("Failed to remove xfrm state");
+ return -1;
+ }
+
+ if (xfrm_state_del(xfrm_sock, (*seq)++, gen_spi(src), dst, src, proto)) {
+ printk("Failed to remove xfrm state");
+ return -1;
+ }
+
+ return 0;
+}
+
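+/* Request a specific SPI (XFRM_MSG_ALLOCSPI) and check the answer.
+ * xfrm_userspi_info is one of the structs whose size differs between
+ * 32-bit and 64-bit UABI, see the size table further down.
+ */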
+static int xfrm_state_allocspi(int xfrm_sock, uint32_t *seq,
+ uint32_t spi, uint8_t proto)
+{
+ struct {
+ struct nlmsghdr nh;
+ struct xfrm_userspi_info spi;
+ } req;
+ struct {
+ struct nlmsghdr nh;
+ union {
+ struct xfrm_usersa_info info;
+ int error;
+ };
+ } answer;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.spi));
+ req.nh.nlmsg_type = XFRM_MSG_ALLOCSPI;
+ req.nh.nlmsg_flags = NLM_F_REQUEST;
+ req.nh.nlmsg_seq = (*seq)++;
+
+ req.spi.info.family = AF_INET;
+ req.spi.min = spi;
+ req.spi.max = spi;
+ req.spi.info.id.proto = proto;
+
+ if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ return KSFT_FAIL;
+ }
+
+ if (recv(xfrm_sock, &answer, sizeof(answer), 0) < 0) {
+ pr_err("recv()");
+ return KSFT_FAIL;
+ } else if (answer.nh.nlmsg_type == XFRM_MSG_NEWSA) {
+ uint32_t new_spi = ntohl(answer.info.id.spi);
+
+ if (new_spi != spi) {
+ printk("allocated spi is different from requested: %#x != %#x",
+ new_spi, spi);
+ return KSFT_FAIL;
+ }
+ return KSFT_PASS;
+ } else if (answer.nh.nlmsg_type != NLMSG_ERROR) {
+ printk("expected NLMSG_ERROR, got %d", (int)answer.nh.nlmsg_type);
+ return KSFT_FAIL;
+ }
+
+ printk("NLMSG_ERROR: %d: %s", answer.error, strerror(-answer.error));
+ return (answer.error) ? KSFT_FAIL : KSFT_PASS;
+}
+
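+/* Open a netlink socket subscribed to the given multicast groups,
+ * used to listen for xfrm broadcast events.
+ */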
+static int netlink_sock_bind(int *sock, uint32_t *seq, int proto, uint32_t groups)
+{
+ struct sockaddr_nl snl = {};
+ socklen_t addr_len;
+ int ret = -1;
+
+ snl.nl_family = AF_NETLINK;
+ snl.nl_groups = groups;
+
+ if (netlink_sock(sock, seq, proto)) {
+ printk("Failed to open xfrm netlink socket");
+ return -1;
+ }
+
+ if (bind(*sock, (struct sockaddr *)&snl, sizeof(snl)) < 0) {
+ pr_err("bind()");
+ goto out_close;
+ }
+
+ addr_len = sizeof(snl);
+ if (getsockname(*sock, (struct sockaddr *)&snl, &addr_len) < 0) {
+ pr_err("getsockname()");
+ goto out_close;
+ }
+ if (addr_len != sizeof(snl)) {
+ printk("Wrong address length %d", addr_len);
+ goto out_close;
+ }
+ if (snl.nl_family != AF_NETLINK) {
+ printk("Wrong address family %d", snl.nl_family);
+ goto out_close;
+ }
+ return 0;
+
+out_close:
+ close(*sock);
+ return ret;
+}
+
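+/* Send XFRM_MSG_ACQUIRE and verify that the copy broadcast on the
+ * XFRMNLGRP_ACQUIRE group still carries the original algorithm masks:
+ * a 32/64-bit layout mismatch in xfrm_user_acquire would shift them.
+ */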
+static int xfrm_monitor_acquire(int xfrm_sock, uint32_t *seq, unsigned int nr)
+{
+ struct {
+ struct nlmsghdr nh;
+ union {
+ struct xfrm_user_acquire acq;
+ int error;
+ };
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+ struct xfrm_user_tmpl xfrm_tmpl = {};
+ int xfrm_listen = -1, ret = KSFT_FAIL;
+ uint32_t seq_listen;
+
+ if (netlink_sock_bind(&xfrm_listen, &seq_listen, NETLINK_XFRM, XFRMNLGRP_ACQUIRE))
+ return KSFT_FAIL;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.acq));
+ req.nh.nlmsg_type = XFRM_MSG_ACQUIRE;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_seq = (*seq)++;
+
+ req.acq.policy.sel.family = AF_INET;
+ req.acq.aalgos = 0xfeed;
+ req.acq.ealgos = 0xbaad;
+ req.acq.calgos = 0xbabe;
+
+ xfrm_tmpl.family = AF_INET;
+ xfrm_tmpl.id.proto = IPPROTO_ESP;
+ if (rtattr_pack(&req.nh, sizeof(req), XFRMA_TMPL, &xfrm_tmpl, sizeof(xfrm_tmpl)))
+ goto out_close;
+
+ if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ goto out_close;
+ }
+
+ if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) {
+ pr_err("recv()");
+ goto out_close;
+ } else if (req.nh.nlmsg_type != NLMSG_ERROR) {
+ printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type);
+ goto out_close;
+ }
+
+ if (req.error) {
+ printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error));
+ ret = req.error;
+ goto out_close;
+ }
+
+ if (recv(xfrm_listen, &req, sizeof(req), 0) < 0) {
+ pr_err("recv()");
+ goto out_close;
+ }
+
+ if (req.acq.aalgos != 0xfeed || req.acq.ealgos != 0xbaad
+ || req.acq.calgos != 0xbabe) {
+ printk("xfrm_user_acquire has changed %x %x %x",
+ req.acq.aalgos, req.acq.ealgos, req.acq.calgos);
+ goto out_close;
+ }
+
+ ret = KSFT_PASS;
+out_close:
+ close(xfrm_listen);
+ return ret;
+}
+
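+/* Add an SA, inject XFRM_MSG_EXPIRE for it and check that the event
+ * broadcast on XFRMNLGRP_EXPIRE has expire.hard set; the field lies
+ * past xfrm_usersa_info, where the 32/64-bit layouts differ.
+ */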
+static int xfrm_expire_state(int xfrm_sock, uint32_t *seq,
+ unsigned int nr, struct xfrm_desc *desc)
+{
+ struct {
+ struct nlmsghdr nh;
+ union {
+ struct xfrm_user_expire expire;
+ int error;
+ };
+ } req;
+ struct in_addr src, dst;
+ int xfrm_listen = -1, ret = KSFT_FAIL;
+ uint32_t seq_listen;
+
+ src = inet_makeaddr(INADDR_B, child_ip(nr));
+ dst = inet_makeaddr(INADDR_B, grchild_ip(nr));
+
+ if (xfrm_state_add(xfrm_sock, (*seq)++, gen_spi(src), src, dst, desc)) {
+ printk("Failed to add xfrm state");
+ return KSFT_FAIL;
+ }
+
+ if (netlink_sock_bind(&xfrm_listen, &seq_listen, NETLINK_XFRM, XFRMNLGRP_EXPIRE))
+ return KSFT_FAIL;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.expire));
+ req.nh.nlmsg_type = XFRM_MSG_EXPIRE;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_seq = (*seq)++;
+
+ memcpy(&req.expire.state.id.daddr, &dst, sizeof(dst));
+ req.expire.state.id.spi = gen_spi(src);
+ req.expire.state.id.proto = desc->proto;
+ req.expire.state.family = AF_INET;
+ req.expire.hard = 0xff;
+
+ if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ goto out_close;
+ }
+
+ if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) {
+ pr_err("recv()");
+ goto out_close;
+ } else if (req.nh.nlmsg_type != NLMSG_ERROR) {
+ printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type);
+ goto out_close;
+ }
+
+ if (req.error) {
+ printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error));
+ ret = req.error;
+ goto out_close;
+ }
+
+ if (recv(xfrm_listen, &req, sizeof(req), 0) < 0) {
+ pr_err("recv()");
+ goto out_close;
+ }
+
+ if (req.expire.hard != 0x1) {
+ printk("expire.hard is not set: %x", req.expire.hard);
+ goto out_close;
+ }
+
+ ret = KSFT_PASS;
+out_close:
+ close(xfrm_listen);
+ return ret;
+}
+
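+/* Same check as xfrm_expire_state(), but for a policy, i.e.
+ * XFRM_MSG_POLEXPIRE and xfrm_user_polexpire.
+ */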
+static int xfrm_expire_policy(int xfrm_sock, uint32_t *seq,
+ unsigned int nr, struct xfrm_desc *desc)
+{
+ struct {
+ struct nlmsghdr nh;
+ union {
+ struct xfrm_user_polexpire expire;
+ int error;
+ };
+ } req;
+ struct in_addr src, dst, tunsrc, tundst;
+ int xfrm_listen = -1, ret = KSFT_FAIL;
+ uint32_t seq_listen;
+
+ src = inet_makeaddr(INADDR_B, child_ip(nr));
+ dst = inet_makeaddr(INADDR_B, grchild_ip(nr));
+ tunsrc = inet_makeaddr(INADDR_A, child_ip(nr));
+ tundst = inet_makeaddr(INADDR_A, grchild_ip(nr));
+
+ if (xfrm_policy_add(xfrm_sock, (*seq)++, gen_spi(src), src, dst,
+ XFRM_POLICY_OUT, tunsrc, tundst, desc->proto)) {
+ printk("Failed to add xfrm policy");
+ return KSFT_FAIL;
+ }
+
+ if (netlink_sock_bind(&xfrm_listen, &seq_listen, NETLINK_XFRM, XFRMNLGRP_EXPIRE))
+ return KSFT_FAIL;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.expire));
+ req.nh.nlmsg_type = XFRM_MSG_POLEXPIRE;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_seq = (*seq)++;
+
+ /* Fill selector. */
+ memcpy(&req.expire.pol.sel.daddr, &dst, sizeof(dst));
+ memcpy(&req.expire.pol.sel.saddr, &src, sizeof(src));
+ req.expire.pol.sel.family = AF_INET;
+ req.expire.pol.sel.prefixlen_d = PREFIX_LEN;
+ req.expire.pol.sel.prefixlen_s = PREFIX_LEN;
+ req.expire.pol.dir = XFRM_POLICY_OUT;
+ req.expire.hard = 0xff;
+
+ if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ goto out_close;
+ }
+
+ if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) {
+ pr_err("recv()");
+ goto out_close;
+ } else if (req.nh.nlmsg_type != NLMSG_ERROR) {
+ printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type);
+ goto out_close;
+ }
+
+ if (req.error) {
+ printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error));
+ ret = req.error;
+ goto out_close;
+ }
+
+ if (recv(xfrm_listen, &req, sizeof(req), 0) < 0) {
+ pr_err("recv()");
+ goto out_close;
+ }
+
+ if (req.expire.hard != 0x1) {
+ printk("expire.hard is not set: %x", req.expire.hard);
+ goto out_close;
+ }
+
+ ret = KSFT_PASS;
+out_close:
+ close(xfrm_listen);
+ return ret;
+}
+
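+/* Run one CREATE_TUNNEL test: ping over plain UDP, set up policies
+ * and states on both sides, ping again through the tunnel, then tear
+ * everything down.
+ */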
+static int child_serv(int xfrm_sock, uint32_t *seq,
+ unsigned int nr, int cmd_fd, void *buf, struct xfrm_desc *desc)
+{
+ struct in_addr src, dst, tunsrc, tundst;
+ struct test_desc msg;
+ int ret = KSFT_FAIL;
+
+ src = inet_makeaddr(INADDR_B, child_ip(nr));
+ dst = inet_makeaddr(INADDR_B, grchild_ip(nr));
+ tunsrc = inet_makeaddr(INADDR_A, child_ip(nr));
+ tundst = inet_makeaddr(INADDR_A, grchild_ip(nr));
+
+ /* UDP pinging without xfrm */
+ if (do_ping(cmd_fd, buf, page_size, src, true, 0, 0, udp_ping_send)) {
+ printk("ping failed before setting xfrm");
+ return KSFT_FAIL;
+ }
+
+ memset(&msg, 0, sizeof(msg));
+ msg.type = MSG_XFRM_PREPARE;
+ memcpy(&msg.body.xfrm_desc, desc, sizeof(*desc));
+ write_msg(cmd_fd, &msg, 1);
+
+ if (xfrm_prepare(xfrm_sock, seq, src, dst, tunsrc, tundst, desc->proto)) {
+ printk("failed to prepare xfrm");
+ goto cleanup;
+ }
+
+ memset(&msg, 0, sizeof(msg));
+ msg.type = MSG_XFRM_ADD;
+ memcpy(&msg.body.xfrm_desc, desc, sizeof(*desc));
+ write_msg(cmd_fd, &msg, 1);
+ if (xfrm_set(xfrm_sock, seq, src, dst, tunsrc, tundst, desc)) {
+ printk("failed to set xfrm");
+ goto delete;
+ }
+
+ /* UDP pinging with xfrm tunnel */
+ if (do_ping(cmd_fd, buf, page_size, tunsrc,
+ true, 0, 0, udp_ping_send)) {
+ printk("ping failed for xfrm");
+ goto delete;
+ }
+
+ ret = KSFT_PASS;
+delete:
+ /* xfrm delete */
+ memset(&msg, 0, sizeof(msg));
+ msg.type = MSG_XFRM_DEL;
+ memcpy(&msg.body.xfrm_desc, desc, sizeof(*desc));
+ write_msg(cmd_fd, &msg, 1);
+
+ if (xfrm_delete(xfrm_sock, seq, src, dst, tunsrc, tundst, desc->proto)) {
+ printk("failed ping to remove xfrm");
+ ret = KSFT_FAIL;
+ }
+
+cleanup:
+ memset(&msg, 0, sizeof(msg));
+ msg.type = MSG_XFRM_CLEANUP;
+ memcpy(&msg.body.xfrm_desc, desc, sizeof(*desc));
+ write_msg(cmd_fd, &msg, 1);
+ if (xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst)) {
+ printk("failed ping to cleanup xfrm");
+ ret = KSFT_FAIL;
+ }
+ return ret;
+}
+
+static int child_f(unsigned int nr, int test_desc_fd, int cmd_fd, void *buf)
+{
+ struct xfrm_desc desc;
+ struct test_desc msg;
+ int xfrm_sock = -1;
+ uint32_t seq;
+
+ if (switch_ns(nsfd_childa))
+ exit(KSFT_FAIL);
+
+ if (netlink_sock(&xfrm_sock, &seq, NETLINK_XFRM)) {
+ printk("Failed to open xfrm netlink socket");
+ exit(KSFT_FAIL);
+ }
+
+ /* Check that the SOCK_SEQPACKET cmd socket is ready, just to be sure. */
+ memset(&msg, 0, sizeof(msg));
+ msg.type = MSG_ACK;
+ write_msg(cmd_fd, &msg, 1);
+ read_msg(cmd_fd, &msg, 1);
+ if (msg.type != MSG_ACK) {
+ printk("Ack failed");
+ exit(KSFT_FAIL);
+ }
+
+ for (;;) {
+ ssize_t received = read(test_desc_fd, &desc, sizeof(desc));
+ int ret;
+
+ if (received == 0) /* EOF */
+ break;
+
+ if (received != sizeof(desc)) {
+ pr_err("read() returned %zd", received);
+ exit(KSFT_FAIL);
+ }
+
+ switch (desc.type) {
+ case CREATE_TUNNEL:
+ ret = child_serv(xfrm_sock, &seq, nr,
+ cmd_fd, buf, &desc);
+ break;
+ case ALLOCATE_SPI:
+ ret = xfrm_state_allocspi(xfrm_sock, &seq,
+ -1, desc.proto);
+ break;
+ case MONITOR_ACQUIRE:
+ ret = xfrm_monitor_acquire(xfrm_sock, &seq, nr);
+ break;
+ case EXPIRE_STATE:
+ ret = xfrm_expire_state(xfrm_sock, &seq, nr, &desc);
+ break;
+ case EXPIRE_POLICY:
+ ret = xfrm_expire_policy(xfrm_sock, &seq, nr, &desc);
+ break;
+ default:
+ printk("Unknown desc type %d", desc.type);
+ exit(KSFT_FAIL);
+ }
+ write_test_result(ret, &desc);
+ }
+
+ close(xfrm_sock);
+
+ msg.type = MSG_EXIT;
+ write_msg(cmd_fd, &msg, 1);
+ exit(KSFT_PASS);
+}
+
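+/* Serve one command from the child: the grandchild mirrors the
+ * child's xfrm setup from the other end of the tunnel.
+ */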
+static void grand_child_serv(unsigned int nr, int cmd_fd, void *buf,
+ struct test_desc *msg, int xfrm_sock, uint32_t *seq)
+{
+ struct in_addr src, dst, tunsrc, tundst;
+ bool tun_reply;
+ struct xfrm_desc *desc = &msg->body.xfrm_desc;
+
+ src = inet_makeaddr(INADDR_B, grchild_ip(nr));
+ dst = inet_makeaddr(INADDR_B, child_ip(nr));
+ tunsrc = inet_makeaddr(INADDR_A, grchild_ip(nr));
+ tundst = inet_makeaddr(INADDR_A, child_ip(nr));
+
+ switch (msg->type) {
+ case MSG_EXIT:
+ exit(KSFT_PASS);
+ case MSG_ACK:
+ write_msg(cmd_fd, msg, 1);
+ break;
+ case MSG_PING:
+ tun_reply = memcmp(&dst, &msg->body.ping.reply_ip, sizeof(in_addr_t));
+ /* UDP pinging without xfrm */
+ if (do_ping(cmd_fd, buf, page_size, tun_reply ? tunsrc : src,
+ false, msg->body.ping.port,
+ msg->body.ping.reply_ip, udp_ping_reply)) {
+ printk("ping failed before setting xfrm");
+ }
+ break;
+ case MSG_XFRM_PREPARE:
+ if (xfrm_prepare(xfrm_sock, seq, src, dst, tunsrc, tundst,
+ desc->proto)) {
+ xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst);
+ printk("failed to prepare xfrm");
+ }
+ break;
+ case MSG_XFRM_ADD:
+ if (xfrm_set(xfrm_sock, seq, src, dst, tunsrc, tundst, desc)) {
+ xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst);
+ printk("failed to set xfrm");
+ }
+ break;
+ case MSG_XFRM_DEL:
+ if (xfrm_delete(xfrm_sock, seq, src, dst, tunsrc, tundst,
+ desc->proto)) {
+ xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst);
+ printk("failed to remove xfrm");
+ }
+ break;
+ case MSG_XFRM_CLEANUP:
+ if (xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst)) {
+ printk("failed to cleanup xfrm");
+ }
+ break;
+ default:
+ printk("got unknown msg type %d", msg->type);
+ }
+}
+
+static int grand_child_f(unsigned int nr, int cmd_fd, void *buf)
+{
+ struct test_desc msg;
+ int xfrm_sock = -1;
+ uint32_t seq;
+
+ if (switch_ns(nsfd_childb))
+ exit(KSFT_FAIL);
+
+ if (netlink_sock(&xfrm_sock, &seq, NETLINK_XFRM)) {
+ printk("Failed to open xfrm netlink socket");
+ exit(KSFT_FAIL);
+ }
+
+ do {
+ read_msg(cmd_fd, &msg, 1);
+ grand_child_serv(nr, cmd_fd, buf, &msg, xfrm_sock, &seq);
+ } while (1);
+
+ close(xfrm_sock);
+ exit(KSFT_FAIL);
+}
+
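+/* Set up both ends of the tunnel, then fork: the parent returns to
+ * the selftest, while the child runs child_f() after forking the
+ * grandchild that runs grand_child_f().
+ */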
+static int start_child(unsigned int nr, char *veth, int test_desc_fd[2])
+{
+ int cmd_sock[2];
+ void *data_map;
+ pid_t child;
+
+ if (init_child(nsfd_childa, veth, child_ip(nr), grchild_ip(nr)))
+ return -1;
+
+ if (init_child(nsfd_childb, veth, grchild_ip(nr), child_ip(nr)))
+ return -1;
+
+ child = fork();
+ if (child < 0) {
+ pr_err("fork()");
+ return -1;
+ } else if (child) {
+ /* in parent - selftest */
+ return switch_ns(nsfd_parent);
+ }
+
+ if (close(test_desc_fd[1])) {
+ pr_err("close()");
+ return -1;
+ }
+
+ /* child */
+ data_map = mmap(0, page_size, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ if (data_map == MAP_FAILED) {
+ pr_err("mmap()");
+ return -1;
+ }
+
+ randomize_buffer(data_map, page_size);
+
+ if (socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, cmd_sock)) {
+ pr_err("socketpair()");
+ return -1;
+ }
+
+ child = fork();
+ if (child < 0) {
+ pr_err("fork()");
+ return -1;
+ } else if (child) {
+ if (close(cmd_sock[0])) {
+ pr_err("close()");
+ return -1;
+ }
+ return child_f(nr, test_desc_fd[0], cmd_sock[1], data_map);
+ }
+ if (close(cmd_sock[1])) {
+ pr_err("close()");
+ return -1;
+ }
+ return grand_child_f(nr, cmd_sock[0], data_map);
+}
+
+static void exit_usage(char **argv)
+{
+ printk("Usage: %s [nr_process]", argv[0]);
+ exit(KSFT_FAIL);
+}
+
+static int __write_desc(int test_desc_fd, struct xfrm_desc *desc)
+{
+ ssize_t ret;
+
+ ret = write(test_desc_fd, desc, sizeof(*desc));
+
+ if (ret == sizeof(*desc))
+ return 0;
+
+ pr_err("Writing test's desc failed %ld", ret);
+
+ return -1;
+}
+
+static int write_desc(int proto, int test_desc_fd,
+ char *a, char *e, char *c, char *ae)
+{
+ struct xfrm_desc desc = {};
+
+ desc.type = CREATE_TUNNEL;
+ desc.proto = proto;
+
+ if (a)
+ strncpy(desc.a_algo, a, ALGO_LEN - 1);
+ if (e)
+ strncpy(desc.e_algo, e, ALGO_LEN - 1);
+ if (c)
+ strncpy(desc.c_algo, c, ALGO_LEN - 1);
+ if (ae)
+ strncpy(desc.ae_algo, ae, ALGO_LEN - 1);
+
+ return __write_desc(test_desc_fd, &desc);
+}
+
+int proto_list[] = { IPPROTO_AH, IPPROTO_COMP, IPPROTO_ESP };
+char *ah_list[] = {
+ "digest_null", "hmac(md5)", "hmac(sha1)", "hmac(sha256)",
+ "hmac(sha384)", "hmac(sha512)", "hmac(rmd160)",
+ "xcbc(aes)", "cmac(aes)"
+};
+char *comp_list[] = {
+ "deflate",
+#if 0
+ /* No compression backend realization */
+ "lzs", "lzjh"
+#endif
+};
+char *e_list[] = {
+ "ecb(cipher_null)", "cbc(des)", "cbc(des3_ede)", "cbc(cast5)",
+ "cbc(blowfish)", "cbc(aes)", "cbc(serpent)", "cbc(camellia)",
+ "cbc(twofish)", "rfc3686(ctr(aes))"
+};
+char *ae_list[] = {
+#if 0
+ /* not implemented */
+ "rfc4106(gcm(aes))", "rfc4309(ccm(aes))", "rfc4543(gcm(aes))",
+ "rfc7539esp(chacha20,poly1305)"
+#endif
+};
+
+const unsigned int proto_plan = ARRAY_SIZE(ah_list) + ARRAY_SIZE(comp_list) \
+ + (ARRAY_SIZE(ah_list) * ARRAY_SIZE(e_list)) \
+ + ARRAY_SIZE(ae_list);
+
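+/* Write one CREATE_TUNNEL descriptor per algorithm combination for
+ * the given protocol; this has to stay in sync with proto_plan above.
+ */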
+static int write_proto_plan(int fd, int proto)
+{
+ unsigned int i;
+
+ switch (proto) {
+ case IPPROTO_AH:
+ for (i = 0; i < ARRAY_SIZE(ah_list); i++) {
+ if (write_desc(proto, fd, ah_list[i], 0, 0, 0))
+ return -1;
+ }
+ break;
+ case IPPROTO_COMP:
+ for (i = 0; i < ARRAY_SIZE(comp_list); i++) {
+ if (write_desc(proto, fd, 0, 0, comp_list[i], 0))
+ return -1;
+ }
+ break;
+ case IPPROTO_ESP:
+ for (i = 0; i < ARRAY_SIZE(ah_list); i++) {
+ int j;
+
+ for (j = 0; j < ARRAY_SIZE(e_list); j++) {
+ if (write_desc(proto, fd, ah_list[i],
+ e_list[j], 0, 0))
+ return -1;
+ }
+ }
+ for (i = 0; i < ARRAY_SIZE(ae_list); i++) {
+ if (write_desc(proto, fd, 0, 0, 0, ae_list[i]))
+ return -1;
+ }
+ break;
+ default:
+ printk("BUG: Specified unknown proto %d", proto);
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * Some structures in xfrm uapi header differ in size between
+ * 64-bit and 32-bit ABI:
+ *
+ * 32-bit UABI | 64-bit UABI
+ * -------------------------------------|-------------------------------------
+ * sizeof(xfrm_usersa_info) = 220 | sizeof(xfrm_usersa_info) = 224
+ * sizeof(xfrm_userpolicy_info) = 164 | sizeof(xfrm_userpolicy_info) = 168
+ * sizeof(xfrm_userspi_info) = 228 | sizeof(xfrm_userspi_info) = 232
+ * sizeof(xfrm_user_acquire) = 276 | sizeof(xfrm_user_acquire) = 280
+ * sizeof(xfrm_user_expire) = 224 | sizeof(xfrm_user_expire) = 232
+ * sizeof(xfrm_user_polexpire) = 168 | sizeof(xfrm_user_polexpire) = 176
+ *
+ * Check the structures affected by the UABI difference.
+ */
+const unsigned int compat_plan = 4;
+static int write_compat_struct_tests(int test_desc_fd)
+{
+ struct xfrm_desc desc = {};
+
+ desc.type = ALLOCATE_SPI;
+ desc.proto = IPPROTO_AH;
+ strncpy(desc.a_algo, ah_list[0], ALGO_LEN - 1);
+
+ if (__write_desc(test_desc_fd, &desc))
+ return -1;
+
+ desc.type = MONITOR_ACQUIRE;
+ if (__write_desc(test_desc_fd, &desc))
+ return -1;
+
+ desc.type = EXPIRE_STATE;
+ if (__write_desc(test_desc_fd, &desc))
+ return -1;
+
+ desc.type = EXPIRE_POLICY;
+ if (__write_desc(test_desc_fd, &desc))
+ return -1;
+
+ return 0;
+}
+
+static int write_test_plan(int test_desc_fd)
+{
+ unsigned int i;
+ pid_t child;
+
+ child = fork();
+ if (child < 0) {
+ pr_err("fork()");
+ return -1;
+ }
+ if (child) {
+ if (close(test_desc_fd))
+ printk("close(): %m");
+ return 0;
+ }
+
+ if (write_compat_struct_tests(test_desc_fd))
+ exit(KSFT_FAIL);
+
+ for (i = 0; i < ARRAY_SIZE(proto_list); i++) {
+ if (write_proto_plan(test_desc_fd, proto_list[i]))
+ exit(KSFT_FAIL);
+ }
+
+ exit(KSFT_PASS);
+}
+
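+/* Reap all forked children; any abnormal or failing exit fails the run. */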
+static int children_cleanup(void)
+{
+ int ret = KSFT_PASS;
+
+ while (1) {
+ int status;
+ pid_t p = wait(&status);
+
+ if ((p < 0) && errno == ECHILD)
+ break;
+
+ if (p < 0) {
+ pr_err("wait()");
+ return KSFT_FAIL;
+ }
+
+ if (!WIFEXITED(status)) {
+ ret = KSFT_FAIL;
+ continue;
+ }
+
+ if (WEXITSTATUS(status) == KSFT_FAIL)
+ ret = KSFT_FAIL;
+ }
+
+ return ret;
+}
+
+typedef void (*print_res)(const char *, ...);
+
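+/* Drain test_result records from the results pipe and report each one
+ * through the kselftest harness.
+ */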
+static int check_results(void)
+{
+ struct test_result tr = {};
+ struct xfrm_desc *d = &tr.desc;
+ int ret = KSFT_PASS;
+
+ while (1) {
+ ssize_t received = read(results_fd[0], &tr, sizeof(tr));
+ print_res result;
+
+ if (received == 0) /* EOF */
+ break;
+
+ if (received != sizeof(tr)) {
+ pr_err("read() returned %zd", received);
+ return KSFT_FAIL;
+ }
+
+ switch (tr.res) {
+ case KSFT_PASS:
+ result = ksft_test_result_pass;
+ break;
+ case KSFT_FAIL:
+ default:
+ result = ksft_test_result_fail;
+ ret = KSFT_FAIL;
+ }
+
+ result(" %s: [%u, '%s', '%s', '%s', '%s', %u]\n",
+ desc_name[d->type], (unsigned int)d->proto, d->a_algo,
+ d->e_algo, d->c_algo, d->ae_algo, d->icv_len);
+ }
+
+ return ret;
+}
+
+int main(int argc, char **argv)
+{
+ unsigned int nr_process = 1;
+ int route_sock = -1, ret = KSFT_SKIP;
+ int test_desc_fd[2];
+ uint32_t route_seq;
+ unsigned int i;
+
+ if (argc > 2)
+ exit_usage(argv);
+
+ if (argc > 1) {
+ char *endptr;
+ long nr;
+
+ /* Parse into a long first: nr_process is unsigned and
+ * cannot hold strtol()'s LONG_MIN/LONG_MAX error values.
+ */
+ errno = 0;
+ nr = strtol(argv[1], &endptr, 10);
+ if ((errno == ERANGE && (nr == LONG_MAX || nr == LONG_MIN))
+ || (errno != 0 && nr == 0)
+ || (endptr == argv[1]) || (*endptr != '\0')) {
+ printk("Failed to parse [nr_process]");
+ exit_usage(argv);
+ }
+
+ if (nr < 1 || nr > MAX_PROCESSES) {
+ printk("nr_process should be between [1; %u]",
+ MAX_PROCESSES);
+ exit_usage(argv);
+ }
+ nr_process = nr;
+ }
+
+ srand(time(NULL));
+ page_size = sysconf(_SC_PAGESIZE);
+ if (page_size < 1)
+ ksft_exit_skip("sysconf(): %m\n");
+
+ if (pipe2(test_desc_fd, O_DIRECT) < 0)
+ ksft_exit_skip("pipe(): %m\n");
+
+ if (pipe2(results_fd, O_DIRECT) < 0)
+ ksft_exit_skip("pipe(): %m\n");
+
+ if (init_namespaces())
+ ksft_exit_skip("Failed to create namespaces\n");
+
+ if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE))
+ ksft_exit_skip("Failed to open netlink route socket\n");
+
+ for (i = 0; i < nr_process; i++) {
+ char veth[VETH_LEN];
+
+ snprintf(veth, VETH_LEN, VETH_FMT, i);
+
+ if (veth_add(route_sock, route_seq++, veth, nsfd_childa, veth, nsfd_childb)) {
+ close(route_sock);
+ ksft_exit_fail_msg("Failed to create veth device");
+ }
+
+ if (start_child(i, veth, test_desc_fd)) {
+ close(route_sock);
+ ksft_exit_fail_msg("Child %u failed to start", i);
+ }
+ }
+
+ if (close(route_sock) || close(test_desc_fd[0]) || close(results_fd[1]))
+ ksft_exit_fail_msg("close(): %m");
+
+ ksft_set_plan(proto_plan + compat_plan);
+
+ if (write_test_plan(test_desc_fd[1]))
+ ksft_exit_fail_msg("Failed to write test plan to pipe");
+
+ ret = check_results();
+
+ if (children_cleanup() == KSFT_FAIL)
+ exit(KSFT_FAIL);
+
+ exit(ret);
+}
diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile
index aa254aefc2c3..00bb158b4a5d 100644
--- a/tools/testing/selftests/net/mptcp/Makefile
+++ b/tools/testing/selftests/net/mptcp/Makefile
@@ -5,7 +5,8 @@ KSFT_KHDR_INSTALL := 1
CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include
-TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh
+TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \
+ simult_flows.sh
TEST_GEN_FILES = mptcp_connect pm_nl_ctl
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index 090620c3e10c..77bb62feb872 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -54,6 +54,7 @@ static int pf = AF_INET;
static int cfg_sndbuf;
static int cfg_rcvbuf;
static bool cfg_join;
+static bool cfg_remove;
static int cfg_wait;
static void die_usage(void)
@@ -65,8 +66,8 @@ static void die_usage(void)
fprintf(stderr, "\t-S num -- set SO_SNDBUF to num\n");
fprintf(stderr, "\t-R num -- set SO_RCVBUF to num\n");
fprintf(stderr, "\t-p num -- use port num\n");
- fprintf(stderr, "\t-m [MPTCP|TCP] -- use tcp or mptcp sockets\n");
- fprintf(stderr, "\t-s [mmap|poll] -- use poll (default) or mmap\n");
+ fprintf(stderr, "\t-s [MPTCP|TCP] -- use mptcp(default) or tcp sockets\n");
+ fprintf(stderr, "\t-m [poll|mmap|sendfile] -- use poll(default)/mmap+write/sendfile\n");
fprintf(stderr, "\t-u -- check mptcp ulp\n");
fprintf(stderr, "\t-w num -- wait num sec before closing the socket\n");
exit(1);
@@ -271,6 +272,9 @@ static size_t do_rnd_write(const int fd, char *buf, const size_t len)
if (cfg_join && first && do_w > 100)
do_w = 100;
+ if (cfg_remove && do_w > 50)
+ do_w = 50;
+
bw = write(fd, buf, do_w);
if (bw < 0)
perror("write");
@@ -281,6 +285,9 @@ static size_t do_rnd_write(const int fd, char *buf, const size_t len)
first = false;
}
+ if (cfg_remove)
+ usleep(200000);
+
return bw;
}
@@ -428,7 +435,7 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd)
}
/* leave some time for late join/announce */
- if (cfg_join)
+ if (cfg_join || cfg_remove)
usleep(cfg_wait);
close(peerfd);
@@ -686,7 +693,7 @@ static void maybe_close(int fd)
{
unsigned int r = rand();
- if (!cfg_join && (r & 1))
+ if (!(cfg_join || cfg_remove) && (r & 1))
close(fd);
}
@@ -822,13 +829,18 @@ static void parse_opts(int argc, char **argv)
{
int c;
- while ((c = getopt(argc, argv, "6jlp:s:hut:m:S:R:w:")) != -1) {
+ while ((c = getopt(argc, argv, "6jrlp:s:hut:m:S:R:w:")) != -1) {
switch (c) {
case 'j':
cfg_join = true;
cfg_mode = CFG_MODE_POLL;
cfg_wait = 400000;
break;
+ case 'r':
+ cfg_remove = true;
+ cfg_mode = CFG_MODE_POLL;
+ cfg_wait = 400000;
+ break;
case 'l':
listen_mode = true;
break;
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index 57d75b7f6220..2cfd87d94db8 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -14,9 +14,8 @@ capture=false
timeout=30
ipv6=true
ethtool_random_on=true
-tc_delay="$((RANDOM%400))"
+tc_delay="$((RANDOM%50))"
tc_loss=$((RANDOM%101))
-tc_reorder=""
testmode=""
sndbuf=0
rcvbuf=0
@@ -444,9 +443,9 @@ do_transfer()
duration=$(printf "(duration %05sms)" $duration)
if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
echo "$duration [ FAIL ] client exit code $retc, server $rets" 1>&2
- echo "\nnetns ${listener_ns} socket stat for $port:" 1>&2
+ echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2
ip netns exec ${listener_ns} ss -nita 1>&2 -o "sport = :$port"
- echo "\nnetns ${connector_ns} socket stat for $port:" 1>&2
+ echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2
ip netns exec ${connector_ns} ss -nita 1>&2 -o "dport = :$port"
cat "$capout"
@@ -628,30 +627,32 @@ for sender in "$ns1" "$ns2" "$ns3" "$ns4";do
do_ping "$ns4" $sender dead:beef:3::1
done
-[ -n "$tc_loss" ] && tc -net "$ns2" qdisc add dev ns2eth3 root netem loss random $tc_loss
+[ -n "$tc_loss" ] && tc -net "$ns2" qdisc add dev ns2eth3 root netem loss random $tc_loss delay ${tc_delay}ms
echo -n "INFO: Using loss of $tc_loss "
test "$tc_delay" -gt 0 && echo -n "delay $tc_delay ms "
+reorder_delay=$(($tc_delay / 4))
+
if [ -z "${tc_reorder}" ]; then
reorder1=$((RANDOM%10))
reorder1=$((100 - reorder1))
reorder2=$((RANDOM%100))
- if [ $tc_delay -gt 0 ] && [ $reorder1 -lt 100 ] && [ $reorder2 -gt 0 ]; then
+ if [ $reorder_delay -gt 0 ] && [ $reorder1 -lt 100 ] && [ $reorder2 -gt 0 ]; then
tc_reorder="reorder ${reorder1}% ${reorder2}%"
- echo -n "$tc_reorder "
+ echo -n "$tc_reorder with delay ${reorder_delay}ms "
fi
elif [ "$tc_reorder" = "0" ];then
tc_reorder=""
-elif [ "$tc_delay" -gt 0 ];then
+elif [ "$reorder_delay" -gt 0 ];then
# reordering requires some delay
tc_reorder="reorder $tc_reorder"
- echo -n "$tc_reorder "
+ echo -n "$tc_reorder with delay ${reorder_delay}ms "
fi
echo "on ns3eth4"
-tc -net "$ns3" qdisc add dev ns3eth4 root netem delay ${tc_delay}ms $tc_reorder
+tc -net "$ns3" qdisc add dev ns3eth4 root netem delay ${reorder_delay}ms $tc_reorder
for sender in $ns1 $ns2 $ns3 $ns4;do
run_tests_lo "$ns1" "$sender" 10.0.1.1 1
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index f39c1129ce5f..08f53d86dedc 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -8,6 +8,7 @@ cin=""
cout=""
ksft_skip=4
timeout=30
+mptcp_connect=""
capture=0
TEST_COUNT=0
@@ -132,6 +133,8 @@ do_transfer()
cl_proto="$3"
srv_proto="$4"
connect_addr="$5"
+ rm_nr_ns1="$6"
+ rm_nr_ns2="$7"
port=$((10000+$TEST_COUNT))
TEST_COUNT=$((TEST_COUNT+1))
@@ -156,14 +159,44 @@ do_transfer()
sleep 1
fi
- ip netns exec ${listener_ns} ./mptcp_connect -j -t $timeout -l -p $port -s ${srv_proto} 0.0.0.0 < "$sin" > "$sout" &
+ if [[ $rm_nr_ns1 -eq 0 && $rm_nr_ns2 -eq 0 ]]; then
+ mptcp_connect="./mptcp_connect -j"
+ else
+ mptcp_connect="./mptcp_connect -r"
+ fi
+
+ ip netns exec ${listener_ns} $mptcp_connect -t $timeout -l -p $port -s ${srv_proto} 0.0.0.0 < "$sin" > "$sout" &
spid=$!
sleep 1
- ip netns exec ${connector_ns} ./mptcp_connect -j -t $timeout -p $port -s ${cl_proto} $connect_addr < "$cin" > "$cout" &
+ ip netns exec ${connector_ns} $mptcp_connect -t $timeout -p $port -s ${cl_proto} $connect_addr < "$cin" > "$cout" &
cpid=$!
+ if [ $rm_nr_ns1 -gt 0 ]; then
+ counter=1
+ sleep 1
+
+ while [ $counter -le $rm_nr_ns1 ]
+ do
+ ip netns exec ${listener_ns} ./pm_nl_ctl del $counter
+ sleep 1
+ let counter+=1
+ done
+ fi
+
+ if [ $rm_nr_ns2 -gt 0 ]; then
+ counter=1
+ sleep 1
+
+ while [ $counter -le $rm_nr_ns2 ]
+ do
+ ip netns exec ${connector_ns} ./pm_nl_ctl del $counter
+ sleep 1
+ let counter+=1
+ done
+ fi
+
wait $cpid
retc=$?
wait $spid
@@ -176,9 +209,9 @@ do_transfer()
if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
echo " client exit code $retc, server $rets" 1>&2
- echo "\nnetns ${listener_ns} socket stat for $port:" 1>&2
+ echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2
ip netns exec ${listener_ns} ss -nita 1>&2 -o "sport = :$port"
- echo "\nnetns ${connector_ns} socket stat for $port:" 1>&2
+ echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2
ip netns exec ${connector_ns} ss -nita 1>&2 -o "dport = :$port"
cat "$capout"
@@ -219,7 +252,24 @@ run_tests()
connect_addr="$3"
lret=0
- do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr}
+ do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} 0 0
+ lret=$?
+ if [ $lret -ne 0 ]; then
+ ret=$lret
+ return
+ fi
+}
+
+run_remove_tests()
+{
+ listener_ns="$1"
+ connector_ns="$2"
+ connect_addr="$3"
+ rm_nr_ns1="$4"
+ rm_nr_ns2="$5"
+ lret=0
+
+ do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} ${rm_nr_ns1} ${rm_nr_ns2}
lret=$?
if [ $lret -ne 0 ]; then
ret=$lret
@@ -276,6 +326,80 @@ chk_join_nr()
fi
}
+chk_add_nr()
+{
+ local add_nr=$1
+ local echo_nr=$2
+ local count
+ local dump_stats
+
+ printf "%-39s %s" " " "add"
+ count=`ip netns exec $ns2 nstat -as | grep MPTcpExtAddAddr | awk '{print $2}'`
+ [ -z "$count" ] && count=0
+ if [ "$count" != "$add_nr" ]; then
+ echo "[fail] got $count ADD_ADDR[s] expected $add_nr"
+ ret=1
+ dump_stats=1
+ else
+ echo -n "[ ok ]"
+ fi
+
+ echo -n " - echo "
+ count=`ip netns exec $ns1 nstat -as | grep MPTcpExtEchoAdd | awk '{print $2}'`
+ [ -z "$count" ] && count=0
+ if [ "$count" != "$echo_nr" ]; then
+ echo "[fail] got $count ADD_ADDR echo[s] expected $echo_nr"
+ ret=1
+ dump_stats=1
+ else
+ echo "[ ok ]"
+ fi
+
+ if [ "${dump_stats}" = 1 ]; then
+ echo Server ns stats
+ ip netns exec $ns1 nstat -as | grep MPTcp
+ echo Client ns stats
+ ip netns exec $ns2 nstat -as | grep MPTcp
+ fi
+}
+
+chk_rm_nr()
+{
+ local rm_addr_nr=$1
+ local rm_subflow_nr=$2
+ local count
+ local dump_stats
+
+ printf "%-39s %s" " " "rm "
+ count=`ip netns exec $ns1 nstat -as | grep MPTcpExtRmAddr | awk '{print $2}'`
+ [ -z "$count" ] && count=0
+ if [ "$count" != "$rm_addr_nr" ]; then
+ echo "[fail] got $count RM_ADDR[s] expected $rm_addr_nr"
+ ret=1
+ dump_stats=1
+ else
+ echo -n "[ ok ]"
+ fi
+
+ echo -n " - sf "
+ count=`ip netns exec $ns2 nstat -as | grep MPTcpExtRmSubflow | awk '{print $2}'`
+ [ -z "$count" ] && count=0
+ if [ "$count" != "$rm_subflow_nr" ]; then
+ echo "[fail] got $count RM_SUBFLOW[s] expected $rm_subflow_nr"
+ ret=1
+ dump_stats=1
+ else
+ echo "[ ok ]"
+ fi
+
+ if [ "${dump_stats}" = 1 ]; then
+ echo Server ns stats
+ ip netns exec $ns1 nstat -as | grep MPTcp
+ echo Client ns stats
+ ip netns exec $ns2 nstat -as | grep MPTcp
+ fi
+}
+
sin=$(mktemp)
sout=$(mktemp)
cin=$(mktemp)
@@ -332,6 +456,7 @@ reset
ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "unused signal address" 0 0 0
+chk_add_nr 1 1
# accept and use add_addr
reset
@@ -340,6 +465,7 @@ ip netns exec $ns2 ./pm_nl_ctl limits 1 1
ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "signal address" 1 1 1
+chk_add_nr 1 1
# accept and use add_addr with an additional subflow
# note: signal address in server ns and local addresses in client ns must
@@ -352,6 +478,7 @@ ip netns exec $ns2 ./pm_nl_ctl limits 1 2
ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "subflow and signal" 2 2 2
+chk_add_nr 1 1
# accept and use add_addr with additional subflows
reset
@@ -362,6 +489,59 @@ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "multiple subflows and signal" 3 3 3
+chk_add_nr 1 1
+
+# single subflow, remove
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+run_remove_tests $ns1 $ns2 10.0.1.1 0 1
+chk_join_nr "remove single subflow" 1 1 1
+chk_rm_nr 1 1
+
+# multiple subflows, remove
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+ip netns exec $ns2 ./pm_nl_ctl limits 0 2
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+run_remove_tests $ns1 $ns2 10.0.1.1 0 2
+chk_join_nr "remove multiple subflows" 2 2 2
+chk_rm_nr 2 2
+
+# single address, remove
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+run_remove_tests $ns1 $ns2 10.0.1.1 1 0
+chk_join_nr "remove single address" 1 1 1
+chk_add_nr 1 1
+chk_rm_nr 0 0
+
+# subflow and signal, remove
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ip netns exec $ns2 ./pm_nl_ctl limits 1 2
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+run_remove_tests $ns1 $ns2 10.0.1.1 1 1
+chk_join_nr "remove subflow and signal" 2 2 2
+chk_add_nr 1 1
+chk_rm_nr 1 1
+
+# subflows and signal, remove
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 3
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ip netns exec $ns2 ./pm_nl_ctl limits 1 3
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
+run_remove_tests $ns1 $ns2 10.0.1.1 1 2
+chk_join_nr "remove subflows and signal" 3 3 3
+chk_add_nr 1 1
+chk_rm_nr 2 2
# single subflow, syncookies
reset_with_cookies
@@ -396,6 +576,7 @@ ip netns exec $ns2 ./pm_nl_ctl limits 1 1
ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "signal address with syn cookies" 1 1 1
+chk_add_nr 1 1
# test cookie with subflow and signal
reset_with_cookies
@@ -405,6 +586,7 @@ ip netns exec $ns2 ./pm_nl_ctl limits 1 2
ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "subflow and signal w cookies" 2 2 2
+chk_add_nr 1 1
# accept and use add_addr with additional subflows
reset_with_cookies
@@ -415,5 +597,6 @@ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "subflows and signal w. cookies" 3 3 3
+chk_add_nr 1 1
exit $ret
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
new file mode 100755
index 000000000000..2f649b431456
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -0,0 +1,293 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+sec=$(date +%s)
+rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
+ns1="ns1-$rndh"
+ns2="ns2-$rndh"
+ns3="ns3-$rndh"
+capture=false
+ksft_skip=4
+timeout=30
+test_cnt=1
+ret=0
+bail=0
+
+usage() {
+ echo "Usage: $0 [ -b ] [ -c ] [ -d ]"
+ echo -e "\t-b: bail out after first error, otherwise runs al testcases"
+ echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)"
+ echo -e "\t-d: debug this script"
+}
+
+cleanup()
+{
+ rm -f "$cin" "$cout"
+ rm -f "$sin" "$sout"
+ rm -f "$capout"
+
+ local netns
+ for netns in "$ns1" "$ns2" "$ns3";do
+ ip netns del $netns
+ done
+}
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+# "$ns1" ns2 ns3
+# ns1eth1 ns2eth1 ns2eth3 ns3eth1
+# netem
+# ns1eth2 ns2eth2
+# netem
+
+setup()
+{
+ large=$(mktemp)
+ small=$(mktemp)
+ sout=$(mktemp)
+ cout=$(mktemp)
+ capout=$(mktemp)
+ size=$((2048 * 4096))
+ dd if=/dev/zero of=$small bs=4096 count=20 >/dev/null 2>&1
+ dd if=/dev/zero of=$large bs=4096 count=$((size / 4096)) >/dev/null 2>&1
+
+ trap cleanup EXIT
+
+ for i in "$ns1" "$ns2" "$ns3";do
+ ip netns add $i || exit $ksft_skip
+ ip -net $i link set lo up
+ done
+
+ ip link add ns1eth1 netns "$ns1" type veth peer name ns2eth1 netns "$ns2"
+ ip link add ns1eth2 netns "$ns1" type veth peer name ns2eth2 netns "$ns2"
+ ip link add ns2eth3 netns "$ns2" type veth peer name ns3eth1 netns "$ns3"
+
+ ip -net "$ns1" addr add 10.0.1.1/24 dev ns1eth1
+ ip -net "$ns1" addr add dead:beef:1::1/64 dev ns1eth1 nodad
+ ip -net "$ns1" link set ns1eth1 up mtu 1500
+ ip -net "$ns1" route add default via 10.0.1.2
+ ip -net "$ns1" route add default via dead:beef:1::2
+
+ ip -net "$ns1" addr add 10.0.2.1/24 dev ns1eth2
+ ip -net "$ns1" addr add dead:beef:2::1/64 dev ns1eth2 nodad
+ ip -net "$ns1" link set ns1eth2 up mtu 1500
+ ip -net "$ns1" route add default via 10.0.2.2 metric 101
+ ip -net "$ns1" route add default via dead:beef:2::2 metric 101
+
+ ip netns exec "$ns1" ./pm_nl_ctl limits 1 1
+ ip netns exec "$ns1" ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags subflow
+ ip netns exec "$ns1" sysctl -q net.ipv4.conf.all.rp_filter=0
+
+ ip -net "$ns2" addr add 10.0.1.2/24 dev ns2eth1
+ ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad
+ ip -net "$ns2" link set ns2eth1 up mtu 1500
+
+ ip -net "$ns2" addr add 10.0.2.2/24 dev ns2eth2
+ ip -net "$ns2" addr add dead:beef:2::2/64 dev ns2eth2 nodad
+ ip -net "$ns2" link set ns2eth2 up mtu 1500
+
+ ip -net "$ns2" addr add 10.0.3.2/24 dev ns2eth3
+ ip -net "$ns2" addr add dead:beef:3::2/64 dev ns2eth3 nodad
+ ip -net "$ns2" link set ns2eth3 up mtu 1500
+ ip netns exec "$ns2" sysctl -q net.ipv4.ip_forward=1
+ ip netns exec "$ns2" sysctl -q net.ipv6.conf.all.forwarding=1
+
+ ip -net "$ns3" addr add 10.0.3.3/24 dev ns3eth1
+ ip -net "$ns3" addr add dead:beef:3::3/64 dev ns3eth1 nodad
+ ip -net "$ns3" link set ns3eth1 up mtu 1500
+ ip -net "$ns3" route add default via 10.0.3.2
+ ip -net "$ns3" route add default via dead:beef:3::2
+
+ ip netns exec "$ns3" ./pm_nl_ctl limits 1 1
+}
+
+# $1: ns, $2: port
+wait_local_port_listen()
+{
+ local listener_ns="${1}"
+ local port="${2}"
+
+ local port_hex i
+
+ port_hex="$(printf "%04X" "${port}")"
+ for i in $(seq 10); do
+ ip netns exec "${listener_ns}" cat /proc/net/tcp* | \
+ awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" &&
+ break
+ sleep 0.1
+ done
+}
+
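+# $1: file sent by the client, $2: file sent by the server,
+# $3: maximum acceptable transfer time in ms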
+do_transfer()
+{
+ local cin=$1
+ local sin=$2
+ local max_time=$3
+ local port
+ port=$((10000+$test_cnt))
+ test_cnt=$((test_cnt+1))
+
+ :> "$cout"
+ :> "$sout"
+ :> "$capout"
+
+ local addr_port
+ addr_port=$(printf "%s:%d" ${connect_addr} ${port})
+
+ if $capture; then
+ local capuser
+ if [ -z "$SUDO_USER" ] ; then
+ capuser=""
+ else
+ capuser="-Z $SUDO_USER"
+ fi
+
+ local capfile="${rndh}-${port}"
+ local capopt="-i any -s 65535 -B 32768 ${capuser}"
+
+ ip netns exec ${ns3} tcpdump ${capopt} -w "${capfile}-listener.pcap" >> "${capout}" 2>&1 &
+ local cappid_listener=$!
+
+ ip netns exec ${ns1} tcpdump ${capopt} -w "${capfile}-connector.pcap" >> "${capout}" 2>&1 &
+ local cappid_connector=$!
+
+ sleep 1
+ fi
+
+ ip netns exec ${ns3} ./mptcp_connect -jt $timeout -l -p $port 0.0.0.0 < "$sin" > "$sout" &
+ local spid=$!
+
+ wait_local_port_listen "${ns3}" "${port}"
+
+ local start
+ start=$(date +%s%3N)
+ ip netns exec ${ns1} ./mptcp_connect -jt $timeout -p $port 10.0.3.3 < "$cin" > "$cout" &
+ local cpid=$!
+
+ wait $cpid
+ local retc=$?
+ wait $spid
+ local rets=$?
+
+ local stop
+ stop=$(date +%s%3N)
+
+ if $capture; then
+ sleep 1
+ kill ${cappid_listener}
+ kill ${cappid_connector}
+ fi
+
+ local duration
+ duration=$((stop-start))
+
+ cmp $sin $cout > /dev/null 2>&1
+ local cmps=$?
+ cmp $cin $sout > /dev/null 2>&1
+ local cmpc=$?
+
+ printf "%16s" "$duration max $max_time "
+ if [ $retc -eq 0 ] && [ $rets -eq 0 ] && \
+ [ $cmpc -eq 0 ] && [ $cmps -eq 0 ] && \
+ [ $duration -lt $max_time ]; then
+ echo "[ OK ]"
+ cat "$capout"
+ return 0
+ fi
+
+ echo " [ fail ]"
+ echo "client exit code $retc, server $rets" 1>&2
+ echo -e "\nnetns ${ns3} socket stat for $port:" 1>&2
+ ip netns exec ${ns3} ss -nita 1>&2 -o "sport = :$port"
+ echo -e "\nnetns ${ns1} socket stat for $port:" 1>&2
+ ip netns exec ${ns1} ss -nita 1>&2 -o "dport = :$port"
+ ls -l $sin $cout
+ ls -l $cin $sout
+
+ cat "$capout"
+ return 1
+}
+
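+# $1/$2: netem rates for the two links (mbit),
+# $3/$4: netem delays for the two links, remaining args: test name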
+run_test()
+{
+ local rate1=$1
+ local rate2=$2
+ local delay1=$3
+ local delay2=$4
+ local lret
+ local dev
+ shift 4
+ local msg=$*
+
+ [ $delay1 -gt 0 ] && delay1="delay $delay1" || delay1=""
+ [ $delay2 -gt 0 ] && delay2="delay $delay2" || delay2=""
+
+ for dev in ns1eth1 ns1eth2; do
+ tc -n $ns1 qdisc del dev $dev root >/dev/null 2>&1
+ done
+ for dev in ns2eth1 ns2eth2; do
+ tc -n $ns2 qdisc del dev $dev root >/dev/null 2>&1
+ done
+ tc -n $ns1 qdisc add dev ns1eth1 root netem rate ${rate1}mbit $delay1
+ tc -n $ns1 qdisc add dev ns1eth2 root netem rate ${rate2}mbit $delay2
+ tc -n $ns2 qdisc add dev ns2eth1 root netem rate ${rate1}mbit $delay1
+ tc -n $ns2 qdisc add dev ns2eth2 root netem rate ${rate2}mbit $delay2
+
+ # expected transfer time, in ms: payload bits over the aggregate link rate
+ local time=$((size * 8 * 1000 / ((rate1 + rate2) * 1024 * 1024)))
+
+ # mptcp_connect will do some sleeps to allow the mp_join handshake
+ # completion
+ time=$((time + 1350))
+
+ printf "%-50s" "$msg"
+ do_transfer $small $large $((time * 11 / 10))
+ lret=$?
+ if [ $lret -ne 0 ]; then
+ ret=$lret
+ [ $bail -eq 0 ] || exit $ret
+ fi
+
+ printf "%-50s" "$msg - reverse direction"
+ do_transfer $large $small $((time * 11 / 10))
+ lret=$?
+ if [ $lret -ne 0 ]; then
+ ret=$lret
+ [ $bail -eq 0 ] || exit $ret
+ fi
+}
+
+while getopts "bcdh" option;do
+ case "$option" in
+ "h")
+ usage $0
+ exit 0
+ ;;
+ "b")
+ bail=1
+ ;;
+ "c")
+ capture=true
+ ;;
+ "d")
+ set -x
+ ;;
+ "?")
+ usage $0
+ exit 1
+ ;;
+ esac
+done
+
+setup
+run_test 10 10 0 0 "balanced bwidth"
+run_test 10 10 1 50 "balanced bwidth with unbalanced delay"
+
+# we still need some additional infrastructure to pass the following test-cases
+# run_test 30 10 0 0 "unbalanced bwidth"
+# run_test 30 10 1 50 "unbalanced bwidth with unbalanced delay"
+# run_test 30 10 50 1 "unbalanced bwidth with opposed, unbalanced delay"
+exit $ret
diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c
index 93208caacbe6..f75c53ce0a2d 100644
--- a/tools/testing/selftests/net/nettest.c
+++ b/tools/testing/selftests/net/nettest.c
@@ -1667,6 +1667,8 @@ int main(int argc, char *argv[])
case 'R':
args.type = SOCK_RAW;
args.port = 0;
+ if (!args.protocol)
+ args.protocol = IPPROTO_RAW;
break;
case 'P':
pe = getprotobyname(optarg);
diff --git a/tools/testing/selftests/net/psock_snd.sh b/tools/testing/selftests/net/psock_snd.sh
index 6331d91b86a6..170be65e0816 100755
--- a/tools/testing/selftests/net/psock_snd.sh
+++ b/tools/testing/selftests/net/psock_snd.sh
@@ -45,7 +45,7 @@ echo "raw vnet hdr"
echo "raw csum_off"
./in_netns.sh ./psock_snd -v -c
-echo "raw csum_off with bad offset (fails)"
+echo "raw csum_off with bad offset (expected to fail)"
(! ./in_netns.sh ./psock_snd -v -c -C)
@@ -57,7 +57,7 @@ echo "raw min size"
echo "raw mtu size"
./in_netns.sh ./psock_snd -l "${mss}"
-echo "raw mtu size + 1 (fails)"
+echo "raw mtu size + 1 (expected to fail)"
(! ./in_netns.sh ./psock_snd -l "${mss_exceeds}")
# fails due to ARPHRD_ETHER check in packet_extra_vlan_len_allowed
@@ -65,19 +65,19 @@ echo "raw mtu size + 1 (fails)"
# echo "raw vlan mtu size"
# ./in_netns.sh ./psock_snd -V -l "${mss}"
-echo "raw vlan mtu size + 1 (fails)"
+echo "raw vlan mtu size + 1 (expected to fail)"
(! ./in_netns.sh ./psock_snd -V -l "${mss_exceeds}")
echo "dgram mtu size"
./in_netns.sh ./psock_snd -d -l "${mss}"
-echo "dgram mtu size + 1 (fails)"
+echo "dgram mtu size + 1 (expected to fail)"
(! ./in_netns.sh ./psock_snd -d -l "${mss_exceeds}")
-echo "raw truncate hlen (fails: does not arrive)"
+echo "raw truncate hlen (expected to fail: does not arrive)"
(! ./in_netns.sh ./psock_snd -t "$((${vnet_hlen} + ${eth_hlen}))")
-echo "raw truncate hlen - 1 (fails: EINVAL)"
+echo "raw truncate hlen - 1 (expected to fail: EINVAL)"
(! ./in_netns.sh ./psock_snd -t "$((${vnet_hlen} + ${eth_hlen} - 1))")
@@ -86,13 +86,13 @@ echo "raw truncate hlen - 1 (fails: EINVAL)"
echo "raw gso min size"
./in_netns.sh ./psock_snd -v -c -g -l "${mss_exceeds}"
-echo "raw gso min size - 1 (fails)"
+echo "raw gso min size - 1 (expected to fail)"
(! ./in_netns.sh ./psock_snd -v -c -g -l "${mss}")
echo "raw gso max size"
./in_netns.sh ./psock_snd -v -c -g -l "${max_mss}"
-echo "raw gso max size + 1 (fails)"
+echo "raw gso max size + 1 (expected to fail)"
(! ./in_netns.sh ./psock_snd -v -c -g -l "${max_mss_exceeds}")
echo "OK. All tests passed"
diff --git a/tools/testing/selftests/net/tcp_mmap.c b/tools/testing/selftests/net/tcp_mmap.c
index a61b7b3da549..00f837c9bc6c 100644
--- a/tools/testing/selftests/net/tcp_mmap.c
+++ b/tools/testing/selftests/net/tcp_mmap.c
@@ -123,6 +123,28 @@ void hash_zone(void *zone, unsigned int length)
#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1))
#define ALIGN_PTR_UP(p, ptr_align_to) ((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to))
+
+static void *mmap_large_buffer(size_t need, size_t *allocated)
+{
+ void *buffer;
+ size_t sz;
+
+ /* Attempt to use huge pages if possible. */
+ sz = ALIGN_UP(need, map_align);
+ buffer = mmap(NULL, sz, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
+
+ if (buffer == (void *)-1) {
+ sz = need;
+ buffer = mmap(NULL, sz, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (buffer != (void *)-1)
+			fprintf(stderr, "MAP_HUGETLB attempt failed; check /sys/kernel/mm/hugepages to enable huge pages for best performance\n");
+ }
+ *allocated = sz;
+ return buffer;
+}
+
void *child_thread(void *arg)
{
unsigned long total_mmap = 0, total = 0;
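For illustration, the try-huge-pages-then-fall-back pattern used by mmap_large_buffer() above can be exercised on its own; a minimal standalone sketch (hypothetical, not part of the patch — the runtime map_align of tcp_mmap.c is replaced here by a fixed 2 MB constant):

	#include <stdio.h>
	#include <sys/mman.h>

	#define HUGE_ALIGN (2UL << 20)	/* assumed huge page size */
	#define ALIGN_UP(x, a) (((x) + ((a) - 1)) & ~((a) - 1))

	static void *large_buffer(size_t need, size_t *allocated)
	{
		size_t sz = ALIGN_UP(need, HUGE_ALIGN);
		void *p = mmap(NULL, sz, PROT_READ | PROT_WRITE,
			       MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);

		if (p == MAP_FAILED) {
			/* No huge pages reserved: fall back to normal pages. */
			sz = need;
			p = mmap(NULL, sz, PROT_READ | PROT_WRITE,
				 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		}
		*allocated = sz;
		return p;
	}

	int main(void)
	{
		size_t sz;
		void *p = large_buffer(1 << 20, &sz);

		if (p == MAP_FAILED)
			return 1;
		printf("allocated %zu bytes\n", sz);
		munmap(p, sz);
		return 0;
	}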
@@ -135,6 +157,7 @@ void *child_thread(void *arg)
void *addr = NULL;
double throughput;
struct rusage ru;
+ size_t buffer_sz;
int lu, fd;
fd = (int)(unsigned long)arg;
@@ -142,9 +165,9 @@ void *child_thread(void *arg)
gettimeofday(&t0, NULL);
fcntl(fd, F_SETFL, O_NDELAY);
- buffer = malloc(chunk_size);
- if (!buffer) {
- perror("malloc");
+ buffer = mmap_large_buffer(chunk_size, &buffer_sz);
+ if (buffer == (void *)-1) {
+ perror("mmap");
goto error;
}
if (zflg) {
@@ -179,6 +202,10 @@ void *child_thread(void *arg)
total_mmap += zc.length;
if (xflg)
hash_zone(addr, zc.length);
+			/* It is more efficient to unmap the pages right now,
+			 * instead of deferring this to the next TCP_ZEROCOPY_RECEIVE.
+			 */
+ madvise(addr, zc.length, MADV_DONTNEED);
total += zc.length;
}
if (zc.recv_skip_hint) {
@@ -230,7 +257,7 @@ end:
ru.ru_nvcsw);
}
error:
- free(buffer);
+ munmap(buffer, buffer_sz);
close(fd);
if (zflg)
munmap(raddr, chunk_size + map_align);
@@ -347,6 +374,7 @@ int main(int argc, char *argv[])
uint64_t total = 0;
char *host = NULL;
int fd, c, on = 1;
+ size_t buffer_sz;
char *buffer;
int sflg = 0;
int mss = 0;
@@ -437,8 +465,8 @@ int main(int argc, char *argv[])
}
do_accept(fdlisten);
}
- buffer = mmap(NULL, chunk_size, PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+ buffer = mmap_large_buffer(chunk_size, &buffer_sz);
if (buffer == (char *)-1) {
perror("mmap");
exit(1);
@@ -484,6 +512,6 @@ int main(int argc, char *argv[])
total += wr;
}
close(fd);
- munmap(buffer, chunk_size);
+ munmap(buffer, buffer_sz);
return 0;
}
diff --git a/tools/testing/selftests/net/vrf_route_leaking.sh b/tools/testing/selftests/net/vrf_route_leaking.sh
new file mode 100755
index 000000000000..23cf924754a5
--- /dev/null
+++ b/tools/testing/selftests/net/vrf_route_leaking.sh
@@ -0,0 +1,626 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2019 David Ahern <dsahern@gmail.com>. All rights reserved.
+# Copyright (c) 2020 Michael Jeanson <mjeanson@efficios.com>. All rights reserved.
+#
+# Requires CONFIG_NET_VRF, CONFIG_VETH, CONFIG_BRIDGE and CONFIG_NET_NS.
+#
+#
+# Symmetric routing topology
+#
+# blue red
+# +----+ .253 +----+ .253 +----+
+# | h1 |-------------------| r1 |-------------------| h2 |
+# +----+ .1 +----+ .2 +----+
+# 172.16.1/24 172.16.2/24
+# 2001:db8:16:1/64 2001:db8:16:2/64
+#
+#
+# Routes from h1 to h2 and back go through r1: the incoming VRF blue has a
+# route to the outgoing VRF red for the n2 network, and red has a route back
+# to n1. The red VRF interface has an MTU of 1400.
+#
+# The first test sends a ping with a TTL of 1 from h1 to h2 and parses the
+# command output to check that a TTL-expired error is received.
+#
+# The second test runs traceroute from h1 to h2 and parses the output to check
+# for a hop on r1.
+#
+# The third test sends a ping with a packet size of 1450 from h1 to h2 and
+# parses the output of the command to check that a fragmentation error is
+# received.
+#
+#
+# Asymmetric routing topology
+#
+# This topology represents a customer setup where the issue with ICMP errors
+# and VRF route leaking was initially reported. The MTU test isn't done here
+# because the red VRF lacks a return route.
+#
+# blue red
+# .253 +----+ .253
+# +----| r1 |----+
+# | +----+ |
+# +----+ | | +----+
+# | h1 |--------------+ +--------------| h2 |
+# +----+ .1 | | .2 +----+
+# 172.16.1/24 | +----+ | 172.16.2/24
+# 2001:db8:16:1/64 +----| r2 |----+ 2001:db8:16:2/64
+# .254 +----+ .254
+#
+#
+# The route from h1 to h2 goes through r1: the incoming VRF blue has a route
+# to the outgoing VRF red for the n2 network, but red doesn't have a route
+# back to n1. The route from h2 to h1 goes through r2.
+#
+# The objective is to check that the incoming VRF's routing table is selected
+# to send an ICMP error back to the source when a packet's TTL reaches 1
+# while it is being forwarded between different VRFs.
+
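+# For a quick manual check after setup_sym below has run, the leaked routes
+# can be inspected directly, e.g.:
+#
+#   ip netns exec r1 ip route show vrf blue   # expect 172.16.2.0/24 dev red
+#   ip netns exec r1 ip route show vrf red    # expect 172.16.1.0/24 dev blue
+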
+VERBOSE=0
+PAUSE_ON_FAIL=no
+DEFAULT_TTYPE=sym
+
+H1_N1=172.16.1.0/24
+H1_N1_6=2001:db8:16:1::/64
+
+H1_N1_IP=172.16.1.1
+R1_N1_IP=172.16.1.253
+R2_N1_IP=172.16.1.254
+
+H1_N1_IP6=2001:db8:16:1::1
+R1_N1_IP6=2001:db8:16:1::253
+R2_N1_IP6=2001:db8:16:1::254
+
+H2_N2=172.16.2.0/24
+H2_N2_6=2001:db8:16:2::/64
+
+H2_N2_IP=172.16.2.2
+R1_N2_IP=172.16.2.253
+R2_N2_IP=172.16.2.254
+
+H2_N2_IP6=2001:db8:16:2::2
+R1_N2_IP6=2001:db8:16:2::253
+R2_N2_IP6=2001:db8:16:2::254
+
+################################################################################
+# helpers
+
+log_section()
+{
+ echo
+ echo "###########################################################################"
+ echo "$*"
+ echo "###########################################################################"
+ echo
+}
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ "${rc}" -eq "${expected}" ]; then
+ printf "TEST: %-60s [ OK ]\n" "${msg}"
+ nsuccess=$((nsuccess+1))
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read -r a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+run_cmd()
+{
+ local cmd="$*"
+ local out
+ local rc
+
+ if [ "$VERBOSE" = "1" ]; then
+ echo "COMMAND: $cmd"
+ fi
+
+ # shellcheck disable=SC2086
+ out=$(eval $cmd 2>&1)
+ rc=$?
+ if [ "$VERBOSE" = "1" ] && [ -n "$out" ]; then
+ echo "$out"
+ fi
+
+ [ "$VERBOSE" = "1" ] && echo
+
+ return $rc
+}
+
+run_cmd_grep()
+{
+ local grep_pattern="$1"
+ shift
+ local cmd="$*"
+ local out
+ local rc
+
+ if [ "$VERBOSE" = "1" ]; then
+ echo "COMMAND: $cmd"
+ fi
+
+ # shellcheck disable=SC2086
+ out=$(eval $cmd 2>&1)
+ if [ "$VERBOSE" = "1" ] && [ -n "$out" ]; then
+ echo "$out"
+ fi
+
+ echo "$out" | grep -q "$grep_pattern"
+ rc=$?
+
+ [ "$VERBOSE" = "1" ] && echo
+
+ return $rc
+}
+
+################################################################################
+# setup and teardown
+
+cleanup()
+{
+ local ns
+
+ for ns in h1 h2 r1 r2; do
+ ip netns del $ns 2>/dev/null
+ done
+}
+
+setup_vrf()
+{
+ local ns=$1
+
+ ip -netns "${ns}" rule del pref 0
+ ip -netns "${ns}" rule add pref 32765 from all lookup local
+ ip -netns "${ns}" -6 rule del pref 0
+ ip -netns "${ns}" -6 rule add pref 32765 from all lookup local
+}
+
+create_vrf()
+{
+ local ns=$1
+ local vrf=$2
+ local table=$3
+
+ ip -netns "${ns}" link add "${vrf}" type vrf table "${table}"
+ ip -netns "${ns}" link set "${vrf}" up
+ ip -netns "${ns}" route add vrf "${vrf}" unreachable default metric 8192
+ ip -netns "${ns}" -6 route add vrf "${vrf}" unreachable default metric 8192
+
+ ip -netns "${ns}" addr add 127.0.0.1/8 dev "${vrf}"
+ ip -netns "${ns}" -6 addr add ::1 dev "${vrf}" nodad
+}
+
+setup_sym()
+{
+ local ns
+
+ # make sure we are starting with a clean slate
+ cleanup
+
+ #
+ # create nodes as namespaces
+ #
+ for ns in h1 h2 r1; do
+ ip netns add $ns
+ ip -netns $ns link set lo up
+
+ case "${ns}" in
+ h[12]) ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0
+ ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1
+ ;;
+ r1) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1
+ ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
+ esac
+ done
+
+ #
+ # create interconnects
+ #
+ ip -netns h1 link add eth0 type veth peer name r1h1
+ ip -netns h1 link set r1h1 netns r1 name eth0 up
+
+ ip -netns h2 link add eth0 type veth peer name r1h2
+ ip -netns h2 link set r1h2 netns r1 name eth1 up
+
+ #
+ # h1
+ #
+ ip -netns h1 addr add dev eth0 ${H1_N1_IP}/24
+ ip -netns h1 -6 addr add dev eth0 ${H1_N1_IP6}/64 nodad
+ ip -netns h1 link set eth0 up
+
+ # h1 to h2 via r1
+ ip -netns h1 route add ${H2_N2} via ${R1_N1_IP} dev eth0
+ ip -netns h1 -6 route add ${H2_N2_6} via "${R1_N1_IP6}" dev eth0
+
+ #
+ # h2
+ #
+ ip -netns h2 addr add dev eth0 ${H2_N2_IP}/24
+ ip -netns h2 -6 addr add dev eth0 ${H2_N2_IP6}/64 nodad
+ ip -netns h2 link set eth0 up
+
+ # h2 to h1 via r1
+ ip -netns h2 route add default via ${R1_N2_IP} dev eth0
+ ip -netns h2 -6 route add default via ${R1_N2_IP6} dev eth0
+
+ #
+ # r1
+ #
+ setup_vrf r1
+ create_vrf r1 blue 1101
+ create_vrf r1 red 1102
+ ip -netns r1 link set mtu 1400 dev eth1
+ ip -netns r1 link set eth0 vrf blue up
+ ip -netns r1 link set eth1 vrf red up
+ ip -netns r1 addr add dev eth0 ${R1_N1_IP}/24
+ ip -netns r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad
+ ip -netns r1 addr add dev eth1 ${R1_N2_IP}/24
+ ip -netns r1 -6 addr add dev eth1 ${R1_N2_IP6}/64 nodad
+
+ # Route leak from blue to red
+ ip -netns r1 route add vrf blue ${H2_N2} dev red
+ ip -netns r1 -6 route add vrf blue ${H2_N2_6} dev red
+
+ # Route leak from red to blue
+ ip -netns r1 route add vrf red ${H1_N1} dev blue
+ ip -netns r1 -6 route add vrf red ${H1_N1_6} dev blue
+
+
+ # Wait for ip config to settle
+ sleep 2
+}
+
+setup_asym()
+{
+ local ns
+
+ # make sure we are starting with a clean slate
+ cleanup
+
+ #
+ # create nodes as namespaces
+ #
+ for ns in h1 h2 r1 r2; do
+ ip netns add $ns
+ ip -netns $ns link set lo up
+
+ case "${ns}" in
+ h[12]) ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0
+ ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1
+ ;;
+ r[12]) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1
+ ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
+ esac
+ done
+
+ #
+ # create interconnects
+ #
+ ip -netns h1 link add eth0 type veth peer name r1h1
+ ip -netns h1 link set r1h1 netns r1 name eth0 up
+
+ ip -netns h1 link add eth1 type veth peer name r2h1
+ ip -netns h1 link set r2h1 netns r2 name eth0 up
+
+ ip -netns h2 link add eth0 type veth peer name r1h2
+ ip -netns h2 link set r1h2 netns r1 name eth1 up
+
+ ip -netns h2 link add eth1 type veth peer name r2h2
+ ip -netns h2 link set r2h2 netns r2 name eth1 up
+
+ #
+ # h1
+ #
+ ip -netns h1 link add br0 type bridge
+ ip -netns h1 link set br0 up
+ ip -netns h1 addr add dev br0 ${H1_N1_IP}/24
+ ip -netns h1 -6 addr add dev br0 ${H1_N1_IP6}/64 nodad
+ ip -netns h1 link set eth0 master br0 up
+ ip -netns h1 link set eth1 master br0 up
+
+ # h1 to h2 via r1
+ ip -netns h1 route add ${H2_N2} via ${R1_N1_IP} dev br0
+ ip -netns h1 -6 route add ${H2_N2_6} via "${R1_N1_IP6}" dev br0
+
+ #
+ # h2
+ #
+ ip -netns h2 link add br0 type bridge
+ ip -netns h2 link set br0 up
+ ip -netns h2 addr add dev br0 ${H2_N2_IP}/24
+ ip -netns h2 -6 addr add dev br0 ${H2_N2_IP6}/64 nodad
+ ip -netns h2 link set eth0 master br0 up
+ ip -netns h2 link set eth1 master br0 up
+
+ # h2 to h1 via r2
+ ip -netns h2 route add default via ${R2_N2_IP} dev br0
+ ip -netns h2 -6 route add default via ${R2_N2_IP6} dev br0
+
+ #
+ # r1
+ #
+ setup_vrf r1
+ create_vrf r1 blue 1101
+ create_vrf r1 red 1102
+ ip -netns r1 link set mtu 1400 dev eth1
+ ip -netns r1 link set eth0 vrf blue up
+ ip -netns r1 link set eth1 vrf red up
+ ip -netns r1 addr add dev eth0 ${R1_N1_IP}/24
+ ip -netns r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad
+ ip -netns r1 addr add dev eth1 ${R1_N2_IP}/24
+ ip -netns r1 -6 addr add dev eth1 ${R1_N2_IP6}/64 nodad
+
+ # Route leak from blue to red
+ ip -netns r1 route add vrf blue ${H2_N2} dev red
+ ip -netns r1 -6 route add vrf blue ${H2_N2_6} dev red
+
+ # No route leak from red to blue
+
+ #
+ # r2
+ #
+ ip -netns r2 addr add dev eth0 ${R2_N1_IP}/24
+ ip -netns r2 -6 addr add dev eth0 ${R2_N1_IP6}/64 nodad
+ ip -netns r2 addr add dev eth1 ${R2_N2_IP}/24
+ ip -netns r2 -6 addr add dev eth1 ${R2_N2_IP6}/64 nodad
+
+ # Wait for ip config to settle
+ sleep 2
+}
+
+check_connectivity()
+{
+ ip netns exec h1 ping -c1 -w1 ${H2_N2_IP} >/dev/null 2>&1
+ log_test $? 0 "Basic IPv4 connectivity"
+ return $?
+}
+
+check_connectivity6()
+{
+ ip netns exec h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1
+ log_test $? 0 "Basic IPv6 connectivity"
+ return $?
+}
+
+check_traceroute()
+{
+ if [ ! -x "$(command -v traceroute)" ]; then
+		echo "SKIP: Could not run IPv4 test without traceroute"
+ return 1
+ fi
+}
+
+check_traceroute6()
+{
+ if [ ! -x "$(command -v traceroute6)" ]; then
+		echo "SKIP: Could not run IPv6 test without traceroute6"
+ return 1
+ fi
+}
+
+ipv4_traceroute()
+{
+ local ttype="$1"
+
+ [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE"
+
+ log_section "IPv4 ($ttype route): VRF ICMP error route lookup traceroute"
+
+ check_traceroute || return
+
+ setup_"$ttype"
+
+ check_connectivity || return
+
+ run_cmd_grep "${R1_N1_IP}" ip netns exec h1 traceroute ${H2_N2_IP}
+ log_test $? 0 "Traceroute reports a hop on r1"
+}
+
+ipv4_traceroute_asym()
+{
+ ipv4_traceroute asym
+}
+
+ipv6_traceroute()
+{
+ local ttype="$1"
+
+ [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE"
+
+ log_section "IPv6 ($ttype route): VRF ICMP error route lookup traceroute"
+
+ check_traceroute6 || return
+
+ setup_"$ttype"
+
+ check_connectivity6 || return
+
+ run_cmd_grep "${R1_N1_IP6}" ip netns exec h1 traceroute6 ${H2_N2_IP6}
+ log_test $? 0 "Traceroute6 reports a hop on r1"
+}
+
+ipv6_traceroute_asym()
+{
+ ipv6_traceroute asym
+}
+
+ipv4_ping_ttl()
+{
+ local ttype="$1"
+
+ [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE"
+
+ log_section "IPv4 ($ttype route): VRF ICMP ttl error route lookup ping"
+
+ setup_"$ttype"
+
+ check_connectivity || return
+
+ run_cmd_grep "Time to live exceeded" ip netns exec h1 ping -t1 -c1 -W2 ${H2_N2_IP}
+ log_test $? 0 "Ping received ICMP ttl exceeded"
+}
+
+ipv4_ping_ttl_asym()
+{
+ ipv4_ping_ttl asym
+}
+
+ipv4_ping_frag()
+{
+ local ttype="$1"
+
+ [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE"
+
+ log_section "IPv4 ($ttype route): VRF ICMP fragmentation error route lookup ping"
+
+ setup_"$ttype"
+
+ check_connectivity || return
+
+ run_cmd_grep "Frag needed" ip netns exec h1 ping -s 1450 -Mdo -c1 -W2 ${H2_N2_IP}
+ log_test $? 0 "Ping received ICMP Frag needed"
+}
+
+ipv4_ping_frag_asym()
+{
+ ipv4_ping_frag asym
+}
+
+ipv6_ping_ttl()
+{
+ local ttype="$1"
+
+ [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE"
+
+ log_section "IPv6 ($ttype route): VRF ICMP ttl error route lookup ping"
+
+ setup_"$ttype"
+
+ check_connectivity6 || return
+
+ run_cmd_grep "Time exceeded: Hop limit" ip netns exec h1 "${ping6}" -t1 -c1 -W2 ${H2_N2_IP6}
+ log_test $? 0 "Ping received ICMP Hop limit"
+}
+
+ipv6_ping_ttl_asym()
+{
+ ipv6_ping_ttl asym
+}
+
+ipv6_ping_frag()
+{
+ local ttype="$1"
+
+ [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE"
+
+ log_section "IPv6 ($ttype route): VRF ICMP fragmentation error route lookup ping"
+
+ setup_"$ttype"
+
+ check_connectivity6 || return
+
+ run_cmd_grep "Packet too big" ip netns exec h1 "${ping6}" -s 1450 -Mdo -c1 -W2 ${H2_N2_IP6}
+ log_test $? 0 "Ping received ICMP Packet too big"
+}
+
+ipv6_ping_frag_asym()
+{
+ ipv6_ping_frag asym
+}
+
+################################################################################
+# usage
+
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+
+ -4 Run IPv4 tests only
+ -6 Run IPv6 tests only
+ -t TEST Run only TEST
+ -p Pause on fail
+ -v verbose mode (show commands and output)
+EOF
+}
+
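+# Example invocations (sketch):
+#   ./vrf_route_leaking.sh -4 -v       # run only the IPv4 tests, verbosely
+#   ./vrf_route_leaking.sh -t ping -p  # run the ping tests, pause on failure
+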
+################################################################################
+# main
+
+# Some systems don't have a ping6 binary anymore
+command -v ping6 > /dev/null 2>&1 && ping6=$(command -v ping6) || ping6=$(command -v ping)
+
+TESTS_IPV4="ipv4_ping_ttl ipv4_traceroute ipv4_ping_frag ipv4_ping_ttl_asym ipv4_traceroute_asym"
+TESTS_IPV6="ipv6_ping_ttl ipv6_traceroute ipv6_ping_frag ipv6_ping_ttl_asym ipv6_traceroute_asym"
+
+ret=0
+nsuccess=0
+nfail=0
+
+while getopts :46t:pvh o
+do
+ case $o in
+ 4) TESTS=ipv4;;
+ 6) TESTS=ipv6;;
+ t) TESTS=$OPTARG;;
+ p) PAUSE_ON_FAIL=yes;;
+ v) VERBOSE=1;;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
+#
+# show user test config
+#
+if [ -z "$TESTS" ]; then
+ TESTS="$TESTS_IPV4 $TESTS_IPV6"
+elif [ "$TESTS" = "ipv4" ]; then
+ TESTS="$TESTS_IPV4"
+elif [ "$TESTS" = "ipv6" ]; then
+ TESTS="$TESTS_IPV6"
+fi
+
+for t in $TESTS
+do
+ case $t in
+ ipv4_ping_ttl|ping) ipv4_ping_ttl;;&
+ ipv4_ping_ttl_asym|ping) ipv4_ping_ttl_asym;;&
+ ipv4_traceroute|traceroute) ipv4_traceroute;;&
+ ipv4_traceroute_asym|traceroute) ipv4_traceroute_asym;;&
+ ipv4_ping_frag|ping) ipv4_ping_frag;;&
+
+ ipv6_ping_ttl|ping) ipv6_ping_ttl;;&
+ ipv6_ping_ttl_asym|ping) ipv6_ping_ttl_asym;;&
+ ipv6_traceroute|traceroute) ipv6_traceroute;;&
+ ipv6_traceroute_asym|traceroute) ipv6_traceroute_asym;;&
+ ipv6_ping_frag|ping) ipv6_ping_frag;;&
+
+ # setup namespaces and config, but do not run any tests
+ setup_sym|setup) setup_sym; exit 0;;
+ setup_asym) setup_asym; exit 0;;
+
+ help) echo "Test names: $TESTS"; exit 0;;
+ esac
+done
+
+cleanup
+
+printf "\nTests passed: %3d\n" ${nsuccess}
+printf "Tests failed: %3d\n" ${nfail}
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/.gitignore b/tools/testing/selftests/netfilter/.gitignore
new file mode 100644
index 000000000000..8448f74adfec
--- /dev/null
+++ b/tools/testing/selftests/netfilter/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+nf-queue
diff --git a/tools/testing/selftests/netfilter/nf-queue.c b/tools/testing/selftests/netfilter/nf-queue.c
index 29c73bce38fa..9e56b9d47037 100644
--- a/tools/testing/selftests/netfilter/nf-queue.c
+++ b/tools/testing/selftests/netfilter/nf-queue.c
@@ -17,9 +17,12 @@
struct options {
bool count_packets;
+ bool gso_enabled;
int verbose;
unsigned int queue_num;
unsigned int timeout;
+ uint32_t verdict;
+ uint32_t delay_ms;
};
static unsigned int queue_stats[5];
@@ -27,7 +30,7 @@ static struct options opts;
static void help(const char *p)
{
- printf("Usage: %s [-c|-v [-vv] ] [-t timeout] [-q queue_num]\n", p);
+ printf("Usage: %s [-c|-v [-vv] ] [-t timeout] [-q queue_num] [-Qdst_queue ] [ -d ms_delay ] [-G]\n", p);
}
static int parse_attr_cb(const struct nlattr *attr, void *data)
@@ -162,7 +165,7 @@ nfq_build_cfg_params(char *buf, uint8_t mode, int range, int queue_num)
}
static struct nlmsghdr *
-nfq_build_verdict(char *buf, int id, int queue_num, int verd)
+nfq_build_verdict(char *buf, int id, int queue_num, uint32_t verd)
{
struct nfqnl_msg_verdict_hdr vh = {
.verdict = htonl(verd),
@@ -189,9 +192,6 @@ static void print_stats(void)
unsigned int last, total;
int i;
- if (!opts.count_packets)
- return;
-
total = 0;
last = queue_stats[0];
@@ -234,7 +234,8 @@ struct mnl_socket *open_queue(void)
nlh = nfq_build_cfg_params(buf, NFQNL_COPY_PACKET, 0xFFFF, queue_num);
- flags = NFQA_CFG_F_GSO | NFQA_CFG_F_UID_GID;
+ flags = opts.gso_enabled ? NFQA_CFG_F_GSO : 0;
+ flags |= NFQA_CFG_F_UID_GID;
mnl_attr_put_u32(nlh, NFQA_CFG_FLAGS, htonl(flags));
mnl_attr_put_u32(nlh, NFQA_CFG_MASK, htonl(flags));
@@ -255,6 +256,17 @@ struct mnl_socket *open_queue(void)
return nl;
}
+static void sleep_ms(uint32_t delay)
+{
+ struct timespec ts = { .tv_sec = delay / 1000 };
+
+ delay %= 1000;
+
+ ts.tv_nsec = delay * 1000llu * 1000llu;
+
+ nanosleep(&ts, NULL);
+}
+
static int mainloop(void)
{
unsigned int buflen = 64 * 1024 + MNL_SOCKET_BUFFER_SIZE;
@@ -278,7 +290,7 @@ static int mainloop(void)
ret = mnl_socket_recvfrom(nl, buf, buflen);
if (ret == -1) {
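+			/* Tolerate dropped (ENOBUFS) and signal-interrupted (EINTR) reads. */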
- if (errno == ENOBUFS)
+ if (errno == ENOBUFS || errno == EINTR)
continue;
if (errno == EAGAIN) {
@@ -298,7 +310,10 @@ static int mainloop(void)
}
id = ret - MNL_CB_OK;
- nlh = nfq_build_verdict(buf, id, opts.queue_num, NF_ACCEPT);
+ if (opts.delay_ms)
+ sleep_ms(opts.delay_ms);
+
+ nlh = nfq_build_verdict(buf, id, opts.queue_num, opts.verdict);
if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) {
perror("mnl_socket_sendto");
exit(EXIT_FAILURE);
@@ -314,7 +329,7 @@ static void parse_opts(int argc, char **argv)
{
int c;
- while ((c = getopt(argc, argv, "chvt:q:")) != -1) {
+ while ((c = getopt(argc, argv, "chvt:q:Q:d:G")) != -1) {
switch (c) {
case 'c':
opts.count_packets = true;
@@ -328,20 +343,48 @@ static void parse_opts(int argc, char **argv)
if (opts.queue_num > 0xffff)
opts.queue_num = 0;
break;
+ case 'Q':
+ opts.verdict = atoi(optarg);
+ if (opts.verdict > 0xffff) {
+ fprintf(stderr, "Expected destination queue number\n");
+ exit(1);
+ }
+
+ opts.verdict <<= 16;
+ opts.verdict |= NF_QUEUE;
+ break;
+ case 'd':
+ opts.delay_ms = atoi(optarg);
+ if (opts.delay_ms == 0) {
+ fprintf(stderr, "Expected nonzero delay (in milliseconds)\n");
+ exit(1);
+ }
+ break;
case 't':
opts.timeout = atoi(optarg);
break;
+ case 'G':
+ opts.gso_enabled = false;
+ break;
case 'v':
opts.verbose++;
break;
}
}
+
+ if (opts.verdict != NF_ACCEPT && (opts.verdict >> 16 == opts.queue_num)) {
+ fprintf(stderr, "Cannot use same destination and source queue\n");
+ exit(1);
+ }
}
int main(int argc, char *argv[])
{
int ret;
+ opts.verdict = NF_ACCEPT;
+ opts.gso_enabled = true;
+
parse_opts(argc, argv);
ret = mainloop();
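The -Q option added above packs the destination queue into the upper 16 bits of the verdict word, with NF_QUEUE as the action in the lower bits; a minimal sketch of that encoding (hypothetical standalone program, not part of the patch):

	#include <stdint.h>
	#include <stdio.h>
	#include <linux/netfilter.h>	/* NF_QUEUE */

	int main(void)
	{
		uint32_t dst_queue = 1;
		/* Upper 16 bits: target queue; lower 16 bits: verdict action. */
		uint32_t verdict = (dst_queue << 16) | NF_QUEUE;

		printf("verdict 0x%08x -> action %u, queue %u\n",
		       verdict, verdict & 0xffff, verdict >> 16);
		return 0;
	}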
diff --git a/tools/testing/selftests/netfilter/nft_meta.sh b/tools/testing/selftests/netfilter/nft_meta.sh
index d250b84dd5bc..087f0e6e71ce 100755
--- a/tools/testing/selftests/netfilter/nft_meta.sh
+++ b/tools/testing/selftests/netfilter/nft_meta.sh
@@ -7,8 +7,7 @@ ksft_skip=4
sfx=$(mktemp -u "XXXXXXXX")
ns0="ns0-$sfx"
-nft --version > /dev/null 2>&1
-if [ $? -ne 0 ];then
+if ! nft --version > /dev/null 2>&1; then
echo "SKIP: Could not run test without nft tool"
exit $ksft_skip
fi
@@ -24,6 +23,8 @@ ip -net "$ns0" addr add 127.0.0.1 dev lo
trap cleanup EXIT
+currentyear=$(date +%G)
+lastyear=$((currentyear-1))
ip netns exec "$ns0" nft -f /dev/stdin <<EOF
table inet filter {
counter iifcount {}
@@ -33,6 +34,9 @@ table inet filter {
counter infproto4count {}
counter il4protocounter {}
counter imarkcounter {}
+ counter icpu0counter {}
+ counter ilastyearcounter {}
+ counter icurrentyearcounter {}
counter oifcount {}
counter oifnamecount {}
@@ -54,6 +58,9 @@ table inet filter {
meta nfproto ipv4 counter name "infproto4count"
meta l4proto icmp counter name "il4protocounter"
meta mark 42 counter name "imarkcounter"
+ meta cpu 0 counter name "icpu0counter"
+ meta time "$lastyear-01-01" - "$lastyear-12-31" counter name ilastyearcounter
+ meta time "$currentyear-01-01" - "$currentyear-12-31" counter name icurrentyearcounter
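+		# only icurrentyearcounter should match; ilastyearcounter must stay 0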
}
chain output {
@@ -84,11 +91,10 @@ check_one_counter()
local want="packets $2"
local verbose="$3"
- cnt=$(ip netns exec "$ns0" nft list counter inet filter $cname | grep -q "$want")
- if [ $? -ne 0 ];then
+ if ! ip netns exec "$ns0" nft list counter inet filter $cname | grep -q "$want"; then
echo "FAIL: $cname, want \"$want\", got"
ret=1
- ip netns exec "$ns0" nft list counter inet filter $counter
+ ip netns exec "$ns0" nft list counter inet filter $cname
fi
}
@@ -100,8 +106,7 @@ check_lo_counters()
for counter in iifcount iifnamecount iifgroupcount iiftypecount infproto4count \
oifcount oifnamecount oifgroupcount oiftypecount onfproto4count \
- il4protocounter \
- ol4protocounter \
+ il4protocounter icurrentyearcounter ol4protocounter \
; do
check_one_counter "$counter" "$want" "$verbose"
done
@@ -116,9 +121,22 @@ check_one_counter oskuidcounter "1" true
check_one_counter oskgidcounter "1" true
check_one_counter imarkcounter "1" true
check_one_counter omarkcounter "1" true
+check_one_counter ilastyearcounter "0" true
if [ $ret -eq 0 ];then
echo "OK: nftables meta iif/oif counters at expected values"
+else
+ exit $ret
+fi
+
+# Pin execution to the first CPU and check the cpu meta counter
+taskset -p 01 $$ > /dev/null
+ip netns exec "$ns0" nft reset counters > /dev/null
+ip netns exec "$ns0" ping -q -c 1 127.0.0.1 > /dev/null
+check_one_counter icpu0counter "2" true
+
+if [ $ret -eq 0 ];then
+	echo "OK: nftables meta cpu counter at expected value"
fi
exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_queue.sh b/tools/testing/selftests/netfilter/nft_queue.sh
index 6898448b4266..3d202b90b33d 100755
--- a/tools/testing/selftests/netfilter/nft_queue.sh
+++ b/tools/testing/selftests/netfilter/nft_queue.sh
@@ -12,6 +12,7 @@ sfx=$(mktemp -u "XXXXXXXX")
ns1="ns1-$sfx"
ns2="ns2-$sfx"
nsrouter="nsrouter-$sfx"
+timeout=4
cleanup()
{
@@ -20,6 +21,7 @@ cleanup()
ip netns del ${nsrouter}
rm -f "$TMPFILE0"
rm -f "$TMPFILE1"
+ rm -f "$TMPFILE2" "$TMPFILE3"
}
nft --version > /dev/null 2>&1
@@ -42,6 +44,8 @@ fi
TMPFILE0=$(mktemp)
TMPFILE1=$(mktemp)
+TMPFILE2=$(mktemp)
+TMPFILE3=$(mktemp)
trap cleanup EXIT
ip netns add ${ns1}
@@ -83,7 +87,7 @@ load_ruleset() {
local name=$1
local prio=$2
-ip netns exec ${nsrouter} nft -f - <<EOF
+ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
table inet $name {
chain nfq {
ip protocol icmp queue bypass
@@ -118,7 +122,7 @@ EOF
load_counter_ruleset() {
local prio=$1
-ip netns exec ${nsrouter} nft -f - <<EOF
+ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
table inet countrules {
chain pre {
type filter hook prerouting priority $prio; policy accept;
@@ -175,7 +179,7 @@ test_ping_router() {
test_queue_blackhole() {
local proto=$1
-ip netns exec ${nsrouter} nft -f - <<EOF
+ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
table $proto blackh {
chain forward {
type filter hook forward priority 0; policy accept;
@@ -184,10 +188,10 @@ table $proto blackh {
}
EOF
if [ $proto = "ip" ] ;then
- ip netns exec ${ns1} ping -c 1 -q 10.0.2.99 > /dev/null
+ ip netns exec ${ns1} ping -W 2 -c 1 -q 10.0.2.99 > /dev/null
lret=$?
elif [ $proto = "ip6" ]; then
- ip netns exec ${ns1} ping -c 1 -q dead:2::99 > /dev/null
+ ip netns exec ${ns1} ping -W 2 -c 1 -q dead:2::99 > /dev/null
lret=$?
else
lret=111
@@ -214,8 +218,8 @@ test_queue()
local last=""
# spawn nf-queue listeners
- ip netns exec ${nsrouter} ./nf-queue -c -q 0 -t 3 > "$TMPFILE0" &
- ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t 3 > "$TMPFILE1" &
+ ip netns exec ${nsrouter} ./nf-queue -c -q 0 -t $timeout > "$TMPFILE0" &
+ ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t $timeout > "$TMPFILE1" &
sleep 1
test_ping
ret=$?
@@ -250,11 +254,11 @@ test_queue()
test_tcp_forward()
{
- ip netns exec ${nsrouter} ./nf-queue -q 2 -t 10 &
+ ip netns exec ${nsrouter} ./nf-queue -q 2 -t $timeout &
local nfqpid=$!
tmpfile=$(mktemp) || exit 1
- dd conv=sparse status=none if=/dev/zero bs=1M count=100 of=$tmpfile
+ dd conv=sparse status=none if=/dev/zero bs=1M count=200 of=$tmpfile
ip netns exec ${ns2} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null &
local rpid=$!
@@ -270,15 +274,13 @@ test_tcp_forward()
test_tcp_localhost()
{
- tc -net "${nsrouter}" qdisc add dev lo root netem loss random 1%
-
tmpfile=$(mktemp) || exit 1
- dd conv=sparse status=none if=/dev/zero bs=1M count=900 of=$tmpfile
+ dd conv=sparse status=none if=/dev/zero bs=1M count=200 of=$tmpfile
ip netns exec ${nsrouter} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null &
local rpid=$!
- ip netns exec ${nsrouter} ./nf-queue -q 3 -t 30 &
+ ip netns exec ${nsrouter} ./nf-queue -q 3 -t $timeout &
local nfqpid=$!
sleep 1
@@ -287,6 +289,47 @@ test_tcp_localhost()
wait $rpid
[ $? -eq 0 ] && echo "PASS: tcp via loopback"
+ wait 2>/dev/null
+}
+
+test_tcp_localhost_requeue()
+{
+ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
+flush ruleset
+table inet filter {
+ chain output {
+ type filter hook output priority 0; policy accept;
+ tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0
+ }
+ chain post {
+ type filter hook postrouting priority 0; policy accept;
+ tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0
+ }
+}
+EOF
+ tmpfile=$(mktemp) || exit 1
+ dd conv=sparse status=none if=/dev/zero bs=1M count=200 of=$tmpfile
+ ip netns exec ${nsrouter} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null &
+ local rpid=$!
+
+ ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t $timeout > "$TMPFILE2" &
+
+	# The output hook queues packets to queue 0 first; this instance then
+	# re-queues each packet to the nf-queue listener on queue 1.
+ ip netns exec ${nsrouter} ./nf-queue -G -d 150 -c -q 0 -Q 1 -t $timeout > "$TMPFILE3" &
+
+ sleep 1
+ ip netns exec ${nsrouter} nc -w 5 127.0.0.1 12345 <"$tmpfile" > /dev/null
+ rm -f "$tmpfile"
+
+ wait
+
+ if ! diff -u "$TMPFILE2" "$TMPFILE3" ; then
+ echo "FAIL: lost packets during requeue?!" 1>&2
+ return
+ fi
+
+ echo "PASS: tcp via loopback and re-queueing"
}
ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
@@ -328,5 +371,6 @@ test_queue 20
test_tcp_forward
test_tcp_localhost
+test_tcp_localhost_requeue
exit $ret
diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h
index a2c80914e3dc..01f8d3c0cf2c 100644
--- a/tools/testing/selftests/pidfd/pidfd.h
+++ b/tools/testing/selftests/pidfd/pidfd.h
@@ -46,6 +46,10 @@
#define __NR_pidfd_getfd -1
#endif
+#ifndef PIDFD_NONBLOCK
+#define PIDFD_NONBLOCK O_NONBLOCK
+#endif
+
/*
* The kernel reserves 300 pids via RESERVED_PIDS in kernel/pid.c
* That means, when it wraps around any pid < 300 will be skipped.
diff --git a/tools/testing/selftests/pidfd/pidfd_setns_test.c b/tools/testing/selftests/pidfd/pidfd_setns_test.c
index 7dca1aa4672d..1f085b922c6e 100644
--- a/tools/testing/selftests/pidfd/pidfd_setns_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_setns_test.c
@@ -75,7 +75,7 @@ static int sys_waitid(int which, pid_t pid, int options)
pid_t create_child(int *pidfd, unsigned flags)
{
- struct clone_args args = {
+ struct __clone_args args = {
.flags = CLONE_PIDFD | flags,
.exit_signal = SIGCHLD,
.pidfd = ptr_to_u64(pidfd),
diff --git a/tools/testing/selftests/pidfd/pidfd_wait.c b/tools/testing/selftests/pidfd/pidfd_wait.c
index 7079f8eef792..be2943f072f6 100644
--- a/tools/testing/selftests/pidfd/pidfd_wait.c
+++ b/tools/testing/selftests/pidfd/pidfd_wait.c
@@ -17,10 +17,15 @@
#include <unistd.h>
#include "pidfd.h"
-#include "../kselftest.h"
+#include "../kselftest_harness.h"
#define ptr_to_u64(ptr) ((__u64)((uintptr_t)(ptr)))
+/* Attempt to de-conflict with the selftests tree. */
+#ifndef SKIP
+#define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__)
+#endif
+
static pid_t sys_clone3(struct clone_args *args)
{
return syscall(__NR_clone3, args, sizeof(struct clone_args));
@@ -32,9 +37,8 @@ static int sys_waitid(int which, pid_t pid, siginfo_t *info, int options,
return syscall(__NR_waitid, which, pid, info, options, ru);
}
-static int test_pidfd_wait_simple(void)
+TEST(wait_simple)
{
- const char *test_name = "pidfd wait simple";
int pidfd = -1, status = 0;
pid_t parent_tid = -1;
struct clone_args args = {
@@ -50,76 +54,40 @@ static int test_pidfd_wait_simple(void)
};
pidfd = open("/proc/self", O_DIRECTORY | O_RDONLY | O_CLOEXEC);
- if (pidfd < 0)
- ksft_exit_fail_msg("%s test: failed to open /proc/self %s\n",
- test_name, strerror(errno));
+ ASSERT_GE(pidfd, 0);
pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
- if (pid == 0)
- ksft_exit_fail_msg(
- "%s test: succeeded to wait on invalid pidfd %s\n",
- test_name, strerror(errno));
- close(pidfd);
+ ASSERT_NE(pid, 0);
+ EXPECT_EQ(close(pidfd), 0);
pidfd = -1;
pidfd = open("/dev/null", O_RDONLY | O_CLOEXEC);
- if (pidfd == 0)
- ksft_exit_fail_msg("%s test: failed to open /dev/null %s\n",
- test_name, strerror(errno));
+ ASSERT_GE(pidfd, 0);
pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
- if (pid == 0)
- ksft_exit_fail_msg(
- "%s test: succeeded to wait on invalid pidfd %s\n",
- test_name, strerror(errno));
- close(pidfd);
+ ASSERT_NE(pid, 0);
+ EXPECT_EQ(close(pidfd), 0);
pidfd = -1;
pid = sys_clone3(&args);
- if (pid < 0)
- ksft_exit_fail_msg("%s test: failed to create new process %s\n",
- test_name, strerror(errno));
+ ASSERT_GE(pid, 0);
if (pid == 0)
exit(EXIT_SUCCESS);
pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
- if (pid < 0)
- ksft_exit_fail_msg(
- "%s test: failed to wait on process with pid %d and pidfd %d: %s\n",
- test_name, parent_tid, pidfd, strerror(errno));
-
- if (!WIFEXITED(info.si_status) || WEXITSTATUS(info.si_status))
- ksft_exit_fail_msg(
- "%s test: unexpected status received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, parent_tid, pidfd, strerror(errno));
- close(pidfd);
-
- if (info.si_signo != SIGCHLD)
- ksft_exit_fail_msg(
- "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_signo, parent_tid, pidfd,
- strerror(errno));
-
- if (info.si_code != CLD_EXITED)
- ksft_exit_fail_msg(
- "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_code, parent_tid, pidfd,
- strerror(errno));
-
- if (info.si_pid != parent_tid)
- ksft_exit_fail_msg(
- "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_pid, parent_tid, pidfd,
- strerror(errno));
-
- ksft_test_result_pass("%s test: Passed\n", test_name);
- return 0;
+ ASSERT_GE(pid, 0);
+ ASSERT_EQ(WIFEXITED(info.si_status), true);
+ ASSERT_EQ(WEXITSTATUS(info.si_status), 0);
+ EXPECT_EQ(close(pidfd), 0);
+
+ ASSERT_EQ(info.si_signo, SIGCHLD);
+ ASSERT_EQ(info.si_code, CLD_EXITED);
+ ASSERT_EQ(info.si_pid, parent_tid);
}
-static int test_pidfd_wait_states(void)
+TEST(wait_states)
{
- const char *test_name = "pidfd wait states";
int pidfd = -1, status = 0;
pid_t parent_tid = -1;
struct clone_args args = {
@@ -135,9 +103,7 @@ static int test_pidfd_wait_states(void)
};
pid = sys_clone3(&args);
- if (pid < 0)
- ksft_exit_fail_msg("%s test: failed to create new process %s\n",
- test_name, strerror(errno));
+ ASSERT_GE(pid, 0);
if (pid == 0) {
kill(getpid(), SIGSTOP);
@@ -145,127 +111,115 @@ static int test_pidfd_wait_states(void)
exit(EXIT_SUCCESS);
}
- ret = sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL);
- if (ret < 0)
- ksft_exit_fail_msg(
- "%s test: failed to wait on WSTOPPED process with pid %d and pidfd %d: %s\n",
- test_name, parent_tid, pidfd, strerror(errno));
-
- if (info.si_signo != SIGCHLD)
- ksft_exit_fail_msg(
- "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_signo, parent_tid, pidfd,
- strerror(errno));
-
- if (info.si_code != CLD_STOPPED)
- ksft_exit_fail_msg(
- "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_code, parent_tid, pidfd,
- strerror(errno));
-
- if (info.si_pid != parent_tid)
- ksft_exit_fail_msg(
- "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_pid, parent_tid, pidfd,
- strerror(errno));
-
- ret = sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0);
- if (ret < 0)
- ksft_exit_fail_msg(
- "%s test: failed to send signal to process with pid %d and pidfd %d: %s\n",
- test_name, parent_tid, pidfd, strerror(errno));
-
- ret = sys_waitid(P_PIDFD, pidfd, &info, WCONTINUED, NULL);
- if (ret < 0)
- ksft_exit_fail_msg(
- "%s test: failed to wait WCONTINUED on process with pid %d and pidfd %d: %s\n",
- test_name, parent_tid, pidfd, strerror(errno));
-
- if (info.si_signo != SIGCHLD)
- ksft_exit_fail_msg(
- "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_signo, parent_tid, pidfd,
- strerror(errno));
-
- if (info.si_code != CLD_CONTINUED)
- ksft_exit_fail_msg(
- "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_code, parent_tid, pidfd,
- strerror(errno));
-
- if (info.si_pid != parent_tid)
- ksft_exit_fail_msg(
- "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_pid, parent_tid, pidfd,
- strerror(errno));
-
- ret = sys_waitid(P_PIDFD, pidfd, &info, WUNTRACED, NULL);
- if (ret < 0)
- ksft_exit_fail_msg(
- "%s test: failed to wait on WUNTRACED process with pid %d and pidfd %d: %s\n",
- test_name, parent_tid, pidfd, strerror(errno));
-
- if (info.si_signo != SIGCHLD)
- ksft_exit_fail_msg(
- "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_signo, parent_tid, pidfd,
- strerror(errno));
-
- if (info.si_code != CLD_STOPPED)
- ksft_exit_fail_msg(
- "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_code, parent_tid, pidfd,
- strerror(errno));
-
- if (info.si_pid != parent_tid)
- ksft_exit_fail_msg(
- "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_pid, parent_tid, pidfd,
- strerror(errno));
-
- ret = sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0);
- if (ret < 0)
- ksft_exit_fail_msg(
- "%s test: failed to send SIGKILL to process with pid %d and pidfd %d: %s\n",
- test_name, parent_tid, pidfd, strerror(errno));
+ ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL), 0);
+ ASSERT_EQ(info.si_signo, SIGCHLD);
+ ASSERT_EQ(info.si_code, CLD_STOPPED);
+ ASSERT_EQ(info.si_pid, parent_tid);
- ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
- if (ret < 0)
- ksft_exit_fail_msg(
- "%s test: failed to wait on WEXITED process with pid %d and pidfd %d: %s\n",
- test_name, parent_tid, pidfd, strerror(errno));
-
- if (info.si_signo != SIGCHLD)
- ksft_exit_fail_msg(
- "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_signo, parent_tid, pidfd,
- strerror(errno));
-
- if (info.si_code != CLD_KILLED)
- ksft_exit_fail_msg(
- "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_code, parent_tid, pidfd,
- strerror(errno));
-
- if (info.si_pid != parent_tid)
- ksft_exit_fail_msg(
- "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_pid, parent_tid, pidfd,
- strerror(errno));
-
- close(pidfd);
-
- ksft_test_result_pass("%s test: Passed\n", test_name);
- return 0;
+ ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0), 0);
+
+ ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WCONTINUED, NULL), 0);
+ ASSERT_EQ(info.si_signo, SIGCHLD);
+ ASSERT_EQ(info.si_code, CLD_CONTINUED);
+ ASSERT_EQ(info.si_pid, parent_tid);
+
+ ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WUNTRACED, NULL), 0);
+ ASSERT_EQ(info.si_signo, SIGCHLD);
+ ASSERT_EQ(info.si_code, CLD_STOPPED);
+ ASSERT_EQ(info.si_pid, parent_tid);
+
+ ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0), 0);
+
+ ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL), 0);
+ ASSERT_EQ(info.si_signo, SIGCHLD);
+ ASSERT_EQ(info.si_code, CLD_KILLED);
+ ASSERT_EQ(info.si_pid, parent_tid);
+
+ EXPECT_EQ(close(pidfd), 0);
}
-int main(int argc, char **argv)
+TEST(wait_nonblock)
{
- ksft_print_header();
- ksft_set_plan(2);
+ int pidfd, status = 0;
+ unsigned int flags = 0;
+ pid_t parent_tid = -1;
+ struct clone_args args = {
+ .parent_tid = ptr_to_u64(&parent_tid),
+ .flags = CLONE_PARENT_SETTID,
+ .exit_signal = SIGCHLD,
+ };
+ int ret;
+ pid_t pid;
+ siginfo_t info = {
+ .si_signo = 0,
+ };
+
+ /*
+ * Callers need to see ECHILD with non-blocking pidfds when no child
+	 * processes exist.
+ */
+ pidfd = sys_pidfd_open(getpid(), PIDFD_NONBLOCK);
+ EXPECT_GE(pidfd, 0) {
+ /* pidfd_open() doesn't support PIDFD_NONBLOCK. */
+ ASSERT_EQ(errno, EINVAL);
+ SKIP(return, "Skipping PIDFD_NONBLOCK test");
+ }
+
+ ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
+ ASSERT_LT(ret, 0);
+ ASSERT_EQ(errno, ECHILD);
+ EXPECT_EQ(close(pidfd), 0);
+
+ pid = sys_clone3(&args);
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ kill(getpid(), SIGSTOP);
+ exit(EXIT_SUCCESS);
+ }
- test_pidfd_wait_simple();
- test_pidfd_wait_states();
+ pidfd = sys_pidfd_open(pid, PIDFD_NONBLOCK);
+ EXPECT_GE(pidfd, 0) {
+ /* pidfd_open() doesn't support PIDFD_NONBLOCK. */
+ ASSERT_EQ(errno, EINVAL);
+ SKIP(return, "Skipping PIDFD_NONBLOCK test");
+ }
+
+ flags = fcntl(pidfd, F_GETFL, 0);
+ ASSERT_GT(flags, 0);
+ ASSERT_GT((flags & O_NONBLOCK), 0);
+
+ /*
+ * Callers need to see EAGAIN/EWOULDBLOCK with non-blocking pidfd when
+ * child processes exist but none have exited.
+ */
+ ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
+ ASSERT_LT(ret, 0);
+ ASSERT_EQ(errno, EAGAIN);
+
+ /*
+	 * Callers need to keep seeing 0 with a non-blocking pidfd when WNOHANG
+	 * is passed explicitly and child processes exist but none have
+	 * exited.
+ */
+ ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED | WNOHANG, NULL);
+ ASSERT_EQ(ret, 0);
- return ksft_exit_pass();
+ ASSERT_EQ(fcntl(pidfd, F_SETFL, (flags & ~O_NONBLOCK)), 0);
+
+ ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL), 0);
+ ASSERT_EQ(info.si_signo, SIGCHLD);
+ ASSERT_EQ(info.si_code, CLD_STOPPED);
+ ASSERT_EQ(info.si_pid, parent_tid);
+
+ ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0), 0);
+
+ ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL), 0);
+ ASSERT_EQ(info.si_signo, SIGCHLD);
+ ASSERT_EQ(info.si_code, CLD_EXITED);
+ ASSERT_EQ(info.si_pid, parent_tid);
+
+ EXPECT_EQ(close(pidfd), 0);
}
+
+TEST_HARNESS_MAIN
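The ECHILD/EAGAIN semantics exercised above can also be seen in isolation; a minimal sketch, assuming a kernel with pidfd_open() and P_PIDFD waitid support (the P_PIDFD fallback define and the use of O_NONBLOCK as PIDFD_NONBLOCK mirror the patch; everything else is hypothetical):

	#define _GNU_SOURCE
	#include <errno.h>
	#include <fcntl.h>
	#include <signal.h>
	#include <stdio.h>
	#include <sys/syscall.h>
	#include <sys/types.h>
	#include <sys/wait.h>
	#include <unistd.h>

	#ifndef P_PIDFD
	#define P_PIDFD 3
	#endif

	int main(void)
	{
		pid_t pid = fork();

		if (pid == 0) {
			sleep(1);
			_exit(0);
		}

		/* O_NONBLOCK doubles as PIDFD_NONBLOCK. */
		int pidfd = syscall(SYS_pidfd_open, pid, O_NONBLOCK);
		if (pidfd < 0)
			return 1;	/* kernel without PIDFD_NONBLOCK */

		siginfo_t info = { 0 };
		/* EAGAIN while the child is still running; retry until it exits. */
		while (syscall(SYS_waitid, P_PIDFD, pidfd, &info, WEXITED, NULL) < 0) {
			if (errno != EAGAIN)
				return 1;
			usleep(100 * 1000);
		}
		printf("child exited, si_code=%d\n", info.si_code);
		close(pidfd);
		return 0;
	}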
diff --git a/tools/testing/selftests/powerpc/alignment/alignment_handler.c b/tools/testing/selftests/powerpc/alignment/alignment_handler.c
index 55ef15184057..2a0503bc7e49 100644
--- a/tools/testing/selftests/powerpc/alignment/alignment_handler.c
+++ b/tools/testing/selftests/powerpc/alignment/alignment_handler.c
@@ -55,8 +55,6 @@
#include <setjmp.h>
#include <signal.h>
-#include <asm/cputable.h>
-
#include "utils.h"
#include "instructions.h"
@@ -64,6 +62,7 @@ int bufsize;
int debug;
int testing;
volatile int gotsig;
+bool prefixes_enabled;
char *cipath = "/dev/fb0";
long cioffset;
@@ -77,7 +76,12 @@ void sighandler(int sig, siginfo_t *info, void *ctx)
}
gotsig = sig;
#ifdef __powerpc64__
- ucp->uc_mcontext.gp_regs[PT_NIP] += 4;
+ if (prefixes_enabled) {
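+		/* ISA 3.1 prefixed instructions (primary opcode 1) are 8 bytes wide. */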
+ u32 inst = *(u32 *)ucp->uc_mcontext.gp_regs[PT_NIP];
+ ucp->uc_mcontext.gp_regs[PT_NIP] += ((inst >> 26 == 1) ? 8 : 4);
+ } else {
+ ucp->uc_mcontext.gp_regs[PT_NIP] += 4;
+ }
#else
ucp->uc_mcontext.uc_regs->gregs[PT_NIP] += 4;
#endif
@@ -648,6 +652,8 @@ int main(int argc, char *argv[])
exit(1);
}
+ prefixes_enabled = have_hwcap2(PPC_FEATURE2_ARCH_3_1);
+
rc |= test_harness(test_alignment_handler_vsx_206,
"test_alignment_handler_vsx_206");
rc |= test_harness(test_alignment_handler_vsx_207,
diff --git a/tools/testing/selftests/powerpc/benchmarks/context_switch.c b/tools/testing/selftests/powerpc/benchmarks/context_switch.c
index d50cc05df495..96554e2794d1 100644
--- a/tools/testing/selftests/powerpc/benchmarks/context_switch.c
+++ b/tools/testing/selftests/powerpc/benchmarks/context_switch.c
@@ -481,6 +481,12 @@ int main(int argc, char *argv[])
else
printf("futex");
+ if (!have_hwcap(PPC_FEATURE_HAS_ALTIVEC))
+ touch_altivec = 0;
+
+ if (!have_hwcap(PPC_FEATURE_HAS_VSX))
+ touch_vector = 0;
+
printf(" on cpus %d/%d touching FP:%s altivec:%s vector:%s vdso:%s\n",
cpu1, cpu2, touch_fp ? "yes" : "no", touch_altivec ? "yes" : "no",
touch_vector ? "yes" : "no", touch_vdso ? "yes" : "no");
diff --git a/tools/testing/selftests/powerpc/copyloops/.gitignore b/tools/testing/selftests/powerpc/copyloops/.gitignore
index ddaf140b8255..994b11af765c 100644
--- a/tools/testing/selftests/powerpc/copyloops/.gitignore
+++ b/tools/testing/selftests/powerpc/copyloops/.gitignore
@@ -12,4 +12,4 @@ memcpy_p7_t1
copyuser_64_exc_t0
copyuser_64_exc_t1
copyuser_64_exc_t2
-memcpy_mcsafe_64
+copy_mc_64
diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile
index 0917983a1c78..3095b1f1c02b 100644
--- a/tools/testing/selftests/powerpc/copyloops/Makefile
+++ b/tools/testing/selftests/powerpc/copyloops/Makefile
@@ -12,7 +12,7 @@ ASFLAGS = $(CFLAGS) -Wa,-mpower4
TEST_GEN_PROGS := copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 \
copyuser_p7_t0 copyuser_p7_t1 \
memcpy_64_t0 memcpy_64_t1 memcpy_64_t2 \
- memcpy_p7_t0 memcpy_p7_t1 memcpy_mcsafe_64 \
+ memcpy_p7_t0 memcpy_p7_t1 copy_mc_64 \
copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2
EXTRA_SOURCES := validate.c ../harness.c stubs.S
@@ -45,9 +45,9 @@ $(OUTPUT)/memcpy_p7_t%: memcpy_power7.S $(EXTRA_SOURCES)
-D SELFTEST_CASE=$(subst memcpy_p7_t,,$(notdir $@)) \
-o $@ $^
-$(OUTPUT)/memcpy_mcsafe_64: memcpy_mcsafe_64.S $(EXTRA_SOURCES)
+$(OUTPUT)/copy_mc_64: copy_mc_64.S $(EXTRA_SOURCES)
$(CC) $(CPPFLAGS) $(CFLAGS) \
- -D COPY_LOOP=test_memcpy_mcsafe \
+ -D COPY_LOOP=test_copy_mc_generic \
-o $@ $^
$(OUTPUT)/copyuser_64_exc_t%: copyuser_64.S exc_validate.c ../harness.c \
diff --git a/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S b/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S
new file mode 120000
index 000000000000..dcbe06d500fb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S
@@ -0,0 +1 @@
+../../../../../arch/powerpc/lib/copy_mc_64.S \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S b/tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S
deleted file mode 120000
index f0feef3062f6..000000000000
--- a/tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S
+++ /dev/null
@@ -1 +0,0 @@
-../../../../../arch/powerpc/lib/memcpy_mcsafe_64.S \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/dscr/Makefile b/tools/testing/selftests/powerpc/dscr/Makefile
index cfa6eedcb66c..845db6273a1b 100644
--- a/tools/testing/selftests/powerpc/dscr/Makefile
+++ b/tools/testing/selftests/powerpc/dscr/Makefile
@@ -10,4 +10,4 @@ include ../../lib.mk
$(OUTPUT)/dscr_default_test: LDLIBS += -lpthread
-$(TEST_GEN_PROGS): ../harness.c
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_default_test.c b/tools/testing/selftests/powerpc/dscr/dscr_default_test.c
index 288a4e2ad156..e76611e608af 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_default_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_default_test.c
@@ -63,6 +63,8 @@ int dscr_default(void)
unsigned long i, *status[THREADS];
unsigned long orig_dscr_default;
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR));
+
orig_dscr_default = get_default_dscr();
/* Initial DSCR default */
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c b/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c
index aefcd8d8759b..32fcf2b324b1 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c
@@ -21,6 +21,8 @@ int dscr_explicit(void)
{
unsigned long i, dscr = 0;
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR));
+
srand(getpid());
set_dscr(dscr);
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c b/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c
index 7c1cb46397c6..c6a81b2d6b91 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c
@@ -44,6 +44,8 @@ int dscr_inherit_exec(void)
unsigned long i, dscr = 0;
pid_t pid;
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR));
+
for (i = 0; i < COUNT; i++) {
dscr++;
if (dscr > DSCR_MAX)
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c b/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c
index 04297a69ab59..f9dfd3d3c2d5 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c
@@ -22,6 +22,8 @@ int dscr_inherit(void)
unsigned long i, dscr = 0;
pid_t pid;
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR));
+
srand(getpid());
set_dscr(dscr);
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c
index 02f6b4efde14..fbbdffdb2e5d 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c
@@ -77,6 +77,8 @@ int dscr_sysfs(void)
unsigned long orig_dscr_default;
int i, j;
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR));
+
orig_dscr_default = get_default_dscr();
for (i = 0; i < COUNT; i++) {
for (j = 0; j < DSCR_MAX; j++) {
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c
index 37be2c25f277..191ed126f118 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c
@@ -56,6 +56,8 @@ int dscr_sysfs_thread(void)
unsigned long orig_dscr_default;
int i, j;
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR));
+
orig_dscr_default = get_default_dscr();
for (i = 0; i < COUNT; i++) {
for (j = 0; j < DSCR_MAX; j++) {
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_user_test.c b/tools/testing/selftests/powerpc/dscr/dscr_user_test.c
index eaf785d11eed..e09072446dd3 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_user_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_user_test.c
@@ -36,6 +36,8 @@ int dscr_user(void)
{
int i;
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR));
+
check_dscr("");
for (i = 0; i < COUNT; i++) {
diff --git a/tools/testing/selftests/powerpc/eeh/eeh-basic.sh b/tools/testing/selftests/powerpc/eeh/eeh-basic.sh
index 8a8d0f456946..0d783e1065c8 100755
--- a/tools/testing/selftests/powerpc/eeh/eeh-basic.sh
+++ b/tools/testing/selftests/powerpc/eeh/eeh-basic.sh
@@ -1,17 +1,19 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0-only
+KSELFTESTS_SKIP=4
+
. ./eeh-functions.sh
if ! eeh_supported ; then
echo "EEH not supported on this system, skipping"
- exit 0;
+ exit $KSELFTESTS_SKIP;
fi
if [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_check" ] && \
[ ! -e "/sys/kernel/debug/powerpc/eeh_dev_break" ] ; then
echo "debugfs EEH testing files are missing. Is debugfs mounted?"
- exit 1;
+ exit $KSELFTESTS_SKIP;
fi
pre_lspci=`mktemp`
@@ -84,4 +86,5 @@ echo "$failed devices failed to recover ($dev_count tested)"
lspci | diff -u $pre_lspci -
rm -f $pre_lspci
-exit $failed
+test "$failed" == 0
+exit $?
diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h
index 71d2924f5b8b..052b5a775dc2 100644
--- a/tools/testing/selftests/powerpc/include/utils.h
+++ b/tools/testing/selftests/powerpc/include/utils.h
@@ -12,6 +12,7 @@
#include <stdbool.h>
#include <linux/auxvec.h>
#include <linux/perf_event.h>
+#include <asm/cputable.h>
#include "reg.h"
/* Avoid headaches with PRI?64 - just use %ll? always */
@@ -35,7 +36,6 @@ int pick_online_cpu(void);
int read_debugfs_file(char *debugfs_file, int *result);
int write_debugfs_file(char *debugfs_file, int result);
int read_sysfs_file(char *debugfs_file, char *result, size_t result_size);
-void set_dscr(unsigned long val);
int perf_event_open_counter(unsigned int type,
unsigned long config, int group_fd);
int perf_event_enable(int fd);
diff --git a/tools/testing/selftests/powerpc/mm/bad_accesses.c b/tools/testing/selftests/powerpc/mm/bad_accesses.c
index a864ed7e2008..fd747b2ffcfc 100644
--- a/tools/testing/selftests/powerpc/mm/bad_accesses.c
+++ b/tools/testing/selftests/powerpc/mm/bad_accesses.c
@@ -139,5 +139,6 @@ static int test(void)
int main(void)
{
+ test_harness_set_timeout(300);
return test_harness(test, "bad_accesses");
}
diff --git a/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c b/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c
index 2980abca31e0..2070a1e2b3a5 100644
--- a/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c
+++ b/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c
@@ -9,7 +9,6 @@
#include <stdbool.h>
#include <string.h>
#include <sys/prctl.h>
-#include <asm/cputable.h>
#include "event.h"
#include "utils.h"
diff --git a/tools/testing/selftests/powerpc/pmu/l3_bank_test.c b/tools/testing/selftests/powerpc/pmu/l3_bank_test.c
index a96d512a18c4..a5dfa9bf3b9f 100644
--- a/tools/testing/selftests/powerpc/pmu/l3_bank_test.c
+++ b/tools/testing/selftests/powerpc/pmu/l3_bank_test.c
@@ -20,6 +20,9 @@ static int l3_bank_test(void)
char *p;
int i;
+ // The L3 bank logic is only used on Power8 or later
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
p = malloc(MALLOC_SIZE);
FAIL_IF(!p);
diff --git a/tools/testing/selftests/powerpc/pmu/per_event_excludes.c b/tools/testing/selftests/powerpc/pmu/per_event_excludes.c
index 2d37942bf72b..ad32a09a6540 100644
--- a/tools/testing/selftests/powerpc/pmu/per_event_excludes.c
+++ b/tools/testing/selftests/powerpc/pmu/per_event_excludes.c
@@ -12,8 +12,6 @@
#include <string.h>
#include <sys/prctl.h>
-#include <asm/cputable.h>
-
#include "event.h"
#include "lib.h"
#include "utils.h"
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
index fc477dfe86a2..2e0d86e0687e 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
@@ -20,6 +20,8 @@
#include <signal.h>
#include <sys/types.h>
#include <sys/wait.h>
+#include <sys/syscall.h>
+#include <linux/limits.h>
#include "ptrace.h"
#define SPRN_PVR 0x11F
@@ -44,6 +46,7 @@ struct gstruct {
};
static volatile struct gstruct gstruct __attribute__((aligned(512)));
+static volatile char cwd[PATH_MAX] __attribute__((aligned(8)));
static void get_dbginfo(pid_t child_pid, struct ppc_debug_info *dbginfo)
{
@@ -138,6 +141,9 @@ static void test_workload(void)
write_var(len);
}
+ /* PTRACE_SET_DEBUGREG, Kernel Access Userspace test */
+ syscall(__NR_getcwd, &cwd, PATH_MAX);
+
/* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, WO test */
write_var(1);
@@ -150,6 +156,9 @@ static void test_workload(void)
else
read_var(1);
+ /* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, Kernel Access Userspace test */
+ syscall(__NR_getcwd, &cwd, PATH_MAX);
+
/* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, WO test */
gstruct.a[rand() % A_LEN] = 'a';
@@ -293,6 +302,24 @@ static int test_set_debugreg(pid_t child_pid)
return 0;
}
+static int test_set_debugreg_kernel_userspace(pid_t child_pid)
+{
+ unsigned long wp_addr = (unsigned long)cwd;
+ char *name = "PTRACE_SET_DEBUGREG";
+
+ /* PTRACE_SET_DEBUGREG, Kernel Access Userspace test */
+ wp_addr &= ~0x7UL;
+ wp_addr |= (1Ul << DABR_READ_SHIFT);
+ wp_addr |= (1UL << DABR_WRITE_SHIFT);
+ wp_addr |= (1UL << DABR_TRANSLATION_SHIFT);
+ ptrace_set_debugreg(child_pid, wp_addr);
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
+ check_success(child_pid, name, "Kernel Access Userspace", wp_addr, 8);
+
+ ptrace_set_debugreg(child_pid, 0);
+ return 0;
+}
+
static void get_ppc_hw_breakpoint(struct ppc_hw_breakpoint *info, int type,
unsigned long addr, int len)
{
@@ -338,6 +365,22 @@ static void test_sethwdebug_exact(pid_t child_pid)
ptrace_delhwdebug(child_pid, wh);
}
+static void test_sethwdebug_exact_kernel_userspace(pid_t child_pid)
+{
+ struct ppc_hw_breakpoint info;
+ unsigned long wp_addr = (unsigned long)&cwd;
+ char *name = "PPC_PTRACE_SETHWDEBUG, MODE_EXACT";
+ int len = 1; /* hardcoded in kernel */
+ int wh;
+
+ /* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, Kernel Access Userspace test */
+ get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_WRITE, wp_addr, 0);
+ wh = ptrace_sethwdebug(child_pid, &info);
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
+ check_success(child_pid, name, "Kernel Access Userspace", wp_addr, len);
+ ptrace_delhwdebug(child_pid, wh);
+}
+
static void test_sethwdebug_range_aligned(pid_t child_pid)
{
struct ppc_hw_breakpoint info;
@@ -452,9 +495,10 @@ static void
run_tests(pid_t child_pid, struct ppc_debug_info *dbginfo, bool dawr)
{
test_set_debugreg(child_pid);
+ test_set_debugreg_kernel_userspace(child_pid);
+ test_sethwdebug_exact(child_pid);
+ test_sethwdebug_exact_kernel_userspace(child_pid);
if (dbginfo->features & PPC_DEBUG_FEATURE_DATA_BP_RANGE) {
- test_sethwdebug_exact(child_pid);
-
test_sethwdebug_range_aligned(child_pid);
if (dawr || is_8xx) {
test_sethwdebug_range_unaligned(child_pid);
diff --git a/tools/testing/selftests/powerpc/security/rfi_flush.c b/tools/testing/selftests/powerpc/security/rfi_flush.c
index 0a7d0afb26b8..93a65bd1f231 100644
--- a/tools/testing/selftests/powerpc/security/rfi_flush.c
+++ b/tools/testing/selftests/powerpc/security/rfi_flush.c
@@ -10,6 +10,7 @@
#include <stdint.h>
#include <malloc.h>
#include <unistd.h>
+#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
@@ -41,6 +42,40 @@ static void syscall_loop(char *p, unsigned long iterations,
}
}
+static void sigill_handler(int signr, siginfo_t *info, void *unused)
+{
+ static int warned = 0;
+ ucontext_t *ctx = (ucontext_t *)unused;
+ unsigned long *pc = &UCONTEXT_NIA(ctx);
+
+ /* mtspr 3,RS to check for move to DSCR below */
+ if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) {
+ if (!warned++)
+ printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n");
+ *pc += 4;
+ } else {
+ printf("SIGILL at %p\n", pc);
+ abort();
+ }
+}
+
+static void set_dscr(unsigned long val)
+{
+ static int init = 0;
+ struct sigaction sa;
+
+ if (!init) {
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = sigill_handler;
+ sa.sa_flags = SA_SIGINFO;
+ if (sigaction(SIGILL, &sa, NULL))
+ perror("sigill_handler");
+ init = 1;
+ }
+
+ asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR));
+}
+
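
The opcode check in sigill_handler() is worth unpacking: "mtspr 3,rN"
assembles to 0x7c0303a6 | (N << 21), and the 0xfc1fffff mask clears
exactly the 5-bit RS field, so a DSCR write from any source GPR matches.
A small self-check of that arithmetic, assuming the standard encoding:

	#include <assert.h>

	int main(void)
	{
		/* "mtspr 3,r3": the mtspr-to-SPR-3 base opcode with RS = 3 */
		unsigned int insn = 0x7c0303a6 | (3u << 21);

		/* masking away the RS field recovers the base opcode */
		assert((insn & 0xfc1fffff) == 0x7c0303a6);
		return 0;
	}
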
int rfi_flush_test(void)
{
char *p;
@@ -54,6 +89,9 @@ int rfi_flush_test(void)
SKIP_IF(geteuid() != 0);
+ // The PMU event we use only works on Power7 or later
+ SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
+
if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_org)) {
perror("Unable to read powerpc/rfi_flush debugfs file");
SKIP_IF(1);
diff --git a/tools/testing/selftests/powerpc/security/spectre_v2.c b/tools/testing/selftests/powerpc/security/spectre_v2.c
index c8d82b784102..adc2b7294e5f 100644
--- a/tools/testing/selftests/powerpc/security/spectre_v2.c
+++ b/tools/testing/selftests/powerpc/security/spectre_v2.c
@@ -134,6 +134,9 @@ int spectre_v2_test(void)
s64 miss_percent;
bool is_p9;
+ // The PMU events we use only work on Power8 or later
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
state = get_sysfs_state();
if (state == UNKNOWN) {
printf("Error: couldn't determine spectre_v2 mitigation state?\n");
diff --git a/tools/testing/selftests/powerpc/stringloops/memcmp.c b/tools/testing/selftests/powerpc/stringloops/memcmp.c
index 979df3d98368..cb2f18855c8d 100644
--- a/tools/testing/selftests/powerpc/stringloops/memcmp.c
+++ b/tools/testing/selftests/powerpc/stringloops/memcmp.c
@@ -4,7 +4,7 @@
#include <string.h>
#include <sys/mman.h>
#include <time.h>
-#include <asm/cputable.h>
+
#include "utils.h"
#define SIZE 256
diff --git a/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S b/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S
index cc4930467235..7887f78cf072 100644
--- a/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S
+++ b/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S
@@ -3,9 +3,13 @@
.data
.balign 8
-message:
+success_message:
.ascii "success: switch_endian_test\n\0"
+ .balign 8
+failure_message:
+ .ascii "failure: switch_endian_test\n\0"
+
.section ".toc"
.balign 8
pattern:
@@ -64,6 +68,9 @@ FUNC_START(_start)
li r0, __NR_switch_endian
sc
+ tdi 0, 0, 0x48 // never-trapping tdi; byte-swapped it is 0x48000008, ie. "b .+8", taken only if the endian was switched
+ b .Lfail // exit if endian didn't switch
+
#include "check-reversed.S"
/* Flip back, r0 already has the switch syscall number */
@@ -71,12 +78,20 @@ FUNC_START(_start)
#include "check.S"
+ ld r4, success_message@got(%r2)
+ li r5, 28 // strlen(success_message)
+ li r14, 0 // exit status
+.Lout:
li r0, __NR_write
li r3, 1 /* stdout */
- ld r4, message@got(%r2)
- li r5, 28 /* strlen(message3) */
sc
li r0, __NR_exit
- li r3, 0
+ mr r3, r14
sc
b .
+
+.Lfail:
+ ld r4, failure_message@got(%r2)
+ li r5, 28 // strlen(failure_message)
+ li r14, 1
+ b .Lout
diff --git a/tools/testing/selftests/powerpc/syscalls/Makefile b/tools/testing/selftests/powerpc/syscalls/Makefile
index 01b22775ca87..b63f8459c704 100644
--- a/tools/testing/selftests/powerpc/syscalls/Makefile
+++ b/tools/testing/selftests/powerpc/syscalls/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
-TEST_GEN_PROGS := ipc_unmuxed
+TEST_GEN_PROGS := ipc_unmuxed rtas_filter
CFLAGS += -I../../../../../usr/include
diff --git a/tools/testing/selftests/powerpc/syscalls/rtas_filter.c b/tools/testing/selftests/powerpc/syscalls/rtas_filter.c
new file mode 100644
index 000000000000..03b487f18d00
--- /dev/null
+++ b/tools/testing/selftests/powerpc/syscalls/rtas_filter.c
@@ -0,0 +1,285 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright 2005-2020 IBM Corporation.
+ *
+ * Includes code from librtas (https://github.com/ibm-power-utilities/librtas/)
+ */
+
+#include <byteswap.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <errno.h>
+#include "utils.h"
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define cpu_to_be32(x) bswap_32(x)
+#define be32_to_cpu(x) bswap_32(x)
+#else
+#define cpu_to_be32(x) (x)
+#define be32_to_cpu(x) (x)
+#endif
+
+#define RTAS_IO_ASSERT -1098 /* Unexpected I/O Error */
+#define RTAS_UNKNOWN_OP -1099 /* No Firmware Implementation of Function */
+#define BLOCK_SIZE 4096
+#define PAGE_SIZE 4096
+#define MAX_PAGES 64
+
+static const char *ofdt_rtas_path = "/proc/device-tree/rtas";
+
+typedef uint32_t __be32;
+struct rtas_args {
+ __be32 token;
+ __be32 nargs;
+ __be32 nret;
+ __be32 args[16];
+ __be32 *rets; /* Pointer to return values in args[]. */
+};
+
+struct region {
+ uint64_t addr;
+ uint32_t size;
+ struct region *next;
+};
+
+int read_entire_file(int fd, char **buf, size_t *len)
+{
+ size_t buf_size = 0;
+ size_t off = 0;
+ int rc;
+
+ *buf = NULL;
+ do {
+ buf_size += BLOCK_SIZE;
+ if (*buf == NULL)
+ *buf = malloc(buf_size);
+ else
+ *buf = realloc(*buf, buf_size);
+
+ if (*buf == NULL)
+ return -ENOMEM;
+
+ rc = read(fd, *buf + off, BLOCK_SIZE);
+ if (rc < 0)
+ return -EIO;
+
+ off += rc;
+ } while (rc == BLOCK_SIZE);
+
+ if (len)
+ *len = off;
+
+ return 0;
+}
+
+static int open_prop_file(const char *prop_path, const char *prop_name, int *fd)
+{
+ char *path;
+ int len;
+
+ /* allocate enough for two strings, a slash and a trailing NULL */
+ len = strlen(prop_path) + strlen(prop_name) + 1 + 1;
+ path = malloc(len);
+ if (path == NULL)
+ return -ENOMEM;
+
+ snprintf(path, len, "%s/%s", prop_path, prop_name);
+
+ *fd = open(path, O_RDONLY);
+ free(path);
+ if (*fd < 0)
+ return -errno;
+
+ return 0;
+}
+
+static int get_property(const char *prop_path, const char *prop_name,
+ char **prop_val, size_t *prop_len)
+{
+ int rc, fd;
+
+ rc = open_prop_file(prop_path, prop_name, &fd);
+ if (rc)
+ return rc;
+
+ rc = read_entire_file(fd, prop_val, prop_len);
+ close(fd);
+
+ return rc;
+}
+
+int rtas_token(const char *call_name)
+{
+ char *prop_buf = NULL;
+ size_t len;
+ int rc;
+
+ rc = get_property(ofdt_rtas_path, call_name, &prop_buf, &len);
+ if (rc < 0) {
+ rc = RTAS_UNKNOWN_OP;
+ goto err;
+ }
+
+ rc = be32_to_cpu(*(int *)prop_buf);
+
+err:
+ free(prop_buf);
+ return rc;
+}
+
+static int read_kregion_bounds(struct region *kregion)
+{
+ char *buf;
+ int fd;
+ int rc;
+
+ fd = open("/proc/ppc64/rtas/rmo_buffer", O_RDONLY);
+ if (fd < 0) {
+ printf("Could not open rmo_buffer file\n");
+ return RTAS_IO_ASSERT;
+ }
+
+ rc = read_entire_file(fd, &buf, NULL);
+ close(fd);
+ if (rc) {
+ free(buf);
+ return rc;
+ }
+
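+ /* assumed format: "<addr> <size>" in hex, e.g. "1f000000 10000"
+  * yields kregion->addr = 0x1f000000, kregion->size = 0x10000 */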
+ sscanf(buf, "%" SCNx64 " %x", &kregion->addr, &kregion->size);
+ free(buf);
+
+ if (!(kregion->size && kregion->addr) ||
+ (kregion->size > (PAGE_SIZE * MAX_PAGES))) {
+ printf("Unexpected kregion bounds\n");
+ return RTAS_IO_ASSERT;
+ }
+
+ return 0;
+}
+
+static int rtas_call(const char *name, int nargs,
+ int nrets, ...)
+{
+ struct rtas_args args;
+ __be32 *rets[16];
+ int i, rc, token;
+ va_list ap;
+
+ va_start(ap, nrets);
+
+ token = rtas_token(name);
+ if (token == RTAS_UNKNOWN_OP) {
+ // We don't care if the call doesn't exist
+ printf("call '%s' not available, skipping...", name);
+ rc = RTAS_UNKNOWN_OP;
+ goto err;
+ }
+
+ args.token = cpu_to_be32(token);
+ args.nargs = cpu_to_be32(nargs);
+ args.nret = cpu_to_be32(nrets);
+
+ for (i = 0; i < nargs; i++)
+ args.args[i] = (__be32) va_arg(ap, unsigned long);
+
+ for (i = 0; i < nrets; i++)
+ rets[i] = (__be32 *) va_arg(ap, unsigned long);
+
+ rc = syscall(__NR_rtas, &args);
+ if (rc) {
+ rc = -errno;
+ goto err;
+ }
+
+ if (nrets) {
+ *(rets[0]) = be32_to_cpu(args.args[nargs]);
+
+ for (i = 1; i < nrets; i++) {
+ *(rets[i]) = args.args[nargs + i];
+ }
+ }
+
+err:
+ va_end(ap);
+ return rc;
+}
+
+static int test(void)
+{
+ struct region rmo_region;
+ uint32_t rmo_start;
+ uint32_t rmo_end;
+ __be32 rets[1];
+ int rc;
+
+ // Test a legitimate harmless call
+ // Expected: call succeeds
+ printf("Test a permitted call, no parameters... ");
+ rc = rtas_call("get-time-of-day", 0, 1, rets);
+ printf("rc: %d\n", rc);
+ FAIL_IF(rc != 0 && rc != RTAS_UNKNOWN_OP);
+
+ // Test a prohibited call
+ // Expected: call returns -EINVAL
+ printf("Test a prohibited call... ");
+ rc = rtas_call("nvram-fetch", 0, 1, rets);
+ printf("rc: %d\n", rc);
+ FAIL_IF(rc != -EINVAL && rc != RTAS_UNKNOWN_OP);
+
+ // Get RMO
+ rc = read_kregion_bounds(&rmo_region);
+ if (rc) {
+ printf("Couldn't read RMO region bounds, skipping remaining cases\n");
+ return 0;
+ }
+ rmo_start = rmo_region.addr;
+ rmo_end = rmo_start + rmo_region.size - 1;
+ printf("RMO range: %08x - %08x\n", rmo_start, rmo_end);
+
+ // Test a permitted call, user-supplied size, buffer inside RMO
+ // Expected: call succeeds
+ printf("Test a permitted call, user-supplied size, buffer inside RMO... ");
+ rc = rtas_call("ibm,get-system-parameter", 3, 1, 0, cpu_to_be32(rmo_start),
+ cpu_to_be32(rmo_end - rmo_start + 1), rets);
+ printf("rc: %d\n", rc);
+ FAIL_IF(rc != 0 && rc != RTAS_UNKNOWN_OP);
+
+ // Test a permitted call, user-supplied size, buffer start outside RMO
+ // Expected: call returns -EINVAL
+ printf("Test a permitted call, user-supplied size, buffer start outside RMO... ");
+ rc = rtas_call("ibm,get-system-parameter", 3, 1, 0, cpu_to_be32(rmo_end + 1),
+ cpu_to_be32(4000), rets);
+ printf("rc: %d\n", rc);
+ FAIL_IF(rc != -EINVAL && rc != RTAS_UNKNOWN_OP);
+
+ // Test a permitted call, user-supplied size, buffer end outside RMO
+ // Expected: call returns -EINVAL
+ printf("Test a permitted call, user-supplied size, buffer end outside RMO... ");
+ rc = rtas_call("ibm,get-system-parameter", 3, 1, 0, cpu_to_be32(rmo_start),
+ cpu_to_be32(rmo_end - rmo_start + 2), rets);
+ printf("rc: %d\n", rc);
+ FAIL_IF(rc != -EINVAL && rc != RTAS_UNKNOWN_OP);
+
+ // Test a permitted call, fixed size, buffer end outside RMO
+ // Expected: call returns -EINVAL
+ printf("Test a permitted call, fixed size, buffer end outside RMO... ");
+ rc = rtas_call("ibm,configure-connector", 2, 1, cpu_to_be32(rmo_end - 4000), 0, rets);
+ printf("rc: %d\n", rc);
+ FAIL_IF(rc != -EINVAL && rc != RTAS_UNKNOWN_OP);
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(test, "rtas_filter");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-poison.c b/tools/testing/selftests/powerpc/tm/tm-poison.c
index 977558497c16..29e5f26af7b9 100644
--- a/tools/testing/selftests/powerpc/tm/tm-poison.c
+++ b/tools/testing/selftests/powerpc/tm/tm-poison.c
@@ -26,7 +26,7 @@
int tm_poison_test(void)
{
- int pid;
+ int cpu, pid;
cpu_set_t cpuset;
uint64_t poison = 0xdeadbeefc0dec0fe;
uint64_t unknown = 0;
@@ -35,10 +35,13 @@ int tm_poison_test(void)
SKIP_IF(!have_htm());
- /* Attach both Child and Parent to CPU 0 */
+ cpu = pick_online_cpu();
+ FAIL_IF(cpu < 0);
+
+ // Attach both Child and Parent to the same CPU
CPU_ZERO(&cpuset);
- CPU_SET(0, &cpuset);
- sched_setaffinity(0, sizeof(cpuset), &cpuset);
+ CPU_SET(cpu, &cpuset);
+ FAIL_IF(sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0);
pid = fork();
if (!pid) {
diff --git a/tools/testing/selftests/powerpc/tm/tm-tmspr.c b/tools/testing/selftests/powerpc/tm/tm-tmspr.c
index 17becf3dcee4..794d574db784 100644
--- a/tools/testing/selftests/powerpc/tm/tm-tmspr.c
+++ b/tools/testing/selftests/powerpc/tm/tm-tmspr.c
@@ -33,19 +33,13 @@
#include "utils.h"
#include "tm.h"
-int num_loops = 10000;
+int num_loops = 1000000;
int passed = 1;
void tfiar_tfhar(void *in)
{
- int i, cpu;
unsigned long tfhar, tfhar_rd, tfiar, tfiar_rd;
- cpu_set_t cpuset;
-
- CPU_ZERO(&cpuset);
- cpu = (unsigned long)in >> 1;
- CPU_SET(cpu, &cpuset);
- sched_setaffinity(0, sizeof(cpuset), &cpuset);
+ int i;
/* TFIAR: Last bit has to be high so userspace can read register */
tfiar = ((unsigned long)in) + 1;
diff --git a/tools/testing/selftests/powerpc/tm/tm-trap.c b/tools/testing/selftests/powerpc/tm/tm-trap.c
index 601f0c1d450d..c75960af8018 100644
--- a/tools/testing/selftests/powerpc/tm/tm-trap.c
+++ b/tools/testing/selftests/powerpc/tm/tm-trap.c
@@ -247,8 +247,7 @@ void *pong(void *not_used)
int tm_trap_test(void)
{
uint16_t k = 1;
-
- int rc;
+ int cpu, rc;
pthread_attr_t attr;
cpu_set_t cpuset;
@@ -267,9 +266,12 @@ int tm_trap_test(void)
usr1_sa.sa_sigaction = usr1_signal_handler;
sigaction(SIGUSR1, &usr1_sa, NULL);
- /* Set only CPU 0 in the mask. Both threads will be bound to cpu 0. */
+ cpu = pick_online_cpu();
+ FAIL_IF(cpu < 0);
+
+ // Set only one CPU in the mask. Both threads will be bound to that CPU.
CPU_ZERO(&cpuset);
- CPU_SET(0, &cpuset);
+ CPU_SET(cpu, &cpuset);
/* Init pthread attribute */
rc = pthread_attr_init(&attr);
diff --git a/tools/testing/selftests/powerpc/tm/tm-unavailable.c b/tools/testing/selftests/powerpc/tm/tm-unavailable.c
index 2ca2fccb0a3e..a1348a5f721a 100644
--- a/tools/testing/selftests/powerpc/tm/tm-unavailable.c
+++ b/tools/testing/selftests/powerpc/tm/tm-unavailable.c
@@ -338,16 +338,19 @@ void test_fp_vec(int fp, int vec, pthread_attr_t *attr)
int tm_unavailable_test(void)
{
- int rc, exception; /* FP = 0, VEC = 1, VSX = 2 */
+ int cpu, rc, exception; /* FP = 0, VEC = 1, VSX = 2 */
pthread_t t1;
pthread_attr_t attr;
cpu_set_t cpuset;
SKIP_IF(!have_htm());
- /* Set only CPU 0 in the mask. Both threads will be bound to CPU 0. */
+ cpu = pick_online_cpu();
+ FAIL_IF(cpu < 0);
+
+ // Set only one CPU in the mask. Both threads will be bound to that CPU.
CPU_ZERO(&cpuset);
- CPU_SET(0, &cpuset);
+ CPU_SET(cpu, &cpuset);
/* Init pthread attribute. */
rc = pthread_attr_init(&attr);
diff --git a/tools/testing/selftests/powerpc/tm/tm.h b/tools/testing/selftests/powerpc/tm/tm.h
index c402464b038f..c5a1e5c163fc 100644
--- a/tools/testing/selftests/powerpc/tm/tm.h
+++ b/tools/testing/selftests/powerpc/tm/tm.h
@@ -6,9 +6,8 @@
#ifndef _SELFTESTS_POWERPC_TM_TM_H
#define _SELFTESTS_POWERPC_TM_TM_H
-#include <asm/tm.h>
-#include <asm/cputable.h>
#include <stdbool.h>
+#include <asm/tm.h>
#include "utils.h"
diff --git a/tools/testing/selftests/powerpc/utils.c b/tools/testing/selftests/powerpc/utils.c
index 18b6a773d5c7..1f36ee1a909a 100644
--- a/tools/testing/selftests/powerpc/utils.c
+++ b/tools/testing/selftests/powerpc/utils.c
@@ -10,7 +10,6 @@
#include <fcntl.h>
#include <link.h>
#include <sched.h>
-#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -273,40 +272,6 @@ int perf_event_reset(int fd)
return 0;
}
-static void sigill_handler(int signr, siginfo_t *info, void *unused)
-{
- static int warned = 0;
- ucontext_t *ctx = (ucontext_t *)unused;
- unsigned long *pc = &UCONTEXT_NIA(ctx);
-
- /* mtspr 3,RS to check for move to DSCR below */
- if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) {
- if (!warned++)
- printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n");
- *pc += 4;
- } else {
- printf("SIGILL at %p\n", pc);
- abort();
- }
-}
-
-void set_dscr(unsigned long val)
-{
- static int init = 0;
- struct sigaction sa;
-
- if (!init) {
- memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = sigill_handler;
- sa.sa_flags = SA_SIGINFO;
- if (sigaction(SIGILL, &sa, NULL))
- perror("sigill_handler");
- init = 1;
- }
-
- asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR));
-}
-
int using_hash_mmu(bool *using_hash)
{
char line[128];
@@ -318,7 +283,9 @@ int using_hash_mmu(bool *using_hash)
rc = 0;
while (fgets(line, sizeof(line), f) != NULL) {
- if (strcmp(line, "MMU : Hash\n") == 0) {
+ if (!strcmp(line, "MMU : Hash\n") ||
+ !strcmp(line, "platform : Cell\n") ||
+ !strcmp(line, "platform : PowerMac\n")) {
*using_hash = true;
goto out;
}
diff --git a/tools/testing/selftests/ptrace/.gitignore b/tools/testing/selftests/ptrace/.gitignore
index 7bebf9534a86..792318aaa30c 100644
--- a/tools/testing/selftests/ptrace/.gitignore
+++ b/tools/testing/selftests/ptrace/.gitignore
@@ -1,3 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
get_syscall_info
peeksiginfo
+vmaccess
diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c
index e8a657a5f48a..384589095864 100644
--- a/tools/testing/selftests/rseq/param_test.c
+++ b/tools/testing/selftests/rseq/param_test.c
@@ -1,8 +1,10 @@
// SPDX-License-Identifier: LGPL-2.1
#define _GNU_SOURCE
#include <assert.h>
+#include <linux/membarrier.h>
#include <pthread.h>
#include <sched.h>
+#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
@@ -1131,6 +1133,220 @@ static int set_signal_handler(void)
return ret;
}
+struct test_membarrier_thread_args {
+ int stop;
+ intptr_t percpu_list_ptr;
+};
+
+/* Worker threads modify data in their "active" percpu lists. */
+void *test_membarrier_worker_thread(void *arg)
+{
+ struct test_membarrier_thread_args *args =
+ (struct test_membarrier_thread_args *)arg;
+ const int iters = opt_reps;
+ int i;
+
+ if (rseq_register_current_thread()) {
+ fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
+ errno, strerror(errno));
+ abort();
+ }
+
+ /* Wait for initialization. */
+ while (!atomic_load(&args->percpu_list_ptr)) {}
+
+ for (i = 0; i < iters; ++i) {
+ int ret;
+
+ do {
+ int cpu = rseq_cpu_start();
+
+ ret = rseq_offset_deref_addv(&args->percpu_list_ptr,
+ sizeof(struct percpu_list_entry) * cpu, 1, cpu);
+ } while (rseq_unlikely(ret));
+ }
+
+ if (rseq_unregister_current_thread()) {
+ fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
+ errno, strerror(errno));
+ abort();
+ }
+ return NULL;
+}
+
+void test_membarrier_init_percpu_list(struct percpu_list *list)
+{
+ int i;
+
+ memset(list, 0, sizeof(*list));
+ for (i = 0; i < CPU_SETSIZE; i++) {
+ struct percpu_list_node *node;
+
+ node = malloc(sizeof(*node));
+ assert(node);
+ node->data = 0;
+ node->next = NULL;
+ list->c[i].head = node;
+ }
+}
+
+void test_membarrier_free_percpu_list(struct percpu_list *list)
+{
+ int i;
+
+ for (i = 0; i < CPU_SETSIZE; i++)
+ free(list->c[i].head);
+}
+
+static int sys_membarrier(int cmd, int flags, int cpu_id)
+{
+ return syscall(__NR_membarrier, cmd, flags, cpu_id);
+}
+
+/*
+ * The manager thread swaps per-cpu lists that worker threads see,
+ * and validates that there are no unexpected modifications.
+ */
+void *test_membarrier_manager_thread(void *arg)
+{
+ struct test_membarrier_thread_args *args =
+ (struct test_membarrier_thread_args *)arg;
+ struct percpu_list list_a, list_b;
+ intptr_t expect_a = 0, expect_b = 0;
+ int cpu_a = 0, cpu_b = 0;
+
+ if (rseq_register_current_thread()) {
+ fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
+ errno, strerror(errno));
+ abort();
+ }
+
+ /* Init lists. */
+ test_membarrier_init_percpu_list(&list_a);
+ test_membarrier_init_percpu_list(&list_b);
+
+ atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a);
+
+ while (!atomic_load(&args->stop)) {
+ /* list_a is "active". */
+ cpu_a = rand() % CPU_SETSIZE;
+ /*
+ * As list_b is "inactive", we should never see changes
+ * to list_b.
+ */
+ if (expect_b != atomic_load(&list_b.c[cpu_b].head->data)) {
+ fprintf(stderr, "Membarrier test failed\n");
+ abort();
+ }
+
+ /* Make list_b "active". */
+ atomic_store(&args->percpu_list_ptr, (intptr_t)&list_b);
+ if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
+ MEMBARRIER_CMD_FLAG_CPU, cpu_a) &&
+ errno != ENXIO /* missing CPU */) {
+ perror("sys_membarrier");
+ abort();
+ }
+ /*
+ * Cpu A should now only modify list_b, so the values
+ * in list_a should be stable.
+ */
+ expect_a = atomic_load(&list_a.c[cpu_a].head->data);
+
+ cpu_b = rand() % CPU_SETSIZE;
+ /*
+ * As list_a is "inactive", we should never see changes
+ * to list_a.
+ */
+ if (expect_a != atomic_load(&list_a.c[cpu_a].head->data)) {
+ fprintf(stderr, "Membarrier test failed\n");
+ abort();
+ }
+
+ /* Make list_a "active". */
+ atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a);
+ if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
+ MEMBARRIER_CMD_FLAG_CPU, cpu_b) &&
+ errno != ENXIO /* missing CPU */) {
+ perror("sys_membarrier");
+ abort();
+ }
+ /* Remember a value from list_b. */
+ expect_b = atomic_load(&list_b.c[cpu_b].head->data);
+ }
+
+ test_membarrier_free_percpu_list(&list_a);
+ test_membarrier_free_percpu_list(&list_b);
+
+ if (rseq_unregister_current_thread()) {
+ fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
+ errno, strerror(errno));
+ abort();
+ }
+ return NULL;
+}
+
+/* Test MEMBARRIER_CMD_PRIVATE_RESTART_RSEQ_ON_CPU membarrier command. */
+#ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
+void test_membarrier(void)
+{
+ const int num_threads = opt_threads;
+ struct test_membarrier_thread_args thread_args;
+ pthread_t worker_threads[num_threads];
+ pthread_t manager_thread;
+ int i, ret;
+
+ if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) {
+ perror("sys_membarrier");
+ abort();
+ }
+
+ thread_args.stop = 0;
+ thread_args.percpu_list_ptr = 0;
+ ret = pthread_create(&manager_thread, NULL,
+ test_membarrier_manager_thread, &thread_args);
+ if (ret) {
+ errno = ret;
+ perror("pthread_create");
+ abort();
+ }
+
+ for (i = 0; i < num_threads; i++) {
+ ret = pthread_create(&worker_threads[i], NULL,
+ test_membarrier_worker_thread, &thread_args);
+ if (ret) {
+ errno = ret;
+ perror("pthread_create");
+ abort();
+ }
+ }
+
+ for (i = 0; i < num_threads; i++) {
+ ret = pthread_join(worker_threads[i], NULL);
+ if (ret) {
+ errno = ret;
+ perror("pthread_join");
+ abort();
+ }
+ }
+
+ atomic_store(&thread_args.stop, 1);
+ ret = pthread_join(manager_thread, NULL);
+ if (ret) {
+ errno = ret;
+ perror("pthread_join");
+ abort();
+ }
+}
+#else /* RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV */
+void test_membarrier(void)
+{
+ fprintf(stderr, "rseq_offset_deref_addv is not implemented on this architecture. "
+ "Skipping membarrier test.\n");
+}
+#endif
+
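
The core of the test is this publish-then-barrier pattern, condensed
from the manager loop above (new_ptr and cpu are stand-ins):

	/* publish the new active list */
	atomic_store(&args->percpu_list_ptr, (intptr_t)new_ptr);

	/* force an rseq restart on the target cpu: any critical section
	 * still dereferencing the old pointer aborts before this returns */
	if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
			   MEMBARRIER_CMD_FLAG_CPU, cpu) &&
	    errno != ENXIO /* cpu offline */) {
		perror("sys_membarrier");
		abort();
	}

	/* the old list's per-cpu counters are now stable and can be
	 * compared against the values recorded earlier */
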
static void show_usage(int argc, char **argv)
{
printf("Usage : %s <OPTIONS>\n",
@@ -1153,7 +1369,7 @@ static void show_usage(int argc, char **argv)
printf(" [-r N] Number of repetitions per thread (default 5000)\n");
printf(" [-d] Disable rseq system call (no initialization)\n");
printf(" [-D M] Disable rseq for each M threads\n");
- printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement\n");
+ printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n");
printf(" [-M] Push into buffer and memcpy buffer with memory barriers.\n");
printf(" [-v] Verbose output.\n");
printf(" [-h] Show this help.\n");
@@ -1268,6 +1484,7 @@ int main(int argc, char **argv)
case 'i':
case 'b':
case 'm':
+ case 'r':
break;
default:
show_usage(argc, argv);
@@ -1320,6 +1537,10 @@ int main(int argc, char **argv)
printf_verbose("counter increment\n");
test_percpu_inc();
break;
+ case 'r':
+ printf_verbose("membarrier\n");
+ test_membarrier();
+ break;
}
if (!opt_disable_rseq && rseq_unregister_current_thread())
abort();
diff --git a/tools/testing/selftests/rseq/rseq-x86.h b/tools/testing/selftests/rseq/rseq-x86.h
index b2da6004fe30..640411518e46 100644
--- a/tools/testing/selftests/rseq/rseq-x86.h
+++ b/tools/testing/selftests/rseq/rseq-x86.h
@@ -279,6 +279,63 @@ error1:
#endif
}
+#define RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
+
+/*
+ * pval = *(ptr+off)
+ * *pval += inc;
+ */
+static inline __attribute__((always_inline))
+int rseq_offset_deref_addv(intptr_t *ptr, off_t off, intptr_t inc, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+#endif
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
+ RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
+#endif
+ /* get p+v */
+ "movq %[ptr], %%rbx\n\t"
+ "addq %[off], %%rbx\n\t"
+ /* get pv */
+ "movq (%%rbx), %%rcx\n\t"
+ /* *pv += inc */
+ "addq %[inc], (%%rcx)\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(4)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [rseq_abi] "r" (&__rseq_abi),
+ /* final store input */
+ [ptr] "m" (*ptr),
+ [off] "er" (off),
+ [inc] "er" (inc)
+ : "memory", "cc", "rax", "rbx", "rcx"
+ RSEQ_INJECT_CLOBBER
+ : abort
+#ifdef RSEQ_COMPARE_TWICE
+ , error1
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+#endif
+}
+
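
When it commits, the critical section above is equivalent to this
plain-C sketch (illustrative only: the real sequence aborts and retries
on preemption or cpu mismatch; the helper name is hypothetical):

	#include <stdint.h>
	#include <sys/types.h>

	/* non-restartable illustration of rseq_offset_deref_addv() */
	static int offset_deref_addv_sketch(intptr_t *ptr, off_t off, intptr_t inc)
	{
		intptr_t base = *ptr;                        /* load published base */
		intptr_t *pval = *(intptr_t **)(base + off); /* slot at base + off */

		*pval += inc; /* rseq makes the real add per-cpu atomic */
		return 0;
	}
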
static inline __attribute__((always_inline))
int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
intptr_t *v2, intptr_t newv2,
diff --git a/tools/testing/selftests/rseq/run_param_test.sh b/tools/testing/selftests/rseq/run_param_test.sh
index e426304fd4a0..f51bc83c9e41 100755
--- a/tools/testing/selftests/rseq/run_param_test.sh
+++ b/tools/testing/selftests/rseq/run_param_test.sh
@@ -15,6 +15,7 @@ TEST_LIST=(
"-T m"
"-T m -M"
"-T i"
+ "-T r"
)
TEST_NAME=(
@@ -25,6 +26,7 @@ TEST_NAME=(
"memcpy"
"memcpy with barrier"
"increment"
+ "membarrier"
)
IFS="$OLDIFS"
diff --git a/tools/testing/selftests/run_kselftest.sh b/tools/testing/selftests/run_kselftest.sh
new file mode 100755
index 000000000000..609a4ef9300e
--- /dev/null
+++ b/tools/testing/selftests/run_kselftest.sh
@@ -0,0 +1,93 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run installed kselftest tests.
+#
+BASE_DIR=$(realpath $(dirname $0))
+cd $BASE_DIR
+TESTS="$BASE_DIR"/kselftest-list.txt
+if [ ! -r "$TESTS" ] ; then
+ echo "$0: Could not find list of tests to run ($TESTS)" >&2
+ available=""
+else
+ available="$(cat "$TESTS")"
+fi
+
+. ./kselftest/runner.sh
+ROOT=$PWD
+
+usage()
+{
+ cat <<EOF
+Usage: $0 [OPTIONS]
+ -s | --summary Print summary with detailed log in output.log
+ -t | --test COLLECTION:TEST Run TEST from COLLECTION
+ -c | --collection COLLECTION Run all tests from COLLECTION
+ -l | --list List the available collection:test entries
+ -d | --dry-run Don't actually run any tests
+ -h | --help Show this usage info
+EOF
+ exit $1
+}
+
+COLLECTIONS=""
+TESTS=""
+dryrun=""
+while true; do
+ case "$1" in
+ -s | --summary)
+ logfile="$BASE_DIR"/output.log
+ cat /dev/null > $logfile
+ shift ;;
+ -t | --test)
+ TESTS="$TESTS $2"
+ shift 2 ;;
+ -c | --collection)
+ COLLECTIONS="$COLLECTIONS $2"
+ shift 2 ;;
+ -l | --list)
+ echo "$available"
+ exit 0 ;;
+ -d | --dry-run)
+ dryrun="echo"
+ shift ;;
+ -h | --help)
+ usage 0 ;;
+ "")
+ break ;;
+ *)
+ usage 1 ;;
+ esac
+done
+
+# Add all selected collections to the explicit test list.
+if [ -n "$COLLECTIONS" ]; then
+ for collection in $COLLECTIONS ; do
+ found="$(echo "$available" | grep "^$collection:")"
+ if [ -z "$found" ] ; then
+ echo "No such collection '$collection'" >&2
+ exit 1
+ fi
+ TESTS="$TESTS $found"
+ done
+fi
+# Replace available test list with explicitly selected tests.
+if [ -n "$TESTS" ]; then
+ valid=""
+ for test in $TESTS ; do
+ found="$(echo "$available" | grep "^${test}$")"
+ if [ -z "$found" ] ; then
+ echo "No such test '$test'" >&2
+ exit 1
+ fi
+ valid="$valid $found"
+ done
+ available="$(echo "$valid" | sed -e 's/ /\n/g')"
+fi
+
+collections=$(echo "$available" | cut -d: -f1 | uniq)
+for collection in $collections ; do
+ [ -w /dev/kmsg ] && echo "kselftest: Running tests in $collection" >> /dev/kmsg
+ tests=$(echo "$available" | grep "^$collection:" | cut -d: -f2)
+ ($dryrun cd "$collection" && $dryrun run_many $tests)
+done
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 7a6d40286a42..4a180439ee9e 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -774,8 +774,15 @@ void *kill_thread(void *data)
return (void *)SIBLING_EXIT_UNKILLED;
}
+enum kill_t {
+ KILL_THREAD,
+ KILL_PROCESS,
+ RET_UNKNOWN
+};
+
/* Prepare a thread that will kill itself or both of us. */
-void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process)
+void kill_thread_or_group(struct __test_metadata *_metadata,
+ enum kill_t kill_how)
{
pthread_t thread;
void *status;
@@ -791,11 +798,12 @@ void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process)
.len = (unsigned short)ARRAY_SIZE(filter_thread),
.filter = filter_thread,
};
+ int kill = kill_how == KILL_PROCESS ? SECCOMP_RET_KILL_PROCESS : 0xAAAAAAAA;
struct sock_filter filter_process[] = {
BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
offsetof(struct seccomp_data, nr)),
BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
- BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_PROCESS),
+ BPF_STMT(BPF_RET|BPF_K, kill),
BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
};
struct sock_fprog prog_process = {
@@ -808,13 +816,15 @@ void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process)
}
ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0,
- kill_process ? &prog_process : &prog_thread));
+ kill_how == KILL_THREAD ? &prog_thread
+ : &prog_process));
/*
* Add the KILL_THREAD rule again to make sure that the KILL_PROCESS
* flag cannot be downgraded by a new filter.
*/
- ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread));
+ if (kill_how == KILL_PROCESS)
+ ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread));
/* Start a thread that will exit immediately. */
ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false));
@@ -842,7 +852,7 @@ TEST(KILL_thread)
child_pid = fork();
ASSERT_LE(0, child_pid);
if (child_pid == 0) {
- kill_thread_or_group(_metadata, false);
+ kill_thread_or_group(_metadata, KILL_THREAD);
_exit(38);
}
@@ -861,7 +871,7 @@ TEST(KILL_process)
child_pid = fork();
ASSERT_LE(0, child_pid);
if (child_pid == 0) {
- kill_thread_or_group(_metadata, true);
+ kill_thread_or_group(_metadata, KILL_PROCESS);
_exit(38);
}
@@ -872,6 +882,27 @@ TEST(KILL_process)
ASSERT_EQ(SIGSYS, WTERMSIG(status));
}
+TEST(KILL_unknown)
+{
+ int status;
+ pid_t child_pid;
+
+ child_pid = fork();
+ ASSERT_LE(0, child_pid);
+ if (child_pid == 0) {
+ kill_thread_or_group(_metadata, RET_UNKNOWN);
+ _exit(38);
+ }
+
+ ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+
+ /* If the entire process was killed, we'll see SIGSYS. */
+ EXPECT_TRUE(WIFSIGNALED(status)) {
+ TH_LOG("Unknown SECCOMP_RET is only killing the thread?");
+ }
+ ASSERT_EQ(SIGSYS, WTERMSIG(status));
+}
+
/* TODO(wad) add 64-bit versus 32-bit arg tests. */
TEST(arg_out_of_range)
{
@@ -1667,70 +1698,148 @@ TEST_F(TRACE_poke, getpid_runs_normally)
}
#if defined(__x86_64__)
-# define ARCH_REGS struct user_regs_struct
-# define SYSCALL_NUM orig_rax
-# define SYSCALL_RET rax
+# define ARCH_REGS struct user_regs_struct
+# define SYSCALL_NUM(_regs) (_regs).orig_rax
+# define SYSCALL_RET(_regs) (_regs).rax
#elif defined(__i386__)
-# define ARCH_REGS struct user_regs_struct
-# define SYSCALL_NUM orig_eax
-# define SYSCALL_RET eax
+# define ARCH_REGS struct user_regs_struct
+# define SYSCALL_NUM(_regs) (_regs).orig_eax
+# define SYSCALL_RET(_regs) (_regs).eax
#elif defined(__arm__)
-# define ARCH_REGS struct pt_regs
-# define SYSCALL_NUM ARM_r7
-# define SYSCALL_RET ARM_r0
+# define ARCH_REGS struct pt_regs
+# define SYSCALL_NUM(_regs) (_regs).ARM_r7
+# ifndef PTRACE_SET_SYSCALL
+# define PTRACE_SET_SYSCALL 23
+# endif
+# define SYSCALL_NUM_SET(_regs, _nr) \
+ EXPECT_EQ(0, ptrace(PTRACE_SET_SYSCALL, tracee, NULL, _nr))
+# define SYSCALL_RET(_regs) (_regs).ARM_r0
#elif defined(__aarch64__)
-# define ARCH_REGS struct user_pt_regs
-# define SYSCALL_NUM regs[8]
-# define SYSCALL_RET regs[0]
+# define ARCH_REGS struct user_pt_regs
+# define SYSCALL_NUM(_regs) (_regs).regs[8]
+# ifndef NT_ARM_SYSTEM_CALL
+# define NT_ARM_SYSTEM_CALL 0x404
+# endif
+# define SYSCALL_NUM_SET(_regs, _nr) \
+ do { \
+ struct iovec __v; \
+ typeof(_nr) __nr = (_nr); \
+ __v.iov_base = &__nr; \
+ __v.iov_len = sizeof(__nr); \
+ EXPECT_EQ(0, ptrace(PTRACE_SETREGSET, tracee, \
+ NT_ARM_SYSTEM_CALL, &__v)); \
+ } while (0)
+# define SYSCALL_RET(_regs) (_regs).regs[0]
#elif defined(__riscv) && __riscv_xlen == 64
-# define ARCH_REGS struct user_regs_struct
-# define SYSCALL_NUM a7
-# define SYSCALL_RET a0
+# define ARCH_REGS struct user_regs_struct
+# define SYSCALL_NUM(_regs) (_regs).a7
+# define SYSCALL_RET(_regs) (_regs).a0
#elif defined(__csky__)
-# define ARCH_REGS struct pt_regs
-#if defined(__CSKYABIV2__)
-# define SYSCALL_NUM regs[3]
-#else
-# define SYSCALL_NUM regs[9]
-#endif
-# define SYSCALL_RET a0
+# define ARCH_REGS struct pt_regs
+# if defined(__CSKYABIV2__)
+# define SYSCALL_NUM(_regs) (_regs).regs[3]
+# else
+# define SYSCALL_NUM(_regs) (_regs).regs[9]
+# endif
+# define SYSCALL_RET(_regs) (_regs).a0
#elif defined(__hppa__)
-# define ARCH_REGS struct user_regs_struct
-# define SYSCALL_NUM gr[20]
-# define SYSCALL_RET gr[28]
+# define ARCH_REGS struct user_regs_struct
+# define SYSCALL_NUM(_regs) (_regs).gr[20]
+# define SYSCALL_RET(_regs) (_regs).gr[28]
#elif defined(__powerpc__)
-# define ARCH_REGS struct pt_regs
-# define SYSCALL_NUM gpr[0]
-# define SYSCALL_RET gpr[3]
+# define ARCH_REGS struct pt_regs
+# define SYSCALL_NUM(_regs) (_regs).gpr[0]
+# define SYSCALL_RET(_regs) (_regs).gpr[3]
+# define SYSCALL_RET_SET(_regs, _val) \
+ do { \
+ typeof(_val) _result = (_val); \
+ /* \
+ * A syscall error is signaled by CR0 SO bit \
+ * and the code is stored as a positive value. \
+ */ \
+ if (_result < 0) { \
+ SYSCALL_RET(_regs) = -_result; \
+ (_regs).ccr |= 0x10000000; \
+ } else { \
+ SYSCALL_RET(_regs) = _result; \
+ (_regs).ccr &= ~0x10000000; \
+ } \
+ } while (0)
+# define SYSCALL_RET_SET_ON_PTRACE_EXIT
#elif defined(__s390__)
-# define ARCH_REGS s390_regs
-# define SYSCALL_NUM gprs[2]
-# define SYSCALL_RET gprs[2]
-# define SYSCALL_NUM_RET_SHARE_REG
+# define ARCH_REGS s390_regs
+# define SYSCALL_NUM(_regs) (_regs).gprs[2]
+# define SYSCALL_RET_SET(_regs, _val) \
+ TH_LOG("Can't modify syscall return on this architecture")
#elif defined(__mips__)
-# define ARCH_REGS struct pt_regs
-# define SYSCALL_NUM regs[2]
-# define SYSCALL_SYSCALL_NUM regs[4]
-# define SYSCALL_RET regs[2]
-# define SYSCALL_NUM_RET_SHARE_REG
+# include <asm/unistd_nr_n32.h>
+# include <asm/unistd_nr_n64.h>
+# include <asm/unistd_nr_o32.h>
+# define ARCH_REGS struct pt_regs
+# define SYSCALL_NUM(_regs) \
+ ({ \
+ typeof((_regs).regs[2]) _nr; \
+ if ((_regs).regs[2] == __NR_O32_Linux) \
+ _nr = (_regs).regs[4]; \
+ else \
+ _nr = (_regs).regs[2]; \
+ _nr; \
+ })
+# define SYSCALL_NUM_SET(_regs, _nr) \
+ do { \
+ if ((_regs).regs[2] == __NR_O32_Linux) \
+ (_regs).regs[4] = _nr; \
+ else \
+ (_regs).regs[2] = _nr; \
+ } while (0)
+# define SYSCALL_RET_SET(_regs, _val) \
+ TH_LOG("Can't modify syscall return on this architecture")
#elif defined(__xtensa__)
-# define ARCH_REGS struct user_pt_regs
-# define SYSCALL_NUM syscall
+# define ARCH_REGS struct user_pt_regs
+# define SYSCALL_NUM(_regs) (_regs).syscall
/*
* On xtensa syscall return value is in the register
* a2 of the current window which is not fixed.
*/
-#define SYSCALL_RET(reg) a[(reg).windowbase * 4 + 2]
+#define SYSCALL_RET(_regs) (_regs).a[(_regs).windowbase * 4 + 2]
#elif defined(__sh__)
-# define ARCH_REGS struct pt_regs
-# define SYSCALL_NUM gpr[3]
-# define SYSCALL_RET gpr[0]
+# define ARCH_REGS struct pt_regs
+# define SYSCALL_NUM(_regs) (_regs).gpr[3]
+# define SYSCALL_RET(_regs) (_regs).gpr[0]
#else
# error "Do not know how to find your architecture's registers and syscalls"
#endif
+/*
+ * Most architectures can change the syscall by just updating the
+ * associated register. This is the default if not defined above.
+ */
+#ifndef SYSCALL_NUM_SET
+# define SYSCALL_NUM_SET(_regs, _nr) \
+ do { \
+ SYSCALL_NUM(_regs) = (_nr); \
+ } while (0)
+#endif
+/*
+ * Most architectures can change the syscall return value by just
+ * writing to the SYSCALL_RET register. This is the default if not
+ * defined above. If an architecture cannot set the return value
+ * (for example when the syscall and return value register is
+ * shared), report it with TH_LOG() in an arch-specific definition
+ * of SYSCALL_RET_SET() above, and leave SYSCALL_RET undefined.
+ */
+#if !defined(SYSCALL_RET) && !defined(SYSCALL_RET_SET)
+# error "One of SYSCALL_RET or SYSCALL_RET_SET is needed for this arch"
+#endif
+#ifndef SYSCALL_RET_SET
+# define SYSCALL_RET_SET(_regs, _val) \
+ do { \
+ SYSCALL_RET(_regs) = (_val); \
+ } while (0)
+#endif
+
/* When the syscall return can't be changed, stub out the tests for it. */
-#ifdef SYSCALL_NUM_RET_SHARE_REG
+#ifndef SYSCALL_RET
# define EXPECT_SYSCALL_RETURN(val, action) EXPECT_EQ(-1, action)
#else
# define EXPECT_SYSCALL_RETURN(val, action) \
@@ -1745,116 +1854,92 @@ TEST_F(TRACE_poke, getpid_runs_normally)
} while (0)
#endif
-/* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
+/*
+ * Some architectures (e.g. powerpc) can only set syscall
+ * return values on syscall exit during ptrace.
+ */
+const bool ptrace_entry_set_syscall_nr = true;
+const bool ptrace_entry_set_syscall_ret =
+#ifndef SYSCALL_RET_SET_ON_PTRACE_EXIT
+ true;
+#else
+ false;
+#endif
+
+/*
+ * Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
* architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux).
*/
#if defined(__x86_64__) || defined(__i386__) || defined(__mips__)
-#define HAVE_GETREGS
+# define ARCH_GETREGS(_regs) ptrace(PTRACE_GETREGS, tracee, 0, &(_regs))
+# define ARCH_SETREGS(_regs) ptrace(PTRACE_SETREGS, tracee, 0, &(_regs))
+#else
+# define ARCH_GETREGS(_regs) ({ \
+ struct iovec __v; \
+ __v.iov_base = &(_regs); \
+ __v.iov_len = sizeof(_regs); \
+ ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &__v); \
+ })
+# define ARCH_SETREGS(_regs) ({ \
+ struct iovec __v; \
+ __v.iov_base = &(_regs); \
+ __v.iov_len = sizeof(_regs); \
+ ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &__v); \
+ })
#endif
/* Architecture-specific syscall fetching routine. */
int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
{
ARCH_REGS regs;
-#ifdef HAVE_GETREGS
- EXPECT_EQ(0, ptrace(PTRACE_GETREGS, tracee, 0, &regs)) {
- TH_LOG("PTRACE_GETREGS failed");
- return -1;
- }
-#else
- struct iovec iov;
- iov.iov_base = &regs;
- iov.iov_len = sizeof(regs);
- EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) {
- TH_LOG("PTRACE_GETREGSET failed");
+ EXPECT_EQ(0, ARCH_GETREGS(regs)) {
return -1;
}
-#endif
-#if defined(__mips__)
- if (regs.SYSCALL_NUM == __NR_O32_Linux)
- return regs.SYSCALL_SYSCALL_NUM;
-#endif
- return regs.SYSCALL_NUM;
+ return SYSCALL_NUM(regs);
}
/* Architecture-specific syscall changing routine. */
-void change_syscall(struct __test_metadata *_metadata,
- pid_t tracee, int syscall, int result)
+void __change_syscall(struct __test_metadata *_metadata,
+ pid_t tracee, long *syscall, long *ret)
{
- int ret;
- ARCH_REGS regs;
-#ifdef HAVE_GETREGS
- ret = ptrace(PTRACE_GETREGS, tracee, 0, &regs);
-#else
- struct iovec iov;
- iov.iov_base = &regs;
- iov.iov_len = sizeof(regs);
- ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov);
-#endif
- EXPECT_EQ(0, ret) {}
+ ARCH_REGS orig, regs;
-#if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \
- defined(__s390__) || defined(__hppa__) || defined(__riscv) || \
- defined(__xtensa__) || defined(__csky__) || defined(__sh__)
- {
- regs.SYSCALL_NUM = syscall;
- }
-#elif defined(__mips__)
- {
- if (regs.SYSCALL_NUM == __NR_O32_Linux)
- regs.SYSCALL_SYSCALL_NUM = syscall;
- else
- regs.SYSCALL_NUM = syscall;
- }
+ /* Do not get/set registers if we have nothing to do. */
+ if (!syscall && !ret)
+ return;
-#elif defined(__arm__)
-# ifndef PTRACE_SET_SYSCALL
-# define PTRACE_SET_SYSCALL 23
-# endif
- {
- ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall);
- EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, ARCH_GETREGS(regs)) {
+ return;
}
+ orig = regs;
-#elif defined(__aarch64__)
-# ifndef NT_ARM_SYSTEM_CALL
-# define NT_ARM_SYSTEM_CALL 0x404
-# endif
- {
- iov.iov_base = &syscall;
- iov.iov_len = sizeof(syscall);
- ret = ptrace(PTRACE_SETREGSET, tracee, NT_ARM_SYSTEM_CALL,
- &iov);
- EXPECT_EQ(0, ret);
- }
+ if (syscall)
+ SYSCALL_NUM_SET(regs, *syscall);
-#else
- ASSERT_EQ(1, 0) {
- TH_LOG("How is the syscall changed on this architecture?");
- }
-#endif
+ if (ret)
+ SYSCALL_RET_SET(regs, *ret);
- /* If syscall is skipped, change return value. */
- if (syscall == -1)
-#ifdef SYSCALL_NUM_RET_SHARE_REG
- TH_LOG("Can't modify syscall return on this architecture");
+ /* Flush any register changes made. */
+ if (memcmp(&orig, &regs, sizeof(orig)) != 0)
+ EXPECT_EQ(0, ARCH_SETREGS(regs));
+}
-#elif defined(__xtensa__)
- regs.SYSCALL_RET(regs) = result;
-#else
- regs.SYSCALL_RET = result;
-#endif
+/* Change only syscall number. */
+void change_syscall_nr(struct __test_metadata *_metadata,
+ pid_t tracee, long syscall)
+{
+ __change_syscall(_metadata, tracee, &syscall, NULL);
+}
-#ifdef HAVE_GETREGS
- ret = ptrace(PTRACE_SETREGS, tracee, 0, &regs);
-#else
- iov.iov_base = &regs;
- iov.iov_len = sizeof(regs);
- ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov);
-#endif
- EXPECT_EQ(0, ret);
+/* Change syscall return value (and set syscall number to -1). */
+void change_syscall_ret(struct __test_metadata *_metadata,
+ pid_t tracee, long ret)
+{
+ long syscall = -1;
+
+ __change_syscall(_metadata, tracee, &syscall, &ret);
}
void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee,
@@ -1872,17 +1957,17 @@ void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee,
case 0x1002:
/* change getpid to getppid. */
EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee));
- change_syscall(_metadata, tracee, __NR_getppid, 0);
+ change_syscall_nr(_metadata, tracee, __NR_getppid);
break;
case 0x1003:
/* skip gettid with valid return code. */
EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee));
- change_syscall(_metadata, tracee, -1, 45000);
+ change_syscall_ret(_metadata, tracee, 45000);
break;
case 0x1004:
/* skip openat with error. */
EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee));
- change_syscall(_metadata, tracee, -1, -ESRCH);
+ change_syscall_ret(_metadata, tracee, -ESRCH);
break;
case 0x1005:
/* do nothing (allow getppid) */
@@ -1897,12 +1982,21 @@ void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee,
}
+FIXTURE(TRACE_syscall) {
+ struct sock_fprog prog;
+ pid_t tracer, mytid, mypid, parent;
+ long syscall_nr;
+};
+
void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
int status, void *args)
{
- int ret, nr;
+ int ret;
unsigned long msg;
static bool entry;
+ long syscall_nr_val, syscall_ret_val;
+ long *syscall_nr = NULL, *syscall_ret = NULL;
+ FIXTURE_DATA(TRACE_syscall) *self = args;
/*
* The traditional way to tell PTRACE_SYSCALL entry/exit
@@ -1916,24 +2010,48 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY
: PTRACE_EVENTMSG_SYSCALL_EXIT, msg);
- if (!entry)
- return;
+ /*
+ * Some architectures only support setting return values during
+ * syscall exit under ptrace, and on exit the syscall number may
+ * no longer be available. Therefore, save the initial syscall
+ * number here, so it can be examined during both entry and exit
+ * phases.
+ */
+ if (entry)
+ self->syscall_nr = get_syscall(_metadata, tracee);
- nr = get_syscall(_metadata, tracee);
+ /*
+ * Depending on the architecture's syscall setting abilities, we
+ * pick which things to set during this phase (entry or exit).
+ */
+ if (entry == ptrace_entry_set_syscall_nr)
+ syscall_nr = &syscall_nr_val;
+ if (entry == ptrace_entry_set_syscall_ret)
+ syscall_ret = &syscall_ret_val;
+
+ /* Now handle the actual rewriting cases. */
+ switch (self->syscall_nr) {
+ case __NR_getpid:
+ syscall_nr_val = __NR_getppid;
+ /* Never change syscall return for this case. */
+ syscall_ret = NULL;
+ break;
+ case __NR_gettid:
+ syscall_nr_val = -1;
+ syscall_ret_val = 45000;
+ break;
+ case __NR_openat:
+ syscall_nr_val = -1;
+ syscall_ret_val = -ESRCH;
+ break;
+ default:
+ /* Unhandled, do nothing. */
+ return;
+ }
- if (nr == __NR_getpid)
- change_syscall(_metadata, tracee, __NR_getppid, 0);
- if (nr == __NR_gettid)
- change_syscall(_metadata, tracee, -1, 45000);
- if (nr == __NR_openat)
- change_syscall(_metadata, tracee, -1, -ESRCH);
+ __change_syscall(_metadata, tracee, syscall_nr, syscall_ret);
}
-FIXTURE(TRACE_syscall) {
- struct sock_fprog prog;
- pid_t tracer, mytid, mypid, parent;
-};
-
FIXTURE_VARIANT(TRACE_syscall) {
/*
* All of the SECCOMP_RET_TRACE behaviors can be tested with either
@@ -1992,7 +2110,7 @@ FIXTURE_SETUP(TRACE_syscall)
self->tracer = setup_trace_fixture(_metadata,
variant->use_ptrace ? tracer_ptrace
: tracer_seccomp,
- NULL, variant->use_ptrace);
+ self, variant->use_ptrace);
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
ASSERT_EQ(0, ret);
@@ -3142,11 +3260,11 @@ skip:
static int user_notif_syscall(int nr, unsigned int flags)
{
struct sock_filter filter[] = {
- BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
offsetof(struct seccomp_data, nr)),
- BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, nr, 0, 1),
- BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_USER_NOTIF),
- BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, nr, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_USER_NOTIF),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
};
struct sock_fprog prog = {
@@ -3699,7 +3817,7 @@ TEST(user_notification_filter_empty)
long ret;
int status;
struct pollfd pollfd;
- struct clone_args args = {
+ struct __clone_args args = {
.flags = CLONE_FILES,
.exit_signal = SIGCHLD,
};
@@ -3715,7 +3833,7 @@ TEST(user_notification_filter_empty)
if (pid == 0) {
int listener;
- listener = user_notif_syscall(__NR_mknod, SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ listener = user_notif_syscall(__NR_mknodat, SECCOMP_FILTER_FLAG_NEW_LISTENER);
if (listener < 0)
_exit(EXIT_FAILURE);
@@ -3753,7 +3871,7 @@ TEST(user_notification_filter_empty_threaded)
long ret;
int status;
struct pollfd pollfd;
- struct clone_args args = {
+ struct __clone_args args = {
.flags = CLONE_FILES,
.exit_signal = SIGCHLD,
};
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index a9026706d597..30873b19d04b 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -3,6 +3,23 @@
uname_M := $(shell uname -m 2>/dev/null || echo not)
MACHINE ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/')
+# Without this, failed build products remain, with up-to-date timestamps,
+# thus tricking Make (and you!) into believing that All Is Well, in subsequent
+# make invocations:
+.DELETE_ON_ERROR:
+
+# Avoid accidental wrong builds, due to built-in rules working just a little
+# bit too well--but not quite as well as required for our situation here.
+#
+# In other words, "make userfaultfd" is supposed to fail to build at all,
+# because this Makefile only supports either "make" (all), or "make /full/path".
+# However, the built-in rules, if not suppressed, will pick up CFLAGS and the
+# initial LDLIBS (but not the target-specific LDLIBS, because those are only
+# set for the full path target!). This causes it to get pretty far into building
+# things despite using incorrect values such as an *occasionally* incomplete
+# LDLIBS.
+MAKEFLAGS += --no-builtin-rules
+
CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS)
LDLIBS = -lrt
TEST_GEN_FILES = compaction_test
diff --git a/tools/testing/selftests/vm/compaction_test.c b/tools/testing/selftests/vm/compaction_test.c
index bcec71250873..9b420140ba2b 100644
--- a/tools/testing/selftests/vm/compaction_test.c
+++ b/tools/testing/selftests/vm/compaction_test.c
@@ -18,7 +18,8 @@
#include "../kselftest.h"
-#define MAP_SIZE 1048576
+#define MAP_SIZE_MB 100
+#define MAP_SIZE (MAP_SIZE_MB * 1024 * 1024)
struct map_list {
void *map;
@@ -165,7 +166,7 @@ int main(int argc, char **argv)
void *map = NULL;
unsigned long mem_free = 0;
unsigned long hugepage_size = 0;
- unsigned long mem_fragmentable = 0;
+ long mem_fragmentable_MB = 0;
if (prereq() != 0) {
printf("Either the sysctl compact_unevictable_allowed is not\n"
@@ -190,9 +191,9 @@ int main(int argc, char **argv)
return -1;
}
- mem_fragmentable = mem_free * 0.8 / 1024;
+ mem_fragmentable_MB = mem_free * 0.8 / 1024;
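+ /* mem_free is in KB; e.g. 8 GiB free -> 8388608 * 0.8 / 1024
+  * = 6553 MB to fragment, i.e. ~65 mappings of MAP_SIZE_MB (100 MB) */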
- while (mem_fragmentable > 0) {
+ while (mem_fragmentable_MB > 0) {
map = mmap(NULL, MAP_SIZE, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE | MAP_LOCKED, -1, 0);
if (map == MAP_FAILED)
@@ -213,7 +214,7 @@ int main(int argc, char **argv)
for (i = 0; i < MAP_SIZE; i += page_size)
*(unsigned long *)(map + i) = (unsigned long)map + i;
- mem_fragmentable--;
+ mem_fragmentable_MB -= MAP_SIZE_MB;
}
for (entry = list; entry != NULL; entry = entry->next) {
diff --git a/tools/testing/selftests/vm/config b/tools/testing/selftests/vm/config
index 3ba674b64fa9..69dd0d1aa30b 100644
--- a/tools/testing/selftests/vm/config
+++ b/tools/testing/selftests/vm/config
@@ -3,3 +3,4 @@ CONFIG_USERFAULTFD=y
CONFIG_TEST_VMALLOC=m
CONFIG_DEVICE_PRIVATE=y
CONFIG_TEST_HMM=m
+CONFIG_GUP_BENCHMARK=y
diff --git a/tools/testing/selftests/vm/gup_benchmark.c b/tools/testing/selftests/vm/gup_benchmark.c
index 43b4dfe161a2..1d4359341e44 100644
--- a/tools/testing/selftests/vm/gup_benchmark.c
+++ b/tools/testing/selftests/vm/gup_benchmark.c
@@ -15,12 +15,12 @@
#define PAGE_SIZE sysconf(_SC_PAGESIZE)
#define GUP_FAST_BENCHMARK _IOWR('g', 1, struct gup_benchmark)
-#define GUP_LONGTERM_BENCHMARK _IOWR('g', 2, struct gup_benchmark)
-#define GUP_BENCHMARK _IOWR('g', 3, struct gup_benchmark)
+#define GUP_BENCHMARK _IOWR('g', 2, struct gup_benchmark)
/* Similar to above, but use FOLL_PIN instead of FOLL_GET. */
-#define PIN_FAST_BENCHMARK _IOWR('g', 4, struct gup_benchmark)
-#define PIN_BENCHMARK _IOWR('g', 5, struct gup_benchmark)
+#define PIN_FAST_BENCHMARK _IOWR('g', 3, struct gup_benchmark)
+#define PIN_BENCHMARK _IOWR('g', 4, struct gup_benchmark)
+#define PIN_LONGTERM_BENCHMARK _IOWR('g', 5, struct gup_benchmark)
/* Just the flags we need, copied from mm.h: */
#define FOLL_WRITE 0x01 /* check pte is writable */
@@ -52,6 +52,9 @@ int main(int argc, char **argv)
case 'b':
cmd = PIN_BENCHMARK;
break;
+ case 'L':
+ cmd = PIN_LONGTERM_BENCHMARK;
+ break;
case 'm':
size = atoi(optarg) * MB;
break;
@@ -67,9 +70,6 @@ int main(int argc, char **argv)
case 'T':
thp = 0;
break;
- case 'L':
- cmd = GUP_LONGTERM_BENCHMARK;
- break;
case 'U':
cmd = GUP_BENCHMARK;
break;
@@ -105,12 +105,16 @@ int main(int argc, char **argv)
gup.flags |= FOLL_WRITE;
fd = open("/sys/kernel/debug/gup_benchmark", O_RDWR);
- if (fd == -1)
- perror("open"), exit(1);
+ if (fd == -1) {
+ perror("open");
+ exit(1);
+ }
p = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, filed, 0);
- if (p == MAP_FAILED)
- perror("mmap"), exit(1);
+ if (p == MAP_FAILED) {
+ perror("mmap");
+ exit(1);
+ }
gup.addr = (unsigned long)p;
if (thp == 1)
@@ -123,8 +127,10 @@ int main(int argc, char **argv)
for (i = 0; i < repeats; i++) {
gup.size = size;
- if (ioctl(fd, cmd, &gup))
- perror("ioctl"), exit(1);
+ if (ioctl(fd, cmd, &gup)) {
+ perror("ioctl");
+ exit(1);
+ }
printf("Time: get:%lld put:%lld us", gup.get_delta_usec,
gup.put_delta_usec);
diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c
index 93fc5cadce61..0a28a6a29581 100644
--- a/tools/testing/selftests/vm/hmm-tests.c
+++ b/tools/testing/selftests/vm/hmm-tests.c
@@ -680,7 +680,7 @@ TEST_F(hmm, anon_write_hugetlbfs)
n = gethugepagesizes(pagesizes, 4);
if (n <= 0)
- return;
+ SKIP(return, "Huge page size could not be determined");
for (idx = 0; --n > 0; ) {
if (pagesizes[n] < pagesizes[idx])
idx = n;
@@ -694,7 +694,7 @@ TEST_F(hmm, anon_write_hugetlbfs)
buffer->ptr = get_hugepage_region(size, GHR_STRICT);
if (buffer->ptr == NULL) {
free(buffer);
- return;
+ SKIP(return, "Huge page could not be allocated");
}
buffer->fd = -1;
diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index 61e5cfeb1350..9b0912a01777 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -227,8 +227,10 @@ static void hugetlb_allocate_area(void **alloc_area)
huge_fd, *alloc_area == area_src ? 0 :
nr_pages * page_size);
if (area_alias == MAP_FAILED) {
- if (munmap(*alloc_area, nr_pages * page_size) < 0)
- perror("hugetlb munmap"), exit(1);
+ if (munmap(*alloc_area, nr_pages * page_size) < 0) {
+ perror("hugetlb munmap");
+ exit(1);
+ }
*alloc_area = NULL;
return;
}
@@ -337,9 +339,10 @@ static void wp_range(int ufd, __u64 start, __u64 len, bool wp)
/* Undo write-protect, do wakeup after that */
prms.mode = wp ? UFFDIO_WRITEPROTECT_MODE_WP : 0;
- if (ioctl(ufd, UFFDIO_WRITEPROTECT, &prms))
- fprintf(stderr, "clear WP failed for address 0x%Lx\n",
- start), exit(1);
+ if (ioctl(ufd, UFFDIO_WRITEPROTECT, &prms)) {
+ fprintf(stderr, "clear WP failed for address 0x%Lx\n", start);
+ exit(1);
+ }
}
static void *locking_thread(void *arg)
@@ -359,8 +362,10 @@ static void *locking_thread(void *arg)
seed += cpu;
bzero(&rand, sizeof(rand));
bzero(&randstate, sizeof(randstate));
- if (initstate_r(seed, randstate, sizeof(randstate), &rand))
- fprintf(stderr, "srandom_r error\n"), exit(1);
+ if (initstate_r(seed, randstate, sizeof(randstate), &rand)) {
+ fprintf(stderr, "srandom_r error\n");
+ exit(1);
+ }
} else {
page_nr = -bounces;
if (!(bounces & BOUNCE_RACINGFAULTS))
@@ -369,12 +374,16 @@ static void *locking_thread(void *arg)
while (!finished) {
if (bounces & BOUNCE_RANDOM) {
- if (random_r(&rand, &rand_nr))
- fprintf(stderr, "random_r 1 error\n"), exit(1);
+ if (random_r(&rand, &rand_nr)) {
+ fprintf(stderr, "random_r 1 error\n");
+ exit(1);
+ }
page_nr = rand_nr;
if (sizeof(page_nr) > sizeof(rand_nr)) {
- if (random_r(&rand, &rand_nr))
- fprintf(stderr, "random_r 2 error\n"), exit(1);
+ if (random_r(&rand, &rand_nr)) {
+ fprintf(stderr, "random_r 2 error\n");
+ exit(1);
+ }
page_nr |= (((unsigned long) rand_nr) << 16) <<
16;
}
@@ -385,11 +394,13 @@ static void *locking_thread(void *arg)
start = time(NULL);
if (bounces & BOUNCE_VERIFY) {
count = *area_count(area_dst, page_nr);
- if (!count)
+ if (!count) {
fprintf(stderr,
"page_nr %lu wrong count %Lu %Lu\n",
page_nr, count,
- count_verify[page_nr]), exit(1);
+ count_verify[page_nr]);
+ exit(1);
+ }
/*
@@ -401,11 +412,12 @@ static void *locking_thread(void *arg)
*/
#if 1
if (!my_bcmp(area_dst + page_nr * page_size, zeropage,
- page_size))
+ page_size)) {
fprintf(stderr,
"my_bcmp page_nr %lu wrong count %Lu %Lu\n",
- page_nr, count,
- count_verify[page_nr]), exit(1);
+ page_nr, count, count_verify[page_nr]);
+ exit(1);
+ }
#else
unsigned long loops;
@@ -437,7 +449,7 @@ static void *locking_thread(void *arg)
fprintf(stderr,
"page_nr %lu memory corruption %Lu %Lu\n",
page_nr, count,
- count_verify[page_nr]), exit(1);
+ count_verify[page_nr]);
+ exit(1);
}
count++;
*area_count(area_dst, page_nr) = count_verify[page_nr] = count;
@@ -461,12 +473,14 @@ static void retry_copy_page(int ufd, struct uffdio_copy *uffdio_copy,
offset);
if (ioctl(ufd, UFFDIO_COPY, uffdio_copy)) {
/* real retval in ufdio_copy.copy */
- if (uffdio_copy->copy != -EEXIST)
+ if (uffdio_copy->copy != -EEXIST) {
fprintf(stderr, "UFFDIO_COPY retry error %Ld\n",
- uffdio_copy->copy), exit(1);
+ uffdio_copy->copy);
+ exit(1);
+ }
} else {
fprintf(stderr, "UFFDIO_COPY retry unexpected %Ld\n",
- uffdio_copy->copy), exit(1);
+ uffdio_copy->copy);
+ exit(1);
}
}
@@ -474,9 +488,10 @@ static int __copy_page(int ufd, unsigned long offset, bool retry)
{
struct uffdio_copy uffdio_copy;
- if (offset >= nr_pages * page_size)
- fprintf(stderr, "unexpected offset %lu\n",
- offset), exit(1);
+ if (offset >= nr_pages * page_size) {
+ fprintf(stderr, "unexpected offset %lu\n", offset);
+ exit(1);
+ }
uffdio_copy.dst = (unsigned long) area_dst + offset;
uffdio_copy.src = (unsigned long) area_src + offset;
uffdio_copy.len = page_size;
@@ -487,12 +502,14 @@ static int __copy_page(int ufd, unsigned long offset, bool retry)
uffdio_copy.copy = 0;
if (ioctl(ufd, UFFDIO_COPY, &uffdio_copy)) {
/* real retval in ufdio_copy.copy */
- if (uffdio_copy.copy != -EEXIST)
+ if (uffdio_copy.copy != -EEXIST) {
fprintf(stderr, "UFFDIO_COPY error %Ld\n",
- uffdio_copy.copy), exit(1);
+ uffdio_copy.copy);
+ exit(1);
+ }
} else if (uffdio_copy.copy != page_size) {
fprintf(stderr, "UFFDIO_COPY unexpected copy %Ld\n",
- uffdio_copy.copy), exit(1);
+ uffdio_copy.copy);
+ exit(1);
} else {
if (test_uffdio_copy_eexist && retry) {
test_uffdio_copy_eexist = false;
@@ -521,11 +538,11 @@ static int uffd_read_msg(int ufd, struct uffd_msg *msg)
if (ret < 0) {
if (errno == EAGAIN)
return 1;
- else
- perror("blocking read error"), exit(1);
+ perror("blocking read error");
} else {
- fprintf(stderr, "short read\n"), exit(1);
+ fprintf(stderr, "short read\n");
}
+ exit(1);
}
return 0;
@@ -536,9 +553,10 @@ static void uffd_handle_page_fault(struct uffd_msg *msg,
{
unsigned long offset;
- if (msg->event != UFFD_EVENT_PAGEFAULT)
- fprintf(stderr, "unexpected msg event %u\n",
- msg->event), exit(1);
+ if (msg->event != UFFD_EVENT_PAGEFAULT) {
+ fprintf(stderr, "unexpected msg event %u\n", msg->event);
+ exit(1);
+ }
if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WP) {
wp_range(uffd, msg->arg.pagefault.address, page_size, false);
@@ -546,8 +564,10 @@ static void uffd_handle_page_fault(struct uffd_msg *msg,
} else {
/* Missing page faults */
if (bounces & BOUNCE_VERIFY &&
- msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
- fprintf(stderr, "unexpected write fault\n"), exit(1);
+ msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE) {
+ fprintf(stderr, "unexpected write fault\n");
+ exit(1);
+ }
offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst;
offset &= ~(page_size-1);
@@ -574,25 +594,32 @@ static void *uffd_poll_thread(void *arg)
for (;;) {
ret = poll(pollfd, 2, -1);
- if (!ret)
- fprintf(stderr, "poll error %d\n", ret), exit(1);
- if (ret < 0)
- perror("poll"), exit(1);
+ if (!ret) {
+ fprintf(stderr, "poll error %d\n", ret);
+ exit(1);
+ }
+ if (ret < 0) {
+ perror("poll");
+ exit(1);
+ }
if (pollfd[1].revents & POLLIN) {
- if (read(pollfd[1].fd, &tmp_chr, 1) != 1)
- fprintf(stderr, "read pipefd error\n"),
- exit(1);
+ if (read(pollfd[1].fd, &tmp_chr, 1) != 1) {
+ fprintf(stderr, "read pipefd error\n");
+ exit(1);
+ }
break;
}
- if (!(pollfd[0].revents & POLLIN))
+ if (!(pollfd[0].revents & POLLIN)) {
fprintf(stderr, "pollfd[0].revents %d\n",
- pollfd[0].revents), exit(1);
+ pollfd[0].revents);
+ exit(1);
+ }
if (uffd_read_msg(uffd, &msg))
continue;
switch (msg.event) {
default:
fprintf(stderr, "unexpected msg event %u\n",
- msg.event), exit(1);
+ msg.event); exit(1);
break;
case UFFD_EVENT_PAGEFAULT:
uffd_handle_page_fault(&msg, stats);
@@ -606,8 +633,10 @@ static void *uffd_poll_thread(void *arg)
uffd_reg.range.start = msg.arg.remove.start;
uffd_reg.range.len = msg.arg.remove.end -
msg.arg.remove.start;
- if (ioctl(uffd, UFFDIO_UNREGISTER, &uffd_reg.range))
- fprintf(stderr, "remove failure\n"), exit(1);
+ if (ioctl(uffd, UFFDIO_UNREGISTER, &uffd_reg.range)) {
+ fprintf(stderr, "remove failure\n");
+ exit(1);
+ }
break;
case UFFD_EVENT_REMAP:
area_dst = (char *)(unsigned long)msg.arg.remap.to;
@@ -879,8 +908,10 @@ static int faulting_process(int signal_test)
area_dst = mremap(area_dst, nr_pages * page_size, nr_pages * page_size,
MREMAP_MAYMOVE | MREMAP_FIXED, area_src);
- if (area_dst == MAP_FAILED)
- perror("mremap"), exit(1);
+ if (area_dst == MAP_FAILED) {
+ perror("mremap");
+ exit(1);
+ }
for (; nr < nr_pages; nr++) {
count = *area_count(area_dst, nr);
@@ -888,7 +919,7 @@ static int faulting_process(int signal_test)
fprintf(stderr,
"nr %lu memory corruption %Lu %Lu\n",
nr, count,
- count_verify[nr]), exit(1);
+ count_verify[nr]); exit(1);
}
/*
* Trigger write protection if there is by writing
@@ -901,8 +932,10 @@ static int faulting_process(int signal_test)
return 1;
for (nr = 0; nr < nr_pages; nr++) {
- if (my_bcmp(area_dst + nr * page_size, zeropage, page_size))
- fprintf(stderr, "nr %lu is not zero\n", nr), exit(1);
+ if (my_bcmp(area_dst + nr * page_size, zeropage, page_size)) {
+ fprintf(stderr, "nr %lu is not zero\n", nr);
+ exit(1);
+ }
}
return 0;
@@ -916,12 +949,14 @@ static void retry_uffdio_zeropage(int ufd,
uffdio_zeropage->range.len,
offset);
if (ioctl(ufd, UFFDIO_ZEROPAGE, uffdio_zeropage)) {
- if (uffdio_zeropage->zeropage != -EEXIST)
+ if (uffdio_zeropage->zeropage != -EEXIST) {
fprintf(stderr, "UFFDIO_ZEROPAGE retry error %Ld\n",
- uffdio_zeropage->zeropage), exit(1);
+ uffdio_zeropage->zeropage);
+ exit(1);
+ }
} else {
fprintf(stderr, "UFFDIO_ZEROPAGE retry unexpected %Ld\n",
- uffdio_zeropage->zeropage), exit(1);
+ uffdio_zeropage->zeropage); exit(1);
}
}
@@ -933,9 +968,10 @@ static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry)
has_zeropage = uffd_test_ops->expected_ioctls & (1 << _UFFDIO_ZEROPAGE);
- if (offset >= nr_pages * page_size)
- fprintf(stderr, "unexpected offset %lu\n",
- offset), exit(1);
+ if (offset >= nr_pages * page_size) {
+ fprintf(stderr, "unexpected offset %lu\n", offset);
+ exit(1);
+ }
uffdio_zeropage.range.start = (unsigned long) area_dst + offset;
uffdio_zeropage.range.len = page_size;
uffdio_zeropage.mode = 0;
@@ -943,22 +979,26 @@ static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry)
if (ret) {
/* real retval in uffdio_zeropage.zeropage */
if (has_zeropage) {
- if (uffdio_zeropage.zeropage == -EEXIST)
- fprintf(stderr, "UFFDIO_ZEROPAGE -EEXIST\n"),
- exit(1);
- else
+ if (uffdio_zeropage.zeropage == -EEXIST) {
+ fprintf(stderr, "UFFDIO_ZEROPAGE -EEXIST\n");
+ exit(1);
+ } else {
fprintf(stderr, "UFFDIO_ZEROPAGE error %Ld\n",
- uffdio_zeropage.zeropage), exit(1);
+ uffdio_zeropage.zeropage);
+ exit(1);
+ }
} else {
- if (uffdio_zeropage.zeropage != -EINVAL)
+ if (uffdio_zeropage.zeropage != -EINVAL) {
fprintf(stderr,
"UFFDIO_ZEROPAGE not -EINVAL %Ld\n",
- uffdio_zeropage.zeropage), exit(1);
+ uffdio_zeropage.zeropage);
+ exit(1);
+ }
}
} else if (has_zeropage) {
if (uffdio_zeropage.zeropage != page_size) {
fprintf(stderr, "UFFDIO_ZEROPAGE unexpected %Ld\n",
- uffdio_zeropage.zeropage), exit(1);
+ uffdio_zeropage.zeropage); exit(1);
} else {
if (test_uffdio_zeropage_eexist && retry) {
test_uffdio_zeropage_eexist = false;
@@ -970,7 +1010,7 @@ static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry)
} else {
fprintf(stderr,
"UFFDIO_ZEROPAGE succeeded %Ld\n",
- uffdio_zeropage.zeropage), exit(1);
+ uffdio_zeropage.zeropage); exit(1);
}
return 0;
@@ -1000,19 +1040,24 @@ static int userfaultfd_zeropage_test(void)
uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
if (test_uffdio_wp)
uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP;
- if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
- fprintf(stderr, "register failure\n"), exit(1);
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
+ fprintf(stderr, "register failure\n");
+ exit(1);
+ }
expected_ioctls = uffd_test_ops->expected_ioctls;
if ((uffdio_register.ioctls & expected_ioctls) !=
- expected_ioctls)
+ expected_ioctls) {
fprintf(stderr,
- "unexpected missing ioctl for anon memory\n"),
- exit(1);
+ "unexpected missing ioctl for anon memory\n");
+ exit(1);
+ }
if (uffdio_zeropage(uffd, 0)) {
- if (my_bcmp(area_dst, zeropage, page_size))
- fprintf(stderr, "zeropage is not zero\n"), exit(1);
+ if (my_bcmp(area_dst, zeropage, page_size)) {
+ fprintf(stderr, "zeropage is not zero\n");
+ exit(1);
+ }
}
close(uffd);
@@ -1047,32 +1092,41 @@ static int userfaultfd_events_test(void)
uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
if (test_uffdio_wp)
uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP;
- if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
- fprintf(stderr, "register failure\n"), exit(1);
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
+ fprintf(stderr, "register failure\n");
+ exit(1);
+ }
expected_ioctls = uffd_test_ops->expected_ioctls;
- if ((uffdio_register.ioctls & expected_ioctls) !=
- expected_ioctls)
- fprintf(stderr,
- "unexpected missing ioctl for anon memory\n"),
- exit(1);
+ if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls) {
+ fprintf(stderr, "unexpected missing ioctl for anon memory\n");
+ exit(1);
+ }
- if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats))
- perror("uffd_poll_thread create"), exit(1);
+ if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats)) {
+ perror("uffd_poll_thread create");
+ exit(1);
+ }
pid = fork();
- if (pid < 0)
- perror("fork"), exit(1);
+ if (pid < 0) {
+ perror("fork");
+ exit(1);
+ }
if (!pid)
return faulting_process(0);
waitpid(pid, &err, 0);
- if (err)
- fprintf(stderr, "faulting process failed\n"), exit(1);
+ if (err) {
+ fprintf(stderr, "faulting process failed\n");
+ exit(1);
+ }
- if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
- perror("pipe write"), exit(1);
+ if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) {
+ perror("pipe write");
+ exit(1);
+ }
if (pthread_join(uffd_mon, NULL))
return 1;
@@ -1110,38 +1164,49 @@ static int userfaultfd_sig_test(void)
uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
if (test_uffdio_wp)
uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP;
- if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
- fprintf(stderr, "register failure\n"), exit(1);
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
+ fprintf(stderr, "register failure\n");
+ exit(1);
+ }
expected_ioctls = uffd_test_ops->expected_ioctls;
- if ((uffdio_register.ioctls & expected_ioctls) !=
- expected_ioctls)
- fprintf(stderr,
- "unexpected missing ioctl for anon memory\n"),
- exit(1);
+ if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls) {
+ fprintf(stderr, "unexpected missing ioctl for anon memory\n");
+ exit(1);
+ }
- if (faulting_process(1))
- fprintf(stderr, "faulting process failed\n"), exit(1);
+ if (faulting_process(1)) {
+ fprintf(stderr, "faulting process failed\n");
+ exit(1);
+ }
if (uffd_test_ops->release_pages(area_dst))
return 1;
- if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats))
- perror("uffd_poll_thread create"), exit(1);
+ if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats)) {
+ perror("uffd_poll_thread create");
+ exit(1);
+ }
pid = fork();
- if (pid < 0)
- perror("fork"), exit(1);
+ if (pid < 0) {
+ perror("fork");
+ exit(1);
+ }
if (!pid)
exit(faulting_process(2));
waitpid(pid, &err, 0);
- if (err)
- fprintf(stderr, "faulting process failed\n"), exit(1);
+ if (err) {
+ fprintf(stderr, "faulting process failed\n");
+ exit(1);
+ }
- if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
- perror("pipe write"), exit(1);
+ if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) {
+ perror("pipe write");
+ exit(1);
+ }
if (pthread_join(uffd_mon, (void **)&userfaults))
return 1;
@@ -1395,7 +1460,7 @@ static void set_test_type(const char *type)
test_type = TEST_SHMEM;
uffd_test_ops = &shmem_uffd_test_ops;
} else {
- fprintf(stderr, "Unknown test type: %s\n", type), exit(1);
+ fprintf(stderr, "Unknown test type: %s\n", type); exit(1);
}
if (test_type == TEST_HUGETLB)
@@ -1403,12 +1468,15 @@ static void set_test_type(const char *type)
else
page_size = sysconf(_SC_PAGE_SIZE);
- if (!page_size)
- fprintf(stderr, "Unable to determine page size\n"),
- exit(2);
+ if (!page_size) {
+ fprintf(stderr, "Unable to determine page size\n");
+ exit(2);
+ }
if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) * 2
- > page_size)
- fprintf(stderr, "Impossible to run this test\n"), exit(2);
+ > page_size) {
+ fprintf(stderr, "Impossible to run this test\n");
+ exit(2);
+ }
}
static void sigalrm(int sig)
@@ -1425,8 +1493,10 @@ int main(int argc, char **argv)
if (argc < 4)
usage();
- if (signal(SIGALRM, sigalrm) == SIG_ERR)
- fprintf(stderr, "failed to arm SIGALRM"), exit(1);
+ if (signal(SIGALRM, sigalrm) == SIG_ERR) {
+ fprintf(stderr, "failed to arm SIGALRM");
+ exit(1);
+ }
alarm(ALARM_INTERVAL_SECS);
set_test_type(argv[1]);
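
The userfaultfd.c hunks above all make the same transformation: error paths written as a single comma expression, "fprintf(...), exit(1);", become braced blocks with one statement per line. The comma form is legal C and even safe under a brace-less "if" (a comma expression is still one statement), but it is fragile: a later edit that swaps the comma for a semicolon without adding braces silently hoists exit(1) out of the conditional. A minimal sketch of the two forms; the check_old()/check_new() names are illustrative and not part of the selftest:

#include <stdio.h>
#include <stdlib.h>

/* Old style: one statement built from the comma operator. */
static void check_old(int err)
{
	if (err)
		fprintf(stderr, "error\n"), exit(1);
}

/* New style: an explicit block, as the patch now uses throughout. */
static void check_new(int err)
{
	if (err) {
		fprintf(stderr, "error\n");
		exit(1);
	}
}

If the comma in check_old() ever became a semicolon, exit(1) would run unconditionally; the braced form rules that mistake out, which is the point of the conversion. (A few conversions above keep exit(1) on the same line as the fprintf; those already sit inside braces, so the difference there is purely one of style.)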
diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c
index 998319553523..7161cfc2e60b 100644
--- a/tools/testing/selftests/x86/fsgsbase.c
+++ b/tools/testing/selftests/x86/fsgsbase.c
@@ -443,6 +443,68 @@ static void test_unexpected_base(void)
#define USER_REGS_OFFSET(r) offsetof(struct user_regs_struct, r)
+static void test_ptrace_write_gs_read_base(void)
+{
+ int status;
+ pid_t child = fork();
+
+ if (child < 0)
+ err(1, "fork");
+
+ if (child == 0) {
+ printf("[RUN]\tPTRACE_POKE GS, read GSBASE back\n");
+
+ printf("[RUN]\tARCH_SET_GS to 1\n");
+ if (syscall(SYS_arch_prctl, ARCH_SET_GS, 1) != 0)
+ err(1, "ARCH_SET_GS");
+
+ if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) != 0)
+ err(1, "PTRACE_TRACEME");
+
+ raise(SIGTRAP);
+ _exit(0);
+ }
+
+ wait(&status);
+
+ if (WSTOPSIG(status) == SIGTRAP) {
+ unsigned long base;
+ unsigned long gs_offset = USER_REGS_OFFSET(gs);
+ unsigned long base_offset = USER_REGS_OFFSET(gs_base);
+
+ /* Read the initial base. It should be 1. */
+ base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL);
+ if (base == 1) {
+ printf("[OK]\tGSBASE started at 1\n");
+ } else {
+ nerrs++;
+ printf("[FAIL]\tGSBASE started at 0x%lx\n", base);
+ }
+
+ printf("[RUN]\tSet GS = 0x7, read GSBASE\n");
+
+ /* Poke an LDT selector into GS. */
+ if (ptrace(PTRACE_POKEUSER, child, gs_offset, 0x7) != 0)
+ err(1, "PTRACE_POKEUSER");
+
+ /* And read the base. */
+ base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL);
+
+ if (base == 0 || base == 1) {
+ printf("[OK]\tGSBASE reads as 0x%lx with invalid GS\n", base);
+ } else {
+ nerrs++;
+ printf("[FAIL]\tGSBASE=0x%lx (should be 0 or 1)\n", base);
+ }
+ }
+
+ ptrace(PTRACE_CONT, child, NULL, NULL);
+
+ wait(&status);
+ if (!WIFEXITED(status))
+ printf("[WARN]\tChild didn't exit cleanly.\n");
+}
+
static void test_ptrace_write_gsbase(void)
{
int status;
@@ -517,6 +579,9 @@ static void test_ptrace_write_gsbase(void)
END:
ptrace(PTRACE_CONT, child, NULL, NULL);
+ wait(&status);
+ if (!WIFEXITED(status))
+ printf("[WARN]\tChild didn't exit cleanly.\n");
}
int main()
@@ -526,6 +591,9 @@ int main()
shared_scratch = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_SHARED, -1, 0);
+ /* Do these tests before we have an LDT. */
+ test_ptrace_write_gs_read_base();
+
/* Probe FSGSBASE */
sethandler(SIGILL, sigill, 0);
if (sigsetjmp(jmpbuf, 1) == 0) {
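
The new test_ptrace_write_gs_read_base() reads the tracee's gs and gs_base slots directly out of struct user_regs_struct with PTRACE_PEEKUSER. One caveat the test can afford to ignore, since it only ever expects 0, 1, or the poked selector: PTRACE_PEEKUSER returns the peeked word itself, so a return value of -1 is ambiguous between data and error. A hedged sketch of the errno discipline that ptrace(2) documents for this case; peek_user() is an illustrative helper, not part of the selftest:

#include <err.h>
#include <errno.h>
#include <stddef.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/user.h>

static unsigned long peek_user(pid_t child, size_t offset)
{
	long v;

	/* Clear errno first: -1 is also a valid register value. */
	errno = 0;
	v = ptrace(PTRACE_PEEKUSER, child, offset, NULL);
	if (v == -1 && errno)
		err(1, "PTRACE_PEEKUSER");
	return (unsigned long)v;
}

/* e.g.: peek_user(child, offsetof(struct user_regs_struct, gs_base)) */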
diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c
index 58c0eab71bca..0517c744b04e 100644
--- a/tools/vm/page-types.c
+++ b/tools/vm/page-types.c
@@ -78,6 +78,7 @@
#define KPF_ARCH 38
#define KPF_UNCACHED 39
#define KPF_SOFTDIRTY 40
+#define KPF_ARCH_2 41
/* [48-] take some arbitrary free slots for expanding overloaded flags
* not part of kernel API
@@ -135,6 +136,7 @@ static const char * const page_flag_names[] = {
[KPF_ARCH] = "h:arch",
[KPF_UNCACHED] = "c:uncached",
[KPF_SOFTDIRTY] = "f:softdirty",
+ [KPF_ARCH_2] = "H:arch_2",
[KPF_READAHEAD] = "I:readahead",
[KPF_SLOB_FREE] = "P:slob_free",