author     Linus Torvalds <torvalds@linux-foundation.org>  2026-02-10 22:26:21 +0300
committer  Linus Torvalds <torvalds@linux-foundation.org>  2026-02-10 22:26:21 +0300
commit     f17b474e36647c23801ef8fdaf2255ab66dd2973 (patch)
tree       7fbaa4d93d71d72eb1cf8f61201eb42881daaeb0
parent     a7423e6ea2f8f6f453de79213c26f7a36c86d9a2 (diff)
parent     db975debcb8c4cd367a78811bc1ba84c83f854bd (diff)
download   linux-f17b474e36647c23801ef8fdaf2255ab66dd2973.tar.xz
Merge tag 'bpf-next-7.0' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Pull bpf updates from Alexei Starovoitov:

 - Support associating BPF program with struct_ops (Amery Hung)

 - Switch BPF local storage to rqspinlock and remove recursion
   detection counters which were causing false positives (Amery Hung)

 - Fix live registers marking for indirect jumps (Anton Protopopov)

 - Introduce execution context detection BPF helpers (Changwoo Min)

 - Improve verifier precision for 32bit sign extension pattern
   (Cupertino Miranda)

 - Optimize BTF type lookup by sorting vmlinux BTF and doing binary
   search (Donglin Peng)

 - Allow states pruning for misc/invalid slots in iterator loops
   (Eduard Zingerman)

 - In preparation for ASAN support in BPF arenas teach libbpf to move
   global BPF variables to the end of the region and enable arena
   kfuncs while holding locks (Emil Tsalapatis)

 - Introduce support for implicit arguments in kfuncs and migrate a
   number of them to the new API. This is a prerequisite for cgroup
   sub-schedulers in sched-ext (Ihor Solodrai)

 - Fix incorrect copied_seq calculation in sockmap (Jiayuan Chen)

 - Fix ORC stack unwind from kprobe_multi (Jiri Olsa)

 - Speed up fentry attach by using single ftrace direct ops in BPF
   trampolines (Jiri Olsa)

 - Require frozen map for calculating map hash (KP Singh)

 - Fix lock entry creation in TAS fallback in rqspinlock (Kumar
   Kartikeya Dwivedi)

 - Allow user space to select cpu in lookup/update operations on
   per-cpu array and hash maps (Leon Hwang); a user-space sketch
   follows the shortlog below

 - Make kfuncs return trusted pointers by default (Matt Bobrowski)

 - Introduce "fsession" support where a single BPF program is executed
   upon entry and exit from a traced kernel function (Menglong Dong)

 - Allow bpf_timer and bpf_wq use in all program types (Mykyta
   Yatsenko, Andrii Nakryiko, Kumar Kartikeya Dwivedi, Alexei
   Starovoitov)

 - Make KF_TRUSTED_ARGS the default for all kfuncs and clean up their
   definition across the tree (Puranjay Mohan)

 - Allow BPF arena calls from non-sleepable context (Puranjay Mohan)

 - Improve register id comparison logic in the verifier and extend
   linked registers with negative offsets (Puranjay Mohan)

 - In preparation for BPF-OOM introduce kfuncs to access memcg events
   (Roman Gushchin)

 - Use CFI compatible destructor kfunc type (Sami Tolvanen)

 - Add bitwise tracking for BPF_END in the verifier (Tianci Cao)

 - Add range tracking for BPF_DIV and BPF_MOD in the verifier (Yazhou
   Tang)

 - Make BPF selftests work with 64k page size (Yonghong Song)

* tag 'bpf-next-7.0' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (268 commits)
  selftests/bpf: Fix outdated test on storage->smap
  selftests/bpf: Choose another percpu variable in bpf for btf_dump test
  selftests/bpf: Remove test_task_storage_map_stress_lookup
  selftests/bpf: Update task_local_storage/task_storage_nodeadlock test
  selftests/bpf: Update task_local_storage/recursion test
  selftests/bpf: Update sk_storage_omem_uncharge test
  bpf: Switch to bpf_selem_unlink_nofail in bpf_local_storage_{map_free, destroy}
  bpf: Support lockless unlink when freeing map or local storage
  bpf: Prepare for bpf_selem_unlink_nofail()
  bpf: Remove unused percpu counter from bpf_local_storage_map_free
  bpf: Remove cgroup local storage percpu counter
  bpf: Remove task local storage percpu counter
  bpf: Change local_storage->lock and b->lock to rqspinlock
  bpf: Convert bpf_selem_unlink to failable
  bpf: Convert bpf_selem_link_map to failable
  bpf: Convert bpf_selem_unlink_map to failable
  bpf: Select bpf_local_storage_map_bucket based on bpf_local_storage
  selftests/xsk: fix number of Tx frags in invalid packet
  selftests/xsk: properly handle batch ending in the middle of a packet
  bpf: Prevent reentrance into call_rcu_tasks_trace()
  ...
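One item above, the user-selectable CPU for per-cpu map operations, can be exercised from user space roughly as follows. This is a sketch: the BPF_F_CPU flag name and the cpu-in-upper-32-bits encoding are assumptions based on the series description, not verified against the final UAPI:

    #include <bpf/bpf.h>

    /* Look up the value of key 0 as seen by CPU 2 only, rather than
     * receiving the usual array of all per-CPU copies.
     */
    int read_one_cpu(int map_fd)
    {
            __u32 key = 0, cpu = 2;
            __u64 value;
            __u64 flags = BPF_F_CPU | ((__u64)cpu << 32); /* assumed encoding */

            return bpf_map_lookup_elem_flags(map_fd, &key, &value, flags);
    }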
-rw-r--r-- Documentation/bpf/bpf_prog_run.rst | 3
-rw-r--r-- Documentation/bpf/kfuncs.rst | 260
-rw-r--r-- Documentation/process/changes.rst | 4
-rw-r--r-- Documentation/scheduler/sched-ext.rst | 1
-rw-r--r-- MAINTAINERS | 11
-rw-r--r-- Makefile | 15
-rw-r--r-- arch/arm64/net/bpf_jit_comp.c | 75
-rw-r--r-- arch/x86/Kconfig | 1
-rw-r--r-- arch/x86/include/asm/ftrace.h | 2
-rw-r--r-- arch/x86/kernel/ftrace_64.S | 5
-rw-r--r-- arch/x86/net/bpf_jit_comp.c | 88
-rw-r--r-- drivers/hid/bpf/hid_bpf_dispatch.c | 5
-rw-r--r-- drivers/hid/bpf/progs/hid_bpf_helpers.h | 8
-rw-r--r-- fs/bpf_fs_kfuncs.c | 23
-rw-r--r-- fs/verity/measure.c | 2
-rw-r--r-- include/asm-generic/rqspinlock.h | 2
-rw-r--r-- include/linux/bpf-cgroup.h | 4
-rw-r--r-- include/linux/bpf.h | 178
-rw-r--r-- include/linux/bpf_local_storage.h | 29
-rw-r--r-- include/linux/bpf_mprog.h | 10
-rw-r--r-- include/linux/bpf_verifier.h | 14
-rw-r--r-- include/linux/btf.h | 9
-rw-r--r-- include/linux/filter.h | 1
-rw-r--r-- include/linux/ftrace.h | 31
-rw-r--r-- include/linux/ftrace_regs.h | 25
-rw-r--r-- include/linux/memcontrol.h | 19
-rw-r--r-- include/linux/skmsg.h | 70
-rw-r--r-- include/linux/tnum.h | 5
-rw-r--r-- include/uapi/linux/bpf.h | 28
-rw-r--r-- init/Kconfig | 2
-rw-r--r-- kernel/bpf/Makefile | 12
-rw-r--r-- kernel/bpf/arena.c | 405
-rw-r--r-- kernel/bpf/arraymap.c | 29
-rw-r--r-- kernel/bpf/bpf_cgrp_storage.c | 62
-rw-r--r-- kernel/bpf/bpf_inode_storage.c | 6
-rw-r--r-- kernel/bpf/bpf_insn_array.c | 4
-rw-r--r-- kernel/bpf/bpf_iter.c | 2
-rw-r--r-- kernel/bpf/bpf_local_storage.c | 408
-rw-r--r-- kernel/bpf/bpf_lsm.c | 5
-rw-r--r-- kernel/bpf/bpf_lsm_proto.c | 19
-rw-r--r-- kernel/bpf/bpf_struct_ops.c | 88
-rw-r--r-- kernel/bpf/bpf_task_storage.c | 154
-rw-r--r-- kernel/bpf/btf.c | 228
-rw-r--r-- kernel/bpf/cgroup.c | 6
-rw-r--r-- kernel/bpf/cgroup_iter.c | 26
-rw-r--r-- kernel/bpf/core.c | 15
-rw-r--r-- kernel/bpf/cpumap.c | 21
-rw-r--r-- kernel/bpf/cpumask.c | 2
-rw-r--r-- kernel/bpf/crypto.c | 10
-rw-r--r-- kernel/bpf/hashtab.c | 105
-rw-r--r-- kernel/bpf/helpers.c | 698
-rw-r--r-- kernel/bpf/inode.c | 42
-rw-r--r-- kernel/bpf/local_storage.c | 27
-rw-r--r-- kernel/bpf/map_iter.c | 2
-rw-r--r-- kernel/bpf/offload.c | 12
-rw-r--r-- kernel/bpf/range_tree.c | 5
-rw-r--r-- kernel/bpf/ringbuf.c | 1
-rw-r--r-- kernel/bpf/rqspinlock.c | 7
-rw-r--r-- kernel/bpf/stream.c | 24
-rw-r--r-- kernel/bpf/syscall.c | 173
-rw-r--r-- kernel/bpf/tnum.c | 16
-rw-r--r-- kernel/bpf/token.c | 1
-rw-r--r-- kernel/bpf/trampoline.c | 320
-rw-r--r-- kernel/bpf/verifier.c | 1471
-rw-r--r-- kernel/sched/ext.c | 8
-rw-r--r-- kernel/trace/Kconfig | 3
-rw-r--r-- kernel/trace/bpf_trace.c | 84
-rw-r--r-- kernel/trace/ftrace.c | 407
-rw-r--r-- lib/Kconfig.debug | 13
-rw-r--r-- mm/Makefile | 3
-rw-r--r-- mm/bpf_memcontrol.c | 193
-rw-r--r-- mm/memcontrol-v1.h | 1
-rw-r--r-- mm/memcontrol.c | 16
-rw-r--r-- net/bpf/test_run.c | 1
-rw-r--r-- net/core/bpf_sk_storage.c | 21
-rw-r--r-- net/core/filter.c | 34
-rw-r--r-- net/core/skmsg.c | 30
-rw-r--r-- net/core/xdp.c | 2
-rw-r--r-- net/ipv4/tcp_bpf.c | 25
-rw-r--r-- net/ipv4/udp_bpf.c | 23
-rw-r--r-- net/netfilter/nf_conntrack_bpf.c | 22
-rw-r--r-- net/netfilter/nf_flow_table_bpf.c | 2
-rw-r--r-- net/netfilter/nf_nat_bpf.c | 2
-rw-r--r-- net/sched/bpf_qdisc.c | 20
-rw-r--r-- net/xfrm/xfrm_state_bpf.c | 2
-rw-r--r-- scripts/Makefile.btf | 21
-rw-r--r-- scripts/Makefile.modfinal | 5
-rw-r--r-- scripts/Makefile.vmlinux | 2
-rwxr-xr-x scripts/gen-btf.sh | 147
-rwxr-xr-x scripts/link-vmlinux.sh | 43
-rw-r--r-- tools/bpf/bpftool/Documentation/bpftool-net.rst | 30
-rw-r--r-- tools/bpf/bpftool/Makefile | 4
-rw-r--r-- tools/bpf/bpftool/bash-completion/bpftool | 9
-rw-r--r-- tools/bpf/bpftool/common.c | 1
-rw-r--r-- tools/bpf/bpftool/gen.c | 8
-rw-r--r-- tools/bpf/bpftool/net.c | 31
-rw-r--r-- tools/bpf/resolve_btfids/Makefile | 3
-rw-r--r-- tools/bpf/resolve_btfids/main.c | 954
-rw-r--r-- tools/include/uapi/linux/bpf.h | 28
-rw-r--r-- tools/lib/bpf/bpf.c | 20
-rw-r--r-- tools/lib/bpf/bpf.h | 29
-rw-r--r-- tools/lib/bpf/bpf_helpers.h | 6
-rw-r--r-- tools/lib/bpf/btf.c | 305
-rw-r--r-- tools/lib/bpf/btf.h | 42
-rw-r--r-- tools/lib/bpf/btf_dump.c | 9
-rw-r--r-- tools/lib/bpf/libbpf.c | 95
-rw-r--r-- tools/lib/bpf/libbpf.h | 37
-rw-r--r-- tools/lib/bpf/libbpf.map | 3
-rw-r--r-- tools/sched_ext/README.md | 1
-rw-r--r-- tools/testing/selftests/bpf/.gitignore | 4
-rw-r--r-- tools/testing/selftests/bpf/DENYLIST.s390x | 1
-rw-r--r-- tools/testing/selftests/bpf/Makefile | 26
-rw-r--r-- tools/testing/selftests/bpf/bench.c | 4
-rw-r--r-- tools/testing/selftests/bpf/bench.h | 1
-rw-r--r-- tools/testing/selftests/bpf/benchs/bench_trigger.c | 1
-rwxr-xr-x tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh | 5
-rw-r--r-- tools/testing/selftests/bpf/bpf_experimental.h | 85
-rw-r--r-- tools/testing/selftests/bpf/bpf_kfuncs.h | 3
-rw-r--r-- tools/testing/selftests/bpf/bpftool_helpers.c | 74
-rw-r--r-- tools/testing/selftests/bpf/bpftool_helpers.h | 11
-rw-r--r-- tools/testing/selftests/bpf/cgroup_iter_memcg.h | 18
-rw-r--r-- tools/testing/selftests/bpf/map_tests/task_storage_map.c | 128
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/arena_list.c | 20
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/bpf_gotox.c | 208
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/bpf_nf.c | 5
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/bpftool_maps_access.c | 371
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/bpftool_metadata.c | 144
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/btf_dump.c | 4
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/btf_permute.c | 244
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/cgroup_iter.c | 12
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/cgroup_iter_memcg.c | 223
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c | 2
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/exe_ctx.c | 59
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/fsession_test.c | 140
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/get_func_args_test.c | 4
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c | 2
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/iters.c | 8
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/kfunc_implicit_args.c | 10
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c | 44
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/map_kptr.c | 23
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/percpu_alloc.c | 335
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/resolve_btfids.c | 4
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c | 7
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/sockmap_basic.c | 294
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c | 120
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/string_kfuncs.c | 1
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/tailcalls.c | 74
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/task_local_data.h | 4
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/task_local_storage.c | 10
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/test_struct_ops_assoc.c | 191
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/test_task_local_data.c | 2
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/test_xsk.c | 4
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/timer.c | 250
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/timer_start_deadlock.c | 33
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/timer_start_delete_race.c | 137
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/tracing_failure.c | 2
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/verifier.c | 10
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/wq.c | 5
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c | 19
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/xdp_pull_data.c | 16
-rw-r--r-- tools/testing/selftests/bpf/progs/arena_list.c | 11
-rw-r--r-- tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c | 6
-rw-r--r-- tools/testing/selftests/bpf/progs/cgroup_iter_memcg.c | 39
-rw-r--r-- tools/testing/selftests/bpf/progs/compute_live_registers.c | 41
-rw-r--r-- tools/testing/selftests/bpf/progs/cpumask_failure.c | 2
-rw-r--r-- tools/testing/selftests/bpf/progs/dynptr_fail.c | 2
-rw-r--r-- tools/testing/selftests/bpf/progs/file_reader.c | 2
-rw-r--r-- tools/testing/selftests/bpf/progs/free_timer.c | 10
-rw-r--r-- tools/testing/selftests/bpf/progs/fsession_test.c | 179
-rw-r--r-- tools/testing/selftests/bpf/progs/get_func_args_test.c | 84
-rw-r--r-- tools/testing/selftests/bpf/progs/get_func_ip_test.c | 23
-rw-r--r-- tools/testing/selftests/bpf/progs/iters.c | 140
-rw-r--r-- tools/testing/selftests/bpf/progs/iters_css.c | 9
-rw-r--r-- tools/testing/selftests/bpf/progs/kfunc_implicit_args.c | 41
-rw-r--r-- tools/testing/selftests/bpf/progs/kprobe_multi_override.c | 15
-rw-r--r-- tools/testing/selftests/bpf/progs/kprobe_multi_session_cookie.c | 15
-rw-r--r-- tools/testing/selftests/bpf/progs/local_storage.c | 19
-rw-r--r-- tools/testing/selftests/bpf/progs/map_kptr.c | 18
-rw-r--r-- tools/testing/selftests/bpf/progs/map_kptr_fail.c | 4
-rw-r--r-- tools/testing/selftests/bpf/progs/percpu_alloc_array.c | 32
-rw-r--r-- tools/testing/selftests/bpf/progs/rbtree_fail.c | 2
-rw-r--r-- tools/testing/selftests/bpf/progs/rcu_read_lock.c | 10
-rw-r--r-- tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c | 38
-rw-r--r-- tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c | 12
-rw-r--r-- tools/testing/selftests/bpf/progs/stacktrace_ips.c | 27
-rw-r--r-- tools/testing/selftests/bpf/progs/stream.c | 53
-rw-r--r-- tools/testing/selftests/bpf/progs/stream_fail.c | 6
-rw-r--r-- tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c | 6
-rw-r--r-- tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c | 1
-rw-r--r-- tools/testing/selftests/bpf/progs/string_kfuncs_success.c | 7
-rw-r--r-- tools/testing/selftests/bpf/progs/struct_ops_assoc.c | 105
-rw-r--r-- tools/testing/selftests/bpf/progs/struct_ops_assoc_in_timer.c | 77
-rw-r--r-- tools/testing/selftests/bpf/progs/struct_ops_assoc_reuse.c | 75
-rw-r--r-- tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__wrong_type.c | 2
-rw-r--r-- tools/testing/selftests/bpf/progs/tailcall_sleepable.c | 43
-rw-r--r-- tools/testing/selftests/bpf/progs/task_local_data.bpf.h | 2
-rw-r--r-- tools/testing/selftests/bpf/progs/task_ls_recursion.c | 14
-rw-r--r-- tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c | 7
-rw-r--r-- tools/testing/selftests/bpf/progs/task_work.c | 7
-rw-r--r-- tools/testing/selftests/bpf/progs/task_work_fail.c | 8
-rw-r--r-- tools/testing/selftests/bpf/progs/task_work_stress.c | 4
-rw-r--r-- tools/testing/selftests/bpf/progs/test_bpf_nf.c | 7
-rw-r--r-- tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c | 57
-rw-r--r-- tools/testing/selftests/bpf/progs/test_btf_decl_tag.c | 2
-rw-r--r-- tools/testing/selftests/bpf/progs/test_ctx.c | 48
-rw-r--r-- tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c | 5
-rw-r--r-- tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c | 2
-rw-r--r-- tools/testing/selftests/bpf/progs/test_sockmap_pass_prog.c | 14
-rw-r--r-- tools/testing/selftests/bpf/progs/test_tc_tunnel.c | 21
-rw-r--r-- tools/testing/selftests/bpf/progs/timer.c | 118
-rw-r--r-- tools/testing/selftests/bpf/progs/timer_start_deadlock.c | 70
-rw-r--r-- tools/testing/selftests/bpf/progs/timer_start_delete_race.c | 66
-rw-r--r-- tools/testing/selftests/bpf/progs/trigger_bench.c | 46
-rw-r--r-- tools/testing/selftests/bpf/progs/uprobe_multi_session.c | 7
-rw-r--r-- tools/testing/selftests/bpf/progs/uprobe_multi_session_cookie.c | 15
-rw-r--r-- tools/testing/selftests/bpf/progs/uprobe_multi_session_recursive.c | 11
-rw-r--r-- tools/testing/selftests/bpf/progs/verifier_arena.c | 223
-rw-r--r-- tools/testing/selftests/bpf/progs/verifier_arena_globals1.c | 87
-rw-r--r-- tools/testing/selftests/bpf/progs/verifier_arena_globals2.c | 49
-rw-r--r-- tools/testing/selftests/bpf/progs/verifier_arena_large.c | 50
-rw-r--r-- tools/testing/selftests/bpf/progs/verifier_async_cb_context.c | 8
-rw-r--r-- tools/testing/selftests/bpf/progs/verifier_bounds.c | 2
-rw-r--r-- tools/testing/selftests/bpf/progs/verifier_bswap.c | 43
-rw-r--r-- tools/testing/selftests/bpf/progs/verifier_default_trusted_ptr.c | 29
-rw-r--r-- tools/testing/selftests/bpf/progs/verifier_div_mod_bounds.c | 1149
-rw-r--r-- tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c | 2
-rw-r--r-- tools/testing/selftests/bpf/progs/verifier_helper_restricted.c | 111
-rw-r--r-- tools/testing/selftests/bpf/progs/verifier_jit_inline.c | 20
-rw-r--r-- tools/testing/selftests/bpf/progs/verifier_kfunc_prog_types.c | 6
-rw-r--r-- tools/testing/selftests/bpf/progs/verifier_linked_scalars.c | 336
-rw-r--r-- tools/testing/selftests/bpf/progs/verifier_lsm.c | 31
-rw-r--r-- tools/testing/selftests/bpf/progs/verifier_scalar_ids.c | 53
-rw-r--r-- tools/testing/selftests/bpf/progs/verifier_subreg.c | 153
-rw-r--r-- tools/testing/selftests/bpf/progs/verifier_unpriv.c | 22
-rw-r--r-- tools/testing/selftests/bpf/progs/verifier_value_illegal_alu.c | 7
-rw-r--r-- tools/testing/selftests/bpf/progs/verifier_xdp.c | 35
-rw-r--r-- tools/testing/selftests/bpf/progs/wq_failures.c | 4
-rwxr-xr-x tools/testing/selftests/bpf/test_bpftool_map.sh | 398
-rwxr-xr-x tools/testing/selftests/bpf/test_bpftool_metadata.sh | 85
-rw-r--r-- tools/testing/selftests/bpf/test_kmods/bpf_testmod-events.h | 10
-rw-r--r-- tools/testing/selftests/bpf/test_kmods/bpf_testmod.c | 122
-rw-r--r-- tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h | 12
-rw-r--r-- tools/testing/selftests/bpf/trace_helpers.h | 12
-rw-r--r-- tools/testing/selftests/bpf/verifier/calls.c | 2
-rw-r--r-- tools/testing/selftests/bpf/verifier/direct_value_access.c | 4
-rw-r--r-- tools/testing/selftests/bpf/verifier/precise.c | 4
-rw-r--r-- tools/testing/selftests/bpf/veristat.c | 2
-rw-r--r-- tools/testing/selftests/hid/progs/hid_bpf_helpers.h | 8
248 files changed, 13304 insertions, 2945 deletions
diff --git a/Documentation/bpf/bpf_prog_run.rst b/Documentation/bpf/bpf_prog_run.rst
index 4868c909df5c..81ef768c75a3 100644
--- a/Documentation/bpf/bpf_prog_run.rst
+++ b/Documentation/bpf/bpf_prog_run.rst
@@ -34,11 +34,12 @@ following types:
- ``BPF_PROG_TYPE_LWT_IN``
- ``BPF_PROG_TYPE_LWT_OUT``
- ``BPF_PROG_TYPE_LWT_XMIT``
-- ``BPF_PROG_TYPE_LWT_SEG6LOCAL``
- ``BPF_PROG_TYPE_FLOW_DISSECTOR``
- ``BPF_PROG_TYPE_STRUCT_OPS``
- ``BPF_PROG_TYPE_RAW_TRACEPOINT``
- ``BPF_PROG_TYPE_SYSCALL``
+- ``BPF_PROG_TYPE_TRACING``
+- ``BPF_PROG_TYPE_NETFILTER``
When using the ``BPF_PROG_RUN`` command, userspace supplies an input context
object and (for program types operating on network packets) a buffer containing
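As a reminder of how these program types are exercised, a minimal user-space invocation of BPF_PROG_RUN through libbpf looks like this (a sketch using the existing bpf_prog_test_run_opts() API; the field choices are illustrative):

    #include <bpf/bpf.h>
    #include <bpf/libbpf.h>

    /* Run a loaded program once against an input packet buffer and
     * return the program's return value (or a negative errno).
     */
    int run_once(int prog_fd, void *pkt, __u32 pkt_len)
    {
            LIBBPF_OPTS(bpf_test_run_opts, opts,
                    .data_in = pkt,
                    .data_size_in = pkt_len,
                    .repeat = 1,
            );
            int err = bpf_prog_test_run_opts(prog_fd, &opts);

            return err ? err : (int)opts.retval;
    }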
diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst
index e38941370b90..75e6c078e0e7 100644
--- a/Documentation/bpf/kfuncs.rst
+++ b/Documentation/bpf/kfuncs.rst
@@ -50,7 +50,70 @@ A wrapper kfunc is often needed when we need to annotate parameters of the
kfunc. Otherwise one may directly make the kfunc visible to the BPF program by
registering it with the BPF subsystem. See :ref:`BPF_kfunc_nodef`.
-2.2 Annotating kfunc parameters
+2.2 kfunc Parameters
+--------------------
+
+All kfuncs now require trusted arguments by default. This means that all
+pointer arguments must be valid, and all pointers to BTF objects must be
+passed in their unmodified form (at a zero offset, and without having been
+obtained from walking another pointer, with exceptions described below).
+
+There are two types of pointers to kernel objects which are considered "trusted":
+
+1. Pointers which are passed as tracepoint or struct_ops callback arguments.
+2. Pointers which were returned from a KF_ACQUIRE kfunc.
+
+Pointers to non-BTF objects (e.g. scalar pointers) may also be passed to
+kfuncs, and may have a non-zero offset.
+
+The definition of "valid" pointers is subject to change at any time, and has
+absolutely no ABI stability guarantees.
+
+As mentioned above, a nested pointer obtained from walking a trusted pointer is
+no longer trusted, with one exception. If a struct type has a field that is
+guaranteed to be valid (trusted or rcu, as in KF_RCU description below) as long
+as its parent pointer is valid, the following macros can be used to express
+that to the verifier:
+
+* ``BTF_TYPE_SAFE_TRUSTED``
+* ``BTF_TYPE_SAFE_RCU``
+* ``BTF_TYPE_SAFE_RCU_OR_NULL``
+
+For example,
+
+.. code-block:: c
+
+ BTF_TYPE_SAFE_TRUSTED(struct socket) {
+ struct sock *sk;
+ };
+
+or
+
+.. code-block:: c
+
+ BTF_TYPE_SAFE_RCU(struct task_struct) {
+ const cpumask_t *cpus_ptr;
+ struct css_set __rcu *cgroups;
+ struct task_struct __rcu *real_parent;
+ struct task_struct *group_leader;
+ };
+
+In other words, you must:
+
+1. Wrap the valid pointer type in a ``BTF_TYPE_SAFE_*`` macro.
+
+2. Specify the type and name of the valid nested field. This field must match
+ the field in the original type definition exactly.
+
+A new type declared by a ``BTF_TYPE_SAFE_*`` macro also needs to be emitted so
+that it appears in BTF. For example, ``BTF_TYPE_SAFE_TRUSTED(struct socket)``
+is emitted in the ``type_is_trusted()`` function as follows:
+
+.. code-block:: c
+
+ BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct socket));
+
+2.3 Annotating kfunc parameters
-------------------------------
Similar to BPF helpers, there is sometime need for additional context required
@@ -58,7 +121,7 @@ by the verifier to make the usage of kernel functions safer and more useful.
Hence, we can annotate a parameter by suffixing the name of the argument of the
kfunc with a __tag, where tag may be one of the supported annotations.
-2.2.1 __sz Annotation
+2.3.1 __sz Annotation
---------------------
This annotation is used to indicate a memory and size pair in the argument list.
@@ -74,7 +137,7 @@ argument as its size. By default, without __sz annotation, the size of the type
of the pointer is used. Without __sz annotation, a kfunc cannot accept a void
pointer.
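For readers without the full file at hand, a __sz memory/size pair typically looks like the following sketch (in the spirit of the bpf_memzero example used in the complete kfuncs.rst text):

    /* The verifier proves that `mem` points to at least mem__sz bytes
     * of readable and writable memory before allowing the call.
     */
    __bpf_kfunc void bpf_memzero(void *mem, int mem__sz)
    {
            memset(mem, 0, mem__sz);
    }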
-2.2.2 __k Annotation
+2.3.2 __k Annotation
--------------------
This annotation is only understood for scalar arguments, where it indicates that
@@ -98,7 +161,7 @@ Hence, whenever a constant scalar argument is accepted by a kfunc which is not a
size parameter, and the value of the constant matters for program safety, __k
suffix should be used.
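A hypothetical __k kfunc, for illustration only (the name and operation are invented here; a real upstream example is the local_type_id__k argument of bpf_obj_new):

    /* `bits__k` must be a constant known to the verifier at load time,
     * so the shift amount can be validated statically.
     */
    __bpf_kfunc u64 bpf_shl64(u64 val, u32 bits__k)
    {
            return val << bits__k;
    }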
-2.2.3 __uninit Annotation
+2.3.3 __uninit Annotation
-------------------------
This annotation is used to indicate that the argument will be treated as
@@ -115,27 +178,36 @@ Here, the dynptr will be treated as an uninitialized dynptr. Without this
annotation, the verifier will reject the program if the dynptr passed in is
not initialized.
-2.2.4 __opt Annotation
--------------------------
+2.3.4 __nullable Annotation
+---------------------------
-This annotation is used to indicate that the buffer associated with an __sz or __szk
-argument may be null. If the function is passed a nullptr in place of the buffer,
-the verifier will not check that length is appropriate for the buffer. The kfunc is
-responsible for checking if this buffer is null before using it.
+This annotation is used to indicate that the pointer argument may be NULL.
+The verifier will allow passing NULL for such arguments.
An example is given below::
- __bpf_kfunc void *bpf_dynptr_slice(..., void *buffer__opt, u32 buffer__szk)
+ __bpf_kfunc void bpf_task_release(struct task_struct *task__nullable)
{
...
}
-Here, the buffer may be null. If buffer is not null, it at least of size buffer_szk.
-Either way, the returned buffer is either NULL, or of size buffer_szk. Without this
-annotation, the verifier will reject the program if a null pointer is passed in with
-a nonzero size.
+Here, the task pointer may be NULL. The kfunc is responsible for checking if
+the pointer is NULL before dereferencing it.
+
+The __nullable annotation can be combined with other annotations. For example,
+when used with __sz or __szk annotations for memory and size pairs, the
+verifier will skip size validation when a NULL pointer is passed, but will
+still process the size argument to extract constant size information when
+needed::
+
+ __bpf_kfunc void *bpf_dynptr_slice(..., void *buffer__nullable,
+ u32 buffer__szk)
+
+Here, the buffer may be NULL. If the buffer is not NULL, it must be at least
+buffer__szk bytes in size. The kfunc is responsible for checking if the buffer
+is NULL before using it.
-2.2.5 __str Annotation
+2.3.5 __str Annotation
----------------------------
This annotation is used to indicate that the argument is a constant string.
@@ -160,26 +232,9 @@ Or::
...
}
-2.2.6 __prog Annotation
----------------------------
-This annotation is used to indicate that the argument needs to be fixed up to
-the bpf_prog_aux of the caller BPF program. Any value passed into this argument
-is ignored, and rewritten by the verifier.
-
-An example is given below::
-
- __bpf_kfunc int bpf_wq_set_callback_impl(struct bpf_wq *wq,
- int (callback_fn)(void *map, int *key, void *value),
- unsigned int flags,
- void *aux__prog)
- {
- struct bpf_prog_aux *aux = aux__prog;
- ...
- }
-
.. _BPF_kfunc_nodef:
-2.3 Using an existing kernel function
+2.4 Using an existing kernel function
-------------------------------------
When an existing function in the kernel is fit for consumption by BPF programs,
@@ -187,7 +242,7 @@ it can be directly registered with the BPF subsystem. However, care must still
be taken to review the context in which it will be invoked by the BPF program
and whether it is safe to do so.
-2.4 Annotating kfuncs
+2.5 Annotating kfuncs
---------------------
In addition to kfuncs' arguments, verifier may need more information about the
@@ -216,7 +271,7 @@ protected. An example is given below::
...
}
-2.4.1 KF_ACQUIRE flag
+2.5.1 KF_ACQUIRE flag
---------------------
The KF_ACQUIRE flag is used to indicate that the kfunc returns a pointer to a
@@ -226,7 +281,7 @@ referenced kptr (by invoking bpf_kptr_xchg). If not, the verifier fails the
loading of the BPF program until no lingering references remain in all possible
explored states of the program.
-2.4.2 KF_RET_NULL flag
+2.5.2 KF_RET_NULL flag
----------------------
The KF_RET_NULL flag is used to indicate that the pointer returned by the kfunc
@@ -235,87 +290,21 @@ returned from the kfunc before making use of it (dereferencing or passing to
another helper). This flag is often used in pairing with KF_ACQUIRE flag, but
both are orthogonal to each other.
-2.4.3 KF_RELEASE flag
+2.5.3 KF_RELEASE flag
---------------------
The KF_RELEASE flag is used to indicate that the kfunc releases the pointer
passed in to it. There can be only one referenced pointer that can be passed
in. All copies of the pointer being released are invalidated as a result of
-invoking kfunc with this flag. KF_RELEASE kfuncs automatically receive the
-protection afforded by the KF_TRUSTED_ARGS flag described below.
-
-2.4.4 KF_TRUSTED_ARGS flag
---------------------------
+invoking kfunc with this flag.
-The KF_TRUSTED_ARGS flag is used for kfuncs taking pointer arguments. It
-indicates that the all pointer arguments are valid, and that all pointers to
-BTF objects have been passed in their unmodified form (that is, at a zero
-offset, and without having been obtained from walking another pointer, with one
-exception described below).
-
-There are two types of pointers to kernel objects which are considered "valid":
-
-1. Pointers which are passed as tracepoint or struct_ops callback arguments.
-2. Pointers which were returned from a KF_ACQUIRE kfunc.
-
-Pointers to non-BTF objects (e.g. scalar pointers) may also be passed to
-KF_TRUSTED_ARGS kfuncs, and may have a non-zero offset.
-
-The definition of "valid" pointers is subject to change at any time, and has
-absolutely no ABI stability guarantees.
-
-As mentioned above, a nested pointer obtained from walking a trusted pointer is
-no longer trusted, with one exception. If a struct type has a field that is
-guaranteed to be valid (trusted or rcu, as in KF_RCU description below) as long
-as its parent pointer is valid, the following macros can be used to express
-that to the verifier:
-
-* ``BTF_TYPE_SAFE_TRUSTED``
-* ``BTF_TYPE_SAFE_RCU``
-* ``BTF_TYPE_SAFE_RCU_OR_NULL``
-
-For example,
-
-.. code-block:: c
-
- BTF_TYPE_SAFE_TRUSTED(struct socket) {
- struct sock *sk;
- };
-
-or
-
-.. code-block:: c
-
- BTF_TYPE_SAFE_RCU(struct task_struct) {
- const cpumask_t *cpus_ptr;
- struct css_set __rcu *cgroups;
- struct task_struct __rcu *real_parent;
- struct task_struct *group_leader;
- };
-
-In other words, you must:
-
-1. Wrap the valid pointer type in a ``BTF_TYPE_SAFE_*`` macro.
-
-2. Specify the type and name of the valid nested field. This field must match
- the field in the original type definition exactly.
-
-A new type declared by a ``BTF_TYPE_SAFE_*`` macro also needs to be emitted so
-that it appears in BTF. For example, ``BTF_TYPE_SAFE_TRUSTED(struct socket)``
-is emitted in the ``type_is_trusted()`` function as follows:
-
-.. code-block:: c
-
- BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct socket));
-
-
-2.4.5 KF_SLEEPABLE flag
+2.5.4 KF_SLEEPABLE flag
-----------------------
The KF_SLEEPABLE flag is used for kfuncs that may sleep. Such kfuncs can only
be called by sleepable BPF programs (BPF_F_SLEEPABLE).
-2.4.6 KF_DESTRUCTIVE flag
+2.5.5 KF_DESTRUCTIVE flag
--------------------------
The KF_DESTRUCTIVE flag is used to indicate functions calling which is
@@ -324,18 +313,19 @@ rebooting or panicking. Due to this additional restrictions apply to these
calls. At the moment they only require CAP_SYS_BOOT capability, but more can be
added later.
-2.4.7 KF_RCU flag
+2.5.6 KF_RCU flag
-----------------
-The KF_RCU flag is a weaker version of KF_TRUSTED_ARGS. The kfuncs marked with
-KF_RCU expect either PTR_TRUSTED or MEM_RCU arguments. The verifier guarantees
-that the objects are valid and there is no use-after-free. The pointers are not
-NULL, but the object's refcount could have reached zero. The kfuncs need to
-consider doing refcnt != 0 check, especially when returning a KF_ACQUIRE
-pointer. Note as well that a KF_ACQUIRE kfunc that is KF_RCU should very likely
-also be KF_RET_NULL.
+The KF_RCU flag allows kfuncs to opt out of the default trusted args
+requirement and accept RCU pointers with weaker guarantees. The kfuncs marked
+with KF_RCU expect either PTR_TRUSTED or MEM_RCU arguments. The verifier
+guarantees that the objects are valid and there is no use-after-free. The
+pointers are not NULL, but the object's refcount could have reached zero. The
+kfuncs need to consider doing refcnt != 0 check, especially when returning a
+KF_ACQUIRE pointer. Note as well that a KF_ACQUIRE kfunc that is KF_RCU should
+very likely also be KF_RET_NULL.
-2.4.8 KF_RCU_PROTECTED flag
+2.5.7 KF_RCU_PROTECTED flag
---------------------------
The KF_RCU_PROTECTED flag is used to indicate that the kfunc must be invoked in
@@ -354,7 +344,7 @@ RCU protection but do not take RCU protected arguments.
.. _KF_deprecated_flag:
-2.4.9 KF_DEPRECATED flag
+2.5.8 KF_DEPRECATED flag
------------------------
The KF_DEPRECATED flag is used for kfuncs which are scheduled to be
@@ -374,7 +364,39 @@ encouraged to make their use-cases known as early as possible, and participate
in upstream discussions regarding whether to keep, change, deprecate, or remove
those kfuncs if and when such discussions occur.
-2.5 Registering the kfuncs
+2.5.9 KF_IMPLICIT_ARGS flag
+------------------------------------
+
+The KF_IMPLICIT_ARGS flag is used to indicate that the BPF signature
+of the kfunc is different from its kernel signature, and the values
+for implicit arguments are provided at load time by the verifier.
+
+Only arguments of specific types are implicit.
+Currently only ``struct bpf_prog_aux *`` type is supported.
+
+A kfunc with KF_IMPLICIT_ARGS flag therefore has two types in BTF: one
+function matching the kernel declaration (with _impl suffix in the
+name by convention), and another matching the intended BPF API.
+
+The verifier only allows calls to the non-_impl version of a kfunc,
+which uses a signature without the implicit arguments.
+
+Example declaration:
+
+.. code-block:: c
+
+ __bpf_kfunc int bpf_task_work_schedule_signal(struct task_struct *task, struct bpf_task_work *tw,
+ void *map__map, bpf_task_work_callback_t callback,
+ struct bpf_prog_aux *aux) { ... }
+
+Example usage in BPF program:
+
+.. code-block:: c
+
+ /* note that the last argument is omitted */
+ bpf_task_work_schedule_signal(task, &work->tw, &arrmap, task_work_callback);
+
+2.6 Registering the kfuncs
--------------------------
Once the kfunc is prepared for use, the final step to making it visible is
@@ -397,7 +419,7 @@ type. An example is shown below::
}
late_initcall(init_subsystem);
-2.6 Specifying no-cast aliases with ___init
+2.7 Specifying no-cast aliases with ___init
--------------------------------------------
The verifier will always enforce that the BTF type of a pointer passed to a
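The registration boilerplate that section 2.6 (renumbered above) refers to follows a standard pattern; a self-contained sketch with illustrative kfunc and set names:

    BTF_KFUNCS_START(example_kfunc_ids)
    BTF_ID_FLAGS(func, bpf_example_acquire, KF_ACQUIRE | KF_RET_NULL)
    BTF_ID_FLAGS(func, bpf_example_release, KF_RELEASE)
    BTF_KFUNCS_END(example_kfunc_ids)

    static const struct btf_kfunc_id_set example_kfunc_set = {
            .owner = THIS_MODULE,
            .set   = &example_kfunc_ids,
    };

    static int __init init_subsystem(void)
    {
            return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING,
                                             &example_kfunc_set);
    }
    late_initcall(init_subsystem);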
diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst
index 0cf97dbab29d..6b373e193548 100644
--- a/Documentation/process/changes.rst
+++ b/Documentation/process/changes.rst
@@ -38,7 +38,7 @@ bash 4.2 bash --version
binutils 2.30 ld -v
flex 2.5.35 flex --version
bison 2.0 bison --version
-pahole 1.16 pahole --version
+pahole 1.22 pahole --version
util-linux 2.10o mount --version
kmod 13 depmod -V
e2fsprogs 1.41.4 e2fsck -V
@@ -143,7 +143,7 @@ pahole
Since Linux 5.2, if CONFIG_DEBUG_INFO_BTF is selected, the build system
generates BTF (BPF Type Format) from DWARF in vmlinux, a bit later from kernel
-modules as well. This requires pahole v1.16 or later.
+modules as well. This requires pahole v1.22 or later.
It is found in the 'dwarves' or 'pahole' distro packages or from
https://fedorapeople.org/~acme/dwarves/.
diff --git a/Documentation/scheduler/sched-ext.rst b/Documentation/scheduler/sched-ext.rst
index 404fe6126a76..9e2882d937b4 100644
--- a/Documentation/scheduler/sched-ext.rst
+++ b/Documentation/scheduler/sched-ext.rst
@@ -43,7 +43,6 @@ options should be enabled to use sched_ext:
CONFIG_DEBUG_INFO_BTF=y
CONFIG_BPF_JIT_ALWAYS_ON=y
CONFIG_BPF_JIT_DEFAULT_ON=y
- CONFIG_PAHOLE_HAS_SPLIT_BTF=y
CONFIG_PAHOLE_HAS_BTF_TAG=y
sched_ext is used only when the BPF scheduler is loaded and running.
diff --git a/MAINTAINERS b/MAINTAINERS
index 67a650d11d19..75c33ba3db87 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4772,6 +4772,7 @@ F: net/sched/act_bpf.c
F: net/sched/cls_bpf.c
F: samples/bpf/
F: scripts/bpf_doc.py
+F: scripts/gen-btf.sh
F: scripts/Makefile.btf
F: scripts/pahole-version.sh
F: tools/bpf/
@@ -4804,6 +4805,15 @@ L: bpf@vger.kernel.org
S: Maintained
F: tools/lib/bpf/
+BPF [MEMORY MANAGEMENT EXTENSIONS]
+M: Roman Gushchin <roman.gushchin@linux.dev>
+M: JP Kobryn <inwardvessel@gmail.com>
+M: Shakeel Butt <shakeel.butt@linux.dev>
+L: bpf@vger.kernel.org
+L: linux-mm@kvack.org
+S: Maintained
+F: mm/bpf_memcontrol.c
+
BPF [MISC]
L: bpf@vger.kernel.org
S: Odd Fixes
@@ -4853,6 +4863,7 @@ S: Maintained
F: Documentation/bpf/prog_lsm.rst
F: include/linux/bpf_lsm.h
F: kernel/bpf/bpf_lsm.c
+F: kernel/bpf/bpf_lsm_proto.c
F: kernel/trace/bpf_trace.c
F: security/bpf/
diff --git a/Makefile b/Makefile
index 6b283d201799..d957ece15ca6 100644
--- a/Makefile
+++ b/Makefile
@@ -708,11 +708,12 @@ endif
# The expansion should be delayed until arch/$(SRCARCH)/Makefile is included.
# Some architectures define CROSS_COMPILE in arch/$(SRCARCH)/Makefile.
-# CC_VERSION_TEXT and RUSTC_VERSION_TEXT are referenced from Kconfig (so they
-# need export), and from include/config/auto.conf.cmd to detect the compiler
-# upgrade.
+# CC_VERSION_TEXT, RUSTC_VERSION_TEXT and PAHOLE_VERSION are referenced from
+# Kconfig (so they need export), and from include/config/auto.conf.cmd to
+# detect the version changes between builds.
CC_VERSION_TEXT = $(subst $(pound),,$(shell LC_ALL=C $(CC) --version 2>/dev/null | head -n 1))
RUSTC_VERSION_TEXT = $(subst $(pound),,$(shell $(RUSTC) --version 2>/dev/null))
+PAHOLE_VERSION = $(shell $(srctree)/scripts/pahole-version.sh $(PAHOLE))
ifneq ($(findstring clang,$(CC_VERSION_TEXT)),)
include $(srctree)/scripts/Makefile.clang
@@ -733,7 +734,7 @@ ifdef config-build
# KBUILD_DEFCONFIG may point out an alternative default configuration
# used for 'make defconfig'
include $(srctree)/arch/$(SRCARCH)/Makefile
-export KBUILD_DEFCONFIG KBUILD_KCONFIG CC_VERSION_TEXT RUSTC_VERSION_TEXT
+export KBUILD_DEFCONFIG KBUILD_KCONFIG CC_VERSION_TEXT RUSTC_VERSION_TEXT PAHOLE_VERSION
config: outputmakefile scripts_basic FORCE
$(Q)$(MAKE) $(build)=scripts/kconfig $@
@@ -1928,12 +1929,18 @@ clean: private rm-files := Module.symvers modules.nsdeps compile_commands.json
PHONY += prepare
# now expand this into a simple variable to reduce the cost of shell evaluations
prepare: CC_VERSION_TEXT := $(CC_VERSION_TEXT)
+prepare: PAHOLE_VERSION := $(PAHOLE_VERSION)
prepare:
@if [ "$(CC_VERSION_TEXT)" != "$(CONFIG_CC_VERSION_TEXT)" ]; then \
echo >&2 "warning: the compiler differs from the one used to build the kernel"; \
echo >&2 " The kernel was built by: $(CONFIG_CC_VERSION_TEXT)"; \
echo >&2 " You are using: $(CC_VERSION_TEXT)"; \
fi
+ @if [ "$(PAHOLE_VERSION)" != "$(CONFIG_PAHOLE_VERSION)" ]; then \
+ echo >&2 "warning: pahole version differs from the one used to build the kernel"; \
+ echo >&2 " The kernel was built with: $(CONFIG_PAHOLE_VERSION)"; \
+ echo >&2 " You are using: $(PAHOLE_VERSION)"; \
+ fi
PHONY += help
help:
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 5ce82edc508e..d6f278b04acf 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -118,7 +118,7 @@ static inline void emit(const u32 insn, struct jit_ctx *ctx)
static inline void emit_u32_data(const u32 data, struct jit_ctx *ctx)
{
if (ctx->image != NULL && ctx->write)
- ctx->image[ctx->idx] = data;
+ ctx->image[ctx->idx] = (__force __le32)data;
ctx->idx++;
}
@@ -2503,6 +2503,12 @@ static bool is_struct_ops_tramp(const struct bpf_tramp_links *fentry_links)
fentry_links->links[0]->link.type == BPF_LINK_TYPE_STRUCT_OPS;
}
+static void store_func_meta(struct jit_ctx *ctx, u64 func_meta, int func_meta_off)
+{
+ emit_a64_mov_i64(A64_R(10), func_meta, ctx);
+ emit(A64_STR64I(A64_R(10), A64_SP, func_meta_off), ctx);
+}
+
/* Based on the x86's implementation of arch_prepare_bpf_trampoline().
*
* bpf prog and function entry before bpf trampoline hooked:
@@ -2526,7 +2532,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
int regs_off;
int retval_off;
int bargs_off;
- int nfuncargs_off;
+ int func_meta_off;
int ip_off;
int run_ctx_off;
int oargs_off;
@@ -2537,6 +2543,9 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
bool save_ret;
__le32 **branches = NULL;
bool is_struct_ops = is_struct_ops_tramp(fentry);
+ int cookie_off, cookie_cnt, cookie_bargs_off;
+ int fsession_cnt = bpf_fsession_cnt(tlinks);
+ u64 func_meta;
/* trampoline stack layout:
* [ parent ip ]
@@ -2555,10 +2564,14 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
* [ ... ]
* SP + bargs_off [ arg reg 1 ] for bpf
*
- * SP + nfuncargs_off [ arg regs count ]
+ * SP + func_meta_off [ regs count, etc ]
*
* SP + ip_off [ traced function ] BPF_TRAMP_F_IP_ARG flag
*
+ * [ stack cookie N ]
+ * [ ... ]
+ * SP + cookie_off [ stack cookie 1 ]
+ *
* SP + run_ctx_off [ bpf_tramp_run_ctx ]
*
* [ stack arg N ]
@@ -2575,13 +2588,18 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
/* room for bpf_tramp_run_ctx */
stack_size += round_up(sizeof(struct bpf_tramp_run_ctx), 8);
+ cookie_off = stack_size;
+ /* room for session cookies */
+ cookie_cnt = bpf_fsession_cookie_cnt(tlinks);
+ stack_size += cookie_cnt * 8;
+
ip_off = stack_size;
/* room for IP address argument */
if (flags & BPF_TRAMP_F_IP_ARG)
stack_size += 8;
- nfuncargs_off = stack_size;
- /* room for args count */
+ func_meta_off = stack_size;
+ /* room for function metadata, such as regs count */
stack_size += 8;
bargs_off = stack_size;
@@ -2639,9 +2657,9 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
emit(A64_STR64I(A64_R(10), A64_SP, ip_off), ctx);
}
- /* save arg regs count*/
- emit(A64_MOVZ(1, A64_R(10), nfuncargs, 0), ctx);
- emit(A64_STR64I(A64_R(10), A64_SP, nfuncargs_off), ctx);
+ /* save function metadata */
+ func_meta = nfuncargs;
+ store_func_meta(ctx, func_meta, func_meta_off);
/* save args for bpf */
save_args(ctx, bargs_off, oargs_off, m, a, false);
@@ -2659,10 +2677,27 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
emit_call((const u64)__bpf_tramp_enter, ctx);
}
- for (i = 0; i < fentry->nr_links; i++)
+ if (fsession_cnt) {
+ /* clear all the session cookies' values */
+ emit(A64_MOVZ(1, A64_R(10), 0, 0), ctx);
+ for (int i = 0; i < cookie_cnt; i++)
+ emit(A64_STR64I(A64_R(10), A64_SP, cookie_off + 8 * i), ctx);
+ /* clear the return value to make sure fentry always gets 0 */
+ emit(A64_STR64I(A64_R(10), A64_SP, retval_off), ctx);
+ }
+
+ cookie_bargs_off = (bargs_off - cookie_off) / 8;
+ for (i = 0; i < fentry->nr_links; i++) {
+ if (bpf_prog_calls_session_cookie(fentry->links[i])) {
+ u64 meta = func_meta | (cookie_bargs_off << BPF_TRAMP_COOKIE_INDEX_SHIFT);
+
+ store_func_meta(ctx, meta, func_meta_off);
+ cookie_bargs_off--;
+ }
invoke_bpf_prog(ctx, fentry->links[i], bargs_off,
retval_off, run_ctx_off,
flags & BPF_TRAMP_F_RET_FENTRY_RET);
+ }
if (fmod_ret->nr_links) {
branches = kcalloc(fmod_ret->nr_links, sizeof(__le32 *),
@@ -2694,9 +2729,22 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
*branches[i] = cpu_to_le32(A64_CBNZ(1, A64_R(10), offset));
}
- for (i = 0; i < fexit->nr_links; i++)
+ /* set the "is_return" flag for fsession */
+ func_meta |= (1ULL << BPF_TRAMP_IS_RETURN_SHIFT);
+ if (fsession_cnt)
+ store_func_meta(ctx, func_meta, func_meta_off);
+
+ cookie_bargs_off = (bargs_off - cookie_off) / 8;
+ for (i = 0; i < fexit->nr_links; i++) {
+ if (bpf_prog_calls_session_cookie(fexit->links[i])) {
+ u64 meta = func_meta | (cookie_bargs_off << BPF_TRAMP_COOKIE_INDEX_SHIFT);
+
+ store_func_meta(ctx, meta, func_meta_off);
+ cookie_bargs_off--;
+ }
invoke_bpf_prog(ctx, fexit->links[i], bargs_off, retval_off,
run_ctx_off, false);
+ }
if (flags & BPF_TRAMP_F_CALL_ORIG) {
im->ip_epilogue = ctx->ro_image + ctx->idx;
@@ -2746,6 +2794,11 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
return ctx->idx;
}
+bool bpf_jit_supports_fsession(void)
+{
+ return true;
+}
+
int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
struct bpf_tramp_links *tlinks, void *func_addr)
{
@@ -3132,7 +3185,7 @@ void bpf_jit_free(struct bpf_prog *prog)
bpf_jit_binary_pack_finalize(jit_data->ro_header, jit_data->header);
kfree(jit_data);
}
- prog->bpf_func -= cfi_get_offset();
+ prog->bpf_func = (void *)prog->bpf_func - cfi_get_offset();
hdr = bpf_jit_binary_pack_hdr(prog);
bpf_jit_binary_pack_free(hdr, NULL);
priv_stack_ptr = prog->aux->priv_stack_ptr;
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 80527299f859..53bf2cf7ff6f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -336,6 +336,7 @@ config X86
select SCHED_SMT if SMP
select ARCH_SUPPORTS_SCHED_CLUSTER if SMP
select ARCH_SUPPORTS_SCHED_MC if SMP
+ select HAVE_SINGLE_FTRACE_DIRECT_OPS if X86_64 && DYNAMIC_FTRACE_WITH_DIRECT_CALLS
config INSTRUCTION_DECODER
def_bool y
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index b08c95872eed..c56e1e63b893 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -57,7 +57,7 @@ arch_ftrace_get_regs(struct ftrace_regs *fregs)
}
#define arch_ftrace_partial_regs(regs) do { \
- regs->flags &= ~X86_EFLAGS_FIXED; \
+ regs->flags |= X86_EFLAGS_FIXED; \
regs->cs = __KERNEL_CS; \
} while (0)
diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index a132608265f6..62c1c93aa1c6 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -364,6 +364,9 @@ SYM_CODE_START(return_to_handler)
UNWIND_HINT_UNDEFINED
ANNOTATE_NOENDBR
+ /* Store original rsp for pt_regs.sp value. */
+ movq %rsp, %rdi
+
/* Restore return_to_handler value that got eaten by previous ret instruction. */
subq $8, %rsp
UNWIND_HINT_FUNC
@@ -374,7 +377,7 @@ SYM_CODE_START(return_to_handler)
movq %rax, RAX(%rsp)
movq %rdx, RDX(%rsp)
movq %rbp, RBP(%rsp)
- movq %rsp, RSP(%rsp)
+ movq %rdi, RSP(%rsp)
movq %rsp, %rdi
call ftrace_return_to_handler
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index b0bac2a66eff..070ba80e39d7 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1300,12 +1300,23 @@ static void emit_st_r12(u8 **pprog, u32 size, u32 dst_reg, int off, int imm)
emit_st_index(pprog, size, dst_reg, X86_REG_R12, off, imm);
}
+static void emit_store_stack_imm64(u8 **pprog, int reg, int stack_off, u64 imm64)
+{
+ /*
+ * mov reg, imm64
+ * mov QWORD PTR [rbp + stack_off], reg
+ */
+ emit_mov_imm64(pprog, reg, imm64 >> 32, (u32) imm64);
+ emit_stx(pprog, BPF_DW, BPF_REG_FP, reg, stack_off);
+}
+
static int emit_atomic_rmw(u8 **pprog, u32 atomic_op,
u32 dst_reg, u32 src_reg, s16 off, u8 bpf_size)
{
u8 *prog = *pprog;
- EMIT1(0xF0); /* lock prefix */
+ if (atomic_op != BPF_XCHG)
+ EMIT1(0xF0); /* lock prefix */
maybe_emit_mod(&prog, dst_reg, src_reg, bpf_size == BPF_DW);
@@ -1347,7 +1358,9 @@ static int emit_atomic_rmw_index(u8 **pprog, u32 atomic_op, u32 size,
{
u8 *prog = *pprog;
- EMIT1(0xF0); /* lock prefix */
+ if (atomic_op != BPF_XCHG)
+ EMIT1(0xF0); /* lock prefix */
+
switch (size) {
case BPF_W:
EMIT1(add_3mod(0x40, dst_reg, src_reg, index_reg));
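The likely rationale for the two hunks above, not stated in the diff itself: BPF_XCHG lowers to the x86 XCHG instruction, which is implicitly locking whenever one operand is in memory, so the explicit LOCK prefix is redundant in that case.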
@@ -3081,13 +3094,19 @@ static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond)
static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
struct bpf_tramp_links *tl, int stack_size,
- int run_ctx_off, bool save_ret,
- void *image, void *rw_image)
+ int run_ctx_off, int func_meta_off, bool save_ret,
+ void *image, void *rw_image, u64 func_meta,
+ int cookie_off)
{
- int i;
+ int i, cur_cookie = (cookie_off - stack_size) / 8;
u8 *prog = *pprog;
for (i = 0; i < tl->nr_links; i++) {
+ if (tl->links[i]->link.prog->call_session_cookie) {
+ emit_store_stack_imm64(&prog, BPF_REG_0, -func_meta_off,
+ func_meta | (cur_cookie << BPF_TRAMP_COOKIE_INDEX_SHIFT));
+ cur_cookie--;
+ }
if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size,
run_ctx_off, save_ret, image, rw_image))
return -EINVAL;
@@ -3205,12 +3224,14 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
void *func_addr)
{
int i, ret, nr_regs = m->nr_args, stack_size = 0;
- int regs_off, nregs_off, ip_off, run_ctx_off, arg_stack_off, rbx_off;
+ int regs_off, func_meta_off, ip_off, run_ctx_off, arg_stack_off, rbx_off;
struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
void *orig_call = func_addr;
+ int cookie_off, cookie_cnt;
u8 **branches = NULL;
+ u64 func_meta;
u8 *prog;
bool save_ret;
@@ -3246,7 +3267,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
* [ ... ]
* RBP - regs_off [ reg_arg1 ] program's ctx pointer
*
- * RBP - nregs_off [ regs count ] always
+ * RBP - func_meta_off [ regs count, etc ] always
*
* RBP - ip_off [ traced function ] BPF_TRAMP_F_IP_ARG flag
*
@@ -3269,15 +3290,20 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
stack_size += nr_regs * 8;
regs_off = stack_size;
- /* regs count */
+ /* function metadata, such as regs count */
stack_size += 8;
- nregs_off = stack_size;
+ func_meta_off = stack_size;
if (flags & BPF_TRAMP_F_IP_ARG)
stack_size += 8; /* room for IP address argument */
ip_off = stack_size;
+ cookie_cnt = bpf_fsession_cookie_cnt(tlinks);
+ /* room for session cookies */
+ stack_size += cookie_cnt * 8;
+ cookie_off = stack_size;
+
stack_size += 8;
rbx_off = stack_size;
@@ -3345,20 +3371,13 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
/* mov QWORD PTR [rbp - rbx_off], rbx */
emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_6, -rbx_off);
- /* Store number of argument registers of the traced function:
- * mov rax, nr_regs
- * mov QWORD PTR [rbp - nregs_off], rax
- */
- emit_mov_imm64(&prog, BPF_REG_0, 0, (u32) nr_regs);
- emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -nregs_off);
+ func_meta = nr_regs;
+ /* Store number of argument registers of the traced function */
+ emit_store_stack_imm64(&prog, BPF_REG_0, -func_meta_off, func_meta);
if (flags & BPF_TRAMP_F_IP_ARG) {
- /* Store IP address of the traced function:
- * movabsq rax, func_addr
- * mov QWORD PTR [rbp - ip_off], rax
- */
- emit_mov_imm64(&prog, BPF_REG_0, (long) func_addr >> 32, (u32) (long) func_addr);
- emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -ip_off);
+ /* Store IP address of the traced function */
+ emit_store_stack_imm64(&prog, BPF_REG_0, -ip_off, (long)func_addr);
}
save_args(m, &prog, regs_off, false, flags);
@@ -3373,9 +3392,18 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
}
}
+ if (bpf_fsession_cnt(tlinks)) {
+ /* clear all the session cookies' values */
+ for (int i = 0; i < cookie_cnt; i++)
+ emit_store_stack_imm64(&prog, BPF_REG_0, -cookie_off + 8 * i, 0);
+ /* clear the return value to make sure fentry always gets 0 */
+ emit_store_stack_imm64(&prog, BPF_REG_0, -8, 0);
+ }
+
if (fentry->nr_links) {
- if (invoke_bpf(m, &prog, fentry, regs_off, run_ctx_off,
- flags & BPF_TRAMP_F_RET_FENTRY_RET, image, rw_image))
+ if (invoke_bpf(m, &prog, fentry, regs_off, run_ctx_off, func_meta_off,
+ flags & BPF_TRAMP_F_RET_FENTRY_RET, image, rw_image,
+ func_meta, cookie_off))
return -EINVAL;
}
@@ -3435,9 +3463,14 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
}
}
+ /* set the "is_return" flag for fsession */
+ func_meta |= (1ULL << BPF_TRAMP_IS_RETURN_SHIFT);
+ if (bpf_fsession_cnt(tlinks))
+ emit_store_stack_imm64(&prog, BPF_REG_0, -func_meta_off, func_meta);
+
if (fexit->nr_links) {
- if (invoke_bpf(m, &prog, fexit, regs_off, run_ctx_off,
- false, image, rw_image)) {
+ if (invoke_bpf(m, &prog, fexit, regs_off, run_ctx_off, func_meta_off,
+ false, image, rw_image, func_meta, cookie_off)) {
ret = -EINVAL;
goto cleanup;
}
@@ -4079,3 +4112,8 @@ bool bpf_jit_supports_timed_may_goto(void)
{
return true;
}
+
+bool bpf_jit_supports_fsession(void)
+{
+ return true;
+}
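With both JITs now reporting fsession support, a BPF-side user might look like the sketch below. The SEC() name and the reuse of the existing session kfuncs are assumptions based on the fsession description in the merge message, not the final UAPI:

    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>
    #include <bpf/bpf_tracing.h>

    /* kfuncs reused from kprobe sessions -- an assumption for fsession */
    extern __u64 *bpf_session_cookie(void) __ksym;
    extern bool bpf_session_is_return(void) __ksym;

    SEC("fsession/kernel_clone")   /* section name is an assumption */
    int BPF_PROG(trace_clone)
    {
            __u64 *cookie = bpf_session_cookie();

            if (!bpf_session_is_return()) {
                    if (cookie)
                            *cookie = bpf_ktime_get_ns(); /* entry timestamp */
                    return 0;
            }
            if (cookie)
                    bpf_printk("kernel_clone ran for %llu ns",
                               bpf_ktime_get_ns() - *cookie);
            return 0;
    }

    char LICENSE[] SEC("license") = "GPL";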
diff --git a/drivers/hid/bpf/hid_bpf_dispatch.c b/drivers/hid/bpf/hid_bpf_dispatch.c
index 9a06f9b0e4ef..892aca026ffa 100644
--- a/drivers/hid/bpf/hid_bpf_dispatch.c
+++ b/drivers/hid/bpf/hid_bpf_dispatch.c
@@ -295,9 +295,6 @@ hid_bpf_get_data(struct hid_bpf_ctx *ctx, unsigned int offset, const size_t rdwr
{
struct hid_bpf_ctx_kern *ctx_kern;
- if (!ctx)
- return NULL;
-
ctx_kern = container_of(ctx, struct hid_bpf_ctx_kern, ctx);
if (rdwr_buf_size + offset > ctx->allocated_size)
@@ -364,7 +361,7 @@ __hid_bpf_hw_check_params(struct hid_bpf_ctx *ctx, __u8 *buf, size_t *buf__sz,
u32 report_len;
/* check arguments */
- if (!ctx || !hid_ops || !buf)
+ if (!hid_ops)
return -EINVAL;
switch (rtype) {
diff --git a/drivers/hid/bpf/progs/hid_bpf_helpers.h b/drivers/hid/bpf/progs/hid_bpf_helpers.h
index bf19785a6b06..228f8d787567 100644
--- a/drivers/hid/bpf/progs/hid_bpf_helpers.h
+++ b/drivers/hid/bpf/progs/hid_bpf_helpers.h
@@ -33,11 +33,9 @@ extern int hid_bpf_try_input_report(struct hid_bpf_ctx *ctx,
/* bpf_wq implementation */
extern int bpf_wq_init(struct bpf_wq *wq, void *p__map, unsigned int flags) __weak __ksym;
extern int bpf_wq_start(struct bpf_wq *wq, unsigned int flags) __weak __ksym;
-extern int bpf_wq_set_callback_impl(struct bpf_wq *wq,
- int (callback_fn)(void *map, int *key, void *value),
- unsigned int flags__k, void *aux__ign) __ksym;
-#define bpf_wq_set_callback(wq, cb, flags) \
- bpf_wq_set_callback_impl(wq, cb, flags, NULL)
+extern int bpf_wq_set_callback(struct bpf_wq *wq,
+ int (*callback_fn)(void *, int *, void *),
+ unsigned int flags) __weak __ksym;
#define HID_MAX_DESCRIPTOR_SIZE 4096
#define HID_IGNORE_EVENT -1
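Call sites change accordingly: with the implicit-argument form there is no wrapper macro and no trailing NULL. A fragment, assumed to run inside a BPF program body that owns a struct bpf_wq wq:

    /* workqueue callback, signature per the extern above */
    static int wq_cb(void *map, int *key, void *value)
    {
            return 0;
    }

    /* before: bpf_wq_set_callback_impl(&wq, wq_cb, 0, NULL);
     * after:  the verifier supplies the bpf_prog_aux argument itself
     */
    err = bpf_wq_set_callback(&wq, wq_cb, 0);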
diff --git a/fs/bpf_fs_kfuncs.c b/fs/bpf_fs_kfuncs.c
index 5ace2511fec5..e4e51a1d0de2 100644
--- a/fs/bpf_fs_kfuncs.c
+++ b/fs/bpf_fs_kfuncs.c
@@ -68,10 +68,7 @@ __bpf_kfunc void bpf_put_file(struct file *file)
*
* Resolve the pathname for the supplied *path* and store it in *buf*. This BPF
* kfunc is the safer variant of the legacy bpf_d_path() helper and should be
- * used in place of bpf_d_path() whenever possible. It enforces KF_TRUSTED_ARGS
- * semantics, meaning that the supplied *path* must itself hold a valid
- * reference, or else the BPF program will be outright rejected by the BPF
- * verifier.
+ * used in place of bpf_d_path() whenever possible.
*
* This BPF kfunc may only be called from BPF LSM programs.
*
@@ -359,14 +356,13 @@ __bpf_kfunc int bpf_cgroup_read_xattr(struct cgroup *cgroup, const char *name__s
__bpf_kfunc_end_defs();
BTF_KFUNCS_START(bpf_fs_kfunc_set_ids)
-BTF_ID_FLAGS(func, bpf_get_task_exe_file,
- KF_ACQUIRE | KF_TRUSTED_ARGS | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_get_task_exe_file, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_put_file, KF_RELEASE)
-BTF_ID_FLAGS(func, bpf_path_d_path, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_get_dentry_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_get_file_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_set_dentry_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_remove_dentry_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_path_d_path)
+BTF_ID_FLAGS(func, bpf_get_dentry_xattr, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_get_file_xattr, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_set_dentry_xattr, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_remove_dentry_xattr, KF_SLEEPABLE)
BTF_KFUNCS_END(bpf_fs_kfunc_set_ids)
static int bpf_fs_kfuncs_filter(const struct bpf_prog *prog, u32 kfunc_id)
@@ -377,9 +373,8 @@ static int bpf_fs_kfuncs_filter(const struct bpf_prog *prog, u32 kfunc_id)
return -EACCES;
}
-/* bpf_[set|remove]_dentry_xattr.* hooks have KF_TRUSTED_ARGS and
- * KF_SLEEPABLE, so they are only available to sleepable hooks with
- * dentry arguments.
+/* bpf_[set|remove]_dentry_xattr.* hooks have KF_SLEEPABLE, so they are only
+ * available to sleepable hooks with dentry arguments.
*
* Setting and removing xattr requires exclusive lock on dentry->d_inode.
* Some hooks already locked d_inode, while some hooks have not locked
diff --git a/fs/verity/measure.c b/fs/verity/measure.c
index 388734132f01..6a35623ebdf0 100644
--- a/fs/verity/measure.c
+++ b/fs/verity/measure.c
@@ -162,7 +162,7 @@ __bpf_kfunc int bpf_get_fsverity_digest(struct file *file, struct bpf_dynptr *di
__bpf_kfunc_end_defs();
BTF_KFUNCS_START(fsverity_set_ids)
-BTF_ID_FLAGS(func, bpf_get_fsverity_digest, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_get_fsverity_digest)
BTF_KFUNCS_END(fsverity_set_ids)
static int bpf_get_fsverity_digest_filter(const struct bpf_prog *prog, u32 kfunc_id)
diff --git a/include/asm-generic/rqspinlock.h b/include/asm-generic/rqspinlock.h
index 0f2dcbbfee2f..5c5cf2f7fc39 100644
--- a/include/asm-generic/rqspinlock.h
+++ b/include/asm-generic/rqspinlock.h
@@ -191,7 +191,7 @@ static __always_inline int res_spin_lock(rqspinlock_t *lock)
#else
-#define res_spin_lock(lock) resilient_tas_spin_lock(lock)
+#define res_spin_lock(lock) ({ grab_held_lock_entry(lock); resilient_tas_spin_lock(lock); })
#endif /* CONFIG_QUEUED_SPINLOCKS */
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index d1eb5c7729cb..2f535331f926 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -172,7 +172,7 @@ void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage);
int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, struct bpf_map *map);
-int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, void *value);
+int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, void *value, u64 flags);
int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
void *value, u64 flags);
@@ -470,7 +470,7 @@ static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
static inline void bpf_cgroup_storage_free(
struct bpf_cgroup_storage *storage) {}
static inline int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key,
- void *value) {
+ void *value, u64 flags) {
return 0;
}
static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index e5be698256d1..cd9b96434904 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -287,6 +287,7 @@ struct bpf_map_owner {
enum bpf_prog_type type;
bool jited;
bool xdp_has_frags;
+ bool sleepable;
u64 storage_cookie[MAX_BPF_CGROUP_STORAGE_TYPE];
const struct btf_type *attach_func_proto;
enum bpf_attach_type expected_attach_type;
@@ -673,6 +674,22 @@ void bpf_map_free_internal_structs(struct bpf_map *map, void *obj);
int bpf_dynptr_from_file_sleepable(struct file *file, u32 flags,
struct bpf_dynptr *ptr__uninit);
+#if defined(CONFIG_MMU) && defined(CONFIG_64BIT)
+void *bpf_arena_alloc_pages_non_sleepable(void *p__map, void *addr__ign, u32 page_cnt, int node_id,
+ u64 flags);
+void bpf_arena_free_pages_non_sleepable(void *p__map, void *ptr__ign, u32 page_cnt);
+#else
+static inline void *bpf_arena_alloc_pages_non_sleepable(void *p__map, void *addr__ign, u32 page_cnt,
+ int node_id, u64 flags)
+{
+ return NULL;
+}
+
+static inline void bpf_arena_free_pages_non_sleepable(void *p__map, void *ptr__ign, u32 page_cnt)
+{
+}
+#endif
+
extern const struct bpf_map_ops bpf_map_offload_ops;
/* bpf_type_flag contains a set of flags that are applicable to the values of
@@ -737,7 +754,7 @@ enum bpf_type_flag {
MEM_ALLOC = BIT(11 + BPF_BASE_TYPE_BITS),
/* PTR was passed from the kernel in a trusted context, and may be
- * passed to KF_TRUSTED_ARGS kfuncs or BPF helper functions.
+ * passed to kfuncs or BPF helper functions.
* Confusingly, this is _not_ the opposite of PTR_UNTRUSTED above.
* PTR_UNTRUSTED refers to a kptr that was read directly from a map
* without invoking bpf_kptr_xchg(). What we really need to know is
@@ -1213,6 +1230,9 @@ enum {
#endif
};
+#define BPF_TRAMP_COOKIE_INDEX_SHIFT 8
+#define BPF_TRAMP_IS_RETURN_SHIFT 63
+
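[Editor note: how these shifts are consumed is not visible in this hunk. Purely as an illustration of the implied bit layout (helper names and field widths are assumptions, the real encoding lives in the trampoline code):]

	/* Hypothetical pack/unpack helpers for a session descriptor that keeps
	 * a cookie slot index above bit 8 and an is-return flag in bit 63.
	 */
	static inline u64 tramp_session_desc(u32 cookie_idx, bool is_return)
	{
		return ((u64)cookie_idx << BPF_TRAMP_COOKIE_INDEX_SHIFT) |
		       ((u64)is_return << BPF_TRAMP_IS_RETURN_SHIFT);
	}

	static inline bool tramp_session_is_return(u64 desc)
	{
		return desc >> BPF_TRAMP_IS_RETURN_SHIFT;
	}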
struct bpf_tramp_links {
struct bpf_tramp_link *links[BPF_MAX_TRAMP_LINKS];
int nr_links;
@@ -1293,6 +1313,7 @@ enum bpf_tramp_prog_type {
BPF_TRAMP_MODIFY_RETURN,
BPF_TRAMP_MAX,
BPF_TRAMP_REPLACE, /* more than MAX */
+ BPF_TRAMP_FSESSION,
};
struct bpf_tramp_image {
@@ -1309,14 +1330,17 @@ struct bpf_tramp_image {
};
struct bpf_trampoline {
- /* hlist for trampoline_table */
- struct hlist_node hlist;
+ /* hlist for trampoline_key_table */
+ struct hlist_node hlist_key;
+ /* hlist for trampoline_ip_table */
+ struct hlist_node hlist_ip;
struct ftrace_ops *fops;
/* serializes access to fields of this trampoline */
struct mutex mutex;
refcount_t refcnt;
u32 flags;
u64 key;
+ unsigned long ip;
struct {
struct btf_func_model model;
void *addr;
@@ -1418,7 +1442,7 @@ bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr);
int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u64 offset,
void *src, u64 len, u64 flags);
void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u64 offset,
- void *buffer__opt, u64 buffer__szk);
+ void *buffer__nullable, u64 buffer__szk);
static inline int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u64 offset, u64 len)
{
@@ -1742,8 +1766,12 @@ struct bpf_prog_aux {
struct rcu_head rcu;
};
struct bpf_stream stream[2];
+ struct mutex st_ops_assoc_mutex;
+ struct bpf_map __rcu *st_ops_assoc;
};
+#define BPF_NR_CONTEXTS 4 /* normal, softirq, hardirq, NMI */
+
struct bpf_prog {
u16 pages; /* Number of allocated pages */
u16 jited:1, /* Is our filter JIT'ed? */
@@ -1759,6 +1787,7 @@ struct bpf_prog {
enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */
call_get_stack:1, /* Do we call bpf_get_stack() or bpf_get_stackid() */
call_get_func_ip:1, /* Do we call get_func_ip() */
+ call_session_cookie:1, /* Do we call bpf_session_cookie() */
tstamp_type_access:1, /* Accessed __sk_buff->tstamp_type */
sleepable:1; /* BPF program is sleepable */
enum bpf_prog_type type; /* Type of BPF program */
@@ -1770,7 +1799,7 @@ struct bpf_prog {
u8 tag[BPF_TAG_SIZE];
};
struct bpf_prog_stats __percpu *stats;
- int __percpu *active;
+ u8 __percpu *active; /* u8[BPF_NR_CONTEXTS] for recursion protection */
unsigned int (*bpf_func)(const void *ctx,
const struct bpf_insn *insn);
struct bpf_prog_aux *aux; /* Auxiliary fields */
@@ -1855,6 +1884,11 @@ struct bpf_tracing_link {
struct bpf_prog *tgt_prog;
};
+struct bpf_fsession_link {
+ struct bpf_tracing_link link;
+ struct bpf_tramp_link fexit;
+};
+
struct bpf_raw_tp_link {
struct bpf_link link;
struct bpf_raw_event_map *btp;
@@ -2002,6 +2036,40 @@ struct bpf_struct_ops_common_value {
enum bpf_struct_ops_state state;
};
+static inline bool bpf_prog_get_recursion_context(struct bpf_prog *prog)
+{
+#ifdef CONFIG_ARM64
+ u8 rctx = interrupt_context_level();
+ u8 *active = this_cpu_ptr(prog->active);
+ u32 val;
+
+ preempt_disable();
+ active[rctx]++;
+ val = le32_to_cpu(*(__le32 *)active);
+ preempt_enable();
+ if (val != BIT(rctx * 8))
+ return false;
+
+ return true;
+#else
+ return this_cpu_inc_return(*(int __percpu *)(prog->active)) == 1;
+#endif
+}
+
+static inline void bpf_prog_put_recursion_context(struct bpf_prog *prog)
+{
+#ifdef CONFIG_ARM64
+ u8 rctx = interrupt_context_level();
+ u8 *active = this_cpu_ptr(prog->active);
+
+ preempt_disable();
+ active[rctx]--;
+ preempt_enable();
+#else
+ this_cpu_dec(*(int __percpu *)(prog->active));
+#endif
+}
+
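[Editor note: on arm64 the four per-context u8 counters are read back as one 32-bit word, so the program proceeds only when its own context's counter is exactly 1 and no other context is active. A sketch of the intended dispatch pattern (hypothetical caller; the real sites are the trampoline enter/exit paths). Note the counter is incremented even on a detected recursion, so the put side must always run:]

	static u32 run_prog_norecurse(struct bpf_prog *prog, const void *ctx)
	{
		u32 ret = 0;

		if (bpf_prog_get_recursion_context(prog))
			ret = bpf_prog_run(prog, ctx);
		else
			bpf_prog_inc_misses_counter(prog); /* recursion in this context */
		bpf_prog_put_recursion_context(prog);
		return ret;
	}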
#if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL)
/* This macro helps developer to register a struct_ops type and generate
* type information correctly. Developers should use this macro to register
@@ -2044,6 +2112,9 @@ static inline void bpf_module_put(const void *data, struct module *owner)
module_put(owner);
}
int bpf_struct_ops_link_create(union bpf_attr *attr);
+int bpf_prog_assoc_struct_ops(struct bpf_prog *prog, struct bpf_map *map);
+void bpf_prog_disassoc_struct_ops(struct bpf_prog *prog);
+void *bpf_prog_get_assoc_struct_ops(const struct bpf_prog_aux *aux);
u32 bpf_struct_ops_id(const void *kdata);
#ifdef CONFIG_NET
@@ -2091,6 +2162,17 @@ static inline int bpf_struct_ops_link_create(union bpf_attr *attr)
{
return -EOPNOTSUPP;
}
+static inline int bpf_prog_assoc_struct_ops(struct bpf_prog *prog, struct bpf_map *map)
+{
+ return -EOPNOTSUPP;
+}
+static inline void bpf_prog_disassoc_struct_ops(struct bpf_prog *prog)
+{
+}
+static inline void *bpf_prog_get_assoc_struct_ops(const struct bpf_prog_aux *aux)
+{
+ return NULL;
+}
static inline void bpf_map_struct_ops_info_fill(struct bpf_map_info *info, struct bpf_map *map)
{
}
@@ -2101,6 +2183,37 @@ static inline void bpf_struct_ops_desc_release(struct bpf_struct_ops_desc *st_op
#endif
+static inline int bpf_fsession_cnt(struct bpf_tramp_links *links)
+{
+	struct bpf_tramp_links *fentries = &links[BPF_TRAMP_FENTRY];
+	int cnt = 0;
+
+	for (int i = 0; i < fentries->nr_links; i++) {
+		if (fentries->links[i]->link.prog->expected_attach_type == BPF_TRACE_FSESSION)
+			cnt++;
+	}
+
+	return cnt;
+}
+
+static inline bool bpf_prog_calls_session_cookie(struct bpf_tramp_link *link)
+{
+ return link->link.prog->call_session_cookie;
+}
+
+static inline int bpf_fsession_cookie_cnt(struct bpf_tramp_links *links)
+{
+	struct bpf_tramp_links *fentries = &links[BPF_TRAMP_FENTRY];
+	int cnt = 0;
+
+	for (int i = 0; i < fentries->nr_links; i++) {
+		if (bpf_prog_calls_session_cookie(fentries->links[i]))
+			cnt++;
+	}
+
+	return cnt;
+}
+
int bpf_prog_ctx_arg_info_init(struct bpf_prog *prog,
const struct bpf_ctx_arg_aux *info, u32 cnt);
@@ -2540,6 +2653,10 @@ struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id);
int bpf_map_alloc_pages(const struct bpf_map *map, int nid,
unsigned long nr_pages, struct page **page_array);
#ifdef CONFIG_MEMCG
+void bpf_map_memcg_enter(const struct bpf_map *map, struct mem_cgroup **old_memcg,
+ struct mem_cgroup **new_memcg);
+void bpf_map_memcg_exit(struct mem_cgroup *old_memcg,
+ struct mem_cgroup *memcg);
void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags,
int node);
void *bpf_map_kmalloc_nolock(const struct bpf_map *map, size_t size, gfp_t flags,
@@ -2564,6 +2681,17 @@ void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size,
kvcalloc(_n, _size, _flags)
#define bpf_map_alloc_percpu(_map, _size, _align, _flags) \
__alloc_percpu_gfp(_size, _align, _flags)
+static inline void bpf_map_memcg_enter(const struct bpf_map *map, struct mem_cgroup **old_memcg,
+ struct mem_cgroup **new_memcg)
+{
+ *new_memcg = NULL;
+ *old_memcg = NULL;
+}
+
+static inline void bpf_map_memcg_exit(struct mem_cgroup *old_memcg,
+ struct mem_cgroup *memcg)
+{
+}
#endif
static inline int
@@ -2764,8 +2892,8 @@ int map_set_for_each_callback_args(struct bpf_verifier_env *env,
struct bpf_func_state *caller,
struct bpf_func_state *callee);
-int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
-int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
+int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value, u64 flags);
+int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value, u64 flags);
int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
u64 flags);
int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
@@ -3243,6 +3371,11 @@ static inline void bpf_prog_report_arena_violation(bool write, unsigned long add
}
#endif /* CONFIG_BPF_SYSCALL */
+static inline bool bpf_net_capable(void)
+{
+ return capable(CAP_NET_ADMIN) || capable(CAP_SYS_ADMIN);
+}
+
static __always_inline int
bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr)
{
@@ -3832,14 +3965,43 @@ bpf_prog_update_insn_ptrs(struct bpf_prog *prog, u32 *offsets, void *image)
}
#endif
+static inline bool bpf_map_supports_cpu_flags(enum bpf_map_type map_type)
+{
+ switch (map_type) {
+ case BPF_MAP_TYPE_PERCPU_ARRAY:
+ case BPF_MAP_TYPE_PERCPU_HASH:
+ case BPF_MAP_TYPE_LRU_PERCPU_HASH:
+ case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
+ return true;
+ default:
+ return false;
+ }
+}
+
static inline int bpf_map_check_op_flags(struct bpf_map *map, u64 flags, u64 allowed_flags)
{
- if (flags & ~allowed_flags)
+ u32 cpu;
+
+ if ((u32)flags & ~allowed_flags)
return -EINVAL;
if ((flags & BPF_F_LOCK) && !btf_record_has_field(map->record, BPF_SPIN_LOCK))
return -EINVAL;
+ if (!(flags & BPF_F_CPU) && flags >> 32)
+ return -EINVAL;
+
+ if (flags & (BPF_F_CPU | BPF_F_ALL_CPUS)) {
+ if (!bpf_map_supports_cpu_flags(map->map_type))
+ return -EINVAL;
+ if ((flags & BPF_F_CPU) && (flags & BPF_F_ALL_CPUS))
+ return -EINVAL;
+
+ cpu = flags >> 32;
+ if ((flags & BPF_F_CPU) && cpu >= num_possible_cpus())
+ return -ERANGE;
+ }
+
return 0;
}
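[Editor note: user space passes the target cpu in the upper 32 bits of the flags word, which is exactly what the `flags >> 32` extraction above validates. A libbpf sketch (`struct elem` is an assumed value type):]

	__u32 key = 0, cpu = 3;
	__u64 flags = BPF_F_CPU | ((__u64)cpu << 32);
	struct elem value;	/* one element, not num_possible_cpus() of them */
	int err;

	err = bpf_map_lookup_elem_flags(map_fd, &key, &value, flags);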
diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h
index 66432248cd81..85efa9772530 100644
--- a/include/linux/bpf_local_storage.h
+++ b/include/linux/bpf_local_storage.h
@@ -15,12 +15,13 @@
#include <linux/types.h>
#include <linux/bpf_mem_alloc.h>
#include <uapi/linux/btf.h>
+#include <asm/rqspinlock.h>
#define BPF_LOCAL_STORAGE_CACHE_SIZE 16
struct bpf_local_storage_map_bucket {
struct hlist_head list;
- raw_spinlock_t lock;
+ rqspinlock_t lock;
};
/* The map is not the primary owner of a bpf_local_storage_elem.
@@ -67,6 +68,11 @@ struct bpf_local_storage_data {
u8 data[] __aligned(8);
};
+#define SELEM_MAP_UNLINKED (1 << 0)
+#define SELEM_STORAGE_UNLINKED (1 << 1)
+#define SELEM_UNLINKED (SELEM_MAP_UNLINKED | SELEM_STORAGE_UNLINKED)
+#define SELEM_TOFREE (1 << 2)
+
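[Editor note: these bits let the map-side and storage-side unlink paths race without a shared lock; each side ORs in its own bit, and whichever caller completes SELEM_UNLINKED owns the free. A sketch of that claim step with a hypothetical helper (the real transitions live in bpf_local_storage.c):]

	/* Returns true only for the caller that set the last of the two
	 * unlink bits, so the element is freed exactly once.
	 */
	static bool selem_mark_unlinked(struct bpf_local_storage_elem *selem,
					int bit)
	{
		int old = atomic_fetch_or(bit, &selem->state);

		return !(old & bit) && (old | bit) == SELEM_UNLINKED;
	}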
/* Linked to bpf_local_storage and bpf_local_storage_map */
struct bpf_local_storage_elem {
struct hlist_node map_node; /* Linked to bpf_local_storage_map */
@@ -79,7 +85,9 @@ struct bpf_local_storage_elem {
* after raw_spin_unlock
*/
};
- /* 8 bytes hole */
+ atomic_t state;
+ bool use_kmalloc_nolock;
+ /* 3 bytes hole */
/* The data is stored in another cacheline to minimize
* the number of cachelines access during a cache hit.
*/
@@ -88,13 +96,14 @@ struct bpf_local_storage_elem {
struct bpf_local_storage {
struct bpf_local_storage_data __rcu *cache[BPF_LOCAL_STORAGE_CACHE_SIZE];
- struct bpf_local_storage_map __rcu *smap;
struct hlist_head list; /* List of bpf_local_storage_elem */
void *owner; /* The object that owns the above "list" of
* bpf_local_storage_elem.
*/
struct rcu_head rcu;
- raw_spinlock_t lock; /* Protect adding/removing from the "list" */
+ rqspinlock_t lock; /* Protect adding/removing from the "list" */
+ u64 mem_charge; /* Copy of mem charged to owner. Protected by "lock" */
+ refcount_t owner_refcnt;/* Used to pin owner when map_free is uncharging */
bool use_kmalloc_nolock;
};
@@ -162,11 +171,10 @@ bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
return SDATA(selem);
}
-void bpf_local_storage_destroy(struct bpf_local_storage *local_storage);
+u32 bpf_local_storage_destroy(struct bpf_local_storage *local_storage);
void bpf_local_storage_map_free(struct bpf_map *map,
- struct bpf_local_storage_cache *cache,
- int __percpu *busy_counter);
+ struct bpf_local_storage_cache *cache);
int bpf_local_storage_map_check_btf(const struct bpf_map *map,
const struct btf *btf,
@@ -176,10 +184,11 @@ int bpf_local_storage_map_check_btf(const struct bpf_map *map,
void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage,
struct bpf_local_storage_elem *selem);
-void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool reuse_now);
+int bpf_selem_unlink(struct bpf_local_storage_elem *selem);
-void bpf_selem_link_map(struct bpf_local_storage_map *smap,
- struct bpf_local_storage_elem *selem);
+int bpf_selem_link_map(struct bpf_local_storage_map *smap,
+ struct bpf_local_storage *local_storage,
+ struct bpf_local_storage_elem *selem);
struct bpf_local_storage_elem *
bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, void *value,
diff --git a/include/linux/bpf_mprog.h b/include/linux/bpf_mprog.h
index 929225f7b095..0b9f4caeeb0a 100644
--- a/include/linux/bpf_mprog.h
+++ b/include/linux/bpf_mprog.h
@@ -340,4 +340,14 @@ static inline bool bpf_mprog_supported(enum bpf_prog_type type)
return false;
}
}
+
+static inline bool bpf_mprog_detach_empty(enum bpf_prog_type type)
+{
+ switch (type) {
+ case BPF_PROG_TYPE_SCHED_CLS:
+ return bpf_net_capable();
+ default:
+ return false;
+ }
+}
#endif /* __BPF_MPROG_H */
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 130bcbd66f60..ef8e45a362d9 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -147,8 +147,12 @@ struct bpf_reg_state {
* registers. Example:
* r1 = r2; both will have r1->id == r2->id == N
* r1 += 10; r1->id == N | BPF_ADD_CONST and r1->off == 10
+ * r3 = r2; both will have r3->id == r2->id == N
+ * w3 += 10; r3->id == N | BPF_ADD_CONST32 and r3->off == 10
*/
-#define BPF_ADD_CONST (1U << 31)
+#define BPF_ADD_CONST64 (1U << 31)
+#define BPF_ADD_CONST32 (1U << 30)
+#define BPF_ADD_CONST (BPF_ADD_CONST64 | BPF_ADD_CONST32)
u32 id;
/* PTR_TO_SOCKET and PTR_TO_TCP_SOCK could be a ptr returned
* from a pointer-cast helper, bpf_sk_fullsock() and
@@ -692,12 +696,16 @@ struct bpf_id_pair {
struct bpf_idmap {
u32 tmp_id_gen;
+ u32 cnt;
struct bpf_id_pair map[BPF_ID_MAP_SIZE];
};
struct bpf_idset {
- u32 count;
- u32 ids[BPF_ID_MAP_SIZE];
+ u32 num_ids;
+ struct {
+ u32 id;
+ u32 cnt;
+ } entries[BPF_ID_MAP_SIZE];
};
/* see verifier.c:compute_scc_callchain() */
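[Editor note: because the flavor bits live at the top of reg->id, consumers must strip them before comparing base ids. Illustrative helpers (not from this series):]

	static inline u32 linked_reg_base_id(const struct bpf_reg_state *reg)
	{
		return reg->id & ~BPF_ADD_CONST;	/* strip both flavor bits */
	}

	static inline bool linked_reg_delta_is_32bit(const struct bpf_reg_state *reg)
	{
		return reg->id & BPF_ADD_CONST32;	/* set by ALU32 adds like "w3 += 10" */
	}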
diff --git a/include/linux/btf.h b/include/linux/btf.h
index f06976ffb63f..48108471c5b1 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -34,7 +34,7 @@
*
* And the following kfunc:
*
- * BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS)
+ * BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE)
*
* All invocations to the kfunc must pass the unmodified, unwalked task:
*
@@ -66,7 +66,6 @@
* return 0;
* }
*/
-#define KF_TRUSTED_ARGS (1 << 4) /* kfunc only takes trusted pointer arguments */
#define KF_SLEEPABLE (1 << 5) /* kfunc may sleep */
#define KF_DESTRUCTIVE (1 << 6) /* kfunc performs destructive actions */
#define KF_RCU (1 << 7) /* kfunc takes either rcu or trusted pointer arguments */
@@ -79,6 +78,7 @@
#define KF_ARENA_RET (1 << 13) /* kfunc returns an arena pointer */
#define KF_ARENA_ARG1 (1 << 14) /* kfunc takes an arena pointer as its first argument */
#define KF_ARENA_ARG2 (1 << 15) /* kfunc takes an arena pointer as its second argument */
+#define KF_IMPLICIT_ARGS (1 << 16) /* kfunc has implicit arguments supplied by the verifier */
/*
* Tag marking a kernel function as a kfunc. This is meant to minimize the
@@ -220,6 +220,7 @@ bool btf_is_module(const struct btf *btf);
bool btf_is_vmlinux(const struct btf *btf);
struct module *btf_try_get_module(const struct btf *btf);
u32 btf_nr_types(const struct btf *btf);
+u32 btf_named_start_id(const struct btf *btf, bool own);
struct btf *btf_base_btf(const struct btf *btf);
bool btf_type_is_i32(const struct btf_type *t);
bool btf_type_is_i64(const struct btf_type *t);
@@ -575,8 +576,8 @@ const char *btf_name_by_offset(const struct btf *btf, u32 offset);
const char *btf_str_by_offset(const struct btf *btf, u32 offset);
struct btf *btf_parse_vmlinux(void);
struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog);
-u32 *btf_kfunc_id_set_contains(const struct btf *btf, u32 kfunc_btf_id,
- const struct bpf_prog *prog);
+u32 *btf_kfunc_flags(const struct btf *btf, u32 kfunc_btf_id, const struct bpf_prog *prog);
+bool btf_kfunc_is_allowed(const struct btf *btf, u32 kfunc_btf_id, const struct bpf_prog *prog);
u32 *btf_kfunc_is_modify_return(const struct btf *btf, u32 kfunc_btf_id,
const struct bpf_prog *prog);
int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
diff --git a/include/linux/filter.h b/include/linux/filter.h
index fd54fed8f95f..4e1cb4f91f49 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1167,6 +1167,7 @@ bool bpf_jit_supports_arena(void);
bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena);
bool bpf_jit_supports_private_stack(void);
bool bpf_jit_supports_timed_may_goto(void);
+bool bpf_jit_supports_fsession(void);
u64 bpf_arch_uaddress_limit(void);
void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie);
u64 arch_bpf_timed_may_goto(void);
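[Editor note: presumably this follows the pattern of the other bpf_jit_supports_*() probes, a __weak default returning false that arch JITs able to emit fsession trampolines override. A sketch of that assumed default:]

	bool __weak bpf_jit_supports_fsession(void)
	{
		return false;
	}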
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index a3a8989e3268..705db0a6d995 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -82,6 +82,7 @@ static inline void early_trace_init(void) { }
struct module;
struct ftrace_hash;
+struct ftrace_func_entry;
#if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_MODULES) && \
defined(CONFIG_DYNAMIC_FTRACE)
@@ -359,7 +360,6 @@ enum {
FTRACE_OPS_FL_DIRECT = BIT(17),
FTRACE_OPS_FL_SUBOP = BIT(18),
FTRACE_OPS_FL_GRAPH = BIT(19),
- FTRACE_OPS_FL_JMP = BIT(20),
};
#ifndef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
@@ -403,9 +403,17 @@ enum ftrace_ops_cmd {
* Negative on failure. The return value is dependent on the
* callback.
*/
-typedef int (*ftrace_ops_func_t)(struct ftrace_ops *op, enum ftrace_ops_cmd cmd);
+typedef int (*ftrace_ops_func_t)(struct ftrace_ops *op, unsigned long ip, enum ftrace_ops_cmd cmd);
#ifdef CONFIG_DYNAMIC_FTRACE
+
+#define FTRACE_HASH_DEFAULT_BITS 10
+
+struct ftrace_hash *alloc_ftrace_hash(int size_bits);
+void free_ftrace_hash(struct ftrace_hash *hash);
+struct ftrace_func_entry *add_ftrace_hash_entry_direct(struct ftrace_hash *hash,
+ unsigned long ip, unsigned long direct);
+
/* The hash used to know what functions callbacks trace */
struct ftrace_ops_hash {
struct ftrace_hash __rcu *notrace_hash;
@@ -535,6 +543,10 @@ int unregister_ftrace_direct(struct ftrace_ops *ops, unsigned long addr,
int modify_ftrace_direct(struct ftrace_ops *ops, unsigned long addr);
int modify_ftrace_direct_nolock(struct ftrace_ops *ops, unsigned long addr);
+int update_ftrace_direct_add(struct ftrace_ops *ops, struct ftrace_hash *hash);
+int update_ftrace_direct_del(struct ftrace_ops *ops, struct ftrace_hash *hash);
+int update_ftrace_direct_mod(struct ftrace_ops *ops, struct ftrace_hash *hash, bool do_direct_lock);
+
void ftrace_stub_direct_tramp(void);
#else
@@ -561,6 +573,21 @@ static inline int modify_ftrace_direct_nolock(struct ftrace_ops *ops, unsigned l
return -ENODEV;
}
+static inline int update_ftrace_direct_add(struct ftrace_ops *ops, struct ftrace_hash *hash)
+{
+ return -ENODEV;
+}
+
+static inline int update_ftrace_direct_del(struct ftrace_ops *ops, struct ftrace_hash *hash)
+{
+ return -ENODEV;
+}
+
+static inline int update_ftrace_direct_mod(struct ftrace_ops *ops, struct ftrace_hash *hash, bool do_direct_lock)
+{
+ return -ENODEV;
+}
+
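[Editor note: the batch API takes a caller-built hash of ip to direct-trampoline mappings. A sketch of attaching one trampoline to many functions in a single step, assuming the hash remains owned by the caller after the update call:]

	static int direct_attach_batch(struct ftrace_ops *ops, unsigned long *ips,
				       int cnt, unsigned long tramp)
	{
		struct ftrace_hash *hash;
		int i, err = -ENOMEM;

		hash = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS);
		if (!hash)
			return -ENOMEM;
		for (i = 0; i < cnt; i++)
			if (!add_ftrace_hash_entry_direct(hash, ips[i], tramp))
				goto out;
		err = update_ftrace_direct_add(ops, hash);
	out:
		free_ftrace_hash(hash);
		return err;
	}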
/*
* This must be implemented by the architecture.
* It is the way the ftrace direct_ops helper, when called
diff --git a/include/linux/ftrace_regs.h b/include/linux/ftrace_regs.h
index 15627ceea9bc..386fa48c4a95 100644
--- a/include/linux/ftrace_regs.h
+++ b/include/linux/ftrace_regs.h
@@ -33,6 +33,31 @@ struct ftrace_regs;
#define ftrace_regs_get_frame_pointer(fregs) \
frame_pointer(&arch_ftrace_regs(fregs)->regs)
+static __always_inline void
+ftrace_partial_regs_update(struct ftrace_regs *fregs, struct pt_regs *regs) { }
+
+#else
+
+/*
+ * ftrace_partial_regs_update - update the original ftrace_regs from regs
+ * @fregs: The ftrace_regs to update from @regs
+ * @regs: The partial regs from ftrace_partial_regs() that was updated
+ *
+ * Some architectures have the partial regs living in the ftrace_regs
+ * structure, whereas other architectures need to make a different copy
+ * of the @regs. If a partial @regs is retrieved by ftrace_partial_regs() and
+ * if the code using @regs updates a field (like the instruction pointer or
+ * stack pointer) it may need to propagate that change to the original @fregs
+ * it retrieved the partial @regs from. Use this function to guarantee that
+ * update happens.
+ */
+static __always_inline void
+ftrace_partial_regs_update(struct ftrace_regs *fregs, struct pt_regs *regs)
+{
+ ftrace_regs_set_instruction_pointer(fregs, instruction_pointer(regs));
+ ftrace_regs_set_return_value(fregs, regs_return_value(regs));
+}
+
#endif /* HAVE_ARCH_FTRACE_REGS */
/* This can be overridden by the architectures */
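[Editor note: a sketched consumer, a callback that hands a partial pt_regs copy to code that may rewrite the ip or return value, then propagates those edits back into fregs. ftrace_partial_regs() is the existing accessor; the handler shape is illustrative:]

	static void notrace my_callback(unsigned long ip, unsigned long parent_ip,
					struct ftrace_ops *ops,
					struct ftrace_regs *fregs)
	{
		struct pt_regs buf;
		struct pt_regs *regs = ftrace_partial_regs(fregs, &buf);

		/* ... consumer may modify regs' ip or return value here ... */

		ftrace_partial_regs_update(fregs, regs); /* no-op when regs lives inside fregs */
	}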
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 5b004b95648b..67f154de10bc 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -949,7 +949,11 @@ static inline void mod_memcg_page_state(struct page *page,
rcu_read_unlock();
}
+unsigned long memcg_events(struct mem_cgroup *memcg, int event);
unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx);
+unsigned long memcg_page_state_output(struct mem_cgroup *memcg, int item);
+bool memcg_stat_item_valid(int idx);
+bool memcg_vm_event_item_valid(enum vm_event_item idx);
unsigned long lruvec_page_state(struct lruvec *lruvec, enum node_stat_item idx);
unsigned long lruvec_page_state_local(struct lruvec *lruvec,
enum node_stat_item idx);
@@ -1375,6 +1379,21 @@ static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
return 0;
}
+static inline unsigned long memcg_page_state_output(struct mem_cgroup *memcg, int item)
+{
+ return 0;
+}
+
+static inline bool memcg_stat_item_valid(int idx)
+{
+ return false;
+}
+
+static inline bool memcg_vm_event_item_valid(enum vm_event_item idx)
+{
+ return false;
+}
+
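[Editor note: these exports are groundwork for BPF-OOM. A hypothetical kfunc built on them could look as follows (name and shape are assumptions, not part of this series):]

	__bpf_kfunc unsigned long bpf_memcg_events(struct mem_cgroup *memcg, int event)
	{
		if (!memcg_vm_event_item_valid(event))
			return 0;
		return memcg_events(memcg, event);
	}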
static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
enum node_stat_item idx)
{
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 49847888c287..829b281d6c9c 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -97,6 +97,8 @@ struct sk_psock {
struct sk_buff_head ingress_skb;
struct list_head ingress_msg;
spinlock_t ingress_lock;
+ /** @msg_tot_len: Total bytes queued in ingress_msg list. */
+ u32 msg_tot_len;
unsigned long state;
struct list_head link;
spinlock_t link_lock;
@@ -141,6 +143,8 @@ int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from,
struct sk_msg *msg, u32 bytes);
int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
int len, int flags);
+int __sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
+ int len, int flags, int *copied_from_self);
bool sk_msg_is_readable(struct sock *sk);
static inline void sk_msg_check_to_free(struct sk_msg *msg, u32 i, u32 bytes)
@@ -319,6 +323,27 @@ static inline void sock_drop(struct sock *sk, struct sk_buff *skb)
kfree_skb(skb);
}
+static inline u32 sk_psock_get_msg_len_nolock(struct sk_psock *psock)
+{
+ /* Used by ioctl to read msg_tot_len only; lock-free for performance */
+ return READ_ONCE(psock->msg_tot_len);
+}
+
+static inline void sk_psock_msg_len_add_locked(struct sk_psock *psock, int diff)
+{
+	/* Pairs with the READ_ONCE() in sk_psock_get_msg_len_nolock().
+	 * ingress_lock must be held to prevent concurrent updates to
+	 * msg_tot_len.
+	 */
+ WRITE_ONCE(psock->msg_tot_len, psock->msg_tot_len + diff);
+}
+
+static inline void sk_psock_msg_len_add(struct sk_psock *psock, int diff)
+{
+ spin_lock_bh(&psock->ingress_lock);
+ sk_psock_msg_len_add_locked(psock, diff);
+ spin_unlock_bh(&psock->ingress_lock);
+}
+
static inline bool sk_psock_queue_msg(struct sk_psock *psock,
struct sk_msg *msg)
{
@@ -327,6 +352,7 @@ static inline bool sk_psock_queue_msg(struct sk_psock *psock,
spin_lock_bh(&psock->ingress_lock);
if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
list_add_tail(&msg->list, &psock->ingress_msg);
+ sk_psock_msg_len_add_locked(psock, msg->sg.size);
ret = true;
} else {
sk_msg_free(psock->sk, msg);
@@ -343,18 +369,25 @@ static inline struct sk_msg *sk_psock_dequeue_msg(struct sk_psock *psock)
spin_lock_bh(&psock->ingress_lock);
msg = list_first_entry_or_null(&psock->ingress_msg, struct sk_msg, list);
- if (msg)
+ if (msg) {
list_del(&msg->list);
+ sk_psock_msg_len_add_locked(psock, -msg->sg.size);
+ }
spin_unlock_bh(&psock->ingress_lock);
return msg;
}
+static inline struct sk_msg *sk_psock_peek_msg_locked(struct sk_psock *psock)
+{
+ return list_first_entry_or_null(&psock->ingress_msg, struct sk_msg, list);
+}
+
static inline struct sk_msg *sk_psock_peek_msg(struct sk_psock *psock)
{
struct sk_msg *msg;
spin_lock_bh(&psock->ingress_lock);
- msg = list_first_entry_or_null(&psock->ingress_msg, struct sk_msg, list);
+ msg = sk_psock_peek_msg_locked(psock);
spin_unlock_bh(&psock->ingress_lock);
return msg;
}
@@ -521,6 +554,39 @@ static inline bool sk_psock_strp_enabled(struct sk_psock *psock)
return !!psock->saved_data_ready;
}
+/* for tcp only, sk is locked */
+static inline ssize_t sk_psock_msg_inq(struct sock *sk)
+{
+ struct sk_psock *psock;
+ ssize_t inq = 0;
+
+ psock = sk_psock_get(sk);
+ if (likely(psock)) {
+ inq = sk_psock_get_msg_len_nolock(psock);
+ sk_psock_put(sk, psock);
+ }
+ return inq;
+}
+
+/* for udp only, sk is not locked */
+static inline ssize_t sk_msg_first_len(struct sock *sk)
+{
+ struct sk_psock *psock;
+ struct sk_msg *msg;
+ ssize_t inq = 0;
+
+ psock = sk_psock_get(sk);
+ if (likely(psock)) {
+ spin_lock_bh(&psock->ingress_lock);
+ msg = sk_psock_peek_msg_locked(psock);
+ if (msg)
+ inq = msg->sg.size;
+ spin_unlock_bh(&psock->ingress_lock);
+ sk_psock_put(sk, psock);
+ }
+ return inq;
+}
+
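[Editor note: the cached total lets SIOCINQ/FIONREAD answer in O(1) instead of walking ingress_msg under the lock. A sketch of the ioctl glue (placement hypothetical):]

	/* Report queued sockmap bytes for FIONREAD; sk is locked, TCP only. */
	static int sockmap_ioctl_inq(struct sock *sk, int __user *argp)
	{
		int inq = sk_psock_msg_inq(sk);	/* O(1): reads msg_tot_len */

		return put_user(inq, argp);
	}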
#if IS_ENABLED(CONFIG_NET_SOCK_MSG)
#define BPF_F_STRPARSER (1UL << 1)
diff --git a/include/linux/tnum.h b/include/linux/tnum.h
index c52b862dad45..fa4654ffb621 100644
--- a/include/linux/tnum.h
+++ b/include/linux/tnum.h
@@ -63,6 +63,11 @@ struct tnum tnum_union(struct tnum t1, struct tnum t2);
/* Return @a with all but the lowest @size bytes cleared */
struct tnum tnum_cast(struct tnum a, u8 size);
+/* Swap the bytes of a tnum */
+struct tnum tnum_bswap16(struct tnum a);
+struct tnum tnum_bswap32(struct tnum a);
+struct tnum tnum_bswap64(struct tnum a);
+
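[Editor note: a byte swap is a pure bit permutation, so it distributes over the (value, mask) pair; known and unknown bits travel together. A plausible implementation of the 16-bit variant (the real ones are in kernel/bpf/tnum.c):]

	struct tnum tnum_bswap16(struct tnum a)
	{
		/* swab16() truncates to the low 16 bits, matching a 16-bit
		 * BPF_END swap that clears the upper bytes.
		 */
		return TNUM(swab16(a.value), swab16(a.mask));
	}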
/* Returns true if @a is a known constant */
static inline bool tnum_is_const(struct tnum a)
{
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f8d8513eda27..c8d400b7680a 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -119,6 +119,14 @@ enum bpf_cgroup_iter_order {
BPF_CGROUP_ITER_DESCENDANTS_PRE, /* walk descendants in pre-order. */
BPF_CGROUP_ITER_DESCENDANTS_POST, /* walk descendants in post-order. */
BPF_CGROUP_ITER_ANCESTORS_UP, /* walk ancestors upward. */
+ /*
+ * Walks the immediate children of the specified parent
+ * cgroup_subsys_state. Unlike BPF_CGROUP_ITER_DESCENDANTS_PRE,
+	 * BPF_CGROUP_ITER_DESCENDANTS_POST, and BPF_CGROUP_ITER_ANCESTORS_UP,
+ * the iterator does not include the specified parent as one of the
+ * returned iterator elements.
+ */
+ BPF_CGROUP_ITER_CHILDREN,
};
union bpf_iter_link_info {
@@ -918,6 +926,16 @@ union bpf_iter_link_info {
* Number of bytes read from the stream on success, or -1 if an
* error occurred (in which case, *errno* is set appropriately).
*
+ * BPF_PROG_ASSOC_STRUCT_OPS
+ * Description
+ * Associate a BPF program with a struct_ops map. The struct_ops
+ * map is identified by *map_fd* and the BPF program is
+ * identified by *prog_fd*.
+ *
+ * Return
+ * 0 on success or -1 if an error occurred (in which case,
+ * *errno* is set appropriately).
+ *
* NOTES
* eBPF objects (maps and programs) can be shared between processes.
*
@@ -974,6 +992,7 @@ enum bpf_cmd {
BPF_PROG_BIND_MAP,
BPF_TOKEN_CREATE,
BPF_PROG_STREAM_READ_BY_FD,
+ BPF_PROG_ASSOC_STRUCT_OPS,
__MAX_BPF_CMD,
};
@@ -1134,6 +1153,7 @@ enum bpf_attach_type {
BPF_NETKIT_PEER,
BPF_TRACE_KPROBE_SESSION,
BPF_TRACE_UPROBE_SESSION,
+ BPF_TRACE_FSESSION,
__MAX_BPF_ATTACH_TYPE
};
@@ -1373,6 +1393,8 @@ enum {
BPF_NOEXIST = 1, /* create new element if it didn't exist */
BPF_EXIST = 2, /* update existing element */
BPF_F_LOCK = 4, /* spin_lock-ed map_lookup/map_update */
+	BPF_F_CPU	= 8, /* cpu flag for percpu maps; the upper 32 bits of flags carry the cpu number */
+ BPF_F_ALL_CPUS = 16, /* update value across all CPUs for percpu maps */
};
/* flags for BPF_MAP_CREATE command */
@@ -1894,6 +1916,12 @@ union bpf_attr {
__u32 prog_fd;
} prog_stream_read;
+ struct {
+ __u32 map_fd;
+ __u32 prog_fd;
+ __u32 flags;
+ } prog_assoc_struct_ops;
+
} __attribute__((aligned(8)));
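[Editor note: the user-space side of the new command, sketched with a raw syscall (a libbpf wrapper will presumably follow; error handling elided):]

	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/bpf.h>

	static int prog_assoc_struct_ops(int map_fd, int prog_fd)
	{
		union bpf_attr attr = {};

		attr.prog_assoc_struct_ops.map_fd = map_fd;
		attr.prog_assoc_struct_ops.prog_fd = prog_fd;
		return syscall(__NR_bpf, BPF_PROG_ASSOC_STRUCT_OPS, &attr, sizeof(attr));
	}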
/* The description below is an attempt at providing documentation to eBPF
diff --git a/init/Kconfig b/init/Kconfig
index 99941f34abc6..0b425841df46 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -178,7 +178,7 @@ config RUSTC_HAS_FILE_AS_C_STR
config PAHOLE_VERSION
int
- default $(shell,$(srctree)/scripts/pahole-version.sh $(PAHOLE))
+ default "$(PAHOLE_VERSION)"
config CONSTRUCTORS
bool
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 232cbc97434d..79cf22860a99 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -42,7 +42,17 @@ endif
ifeq ($(CONFIG_BPF_JIT),y)
obj-$(CONFIG_BPF_SYSCALL) += bpf_struct_ops.o
obj-$(CONFIG_BPF_SYSCALL) += cpumask.o
-obj-${CONFIG_BPF_LSM} += bpf_lsm.o
+# bpf_lsm_proto.o must precede bpf_lsm.o. The current pahole logic
+# deduplicates function prototypes within
+# btf_encoder__add_saved_func() by keeping the first instance seen. We
+# need the function prototype(s) in bpf_lsm_proto.o to take precedence
+# over those within bpf_lsm.o. Having bpf_lsm_proto.o precede
+# bpf_lsm.o ensures its DWARF CU is processed early, forcing the
+# generated BTF to contain the overrides.
+#
+# Notably, this is a temporary workaround while the deduplication
+# semantics within pahole are revisited.
+obj-${CONFIG_BPF_LSM} += bpf_lsm_proto.o bpf_lsm.o
endif
ifneq ($(CONFIG_CRYPTO),)
obj-$(CONFIG_BPF_SYSCALL) += crypto.o
diff --git a/kernel/bpf/arena.c b/kernel/bpf/arena.c
index 872dc0e41c65..42fae0a9f314 100644
--- a/kernel/bpf/arena.c
+++ b/kernel/bpf/arena.c
@@ -2,11 +2,15 @@
/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
#include <linux/bpf.h>
#include <linux/btf.h>
+#include <linux/cacheflush.h>
#include <linux/err.h>
+#include <linux/irq_work.h>
#include "linux/filter.h"
+#include <linux/llist.h>
#include <linux/btf_ids.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
+#include <asm/tlbflush.h>
#include "range_tree.h"
/*
@@ -42,14 +46,31 @@
#define GUARD_SZ round_up(1ull << sizeof_field(struct bpf_insn, off) * 8, PAGE_SIZE << 1)
#define KERN_VM_SZ (SZ_4G + GUARD_SZ)
+static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt, bool sleepable);
+
struct bpf_arena {
struct bpf_map map;
u64 user_vm_start;
u64 user_vm_end;
struct vm_struct *kern_vm;
struct range_tree rt;
+ /* protects rt */
+ rqspinlock_t spinlock;
struct list_head vma_list;
+ /* protects vma_list */
struct mutex lock;
+ struct irq_work free_irq;
+ struct work_struct free_work;
+ struct llist_head free_spans;
+};
+
+static void arena_free_worker(struct work_struct *work);
+static void arena_free_irq(struct irq_work *iw);
+
+struct arena_free_span {
+ struct llist_node node;
+ unsigned long uaddr;
+ u32 page_cnt;
};
u64 bpf_arena_get_kern_vm_start(struct bpf_arena *arena)
@@ -92,6 +113,66 @@ static long compute_pgoff(struct bpf_arena *arena, long uaddr)
return (u32)(uaddr - (u32)arena->user_vm_start) >> PAGE_SHIFT;
}
+struct apply_range_data {
+ struct page **pages;
+ int i;
+};
+
+static int apply_range_set_cb(pte_t *pte, unsigned long addr, void *data)
+{
+ struct apply_range_data *d = data;
+ struct page *page;
+
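+	/* a NULL @data populates intermediate page-table levels without setting PTEs */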
+ if (!data)
+ return 0;
+ /* sanity check */
+ if (unlikely(!pte_none(ptep_get(pte))))
+ return -EBUSY;
+
+ page = d->pages[d->i];
+ /* paranoia, similar to vmap_pages_pte_range() */
+ if (WARN_ON_ONCE(!pfn_valid(page_to_pfn(page))))
+ return -EINVAL;
+
+ set_pte_at(&init_mm, addr, pte, mk_pte(page, PAGE_KERNEL));
+ d->i++;
+ return 0;
+}
+
+static void flush_vmap_cache(unsigned long start, unsigned long size)
+{
+ flush_cache_vmap(start, start + size);
+}
+
+static int apply_range_clear_cb(pte_t *pte, unsigned long addr, void *free_pages)
+{
+ pte_t old_pte;
+ struct page *page;
+
+ /* sanity check */
+ old_pte = ptep_get(pte);
+ if (pte_none(old_pte) || !pte_present(old_pte))
+ return 0; /* nothing to do */
+
+ page = pte_page(old_pte);
+ if (WARN_ON_ONCE(!page))
+ return -EINVAL;
+
+ pte_clear(&init_mm, addr, pte);
+
+ /* Add page to the list so it is freed later */
+ if (free_pages)
+ __llist_add(&page->pcp_llist, free_pages);
+
+ return 0;
+}
+
+static int populate_pgtable_except_pte(struct bpf_arena *arena)
+{
+ return apply_to_page_range(&init_mm, bpf_arena_get_kern_vm_start(arena),
+ KERN_VM_SZ - GUARD_SZ, apply_range_set_cb, NULL);
+}
+
static struct bpf_map *arena_map_alloc(union bpf_attr *attr)
{
struct vm_struct *kern_vm;
@@ -136,6 +217,9 @@ static struct bpf_map *arena_map_alloc(union bpf_attr *attr)
arena->user_vm_end = arena->user_vm_start + vm_range;
INIT_LIST_HEAD(&arena->vma_list);
+ init_llist_head(&arena->free_spans);
+ init_irq_work(&arena->free_irq, arena_free_irq);
+ INIT_WORK(&arena->free_work, arena_free_worker);
bpf_map_init_from_attr(&arena->map, attr);
range_tree_init(&arena->rt);
err = range_tree_set(&arena->rt, 0, attr->max_entries);
@@ -144,6 +228,13 @@ static struct bpf_map *arena_map_alloc(union bpf_attr *attr)
goto err;
}
mutex_init(&arena->lock);
+ raw_res_spin_lock_init(&arena->spinlock);
+ err = populate_pgtable_except_pte(arena);
+ if (err) {
+ range_tree_destroy(&arena->rt);
+ bpf_map_area_free(arena);
+ goto err;
+ }
return &arena->map;
err:
@@ -184,6 +275,10 @@ static void arena_map_free(struct bpf_map *map)
if (WARN_ON_ONCE(!list_empty(&arena->vma_list)))
return;
+ /* Ensure no pending deferred frees */
+ irq_work_sync(&arena->free_irq);
+ flush_work(&arena->free_work);
+
/*
* free_vm_area() calls remove_vm_area() that calls free_unmap_vmap_area().
* It unmaps everything from vmalloc area and clears pgtables.
@@ -265,44 +360,59 @@ static vm_fault_t arena_vm_fault(struct vm_fault *vmf)
{
struct bpf_map *map = vmf->vma->vm_file->private_data;
struct bpf_arena *arena = container_of(map, struct bpf_arena, map);
+ struct mem_cgroup *new_memcg, *old_memcg;
struct page *page;
long kbase, kaddr;
+ unsigned long flags;
int ret;
kbase = bpf_arena_get_kern_vm_start(arena);
kaddr = kbase + (u32)(vmf->address);
- guard(mutex)(&arena->lock);
+ if (raw_res_spin_lock_irqsave(&arena->spinlock, flags))
+		/* Lock acquisition can fail only on deadlock or timeout, which
+		 * should be impossible here; fall back to retrying the fault.
+		 */
+ return VM_FAULT_RETRY;
+
page = vmalloc_to_page((void *)kaddr);
if (page)
/* already have a page vmap-ed */
goto out;
+ bpf_map_memcg_enter(&arena->map, &old_memcg, &new_memcg);
+
if (arena->map.map_flags & BPF_F_SEGV_ON_FAULT)
/* User space requested to segfault when page is not allocated by bpf prog */
- return VM_FAULT_SIGSEGV;
+ goto out_unlock_sigsegv;
ret = range_tree_clear(&arena->rt, vmf->pgoff, 1);
if (ret)
- return VM_FAULT_SIGSEGV;
+ goto out_unlock_sigsegv;
+ struct apply_range_data data = { .pages = &page, .i = 0 };
/* Account into memcg of the process that created bpf_arena */
ret = bpf_map_alloc_pages(map, NUMA_NO_NODE, 1, &page);
if (ret) {
range_tree_set(&arena->rt, vmf->pgoff, 1);
- return VM_FAULT_SIGSEGV;
+ goto out_unlock_sigsegv;
}
- ret = vm_area_map_pages(arena->kern_vm, kaddr, kaddr + PAGE_SIZE, &page);
+ ret = apply_to_page_range(&init_mm, kaddr, PAGE_SIZE, apply_range_set_cb, &data);
if (ret) {
range_tree_set(&arena->rt, vmf->pgoff, 1);
- __free_page(page);
- return VM_FAULT_SIGSEGV;
+ free_pages_nolock(page, 0);
+ goto out_unlock_sigsegv;
}
+ flush_vmap_cache(kaddr, PAGE_SIZE);
+ bpf_map_memcg_exit(old_memcg, new_memcg);
out:
page_ref_add(page, 1);
+ raw_res_spin_unlock_irqrestore(&arena->spinlock, flags);
vmf->page = page;
return 0;
+out_unlock_sigsegv:
+ bpf_map_memcg_exit(old_memcg, new_memcg);
+ raw_res_spin_unlock_irqrestore(&arena->spinlock, flags);
+ return VM_FAULT_SIGSEGV;
}
static const struct vm_operations_struct arena_vm_ops = {
@@ -423,12 +533,18 @@ static u64 clear_lo32(u64 val)
* Allocate pages and vmap them into kernel vmalloc area.
* Later the pages will be mmaped into user space vma.
*/
-static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt, int node_id)
+static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt, int node_id,
+ bool sleepable)
{
/* user_vm_end/start are fixed before bpf prog runs */
long page_cnt_max = (arena->user_vm_end - arena->user_vm_start) >> PAGE_SHIFT;
u64 kern_vm_start = bpf_arena_get_kern_vm_start(arena);
- struct page **pages;
+ struct mem_cgroup *new_memcg, *old_memcg;
+ struct apply_range_data data;
+ struct page **pages = NULL;
+ long remaining, mapped = 0;
+ long alloc_pages;
+ unsigned long flags;
long pgoff = 0;
u32 uaddr32;
int ret, i;
@@ -445,17 +561,23 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
return 0;
}
- /* zeroing is needed, since alloc_pages_bulk() only fills in non-zero entries */
- pages = kvcalloc(page_cnt, sizeof(struct page *), GFP_KERNEL);
- if (!pages)
+ bpf_map_memcg_enter(&arena->map, &old_memcg, &new_memcg);
+ /* Cap allocation size to KMALLOC_MAX_CACHE_SIZE so kmalloc_nolock() can succeed. */
+ alloc_pages = min(page_cnt, KMALLOC_MAX_CACHE_SIZE / sizeof(struct page *));
+ pages = kmalloc_nolock(alloc_pages * sizeof(struct page *), __GFP_ACCOUNT, NUMA_NO_NODE);
+ if (!pages) {
+ bpf_map_memcg_exit(old_memcg, new_memcg);
return 0;
+ }
+ data.pages = pages;
- guard(mutex)(&arena->lock);
+ if (raw_res_spin_lock_irqsave(&arena->spinlock, flags))
+ goto out_free_pages;
if (uaddr) {
ret = is_range_tree_set(&arena->rt, pgoff, page_cnt);
if (ret)
- goto out_free_pages;
+ goto out_unlock_free_pages;
ret = range_tree_clear(&arena->rt, pgoff, page_cnt);
} else {
ret = pgoff = range_tree_find(&arena->rt, page_cnt);
@@ -463,33 +585,62 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
ret = range_tree_clear(&arena->rt, pgoff, page_cnt);
}
if (ret)
- goto out_free_pages;
-
- ret = bpf_map_alloc_pages(&arena->map, node_id, page_cnt, pages);
- if (ret)
- goto out;
+ goto out_unlock_free_pages;
+ remaining = page_cnt;
uaddr32 = (u32)(arena->user_vm_start + pgoff * PAGE_SIZE);
- /* Earlier checks made sure that uaddr32 + page_cnt * PAGE_SIZE - 1
- * will not overflow 32-bit. Lower 32-bit need to represent
- * contiguous user address range.
- * Map these pages at kern_vm_start base.
- * kern_vm_start + uaddr32 + page_cnt * PAGE_SIZE - 1 can overflow
- * lower 32-bit and it's ok.
- */
- ret = vm_area_map_pages(arena->kern_vm, kern_vm_start + uaddr32,
- kern_vm_start + uaddr32 + page_cnt * PAGE_SIZE, pages);
- if (ret) {
- for (i = 0; i < page_cnt; i++)
- __free_page(pages[i]);
- goto out;
+
+ while (remaining) {
+ long this_batch = min(remaining, alloc_pages);
+
+		/* zeroing is needed, since alloc_pages_bulk() only fills in NULL entries */
+ memset(pages, 0, this_batch * sizeof(struct page *));
+
+ ret = bpf_map_alloc_pages(&arena->map, node_id, this_batch, pages);
+ if (ret)
+ goto out;
+
+ /*
+ * Earlier checks made sure that uaddr32 + page_cnt * PAGE_SIZE - 1
+ * will not overflow 32-bit. Lower 32-bit need to represent
+ * contiguous user address range.
+ * Map these pages at kern_vm_start base.
+ * kern_vm_start + uaddr32 + page_cnt * PAGE_SIZE - 1 can overflow
+ * lower 32-bit and it's ok.
+ */
+ data.i = 0;
+ ret = apply_to_page_range(&init_mm,
+ kern_vm_start + uaddr32 + (mapped << PAGE_SHIFT),
+ this_batch << PAGE_SHIFT, apply_range_set_cb, &data);
+ if (ret) {
+ /* data.i pages were mapped, account them and free the remaining */
+ mapped += data.i;
+ for (i = data.i; i < this_batch; i++)
+ free_pages_nolock(pages[i], 0);
+ goto out;
+ }
+
+ mapped += this_batch;
+ remaining -= this_batch;
}
- kvfree(pages);
+ flush_vmap_cache(kern_vm_start + uaddr32, mapped << PAGE_SHIFT);
+ raw_res_spin_unlock_irqrestore(&arena->spinlock, flags);
+ kfree_nolock(pages);
+ bpf_map_memcg_exit(old_memcg, new_memcg);
return clear_lo32(arena->user_vm_start) + uaddr32;
out:
- range_tree_set(&arena->rt, pgoff, page_cnt);
+ range_tree_set(&arena->rt, pgoff + mapped, page_cnt - mapped);
+ raw_res_spin_unlock_irqrestore(&arena->spinlock, flags);
+ if (mapped) {
+ flush_vmap_cache(kern_vm_start + uaddr32, mapped << PAGE_SHIFT);
+ arena_free_pages(arena, uaddr32, mapped, sleepable);
+ }
+ goto out_free_pages;
+out_unlock_free_pages:
+ raw_res_spin_unlock_irqrestore(&arena->spinlock, flags);
out_free_pages:
- kvfree(pages);
+ kfree_nolock(pages);
+ bpf_map_memcg_exit(old_memcg, new_memcg);
return 0;
}
@@ -502,42 +653,66 @@ static void zap_pages(struct bpf_arena *arena, long uaddr, long page_cnt)
{
struct vma_list *vml;
+ guard(mutex)(&arena->lock);
+	/* iterate the vma list under the lock */
list_for_each_entry(vml, &arena->vma_list, head)
zap_page_range_single(vml->vma, uaddr,
PAGE_SIZE * page_cnt, NULL);
}
-static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt)
+static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt, bool sleepable)
{
+ struct mem_cgroup *new_memcg, *old_memcg;
u64 full_uaddr, uaddr_end;
- long kaddr, pgoff, i;
+ long kaddr, pgoff;
struct page *page;
+ struct llist_head free_pages;
+ struct llist_node *pos, *t;
+ struct arena_free_span *s;
+ unsigned long flags;
+ int ret = 0;
/* only aligned lower 32-bit are relevant */
uaddr = (u32)uaddr;
uaddr &= PAGE_MASK;
+ kaddr = bpf_arena_get_kern_vm_start(arena) + uaddr;
full_uaddr = clear_lo32(arena->user_vm_start) + uaddr;
uaddr_end = min(arena->user_vm_end, full_uaddr + (page_cnt << PAGE_SHIFT));
if (full_uaddr >= uaddr_end)
return;
page_cnt = (uaddr_end - full_uaddr) >> PAGE_SHIFT;
+ pgoff = compute_pgoff(arena, uaddr);
+ bpf_map_memcg_enter(&arena->map, &old_memcg, &new_memcg);
- guard(mutex)(&arena->lock);
+ if (!sleepable)
+ goto defer;
+
+ ret = raw_res_spin_lock_irqsave(&arena->spinlock, flags);
+
+ /* Can't proceed without holding the spinlock so defer the free */
+ if (ret)
+ goto defer;
- pgoff = compute_pgoff(arena, uaddr);
- /* clear range */
range_tree_set(&arena->rt, pgoff, page_cnt);
+ init_llist_head(&free_pages);
+ /* clear ptes and collect struct pages */
+ apply_to_existing_page_range(&init_mm, kaddr, page_cnt << PAGE_SHIFT,
+ apply_range_clear_cb, &free_pages);
+
+ /* drop the lock to do the tlb flush and zap pages */
+ raw_res_spin_unlock_irqrestore(&arena->spinlock, flags);
+
+ /* ensure no stale TLB entries */
+ flush_tlb_kernel_range(kaddr, kaddr + (page_cnt * PAGE_SIZE));
+
if (page_cnt > 1)
/* bulk zap if multiple pages being freed */
zap_pages(arena, full_uaddr, page_cnt);
- kaddr = bpf_arena_get_kern_vm_start(arena) + uaddr;
- for (i = 0; i < page_cnt; i++, kaddr += PAGE_SIZE, full_uaddr += PAGE_SIZE) {
- page = vmalloc_to_page((void *)kaddr);
- if (!page)
- continue;
+ llist_for_each_safe(pos, t, __llist_del_all(&free_pages)) {
+ page = llist_entry(pos, struct page, pcp_llist);
if (page_cnt == 1 && page_mapped(page)) /* mapped by some user process */
/* Optimization for the common case of page_cnt==1:
* If page wasn't mapped into some user vma there
@@ -545,9 +720,27 @@ static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt)
* page_cnt is big it's faster to do the batched zap.
*/
zap_pages(arena, full_uaddr, 1);
- vm_area_unmap_pages(arena->kern_vm, kaddr, kaddr + PAGE_SIZE);
__free_page(page);
}
+ bpf_map_memcg_exit(old_memcg, new_memcg);
+
+ return;
+
+defer:
+ s = kmalloc_nolock(sizeof(struct arena_free_span), __GFP_ACCOUNT, -1);
+ bpf_map_memcg_exit(old_memcg, new_memcg);
+ if (!s)
+		/*
+		 * If allocation fails in non-sleepable context, the pages are
+		 * intentionally left inaccessible (leaked) until the arena is
+		 * destroyed. Cleanup or retries are not possible here, so they
+		 * are omitted for safety.
+		 */
+ return;
+
+ s->page_cnt = page_cnt;
+ s->uaddr = uaddr;
+ llist_add(&s->node, &arena->free_spans);
+ irq_work_queue(&arena->free_irq);
}
/*
@@ -557,6 +750,8 @@ static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt)
static int arena_reserve_pages(struct bpf_arena *arena, long uaddr, u32 page_cnt)
{
long page_cnt_max = (arena->user_vm_end - arena->user_vm_start) >> PAGE_SHIFT;
+ struct mem_cgroup *new_memcg, *old_memcg;
+ unsigned long flags;
long pgoff;
int ret;
@@ -567,15 +762,94 @@ static int arena_reserve_pages(struct bpf_arena *arena, long uaddr, u32 page_cnt
if (pgoff + page_cnt > page_cnt_max)
return -EINVAL;
- guard(mutex)(&arena->lock);
+ if (raw_res_spin_lock_irqsave(&arena->spinlock, flags))
+ return -EBUSY;
/* Cannot guard already allocated pages. */
ret = is_range_tree_set(&arena->rt, pgoff, page_cnt);
- if (ret)
- return -EBUSY;
+ if (ret) {
+ ret = -EBUSY;
+ goto out;
+ }
/* "Allocate" the region to prevent it from being allocated. */
- return range_tree_clear(&arena->rt, pgoff, page_cnt);
+ bpf_map_memcg_enter(&arena->map, &old_memcg, &new_memcg);
+ ret = range_tree_clear(&arena->rt, pgoff, page_cnt);
+ bpf_map_memcg_exit(old_memcg, new_memcg);
+out:
+ raw_res_spin_unlock_irqrestore(&arena->spinlock, flags);
+ return ret;
+}
+
+static void arena_free_worker(struct work_struct *work)
+{
+ struct bpf_arena *arena = container_of(work, struct bpf_arena, free_work);
+ struct mem_cgroup *new_memcg, *old_memcg;
+ struct llist_node *list, *pos, *t;
+ struct arena_free_span *s;
+ u64 arena_vm_start, user_vm_start;
+ struct llist_head free_pages;
+ struct page *page;
+ unsigned long full_uaddr;
+ long kaddr, page_cnt, pgoff;
+ unsigned long flags;
+
+ if (raw_res_spin_lock_irqsave(&arena->spinlock, flags)) {
+ schedule_work(work);
+ return;
+ }
+
+ bpf_map_memcg_enter(&arena->map, &old_memcg, &new_memcg);
+
+ init_llist_head(&free_pages);
+ arena_vm_start = bpf_arena_get_kern_vm_start(arena);
+ user_vm_start = bpf_arena_get_user_vm_start(arena);
+
+ list = llist_del_all(&arena->free_spans);
+ llist_for_each(pos, list) {
+ s = llist_entry(pos, struct arena_free_span, node);
+ page_cnt = s->page_cnt;
+ kaddr = arena_vm_start + s->uaddr;
+ pgoff = compute_pgoff(arena, s->uaddr);
+
+ /* clear ptes and collect pages in free_pages llist */
+ apply_to_existing_page_range(&init_mm, kaddr, page_cnt << PAGE_SHIFT,
+ apply_range_clear_cb, &free_pages);
+
+ range_tree_set(&arena->rt, pgoff, page_cnt);
+ }
+ raw_res_spin_unlock_irqrestore(&arena->spinlock, flags);
+
+ /* Iterate the list again without holding spinlock to do the tlb flush and zap_pages */
+ llist_for_each_safe(pos, t, list) {
+ s = llist_entry(pos, struct arena_free_span, node);
+ page_cnt = s->page_cnt;
+ full_uaddr = clear_lo32(user_vm_start) + s->uaddr;
+ kaddr = arena_vm_start + s->uaddr;
+
+ /* ensure no stale TLB entries */
+ flush_tlb_kernel_range(kaddr, kaddr + (page_cnt * PAGE_SIZE));
+
+ /* remove pages from user vmas */
+ zap_pages(arena, full_uaddr, page_cnt);
+
+ kfree_nolock(s);
+ }
+
+ /* free all pages collected by apply_to_existing_page_range() in the first loop */
+ llist_for_each_safe(pos, t, __llist_del_all(&free_pages)) {
+ page = llist_entry(pos, struct page, pcp_llist);
+ __free_page(page);
+ }
+
+ bpf_map_memcg_exit(old_memcg, new_memcg);
+}
+
+static void arena_free_irq(struct irq_work *iw)
+{
+ struct bpf_arena *arena = container_of(iw, struct bpf_arena, free_irq);
+
+ schedule_work(&arena->free_work);
}
__bpf_kfunc_start_defs();
@@ -589,9 +863,20 @@ __bpf_kfunc void *bpf_arena_alloc_pages(void *p__map, void *addr__ign, u32 page_
if (map->map_type != BPF_MAP_TYPE_ARENA || flags || !page_cnt)
return NULL;
- return (void *)arena_alloc_pages(arena, (long)addr__ign, page_cnt, node_id);
+ return (void *)arena_alloc_pages(arena, (long)addr__ign, page_cnt, node_id, true);
}
+void *bpf_arena_alloc_pages_non_sleepable(void *p__map, void *addr__ign, u32 page_cnt,
+ int node_id, u64 flags)
+{
+ struct bpf_map *map = p__map;
+ struct bpf_arena *arena = container_of(map, struct bpf_arena, map);
+
+ if (map->map_type != BPF_MAP_TYPE_ARENA || flags || !page_cnt)
+ return NULL;
+
+ return (void *)arena_alloc_pages(arena, (long)addr__ign, page_cnt, node_id, false);
+}
__bpf_kfunc void bpf_arena_free_pages(void *p__map, void *ptr__ign, u32 page_cnt)
{
struct bpf_map *map = p__map;
@@ -599,7 +884,17 @@ __bpf_kfunc void bpf_arena_free_pages(void *p__map, void *ptr__ign, u32 page_cnt
if (map->map_type != BPF_MAP_TYPE_ARENA || !page_cnt || !ptr__ign)
return;
- arena_free_pages(arena, (long)ptr__ign, page_cnt);
+ arena_free_pages(arena, (long)ptr__ign, page_cnt, true);
+}
+
+void bpf_arena_free_pages_non_sleepable(void *p__map, void *ptr__ign, u32 page_cnt)
+{
+ struct bpf_map *map = p__map;
+ struct bpf_arena *arena = container_of(map, struct bpf_arena, map);
+
+ if (map->map_type != BPF_MAP_TYPE_ARENA || !page_cnt || !ptr__ign)
+ return;
+ arena_free_pages(arena, (long)ptr__ign, page_cnt, false);
}
__bpf_kfunc int bpf_arena_reserve_pages(void *p__map, void *ptr__ign, u32 page_cnt)
@@ -618,9 +913,9 @@ __bpf_kfunc int bpf_arena_reserve_pages(void *p__map, void *ptr__ign, u32 page_c
__bpf_kfunc_end_defs();
BTF_KFUNCS_START(arena_kfuncs)
-BTF_ID_FLAGS(func, bpf_arena_alloc_pages, KF_TRUSTED_ARGS | KF_SLEEPABLE | KF_ARENA_RET | KF_ARENA_ARG2)
-BTF_ID_FLAGS(func, bpf_arena_free_pages, KF_TRUSTED_ARGS | KF_SLEEPABLE | KF_ARENA_ARG2)
-BTF_ID_FLAGS(func, bpf_arena_reserve_pages, KF_TRUSTED_ARGS | KF_SLEEPABLE | KF_ARENA_ARG2)
+BTF_ID_FLAGS(func, bpf_arena_alloc_pages, KF_ARENA_RET | KF_ARENA_ARG2)
+BTF_ID_FLAGS(func, bpf_arena_free_pages, KF_ARENA_ARG2)
+BTF_ID_FLAGS(func, bpf_arena_reserve_pages, KF_ARENA_ARG2)
BTF_KFUNCS_END(arena_kfuncs)
static const struct btf_kfunc_id_set common_kfunc_set = {
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 1eeb31c5b317..67e9e811de3a 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -307,7 +307,7 @@ static void *percpu_array_map_lookup_percpu_elem(struct bpf_map *map, void *key,
return per_cpu_ptr(array->pptrs[index & array->index_mask], cpu);
}
-int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
+int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value, u64 map_flags)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
u32 index = *(u32 *)key;
@@ -325,11 +325,18 @@ int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
size = array->elem_size;
rcu_read_lock();
pptr = array->pptrs[index & array->index_mask];
+ if (map_flags & BPF_F_CPU) {
+ cpu = map_flags >> 32;
+ copy_map_value(map, value, per_cpu_ptr(pptr, cpu));
+ check_and_init_map_value(map, value);
+ goto unlock;
+ }
for_each_possible_cpu(cpu) {
copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu));
check_and_init_map_value(map, value + off);
off += size;
}
+unlock:
rcu_read_unlock();
return 0;
}
@@ -398,10 +405,11 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
struct bpf_array *array = container_of(map, struct bpf_array, map);
u32 index = *(u32 *)key;
void __percpu *pptr;
- int cpu, off = 0;
+ void *ptr, *val;
u32 size;
+	int cpu, off = 0;
- if (unlikely(map_flags > BPF_EXIST))
+ if (unlikely((map_flags & BPF_F_LOCK) || (u32)map_flags > BPF_F_ALL_CPUS))
/* unknown flags */
return -EINVAL;
@@ -422,11 +430,20 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
size = array->elem_size;
rcu_read_lock();
pptr = array->pptrs[index & array->index_mask];
+ if (map_flags & BPF_F_CPU) {
+ cpu = map_flags >> 32;
+ ptr = per_cpu_ptr(pptr, cpu);
+ copy_map_value(map, ptr, value);
+ bpf_obj_free_fields(array->map.record, ptr);
+ goto unlock;
+ }
for_each_possible_cpu(cpu) {
- copy_map_value_long(map, per_cpu_ptr(pptr, cpu), value + off);
- bpf_obj_free_fields(array->map.record, per_cpu_ptr(pptr, cpu));
- off += size;
+		ptr = per_cpu_ptr(pptr, cpu);
+		/* BPF_F_ALL_CPUS replicates one element; otherwise value is a per-cpu array */
+		val = (map_flags & BPF_F_ALL_CPUS) ? value : value + off;
+		copy_map_value(map, ptr, val);
+		bpf_obj_free_fields(array->map.record, ptr);
+		off += size;
}
+unlock:
rcu_read_unlock();
return 0;
}
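[Editor note: the companion user-space pattern: with BPF_F_ALL_CPUS the caller hands in a single element and the kernel replicates it to every cpu, instead of preparing num_possible_cpus() copies. A libbpf sketch (`struct elem` assumed):]

	static int reset_on_all_cpus(int map_fd)
	{
		__u32 key = 0;
		struct elem zero = {};	/* one element stands in for every cpu */

		return bpf_map_update_elem(map_fd, &key, &zero, BPF_F_ALL_CPUS);
	}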
diff --git a/kernel/bpf/bpf_cgrp_storage.c b/kernel/bpf/bpf_cgrp_storage.c
index 0687a760974a..c2a2ead1f466 100644
--- a/kernel/bpf/bpf_cgrp_storage.c
+++ b/kernel/bpf/bpf_cgrp_storage.c
@@ -11,29 +11,6 @@
DEFINE_BPF_STORAGE_CACHE(cgroup_cache);
-static DEFINE_PER_CPU(int, bpf_cgrp_storage_busy);
-
-static void bpf_cgrp_storage_lock(void)
-{
- cant_migrate();
- this_cpu_inc(bpf_cgrp_storage_busy);
-}
-
-static void bpf_cgrp_storage_unlock(void)
-{
- this_cpu_dec(bpf_cgrp_storage_busy);
-}
-
-static bool bpf_cgrp_storage_trylock(void)
-{
- cant_migrate();
- if (unlikely(this_cpu_inc_return(bpf_cgrp_storage_busy) != 1)) {
- this_cpu_dec(bpf_cgrp_storage_busy);
- return false;
- }
- return true;
-}
-
static struct bpf_local_storage __rcu **cgroup_storage_ptr(void *owner)
{
struct cgroup *cg = owner;
@@ -45,16 +22,14 @@ void bpf_cgrp_storage_free(struct cgroup *cgroup)
{
struct bpf_local_storage *local_storage;
- rcu_read_lock_dont_migrate();
+ rcu_read_lock();
local_storage = rcu_dereference(cgroup->bpf_cgrp_storage);
if (!local_storage)
goto out;
- bpf_cgrp_storage_lock();
bpf_local_storage_destroy(local_storage);
- bpf_cgrp_storage_unlock();
out:
- rcu_read_unlock_migrate();
+ rcu_read_unlock();
}
static struct bpf_local_storage_data *
@@ -83,9 +58,7 @@ static void *bpf_cgrp_storage_lookup_elem(struct bpf_map *map, void *key)
if (IS_ERR(cgroup))
return ERR_CAST(cgroup);
- bpf_cgrp_storage_lock();
sdata = cgroup_storage_lookup(cgroup, map, true);
- bpf_cgrp_storage_unlock();
cgroup_put(cgroup);
return sdata ? sdata->data : NULL;
}
@@ -102,10 +75,8 @@ static long bpf_cgrp_storage_update_elem(struct bpf_map *map, void *key,
if (IS_ERR(cgroup))
return PTR_ERR(cgroup);
- bpf_cgrp_storage_lock();
sdata = bpf_local_storage_update(cgroup, (struct bpf_local_storage_map *)map,
value, map_flags, false, GFP_ATOMIC);
- bpf_cgrp_storage_unlock();
cgroup_put(cgroup);
return PTR_ERR_OR_ZERO(sdata);
}
@@ -118,8 +89,7 @@ static int cgroup_storage_delete(struct cgroup *cgroup, struct bpf_map *map)
if (!sdata)
return -ENOENT;
- bpf_selem_unlink(SELEM(sdata), false);
- return 0;
+ return bpf_selem_unlink(SELEM(sdata));
}
static long bpf_cgrp_storage_delete_elem(struct bpf_map *map, void *key)
@@ -132,9 +102,7 @@ static long bpf_cgrp_storage_delete_elem(struct bpf_map *map, void *key)
if (IS_ERR(cgroup))
return PTR_ERR(cgroup);
- bpf_cgrp_storage_lock();
err = cgroup_storage_delete(cgroup, map);
- bpf_cgrp_storage_unlock();
cgroup_put(cgroup);
return err;
}
@@ -151,7 +119,7 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
static void cgroup_storage_map_free(struct bpf_map *map)
{
- bpf_local_storage_map_free(map, &cgroup_cache, &bpf_cgrp_storage_busy);
+ bpf_local_storage_map_free(map, &cgroup_cache);
}
/* *gfp_flags* is a hidden argument provided by the verifier */
@@ -159,7 +127,6 @@ BPF_CALL_5(bpf_cgrp_storage_get, struct bpf_map *, map, struct cgroup *, cgroup,
void *, value, u64, flags, gfp_t, gfp_flags)
{
struct bpf_local_storage_data *sdata;
- bool nobusy;
WARN_ON_ONCE(!bpf_rcu_lock_held());
if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE))
@@ -168,38 +135,27 @@ BPF_CALL_5(bpf_cgrp_storage_get, struct bpf_map *, map, struct cgroup *, cgroup,
if (!cgroup)
return (unsigned long)NULL;
- nobusy = bpf_cgrp_storage_trylock();
-
- sdata = cgroup_storage_lookup(cgroup, map, nobusy);
+ sdata = cgroup_storage_lookup(cgroup, map, true);
if (sdata)
- goto unlock;
+ goto out;
/* only allocate new storage, when the cgroup is refcounted */
if (!percpu_ref_is_dying(&cgroup->self.refcnt) &&
- (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) && nobusy)
+ (flags & BPF_LOCAL_STORAGE_GET_F_CREATE))
sdata = bpf_local_storage_update(cgroup, (struct bpf_local_storage_map *)map,
value, BPF_NOEXIST, false, gfp_flags);
-unlock:
- if (nobusy)
- bpf_cgrp_storage_unlock();
+out:
return IS_ERR_OR_NULL(sdata) ? (unsigned long)NULL : (unsigned long)sdata->data;
}
BPF_CALL_2(bpf_cgrp_storage_delete, struct bpf_map *, map, struct cgroup *, cgroup)
{
- int ret;
-
WARN_ON_ONCE(!bpf_rcu_lock_held());
if (!cgroup)
return -EINVAL;
- if (!bpf_cgrp_storage_trylock())
- return -EBUSY;
-
- ret = cgroup_storage_delete(cgroup, map);
- bpf_cgrp_storage_unlock();
- return ret;
+ return cgroup_storage_delete(cgroup, map);
}
const struct bpf_map_ops cgrp_storage_map_ops = {
diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c
index e54cce2b9175..e86734609f3d 100644
--- a/kernel/bpf/bpf_inode_storage.c
+++ b/kernel/bpf/bpf_inode_storage.c
@@ -110,9 +110,7 @@ static int inode_storage_delete(struct inode *inode, struct bpf_map *map)
if (!sdata)
return -ENOENT;
- bpf_selem_unlink(SELEM(sdata), false);
-
- return 0;
+ return bpf_selem_unlink(SELEM(sdata));
}
static long bpf_fd_inode_storage_delete_elem(struct bpf_map *map, void *key)
@@ -186,7 +184,7 @@ static struct bpf_map *inode_storage_map_alloc(union bpf_attr *attr)
static void inode_storage_map_free(struct bpf_map *map)
{
- bpf_local_storage_map_free(map, &inode_cache, NULL);
+ bpf_local_storage_map_free(map, &inode_cache);
}
const struct bpf_map_ops inode_storage_map_ops = {
diff --git a/kernel/bpf/bpf_insn_array.c b/kernel/bpf/bpf_insn_array.c
index c96630cb75bf..c0286f25ca3c 100644
--- a/kernel/bpf/bpf_insn_array.c
+++ b/kernel/bpf/bpf_insn_array.c
@@ -123,10 +123,10 @@ static int insn_array_map_direct_value_addr(const struct bpf_map *map, u64 *imm,
if ((off % sizeof(long)) != 0 ||
(off / sizeof(long)) >= map->max_entries)
- return -EINVAL;
+ return -EACCES;
/* from BPF's point of view, this map is a jump table */
- *imm = (unsigned long)insn_array->ips + off;
+ *imm = (unsigned long)insn_array->ips;
return 0;
}
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index eec60b57bd3d..4b58d56ecab1 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -86,7 +86,7 @@ static bool bpf_iter_support_resched(struct seq_file *seq)
/* bpf_seq_read, a customized and simpler version for bpf iterator.
* The following are differences from seq_read():
- * . fixed buffer size (PAGE_SIZE)
+ * . fixed buffer size (PAGE_SIZE << 3)
* . assuming NULL ->llseek()
* . stop() may call bpf program, handling potential overflow there
*/
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index e2fe6c32822b..b28f07d3a0db 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -19,9 +19,9 @@
static struct bpf_local_storage_map_bucket *
select_bucket(struct bpf_local_storage_map *smap,
- struct bpf_local_storage_elem *selem)
+ struct bpf_local_storage *local_storage)
{
- return &smap->buckets[hash_ptr(selem, smap->bucket_log)];
+ return &smap->buckets[hash_ptr(local_storage, smap->bucket_log)];
}
static int mem_charge(struct bpf_local_storage_map *smap, void *owner, u32 size)
@@ -61,11 +61,6 @@ static bool selem_linked_to_storage(const struct bpf_local_storage_elem *selem)
return !hlist_unhashed(&selem->snode);
}
-static bool selem_linked_to_map_lockless(const struct bpf_local_storage_elem *selem)
-{
- return !hlist_unhashed_lockless(&selem->map_node);
-}
-
static bool selem_linked_to_map(const struct bpf_local_storage_elem *selem)
{
return !hlist_unhashed(&selem->map_node);
@@ -90,6 +85,8 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
if (selem) {
RCU_INIT_POINTER(SDATA(selem)->smap, smap);
+ atomic_set(&selem->state, 0);
+ selem->use_kmalloc_nolock = smap->use_kmalloc_nolock;
if (value) {
/* No need to call check_and_init_map_value as memory is zero init */
@@ -198,9 +195,11 @@ static void bpf_selem_free_rcu(struct rcu_head *rcu)
/* The bpf_local_storage_map_free will wait for rcu_barrier */
smap = rcu_dereference_check(SDATA(selem)->smap, 1);
- migrate_disable();
- bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
- migrate_enable();
+ if (smap) {
+ migrate_disable();
+ bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
+ migrate_enable();
+ }
kfree_nolock(selem);
}
@@ -219,13 +218,14 @@ void bpf_selem_free(struct bpf_local_storage_elem *selem,
smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
- if (!smap->use_kmalloc_nolock) {
+ if (!selem->use_kmalloc_nolock) {
/*
	 * No uptr will be unpinned even when reuse_now == false since uptr
* is only supported in task local storage, where
* smap->use_kmalloc_nolock == true.
*/
- bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
+ if (smap)
+ bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
__bpf_selem_free(selem, reuse_now);
return;
}
@@ -256,6 +256,36 @@ static void bpf_selem_free_list(struct hlist_head *list, bool reuse_now)
bpf_selem_free(selem, reuse_now);
}
+static void bpf_selem_unlink_storage_nolock_misc(struct bpf_local_storage_elem *selem,
+ struct bpf_local_storage_map *smap,
+ struct bpf_local_storage *local_storage,
+ bool free_local_storage, bool pin_owner)
+{
+ void *owner = local_storage->owner;
+ u32 uncharge = smap->elem_size;
+
+ if (rcu_access_pointer(local_storage->cache[smap->cache_idx]) ==
+ SDATA(selem))
+ RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL);
+
+ if (pin_owner && !refcount_inc_not_zero(&local_storage->owner_refcnt))
+ return;
+
+ uncharge += free_local_storage ? sizeof(*local_storage) : 0;
+ mem_uncharge(smap, local_storage->owner, uncharge);
+ local_storage->mem_charge -= uncharge;
+
+ if (free_local_storage) {
+ local_storage->owner = NULL;
+
+ /* After this RCU_INIT, owner may be freed and cannot be used */
+ RCU_INIT_POINTER(*owner_storage(smap, owner), NULL);
+ }
+
+ if (pin_owner)
+ refcount_dec(&local_storage->owner_refcnt);
+}
+
/* local_storage->lock must be held and selem->local_storage == local_storage.
* The caller must ensure selem->smap is still valid to be
* dereferenced for its smap->elem_size and smap->cache_idx.
@@ -266,124 +296,219 @@ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_stor
{
struct bpf_local_storage_map *smap;
bool free_local_storage;
- void *owner;
smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
- owner = local_storage->owner;
-
- /* All uncharging on the owner must be done first.
- * The owner may be freed once the last selem is unlinked
- * from local_storage.
- */
- mem_uncharge(smap, owner, smap->elem_size);
free_local_storage = hlist_is_singular_node(&selem->snode,
&local_storage->list);
- if (free_local_storage) {
- mem_uncharge(smap, owner, sizeof(struct bpf_local_storage));
- local_storage->owner = NULL;
- /* After this RCU_INIT, owner may be freed and cannot be used */
- RCU_INIT_POINTER(*owner_storage(smap, owner), NULL);
+ bpf_selem_unlink_storage_nolock_misc(selem, smap, local_storage,
+ free_local_storage, false);
- /* local_storage is not freed now. local_storage->lock is
- * still held and raw_spin_unlock_bh(&local_storage->lock)
- * will be done by the caller.
- *
- * Although the unlock will be done under
- * rcu_read_lock(), it is more intuitive to
- * read if the freeing of the storage is done
- * after the raw_spin_unlock_bh(&local_storage->lock).
- *
- * Hence, a "bool free_local_storage" is returned
- * to the caller which then calls then frees the storage after
- * all the RCU grace periods have expired.
- */
- }
hlist_del_init_rcu(&selem->snode);
- if (rcu_access_pointer(local_storage->cache[smap->cache_idx]) ==
- SDATA(selem))
- RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL);
hlist_add_head(&selem->free_node, free_selem_list);
- if (rcu_access_pointer(local_storage->smap) == smap)
- RCU_INIT_POINTER(local_storage->smap, NULL);
-
return free_local_storage;
}
-static void bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem,
- bool reuse_now)
+void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage,
+ struct bpf_local_storage_elem *selem)
+{
+ struct bpf_local_storage_map *smap;
+
+ smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
+ local_storage->mem_charge += smap->elem_size;
+
+ RCU_INIT_POINTER(selem->local_storage, local_storage);
+ hlist_add_head_rcu(&selem->snode, &local_storage->list);
+}
+
+static int bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
+{
+ struct bpf_local_storage *local_storage;
+ struct bpf_local_storage_map *smap;
+ struct bpf_local_storage_map_bucket *b;
+ unsigned long flags;
+ int err;
+
+ local_storage = rcu_dereference_check(selem->local_storage,
+ bpf_rcu_lock_held());
+ smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
+ b = select_bucket(smap, local_storage);
+ err = raw_res_spin_lock_irqsave(&b->lock, flags);
+ if (err)
+ return err;
+
+ hlist_del_init_rcu(&selem->map_node);
+ raw_res_spin_unlock_irqrestore(&b->lock, flags);
+
+ return 0;
+}
+
+static void bpf_selem_unlink_map_nolock(struct bpf_local_storage_elem *selem)
+{
+ hlist_del_init_rcu(&selem->map_node);
+}
+
+int bpf_selem_link_map(struct bpf_local_storage_map *smap,
+ struct bpf_local_storage *local_storage,
+ struct bpf_local_storage_elem *selem)
+{
+ struct bpf_local_storage_map_bucket *b;
+ unsigned long flags;
+ int err;
+
+ b = select_bucket(smap, local_storage);
+
+ err = raw_res_spin_lock_irqsave(&b->lock, flags);
+ if (err)
+ return err;
+
+ hlist_add_head_rcu(&selem->map_node, &b->list);
+ raw_res_spin_unlock_irqrestore(&b->lock, flags);
+
+ return 0;
+}
+
+static void bpf_selem_link_map_nolock(struct bpf_local_storage_map_bucket *b,
+ struct bpf_local_storage_elem *selem)
+{
+ hlist_add_head_rcu(&selem->map_node, &b->list);
+}
+
+/*
+ * Unlink an selem from map and local storage with lock held.
+ * This is the common path used by local storages to delete an selem.
+ */
+int bpf_selem_unlink(struct bpf_local_storage_elem *selem)
{
struct bpf_local_storage *local_storage;
bool free_local_storage = false;
HLIST_HEAD(selem_free_list);
unsigned long flags;
+ int err;
if (unlikely(!selem_linked_to_storage_lockless(selem)))
/* selem has already been unlinked from sk */
- return;
+ return 0;
local_storage = rcu_dereference_check(selem->local_storage,
bpf_rcu_lock_held());
- raw_spin_lock_irqsave(&local_storage->lock, flags);
- if (likely(selem_linked_to_storage(selem)))
+ err = raw_res_spin_lock_irqsave(&local_storage->lock, flags);
+ if (err)
+ return err;
+
+ if (likely(selem_linked_to_storage(selem))) {
+ /* Always unlink from map before unlinking from local_storage
+ * because selem will be freed after successfully unlinked from
+ * the local_storage.
+ */
+ err = bpf_selem_unlink_map(selem);
+ if (err)
+ goto out;
+
free_local_storage = bpf_selem_unlink_storage_nolock(
local_storage, selem, &selem_free_list);
- raw_spin_unlock_irqrestore(&local_storage->lock, flags);
+ }
+out:
+ raw_res_spin_unlock_irqrestore(&local_storage->lock, flags);
- bpf_selem_free_list(&selem_free_list, reuse_now);
+ bpf_selem_free_list(&selem_free_list, false);
if (free_local_storage)
- bpf_local_storage_free(local_storage, reuse_now);
-}
+ bpf_local_storage_free(local_storage, false);
-void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage,
- struct bpf_local_storage_elem *selem)
-{
- RCU_INIT_POINTER(selem->local_storage, local_storage);
- hlist_add_head_rcu(&selem->snode, &local_storage->list);
+ return err;
}
-static void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
+/*
+ * Unlink an selem from the map and local storage, with a lockless fallback
+ * if callers are racing or rqspinlock returns an error. It should only be called by
+ * bpf_local_storage_destroy() or bpf_local_storage_map_free().
+ */
+static void bpf_selem_unlink_nofail(struct bpf_local_storage_elem *selem,
+ struct bpf_local_storage_map_bucket *b)
{
+ bool in_map_free = !!b, free_storage = false;
+ struct bpf_local_storage *local_storage;
struct bpf_local_storage_map *smap;
- struct bpf_local_storage_map_bucket *b;
unsigned long flags;
+ int err, unlink = 0;
- if (unlikely(!selem_linked_to_map_lockless(selem)))
- /* selem has already be unlinked from smap */
- return;
-
+ local_storage = rcu_dereference_check(selem->local_storage, bpf_rcu_lock_held());
smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
- b = select_bucket(smap, selem);
- raw_spin_lock_irqsave(&b->lock, flags);
- if (likely(selem_linked_to_map(selem)))
- hlist_del_init_rcu(&selem->map_node);
- raw_spin_unlock_irqrestore(&b->lock, flags);
-}
-void bpf_selem_link_map(struct bpf_local_storage_map *smap,
- struct bpf_local_storage_elem *selem)
-{
- struct bpf_local_storage_map_bucket *b = select_bucket(smap, selem);
- unsigned long flags;
+ if (smap) {
+ b = b ? : select_bucket(smap, local_storage);
+ err = raw_res_spin_lock_irqsave(&b->lock, flags);
+ if (!err) {
+ /*
+ * Call bpf_obj_free_fields() under b->lock to make sure it is done
+ * exactly once for an selem. Safe to free special fields immediately
+ * as no BPF program should be referencing the selem.
+ */
+ if (likely(selem_linked_to_map(selem))) {
+ hlist_del_init_rcu(&selem->map_node);
+ bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
+ unlink++;
+ }
+ raw_res_spin_unlock_irqrestore(&b->lock, flags);
+ }
+ /*
+ * Highly unlikely scenario: resource leak
+ *
+		 * When map_free(selem1), destroy(selem1) and destroy(selem2) are racing
+		 * and both selems belong to the same bucket, if destroy(selem2) acquires
+		 * b->lock and blocks for too long, neither map_free(selem1) nor
+		 * destroy(selem1) will be able to free the special field associated
+		 * with selem1, as raw_res_spin_lock_irqsave() returns -ETIMEDOUT.
+ */
+ WARN_ON_ONCE(err && in_map_free);
+ if (!err || in_map_free)
+ RCU_INIT_POINTER(SDATA(selem)->smap, NULL);
+ }
- raw_spin_lock_irqsave(&b->lock, flags);
- hlist_add_head_rcu(&selem->map_node, &b->list);
- raw_spin_unlock_irqrestore(&b->lock, flags);
-}
+ if (local_storage) {
+ err = raw_res_spin_lock_irqsave(&local_storage->lock, flags);
+ if (!err) {
+ if (likely(selem_linked_to_storage(selem))) {
+ free_storage = hlist_is_singular_node(&selem->snode,
+ &local_storage->list);
+ /*
+ * Okay to skip clearing owner_storage and storage->owner in
+ * destroy() since the owner is going away. No user or bpf
+ * programs should be able to reference it.
+ */
+ if (smap && in_map_free)
+ bpf_selem_unlink_storage_nolock_misc(
+ selem, smap, local_storage,
+ free_storage, true);
+ hlist_del_init_rcu(&selem->snode);
+ unlink++;
+ }
+ raw_res_spin_unlock_irqrestore(&local_storage->lock, flags);
+ }
+ if (!err || !in_map_free)
+ RCU_INIT_POINTER(selem->local_storage, NULL);
+ }
-void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool reuse_now)
-{
- /* Always unlink from map before unlinking from local_storage
- * because selem will be freed after successfully unlinked from
- * the local_storage.
+ if (unlink != 2)
+ atomic_or(in_map_free ? SELEM_MAP_UNLINKED : SELEM_STORAGE_UNLINKED, &selem->state);
+
+ /*
+ * Normally, an selem can be unlinked under local_storage->lock and b->lock, and
+	 * then freed after an RCU grace period. However, when destroy() and
+	 * map_free() race, or rqspinlock returns an error in unlikely situations
+	 * (unlink != 2), free the selem only after both map_free() and destroy()
+	 * have seen the selem.
*/
- bpf_selem_unlink_map(selem);
- bpf_selem_unlink_storage(selem, reuse_now);
+ if (unlink == 2 ||
+ atomic_cmpxchg(&selem->state, SELEM_UNLINKED, SELEM_TOFREE) == SELEM_UNLINKED)
+ bpf_selem_free(selem, true);
+
+ if (free_storage)
+ bpf_local_storage_free(local_storage, true);
}
void __bpf_local_storage_insert_cache(struct bpf_local_storage *local_storage,
@@ -391,16 +516,20 @@ void __bpf_local_storage_insert_cache(struct bpf_local_storage *local_storage,
struct bpf_local_storage_elem *selem)
{
unsigned long flags;
+ int err;
/* spinlock is needed to avoid racing with the
* parallel delete. Otherwise, publishing an already
* deleted sdata to the cache will become a use-after-free
* problem in the next bpf_local_storage_lookup().
*/
- raw_spin_lock_irqsave(&local_storage->lock, flags);
+ err = raw_res_spin_lock_irqsave(&local_storage->lock, flags);
+ if (err)
+ return;
+
if (selem_linked_to_storage(selem))
rcu_assign_pointer(local_storage->cache[smap->cache_idx], SDATA(selem));
- raw_spin_unlock_irqrestore(&local_storage->lock, flags);
+ raw_res_spin_unlock_irqrestore(&local_storage->lock, flags);
}
static int check_flags(const struct bpf_local_storage_data *old_sdata,
@@ -424,6 +553,8 @@ int bpf_local_storage_alloc(void *owner,
{
struct bpf_local_storage *prev_storage, *storage;
struct bpf_local_storage **owner_storage_ptr;
+ struct bpf_local_storage_map_bucket *b;
+ unsigned long flags;
int err;
err = mem_charge(smap, owner, sizeof(*storage));
@@ -441,14 +572,21 @@ int bpf_local_storage_alloc(void *owner,
goto uncharge;
}
- RCU_INIT_POINTER(storage->smap, smap);
INIT_HLIST_HEAD(&storage->list);
- raw_spin_lock_init(&storage->lock);
+ raw_res_spin_lock_init(&storage->lock);
storage->owner = owner;
+ storage->mem_charge = sizeof(*storage);
storage->use_kmalloc_nolock = smap->use_kmalloc_nolock;
+ refcount_set(&storage->owner_refcnt, 1);
bpf_selem_link_storage_nolock(storage, first_selem);
- bpf_selem_link_map(smap, first_selem);
+
+ b = select_bucket(smap, storage);
+ err = raw_res_spin_lock_irqsave(&b->lock, flags);
+ if (err)
+ goto uncharge;
+
+ bpf_selem_link_map_nolock(b, first_selem);
owner_storage_ptr =
(struct bpf_local_storage **)owner_storage(smap, owner);
@@ -464,10 +602,12 @@ int bpf_local_storage_alloc(void *owner,
*/
prev_storage = cmpxchg(owner_storage_ptr, NULL, storage);
if (unlikely(prev_storage)) {
- bpf_selem_unlink_map(first_selem);
+ bpf_selem_unlink_map_nolock(first_selem);
+ raw_res_spin_unlock_irqrestore(&b->lock, flags);
err = -EAGAIN;
goto uncharge;
}
+ raw_res_spin_unlock_irqrestore(&b->lock, flags);
return 0;
@@ -489,8 +629,9 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
struct bpf_local_storage_data *old_sdata = NULL;
struct bpf_local_storage_elem *alloc_selem, *selem = NULL;
struct bpf_local_storage *local_storage;
+ struct bpf_local_storage_map_bucket *b;
HLIST_HEAD(old_selem_free_list);
- unsigned long flags;
+ unsigned long flags, b_flags;
int err;
/* BPF_EXIST and BPF_NOEXIST cannot be both set */
@@ -549,7 +690,9 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
if (!alloc_selem)
return ERR_PTR(-ENOMEM);
- raw_spin_lock_irqsave(&local_storage->lock, flags);
+ err = raw_res_spin_lock_irqsave(&local_storage->lock, flags);
+ if (err)
+ goto free_selem;
/* Recheck local_storage->list under local_storage->lock */
if (unlikely(hlist_empty(&local_storage->list))) {
@@ -574,22 +717,30 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
goto unlock;
}
+ b = select_bucket(smap, local_storage);
+
+ err = raw_res_spin_lock_irqsave(&b->lock, b_flags);
+ if (err)
+ goto unlock;
+
alloc_selem = NULL;
/* First, link the new selem to the map */
- bpf_selem_link_map(smap, selem);
+ bpf_selem_link_map_nolock(b, selem);
/* Second, link (and publish) the new selem to local_storage */
bpf_selem_link_storage_nolock(local_storage, selem);
/* Third, remove old selem, SELEM(old_sdata) */
if (old_sdata) {
- bpf_selem_unlink_map(SELEM(old_sdata));
+ bpf_selem_unlink_map_nolock(SELEM(old_sdata));
bpf_selem_unlink_storage_nolock(local_storage, SELEM(old_sdata),
&old_selem_free_list);
}
+ raw_res_spin_unlock_irqrestore(&b->lock, b_flags);
unlock:
- raw_spin_unlock_irqrestore(&local_storage->lock, flags);
+ raw_res_spin_unlock_irqrestore(&local_storage->lock, flags);
+free_selem:
bpf_selem_free_list(&old_selem_free_list, false);
if (alloc_selem) {
mem_uncharge(smap, owner, smap->elem_size);
@@ -657,13 +808,13 @@ int bpf_local_storage_map_check_btf(const struct bpf_map *map,
return 0;
}
-void bpf_local_storage_destroy(struct bpf_local_storage *local_storage)
+/*
+ * Destroy local storage when the owner is going away. Caller must uncharge memory
+ * if memory charging is used.
+ */
+u32 bpf_local_storage_destroy(struct bpf_local_storage *local_storage)
{
struct bpf_local_storage_elem *selem;
- bool free_storage = false;
- HLIST_HEAD(free_selem_list);
- struct hlist_node *n;
- unsigned long flags;
/* Neither the bpf_prog nor the bpf_map's syscall
* could be modifying the local_storage->list now.
@@ -674,27 +825,20 @@ void bpf_local_storage_destroy(struct bpf_local_storage *local_storage)
* when unlinking elem from the local_storage->list and
* the map's bucket->list.
*/
- raw_spin_lock_irqsave(&local_storage->lock, flags);
- hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) {
- /* Always unlink from map before unlinking from
- * local_storage.
- */
- bpf_selem_unlink_map(selem);
- /* If local_storage list has only one element, the
- * bpf_selem_unlink_storage_nolock() will return true.
- * Otherwise, it will return false. The current loop iteration
- * intends to remove all local storage. So the last iteration
- * of the loop will set the free_cgroup_storage to true.
+ hlist_for_each_entry_rcu(selem, &local_storage->list, snode)
+ bpf_selem_unlink_nofail(selem, NULL);
+
+ if (!refcount_dec_and_test(&local_storage->owner_refcnt)) {
+ while (refcount_read(&local_storage->owner_refcnt))
+ cpu_relax();
+ /*
+ * Paired with refcount_dec() in bpf_selem_unlink_nofail()
+ * to make sure destroy() sees the correct local_storage->mem_charge.
*/
- free_storage = bpf_selem_unlink_storage_nolock(
- local_storage, selem, &free_selem_list);
+ smp_mb();
}
- raw_spin_unlock_irqrestore(&local_storage->lock, flags);
-
- bpf_selem_free_list(&free_selem_list, true);
- if (free_storage)
- bpf_local_storage_free(local_storage, true);
+ return local_storage->mem_charge;
}
u64 bpf_local_storage_map_mem_usage(const struct bpf_map *map)
@@ -736,7 +880,7 @@ bpf_local_storage_map_alloc(union bpf_attr *attr,
for (i = 0; i < nbuckets; i++) {
INIT_HLIST_HEAD(&smap->buckets[i].list);
- raw_spin_lock_init(&smap->buckets[i].lock);
+ raw_res_spin_lock_init(&smap->buckets[i].lock);
}
smap->elem_size = offsetof(struct bpf_local_storage_elem,
@@ -758,8 +902,7 @@ free_smap:
}
void bpf_local_storage_map_free(struct bpf_map *map,
- struct bpf_local_storage_cache *cache,
- int __percpu *busy_counter)
+ struct bpf_local_storage_cache *cache)
{
struct bpf_local_storage_map_bucket *b;
struct bpf_local_storage_elem *selem;
@@ -789,15 +932,14 @@ void bpf_local_storage_map_free(struct bpf_map *map,
rcu_read_lock();
/* No one is adding to b->list now */
- while ((selem = hlist_entry_safe(
- rcu_dereference_raw(hlist_first_rcu(&b->list)),
- struct bpf_local_storage_elem, map_node))) {
- if (busy_counter)
- this_cpu_inc(*busy_counter);
- bpf_selem_unlink(selem, true);
- if (busy_counter)
- this_cpu_dec(*busy_counter);
- cond_resched_rcu();
+restart:
+ hlist_for_each_entry_rcu(selem, &b->list, map_node) {
+ bpf_selem_unlink_nofail(selem, b);
+
+ if (need_resched()) {
+ cond_resched_rcu();
+ goto restart;
+ }
}
rcu_read_unlock();
}
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index 7cb6e8d4282c..0c4a0c8e6f70 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -18,10 +18,11 @@
#include <linux/bpf-cgroup.h>
/* For every LSM hook that allows attachment of BPF programs, declare a nop
- * function where a BPF program can be attached.
+ * function where a BPF program can be attached. Notably, we qualify each with
+ * weak linkage such that strong overrides can be implemented if need be.
*/
#define LSM_HOOK(RET, DEFAULT, NAME, ...) \
-noinline RET bpf_lsm_##NAME(__VA_ARGS__) \
+__weak noinline RET bpf_lsm_##NAME(__VA_ARGS__) \
{ \
return DEFAULT; \
}
diff --git a/kernel/bpf/bpf_lsm_proto.c b/kernel/bpf/bpf_lsm_proto.c
new file mode 100644
index 000000000000..44a54fd8045e
--- /dev/null
+++ b/kernel/bpf/bpf_lsm_proto.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2025 Google LLC.
+ */
+
+#include <linux/fs.h>
+#include <linux/bpf_lsm.h>
+
+/*
+ * Strong definition of the mmap_file() BPF LSM hook. The __nullable suffix on
+ * the struct file pointer parameter name marks it as PTR_MAYBE_NULL. This
+ * explicitly enforces that BPF LSM programs check for NULL before attempting to
+ * dereference it.
+ */
+int bpf_lsm_mmap_file(struct file *file__nullable, unsigned long reqprot,
+ unsigned long prot, unsigned long flags)
+{
+ return 0;
+}
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 278490683d28..c43346cb3d76 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -533,6 +533,17 @@ static void bpf_struct_ops_map_put_progs(struct bpf_struct_ops_map *st_map)
}
}
+static void bpf_struct_ops_map_dissoc_progs(struct bpf_struct_ops_map *st_map)
+{
+ u32 i;
+
+ for (i = 0; i < st_map->funcs_cnt; i++) {
+ if (!st_map->links[i])
+ break;
+ bpf_prog_disassoc_struct_ops(st_map->links[i]->prog);
+ }
+}
+
static void bpf_struct_ops_map_free_image(struct bpf_struct_ops_map *st_map)
{
int i;
@@ -801,6 +812,9 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
goto reset_unlock;
}
+	/* Poison the pointer on error instead of failing the update, for backward compatibility */
+ bpf_prog_assoc_struct_ops(prog, &st_map->map);
+
link = kzalloc(sizeof(*link), GFP_USER);
if (!link) {
bpf_prog_put(prog);
@@ -980,6 +994,8 @@ static void bpf_struct_ops_map_free(struct bpf_map *map)
if (btf_is_module(st_map->btf))
module_put(st_map->st_ops_desc->st_ops->owner);
+ bpf_struct_ops_map_dissoc_progs(st_map);
+
bpf_struct_ops_map_del_ksyms(st_map);
/* The struct_ops's function may switch to another struct_ops.
@@ -1396,6 +1412,78 @@ err_out:
return err;
}
+int bpf_prog_assoc_struct_ops(struct bpf_prog *prog, struct bpf_map *map)
+{
+ struct bpf_map *st_ops_assoc;
+
+ guard(mutex)(&prog->aux->st_ops_assoc_mutex);
+
+ st_ops_assoc = rcu_dereference_protected(prog->aux->st_ops_assoc,
+ lockdep_is_held(&prog->aux->st_ops_assoc_mutex));
+ if (st_ops_assoc && st_ops_assoc == map)
+ return 0;
+
+ if (st_ops_assoc) {
+ if (prog->type != BPF_PROG_TYPE_STRUCT_OPS)
+ return -EBUSY;
+
+ rcu_assign_pointer(prog->aux->st_ops_assoc, BPF_PTR_POISON);
+ } else {
+ /*
+ * struct_ops map does not track associated non-struct_ops programs.
+ * Bump the refcount to make sure st_ops_assoc is always valid.
+ */
+ if (prog->type != BPF_PROG_TYPE_STRUCT_OPS)
+ bpf_map_inc(map);
+
+ rcu_assign_pointer(prog->aux->st_ops_assoc, map);
+ }
+
+ return 0;
+}
+
+void bpf_prog_disassoc_struct_ops(struct bpf_prog *prog)
+{
+ struct bpf_map *st_ops_assoc;
+
+ guard(mutex)(&prog->aux->st_ops_assoc_mutex);
+
+ st_ops_assoc = rcu_dereference_protected(prog->aux->st_ops_assoc,
+ lockdep_is_held(&prog->aux->st_ops_assoc_mutex));
+ if (!st_ops_assoc || st_ops_assoc == BPF_PTR_POISON)
+ return;
+
+ if (prog->type != BPF_PROG_TYPE_STRUCT_OPS)
+ bpf_map_put(st_ops_assoc);
+
+ RCU_INIT_POINTER(prog->aux->st_ops_assoc, NULL);
+}
+
+/*
+ * Get a reference to the struct_ops struct (i.e., kdata) associated with a
+ * program. Should only be called in BPF program context (e.g., in a kfunc).
+ *
+ * If the returned pointer is not NULL, it points to a valid struct_ops.
+ * The struct_ops map is not guaranteed to be initialized or attached.
+ * Kernel struct_ops implementers are responsible for tracking and checking
+ * the state of the struct_ops if the use case requires an initialized or
+ * attached struct_ops.
+ */
+void *bpf_prog_get_assoc_struct_ops(const struct bpf_prog_aux *aux)
+{
+ struct bpf_struct_ops_map *st_map;
+ struct bpf_map *st_ops_assoc;
+
+ st_ops_assoc = rcu_dereference_check(aux->st_ops_assoc, bpf_rcu_lock_held());
+ if (!st_ops_assoc || st_ops_assoc == BPF_PTR_POISON)
+ return NULL;
+
+ st_map = (struct bpf_struct_ops_map *)st_ops_assoc;
+
+ return &st_map->kvalue.data;
+}
+EXPORT_SYMBOL_GPL(bpf_prog_get_assoc_struct_ops);
+
void bpf_map_struct_ops_info_fill(struct bpf_map_info *info, struct bpf_map *map)
{
struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c
index a1dc1bf0848a..605506792b5b 100644
--- a/kernel/bpf/bpf_task_storage.c
+++ b/kernel/bpf/bpf_task_storage.c
@@ -20,29 +20,6 @@
DEFINE_BPF_STORAGE_CACHE(task_cache);
-static DEFINE_PER_CPU(int, bpf_task_storage_busy);
-
-static void bpf_task_storage_lock(void)
-{
- cant_migrate();
- this_cpu_inc(bpf_task_storage_busy);
-}
-
-static void bpf_task_storage_unlock(void)
-{
- this_cpu_dec(bpf_task_storage_busy);
-}
-
-static bool bpf_task_storage_trylock(void)
-{
- cant_migrate();
- if (unlikely(this_cpu_inc_return(bpf_task_storage_busy) != 1)) {
- this_cpu_dec(bpf_task_storage_busy);
- return false;
- }
- return true;
-}
-
static struct bpf_local_storage __rcu **task_storage_ptr(void *owner)
{
struct task_struct *task = owner;
@@ -70,17 +47,15 @@ void bpf_task_storage_free(struct task_struct *task)
{
struct bpf_local_storage *local_storage;
- rcu_read_lock_dont_migrate();
+ rcu_read_lock();
local_storage = rcu_dereference(task->bpf_storage);
if (!local_storage)
goto out;
- bpf_task_storage_lock();
bpf_local_storage_destroy(local_storage);
- bpf_task_storage_unlock();
out:
- rcu_read_unlock_migrate();
+ rcu_read_unlock();
}
static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key)
@@ -106,9 +81,7 @@ static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key)
goto out;
}
- bpf_task_storage_lock();
sdata = task_storage_lookup(task, map, true);
- bpf_task_storage_unlock();
put_pid(pid);
return sdata ? sdata->data : NULL;
out:
@@ -143,11 +116,9 @@ static long bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key,
goto out;
}
- bpf_task_storage_lock();
sdata = bpf_local_storage_update(
task, (struct bpf_local_storage_map *)map, value, map_flags,
true, GFP_ATOMIC);
- bpf_task_storage_unlock();
err = PTR_ERR_OR_ZERO(sdata);
out:
@@ -155,8 +126,7 @@ out:
return err;
}
-static int task_storage_delete(struct task_struct *task, struct bpf_map *map,
- bool nobusy)
+static int task_storage_delete(struct task_struct *task, struct bpf_map *map)
{
struct bpf_local_storage_data *sdata;
@@ -164,12 +134,7 @@ static int task_storage_delete(struct task_struct *task, struct bpf_map *map,
if (!sdata)
return -ENOENT;
- if (!nobusy)
- return -EBUSY;
-
- bpf_selem_unlink(SELEM(sdata), false);
-
- return 0;
+ return bpf_selem_unlink(SELEM(sdata));
}
static long bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key)
@@ -194,111 +159,50 @@ static long bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key)
goto out;
}
- bpf_task_storage_lock();
- err = task_storage_delete(task, map, true);
- bpf_task_storage_unlock();
+ err = task_storage_delete(task, map);
out:
put_pid(pid);
return err;
}
-/* Called by bpf_task_storage_get*() helpers */
-static void *__bpf_task_storage_get(struct bpf_map *map,
- struct task_struct *task, void *value,
- u64 flags, gfp_t gfp_flags, bool nobusy)
+/* *gfp_flags* is a hidden argument provided by the verifier */
+BPF_CALL_5(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
+ task, void *, value, u64, flags, gfp_t, gfp_flags)
{
struct bpf_local_storage_data *sdata;
- sdata = task_storage_lookup(task, map, nobusy);
+ WARN_ON_ONCE(!bpf_rcu_lock_held());
+ if (flags & ~BPF_LOCAL_STORAGE_GET_F_CREATE || !task)
+ return (unsigned long)NULL;
+
+ sdata = task_storage_lookup(task, map, true);
if (sdata)
- return sdata->data;
+ return (unsigned long)sdata->data;
	/* only allocate new storage when the task is refcounted */
if (refcount_read(&task->usage) &&
- (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) && nobusy) {
+ (flags & BPF_LOCAL_STORAGE_GET_F_CREATE)) {
sdata = bpf_local_storage_update(
task, (struct bpf_local_storage_map *)map, value,
BPF_NOEXIST, false, gfp_flags);
- return IS_ERR(sdata) ? NULL : sdata->data;
+ return IS_ERR(sdata) ? (unsigned long)NULL : (unsigned long)sdata->data;
}
- return NULL;
-}
-
-/* *gfp_flags* is a hidden argument provided by the verifier */
-BPF_CALL_5(bpf_task_storage_get_recur, struct bpf_map *, map, struct task_struct *,
- task, void *, value, u64, flags, gfp_t, gfp_flags)
-{
- bool nobusy;
- void *data;
-
- WARN_ON_ONCE(!bpf_rcu_lock_held());
- if (flags & ~BPF_LOCAL_STORAGE_GET_F_CREATE || !task)
- return (unsigned long)NULL;
-
- nobusy = bpf_task_storage_trylock();
- data = __bpf_task_storage_get(map, task, value, flags,
- gfp_flags, nobusy);
- if (nobusy)
- bpf_task_storage_unlock();
- return (unsigned long)data;
-}
-
-/* *gfp_flags* is a hidden argument provided by the verifier */
-BPF_CALL_5(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
- task, void *, value, u64, flags, gfp_t, gfp_flags)
-{
- void *data;
-
- WARN_ON_ONCE(!bpf_rcu_lock_held());
- if (flags & ~BPF_LOCAL_STORAGE_GET_F_CREATE || !task)
- return (unsigned long)NULL;
-
- bpf_task_storage_lock();
- data = __bpf_task_storage_get(map, task, value, flags,
- gfp_flags, true);
- bpf_task_storage_unlock();
- return (unsigned long)data;
-}
-
-BPF_CALL_2(bpf_task_storage_delete_recur, struct bpf_map *, map, struct task_struct *,
- task)
-{
- bool nobusy;
- int ret;
-
- WARN_ON_ONCE(!bpf_rcu_lock_held());
- if (!task)
- return -EINVAL;
-
- nobusy = bpf_task_storage_trylock();
- /* This helper must only be called from places where the lifetime of the task
- * is guaranteed. Either by being refcounted or by being protected
- * by an RCU read-side critical section.
- */
- ret = task_storage_delete(task, map, nobusy);
- if (nobusy)
- bpf_task_storage_unlock();
- return ret;
+ return (unsigned long)NULL;
}
BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *,
task)
{
- int ret;
-
WARN_ON_ONCE(!bpf_rcu_lock_held());
if (!task)
return -EINVAL;
- bpf_task_storage_lock();
/* This helper must only be called from places where the lifetime of the task
* is guaranteed. Either by being refcounted or by being protected
* by an RCU read-side critical section.
*/
- ret = task_storage_delete(task, map, true);
- bpf_task_storage_unlock();
- return ret;
+ return task_storage_delete(task, map);
}
static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key)
@@ -313,7 +217,7 @@ static struct bpf_map *task_storage_map_alloc(union bpf_attr *attr)
static void task_storage_map_free(struct bpf_map *map)
{
- bpf_local_storage_map_free(map, &task_cache, &bpf_task_storage_busy);
+ bpf_local_storage_map_free(map, &task_cache);
}
BTF_ID_LIST_GLOBAL_SINGLE(bpf_local_storage_map_btf_id, struct, bpf_local_storage_map)
@@ -332,17 +236,6 @@ const struct bpf_map_ops task_storage_map_ops = {
.map_owner_storage_ptr = task_storage_ptr,
};
-const struct bpf_func_proto bpf_task_storage_get_recur_proto = {
- .func = bpf_task_storage_get_recur,
- .gpl_only = false,
- .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
- .arg1_type = ARG_CONST_MAP_PTR,
- .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL,
- .arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
- .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
- .arg4_type = ARG_ANYTHING,
-};
-
const struct bpf_func_proto bpf_task_storage_get_proto = {
.func = bpf_task_storage_get,
.gpl_only = false,
@@ -354,15 +247,6 @@ const struct bpf_func_proto bpf_task_storage_get_proto = {
.arg4_type = ARG_ANYTHING,
};
-const struct bpf_func_proto bpf_task_storage_delete_recur_proto = {
- .func = bpf_task_storage_delete_recur,
- .gpl_only = false,
- .ret_type = RET_INTEGER,
- .arg1_type = ARG_CONST_MAP_PTR,
- .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL,
- .arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
-};
-
const struct bpf_func_proto bpf_task_storage_delete_proto = {
.func = bpf_task_storage_delete,
.gpl_only = false,
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 0de8fc8a0e0b..7708958e3fb8 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -25,6 +25,7 @@
#include <linux/perf_event.h>
#include <linux/bsearch.h>
#include <linux/kobject.h>
+#include <linux/string.h>
#include <linux/sysfs.h>
#include <linux/overflow.h>
@@ -259,6 +260,7 @@ struct btf {
void *nohdr_data;
struct btf_header hdr;
u32 nr_types; /* includes VOID for base BTF */
+ u32 named_start_id;
u32 types_size;
u32 data_size;
refcount_t refcnt;
@@ -494,6 +496,11 @@ static bool btf_type_is_modifier(const struct btf_type *t)
return false;
}
+static int btf_start_id(const struct btf *btf)
+{
+ return btf->start_id + (btf->base_btf ? 0 : 1);
+}
+
bool btf_type_is_void(const struct btf_type *t)
{
return t == &btf_void;
@@ -544,21 +551,125 @@ u32 btf_nr_types(const struct btf *btf)
return total;
}
+/*
+ * Note that vmlinux and kernel module BTFs are always sorted
+ * during the building phase.
+ */
+static void btf_check_sorted(struct btf *btf)
+{
+ u32 i, n, named_start_id = 0;
+
+ n = btf_nr_types(btf);
+ if (btf_is_vmlinux(btf)) {
+ for (i = btf_start_id(btf); i < n; i++) {
+ const struct btf_type *t = btf_type_by_id(btf, i);
+ const char *n = btf_name_by_offset(btf, t->name_off);
+
+ if (n[0] != '\0') {
+ btf->named_start_id = i;
+ return;
+ }
+ }
+ return;
+ }
+
+ for (i = btf_start_id(btf) + 1; i < n; i++) {
+ const struct btf_type *ta = btf_type_by_id(btf, i - 1);
+ const struct btf_type *tb = btf_type_by_id(btf, i);
+ const char *na = btf_name_by_offset(btf, ta->name_off);
+ const char *nb = btf_name_by_offset(btf, tb->name_off);
+
+ if (strcmp(na, nb) > 0)
+ return;
+
+ if (named_start_id == 0 && na[0] != '\0')
+ named_start_id = i - 1;
+ if (named_start_id == 0 && nb[0] != '\0')
+ named_start_id = i;
+ }
+
+ if (named_start_id)
+ btf->named_start_id = named_start_id;
+}
+
+/*
+ * btf_named_start_id - Get the named starting ID for the BTF
+ * @btf: Pointer to the target BTF object
+ * @own: Flag indicating whether to query only the current BTF (true = current BTF only,
+ * false = recursively traverse the base BTF chain)
+ *
+ * Return value rules:
+ * 1. For a sorted btf, return its named_start_id
+ * 2. Else for a split BTF, return its start_id
+ * 3. Else for a base BTF, return 1
+ */
+u32 btf_named_start_id(const struct btf *btf, bool own)
+{
+ const struct btf *base_btf = btf;
+
+ while (!own && base_btf->base_btf)
+ base_btf = base_btf->base_btf;
+
+ return base_btf->named_start_id ?: (base_btf->start_id ?: 1);
+}
+
+static s32 btf_find_by_name_kind_bsearch(const struct btf *btf, const char *name)
+{
+ const struct btf_type *t;
+ const char *tname;
+ s32 l, r, m;
+
+ l = btf_named_start_id(btf, true);
+ r = btf_nr_types(btf) - 1;
+ while (l <= r) {
+ m = l + (r - l) / 2;
+ t = btf_type_by_id(btf, m);
+ tname = btf_name_by_offset(btf, t->name_off);
+ if (strcmp(tname, name) >= 0) {
+ if (l == r)
+ return r;
+ r = m;
+ } else {
+ l = m + 1;
+ }
+ }
+
+ return btf_nr_types(btf);
+}
+
s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind)
{
+ const struct btf *base_btf = btf_base_btf(btf);
const struct btf_type *t;
const char *tname;
- u32 i, total;
+ s32 id, total;
- total = btf_nr_types(btf);
- for (i = 1; i < total; i++) {
- t = btf_type_by_id(btf, i);
- if (BTF_INFO_KIND(t->info) != kind)
- continue;
+ if (base_btf) {
+ id = btf_find_by_name_kind(base_btf, name, kind);
+ if (id > 0)
+ return id;
+ }
- tname = btf_name_by_offset(btf, t->name_off);
- if (!strcmp(tname, name))
- return i;
+ total = btf_nr_types(btf);
+ if (btf->named_start_id > 0 && name[0]) {
+ id = btf_find_by_name_kind_bsearch(btf, name);
+ for (; id < total; id++) {
+ t = btf_type_by_id(btf, id);
+ tname = btf_name_by_offset(btf, t->name_off);
+ if (strcmp(tname, name) != 0)
+ return -ENOENT;
+ if (BTF_INFO_KIND(t->info) == kind)
+ return id;
+ }
+ } else {
+ for (id = btf_start_id(btf); id < total; id++) {
+ t = btf_type_by_id(btf, id);
+ if (BTF_INFO_KIND(t->info) != kind)
+ continue;
+ tname = btf_name_by_offset(btf, t->name_off);
+ if (strcmp(tname, name) == 0)
+ return id;
+ }
}
return -ENOENT;
@@ -3424,7 +3535,8 @@ const char *btf_find_decl_tag_value(const struct btf *btf, const struct btf_type
const struct btf_type *t;
int len, id;
- id = btf_find_next_decl_tag(btf, pt, comp_idx, tag_key, 0);
+ id = btf_find_next_decl_tag(btf, pt, comp_idx, tag_key,
+ btf_named_start_id(btf, false) - 1);
if (id < 0)
return ERR_PTR(id);
@@ -5791,6 +5903,7 @@ static struct btf *btf_parse(const union bpf_attr *attr, bpfptr_t uattr, u32 uat
goto errout;
}
env->btf = btf;
+ btf->named_start_id = 0;
data = kvmalloc(attr->btf_size, GFP_KERNEL | __GFP_NOWARN);
if (!data) {
@@ -6107,6 +6220,7 @@ static int btf_validate_prog_ctx_type(struct bpf_verifier_log *log, const struct
case BPF_TRACE_FENTRY:
case BPF_TRACE_FEXIT:
case BPF_MODIFY_RETURN:
+ case BPF_TRACE_FSESSION:
/* allow u64* as ctx */
if (btf_is_int(t) && t->size == 8)
return 0;
@@ -6210,7 +6324,8 @@ static struct btf *btf_parse_base(struct btf_verifier_env *env, const char *name
btf->data = data;
btf->data_size = data_size;
btf->kernel_btf = true;
- snprintf(btf->name, sizeof(btf->name), "%s", name);
+ btf->named_start_id = 0;
+ strscpy(btf->name, name);
err = btf_parse_hdr(env);
if (err)
@@ -6230,6 +6345,7 @@ static struct btf *btf_parse_base(struct btf_verifier_env *env, const char *name
if (err)
goto errout;
+ btf_check_sorted(btf);
refcount_set(&btf->refcnt, 1);
return btf;
@@ -6327,7 +6443,8 @@ static struct btf *btf_parse_module(const char *module_name, const void *data,
btf->start_id = base_btf->nr_types;
btf->start_str_off = base_btf->hdr.str_len;
btf->kernel_btf = true;
- snprintf(btf->name, sizeof(btf->name), "%s", module_name);
+ btf->named_start_id = 0;
+ strscpy(btf->name, module_name);
btf->data = kvmemdup(data, data_size, GFP_KERNEL | __GFP_NOWARN);
if (!btf->data) {
@@ -6363,6 +6480,7 @@ static struct btf *btf_parse_module(const char *module_name, const void *data,
}
btf_verifier_env_free(env);
+ btf_check_sorted(btf);
refcount_set(&btf->refcnt, 1);
return btf;
@@ -6704,6 +6822,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
fallthrough;
case BPF_LSM_CGROUP:
case BPF_TRACE_FEXIT:
+ case BPF_TRACE_FSESSION:
/* When LSM programs are attached to void LSM hooks
* they use FEXIT trampolines and when attached to
* int LSM hooks, they use MODIFY_RETURN trampolines.
@@ -7729,12 +7848,13 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog)
tname);
return -EINVAL;
}
+
/* Convert BTF function arguments into verifier types.
* Only PTR_TO_CTX and SCALAR are supported atm.
*/
for (i = 0; i < nargs; i++) {
u32 tags = 0;
- int id = 0;
+ int id = btf_named_start_id(btf, false) - 1;
/* 'arg:<tag>' decl_tag takes precedence over derivation of
* register type from BTF type itself
@@ -8640,24 +8760,17 @@ end:
return ret;
}
-static u32 *__btf_kfunc_id_set_contains(const struct btf *btf,
- enum btf_kfunc_hook hook,
- u32 kfunc_btf_id,
- const struct bpf_prog *prog)
+static u32 *btf_kfunc_id_set_contains(const struct btf *btf,
+ enum btf_kfunc_hook hook,
+ u32 kfunc_btf_id)
{
- struct btf_kfunc_hook_filter *hook_filter;
struct btf_id_set8 *set;
- u32 *id, i;
+ u32 *id;
if (hook >= BTF_KFUNC_HOOK_MAX)
return NULL;
if (!btf->kfunc_set_tab)
return NULL;
- hook_filter = &btf->kfunc_set_tab->hook_filters[hook];
- for (i = 0; i < hook_filter->nr_filters; i++) {
- if (hook_filter->filters[i](prog, kfunc_btf_id))
- return NULL;
- }
set = btf->kfunc_set_tab->sets[hook];
if (!set)
return NULL;
@@ -8668,6 +8781,28 @@ static u32 *__btf_kfunc_id_set_contains(const struct btf *btf,
return id + 1;
}
+static bool __btf_kfunc_is_allowed(const struct btf *btf,
+ enum btf_kfunc_hook hook,
+ u32 kfunc_btf_id,
+ const struct bpf_prog *prog)
+{
+ struct btf_kfunc_hook_filter *hook_filter;
+ int i;
+
+ if (hook >= BTF_KFUNC_HOOK_MAX)
+ return false;
+ if (!btf->kfunc_set_tab)
+ return false;
+
+ hook_filter = &btf->kfunc_set_tab->hook_filters[hook];
+ for (i = 0; i < hook_filter->nr_filters; i++) {
+ if (hook_filter->filters[i](prog, kfunc_btf_id))
+ return false;
+ }
+
+ return true;
+}
+
static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type)
{
switch (prog_type) {
@@ -8681,6 +8816,7 @@ static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type)
return BTF_KFUNC_HOOK_STRUCT_OPS;
case BPF_PROG_TYPE_TRACING:
case BPF_PROG_TYPE_TRACEPOINT:
+ case BPF_PROG_TYPE_RAW_TRACEPOINT:
case BPF_PROG_TYPE_PERF_EVENT:
case BPF_PROG_TYPE_LSM:
return BTF_KFUNC_HOOK_TRACING;
@@ -8714,6 +8850,26 @@ static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type)
}
}
+bool btf_kfunc_is_allowed(const struct btf *btf,
+ u32 kfunc_btf_id,
+ const struct bpf_prog *prog)
+{
+ enum bpf_prog_type prog_type = resolve_prog_type(prog);
+ enum btf_kfunc_hook hook;
+ u32 *kfunc_flags;
+
+ kfunc_flags = btf_kfunc_id_set_contains(btf, BTF_KFUNC_HOOK_COMMON, kfunc_btf_id);
+ if (kfunc_flags && __btf_kfunc_is_allowed(btf, BTF_KFUNC_HOOK_COMMON, kfunc_btf_id, prog))
+ return true;
+
+ hook = bpf_prog_type_to_kfunc_hook(prog_type);
+ kfunc_flags = btf_kfunc_id_set_contains(btf, hook, kfunc_btf_id);
+ if (kfunc_flags && __btf_kfunc_is_allowed(btf, hook, kfunc_btf_id, prog))
+ return true;
+
+ return false;
+}
+
/* Caution:
* Reference to the module (obtained using btf_try_get_module) corresponding to
* the struct btf *MUST* be held when calling this function from verifier
@@ -8721,26 +8877,27 @@ static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type)
* keeping the reference for the duration of the call provides the necessary
* protection for looking up a well-formed btf->kfunc_set_tab.
*/
-u32 *btf_kfunc_id_set_contains(const struct btf *btf,
- u32 kfunc_btf_id,
- const struct bpf_prog *prog)
+u32 *btf_kfunc_flags(const struct btf *btf, u32 kfunc_btf_id, const struct bpf_prog *prog)
{
enum bpf_prog_type prog_type = resolve_prog_type(prog);
enum btf_kfunc_hook hook;
u32 *kfunc_flags;
- kfunc_flags = __btf_kfunc_id_set_contains(btf, BTF_KFUNC_HOOK_COMMON, kfunc_btf_id, prog);
+ kfunc_flags = btf_kfunc_id_set_contains(btf, BTF_KFUNC_HOOK_COMMON, kfunc_btf_id);
if (kfunc_flags)
return kfunc_flags;
hook = bpf_prog_type_to_kfunc_hook(prog_type);
- return __btf_kfunc_id_set_contains(btf, hook, kfunc_btf_id, prog);
+ return btf_kfunc_id_set_contains(btf, hook, kfunc_btf_id);
}
u32 *btf_kfunc_is_modify_return(const struct btf *btf, u32 kfunc_btf_id,
const struct bpf_prog *prog)
{
- return __btf_kfunc_id_set_contains(btf, BTF_KFUNC_HOOK_FMODRET, kfunc_btf_id, prog);
+ if (!__btf_kfunc_is_allowed(btf, BTF_KFUNC_HOOK_FMODRET, kfunc_btf_id, prog))
+ return NULL;
+
+ return btf_kfunc_id_set_contains(btf, BTF_KFUNC_HOOK_FMODRET, kfunc_btf_id);
}
static int __register_btf_kfunc_id_set(enum btf_kfunc_hook hook,
@@ -8845,6 +9002,13 @@ static int btf_check_dtor_kfuncs(struct btf *btf, const struct btf_id_dtor_kfunc
*/
if (!t || !btf_type_is_ptr(t))
return -EINVAL;
+
+ if (IS_ENABLED(CONFIG_CFI_CLANG)) {
+ /* Ensure the destructor kfunc type matches btf_dtor_kfunc_t */
+ t = btf_type_by_id(btf, t->type);
+ if (!btf_type_is_void(t))
+ return -EINVAL;
+ }
}
return 0;
}
@@ -9215,7 +9379,7 @@ bpf_core_find_cands(struct bpf_core_ctx *ctx, u32 local_type_id)
}
/* Attempt to find target candidates in vmlinux BTF first */
- cands = bpf_core_add_cands(cands, main_btf, 1);
+ cands = bpf_core_add_cands(cands, main_btf, btf_named_start_id(main_btf, true));
if (IS_ERR(cands))
return ERR_CAST(cands);
@@ -9247,7 +9411,7 @@ check_modules:
*/
btf_get(mod_btf);
spin_unlock_bh(&btf_idr_lock);
- cands = bpf_core_add_cands(cands, mod_btf, btf_nr_types(main_btf));
+ cands = bpf_core_add_cands(cands, mod_btf, btf_named_start_id(mod_btf, true));
btf_put(mod_btf);
if (IS_ERR(cands))
return ERR_CAST(cands);
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 69988af44b37..b029f0369ecf 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -1680,11 +1680,7 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
struct cgroup *cgrp;
int ret;
- /* Check socket family since not all sockets represent network
- * endpoint (e.g. AF_UNIX).
- */
- if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6 &&
- sk->sk_family != AF_UNIX)
+ if (!sk_is_inet(sk) && !sk_is_unix(sk))
return 0;
if (!ctx.uaddr) {
diff --git a/kernel/bpf/cgroup_iter.c b/kernel/bpf/cgroup_iter.c
index f04a468cf6a7..fd51fe3d92cc 100644
--- a/kernel/bpf/cgroup_iter.c
+++ b/kernel/bpf/cgroup_iter.c
@@ -8,12 +8,13 @@
#include "../cgroup/cgroup-internal.h" /* cgroup_mutex and cgroup_is_dead */
-/* cgroup_iter provides four modes of traversal to the cgroup hierarchy.
+/* cgroup_iter provides five modes of traversal to the cgroup hierarchy.
*
* 1. Walk the descendants of a cgroup in pre-order.
* 2. Walk the descendants of a cgroup in post-order.
* 3. Walk the ancestors of a cgroup.
* 4. Show the given cgroup only.
+ * 5. Walk the children of a given parent cgroup.
*
* For walking descendants, cgroup_iter can walk in either pre-order or
* post-order. For walking ancestors, the iter walks up from a cgroup to
@@ -78,6 +79,8 @@ static void *cgroup_iter_seq_start(struct seq_file *seq, loff_t *pos)
return css_next_descendant_pre(NULL, p->start_css);
else if (p->order == BPF_CGROUP_ITER_DESCENDANTS_POST)
return css_next_descendant_post(NULL, p->start_css);
+ else if (p->order == BPF_CGROUP_ITER_CHILDREN)
+ return css_next_child(NULL, p->start_css);
else /* BPF_CGROUP_ITER_SELF_ONLY and BPF_CGROUP_ITER_ANCESTORS_UP */
return p->start_css;
}
@@ -113,6 +116,8 @@ static void *cgroup_iter_seq_next(struct seq_file *seq, void *v, loff_t *pos)
return css_next_descendant_post(curr, p->start_css);
else if (p->order == BPF_CGROUP_ITER_ANCESTORS_UP)
return curr->parent;
+ else if (p->order == BPF_CGROUP_ITER_CHILDREN)
+ return css_next_child(curr, p->start_css);
else /* BPF_CGROUP_ITER_SELF_ONLY */
return NULL;
}
@@ -200,11 +205,16 @@ static int bpf_iter_attach_cgroup(struct bpf_prog *prog,
int order = linfo->cgroup.order;
struct cgroup *cgrp;
- if (order != BPF_CGROUP_ITER_DESCENDANTS_PRE &&
- order != BPF_CGROUP_ITER_DESCENDANTS_POST &&
- order != BPF_CGROUP_ITER_ANCESTORS_UP &&
- order != BPF_CGROUP_ITER_SELF_ONLY)
+ switch (order) {
+ case BPF_CGROUP_ITER_DESCENDANTS_PRE:
+ case BPF_CGROUP_ITER_DESCENDANTS_POST:
+ case BPF_CGROUP_ITER_ANCESTORS_UP:
+ case BPF_CGROUP_ITER_SELF_ONLY:
+ case BPF_CGROUP_ITER_CHILDREN:
+ break;
+ default:
return -EINVAL;
+ }
if (fd && id)
return -EINVAL;
@@ -257,6 +267,8 @@ show_order:
seq_puts(seq, "order: descendants_post\n");
else if (aux->cgroup.order == BPF_CGROUP_ITER_ANCESTORS_UP)
seq_puts(seq, "order: ancestors_up\n");
+ else if (aux->cgroup.order == BPF_CGROUP_ITER_CHILDREN)
+ seq_puts(seq, "order: children\n");
else /* BPF_CGROUP_ITER_SELF_ONLY */
seq_puts(seq, "order: self_only\n");
}
@@ -320,6 +332,7 @@ __bpf_kfunc int bpf_iter_css_new(struct bpf_iter_css *it,
case BPF_CGROUP_ITER_DESCENDANTS_PRE:
case BPF_CGROUP_ITER_DESCENDANTS_POST:
case BPF_CGROUP_ITER_ANCESTORS_UP:
+ case BPF_CGROUP_ITER_CHILDREN:
break;
default:
return -EINVAL;
@@ -345,6 +358,9 @@ __bpf_kfunc struct cgroup_subsys_state *bpf_iter_css_next(struct bpf_iter_css *i
case BPF_CGROUP_ITER_DESCENDANTS_POST:
kit->pos = css_next_descendant_post(kit->pos, kit->start);
break;
+ case BPF_CGROUP_ITER_CHILDREN:
+ kit->pos = css_next_child(kit->pos, kit->start);
+ break;
case BPF_CGROUP_ITER_ANCESTORS_UP:
kit->pos = kit->pos ? kit->pos->parent : kit->start;
}
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 1b9b18e5b03c..dc906dfdff94 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -112,7 +112,8 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
vfree(fp);
return NULL;
}
- fp->active = alloc_percpu_gfp(int, bpf_memcg_flags(GFP_KERNEL | gfp_extra_flags));
+ fp->active = __alloc_percpu_gfp(sizeof(u8[BPF_NR_CONTEXTS]), 4,
+ bpf_memcg_flags(GFP_KERNEL | gfp_extra_flags));
if (!fp->active) {
vfree(fp);
kfree(aux);
@@ -136,6 +137,7 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
mutex_init(&fp->aux->used_maps_mutex);
mutex_init(&fp->aux->ext_mutex);
mutex_init(&fp->aux->dst_mutex);
+ mutex_init(&fp->aux->st_ops_assoc_mutex);
#ifdef CONFIG_BPF_SYSCALL
bpf_prog_stream_init(fp);
@@ -286,6 +288,7 @@ void __bpf_prog_free(struct bpf_prog *fp)
if (fp->aux) {
mutex_destroy(&fp->aux->used_maps_mutex);
mutex_destroy(&fp->aux->dst_mutex);
+ mutex_destroy(&fp->aux->st_ops_assoc_mutex);
kfree(fp->aux->poke_tab);
kfree(fp->aux);
}
@@ -2398,6 +2401,7 @@ static bool __bpf_prog_map_compatible(struct bpf_map *map,
map->owner->type = prog_type;
map->owner->jited = fp->jited;
map->owner->xdp_has_frags = aux->xdp_has_frags;
+ map->owner->sleepable = fp->sleepable;
map->owner->expected_attach_type = fp->expected_attach_type;
map->owner->attach_func_proto = aux->attach_func_proto;
for_each_cgroup_storage_type(i) {
@@ -2409,7 +2413,8 @@ static bool __bpf_prog_map_compatible(struct bpf_map *map,
} else {
ret = map->owner->type == prog_type &&
map->owner->jited == fp->jited &&
- map->owner->xdp_has_frags == aux->xdp_has_frags;
+ map->owner->xdp_has_frags == aux->xdp_has_frags &&
+ map->owner->sleepable == fp->sleepable;
if (ret &&
map->map_type == BPF_MAP_TYPE_PROG_ARRAY &&
map->owner->expected_attach_type != fp->expected_attach_type)
@@ -2912,6 +2917,7 @@ static void bpf_prog_free_deferred(struct work_struct *work)
#endif
bpf_free_used_maps(aux);
bpf_free_used_btfs(aux);
+ bpf_prog_disassoc_struct_ops(aux->prog);
if (bpf_prog_is_dev_bound(aux))
bpf_prog_dev_bound_destroy(aux->prog);
#ifdef CONFIG_PERF_EVENTS
@@ -3138,6 +3144,11 @@ bool __weak bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena)
return false;
}
+bool __weak bpf_jit_supports_fsession(void)
+{
+ return false;
+}
+
u64 __weak bpf_arch_uaddress_limit(void)
{
#if defined(CONFIG_64BIT) && defined(CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE)
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 703e5df1f4ef..04171fbc39cb 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -430,7 +430,7 @@ static struct bpf_cpu_map_entry *
__cpu_map_entry_alloc(struct bpf_map *map, struct bpf_cpumap_val *value,
u32 cpu)
{
- int numa, err, i, fd = value->bpf_prog.fd;
+ int numa, err = -ENOMEM, i, fd = value->bpf_prog.fd;
gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
struct bpf_cpu_map_entry *rcpu;
struct xdp_bulk_queue *bq;
@@ -440,7 +440,7 @@ __cpu_map_entry_alloc(struct bpf_map *map, struct bpf_cpumap_val *value,
rcpu = bpf_map_kmalloc_node(map, sizeof(*rcpu), gfp | __GFP_ZERO, numa);
if (!rcpu)
- return NULL;
+ return ERR_PTR(err);
/* Alloc percpu bulkq */
rcpu->bulkq = bpf_map_alloc_percpu(map, sizeof(*rcpu->bulkq),
@@ -468,16 +468,21 @@ __cpu_map_entry_alloc(struct bpf_map *map, struct bpf_cpumap_val *value,
rcpu->value.qsize = value->qsize;
gro_init(&rcpu->gro);
- if (fd > 0 && __cpu_map_load_bpf_program(rcpu, map, fd))
- goto free_ptr_ring;
+ if (fd > 0) {
+ err = __cpu_map_load_bpf_program(rcpu, map, fd);
+ if (err)
+ goto free_ptr_ring;
+ }
/* Setup kthread */
init_completion(&rcpu->kthread_running);
rcpu->kthread = kthread_create_on_node(cpu_map_kthread_run, rcpu, numa,
"cpumap/%d/map:%d", cpu,
map->id);
- if (IS_ERR(rcpu->kthread))
+ if (IS_ERR(rcpu->kthread)) {
+ err = PTR_ERR(rcpu->kthread);
goto free_prog;
+ }
/* Make sure kthread runs on a single CPU */
kthread_bind(rcpu->kthread, cpu);
@@ -503,7 +508,7 @@ free_bulkq:
free_percpu(rcpu->bulkq);
free_rcu:
kfree(rcpu);
- return NULL;
+ return ERR_PTR(err);
}
static void __cpu_map_entry_free(struct work_struct *work)
@@ -596,8 +601,8 @@ static long cpu_map_update_elem(struct bpf_map *map, void *key, void *value,
} else {
/* Updating qsize cause re-allocation of bpf_cpu_map_entry */
rcpu = __cpu_map_entry_alloc(map, &cpumap_value, key_cpu);
- if (!rcpu)
- return -ENOMEM;
+ if (IS_ERR(rcpu))
+ return PTR_ERR(rcpu);
}
rcu_read_lock();
__cpu_map_entry_replace(cmap, key_cpu, rcpu);
diff --git a/kernel/bpf/cpumask.c b/kernel/bpf/cpumask.c
index 9876c5fe6c2a..b8c805b4b06a 100644
--- a/kernel/bpf/cpumask.c
+++ b/kernel/bpf/cpumask.c
@@ -477,7 +477,7 @@ __bpf_kfunc_end_defs();
BTF_KFUNCS_START(cpumask_kfunc_btf_ids)
BTF_ID_FLAGS(func, bpf_cpumask_create, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_cpumask_release, KF_RELEASE)
-BTF_ID_FLAGS(func, bpf_cpumask_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_cpumask_acquire, KF_ACQUIRE)
BTF_ID_FLAGS(func, bpf_cpumask_first, KF_RCU)
BTF_ID_FLAGS(func, bpf_cpumask_first_zero, KF_RCU)
BTF_ID_FLAGS(func, bpf_cpumask_first_and, KF_RCU)
diff --git a/kernel/bpf/crypto.c b/kernel/bpf/crypto.c
index 83c4d9943084..7e75a1936256 100644
--- a/kernel/bpf/crypto.c
+++ b/kernel/bpf/crypto.c
@@ -60,7 +60,7 @@ struct bpf_crypto_ctx {
int bpf_crypto_register_type(const struct bpf_crypto_type *type)
{
struct bpf_crypto_type_list *node;
- int err = -EEXIST;
+ int err = -EBUSY;
down_write(&bpf_crypto_types_sem);
list_for_each_entry(node, &bpf_crypto_types, list) {
@@ -261,6 +261,12 @@ __bpf_kfunc void bpf_crypto_ctx_release(struct bpf_crypto_ctx *ctx)
call_rcu(&ctx->rcu, crypto_free_cb);
}
+__bpf_kfunc void bpf_crypto_ctx_release_dtor(void *ctx)
+{
+ bpf_crypto_ctx_release(ctx);
+}
+CFI_NOSEAL(bpf_crypto_ctx_release_dtor);
+
static int bpf_crypto_crypt(const struct bpf_crypto_ctx *ctx,
const struct bpf_dynptr_kern *src,
const struct bpf_dynptr_kern *dst,
@@ -368,7 +374,7 @@ static const struct btf_kfunc_id_set crypt_kfunc_set = {
BTF_ID_LIST(bpf_crypto_dtor_ids)
BTF_ID(struct, bpf_crypto_ctx)
-BTF_ID(func, bpf_crypto_ctx_release)
+BTF_ID(func, bpf_crypto_ctx_release_dtor)
static int __init crypto_kfunc_init(void)
{
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index c8a9b27f8663..3b9d297a53be 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -82,9 +82,6 @@ struct bucket {
rqspinlock_t raw_lock;
};
-#define HASHTAB_MAP_LOCK_COUNT 8
-#define HASHTAB_MAP_LOCK_MASK (HASHTAB_MAP_LOCK_COUNT - 1)
-
struct bpf_htab {
struct bpf_map map;
struct bpf_mem_alloc ma;
@@ -932,7 +929,7 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
}
static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
- void *value, bool onallcpus)
+ void *value, bool onallcpus, u64 map_flags)
{
void *ptr;
@@ -943,19 +940,28 @@ static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
bpf_obj_free_fields(htab->map.record, ptr);
} else {
u32 size = round_up(htab->map.value_size, 8);
- int off = 0, cpu;
+ void *val;
+ int cpu;
+
+ if (map_flags & BPF_F_CPU) {
+ cpu = map_flags >> 32;
+ ptr = per_cpu_ptr(pptr, cpu);
+ copy_map_value(&htab->map, ptr, value);
+ bpf_obj_free_fields(htab->map.record, ptr);
+ return;
+ }
for_each_possible_cpu(cpu) {
ptr = per_cpu_ptr(pptr, cpu);
- copy_map_value_long(&htab->map, ptr, value + off);
+ val = (map_flags & BPF_F_ALL_CPUS) ? value : value + size * cpu;
+ copy_map_value(&htab->map, ptr, val);
bpf_obj_free_fields(htab->map.record, ptr);
- off += size;
}
}
}
static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr,
- void *value, bool onallcpus)
+ void *value, bool onallcpus, u64 map_flags)
{
/* When not setting the initial value on all cpus, zero-fill element
* values for other cpus. Otherwise, bpf program has no way to ensure
@@ -973,7 +979,7 @@ static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr,
zero_map_value(&htab->map, per_cpu_ptr(pptr, cpu));
}
} else {
- pcpu_copy_value(htab, pptr, value, onallcpus);
+ pcpu_copy_value(htab, pptr, value, onallcpus, map_flags);
}
}
@@ -985,7 +991,7 @@ static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab)
static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
void *value, u32 key_size, u32 hash,
bool percpu, bool onallcpus,
- struct htab_elem *old_elem)
+ struct htab_elem *old_elem, u64 map_flags)
{
u32 size = htab->map.value_size;
bool prealloc = htab_is_prealloc(htab);
@@ -1043,7 +1049,7 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
pptr = *(void __percpu **)ptr;
}
- pcpu_init_value(htab, pptr, value, onallcpus);
+ pcpu_init_value(htab, pptr, value, onallcpus, map_flags);
if (!prealloc)
htab_elem_set_ptr(l_new, key_size, pptr);
@@ -1147,7 +1153,7 @@ static long htab_map_update_elem(struct bpf_map *map, void *key, void *value,
}
l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
- l_old);
+ l_old, map_flags);
if (IS_ERR(l_new)) {
/* all pre-allocated elements are in use or memory exhausted */
ret = PTR_ERR(l_new);
@@ -1249,6 +1255,15 @@ err_lock_bucket:
return ret;
}
+static int htab_map_check_update_flags(bool onallcpus, u64 map_flags)
+{
+ if (unlikely(!onallcpus && map_flags > BPF_EXIST))
+ return -EINVAL;
+ if (unlikely(onallcpus && ((map_flags & BPF_F_LOCK) || (u32)map_flags > BPF_F_ALL_CPUS)))
+ return -EINVAL;
+ return 0;
+}
+
static long htab_map_update_elem_in_place(struct bpf_map *map, void *key,
void *value, u64 map_flags,
bool percpu, bool onallcpus)
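Read together with pcpu_copy_value() above, the flag layout these checks assume is: the regular op flags plus BPF_F_CPU or BPF_F_ALL_CPUS in the low 32 bits, and the target CPU index in the high 32 bits when BPF_F_CPU is set. Illustrative encoding only:

/* Update only CPU 3's copy of a per-cpu value. */
__u64 flags = BPF_F_CPU | ((__u64)3 << 32);

/* Or replicate one source value into every CPU's slot. */
__u64 flags_all = BPF_F_ALL_CPUS;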
@@ -1262,9 +1277,9 @@ static long htab_map_update_elem_in_place(struct bpf_map *map, void *key,
u32 key_size, hash;
int ret;
- if (unlikely(map_flags > BPF_EXIST))
- /* unknown flags */
- return -EINVAL;
+ ret = htab_map_check_update_flags(onallcpus, map_flags);
+ if (unlikely(ret))
+ return ret;
WARN_ON_ONCE(!bpf_rcu_lock_held());
@@ -1289,7 +1304,7 @@ static long htab_map_update_elem_in_place(struct bpf_map *map, void *key,
/* Update value in-place */
if (percpu) {
pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
- value, onallcpus);
+ value, onallcpus, map_flags);
} else {
void **inner_map_pptr = htab_elem_value(l_old, key_size);
@@ -1298,7 +1313,7 @@ static long htab_map_update_elem_in_place(struct bpf_map *map, void *key,
}
} else {
l_new = alloc_htab_elem(htab, key, value, key_size,
- hash, percpu, onallcpus, NULL);
+ hash, percpu, onallcpus, NULL, map_flags);
if (IS_ERR(l_new)) {
ret = PTR_ERR(l_new);
goto err;
@@ -1324,9 +1339,9 @@ static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
u32 key_size, hash;
int ret;
- if (unlikely(map_flags > BPF_EXIST))
- /* unknown flags */
- return -EINVAL;
+ ret = htab_map_check_update_flags(onallcpus, map_flags);
+ if (unlikely(ret))
+ return ret;
WARN_ON_ONCE(!bpf_rcu_lock_held());
@@ -1363,10 +1378,10 @@ static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
/* per-cpu hash map can update value in-place */
pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
- value, onallcpus);
+ value, onallcpus, map_flags);
} else {
pcpu_init_value(htab, htab_elem_get_ptr(l_new, key_size),
- value, onallcpus);
+ value, onallcpus, map_flags);
hlist_nulls_add_head_rcu(&l_new->hash_node, head);
l_new = NULL;
}
@@ -1678,9 +1693,9 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
void __user *ukeys = u64_to_user_ptr(attr->batch.keys);
void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch);
u32 batch, max_count, size, bucket_size, map_id;
+ u64 elem_map_flags, map_flags, allowed_flags;
u32 bucket_cnt, total, key_size, value_size;
struct htab_elem *node_to_free = NULL;
- u64 elem_map_flags, map_flags;
struct hlist_nulls_head *head;
struct hlist_nulls_node *n;
unsigned long flags = 0;
@@ -1690,9 +1705,12 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
int ret = 0;
elem_map_flags = attr->batch.elem_flags;
- if ((elem_map_flags & ~BPF_F_LOCK) ||
- ((elem_map_flags & BPF_F_LOCK) && !btf_record_has_field(map->record, BPF_SPIN_LOCK)))
- return -EINVAL;
+ allowed_flags = BPF_F_LOCK;
+ if (!do_delete && is_percpu)
+ allowed_flags |= BPF_F_CPU;
+ ret = bpf_map_check_op_flags(map, elem_map_flags, allowed_flags);
+ if (ret)
+ return ret;
map_flags = attr->batch.flags;
if (map_flags)
@@ -1715,7 +1733,7 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
key_size = htab->map.key_size;
value_size = htab->map.value_size;
size = round_up(value_size, 8);
- if (is_percpu)
+ if (is_percpu && !(elem_map_flags & BPF_F_CPU))
value_size = size * num_possible_cpus();
total = 0;
/* while experimenting with hash tables with sizes ranging from 10 to
@@ -1798,10 +1816,17 @@ again_nocopy:
void __percpu *pptr;
pptr = htab_elem_get_ptr(l, map->key_size);
- for_each_possible_cpu(cpu) {
- copy_map_value_long(&htab->map, dst_val + off, per_cpu_ptr(pptr, cpu));
- check_and_init_map_value(&htab->map, dst_val + off);
- off += size;
+ if (elem_map_flags & BPF_F_CPU) {
+ cpu = elem_map_flags >> 32;
+ copy_map_value(&htab->map, dst_val, per_cpu_ptr(pptr, cpu));
+ check_and_init_map_value(&htab->map, dst_val);
+ } else {
+ for_each_possible_cpu(cpu) {
+ copy_map_value_long(&htab->map, dst_val + off,
+ per_cpu_ptr(pptr, cpu));
+ check_and_init_map_value(&htab->map, dst_val + off);
+ off += size;
+ }
}
} else {
value = htab_elem_value(l, key_size);
@@ -2209,11 +2234,11 @@ static u64 htab_map_mem_usage(const struct bpf_map *map)
bool prealloc = htab_is_prealloc(htab);
bool percpu = htab_is_percpu(htab);
bool lru = htab_is_lru(htab);
- u64 num_entries;
- u64 usage = sizeof(struct bpf_htab);
+ u64 num_entries, usage;
+
+ usage = sizeof(struct bpf_htab) +
+ sizeof(struct bucket) * htab->n_buckets;
- usage += sizeof(struct bucket) * htab->n_buckets;
- usage += sizeof(int) * num_possible_cpus() * HASHTAB_MAP_LOCK_COUNT;
if (prealloc) {
num_entries = map->max_entries;
if (htab_has_extra_elems(htab))
@@ -2357,7 +2382,7 @@ static void *htab_lru_percpu_map_lookup_percpu_elem(struct bpf_map *map, void *k
return NULL;
}
-int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
+int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value, u64 map_flags)
{
struct htab_elem *l;
void __percpu *pptr;
@@ -2374,16 +2399,22 @@ int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
l = __htab_map_lookup_elem(map, key);
if (!l)
goto out;
+ ret = 0;
/* We do not mark LRU map element here in order to not mess up
* eviction heuristics when user space does a map walk.
*/
pptr = htab_elem_get_ptr(l, map->key_size);
+ if (map_flags & BPF_F_CPU) {
+ cpu = map_flags >> 32;
+ copy_map_value(map, value, per_cpu_ptr(pptr, cpu));
+ check_and_init_map_value(map, value);
+ goto out;
+ }
for_each_possible_cpu(cpu) {
copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu));
check_and_init_map_value(map, value + off);
off += size;
}
- ret = 0;
out:
rcu_read_unlock();
return ret;
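From user space, a hedged sketch of the resulting API using libbpf's flag-taking wrappers; note that with BPF_F_CPU the value buffer holds a single value_size-byte copy rather than one copy per possible CPU:

#include <bpf/bpf.h>

static int read_one_cpu(int map_fd, __u32 key, void *val, int cpu)
{
	__u64 flags = BPF_F_CPU | ((__u64)cpu << 32);

	/* 'val' receives only this CPU's copy of the element. */
	return bpf_map_lookup_elem_flags(map_fd, &key, val, flags);
}

static int write_all_cpus(int map_fd, __u32 key, const void *val)
{
	/* One source value is copied into every CPU's slot. */
	return bpf_map_update_elem(map_fd, &key, val, BPF_F_ALL_CPUS);
}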
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index db72b96f9c8c..7ac32798eb04 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1077,7 +1077,7 @@ const struct bpf_func_proto bpf_snprintf_proto = {
.func = bpf_snprintf,
.gpl_only = true,
.ret_type = RET_INTEGER,
- .arg1_type = ARG_PTR_TO_MEM_OR_NULL,
+ .arg1_type = ARG_PTR_TO_MEM_OR_NULL | MEM_WRITE,
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
.arg3_type = ARG_PTR_TO_CONST_STR,
.arg4_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
@@ -1095,16 +1095,34 @@ static void *map_key_from_value(struct bpf_map *map, void *value, u32 *arr_idx)
return (void *)value - round_up(map->key_size, 8);
}
+enum bpf_async_type {
+ BPF_ASYNC_TYPE_TIMER = 0,
+ BPF_ASYNC_TYPE_WQ,
+};
+
+enum bpf_async_op {
+ BPF_ASYNC_START,
+ BPF_ASYNC_CANCEL
+};
+
+struct bpf_async_cmd {
+ struct llist_node node;
+ u64 nsec;
+ u32 mode;
+ enum bpf_async_op op;
+};
+
struct bpf_async_cb {
struct bpf_map *map;
struct bpf_prog *prog;
void __rcu *callback_fn;
void *value;
- union {
- struct rcu_head rcu;
- struct work_struct delete_work;
- };
+ struct rcu_head rcu;
u64 flags;
+ struct irq_work worker;
+ refcount_t refcnt;
+ enum bpf_async_type type;
+ struct llist_head async_cmds;
};
/* BPF map elements can contain 'struct bpf_timer'.
@@ -1132,7 +1150,6 @@ struct bpf_hrtimer {
struct bpf_work {
struct bpf_async_cb cb;
struct work_struct work;
- struct work_struct delete_work;
};
/* the actual struct hidden inside uapi struct bpf_timer and bpf_wq */
@@ -1142,20 +1159,12 @@ struct bpf_async_kern {
struct bpf_hrtimer *timer;
struct bpf_work *work;
};
- /* bpf_spin_lock is used here instead of spinlock_t to make
- * sure that it always fits into space reserved by struct bpf_timer
- * regardless of LOCKDEP and spinlock debug flags.
- */
- struct bpf_spin_lock lock;
} __attribute__((aligned(8)));
-enum bpf_async_type {
- BPF_ASYNC_TYPE_TIMER = 0,
- BPF_ASYNC_TYPE_WQ,
-};
-
static DEFINE_PER_CPU(struct bpf_hrtimer *, hrtimer_running);
+static void bpf_async_refcount_put(struct bpf_async_cb *cb);
+
static enum hrtimer_restart bpf_timer_cb(struct hrtimer *hrtimer)
{
struct bpf_hrtimer *t = container_of(hrtimer, struct bpf_hrtimer, timer);
@@ -1219,45 +1228,85 @@ static void bpf_async_cb_rcu_free(struct rcu_head *rcu)
{
struct bpf_async_cb *cb = container_of(rcu, struct bpf_async_cb, rcu);
+ /*
+	 * Drop the last reference to prog only after an RCU GP, as
+	 * set_callback() may race with cancel_and_free().
+ */
+ if (cb->prog)
+ bpf_prog_put(cb->prog);
+
kfree_nolock(cb);
}
-static void bpf_wq_delete_work(struct work_struct *work)
+/* Callback from call_rcu_tasks_trace, chains to call_rcu for final free */
+static void bpf_async_cb_rcu_tasks_trace_free(struct rcu_head *rcu)
{
- struct bpf_work *w = container_of(work, struct bpf_work, delete_work);
+ struct bpf_async_cb *cb = container_of(rcu, struct bpf_async_cb, rcu);
+ struct bpf_hrtimer *t = container_of(cb, struct bpf_hrtimer, cb);
+ struct bpf_work *w = container_of(cb, struct bpf_work, cb);
+ bool retry = false;
- cancel_work_sync(&w->work);
+ /*
+ * bpf_async_cancel_and_free() tried to cancel timer/wq, but it
+ * could have raced with timer/wq_start. Now refcnt is zero and
+ * srcu/rcu GP completed. Cancel timer/wq again.
+ */
+ switch (cb->type) {
+ case BPF_ASYNC_TYPE_TIMER:
+ if (hrtimer_try_to_cancel(&t->timer) < 0)
+ retry = true;
+ break;
+ case BPF_ASYNC_TYPE_WQ:
+ if (!cancel_work(&w->work) && work_busy(&w->work))
+ retry = true;
+ break;
+ }
+ if (retry) {
+ /*
+ * hrtimer or wq callback may still be running. It must be
+ * in rcu_tasks_trace or rcu CS, so wait for GP again.
+ * It won't retry forever, since refcnt zero prevents all
+ * operations on timer/wq.
+ */
+ call_rcu_tasks_trace(&cb->rcu, bpf_async_cb_rcu_tasks_trace_free);
+ return;
+ }
- call_rcu(&w->cb.rcu, bpf_async_cb_rcu_free);
+ /* rcu_trace_implies_rcu_gp() is true and will remain so */
+ bpf_async_cb_rcu_free(rcu);
}
-static void bpf_timer_delete_work(struct work_struct *work)
+static void worker_for_call_rcu(struct irq_work *work)
{
- struct bpf_hrtimer *t = container_of(work, struct bpf_hrtimer, cb.delete_work);
+ struct bpf_async_cb *cb = container_of(work, struct bpf_async_cb, worker);
- /* Cancel the timer and wait for callback to complete if it was running.
- * If hrtimer_cancel() can be safely called it's safe to call
- * call_rcu() right after for both preallocated and non-preallocated
- * maps. The async->cb = NULL was already done and no code path can see
- * address 't' anymore. Timer if armed for existing bpf_hrtimer before
- * bpf_timer_cancel_and_free will have been cancelled.
- */
- hrtimer_cancel(&t->timer);
- call_rcu(&t->cb.rcu, bpf_async_cb_rcu_free);
+ call_rcu_tasks_trace(&cb->rcu, bpf_async_cb_rcu_tasks_trace_free);
}
+static void bpf_async_refcount_put(struct bpf_async_cb *cb)
+{
+ if (!refcount_dec_and_test(&cb->refcnt))
+ return;
+
+ if (irqs_disabled()) {
+ cb->worker = IRQ_WORK_INIT(worker_for_call_rcu);
+ irq_work_queue(&cb->worker);
+ } else {
+ call_rcu_tasks_trace(&cb->rcu, bpf_async_cb_rcu_tasks_trace_free);
+ }
+}
+
+static void bpf_async_cancel_and_free(struct bpf_async_kern *async);
+static void bpf_async_irq_worker(struct irq_work *work);
+
static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u64 flags,
enum bpf_async_type type)
{
- struct bpf_async_cb *cb;
+ struct bpf_async_cb *cb, *old_cb;
struct bpf_hrtimer *t;
struct bpf_work *w;
clockid_t clockid;
size_t size;
- int ret = 0;
-
- if (in_nmi())
- return -EOPNOTSUPP;
switch (type) {
case BPF_ASYNC_TYPE_TIMER:
@@ -1270,18 +1319,13 @@ static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u
return -EINVAL;
}
- __bpf_spin_lock_irqsave(&async->lock);
- t = async->timer;
- if (t) {
- ret = -EBUSY;
- goto out;
- }
+ old_cb = READ_ONCE(async->cb);
+ if (old_cb)
+ return -EBUSY;
cb = bpf_map_kmalloc_nolock(map, size, 0, map->numa_node);
- if (!cb) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!cb)
+ return -ENOMEM;
switch (type) {
case BPF_ASYNC_TYPE_TIMER:
@@ -1289,7 +1333,6 @@ static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u
t = (struct bpf_hrtimer *)cb;
atomic_set(&t->cancelling, 0);
- INIT_WORK(&t->cb.delete_work, bpf_timer_delete_work);
hrtimer_setup(&t->timer, bpf_timer_cb, clockid, HRTIMER_MODE_REL_SOFT);
cb->value = (void *)async - map->record->timer_off;
break;
@@ -1297,16 +1340,24 @@ static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u
w = (struct bpf_work *)cb;
INIT_WORK(&w->work, bpf_wq_work);
- INIT_WORK(&w->delete_work, bpf_wq_delete_work);
cb->value = (void *)async - map->record->wq_off;
break;
}
cb->map = map;
cb->prog = NULL;
cb->flags = flags;
+ cb->worker = IRQ_WORK_INIT(bpf_async_irq_worker);
+ init_llist_head(&cb->async_cmds);
+ refcount_set(&cb->refcnt, 1); /* map's reference */
+ cb->type = type;
rcu_assign_pointer(cb->callback_fn, NULL);
- WRITE_ONCE(async->cb, cb);
+ old_cb = cmpxchg(&async->cb, NULL, cb);
+ if (old_cb) {
+ /* Lost the race to initialize this bpf_async_kern, drop the allocated object */
+ kfree_nolock(cb);
+ return -EBUSY;
+ }
/* Guarantee the order between async->cb and map->usercnt. So
* when there are concurrent uref release and bpf timer init, either
* bpf_timer_cancel_and_free() called by uref release reads a no-NULL
@@ -1317,13 +1368,11 @@ static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u
/* maps with timers must be either held by user space
* or pinned in bpffs.
*/
- WRITE_ONCE(async->cb, NULL);
- kfree_nolock(cb);
- ret = -EPERM;
+ bpf_async_cancel_and_free(async);
+ return -EPERM;
}
-out:
- __bpf_spin_unlock_irqrestore(&async->lock);
- return ret;
+
+ return 0;
}
BPF_CALL_3(bpf_timer_init, struct bpf_async_kern *, timer, struct bpf_map *, map,
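With the bpf_spin_lock gone, first-time initialization is serialized purely by the cmpxchg() above: the loser of a racing init simply frees its allocation. The general shape of that lock-free init-once idiom, sketched for a hypothetical global slot:

#include <linux/slab.h>

struct obj { int x; };
static struct obj *slot;

static int slot_init_once(void)
{
	struct obj *new, *old;

	new = kzalloc(sizeof(*new), GFP_ATOMIC);
	if (!new)
		return -ENOMEM;

	old = cmpxchg(&slot, NULL, new);	/* publish only if still NULL */
	if (old) {
		kfree(new);			/* lost the race; 'old' wins */
		return -EBUSY;
	}
	return 0;
}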
@@ -1354,56 +1403,90 @@ static const struct bpf_func_proto bpf_timer_init_proto = {
.arg3_type = ARG_ANYTHING,
};
-static int __bpf_async_set_callback(struct bpf_async_kern *async, void *callback_fn,
- struct bpf_prog_aux *aux, unsigned int flags,
- enum bpf_async_type type)
+static int bpf_async_update_prog_callback(struct bpf_async_cb *cb,
+ struct bpf_prog *prog,
+ void *callback_fn)
{
- struct bpf_prog *prev, *prog = aux->prog;
- struct bpf_async_cb *cb;
- int ret = 0;
+ struct bpf_prog *prev;
- if (in_nmi())
- return -EOPNOTSUPP;
- __bpf_spin_lock_irqsave(&async->lock);
- cb = async->cb;
- if (!cb) {
- ret = -EINVAL;
- goto out;
- }
- if (!atomic64_read(&cb->map->usercnt)) {
- /* maps with timers must be either held by user space
- * or pinned in bpffs. Otherwise timer might still be
- * running even when bpf prog is detached and user space
- * is gone, since map_release_uref won't ever be called.
- */
- ret = -EPERM;
- goto out;
+ /* Acquire a guard reference on prog to prevent it from being freed during the loop */
+ if (prog) {
+ prog = bpf_prog_inc_not_zero(prog);
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
}
- prev = cb->prog;
- if (prev != prog) {
- /* Bump prog refcnt once. Every bpf_timer_set_callback()
- * can pick different callback_fn-s within the same prog.
+
+ do {
+ if (prog)
+ prog = bpf_prog_inc_not_zero(prog);
+ prev = xchg(&cb->prog, prog);
+ rcu_assign_pointer(cb->callback_fn, callback_fn);
+
+ /*
+		 * Release the previous prog and make sure that, if another CPU
+		 * is contending to set bpf_prog, references are not leaked:
+		 * each iteration acquires and releases one reference.
*/
- prog = bpf_prog_inc_not_zero(prog);
- if (IS_ERR(prog)) {
- ret = PTR_ERR(prog);
- goto out;
- }
if (prev)
- /* Drop prev prog refcnt when swapping with new prog */
bpf_prog_put(prev);
- cb->prog = prog;
+
+ } while (READ_ONCE(cb->prog) != prog ||
+ (void __force *)READ_ONCE(cb->callback_fn) != callback_fn);
+
+ if (prog)
+ bpf_prog_put(prog);
+
+ return 0;
+}
+
+static DEFINE_PER_CPU(struct bpf_async_cb *, async_cb_running);
+
+static int bpf_async_schedule_op(struct bpf_async_cb *cb, enum bpf_async_op op,
+ u64 nsec, u32 timer_mode)
+{
+ /*
+ * Do not schedule another operation on this cpu if it's in irq_work
+ * callback that is processing async_cmds queue. Otherwise the following
+ * loop is possible:
+ * bpf_timer_start() -> bpf_async_schedule_op() -> irq_work_queue().
+ * irqrestore -> bpf_async_irq_worker() -> tracepoint -> bpf_timer_start().
+ */
+ if (this_cpu_read(async_cb_running) == cb) {
+ bpf_async_refcount_put(cb);
+ return -EDEADLK;
}
- rcu_assign_pointer(cb->callback_fn, callback_fn);
-out:
- __bpf_spin_unlock_irqrestore(&async->lock);
- return ret;
+
+ struct bpf_async_cmd *cmd = kmalloc_nolock(sizeof(*cmd), 0, NUMA_NO_NODE);
+
+ if (!cmd) {
+ bpf_async_refcount_put(cb);
+ return -ENOMEM;
+ }
+ init_llist_node(&cmd->node);
+ cmd->nsec = nsec;
+ cmd->mode = timer_mode;
+ cmd->op = op;
+ if (llist_add(&cmd->node, &cb->async_cmds))
+ irq_work_queue(&cb->worker);
+ return 0;
+}
+
+static int __bpf_async_set_callback(struct bpf_async_kern *async, void *callback_fn,
+ struct bpf_prog *prog)
+{
+ struct bpf_async_cb *cb;
+
+ cb = READ_ONCE(async->cb);
+ if (!cb)
+ return -EINVAL;
+
+ return bpf_async_update_prog_callback(cb, prog, callback_fn);
}
BPF_CALL_3(bpf_timer_set_callback, struct bpf_async_kern *, timer, void *, callback_fn,
struct bpf_prog_aux *, aux)
{
- return __bpf_async_set_callback(timer, callback_fn, aux, 0, BPF_ASYNC_TYPE_TIMER);
+ return __bpf_async_set_callback(timer, callback_fn, aux->prog);
}
static const struct bpf_func_proto bpf_timer_set_callback_proto = {
@@ -1414,22 +1497,22 @@ static const struct bpf_func_proto bpf_timer_set_callback_proto = {
.arg2_type = ARG_PTR_TO_FUNC,
};
-BPF_CALL_3(bpf_timer_start, struct bpf_async_kern *, timer, u64, nsecs, u64, flags)
+static bool defer_timer_wq_op(void)
+{
+ return in_hardirq() || irqs_disabled();
+}
+
+BPF_CALL_3(bpf_timer_start, struct bpf_async_kern *, async, u64, nsecs, u64, flags)
{
struct bpf_hrtimer *t;
- int ret = 0;
- enum hrtimer_mode mode;
+ u32 mode;
- if (in_nmi())
- return -EOPNOTSUPP;
if (flags & ~(BPF_F_TIMER_ABS | BPF_F_TIMER_CPU_PIN))
return -EINVAL;
- __bpf_spin_lock_irqsave(&timer->lock);
- t = timer->timer;
- if (!t || !t->cb.prog) {
- ret = -EINVAL;
- goto out;
- }
+
+ t = READ_ONCE(async->timer);
+ if (!t || !READ_ONCE(t->cb.prog))
+ return -EINVAL;
if (flags & BPF_F_TIMER_ABS)
mode = HRTIMER_MODE_ABS_SOFT;
@@ -1439,10 +1522,20 @@ BPF_CALL_3(bpf_timer_start, struct bpf_async_kern *, timer, u64, nsecs, u64, fla
if (flags & BPF_F_TIMER_CPU_PIN)
mode |= HRTIMER_MODE_PINNED;
- hrtimer_start(&t->timer, ns_to_ktime(nsecs), mode);
-out:
- __bpf_spin_unlock_irqrestore(&timer->lock);
- return ret;
+ /*
+	 * bpf_async_cancel_and_free() could have dropped the refcnt to zero.
+	 * In that case BPF progs are not allowed to arm the timer, to prevent UAF.
+ */
+ if (!refcount_inc_not_zero(&t->cb.refcnt))
+ return -ENOENT;
+
+ if (!defer_timer_wq_op()) {
+ hrtimer_start(&t->timer, ns_to_ktime(nsecs), mode);
+ bpf_async_refcount_put(&t->cb);
+ return 0;
+ } else {
+ return bpf_async_schedule_op(&t->cb, BPF_ASYNC_START, nsecs, mode);
+ }
}
static const struct bpf_func_proto bpf_timer_start_proto = {
@@ -1454,32 +1547,18 @@ static const struct bpf_func_proto bpf_timer_start_proto = {
.arg3_type = ARG_ANYTHING,
};
-static void drop_prog_refcnt(struct bpf_async_cb *async)
-{
- struct bpf_prog *prog = async->prog;
-
- if (prog) {
- bpf_prog_put(prog);
- async->prog = NULL;
- rcu_assign_pointer(async->callback_fn, NULL);
- }
-}
-
-BPF_CALL_1(bpf_timer_cancel, struct bpf_async_kern *, timer)
+BPF_CALL_1(bpf_timer_cancel, struct bpf_async_kern *, async)
{
struct bpf_hrtimer *t, *cur_t;
bool inc = false;
int ret = 0;
- if (in_nmi())
+ if (defer_timer_wq_op())
return -EOPNOTSUPP;
- rcu_read_lock();
- __bpf_spin_lock_irqsave(&timer->lock);
- t = timer->timer;
- if (!t) {
- ret = -EINVAL;
- goto out;
- }
+
+ t = READ_ONCE(async->timer);
+ if (!t)
+ return -EINVAL;
cur_t = this_cpu_read(hrtimer_running);
if (cur_t == t) {
@@ -1487,8 +1566,7 @@ BPF_CALL_1(bpf_timer_cancel, struct bpf_async_kern *, timer)
* its own timer the hrtimer_cancel() will deadlock
* since it waits for callback_fn to finish.
*/
- ret = -EDEADLK;
- goto out;
+ return -EDEADLK;
}
/* Only account in-flight cancellations when invoked from a timer
@@ -1511,20 +1589,17 @@ BPF_CALL_1(bpf_timer_cancel, struct bpf_async_kern *, timer)
* cancelling and waiting for it synchronously, since it might
* do the same. Bail!
*/
- ret = -EDEADLK;
- goto out;
+ atomic_dec(&t->cancelling);
+ return -EDEADLK;
}
drop:
- drop_prog_refcnt(&t->cb);
-out:
- __bpf_spin_unlock_irqrestore(&timer->lock);
+ bpf_async_update_prog_callback(&t->cb, NULL, NULL);
/* Cancel the timer and wait for associated callback to finish
* if it was running.
*/
- ret = ret ?: hrtimer_cancel(&t->timer);
+ ret = hrtimer_cancel(&t->timer);
if (inc)
atomic_dec(&t->cancelling);
- rcu_read_unlock();
return ret;
}
@@ -1535,107 +1610,107 @@ static const struct bpf_func_proto bpf_timer_cancel_proto = {
.arg1_type = ARG_PTR_TO_TIMER,
};
-static struct bpf_async_cb *__bpf_async_cancel_and_free(struct bpf_async_kern *async)
+static void bpf_async_process_op(struct bpf_async_cb *cb, u32 op,
+ u64 timer_nsec, u32 timer_mode)
{
- struct bpf_async_cb *cb;
+ switch (cb->type) {
+ case BPF_ASYNC_TYPE_TIMER: {
+ struct bpf_hrtimer *t = container_of(cb, struct bpf_hrtimer, cb);
- /* Performance optimization: read async->cb without lock first. */
- if (!READ_ONCE(async->cb))
- return NULL;
+ switch (op) {
+ case BPF_ASYNC_START:
+ hrtimer_start(&t->timer, ns_to_ktime(timer_nsec), timer_mode);
+ break;
+ case BPF_ASYNC_CANCEL:
+ hrtimer_try_to_cancel(&t->timer);
+ break;
+ }
+ break;
+ }
+ case BPF_ASYNC_TYPE_WQ: {
+ struct bpf_work *w = container_of(cb, struct bpf_work, cb);
+
+ switch (op) {
+ case BPF_ASYNC_START:
+ schedule_work(&w->work);
+ break;
+ case BPF_ASYNC_CANCEL:
+ cancel_work(&w->work);
+ break;
+ }
+ break;
+ }
+ }
+ bpf_async_refcount_put(cb);
+}
- __bpf_spin_lock_irqsave(&async->lock);
- /* re-read it under lock */
- cb = async->cb;
- if (!cb)
- goto out;
- drop_prog_refcnt(cb);
- /* The subsequent bpf_timer_start/cancel() helpers won't be able to use
- * this timer, since it won't be initialized.
- */
- WRITE_ONCE(async->cb, NULL);
-out:
- __bpf_spin_unlock_irqrestore(&async->lock);
- return cb;
+static void bpf_async_irq_worker(struct irq_work *work)
+{
+ struct bpf_async_cb *cb = container_of(work, struct bpf_async_cb, worker);
+ struct llist_node *pos, *n, *list;
+
+ list = llist_del_all(&cb->async_cmds);
+ if (!list)
+ return;
+
+ list = llist_reverse_order(list);
+ this_cpu_write(async_cb_running, cb);
+ llist_for_each_safe(pos, n, list) {
+ struct bpf_async_cmd *cmd;
+
+ cmd = container_of(pos, struct bpf_async_cmd, node);
+ bpf_async_process_op(cb, cmd->op, cmd->nsec, cmd->mode);
+ kfree_nolock(cmd);
+ }
+ this_cpu_write(async_cb_running, NULL);
}
-/* This function is called by map_delete/update_elem for individual element and
- * by ops->map_release_uref when the user space reference to a map reaches zero.
- */
-void bpf_timer_cancel_and_free(void *val)
+static void bpf_async_cancel_and_free(struct bpf_async_kern *async)
{
- struct bpf_hrtimer *t;
+ struct bpf_async_cb *cb;
- t = (struct bpf_hrtimer *)__bpf_async_cancel_and_free(val);
+ if (!READ_ONCE(async->cb))
+ return;
- if (!t)
+ cb = xchg(&async->cb, NULL);
+ if (!cb)
return;
- /* We check that bpf_map_delete/update_elem() was called from timer
- * callback_fn. In such case we don't call hrtimer_cancel() (since it
- * will deadlock) and don't call hrtimer_try_to_cancel() (since it will
- * just return -1). Though callback_fn is still running on this cpu it's
- * safe to do kfree(t) because bpf_timer_cb() read everything it needed
- * from 't'. The bpf subprog callback_fn won't be able to access 't',
- * since async->cb = NULL was already done. The timer will be
- * effectively cancelled because bpf_timer_cb() will return
- * HRTIMER_NORESTART.
- *
- * However, it is possible the timer callback_fn calling us armed the
- * timer _before_ calling us, such that failing to cancel it here will
- * cause it to possibly use struct hrtimer after freeing bpf_hrtimer.
- * Therefore, we _need_ to cancel any outstanding timers before we do
- * call_rcu, even though no more timers can be armed.
- *
- * Moreover, we need to schedule work even if timer does not belong to
- * the calling callback_fn, as on two different CPUs, we can end up in a
- * situation where both sides run in parallel, try to cancel one
- * another, and we end up waiting on both sides in hrtimer_cancel
- * without making forward progress, since timer1 depends on time2
- * callback to finish, and vice versa.
- *
- * CPU 1 (timer1_cb) CPU 2 (timer2_cb)
- * bpf_timer_cancel_and_free(timer2) bpf_timer_cancel_and_free(timer1)
- *
- * To avoid these issues, punt to workqueue context when we are in a
- * timer callback.
+
+ bpf_async_update_prog_callback(cb, NULL, NULL);
+ /*
+	 * No refcount_inc_not_zero(&cb->refcnt) here: we are dropping the last
+	 * refcnt, either synchronously or asynchronously via irq_work.
*/
- if (this_cpu_read(hrtimer_running)) {
- queue_work(system_dfl_wq, &t->cb.delete_work);
- return;
- }
- if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
- /* If the timer is running on other CPU, also use a kworker to
- * wait for the completion of the timer instead of trying to
- * acquire a sleepable lock in hrtimer_cancel() to wait for its
- * completion.
- */
- if (hrtimer_try_to_cancel(&t->timer) >= 0)
- call_rcu(&t->cb.rcu, bpf_async_cb_rcu_free);
- else
- queue_work(system_dfl_wq, &t->cb.delete_work);
+ if (!defer_timer_wq_op()) {
+ bpf_async_process_op(cb, BPF_ASYNC_CANCEL, 0, 0);
} else {
- bpf_timer_delete_work(&t->cb.delete_work);
+ (void)bpf_async_schedule_op(cb, BPF_ASYNC_CANCEL, 0, 0);
+ /*
+		 * bpf_async_schedule_op() either enqueues the allocated cmd onto
+		 * the llist or fails with -ENOMEM and drops the last refcnt.
+		 * The failure is unlikely, but safe, since the
+		 * bpf_async_cb_rcu_tasks_trace_free() callback does an additional
+		 * timer/wq cancel to handle such races anyway.
+ */
}
}
-/* This function is called by map_delete/update_elem for individual element and
+/*
+ * This function is called by map_delete/update_elem for individual element and
* by ops->map_release_uref when the user space reference to a map reaches zero.
*/
-void bpf_wq_cancel_and_free(void *val)
+void bpf_timer_cancel_and_free(void *val)
{
- struct bpf_work *work;
-
- BTF_TYPE_EMIT(struct bpf_wq);
+ bpf_async_cancel_and_free(val);
+}
- work = (struct bpf_work *)__bpf_async_cancel_and_free(val);
- if (!work)
- return;
- /* Trigger cancel of the sleepable work, but *do not* wait for
- * it to finish if it was running as we might not be in a
- * sleepable context.
- * kfree will be called once the work has finished.
- */
- schedule_work(&work->delete_work);
+/*
+ * This function is called by map_delete/update_elem for individual element and
+ * by ops->map_release_uref when the user space reference to a map reaches zero.
+ */
+void bpf_wq_cancel_and_free(void *val)
+{
+ bpf_async_cancel_and_free(val);
}
BPF_CALL_2(bpf_kptr_xchg, void *, dst, void *, ptr)
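The deferral machinery above boils down to a lock-free llist fed from any context and drained in irq_work: llist_add() returns true only when it inserts into an empty list, so the worker is queued exactly once per batch, and llist_reverse_order() restores submission order. A self-contained sketch of that idiom (names hypothetical):

#include <linux/llist.h>
#include <linux/irq_work.h>
#include <linux/slab.h>

struct cmd { struct llist_node node; int op; };

static LLIST_HEAD(pending);

static void drain(struct irq_work *w)
{
	struct llist_node *pos, *n, *list = llist_del_all(&pending);

	list = llist_reverse_order(list);	/* oldest command first */
	llist_for_each_safe(pos, n, list) {
		struct cmd *c = container_of(pos, struct cmd, node);

		/* ...act on c->op here... */
		kfree(c);
	}
}

static DEFINE_IRQ_WORK(drainer, drain);

static void submit(struct cmd *c)
{
	if (llist_add(&c->node, &pending))	/* first entry: kick the worker */
		irq_work_queue(&drainer);
}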
@@ -2092,12 +2167,8 @@ bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_cgroup_classid_curr_proto;
#endif
case BPF_FUNC_task_storage_get:
- if (bpf_prog_check_recur(prog))
- return &bpf_task_storage_get_recur_proto;
return &bpf_task_storage_get_proto;
case BPF_FUNC_task_storage_delete:
- if (bpf_prog_check_recur(prog))
- return &bpf_task_storage_delete_recur_proto;
return &bpf_task_storage_delete_proto;
default:
break;
@@ -2709,14 +2780,14 @@ __bpf_kfunc struct task_struct *bpf_task_from_vpid(s32 vpid)
* bpf_dynptr_slice() - Obtain a read-only pointer to the dynptr data.
* @p: The dynptr whose data slice to retrieve
* @offset: Offset into the dynptr
- * @buffer__opt: User-provided buffer to copy contents into. May be NULL
+ * @buffer__nullable: User-provided buffer to copy contents into. May be NULL
* @buffer__szk: Size (in bytes) of the buffer if present. This is the
* length of the requested slice. This must be a constant.
*
* For non-skb and non-xdp type dynptrs, there is no difference between
* bpf_dynptr_slice and bpf_dynptr_data.
*
- * If buffer__opt is NULL, the call will fail if buffer_opt was needed.
+ * If buffer__nullable is NULL, the call will fail if the buffer was needed.
*
* If the intention is to write to the data slice, please use
* bpf_dynptr_slice_rdwr.
@@ -2734,7 +2805,7 @@ __bpf_kfunc struct task_struct *bpf_task_from_vpid(s32 vpid)
* direct pointer)
*/
__bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr *p, u64 offset,
- void *buffer__opt, u64 buffer__szk)
+ void *buffer__nullable, u64 buffer__szk)
{
const struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p;
enum bpf_dynptr_type type;
@@ -2755,8 +2826,8 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr *p, u64 offset,
case BPF_DYNPTR_TYPE_RINGBUF:
return ptr->data + ptr->offset + offset;
case BPF_DYNPTR_TYPE_SKB:
- if (buffer__opt)
- return skb_header_pointer(ptr->data, ptr->offset + offset, len, buffer__opt);
+ if (buffer__nullable)
+ return skb_header_pointer(ptr->data, ptr->offset + offset, len, buffer__nullable);
else
return skb_pointer_if_linear(ptr->data, ptr->offset + offset, len);
case BPF_DYNPTR_TYPE_XDP:
@@ -2765,16 +2836,16 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr *p, u64 offset,
if (!IS_ERR_OR_NULL(xdp_ptr))
return xdp_ptr;
- if (!buffer__opt)
+ if (!buffer__nullable)
return NULL;
- bpf_xdp_copy_buf(ptr->data, ptr->offset + offset, buffer__opt, len, false);
- return buffer__opt;
+ bpf_xdp_copy_buf(ptr->data, ptr->offset + offset, buffer__nullable, len, false);
+ return buffer__nullable;
}
case BPF_DYNPTR_TYPE_SKB_META:
return bpf_skb_meta_pointer(ptr->data, ptr->offset + offset);
case BPF_DYNPTR_TYPE_FILE:
- err = bpf_file_fetch_bytes(ptr->data, offset, buffer__opt, buffer__szk);
- return err ? NULL : buffer__opt;
+ err = bpf_file_fetch_bytes(ptr->data, offset, buffer__nullable, buffer__szk);
+ return err ? NULL : buffer__nullable;
default:
WARN_ONCE(true, "unknown dynptr type %d\n", type);
return NULL;
@@ -2785,14 +2856,14 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr *p, u64 offset,
* bpf_dynptr_slice_rdwr() - Obtain a writable pointer to the dynptr data.
* @p: The dynptr whose data slice to retrieve
* @offset: Offset into the dynptr
- * @buffer__opt: User-provided buffer to copy contents into. May be NULL
+ * @buffer__nullable: User-provided buffer to copy contents into. May be NULL
* @buffer__szk: Size (in bytes) of the buffer if present. This is the
* length of the requested slice. This must be a constant.
*
* For non-skb and non-xdp type dynptrs, there is no difference between
* bpf_dynptr_slice and bpf_dynptr_data.
*
- * If buffer__opt is NULL, the call will fail if buffer_opt was needed.
+ * If buffer__nullable is NULL, the call will fail if the buffer was needed.
*
* The returned pointer is writable and may point to either directly the dynptr
* data at the requested offset or to the buffer if unable to obtain a direct
@@ -2824,7 +2895,7 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr *p, u64 offset,
* direct pointer)
*/
__bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u64 offset,
- void *buffer__opt, u64 buffer__szk)
+ void *buffer__nullable, u64 buffer__szk)
{
const struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p;
@@ -2853,7 +2924,7 @@ __bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u64 offset,
* will be copied out into the buffer and the user will need to call
* bpf_dynptr_write() to commit changes.
*/
- return bpf_dynptr_slice(p, offset, buffer__opt, buffer__szk);
+ return bpf_dynptr_slice(p, offset, buffer__nullable, buffer__szk);
}
__bpf_kfunc int bpf_dynptr_adjust(const struct bpf_dynptr *p, u64 start, u64 end)
@@ -3108,30 +3179,36 @@ __bpf_kfunc int bpf_wq_start(struct bpf_wq *wq, unsigned int flags)
struct bpf_async_kern *async = (struct bpf_async_kern *)wq;
struct bpf_work *w;
- if (in_nmi())
- return -EOPNOTSUPP;
if (flags)
return -EINVAL;
+
w = READ_ONCE(async->work);
if (!w || !READ_ONCE(w->cb.prog))
return -EINVAL;
- schedule_work(&w->work);
- return 0;
+ if (!refcount_inc_not_zero(&w->cb.refcnt))
+ return -ENOENT;
+
+ if (!defer_timer_wq_op()) {
+ schedule_work(&w->work);
+ bpf_async_refcount_put(&w->cb);
+ return 0;
+ } else {
+ return bpf_async_schedule_op(&w->cb, BPF_ASYNC_START, 0, 0);
+ }
}
-__bpf_kfunc int bpf_wq_set_callback_impl(struct bpf_wq *wq,
- int (callback_fn)(void *map, int *key, void *value),
- unsigned int flags,
- void *aux__prog)
+__bpf_kfunc int bpf_wq_set_callback(struct bpf_wq *wq,
+ int (callback_fn)(void *map, int *key, void *value),
+ unsigned int flags,
+ struct bpf_prog_aux *aux)
{
- struct bpf_prog_aux *aux = (struct bpf_prog_aux *)aux__prog;
struct bpf_async_kern *async = (struct bpf_async_kern *)wq;
if (flags)
return -EINVAL;
- return __bpf_async_set_callback(async, callback_fn, aux, flags, BPF_ASYNC_TYPE_WQ);
+ return __bpf_async_set_callback(async, callback_fn, aux->prog);
}
__bpf_kfunc void bpf_preempt_disable(void)
@@ -3406,7 +3483,7 @@ __bpf_kfunc void __bpf_trap(void)
* __get_kernel_nofault instead of plain dereference to make them safe.
*/
-static int __bpf_strcasecmp(const char *s1, const char *s2, bool ignore_case)
+static int __bpf_strncasecmp(const char *s1, const char *s2, bool ignore_case, size_t len)
{
char c1, c2;
int i;
@@ -3417,7 +3494,7 @@ static int __bpf_strcasecmp(const char *s1, const char *s2, bool ignore_case)
}
guard(pagefault)();
- for (i = 0; i < XATTR_SIZE_MAX; i++) {
+ for (i = 0; i < len && i < XATTR_SIZE_MAX; i++) {
__get_kernel_nofault(&c1, s1, char, err_out);
__get_kernel_nofault(&c2, s2, char, err_out);
if (ignore_case) {
@@ -3431,7 +3508,7 @@ static int __bpf_strcasecmp(const char *s1, const char *s2, bool ignore_case)
s1++;
s2++;
}
- return -E2BIG;
+ return i == XATTR_SIZE_MAX ? -E2BIG : 0;
err_out:
return -EFAULT;
}
@@ -3451,7 +3528,7 @@ err_out:
*/
__bpf_kfunc int bpf_strcmp(const char *s1__ign, const char *s2__ign)
{
- return __bpf_strcasecmp(s1__ign, s2__ign, false);
+ return __bpf_strncasecmp(s1__ign, s2__ign, false, XATTR_SIZE_MAX);
}
/**
@@ -3469,7 +3546,26 @@ __bpf_kfunc int bpf_strcmp(const char *s1__ign, const char *s2__ign)
*/
__bpf_kfunc int bpf_strcasecmp(const char *s1__ign, const char *s2__ign)
{
- return __bpf_strcasecmp(s1__ign, s2__ign, true);
+ return __bpf_strncasecmp(s1__ign, s2__ign, true, XATTR_SIZE_MAX);
+}
+
+/**
+ * bpf_strncasecmp - Compare two length-limited strings, ignoring case
+ * @s1__ign: One string
+ * @s2__ign: Another string
+ * @len: The maximum number of characters to compare
+ *
+ * Return:
+ * * %0 - Strings are equal
+ * * %-1 - @s1__ign is smaller
+ * * %1 - @s2__ign is smaller
+ * * %-EFAULT - Cannot read one of the strings
+ * * %-E2BIG - One of the strings is too large
+ * * %-ERANGE - One of the strings is outside of the kernel address space
+ */
+__bpf_kfunc int bpf_strncasecmp(const char *s1__ign, const char *s2__ign, size_t len)
+{
+ return __bpf_strncasecmp(s1__ign, s2__ign, true, len);
}
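A hedged BPF-side sketch of the new kfunc; the extern __ksym declaration follows the usual kfunc convention, and the attach point is hypothetical:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

extern int bpf_strncasecmp(const char *s1__ign, const char *s2__ign,
			   size_t len) __ksym;

SEC("tp_btf/sys_enter")			/* hypothetical attach point */
int probe(void *ctx)
{
	char buf[] = "http/1.1";

	/* Compare the first 5 bytes ignoring case: matches "HTTP/" */
	if (bpf_strncasecmp(buf, "HTTP/", 5) == 0)
		bpf_printk("case-insensitive prefix match");
	return 0;
}

char LICENSE[] SEC("license") = "GPL";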
/**
@@ -4275,41 +4371,39 @@ release_prog:
}
/**
- * bpf_task_work_schedule_signal_impl - Schedule BPF callback using task_work_add with TWA_SIGNAL
+ * bpf_task_work_schedule_signal - Schedule BPF callback using task_work_add with TWA_SIGNAL
* mode
* @task: Task struct for which callback should be scheduled
* @tw: Pointer to struct bpf_task_work in BPF map value for internal bookkeeping
* @map__map: bpf_map that embeds struct bpf_task_work in the values
* @callback: pointer to BPF subprogram to call
- * @aux__prog: user should pass NULL
+ * @aux: pointer to bpf_prog_aux of the caller BPF program, implicitly set by the verifier
*
* Return: 0 if task work has been scheduled successfully, negative error code otherwise
*/
-__bpf_kfunc int bpf_task_work_schedule_signal_impl(struct task_struct *task,
- struct bpf_task_work *tw, void *map__map,
- bpf_task_work_callback_t callback,
- void *aux__prog)
+__bpf_kfunc int bpf_task_work_schedule_signal(struct task_struct *task, struct bpf_task_work *tw,
+ void *map__map, bpf_task_work_callback_t callback,
+ struct bpf_prog_aux *aux)
{
- return bpf_task_work_schedule(task, tw, map__map, callback, aux__prog, TWA_SIGNAL);
+ return bpf_task_work_schedule(task, tw, map__map, callback, aux, TWA_SIGNAL);
}
/**
- * bpf_task_work_schedule_resume_impl - Schedule BPF callback using task_work_add with TWA_RESUME
+ * bpf_task_work_schedule_resume - Schedule BPF callback using task_work_add with TWA_RESUME
* mode
* @task: Task struct for which callback should be scheduled
* @tw: Pointer to struct bpf_task_work in BPF map value for internal bookkeeping
* @map__map: bpf_map that embeds struct bpf_task_work in the values
* @callback: pointer to BPF subprogram to call
- * @aux__prog: user should pass NULL
+ * @aux: pointer to bpf_prog_aux of the caller BPF program, implicitly set by the verifier
*
* Return: 0 if task work has been scheduled successfully, negative error code otherwise
*/
-__bpf_kfunc int bpf_task_work_schedule_resume_impl(struct task_struct *task,
- struct bpf_task_work *tw, void *map__map,
- bpf_task_work_callback_t callback,
- void *aux__prog)
+__bpf_kfunc int bpf_task_work_schedule_resume(struct task_struct *task, struct bpf_task_work *tw,
+ void *map__map, bpf_task_work_callback_t callback,
+ struct bpf_prog_aux *aux)
{
- return bpf_task_work_schedule(task, tw, map__map, callback, aux__prog, TWA_RESUME);
+ return bpf_task_work_schedule(task, tw, map__map, callback, aux, TWA_RESUME);
}
static int make_file_dynptr(struct file *file, u32 flags, bool may_sleep,
@@ -4360,6 +4454,53 @@ __bpf_kfunc int bpf_dynptr_file_discard(struct bpf_dynptr *dynptr)
return 0;
}
+/**
+ * bpf_timer_cancel_async - try to deactivate a timer
+ * @timer: bpf_timer to stop
+ *
+ * Returns:
+ *
+ * * 0 when the timer was not active
+ * * 1 when the timer was active
+ * * -1 when the timer is currently executing the callback function and
+ * cannot be stopped
+ * * -ECANCELED when the timer will be cancelled asynchronously
+ * * -ENOMEM when out of memory
+ * * -EINVAL when the timer was not initialized
+ * * -ENOENT when this kfunc is racing with timer deletion
+ */
+__bpf_kfunc int bpf_timer_cancel_async(struct bpf_timer *timer)
+{
+ struct bpf_async_kern *async = (void *)timer;
+ struct bpf_async_cb *cb;
+ int ret;
+
+ cb = READ_ONCE(async->cb);
+ if (!cb)
+ return -EINVAL;
+
+ /*
+	 * Unlike hrtimer_start(), it is ok to call hrtimer_try_to_cancel()
+	 * synchronously when the refcnt has reached zero, but deferring to
+	 * irq_work is not, since the irq callback may execute after an RCU GP,
+	 * when cb may already be freed. Check for refcnt zero anyway, for
+	 * consistency.
+ */
+ if (!refcount_inc_not_zero(&cb->refcnt))
+ return -ENOENT;
+
+ if (!defer_timer_wq_op()) {
+ struct bpf_hrtimer *t = container_of(cb, struct bpf_hrtimer, cb);
+
+ ret = hrtimer_try_to_cancel(&t->timer);
+ bpf_async_refcount_put(cb);
+ return ret;
+ } else {
+ ret = bpf_async_schedule_op(cb, BPF_ASYNC_CANCEL, 0, 0);
+ return ret ? ret : -ECANCELED;
+ }
+}
+
__bpf_kfunc_end_defs();
static void bpf_task_work_cancel_scheduled(struct irq_work *irq_work)
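A hedged sketch of consuming the documented return codes from BPF program context, assuming the usual extern __ksym convention and that the errno constants are available to the program:

extern int bpf_timer_cancel_async(struct bpf_timer *timer) __ksym;

static int try_cancel(struct bpf_timer *t)
{
	int ret = bpf_timer_cancel_async(t);

	if (ret == -ECANCELED)
		return 0;	/* cancel queued; completes asynchronously */
	if (ret < 0)
		return ret;	/* -EINVAL/-ENOENT/-ENOMEM, or -1 (cb running) */
	return 0;		/* 0 or 1: the timer is no longer pending */
}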
@@ -4427,7 +4568,7 @@ BTF_ID_FLAGS(func, bpf_task_from_pid, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_task_from_vpid, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_throw)
#ifdef CONFIG_BPF_EVENTS
-BTF_ID_FLAGS(func, bpf_send_signal_task, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_send_signal_task)
#endif
#ifdef CONFIG_KEYS
BTF_ID_FLAGS(func, bpf_lookup_user_key, KF_ACQUIRE | KF_RET_NULL | KF_SLEEPABLE)
@@ -4467,14 +4608,14 @@ BTF_ID_FLAGS(func, bpf_iter_task_vma_new, KF_ITER_NEW | KF_RCU)
BTF_ID_FLAGS(func, bpf_iter_task_vma_next, KF_ITER_NEXT | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_iter_task_vma_destroy, KF_ITER_DESTROY)
#ifdef CONFIG_CGROUPS
-BTF_ID_FLAGS(func, bpf_iter_css_task_new, KF_ITER_NEW | KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_iter_css_task_new, KF_ITER_NEW)
BTF_ID_FLAGS(func, bpf_iter_css_task_next, KF_ITER_NEXT | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_iter_css_task_destroy, KF_ITER_DESTROY)
-BTF_ID_FLAGS(func, bpf_iter_css_new, KF_ITER_NEW | KF_TRUSTED_ARGS | KF_RCU_PROTECTED)
+BTF_ID_FLAGS(func, bpf_iter_css_new, KF_ITER_NEW | KF_RCU_PROTECTED)
BTF_ID_FLAGS(func, bpf_iter_css_next, KF_ITER_NEXT | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_iter_css_destroy, KF_ITER_DESTROY)
#endif
-BTF_ID_FLAGS(func, bpf_iter_task_new, KF_ITER_NEW | KF_TRUSTED_ARGS | KF_RCU_PROTECTED)
+BTF_ID_FLAGS(func, bpf_iter_task_new, KF_ITER_NEW | KF_RCU_PROTECTED)
BTF_ID_FLAGS(func, bpf_iter_task_next, KF_ITER_NEXT | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_iter_task_destroy, KF_ITER_DESTROY)
BTF_ID_FLAGS(func, bpf_dynptr_adjust)
@@ -4488,7 +4629,7 @@ BTF_ID_FLAGS(func, bpf_dynptr_memset)
BTF_ID_FLAGS(func, bpf_modify_return_test_tp)
#endif
BTF_ID_FLAGS(func, bpf_wq_init)
-BTF_ID_FLAGS(func, bpf_wq_set_callback_impl)
+BTF_ID_FLAGS(func, bpf_wq_set_callback, KF_IMPLICIT_ARGS)
BTF_ID_FLAGS(func, bpf_wq_start)
BTF_ID_FLAGS(func, bpf_preempt_disable)
BTF_ID_FLAGS(func, bpf_preempt_enable)
@@ -4510,8 +4651,8 @@ BTF_ID_FLAGS(func, bpf_probe_read_user_str_dynptr)
BTF_ID_FLAGS(func, bpf_probe_read_kernel_str_dynptr)
BTF_ID_FLAGS(func, bpf_copy_from_user_dynptr, KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_copy_from_user_str_dynptr, KF_SLEEPABLE)
-BTF_ID_FLAGS(func, bpf_copy_from_user_task_dynptr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_copy_from_user_task_str_dynptr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_copy_from_user_task_dynptr, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_copy_from_user_task_str_dynptr, KF_SLEEPABLE)
#endif
#ifdef CONFIG_DMA_SHARED_BUFFER
BTF_ID_FLAGS(func, bpf_iter_dmabuf_new, KF_ITER_NEW | KF_SLEEPABLE)
@@ -4521,6 +4662,7 @@ BTF_ID_FLAGS(func, bpf_iter_dmabuf_destroy, KF_ITER_DESTROY | KF_SLEEPABLE)
BTF_ID_FLAGS(func, __bpf_trap)
BTF_ID_FLAGS(func, bpf_strcmp);
BTF_ID_FLAGS(func, bpf_strcasecmp);
+BTF_ID_FLAGS(func, bpf_strncasecmp);
BTF_ID_FLAGS(func, bpf_strchr);
BTF_ID_FLAGS(func, bpf_strchrnul);
BTF_ID_FLAGS(func, bpf_strnchr);
@@ -4536,11 +4678,13 @@ BTF_ID_FLAGS(func, bpf_strncasestr);
#if defined(CONFIG_BPF_LSM) && defined(CONFIG_CGROUPS)
BTF_ID_FLAGS(func, bpf_cgroup_read_xattr, KF_RCU)
#endif
-BTF_ID_FLAGS(func, bpf_stream_vprintk_impl, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_task_work_schedule_signal_impl, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_task_work_schedule_resume_impl, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_dynptr_from_file, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_stream_vprintk, KF_IMPLICIT_ARGS)
+BTF_ID_FLAGS(func, bpf_stream_print_stack, KF_IMPLICIT_ARGS)
+BTF_ID_FLAGS(func, bpf_task_work_schedule_signal, KF_IMPLICIT_ARGS)
+BTF_ID_FLAGS(func, bpf_task_work_schedule_resume, KF_IMPLICIT_ARGS)
+BTF_ID_FLAGS(func, bpf_dynptr_from_file)
BTF_ID_FLAGS(func, bpf_dynptr_file_discard)
+BTF_ID_FLAGS(func, bpf_timer_cancel_async)
BTF_KFUNCS_END(common_btf_ids)
static const struct btf_kfunc_id_set common_kfunc_set = {
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 9f866a010dad..005ea3a2cda7 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -600,10 +600,17 @@ struct bpffs_btf_enums {
static int find_bpffs_btf_enums(struct bpffs_btf_enums *info)
{
+ struct {
+ const struct btf_type **type;
+ const char *name;
+ } btf_enums[] = {
+ {&info->cmd_t, "bpf_cmd"},
+ {&info->map_t, "bpf_map_type"},
+ {&info->prog_t, "bpf_prog_type"},
+ {&info->attach_t, "bpf_attach_type"},
+ };
const struct btf *btf;
- const struct btf_type *t;
- const char *name;
- int i, n;
+ int i, id;
memset(info, 0, sizeof(*info));
@@ -615,31 +622,16 @@ static int find_bpffs_btf_enums(struct bpffs_btf_enums *info)
info->btf = btf;
- for (i = 1, n = btf_nr_types(btf); i < n; i++) {
- t = btf_type_by_id(btf, i);
- if (!btf_type_is_enum(t))
- continue;
-
- name = btf_name_by_offset(btf, t->name_off);
- if (!name)
- continue;
-
- if (strcmp(name, "bpf_cmd") == 0)
- info->cmd_t = t;
- else if (strcmp(name, "bpf_map_type") == 0)
- info->map_t = t;
- else if (strcmp(name, "bpf_prog_type") == 0)
- info->prog_t = t;
- else if (strcmp(name, "bpf_attach_type") == 0)
- info->attach_t = t;
- else
- continue;
+ for (i = 0; i < ARRAY_SIZE(btf_enums); i++) {
+ id = btf_find_by_name_kind(btf, btf_enums[i].name,
+ BTF_KIND_ENUM);
+ if (id < 0)
+ return -ESRCH;
- if (info->cmd_t && info->map_t && info->prog_t && info->attach_t)
- return 0;
+ *btf_enums[i].type = btf_type_by_id(btf, id);
}
- return -ESRCH;
+ return 0;
}
static bool find_btf_enum_const(const struct btf *btf, const struct btf_type *enum_t,
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index c93a756e035c..1ccbf28b2ad9 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -180,7 +180,7 @@ static long cgroup_storage_update_elem(struct bpf_map *map, void *key,
}
int bpf_percpu_cgroup_storage_copy(struct bpf_map *_map, void *key,
- void *value)
+ void *value, u64 map_flags)
{
struct bpf_cgroup_storage_map *map = map_to_storage(_map);
struct bpf_cgroup_storage *storage;
@@ -198,12 +198,17 @@ int bpf_percpu_cgroup_storage_copy(struct bpf_map *_map, void *key,
* access 'value_size' of them, so copying rounded areas
* will not leak any kernel data
*/
+ if (map_flags & BPF_F_CPU) {
+ cpu = map_flags >> 32;
+ copy_map_value(_map, value, per_cpu_ptr(storage->percpu_buf, cpu));
+ goto unlock;
+ }
size = round_up(_map->value_size, 8);
for_each_possible_cpu(cpu) {
- bpf_long_memcpy(value + off,
- per_cpu_ptr(storage->percpu_buf, cpu), size);
+ copy_map_value_long(_map, value + off, per_cpu_ptr(storage->percpu_buf, cpu));
off += size;
}
+unlock:
rcu_read_unlock();
return 0;
}
@@ -213,10 +218,11 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *_map, void *key,
{
struct bpf_cgroup_storage_map *map = map_to_storage(_map);
struct bpf_cgroup_storage *storage;
- int cpu, off = 0;
+ void *val;
u32 size;
+ int cpu;
- if (map_flags != BPF_ANY && map_flags != BPF_EXIST)
+ if ((u32)map_flags & ~(BPF_ANY | BPF_EXIST | BPF_F_CPU | BPF_F_ALL_CPUS))
return -EINVAL;
rcu_read_lock();
@@ -232,12 +238,17 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *_map, void *key,
* returned or zeros which were zero-filled by percpu_alloc,
* so no kernel data leaks possible
*/
+ if (map_flags & BPF_F_CPU) {
+ cpu = map_flags >> 32;
+ copy_map_value(_map, per_cpu_ptr(storage->percpu_buf, cpu), value);
+ goto unlock;
+ }
size = round_up(_map->value_size, 8);
for_each_possible_cpu(cpu) {
- bpf_long_memcpy(per_cpu_ptr(storage->percpu_buf, cpu),
- value + off, size);
- off += size;
+ val = (map_flags & BPF_F_ALL_CPUS) ? value : value + size * cpu;
+ copy_map_value(_map, per_cpu_ptr(storage->percpu_buf, cpu), val);
}
+unlock:
rcu_read_unlock();
return 0;
}
diff --git a/kernel/bpf/map_iter.c b/kernel/bpf/map_iter.c
index 9575314f40a6..261a03ea73d3 100644
--- a/kernel/bpf/map_iter.c
+++ b/kernel/bpf/map_iter.c
@@ -214,7 +214,7 @@ __bpf_kfunc s64 bpf_map_sum_elem_count(const struct bpf_map *map)
__bpf_kfunc_end_defs();
BTF_KFUNCS_START(bpf_map_iter_kfunc_ids)
-BTF_ID_FLAGS(func, bpf_map_sum_elem_count, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_map_sum_elem_count)
BTF_KFUNCS_END(bpf_map_iter_kfunc_ids)
static const struct btf_kfunc_id_set bpf_map_iter_kfunc_set = {
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 42ae8d595c2c..227f9b5f388b 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -1,16 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2017-2018 Netronome Systems, Inc.
- *
- * This software is licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree.
- *
- * THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS"
- * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
- * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE
- * OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME
- * THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
*/
#include <linux/bpf.h>
diff --git a/kernel/bpf/range_tree.c b/kernel/bpf/range_tree.c
index 99c63d982c5d..2f28886f3ff7 100644
--- a/kernel/bpf/range_tree.c
+++ b/kernel/bpf/range_tree.c
@@ -149,7 +149,8 @@ int range_tree_clear(struct range_tree *rt, u32 start, u32 len)
range_it_insert(rn, rt);
/* Add a range */
- new_rn = kmalloc_nolock(sizeof(struct range_node), 0, NUMA_NO_NODE);
+ new_rn = kmalloc_nolock(sizeof(struct range_node), __GFP_ACCOUNT,
+ NUMA_NO_NODE);
if (!new_rn)
return -ENOMEM;
new_rn->rn_start = last + 1;
@@ -234,7 +235,7 @@ int range_tree_set(struct range_tree *rt, u32 start, u32 len)
right->rn_start = start;
range_it_insert(right, rt);
} else {
- left = kmalloc_nolock(sizeof(struct range_node), 0, NUMA_NO_NODE);
+ left = kmalloc_nolock(sizeof(struct range_node), __GFP_ACCOUNT, NUMA_NO_NODE);
if (!left)
return -ENOMEM;
left->rn_start = start;
diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c
index f6a075ffac63..35ae64ade36b 100644
--- a/kernel/bpf/ringbuf.c
+++ b/kernel/bpf/ringbuf.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/err.h>
diff --git a/kernel/bpf/rqspinlock.c b/kernel/bpf/rqspinlock.c
index f7d0c8d4644e..2fdfa828e3d3 100644
--- a/kernel/bpf/rqspinlock.c
+++ b/kernel/bpf/rqspinlock.c
@@ -265,10 +265,11 @@ int __lockfunc resilient_tas_spin_lock(rqspinlock_t *lock)
RES_INIT_TIMEOUT(ts);
/*
- * The fast path is not invoked for the TAS fallback, so we must grab
- * the deadlock detection entry here.
+ * We are either called directly from res_spin_lock after grabbing the
+ * deadlock detection entry when queued spinlocks are disabled, or from
+ * resilient_queued_spin_lock_slowpath after grabbing the deadlock
+ * detection entry. No need to obtain it here.
*/
- grab_held_lock_entry(lock);
/*
* Since the waiting loop's time is dependent on the amount of
diff --git a/kernel/bpf/stream.c b/kernel/bpf/stream.c
index 0b6bc3f30335..be9ce98e9469 100644
--- a/kernel/bpf/stream.c
+++ b/kernel/bpf/stream.c
@@ -212,14 +212,13 @@ __bpf_kfunc_start_defs();
* Avoid using enum bpf_stream_id so that kfunc users don't have to pull in the
* enum in headers.
*/
-__bpf_kfunc int bpf_stream_vprintk_impl(int stream_id, const char *fmt__str, const void *args,
- u32 len__sz, void *aux__prog)
+__bpf_kfunc int bpf_stream_vprintk(int stream_id, const char *fmt__str, const void *args,
+ u32 len__sz, struct bpf_prog_aux *aux)
{
struct bpf_bprintf_data data = {
.get_bin_args = true,
.get_buf = true,
};
- struct bpf_prog_aux *aux = aux__prog;
u32 fmt_size = strlen(fmt__str) + 1;
struct bpf_stream *stream;
u32 data_len = len__sz;
@@ -246,6 +245,25 @@ __bpf_kfunc int bpf_stream_vprintk_impl(int stream_id, const char *fmt__str, con
return ret;
}
+/* Directly trigger a stack dump from the program. */
+__bpf_kfunc int bpf_stream_print_stack(int stream_id, struct bpf_prog_aux *aux)
+{
+ struct bpf_stream_stage ss;
+ struct bpf_prog *prog;
+
+ /* Make sure the stream ID is valid. */
+ if (!bpf_stream_get(stream_id, aux))
+ return -ENOENT;
+
+ prog = aux->main_prog_aux->prog;
+
+ bpf_stream_stage(ss, prog, stream_id, ({
+ bpf_stream_dump_stack(ss);
+ }));
+
+ return 0;
+}
+
__bpf_kfunc_end_defs();
/* Added kfunc to common_btf_ids */
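A hedged sketch of calling the stack-dump kfunc from a program: since KF_IMPLICIT_ARGS marks aux as verifier-supplied, the program-visible prototype presumably omits it, and the raw stream id 2 (BPF_STDERR) avoids pulling in the enum, per the comment above:

extern int bpf_stream_print_stack(int stream_id) __ksym;

SEC("fentry/do_nanosleep")		/* hypothetical attach point */
int dump_on_entry(void *ctx)
{
	bpf_stream_print_stack(2 /* BPF_STDERR */);
	return 0;
}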
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 4ff82144f885..683c332dbafb 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -133,12 +133,14 @@ bool bpf_map_write_active(const struct bpf_map *map)
return atomic64_read(&map->writecnt) != 0;
}
-static u32 bpf_map_value_size(const struct bpf_map *map)
-{
- if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
- map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
- map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
- map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
+static u32 bpf_map_value_size(const struct bpf_map *map, u64 flags)
+{
+ if (flags & (BPF_F_CPU | BPF_F_ALL_CPUS))
+ return map->value_size;
+ else if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+ map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
+ map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
+ map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
return round_up(map->value_size, 8) * num_possible_cpus();
else if (IS_FD_MAP(map))
return sizeof(u32);
@@ -314,11 +316,11 @@ static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
bpf_disable_instrumentation();
if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
- err = bpf_percpu_hash_copy(map, key, value);
+ err = bpf_percpu_hash_copy(map, key, value, flags);
} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
- err = bpf_percpu_array_copy(map, key, value);
+ err = bpf_percpu_array_copy(map, key, value, flags);
} else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
- err = bpf_percpu_cgroup_storage_copy(map, key, value);
+ err = bpf_percpu_cgroup_storage_copy(map, key, value, flags);
} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
err = bpf_stackmap_extract(map, key, value, false);
} else if (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map)) {
@@ -505,17 +507,29 @@ static struct mem_cgroup *bpf_map_get_memcg(const struct bpf_map *map)
return root_mem_cgroup;
}
+void bpf_map_memcg_enter(const struct bpf_map *map, struct mem_cgroup **old_memcg,
+ struct mem_cgroup **new_memcg)
+{
+ *new_memcg = bpf_map_get_memcg(map);
+ *old_memcg = set_active_memcg(*new_memcg);
+}
+
+void bpf_map_memcg_exit(struct mem_cgroup *old_memcg,
+ struct mem_cgroup *new_memcg)
+{
+ set_active_memcg(old_memcg);
+ mem_cgroup_put(new_memcg);
+}
+
void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags,
int node)
{
struct mem_cgroup *memcg, *old_memcg;
void *ptr;
- memcg = bpf_map_get_memcg(map);
- old_memcg = set_active_memcg(memcg);
+ bpf_map_memcg_enter(map, &old_memcg, &memcg);
ptr = kmalloc_node(size, flags | __GFP_ACCOUNT, node);
- set_active_memcg(old_memcg);
- mem_cgroup_put(memcg);
+ bpf_map_memcg_exit(old_memcg, memcg);
return ptr;
}
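The enter/exit pair factors out the set_active_memcg() bracketing so future allocator wrappers need not open-code it. A sketch of a hypothetical new wrapper under that assumption:

void *bpf_map_kvmalloc_sketch(const struct bpf_map *map, size_t size, gfp_t flags)
{
	struct mem_cgroup *memcg, *old_memcg;
	void *ptr;

	bpf_map_memcg_enter(map, &old_memcg, &memcg);	/* charge to the map's memcg */
	ptr = kvmalloc(size, flags | __GFP_ACCOUNT);
	bpf_map_memcg_exit(old_memcg, memcg);		/* restore and drop the ref */

	return ptr;
}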
@@ -526,11 +540,9 @@ void *bpf_map_kmalloc_nolock(const struct bpf_map *map, size_t size, gfp_t flags
struct mem_cgroup *memcg, *old_memcg;
void *ptr;
- memcg = bpf_map_get_memcg(map);
- old_memcg = set_active_memcg(memcg);
+ bpf_map_memcg_enter(map, &old_memcg, &memcg);
ptr = kmalloc_nolock(size, flags | __GFP_ACCOUNT, node);
- set_active_memcg(old_memcg);
- mem_cgroup_put(memcg);
+ bpf_map_memcg_exit(old_memcg, memcg);
return ptr;
}
@@ -540,11 +552,9 @@ void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags)
struct mem_cgroup *memcg, *old_memcg;
void *ptr;
- memcg = bpf_map_get_memcg(map);
- old_memcg = set_active_memcg(memcg);
+ bpf_map_memcg_enter(map, &old_memcg, &memcg);
ptr = kzalloc(size, flags | __GFP_ACCOUNT);
- set_active_memcg(old_memcg);
- mem_cgroup_put(memcg);
+ bpf_map_memcg_exit(old_memcg, memcg);
return ptr;
}
@@ -555,11 +565,9 @@ void *bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size,
struct mem_cgroup *memcg, *old_memcg;
void *ptr;
- memcg = bpf_map_get_memcg(map);
- old_memcg = set_active_memcg(memcg);
+ bpf_map_memcg_enter(map, &old_memcg, &memcg);
ptr = kvcalloc(n, size, flags | __GFP_ACCOUNT);
- set_active_memcg(old_memcg);
- mem_cgroup_put(memcg);
+ bpf_map_memcg_exit(old_memcg, memcg);
return ptr;
}
@@ -570,11 +578,9 @@ void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size,
struct mem_cgroup *memcg, *old_memcg;
void __percpu *ptr;
- memcg = bpf_map_get_memcg(map);
- old_memcg = set_active_memcg(memcg);
+ bpf_map_memcg_enter(map, &old_memcg, &memcg);
ptr = __alloc_percpu_gfp(size, align, flags | __GFP_ACCOUNT);
- set_active_memcg(old_memcg);
- mem_cgroup_put(memcg);
+ bpf_map_memcg_exit(old_memcg, memcg);
return ptr;
}
@@ -612,12 +618,7 @@ int bpf_map_alloc_pages(const struct bpf_map *map, int nid,
unsigned long i, j;
struct page *pg;
int ret = 0;
-#ifdef CONFIG_MEMCG
- struct mem_cgroup *memcg, *old_memcg;
- memcg = bpf_map_get_memcg(map);
- old_memcg = set_active_memcg(memcg);
-#endif
for (i = 0; i < nr_pages; i++) {
pg = __bpf_alloc_page(nid);
@@ -631,10 +632,6 @@ int bpf_map_alloc_pages(const struct bpf_map *map, int nid,
break;
}
-#ifdef CONFIG_MEMCG
- set_active_memcg(old_memcg);
- mem_cgroup_put(memcg);
-#endif
return ret;
}
@@ -1366,11 +1363,6 @@ free_map_tab:
return ret;
}
-static bool bpf_net_capable(void)
-{
- return capable(CAP_NET_ADMIN) || capable(CAP_SYS_ADMIN);
-}
-
#define BPF_MAP_CREATE_LAST_FIELD excl_prog_hash_size
/* called via syscall */
static int map_create(union bpf_attr *attr, bpfptr_t uattr)
@@ -1734,7 +1726,7 @@ static int map_lookup_elem(union bpf_attr *attr)
if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ))
return -EPERM;
- err = bpf_map_check_op_flags(map, attr->flags, BPF_F_LOCK);
+ err = bpf_map_check_op_flags(map, attr->flags, BPF_F_LOCK | BPF_F_CPU);
if (err)
return err;
@@ -1742,7 +1734,7 @@ static int map_lookup_elem(union bpf_attr *attr)
if (IS_ERR(key))
return PTR_ERR(key);
- value_size = bpf_map_value_size(map);
+ value_size = bpf_map_value_size(map, attr->flags);
err = -ENOMEM;
value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
@@ -1809,7 +1801,7 @@ static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
goto err_put;
}
- value_size = bpf_map_value_size(map);
+ value_size = bpf_map_value_size(map, attr->flags);
value = kvmemdup_bpfptr(uvalue, value_size);
if (IS_ERR(value)) {
err = PTR_ERR(value);
@@ -2005,11 +1997,12 @@ int generic_map_update_batch(struct bpf_map *map, struct file *map_file,
void *key, *value;
int err = 0;
- err = bpf_map_check_op_flags(map, attr->batch.elem_flags, BPF_F_LOCK);
+ err = bpf_map_check_op_flags(map, attr->batch.elem_flags,
+ BPF_F_LOCK | BPF_F_CPU | BPF_F_ALL_CPUS);
if (err)
return err;
- value_size = bpf_map_value_size(map);
+ value_size = bpf_map_value_size(map, attr->batch.elem_flags);
max_count = attr->batch.count;
if (!max_count)
@@ -2064,11 +2057,11 @@ int generic_map_lookup_batch(struct bpf_map *map,
u32 value_size, cp, max_count;
int err;
- err = bpf_map_check_op_flags(map, attr->batch.elem_flags, BPF_F_LOCK);
+ err = bpf_map_check_op_flags(map, attr->batch.elem_flags, BPF_F_LOCK | BPF_F_CPU);
if (err)
return err;
- value_size = bpf_map_value_size(map);
+ value_size = bpf_map_value_size(map, attr->batch.elem_flags);
max_count = attr->batch.count;
if (!max_count)
@@ -2190,7 +2183,7 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
goto err_put;
}
- value_size = bpf_map_value_size(map);
+ value_size = bpf_map_value_size(map, 0);
err = -ENOMEM;
value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
@@ -2820,6 +2813,13 @@ static int bpf_prog_verify_signature(struct bpf_prog *prog, union bpf_attr *attr
void *sig;
int err = 0;
+ /*
+ * Don't attempt to use kmalloc_large or vmalloc for signatures.
+ * A practical signature for a BPF program should be below this limit.
+ */
+ if (attr->signature_size > KMALLOC_MAX_CACHE_SIZE)
+ return -EINVAL;
+
if (system_keyring_id_check(attr->keyring_id) == 0)
key = bpf_lookup_system_key(attr->keyring_id);
else
@@ -3579,6 +3579,7 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog,
case BPF_PROG_TYPE_TRACING:
if (prog->expected_attach_type != BPF_TRACE_FENTRY &&
prog->expected_attach_type != BPF_TRACE_FEXIT &&
+ prog->expected_attach_type != BPF_TRACE_FSESSION &&
prog->expected_attach_type != BPF_MODIFY_RETURN) {
err = -EINVAL;
goto out_put_prog;
@@ -3628,7 +3629,21 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog,
key = bpf_trampoline_compute_key(tgt_prog, NULL, btf_id);
}
- link = kzalloc(sizeof(*link), GFP_USER);
+ if (prog->expected_attach_type == BPF_TRACE_FSESSION) {
+ struct bpf_fsession_link *fslink;
+
+ fslink = kzalloc(sizeof(*fslink), GFP_USER);
+ if (fslink) {
+ bpf_link_init(&fslink->fexit.link, BPF_LINK_TYPE_TRACING,
+ &bpf_tracing_link_lops, prog, attach_type);
+ fslink->fexit.cookie = bpf_cookie;
+ link = &fslink->link;
+ } else {
+ link = NULL;
+ }
+ } else {
+ link = kzalloc(sizeof(*link), GFP_USER);
+ }
if (!link) {
err = -ENOMEM;
goto out_put_prog;
@@ -4352,6 +4367,7 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type)
case BPF_TRACE_RAW_TP:
case BPF_TRACE_FENTRY:
case BPF_TRACE_FEXIT:
+ case BPF_TRACE_FSESSION:
case BPF_MODIFY_RETURN:
return BPF_PROG_TYPE_TRACING;
case BPF_LSM_MAC:
@@ -4565,6 +4581,8 @@ static int bpf_prog_detach(const union bpf_attr *attr)
prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
if (IS_ERR(prog))
return PTR_ERR(prog);
+ } else if (!bpf_mprog_detach_empty(ptype)) {
+ return -EPERM;
}
} else if (is_cgroup_prog_type(ptype, 0, false)) {
if (attr->attach_flags || attr->relative_fd)
@@ -5310,6 +5328,9 @@ static int bpf_map_get_info_by_fd(struct file *file,
if (info.hash_size != SHA256_DIGEST_SIZE)
return -EINVAL;
+ if (!READ_ONCE(map->frozen))
+ return -EPERM;
+
err = map->ops->map_get_hash(map, SHA256_DIGEST_SIZE, map->sha);
if (err != 0)
return err;
@@ -6122,6 +6143,49 @@ static int prog_stream_read(union bpf_attr *attr)
return ret;
}
+#define BPF_PROG_ASSOC_STRUCT_OPS_LAST_FIELD prog_assoc_struct_ops.prog_fd
+
+static int prog_assoc_struct_ops(union bpf_attr *attr)
+{
+ struct bpf_prog *prog;
+ struct bpf_map *map;
+ int ret;
+
+ if (CHECK_ATTR(BPF_PROG_ASSOC_STRUCT_OPS))
+ return -EINVAL;
+
+ if (attr->prog_assoc_struct_ops.flags)
+ return -EINVAL;
+
+ prog = bpf_prog_get(attr->prog_assoc_struct_ops.prog_fd);
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+
+ if (prog->type == BPF_PROG_TYPE_STRUCT_OPS) {
+ ret = -EINVAL;
+ goto put_prog;
+ }
+
+ map = bpf_map_get(attr->prog_assoc_struct_ops.map_fd);
+ if (IS_ERR(map)) {
+ ret = PTR_ERR(map);
+ goto put_prog;
+ }
+
+ if (map->map_type != BPF_MAP_TYPE_STRUCT_OPS) {
+ ret = -EINVAL;
+ goto put_map;
+ }
+
+ ret = bpf_prog_assoc_struct_ops(prog, map);
+
+put_map:
+ bpf_map_put(map);
+put_prog:
+ bpf_prog_put(prog);
+ return ret;
+}
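+
+/*
+ * User-space usage sketch (hypothetical wrapper; the attr fields are
+ * the ones consumed above):
+ *
+ *	union bpf_attr attr = {};
+ *
+ *	attr.prog_assoc_struct_ops.prog_fd = prog_fd;
+ *	attr.prog_assoc_struct_ops.map_fd = struct_ops_map_fd;
+ *	err = syscall(__NR_bpf, BPF_PROG_ASSOC_STRUCT_OPS, &attr, sizeof(attr));
+ */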
+
static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size)
{
union bpf_attr attr;
@@ -6261,6 +6325,9 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size)
case BPF_PROG_STREAM_READ_BY_FD:
err = prog_stream_read(&attr);
break;
+ case BPF_PROG_ASSOC_STRUCT_OPS:
+ err = prog_assoc_struct_ops(&attr);
+ break;
default:
err = -EINVAL;
break;
@@ -6407,7 +6474,7 @@ static const struct bpf_func_proto bpf_kallsyms_lookup_name_proto = {
.func = bpf_kallsyms_lookup_name,
.gpl_only = false,
.ret_type = RET_INTEGER,
- .arg1_type = ARG_PTR_TO_MEM,
+ .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
.arg3_type = ARG_ANYTHING,
.arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED,
diff --git a/kernel/bpf/tnum.c b/kernel/bpf/tnum.c
index f8e70e9c3998..26fbfbb01700 100644
--- a/kernel/bpf/tnum.c
+++ b/kernel/bpf/tnum.c
@@ -8,6 +8,7 @@
*/
#include <linux/kernel.h>
#include <linux/tnum.h>
+#include <linux/swab.h>
#define TNUM(_v, _m) (struct tnum){.value = _v, .mask = _m}
/* A completely unknown value */
@@ -253,3 +254,18 @@ struct tnum tnum_const_subreg(struct tnum a, u32 value)
{
return tnum_with_subreg(a, tnum_const(value));
}
+
+struct tnum tnum_bswap16(struct tnum a)
+{
+ return TNUM(swab16(a.value & 0xFFFF), swab16(a.mask & 0xFFFF));
+}
+
+struct tnum tnum_bswap32(struct tnum a)
+{
+ return TNUM(swab32(a.value & 0xFFFFFFFF), swab32(a.mask & 0xFFFFFFFF));
+}
+
+struct tnum tnum_bswap64(struct tnum a)
+{
+ return TNUM(swab64(a.value), swab64(a.mask));
+}
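+
+/*
+ * Example (sketch): for a = {.value = 0x00ff, .mask = 0xff00} the low
+ * byte is fully known and the high byte fully unknown. tnum_bswap16()
+ * swaps value and mask in lockstep, giving {.value = 0xff00,
+ * .mask = 0x00ff}: the known byte moves to its new position and the
+ * uncertainty moves with it.
+ */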
diff --git a/kernel/bpf/token.c b/kernel/bpf/token.c
index feecd8f4dbf9..7e4aa1e44b50 100644
--- a/kernel/bpf/token.c
+++ b/kernel/bpf/token.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <linux/vmalloc.h>
#include <linux/file.h>
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 976d89011b15..952cd7932461 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -24,19 +24,49 @@ const struct bpf_prog_ops bpf_extension_prog_ops = {
#define TRAMPOLINE_HASH_BITS 10
#define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS)
-static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];
+static struct hlist_head trampoline_key_table[TRAMPOLINE_TABLE_SIZE];
+static struct hlist_head trampoline_ip_table[TRAMPOLINE_TABLE_SIZE];
-/* serializes access to trampoline_table */
+/* serializes access to trampoline tables */
static DEFINE_MUTEX(trampoline_mutex);
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex);
-static int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, enum ftrace_ops_cmd cmd)
+#ifdef CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS
+static struct bpf_trampoline *direct_ops_ip_lookup(struct ftrace_ops *ops, unsigned long ip)
{
- struct bpf_trampoline *tr = ops->private;
+ struct hlist_head *head_ip;
+ struct bpf_trampoline *tr;
+
+ mutex_lock(&trampoline_mutex);
+ head_ip = &trampoline_ip_table[hash_64(ip, TRAMPOLINE_HASH_BITS)];
+ hlist_for_each_entry(tr, head_ip, hlist_ip) {
+ if (tr->ip == ip)
+ goto out;
+ }
+ tr = NULL;
+out:
+ mutex_unlock(&trampoline_mutex);
+ return tr;
+}
+#else
+static struct bpf_trampoline *direct_ops_ip_lookup(struct ftrace_ops *ops, unsigned long ip)
+{
+ return ops->private;
+}
+#endif /* CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS */
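+
+/*
+ * With a single shared direct_ops the owning trampoline cannot be
+ * recovered from ops->private, hence the ip-keyed hash table lookup
+ * above; the per-trampoline variant simply returns ops->private.
+ */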
+
+static int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, unsigned long ip,
+ enum ftrace_ops_cmd cmd)
+{
+ struct bpf_trampoline *tr;
int ret = 0;
+ tr = direct_ops_ip_lookup(ops, ip);
+ if (!tr)
+ return -EINVAL;
+
if (cmd == FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF) {
/* This is called inside register_ftrace_direct_multi(), so
* tr->mutex is already locked.
@@ -109,10 +139,17 @@ bool bpf_prog_has_trampoline(const struct bpf_prog *prog)
enum bpf_attach_type eatype = prog->expected_attach_type;
enum bpf_prog_type ptype = prog->type;
- return (ptype == BPF_PROG_TYPE_TRACING &&
- (eatype == BPF_TRACE_FENTRY || eatype == BPF_TRACE_FEXIT ||
- eatype == BPF_MODIFY_RETURN)) ||
- (ptype == BPF_PROG_TYPE_LSM && eatype == BPF_LSM_MAC);
+ switch (ptype) {
+ case BPF_PROG_TYPE_TRACING:
+ if (eatype == BPF_TRACE_FENTRY || eatype == BPF_TRACE_FEXIT ||
+ eatype == BPF_MODIFY_RETURN || eatype == BPF_TRACE_FSESSION)
+ return true;
+ return false;
+ case BPF_PROG_TYPE_LSM:
+ return eatype == BPF_LSM_MAC;
+ default:
+ return false;
+ }
}
void bpf_image_ksym_init(void *data, unsigned int size, struct bpf_ksym *ksym)
@@ -135,15 +172,171 @@ void bpf_image_ksym_del(struct bpf_ksym *ksym)
PAGE_SIZE, true, ksym->name);
}
-static struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+#ifdef CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS
+/*
+ * We have only a single direct_ops object, which contains all the
+ * direct call sites and is the only global ftrace_ops shared by all
+ * trampolines.
+ *
+ * We use the 'update_ftrace_direct_*' API for attachment.
+ */
+struct ftrace_ops direct_ops = {
+ .ops_func = bpf_tramp_ftrace_ops_func,
+};
+
+static int direct_ops_alloc(struct bpf_trampoline *tr)
+{
+ tr->fops = &direct_ops;
+ return 0;
+}
+
+static void direct_ops_free(struct bpf_trampoline *tr) { }
+
+static struct ftrace_hash *hash_from_ip(struct bpf_trampoline *tr, void *ptr)
+{
+ unsigned long ip, addr = (unsigned long) ptr;
+ struct ftrace_hash *hash;
+
+ ip = ftrace_location(tr->ip);
+ if (!ip)
+ return NULL;
+ hash = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS);
+ if (!hash)
+ return NULL;
+ if (bpf_trampoline_use_jmp(tr->flags))
+ addr = ftrace_jmp_set(addr);
+ if (!add_ftrace_hash_entry_direct(hash, ip, addr)) {
+ free_ftrace_hash(hash);
+ return NULL;
+ }
+ return hash;
+}
+
+static int direct_ops_add(struct bpf_trampoline *tr, void *addr)
+{
+ struct ftrace_hash *hash = hash_from_ip(tr, addr);
+ int err;
+
+ if (!hash)
+ return -ENOMEM;
+ err = update_ftrace_direct_add(tr->fops, hash);
+ free_ftrace_hash(hash);
+ return err;
+}
+
+static int direct_ops_del(struct bpf_trampoline *tr, void *addr)
+{
+ struct ftrace_hash *hash = hash_from_ip(tr, addr);
+ int err;
+
+ if (!hash)
+ return -ENOMEM;
+ err = update_ftrace_direct_del(tr->fops, hash);
+ free_ftrace_hash(hash);
+ return err;
+}
+
+static int direct_ops_mod(struct bpf_trampoline *tr, void *addr, bool lock_direct_mutex)
+{
+ struct ftrace_hash *hash = hash_from_ip(tr, addr);
+ int err;
+
+ if (!hash)
+ return -ENOMEM;
+ err = update_ftrace_direct_mod(tr->fops, hash, lock_direct_mutex);
+ free_ftrace_hash(hash);
+ return err;
+}
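+
+/*
+ * All three ops above share one pattern: build a single-entry
+ * ftrace_hash mapping tr->ip to the trampoline address, apply it to
+ * the shared direct_ops, then drop the temporary hash, e.g.:
+ *
+ *	hash = hash_from_ip(tr, addr);
+ *	err = update_ftrace_direct_add(tr->fops, hash);
+ *	free_ftrace_hash(hash);
+ */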
+#else
+/*
+ * We allocate an ftrace_ops object for each trampoline; it contains
+ * the call site specific to that trampoline.
+ *
+ * We use the *_ftrace_direct API for attachment.
+ */
+static int direct_ops_alloc(struct bpf_trampoline *tr)
+{
+ tr->fops = kzalloc(sizeof(struct ftrace_ops), GFP_KERNEL);
+ if (!tr->fops)
+ return -ENOMEM;
+ tr->fops->private = tr;
+ tr->fops->ops_func = bpf_tramp_ftrace_ops_func;
+ return 0;
+}
+
+static void direct_ops_free(struct bpf_trampoline *tr)
+{
+ if (!tr->fops)
+ return;
+ ftrace_free_filter(tr->fops);
+ kfree(tr->fops);
+}
+
+static int direct_ops_add(struct bpf_trampoline *tr, void *ptr)
+{
+ unsigned long addr = (unsigned long) ptr;
+ struct ftrace_ops *ops = tr->fops;
+ int ret;
+
+ if (bpf_trampoline_use_jmp(tr->flags))
+ addr = ftrace_jmp_set(addr);
+
+ ret = ftrace_set_filter_ip(ops, tr->ip, 0, 1);
+ if (ret)
+ return ret;
+ return register_ftrace_direct(ops, addr);
+}
+
+static int direct_ops_del(struct bpf_trampoline *tr, void *addr)
+{
+ return unregister_ftrace_direct(tr->fops, (long)addr, false);
+}
+
+static int direct_ops_mod(struct bpf_trampoline *tr, void *ptr, bool lock_direct_mutex)
+{
+ unsigned long addr = (unsigned long) ptr;
+ struct ftrace_ops *ops = tr->fops;
+
+ if (bpf_trampoline_use_jmp(tr->flags))
+ addr = ftrace_jmp_set(addr);
+ if (lock_direct_mutex)
+ return modify_ftrace_direct(ops, addr);
+ return modify_ftrace_direct_nolock(ops, addr);
+}
+#endif /* CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS */
+#else
+static void direct_ops_free(struct bpf_trampoline *tr) { }
+
+static int direct_ops_alloc(struct bpf_trampoline *tr)
+{
+ return 0;
+}
+
+static int direct_ops_add(struct bpf_trampoline *tr, void *addr)
+{
+ return -ENODEV;
+}
+
+static int direct_ops_del(struct bpf_trampoline *tr, void *addr)
+{
+ return -ENODEV;
+}
+
+static int direct_ops_mod(struct bpf_trampoline *tr, void *ptr, bool lock_direct_mutex)
+{
+ return -ENODEV;
+}
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
+
+static struct bpf_trampoline *bpf_trampoline_lookup(u64 key, unsigned long ip)
{
struct bpf_trampoline *tr;
struct hlist_head *head;
int i;
mutex_lock(&trampoline_mutex);
- head = &trampoline_table[hash_64(key, TRAMPOLINE_HASH_BITS)];
- hlist_for_each_entry(tr, head, hlist) {
+ head = &trampoline_key_table[hash_64(key, TRAMPOLINE_HASH_BITS)];
+ hlist_for_each_entry(tr, head, hlist_key) {
if (tr->key == key) {
refcount_inc(&tr->refcnt);
goto out;
@@ -152,20 +345,19 @@ static struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
tr = kzalloc(sizeof(*tr), GFP_KERNEL);
if (!tr)
goto out;
-#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
- tr->fops = kzalloc(sizeof(struct ftrace_ops), GFP_KERNEL);
- if (!tr->fops) {
+ if (direct_ops_alloc(tr)) {
kfree(tr);
tr = NULL;
goto out;
}
- tr->fops->private = tr;
- tr->fops->ops_func = bpf_tramp_ftrace_ops_func;
-#endif
tr->key = key;
- INIT_HLIST_NODE(&tr->hlist);
- hlist_add_head(&tr->hlist, head);
+ tr->ip = ftrace_location(ip);
+ INIT_HLIST_NODE(&tr->hlist_key);
+ INIT_HLIST_NODE(&tr->hlist_ip);
+ hlist_add_head(&tr->hlist_key, head);
+ head = &trampoline_ip_table[hash_64(tr->ip, TRAMPOLINE_HASH_BITS)];
+ hlist_add_head(&tr->hlist_ip, head);
refcount_set(&tr->refcnt, 1);
mutex_init(&tr->mutex);
for (i = 0; i < BPF_TRAMP_MAX; i++)
@@ -200,7 +392,7 @@ static int unregister_fentry(struct bpf_trampoline *tr, u32 orig_flags,
int ret;
if (tr->func.ftrace_managed)
- ret = unregister_ftrace_direct(tr->fops, (long)old_addr, false);
+ ret = direct_ops_del(tr, old_addr);
else
ret = bpf_trampoline_update_fentry(tr, orig_flags, old_addr, NULL);
@@ -214,10 +406,7 @@ static int modify_fentry(struct bpf_trampoline *tr, u32 orig_flags,
int ret;
if (tr->func.ftrace_managed) {
- if (lock_direct_mutex)
- ret = modify_ftrace_direct(tr->fops, (long)new_addr);
- else
- ret = modify_ftrace_direct_nolock(tr->fops, (long)new_addr);
+ ret = direct_ops_mod(tr, new_addr, lock_direct_mutex);
} else {
ret = bpf_trampoline_update_fentry(tr, orig_flags, old_addr,
new_addr);
@@ -240,10 +429,7 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
}
if (tr->func.ftrace_managed) {
- ret = ftrace_set_filter_ip(tr->fops, (unsigned long)ip, 0, 1);
- if (ret)
- return ret;
- ret = register_ftrace_direct(tr->fops, (long)new_addr);
+ ret = direct_ops_add(tr, new_addr);
} else {
ret = bpf_trampoline_update_fentry(tr, 0, NULL, new_addr);
}
@@ -499,13 +685,6 @@ again:
if (err)
goto out_free;
-#ifdef CONFIG_DYNAMIC_FTRACE_WITH_JMP
- if (bpf_trampoline_use_jmp(tr->flags))
- tr->fops->flags |= FTRACE_OPS_FL_JMP;
- else
- tr->fops->flags &= ~FTRACE_OPS_FL_JMP;
-#endif
-
WARN_ON(tr->cur_image && total == 0);
if (tr->cur_image)
/* progs already running at this address */
@@ -533,15 +712,8 @@ again:
tr->cur_image = im;
out:
/* If any error happens, restore previous flags */
- if (err) {
+ if (err)
tr->flags = orig_flags;
-#ifdef CONFIG_DYNAMIC_FTRACE_WITH_JMP
- if (bpf_trampoline_use_jmp(tr->flags))
- tr->fops->flags |= FTRACE_OPS_FL_JMP;
- else
- tr->fops->flags &= ~FTRACE_OPS_FL_JMP;
-#endif
- }
kfree(tlinks);
return err;
@@ -559,6 +731,8 @@ static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog)
return BPF_TRAMP_MODIFY_RETURN;
case BPF_TRACE_FEXIT:
return BPF_TRAMP_FEXIT;
+ case BPF_TRACE_FSESSION:
+ return BPF_TRAMP_FSESSION;
case BPF_LSM_MAC:
if (!prog->aux->attach_func_proto->type)
/* The function returns void, we cannot modify its
@@ -594,8 +768,10 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link,
struct bpf_trampoline *tr,
struct bpf_prog *tgt_prog)
{
+ struct bpf_fsession_link *fslink = NULL;
enum bpf_tramp_prog_type kind;
struct bpf_tramp_link *link_exiting;
+ struct hlist_head *prog_list;
int err = 0;
int cnt = 0, i;
@@ -621,24 +797,43 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link,
BPF_MOD_JUMP, NULL,
link->link.prog->bpf_func);
}
+ if (kind == BPF_TRAMP_FSESSION) {
+ prog_list = &tr->progs_hlist[BPF_TRAMP_FENTRY];
+ cnt++;
+ } else {
+ prog_list = &tr->progs_hlist[kind];
+ }
if (cnt >= BPF_MAX_TRAMP_LINKS)
return -E2BIG;
if (!hlist_unhashed(&link->tramp_hlist))
/* prog already linked */
return -EBUSY;
- hlist_for_each_entry(link_exiting, &tr->progs_hlist[kind], tramp_hlist) {
+ hlist_for_each_entry(link_exiting, prog_list, tramp_hlist) {
if (link_exiting->link.prog != link->link.prog)
continue;
/* prog already linked */
return -EBUSY;
}
- hlist_add_head(&link->tramp_hlist, &tr->progs_hlist[kind]);
- tr->progs_cnt[kind]++;
+ hlist_add_head(&link->tramp_hlist, prog_list);
+ if (kind == BPF_TRAMP_FSESSION) {
+ tr->progs_cnt[BPF_TRAMP_FENTRY]++;
+ fslink = container_of(link, struct bpf_fsession_link, link.link);
+ hlist_add_head(&fslink->fexit.tramp_hlist, &tr->progs_hlist[BPF_TRAMP_FEXIT]);
+ tr->progs_cnt[BPF_TRAMP_FEXIT]++;
+ } else {
+ tr->progs_cnt[kind]++;
+ }
err = bpf_trampoline_update(tr, true /* lock_direct_mutex */);
if (err) {
hlist_del_init(&link->tramp_hlist);
- tr->progs_cnt[kind]--;
+ if (kind == BPF_TRAMP_FSESSION) {
+ tr->progs_cnt[BPF_TRAMP_FENTRY]--;
+ hlist_del_init(&fslink->fexit.tramp_hlist);
+ tr->progs_cnt[BPF_TRAMP_FEXIT]--;
+ } else {
+ tr->progs_cnt[kind]--;
+ }
}
return err;
}
@@ -672,6 +867,13 @@ static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
guard(mutex)(&tgt_prog->aux->ext_mutex);
tgt_prog->aux->is_extended = false;
return err;
+ } else if (kind == BPF_TRAMP_FSESSION) {
+ struct bpf_fsession_link *fslink =
+ container_of(link, struct bpf_fsession_link, link.link);
+
+ hlist_del_init(&fslink->fexit.tramp_hlist);
+ tr->progs_cnt[BPF_TRAMP_FEXIT]--;
+ kind = BPF_TRAMP_FENTRY;
}
hlist_del_init(&link->tramp_hlist);
tr->progs_cnt[kind]--;
@@ -850,7 +1052,7 @@ void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog)
prog->aux->attach_btf_id);
bpf_lsm_find_cgroup_shim(prog, &bpf_func);
- tr = bpf_trampoline_lookup(key);
+ tr = bpf_trampoline_lookup(key, 0);
if (WARN_ON_ONCE(!tr))
return;
@@ -870,7 +1072,7 @@ struct bpf_trampoline *bpf_trampoline_get(u64 key,
{
struct bpf_trampoline *tr;
- tr = bpf_trampoline_lookup(key);
+ tr = bpf_trampoline_lookup(key, tgt_info->tgt_addr);
if (!tr)
return NULL;
@@ -906,11 +1108,9 @@ void bpf_trampoline_put(struct bpf_trampoline *tr)
* fexit progs. The fentry-only trampoline will be freed via
* multiple rcu callbacks.
*/
- hlist_del(&tr->hlist);
- if (tr->fops) {
- ftrace_free_filter(tr->fops);
- kfree(tr->fops);
- }
+ hlist_del(&tr->hlist_key);
+ hlist_del(&tr->hlist_ip);
+ direct_ops_free(tr);
kfree(tr);
out:
mutex_unlock(&trampoline_mutex);
@@ -949,7 +1149,7 @@ static u64 notrace __bpf_prog_enter_recur(struct bpf_prog *prog, struct bpf_tram
run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
- if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
+ if (unlikely(!bpf_prog_get_recursion_context(prog))) {
bpf_prog_inc_misses_counter(prog);
if (prog->aux->recursion_detected)
prog->aux->recursion_detected(prog);
@@ -993,7 +1193,7 @@ static void notrace __bpf_prog_exit_recur(struct bpf_prog *prog, u64 start,
bpf_reset_run_ctx(run_ctx->saved_run_ctx);
update_prog_stats(prog, start);
- this_cpu_dec(*(prog->active));
+ bpf_prog_put_recursion_context(prog);
rcu_read_unlock_migrate();
}
@@ -1029,7 +1229,7 @@ u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog,
run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
- if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
+ if (unlikely(!bpf_prog_get_recursion_context(prog))) {
bpf_prog_inc_misses_counter(prog);
if (prog->aux->recursion_detected)
prog->aux->recursion_detected(prog);
@@ -1044,7 +1244,7 @@ void notrace __bpf_prog_exit_sleepable_recur(struct bpf_prog *prog, u64 start,
bpf_reset_run_ctx(run_ctx->saved_run_ctx);
update_prog_stats(prog, start);
- this_cpu_dec(*(prog->active));
+ bpf_prog_put_recursion_context(prog);
migrate_enable();
rcu_read_unlock_trace();
}
@@ -1179,7 +1379,9 @@ static int __init init_trampolines(void)
int i;
for (i = 0; i < TRAMPOLINE_TABLE_SIZE; i++)
- INIT_HLIST_HEAD(&trampoline_table[i]);
+ INIT_HLIST_HEAD(&trampoline_key_table[i]);
+ for (i = 0; i < TRAMPOLINE_TABLE_SIZE; i++)
+ INIT_HLIST_HEAD(&trampoline_ip_table[i]);
return 0;
}
late_initcall(init_trampolines);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 3135643d5695..edf5342b982f 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -272,8 +272,13 @@ static bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn)
insn->src_reg == BPF_PSEUDO_KFUNC_CALL;
}
+struct bpf_map_desc {
+ struct bpf_map *ptr;
+ int uid;
+};
+
struct bpf_call_arg_meta {
- struct bpf_map *map_ptr;
+ struct bpf_map_desc map;
bool raw_mode;
bool pkt_access;
u8 release_regno;
@@ -283,7 +288,6 @@ struct bpf_call_arg_meta {
u64 msize_max_value;
int ref_obj_id;
int dynptr_id;
- int map_uid;
int func_id;
struct btf *btf;
u32 btf_id;
@@ -294,6 +298,14 @@ struct bpf_call_arg_meta {
s64 const_map_key;
};
+struct bpf_kfunc_meta {
+ struct btf *btf;
+ const struct btf_type *proto;
+ const char *name;
+ const u32 *flags;
+ s32 id;
+};
+
struct bpf_kfunc_call_arg_meta {
/* In parameters */
struct btf *btf;
@@ -343,10 +355,7 @@ struct bpf_kfunc_call_arg_meta {
u8 spi;
u8 frameno;
} iter;
- struct {
- struct bpf_map *ptr;
- int uid;
- } map;
+ struct bpf_map_desc map;
u64 mem_size;
};
@@ -512,7 +521,7 @@ static bool is_async_callback_calling_kfunc(u32 btf_id);
static bool is_callback_calling_kfunc(u32 btf_id);
static bool is_bpf_throw_kfunc(struct bpf_insn *insn);
-static bool is_bpf_wq_set_callback_impl_kfunc(u32 btf_id);
+static bool is_bpf_wq_set_callback_kfunc(u32 btf_id);
static bool is_task_work_add_kfunc(u32 func_id);
static bool is_sync_callback_calling_function(enum bpf_func_id func_id)
@@ -554,7 +563,7 @@ static bool is_async_cb_sleepable(struct bpf_verifier_env *env, struct bpf_insn
/* bpf_wq and bpf_task_work callbacks are always sleepable. */
if (bpf_pseudo_kfunc_call(insn) && insn->off == 0 &&
- (is_bpf_wq_set_callback_impl_kfunc(insn->imm) || is_task_work_add_kfunc(insn->imm)))
+ (is_bpf_wq_set_callback_kfunc(insn->imm) || is_task_work_add_kfunc(insn->imm)))
return true;
verifier_bug(env, "unhandled async callback in is_async_cb_sleepable");
@@ -2341,6 +2350,18 @@ static void __mark_reg32_unbounded(struct bpf_reg_state *reg)
reg->u32_max_value = U32_MAX;
}
+static void reset_reg64_and_tnum(struct bpf_reg_state *reg)
+{
+ __mark_reg64_unbounded(reg);
+ reg->var_off = tnum_unknown;
+}
+
+static void reset_reg32_and_tnum(struct bpf_reg_state *reg)
+{
+ __mark_reg32_unbounded(reg);
+ reg->var_off = tnum_unknown;
+}
+
static void __update_reg32_bounds(struct bpf_reg_state *reg)
{
struct tnum var32_off = tnum_subreg(reg->var_off);
@@ -3263,16 +3284,105 @@ static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, s16 offset)
return btf_vmlinux ?: ERR_PTR(-ENOENT);
}
-static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
+#define KF_IMPL_SUFFIX "_impl"
+
+static const struct btf_type *find_kfunc_impl_proto(struct bpf_verifier_env *env,
+ struct btf *btf,
+ const char *func_name)
+{
+ char *buf = env->tmp_str_buf;
+ const struct btf_type *func;
+ s32 impl_id;
+ int len;
+
+ len = snprintf(buf, TMP_STR_BUF_LEN, "%s%s", func_name, KF_IMPL_SUFFIX);
+ if (len < 0 || len >= TMP_STR_BUF_LEN) {
+ verbose(env, "function name %s%s is too long\n", func_name, KF_IMPL_SUFFIX);
+ return NULL;
+ }
+
+ impl_id = btf_find_by_name_kind(btf, buf, BTF_KIND_FUNC);
+ if (impl_id <= 0) {
+ verbose(env, "cannot find function %s in BTF\n", buf);
+ return NULL;
+ }
+
+ func = btf_type_by_id(btf, impl_id);
+
+ return btf_type_by_id(btf, func->type);
+}
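+
+/*
+ * Example: for a kfunc declared with KF_IMPLICIT_ARGS, such as
+ * bpf_wq_set_callback(), the real prototype is resolved from its
+ * bpf_wq_set_callback_impl() counterpart; the trailing implicit
+ * arguments (e.g. a bpf_prog_aux pointer) are supplied by the
+ * verifier rather than by the BPF program.
+ */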
+
+static int fetch_kfunc_meta(struct bpf_verifier_env *env,
+ s32 func_id,
+ s16 offset,
+ struct bpf_kfunc_meta *kfunc)
{
const struct btf_type *func, *func_proto;
+ const char *func_name;
+ u32 *kfunc_flags;
+ struct btf *btf;
+
+ if (func_id <= 0) {
+ verbose(env, "invalid kernel function btf_id %d\n", func_id);
+ return -EINVAL;
+ }
+
+ btf = find_kfunc_desc_btf(env, offset);
+ if (IS_ERR(btf)) {
+ verbose(env, "failed to find BTF for kernel function\n");
+ return PTR_ERR(btf);
+ }
+
+ /*
+ * Note that kfunc_flags may be NULL at this point, which
+ * means that we couldn't find func_id in any relevant
+ * kfunc_id_set. This most likely indicates an invalid kfunc
+ * call. However, we don't fail with an error here; instead we
+ * let the caller decide what to do with NULL kfunc->flags.
+ */
+ kfunc_flags = btf_kfunc_flags(btf, func_id, env->prog);
+
+ func = btf_type_by_id(btf, func_id);
+ if (!func || !btf_type_is_func(func)) {
+ verbose(env, "kernel btf_id %d is not a function\n", func_id);
+ return -EINVAL;
+ }
+
+ func_name = btf_name_by_offset(btf, func->name_off);
+
+ /*
+ * The actual prototype of a kfunc with the KF_IMPLICIT_ARGS flag
+ * can be found through its counterpart _impl kfunc.
+ */
+ if (kfunc_flags && (*kfunc_flags & KF_IMPLICIT_ARGS))
+ func_proto = find_kfunc_impl_proto(env, btf, func_name);
+ else
+ func_proto = btf_type_by_id(btf, func->type);
+
+ if (!func_proto || !btf_type_is_func_proto(func_proto)) {
+ verbose(env, "kernel function btf_id %d does not have a valid func_proto\n",
+ func_id);
+ return -EINVAL;
+ }
+
+ memset(kfunc, 0, sizeof(*kfunc));
+ kfunc->btf = btf;
+ kfunc->id = func_id;
+ kfunc->name = func_name;
+ kfunc->proto = func_proto;
+ kfunc->flags = kfunc_flags;
+
+ return 0;
+}
+
+static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
+{
struct bpf_kfunc_btf_tab *btf_tab;
struct btf_func_model func_model;
struct bpf_kfunc_desc_tab *tab;
struct bpf_prog_aux *prog_aux;
+ struct bpf_kfunc_meta kfunc;
struct bpf_kfunc_desc *desc;
- const char *func_name;
- struct btf *desc_btf;
unsigned long addr;
int err;
@@ -3322,12 +3432,6 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
prog_aux->kfunc_btf_tab = btf_tab;
}
- desc_btf = find_kfunc_desc_btf(env, offset);
- if (IS_ERR(desc_btf)) {
- verbose(env, "failed to find BTF for kernel function\n");
- return PTR_ERR(desc_btf);
- }
-
if (find_kfunc_desc(env->prog, func_id, offset))
return 0;
@@ -3336,24 +3440,13 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
return -E2BIG;
}
- func = btf_type_by_id(desc_btf, func_id);
- if (!func || !btf_type_is_func(func)) {
- verbose(env, "kernel btf_id %u is not a function\n",
- func_id);
- return -EINVAL;
- }
- func_proto = btf_type_by_id(desc_btf, func->type);
- if (!func_proto || !btf_type_is_func_proto(func_proto)) {
- verbose(env, "kernel function btf_id %u does not have a valid func_proto\n",
- func_id);
- return -EINVAL;
- }
+ err = fetch_kfunc_meta(env, func_id, offset, &kfunc);
+ if (err)
+ return err;
- func_name = btf_name_by_offset(desc_btf, func->name_off);
- addr = kallsyms_lookup_name(func_name);
+ addr = kallsyms_lookup_name(kfunc.name);
if (!addr) {
- verbose(env, "cannot find address for kernel function %s\n",
- func_name);
+ verbose(env, "cannot find address for kernel function %s\n", kfunc.name);
return -EINVAL;
}
@@ -3363,9 +3456,7 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
return err;
}
- err = btf_distill_func_proto(&env->log, desc_btf,
- func_proto, func_name,
- &func_model);
+ err = btf_distill_func_proto(&env->log, kfunc.btf, kfunc.proto, kfunc.name, &func_model);
if (err)
return err;
@@ -5427,6 +5518,12 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
*/
s32 subreg_def = state->regs[dst_regno].subreg_def;
+ if (env->bpf_capable && size == 4 && spill_size == 4 &&
+ get_reg_width(reg) <= 32)
+ /* Ensure stack slot has an ID to build a relation
+ * with the destination register on fill.
+ */
+ assign_scalar_id_before_mov(env, reg);
copy_register_state(&state->regs[dst_regno], reg);
state->regs[dst_regno].subreg_def = subreg_def;
@@ -5472,6 +5569,11 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
}
} else if (dst_regno >= 0) {
/* restore register state from stack */
+ if (env->bpf_capable)
+ /* Ensure stack slot has an ID to build a relation
+ * with the destination register on fill.
+ */
+ assign_scalar_id_before_mov(env, reg);
copy_register_state(&state->regs[dst_regno], reg);
/* mark reg as written since spilled pointer state likely
* has its liveness marks cleared by is_state_visited()
@@ -5654,8 +5756,8 @@ static int check_stack_write(struct bpf_verifier_env *env,
static int check_map_access_type(struct bpf_verifier_env *env, u32 regno,
int off, int size, enum bpf_access_type type)
{
- struct bpf_reg_state *regs = cur_regs(env);
- struct bpf_map *map = regs[regno].map_ptr;
+ struct bpf_reg_state *reg = reg_state(env, regno);
+ struct bpf_map *map = reg->map_ptr;
u32 cap = bpf_map_flags_to_cap(map);
if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) {
@@ -6168,8 +6270,7 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
int size, bool zero_size_allowed)
{
- struct bpf_reg_state *regs = cur_regs(env);
- struct bpf_reg_state *reg = &regs[regno];
+ struct bpf_reg_state *reg = reg_state(env, regno);
int err;
/* We may have added a variable offset to the packet pointer; but any
@@ -6256,8 +6357,7 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
u32 regno, int off, int size,
enum bpf_access_type t)
{
- struct bpf_reg_state *regs = cur_regs(env);
- struct bpf_reg_state *reg = &regs[regno];
+ struct bpf_reg_state *reg = reg_state(env, regno);
struct bpf_insn_access_aux info = {};
bool valid;
@@ -7453,8 +7553,7 @@ static int check_stack_access_within_bounds(
int regno, int off, int access_size,
enum bpf_access_type type)
{
- struct bpf_reg_state *regs = cur_regs(env);
- struct bpf_reg_state *reg = regs + regno;
+ struct bpf_reg_state *reg = reg_state(env, regno);
struct bpf_func_state *state = func(env, reg);
s64 min_off, max_off;
int err;
@@ -8408,7 +8507,7 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno, int flags)
{
bool is_lock = flags & PROCESS_SPIN_LOCK, is_res_lock = flags & PROCESS_RES_LOCK;
const char *lock_str = is_res_lock ? "bpf_res_spin" : "bpf_spin";
- struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
+ struct bpf_reg_state *reg = reg_state(env, regno);
struct bpf_verifier_state *cur = env->cur_state;
bool is_const = tnum_is_const(reg->var_off);
bool is_irq = flags & PROCESS_LOCK_IRQ;
@@ -8522,9 +8621,10 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno, int flags)
/* Check if @regno is a pointer to a specific field in a map value */
static int check_map_field_pointer(struct bpf_verifier_env *env, u32 regno,
- enum btf_field_type field_type)
+ enum btf_field_type field_type,
+ struct bpf_map_desc *map_desc)
{
- struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
+ struct bpf_reg_state *reg = reg_state(env, regno);
bool is_const = tnum_is_const(reg->var_off);
struct bpf_map *map = reg->map_ptr;
u64 val = reg->var_off.value;
@@ -8565,78 +8665,41 @@ static int check_map_field_pointer(struct bpf_verifier_env *env, u32 regno,
val + reg->off, struct_name, field_off);
return -EINVAL;
}
+ if (map_desc->ptr) {
+ verifier_bug(env, "Two map pointers in a %s helper", struct_name);
+ return -EFAULT;
+ }
+ map_desc->uid = reg->map_uid;
+ map_desc->ptr = map;
return 0;
}
static int process_timer_func(struct bpf_verifier_env *env, int regno,
- struct bpf_call_arg_meta *meta)
+ struct bpf_map_desc *map)
{
- struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
- struct bpf_map *map = reg->map_ptr;
- int err;
-
- err = check_map_field_pointer(env, regno, BPF_TIMER);
- if (err)
- return err;
-
- if (meta->map_ptr) {
- verifier_bug(env, "Two map pointers in a timer helper");
- return -EFAULT;
- }
if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
verbose(env, "bpf_timer cannot be used for PREEMPT_RT.\n");
return -EOPNOTSUPP;
}
- meta->map_uid = reg->map_uid;
- meta->map_ptr = map;
- return 0;
+ return check_map_field_pointer(env, regno, BPF_TIMER, map);
}
-static int process_wq_func(struct bpf_verifier_env *env, int regno,
- struct bpf_kfunc_call_arg_meta *meta)
+static int process_timer_helper(struct bpf_verifier_env *env, int regno,
+ struct bpf_call_arg_meta *meta)
{
- struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
- struct bpf_map *map = reg->map_ptr;
- int err;
-
- err = check_map_field_pointer(env, regno, BPF_WORKQUEUE);
- if (err)
- return err;
-
- if (meta->map.ptr) {
- verifier_bug(env, "Two map pointers in a bpf_wq helper");
- return -EFAULT;
- }
-
- meta->map.uid = reg->map_uid;
- meta->map.ptr = map;
- return 0;
+ return process_timer_func(env, regno, &meta->map);
}
-static int process_task_work_func(struct bpf_verifier_env *env, int regno,
- struct bpf_kfunc_call_arg_meta *meta)
+static int process_timer_kfunc(struct bpf_verifier_env *env, int regno,
+ struct bpf_kfunc_call_arg_meta *meta)
{
- struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
- struct bpf_map *map = reg->map_ptr;
- int err;
-
- err = check_map_field_pointer(env, regno, BPF_TASK_WORK);
- if (err)
- return err;
-
- if (meta->map.ptr) {
- verifier_bug(env, "Two map pointers in a bpf_task_work helper");
- return -EFAULT;
- }
- meta->map.uid = reg->map_uid;
- meta->map.ptr = map;
- return 0;
+ return process_timer_func(env, regno, &meta->map);
}
static int process_kptr_func(struct bpf_verifier_env *env, int regno,
struct bpf_call_arg_meta *meta)
{
- struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
+ struct bpf_reg_state *reg = reg_state(env, regno);
struct btf_field *kptr_field;
struct bpf_map *map_ptr;
struct btf_record *rec;
@@ -8652,7 +8715,7 @@ static int process_kptr_func(struct bpf_verifier_env *env, int regno,
return -EINVAL;
}
rec = map_ptr->record;
- meta->map_ptr = map_ptr;
+ meta->map.ptr = map_ptr;
}
if (!tnum_is_const(reg->var_off)) {
@@ -8709,7 +8772,7 @@ static int process_kptr_func(struct bpf_verifier_env *env, int regno,
static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn_idx,
enum bpf_arg_type arg_type, int clone_ref_obj_id)
{
- struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
+ struct bpf_reg_state *reg = reg_state(env, regno);
int err;
if (reg->type != PTR_TO_STACK && reg->type != CONST_PTR_TO_DYNPTR) {
@@ -8829,7 +8892,7 @@ static bool is_kfunc_arg_iter(struct bpf_kfunc_call_arg_meta *meta, int arg_idx,
static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_idx,
struct bpf_kfunc_call_arg_meta *meta)
{
- struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
+ struct bpf_reg_state *reg = reg_state(env, regno);
const struct btf_type *t;
int spi, err, i, nr_slots, btf_id;
@@ -8944,15 +9007,24 @@ static bool regs_exact(const struct bpf_reg_state *rold,
const struct bpf_reg_state *rcur,
struct bpf_idmap *idmap);
+/*
+ * Check whether scalar registers are exact enough that widening can
+ * be skipped. More lenient than regs_exact().
+ */
+static bool scalars_exact_for_widen(const struct bpf_reg_state *rold,
+ const struct bpf_reg_state *rcur)
+{
+ return !memcmp(rold, rcur, offsetof(struct bpf_reg_state, id));
+}
+
static void maybe_widen_reg(struct bpf_verifier_env *env,
- struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
- struct bpf_idmap *idmap)
+ struct bpf_reg_state *rold, struct bpf_reg_state *rcur)
{
if (rold->type != SCALAR_VALUE)
return;
if (rold->type != rcur->type)
return;
- if (rold->precise || rcur->precise || regs_exact(rold, rcur, idmap))
+ if (rold->precise || rcur->precise || scalars_exact_for_widen(rold, rcur))
return;
__mark_reg_unknown(env, rcur);
}
@@ -8964,7 +9036,6 @@ static int widen_imprecise_scalars(struct bpf_verifier_env *env,
struct bpf_func_state *fold, *fcur;
int i, fr, num_slots;
- reset_idmap_scratch(env);
for (fr = old->curframe; fr >= 0; fr--) {
fold = old->frame[fr];
fcur = cur->frame[fr];
@@ -8972,8 +9043,7 @@ static int widen_imprecise_scalars(struct bpf_verifier_env *env,
for (i = 0; i < MAX_BPF_REG; i++)
maybe_widen_reg(env,
&fold->regs[i],
- &fcur->regs[i],
- &env->idmap_scratch);
+ &fcur->regs[i]);
num_slots = min(fold->allocated_stack / BPF_REG_SIZE,
fcur->allocated_stack / BPF_REG_SIZE);
@@ -8984,8 +9054,7 @@ static int widen_imprecise_scalars(struct bpf_verifier_env *env,
maybe_widen_reg(env,
&fold->stack[i].spilled_ptr,
- &fcur->stack[i].spilled_ptr,
- &env->idmap_scratch);
+ &fcur->stack[i].spilled_ptr);
}
}
return 0;
@@ -9159,13 +9228,13 @@ static int resolve_map_arg_type(struct bpf_verifier_env *env,
const struct bpf_call_arg_meta *meta,
enum bpf_arg_type *arg_type)
{
- if (!meta->map_ptr) {
+ if (!meta->map.ptr) {
/* kernel subsystem misconfigured verifier */
verifier_bug(env, "invalid map_ptr to access map->type");
return -EFAULT;
}
- switch (meta->map_ptr->map_type) {
+ switch (meta->map.ptr->map_type) {
case BPF_MAP_TYPE_SOCKMAP:
case BPF_MAP_TYPE_SOCKHASH:
if (*arg_type == ARG_PTR_TO_MAP_VALUE) {
@@ -9301,7 +9370,7 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
const u32 *arg_btf_id,
struct bpf_call_arg_meta *meta)
{
- struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
+ struct bpf_reg_state *reg = reg_state(env, regno);
enum bpf_reg_type expected, type = reg->type;
const struct bpf_reg_types *compatible;
int i, j;
@@ -9719,7 +9788,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
int insn_idx)
{
u32 regno = BPF_REG_1 + arg;
- struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
+ struct bpf_reg_state *reg = reg_state(env, regno);
enum bpf_arg_type arg_type = fn->arg_type[arg];
enum bpf_reg_type type = reg->type;
u32 *arg_btf_id = NULL;
@@ -9819,7 +9888,7 @@ skip_type_check:
switch (base_type(arg_type)) {
case ARG_CONST_MAP_PTR:
/* bpf_map_xxx(map_ptr) call: remember that map_ptr */
- if (meta->map_ptr) {
+ if (meta->map.ptr) {
/* Use map_uid (which is unique id of inner map) to reject:
* inner_map1 = bpf_map_lookup_elem(outer_map, key1)
* inner_map2 = bpf_map_lookup_elem(outer_map, key2)
@@ -9832,23 +9901,23 @@ skip_type_check:
*
* Comparing map_ptr is enough to distinguish normal and outer maps.
*/
- if (meta->map_ptr != reg->map_ptr ||
- meta->map_uid != reg->map_uid) {
+ if (meta->map.ptr != reg->map_ptr ||
+ meta->map.uid != reg->map_uid) {
verbose(env,
"timer pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n",
- meta->map_uid, reg->map_uid);
+ meta->map.uid, reg->map_uid);
return -EINVAL;
}
}
- meta->map_ptr = reg->map_ptr;
- meta->map_uid = reg->map_uid;
+ meta->map.ptr = reg->map_ptr;
+ meta->map.uid = reg->map_uid;
break;
case ARG_PTR_TO_MAP_KEY:
/* bpf_map_xxx(..., map_ptr, ..., key) call:
* check that [key, key + map->key_size) are within
* stack limits and initialized
*/
- if (!meta->map_ptr) {
+ if (!meta->map.ptr) {
/* in function declaration map_ptr must come before
* map_key, so that it's verified and known before
* we have to check map_key here. Otherwise it means
@@ -9857,11 +9926,11 @@ skip_type_check:
verifier_bug(env, "invalid map_ptr to access map->key");
return -EFAULT;
}
- key_size = meta->map_ptr->key_size;
+ key_size = meta->map.ptr->key_size;
err = check_helper_mem_access(env, regno, key_size, BPF_READ, false, NULL);
if (err)
return err;
- if (can_elide_value_nullness(meta->map_ptr->map_type)) {
+ if (can_elide_value_nullness(meta->map.ptr->map_type)) {
err = get_constant_map_key(env, reg, key_size, &meta->const_map_key);
if (err < 0) {
meta->const_map_key = -1;
@@ -9879,13 +9948,13 @@ skip_type_check:
/* bpf_map_xxx(..., map_ptr, ..., value) call:
* check [value, value + map->value_size) validity
*/
- if (!meta->map_ptr) {
+ if (!meta->map.ptr) {
/* kernel subsystem misconfigured verifier */
verifier_bug(env, "invalid map_ptr to access map->value");
return -EFAULT;
}
meta->raw_mode = arg_type & MEM_UNINIT;
- err = check_helper_mem_access(env, regno, meta->map_ptr->value_size,
+ err = check_helper_mem_access(env, regno, meta->map.ptr->value_size,
arg_type & MEM_WRITE ? BPF_WRITE : BPF_READ,
false, meta);
break;
@@ -9916,7 +9985,7 @@ skip_type_check:
}
break;
case ARG_PTR_TO_TIMER:
- err = process_timer_func(env, regno, meta);
+ err = process_timer_helper(env, regno, meta);
if (err)
return err;
break;
@@ -10354,10 +10423,27 @@ static bool check_btf_id_ok(const struct bpf_func_proto *fn)
return true;
}
-static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
+static bool check_mem_arg_rw_flag_ok(const struct bpf_func_proto *fn)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
+ enum bpf_arg_type arg_type = fn->arg_type[i];
+
+ if (base_type(arg_type) != ARG_PTR_TO_MEM)
+ continue;
+ if (!(arg_type & (MEM_WRITE | MEM_RDONLY)))
+ return false;
+ }
+
+ return true;
+}
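+
+/*
+ * Every ARG_PTR_TO_MEM in a helper proto must now declare its access
+ * direction explicitly, e.g. as bpf_kallsyms_lookup_name_proto does:
+ *
+ *	.arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ */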
+
+static int check_func_proto(const struct bpf_func_proto *fn)
{
return check_raw_mode_ok(fn) &&
check_arg_pair_ok(fn) &&
+ check_mem_arg_rw_flag_ok(fn) &&
check_btf_id_ok(fn) ? 0 : -EINVAL;
}
@@ -11206,7 +11292,7 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
int func_id, int insn_idx)
{
struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
- struct bpf_map *map = meta->map_ptr;
+ struct bpf_map *map = meta->map.ptr;
if (func_id != BPF_FUNC_tail_call &&
func_id != BPF_FUNC_map_lookup_elem &&
@@ -11239,11 +11325,11 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
}
if (!aux->map_ptr_state.map_ptr)
- bpf_map_ptr_store(aux, meta->map_ptr,
- !meta->map_ptr->bypass_spec_v1, false);
- else if (aux->map_ptr_state.map_ptr != meta->map_ptr)
- bpf_map_ptr_store(aux, meta->map_ptr,
- !meta->map_ptr->bypass_spec_v1, true);
+ bpf_map_ptr_store(aux, meta->map.ptr,
+ !meta->map.ptr->bypass_spec_v1, false);
+ else if (aux->map_ptr_state.map_ptr != meta->map.ptr)
+ bpf_map_ptr_store(aux, meta->map.ptr,
+ !meta->map.ptr->bypass_spec_v1, true);
return 0;
}
@@ -11252,8 +11338,8 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
int func_id, int insn_idx)
{
struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
- struct bpf_reg_state *regs = cur_regs(env), *reg;
- struct bpf_map *map = meta->map_ptr;
+ struct bpf_reg_state *reg;
+ struct bpf_map *map = meta->map.ptr;
u64 val, max;
int err;
@@ -11264,7 +11350,7 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
return -EINVAL;
}
- reg = &regs[BPF_REG_3];
+ reg = reg_state(env, BPF_REG_3);
val = reg->var_off.value;
max = map->max_entries;
@@ -11410,8 +11496,7 @@ static struct bpf_insn_aux_data *cur_aux(const struct bpf_verifier_env *env)
static bool loop_flag_is_zero(struct bpf_verifier_env *env)
{
- struct bpf_reg_state *regs = cur_regs(env);
- struct bpf_reg_state *reg = &regs[BPF_REG_4];
+ struct bpf_reg_state *reg = reg_state(env, BPF_REG_4);
bool reg_is_null = register_is_null(reg);
if (reg_is_null)
@@ -11471,6 +11556,7 @@ static inline bool in_sleepable_context(struct bpf_verifier_env *env)
{
return !env->cur_state->active_rcu_locks &&
!env->cur_state->active_preempt_locks &&
+ !env->cur_state->active_locks &&
!env->cur_state->active_irq_id &&
in_sleepable(env);
}
@@ -11529,7 +11615,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
memset(&meta, 0, sizeof(meta));
meta.pkt_access = fn->pkt_access;
- err = check_func_proto(fn, func_id);
+ err = check_func_proto(fn);
if (err) {
verifier_bug(env, "incorrect func proto %s#%d", func_id_name(func_id), func_id);
return err;
@@ -11809,22 +11895,22 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
* can check 'value_size' boundary of memory access
* to map element returned from bpf_map_lookup_elem()
*/
- if (meta.map_ptr == NULL) {
+ if (meta.map.ptr == NULL) {
verifier_bug(env, "unexpected null map_ptr");
return -EFAULT;
}
if (func_id == BPF_FUNC_map_lookup_elem &&
- can_elide_value_nullness(meta.map_ptr->map_type) &&
+ can_elide_value_nullness(meta.map.ptr->map_type) &&
meta.const_map_key >= 0 &&
- meta.const_map_key < meta.map_ptr->max_entries)
+ meta.const_map_key < meta.map.ptr->max_entries)
ret_flag &= ~PTR_MAYBE_NULL;
- regs[BPF_REG_0].map_ptr = meta.map_ptr;
- regs[BPF_REG_0].map_uid = meta.map_uid;
+ regs[BPF_REG_0].map_ptr = meta.map.ptr;
+ regs[BPF_REG_0].map_uid = meta.map.uid;
regs[BPF_REG_0].type = PTR_TO_MAP_VALUE | ret_flag;
if (!type_may_be_null(ret_flag) &&
- btf_record_has_field(meta.map_ptr->record, BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK)) {
+ btf_record_has_field(meta.map.ptr->record, BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK)) {
regs[BPF_REG_0].id = ++env->id_gen;
}
break;
@@ -11927,7 +12013,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
if (type_may_be_null(regs[BPF_REG_0].type))
regs[BPF_REG_0].id = ++env->id_gen;
- if (helper_multiple_ref_obj_use(func_id, meta.map_ptr)) {
+ if (helper_multiple_ref_obj_use(func_id, meta.map.ptr)) {
verifier_bug(env, "func %s#%d sets ref_obj_id more than once",
func_id_name(func_id), func_id);
return -EFAULT;
@@ -11939,7 +12025,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
if (is_ptr_cast_function(func_id) || is_dynptr_ref_function(func_id)) {
/* For release_reference() */
regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
- } else if (is_acquire_function(func_id, meta.map_ptr)) {
+ } else if (is_acquire_function(func_id, meta.map.ptr)) {
int id = acquire_reference(env, insn_idx);
if (id < 0)
@@ -11954,7 +12040,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
if (err)
return err;
- err = check_map_func_compatibility(env, meta.map_ptr, func_id);
+ err = check_map_func_compatibility(env, meta.map.ptr, func_id);
if (err)
return err;
@@ -12045,11 +12131,6 @@ static bool is_kfunc_release(struct bpf_kfunc_call_arg_meta *meta)
return meta->kfunc_flags & KF_RELEASE;
}
-static bool is_kfunc_trusted_args(struct bpf_kfunc_call_arg_meta *meta)
-{
- return (meta->kfunc_flags & KF_TRUSTED_ARGS) || is_kfunc_release(meta);
-}
-
static bool is_kfunc_sleepable(struct bpf_kfunc_call_arg_meta *meta)
{
return meta->kfunc_flags & KF_SLEEPABLE;
@@ -12096,11 +12177,6 @@ static bool is_kfunc_arg_const_mem_size(const struct btf *btf,
return btf_param_match_suffix(btf, arg, "__szk");
}
-static bool is_kfunc_arg_optional(const struct btf *btf, const struct btf_param *arg)
-{
- return btf_param_match_suffix(btf, arg, "__opt");
-}
-
static bool is_kfunc_arg_constant(const struct btf *btf, const struct btf_param *arg)
{
return btf_param_match_suffix(btf, arg, "__k");
@@ -12146,11 +12222,6 @@ static bool is_kfunc_arg_irq_flag(const struct btf *btf, const struct btf_param
return btf_param_match_suffix(btf, arg, "__irq_flag");
}
-static bool is_kfunc_arg_prog(const struct btf *btf, const struct btf_param *arg)
-{
- return btf_param_match_suffix(btf, arg, "__prog");
-}
-
static bool is_kfunc_arg_scalar_with_name(const struct btf *btf,
const struct btf_param *arg,
const char *name)
@@ -12179,6 +12250,8 @@ enum {
KF_ARG_WORKQUEUE_ID,
KF_ARG_RES_SPIN_LOCK_ID,
KF_ARG_TASK_WORK_ID,
+ KF_ARG_PROG_AUX_ID,
+ KF_ARG_TIMER_ID
};
BTF_ID_LIST(kf_arg_btf_ids)
@@ -12190,6 +12263,8 @@ BTF_ID(struct, bpf_rb_node)
BTF_ID(struct, bpf_wq)
BTF_ID(struct, bpf_res_spin_lock)
BTF_ID(struct, bpf_task_work)
+BTF_ID(struct, bpf_prog_aux)
+BTF_ID(struct, bpf_timer)
static bool __is_kfunc_ptr_arg_type(const struct btf *btf,
const struct btf_param *arg, int type)
@@ -12233,6 +12308,11 @@ static bool is_kfunc_arg_rbtree_node(const struct btf *btf, const struct btf_par
return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_NODE_ID);
}
+static bool is_kfunc_arg_timer(const struct btf *btf, const struct btf_param *arg)
+{
+ return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_TIMER_ID);
+}
+
static bool is_kfunc_arg_wq(const struct btf *btf, const struct btf_param *arg)
{
return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_WORKQUEUE_ID);
@@ -12270,6 +12350,11 @@ static bool is_kfunc_arg_callback(struct bpf_verifier_env *env, const struct btf
return true;
}
+static bool is_kfunc_arg_prog_aux(const struct btf *btf, const struct btf_param *arg)
+{
+ return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_PROG_AUX_ID);
+}
+
/* Returns true if struct is composed of scalars, 4 levels of nesting allowed */
static bool __btf_type_is_scalar_struct(struct bpf_verifier_env *env,
const struct btf *btf,
@@ -12327,6 +12412,7 @@ enum kfunc_ptr_arg_type {
KF_ARG_PTR_TO_NULL,
KF_ARG_PTR_TO_CONST_STR,
KF_ARG_PTR_TO_MAP,
+ KF_ARG_PTR_TO_TIMER,
KF_ARG_PTR_TO_WORKQUEUE,
KF_ARG_PTR_TO_IRQ_FLAG,
KF_ARG_PTR_TO_RES_SPIN_LOCK,
@@ -12363,7 +12449,7 @@ enum special_kfunc_type {
KF_bpf_percpu_obj_new_impl,
KF_bpf_percpu_obj_drop_impl,
KF_bpf_throw,
- KF_bpf_wq_set_callback_impl,
+ KF_bpf_wq_set_callback,
KF_bpf_preempt_disable,
KF_bpf_preempt_enable,
KF_bpf_iter_css_task_new,
@@ -12383,8 +12469,14 @@ enum special_kfunc_type {
KF_bpf_dynptr_from_file,
KF_bpf_dynptr_file_discard,
KF___bpf_trap,
- KF_bpf_task_work_schedule_signal_impl,
- KF_bpf_task_work_schedule_resume_impl,
+ KF_bpf_task_work_schedule_signal,
+ KF_bpf_task_work_schedule_resume,
+ KF_bpf_arena_alloc_pages,
+ KF_bpf_arena_free_pages,
+ KF_bpf_arena_reserve_pages,
+ KF_bpf_session_is_return,
+ KF_bpf_stream_vprintk,
+ KF_bpf_stream_print_stack,
};
BTF_ID_LIST(special_kfunc_list)
@@ -12424,7 +12516,7 @@ BTF_ID(func, bpf_dynptr_clone)
BTF_ID(func, bpf_percpu_obj_new_impl)
BTF_ID(func, bpf_percpu_obj_drop_impl)
BTF_ID(func, bpf_throw)
-BTF_ID(func, bpf_wq_set_callback_impl)
+BTF_ID(func, bpf_wq_set_callback)
BTF_ID(func, bpf_preempt_disable)
BTF_ID(func, bpf_preempt_enable)
#ifdef CONFIG_CGROUPS
@@ -12457,13 +12549,19 @@ BTF_ID(func, bpf_res_spin_unlock_irqrestore)
BTF_ID(func, bpf_dynptr_from_file)
BTF_ID(func, bpf_dynptr_file_discard)
BTF_ID(func, __bpf_trap)
-BTF_ID(func, bpf_task_work_schedule_signal_impl)
-BTF_ID(func, bpf_task_work_schedule_resume_impl)
+BTF_ID(func, bpf_task_work_schedule_signal)
+BTF_ID(func, bpf_task_work_schedule_resume)
+BTF_ID(func, bpf_arena_alloc_pages)
+BTF_ID(func, bpf_arena_free_pages)
+BTF_ID(func, bpf_arena_reserve_pages)
+BTF_ID(func, bpf_session_is_return)
+BTF_ID(func, bpf_stream_vprintk)
+BTF_ID(func, bpf_stream_print_stack)
static bool is_task_work_add_kfunc(u32 func_id)
{
- return func_id == special_kfunc_list[KF_bpf_task_work_schedule_signal_impl] ||
- func_id == special_kfunc_list[KF_bpf_task_work_schedule_resume_impl];
+ return func_id == special_kfunc_list[KF_bpf_task_work_schedule_signal] ||
+ func_id == special_kfunc_list[KF_bpf_task_work_schedule_resume];
}
static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
@@ -12513,9 +12611,16 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
struct bpf_reg_state *reg = &regs[regno];
bool arg_mem_size = false;
- if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx])
+ if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
+ meta->func_id == special_kfunc_list[KF_bpf_session_is_return] ||
+ meta->func_id == special_kfunc_list[KF_bpf_session_cookie])
return KF_ARG_PTR_TO_CTX;
+ if (argno + 1 < nargs &&
+ (is_kfunc_arg_mem_size(meta->btf, &args[argno + 1], &regs[regno + 1]) ||
+ is_kfunc_arg_const_mem_size(meta->btf, &args[argno + 1], &regs[regno + 1])))
+ arg_mem_size = true;
+
/* In this function, we verify the kfunc's BTF as per the argument type,
* leaving the rest of the verification with respect to the register
* type to our caller. When a set of conditions hold in the BTF type of
@@ -12524,7 +12629,8 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
if (btf_is_prog_ctx_type(&env->log, meta->btf, t, resolve_prog_type(env->prog), argno))
return KF_ARG_PTR_TO_CTX;
- if (is_kfunc_arg_nullable(meta->btf, &args[argno]) && register_is_null(reg))
+ if (is_kfunc_arg_nullable(meta->btf, &args[argno]) && register_is_null(reg) &&
+ !arg_mem_size)
return KF_ARG_PTR_TO_NULL;
if (is_kfunc_arg_alloc_obj(meta->btf, &args[argno]))
@@ -12560,6 +12666,9 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
if (is_kfunc_arg_wq(meta->btf, &args[argno]))
return KF_ARG_PTR_TO_WORKQUEUE;
+ if (is_kfunc_arg_timer(meta->btf, &args[argno]))
+ return KF_ARG_PTR_TO_TIMER;
+
if (is_kfunc_arg_task_work(meta->btf, &args[argno]))
return KF_ARG_PTR_TO_TASK_WORK;
@@ -12581,11 +12690,6 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
if (is_kfunc_arg_callback(env, meta->btf, &args[argno]))
return KF_ARG_PTR_TO_CALLBACK;
- if (argno + 1 < nargs &&
- (is_kfunc_arg_mem_size(meta->btf, &args[argno + 1], &regs[regno + 1]) ||
- is_kfunc_arg_const_mem_size(meta->btf, &args[argno + 1], &regs[regno + 1])))
- arg_mem_size = true;
-
/* This is the catch all argument type of register types supported by
* check_helper_mem_access. However, we only allow when argument type is
* pointer to scalar, or struct composed (recursively) of scalars. When
@@ -12625,7 +12729,7 @@ static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env,
/* Enforce strict type matching for calls to kfuncs that are acquiring
* or releasing a reference, or are no-cast aliases. We do _not_
- * enforce strict matching for plain KF_TRUSTED_ARGS kfuncs by default,
+ * enforce strict matching for kfuncs by default,
* as we want to enable BPF programs to pass types that are bitwise
* equivalent without forcing them to explicitly cast with something
* like bpf_cast_to_kern_ctx().
@@ -12675,7 +12779,7 @@ static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env,
static int process_irq_flag(struct bpf_verifier_env *env, int regno,
struct bpf_kfunc_call_arg_meta *meta)
{
- struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
+ struct bpf_reg_state *reg = reg_state(env, regno);
int err, kfunc_class = IRQ_NATIVE_KFUNC;
bool irq_save;
@@ -12893,10 +12997,24 @@ static bool is_bpf_res_spin_lock_kfunc(u32 btf_id)
btf_id == special_kfunc_list[KF_bpf_res_spin_unlock_irqrestore];
}
+static bool is_bpf_arena_kfunc(u32 btf_id)
+{
+ return btf_id == special_kfunc_list[KF_bpf_arena_alloc_pages] ||
+ btf_id == special_kfunc_list[KF_bpf_arena_free_pages] ||
+ btf_id == special_kfunc_list[KF_bpf_arena_reserve_pages];
+}
+
+static bool is_bpf_stream_kfunc(u32 btf_id)
+{
+ return btf_id == special_kfunc_list[KF_bpf_stream_vprintk] ||
+ btf_id == special_kfunc_list[KF_bpf_stream_print_stack];
+}
+
static bool kfunc_spin_allowed(u32 btf_id)
{
return is_bpf_graph_api_kfunc(btf_id) || is_bpf_iter_num_api_kfunc(btf_id) ||
- is_bpf_res_spin_lock_kfunc(btf_id);
+ is_bpf_res_spin_lock_kfunc(btf_id) || is_bpf_arena_kfunc(btf_id) ||
+ is_bpf_stream_kfunc(btf_id);
}
static bool is_sync_callback_calling_kfunc(u32 btf_id)
@@ -12906,7 +13024,7 @@ static bool is_sync_callback_calling_kfunc(u32 btf_id)
static bool is_async_callback_calling_kfunc(u32 btf_id)
{
- return btf_id == special_kfunc_list[KF_bpf_wq_set_callback_impl] ||
+ return is_bpf_wq_set_callback_kfunc(btf_id) ||
is_task_work_add_kfunc(btf_id);
}
@@ -12916,9 +13034,9 @@ static bool is_bpf_throw_kfunc(struct bpf_insn *insn)
insn->imm == special_kfunc_list[KF_bpf_throw];
}
-static bool is_bpf_wq_set_callback_impl_kfunc(u32 btf_id)
+static bool is_bpf_wq_set_callback_kfunc(u32 btf_id)
{
- return btf_id == special_kfunc_list[KF_bpf_wq_set_callback_impl];
+ return btf_id == special_kfunc_list[KF_bpf_wq_set_callback];
}
static bool is_callback_calling_kfunc(u32 btf_id)
@@ -13192,8 +13310,8 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
if (is_kfunc_arg_ignore(btf, &args[i]))
continue;
- if (is_kfunc_arg_prog(btf, &args[i])) {
- /* Used to reject repeated use of __prog. */
+ if (is_kfunc_arg_prog_aux(btf, &args[i])) {
+ /* Reject repeated use of bpf_prog_aux */
if (meta->arg_prog) {
verifier_bug(env, "Only 1 prog->aux argument supported per-kfunc");
return -EFAULT;
@@ -13254,9 +13372,8 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
return -EINVAL;
}
- if ((is_kfunc_trusted_args(meta) || is_kfunc_rcu(meta)) &&
- (register_is_null(reg) || type_may_be_null(reg->type)) &&
- !is_kfunc_arg_nullable(meta->btf, &args[i])) {
+ if ((register_is_null(reg) || type_may_be_null(reg->type)) &&
+ !is_kfunc_arg_nullable(meta->btf, &args[i])) {
verbose(env, "Possibly NULL pointer passed to trusted arg%d\n", i);
return -EACCES;
}
@@ -13321,9 +13438,6 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
fallthrough;
case KF_ARG_PTR_TO_ALLOC_BTF_ID:
case KF_ARG_PTR_TO_BTF_ID:
- if (!is_kfunc_trusted_args(meta) && !is_kfunc_rcu(meta))
- break;
-
if (!is_trusted_reg(reg)) {
if (!is_kfunc_rcu(meta)) {
verbose(env, "R%d must be referenced or trusted\n", regno);
@@ -13348,6 +13462,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
case KF_ARG_PTR_TO_CONST_STR:
case KF_ARG_PTR_TO_WORKQUEUE:
+ case KF_ARG_PTR_TO_TIMER:
case KF_ARG_PTR_TO_TASK_WORK:
case KF_ARG_PTR_TO_IRQ_FLAG:
case KF_ARG_PTR_TO_RES_SPIN_LOCK:
@@ -13575,7 +13690,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
struct bpf_reg_state *size_reg = &regs[regno + 1];
const struct btf_param *size_arg = &args[i + 1];
- if (!register_is_null(buff_reg) || !is_kfunc_arg_optional(meta->btf, buff_arg)) {
+ if (!register_is_null(buff_reg) || !is_kfunc_arg_nullable(meta->btf, buff_arg)) {
ret = check_kfunc_mem_size_reg(env, size_reg, regno + 1);
if (ret < 0) {
verbose(env, "arg#%d arg#%d memory, len pair leads to invalid memory access\n", i, i + 1);
@@ -13643,7 +13758,16 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
verbose(env, "arg#%d doesn't point to a map value\n", i);
return -EINVAL;
}
- ret = process_wq_func(env, regno, meta);
+ ret = check_map_field_pointer(env, regno, BPF_WORKQUEUE, &meta->map);
+ if (ret < 0)
+ return ret;
+ break;
+ case KF_ARG_PTR_TO_TIMER:
+ if (reg->type != PTR_TO_MAP_VALUE) {
+ verbose(env, "arg#%d doesn't point to a map value\n", i);
+ return -EINVAL;
+ }
+ ret = process_timer_kfunc(env, regno, meta);
if (ret < 0)
return ret;
break;
@@ -13652,7 +13776,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
verbose(env, "arg#%d doesn't point to a map value\n", i);
return -EINVAL;
}
- ret = process_task_work_func(env, regno, meta);
+ ret = check_map_field_pointer(env, regno, BPF_TASK_WORK, &meta->map);
if (ret < 0)
return ret;
break;
@@ -13699,44 +13823,28 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
return 0;
}
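/*
 * Illustrative sketch (not part of the patch): how a nullable buffer/size
 * pair looks from a BPF program, using bpf_dynptr_slice(), which takes an
 * optional bounce buffer plus its size. Passing NULL asks for a direct
 * pointer into the underlying data; passing a buffer asks for a copy. With
 * the reordering above, both shapes still route through the mem+size
 * checks. The wrapper below is hypothetical:
 */
static void *read_first_8_bytes(struct bpf_dynptr *dp, char buf[8])
{
	/*
	 * buf may be NULL here: the kfunc annotates the argument as
	 * nullable, and when buf is non-NULL the constant size 8 is
	 * still validated against it.
	 */
	return bpf_dynptr_slice(dp, 0, buf, 8);
}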
-static int fetch_kfunc_meta(struct bpf_verifier_env *env,
- struct bpf_insn *insn,
- struct bpf_kfunc_call_arg_meta *meta,
- const char **kfunc_name)
+static int fetch_kfunc_arg_meta(struct bpf_verifier_env *env,
+ s32 func_id,
+ s16 offset,
+ struct bpf_kfunc_call_arg_meta *meta)
{
- const struct btf_type *func, *func_proto;
- u32 func_id, *kfunc_flags;
- const char *func_name;
- struct btf *desc_btf;
-
- if (kfunc_name)
- *kfunc_name = NULL;
+ struct bpf_kfunc_meta kfunc;
+ int err;
- if (!insn->imm)
- return -EINVAL;
+ err = fetch_kfunc_meta(env, func_id, offset, &kfunc);
+ if (err)
+ return err;
- desc_btf = find_kfunc_desc_btf(env, insn->off);
- if (IS_ERR(desc_btf))
- return PTR_ERR(desc_btf);
+ memset(meta, 0, sizeof(*meta));
+ meta->btf = kfunc.btf;
+ meta->func_id = kfunc.id;
+ meta->func_proto = kfunc.proto;
+ meta->func_name = kfunc.name;
- func_id = insn->imm;
- func = btf_type_by_id(desc_btf, func_id);
- func_name = btf_name_by_offset(desc_btf, func->name_off);
- if (kfunc_name)
- *kfunc_name = func_name;
- func_proto = btf_type_by_id(desc_btf, func->type);
-
- kfunc_flags = btf_kfunc_id_set_contains(desc_btf, func_id, env->prog);
- if (!kfunc_flags) {
+ if (!kfunc.flags || !btf_kfunc_is_allowed(kfunc.btf, kfunc.id, env->prog))
return -EACCES;
- }
- memset(meta, 0, sizeof(*meta));
- meta->btf = desc_btf;
- meta->func_id = func_id;
- meta->kfunc_flags = *kfunc_flags;
- meta->func_proto = func_proto;
- meta->func_name = func_name;
+ meta->kfunc_flags = *kfunc.flags;
return 0;
}
@@ -13941,12 +14049,13 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
if (!insn->imm)
return 0;
- err = fetch_kfunc_meta(env, insn, &meta, &func_name);
- if (err == -EACCES && func_name)
- verbose(env, "calling kernel function %s is not allowed\n", func_name);
+ err = fetch_kfunc_arg_meta(env, insn->imm, insn->off, &meta);
+ if (err == -EACCES && meta.func_name)
+ verbose(env, "calling kernel function %s is not allowed\n", meta.func_name);
if (err)
return err;
desc_btf = meta.btf;
+ func_name = meta.func_name;
insn_aux = &env->insn_aux_data[insn_idx];
insn_aux->is_iter_next = is_iter_next_kfunc(&meta);
@@ -14016,7 +14125,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
meta.r0_rdonly = false;
}
- if (is_bpf_wq_set_callback_impl_kfunc(meta.func_id)) {
+ if (is_bpf_wq_set_callback_kfunc(meta.func_id)) {
err = push_callback_call(env, insn, insn_idx, meta.subprogno,
set_timer_callback_state);
if (err) {
@@ -14154,8 +14263,12 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
}
}
- for (i = 0; i < CALLER_SAVED_REGS; i++)
- mark_reg_not_init(env, regs, caller_saved[i]);
+ for (i = 0; i < CALLER_SAVED_REGS; i++) {
+ u32 regno = caller_saved[i];
+
+ mark_reg_not_init(env, regs, regno);
+ regs[regno].subreg_def = DEF_NOT_SUBREG;
+ }
/* Check return type */
t = btf_type_skip_modifiers(desc_btf, meta.func_proto->type, NULL);
@@ -14220,26 +14333,38 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
if (is_kfunc_rcu_protected(&meta))
regs[BPF_REG_0].type |= MEM_RCU;
} else {
- mark_reg_known_zero(env, regs, BPF_REG_0);
- regs[BPF_REG_0].btf = desc_btf;
- regs[BPF_REG_0].type = PTR_TO_BTF_ID;
- regs[BPF_REG_0].btf_id = ptr_type_id;
+ enum bpf_reg_type type = PTR_TO_BTF_ID;
if (meta.func_id == special_kfunc_list[KF_bpf_get_kmem_cache])
- regs[BPF_REG_0].type |= PTR_UNTRUSTED;
- else if (is_kfunc_rcu_protected(&meta))
- regs[BPF_REG_0].type |= MEM_RCU;
-
- if (is_iter_next_kfunc(&meta)) {
- struct bpf_reg_state *cur_iter;
-
- cur_iter = get_iter_from_state(env->cur_state, &meta);
-
- if (cur_iter->type & MEM_RCU) /* KF_RCU_PROTECTED */
- regs[BPF_REG_0].type |= MEM_RCU;
- else
- regs[BPF_REG_0].type |= PTR_TRUSTED;
+ type |= PTR_UNTRUSTED;
+ else if (is_kfunc_rcu_protected(&meta) ||
+ (is_iter_next_kfunc(&meta) &&
+ (get_iter_from_state(env->cur_state, &meta)
+ ->type & MEM_RCU))) {
+ /*
+ * If the iterator's constructor (the _new
+ * function e.g., bpf_iter_task_new) has been
+ * annotated with BPF kfunc flag
+ * KF_RCU_PROTECTED and was called within an RCU
+ * read-side critical section, also propagate
+ * the MEM_RCU flag to the pointer returned from
+ * the iterator's next function (e.g.,
+ * bpf_iter_task_next).
+ */
+ type |= MEM_RCU;
+ } else {
+ /*
+ * Any PTR_TO_BTF_ID that is returned from a BPF
+ * kfunc should by default be treated as
+ * implicitly trusted.
+ */
+ type |= PTR_TRUSTED;
}
+
+ mark_reg_known_zero(env, regs, BPF_REG_0);
+ regs[BPF_REG_0].btf = desc_btf;
+ regs[BPF_REG_0].type = type;
+ regs[BPF_REG_0].btf_id = ptr_type_id;
}
if (is_kfunc_ret_null(&meta)) {
@@ -14295,6 +14420,9 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
return err;
}
+ if (meta.func_id == special_kfunc_list[KF_bpf_session_cookie])
+ env->prog->call_session_cookie = true;
+
return 0;
}
@@ -15081,6 +15209,252 @@ static void scalar_min_max_mul(struct bpf_reg_state *dst_reg,
}
}
+static void scalar32_min_max_udiv(struct bpf_reg_state *dst_reg,
+ struct bpf_reg_state *src_reg)
+{
+ u32 *dst_umin = &dst_reg->u32_min_value;
+ u32 *dst_umax = &dst_reg->u32_max_value;
+ u32 src_val = src_reg->u32_min_value; /* non-zero, const divisor */
+
+ *dst_umin = *dst_umin / src_val;
+ *dst_umax = *dst_umax / src_val;
+
+ /* Reset other ranges/tnum to unbounded/unknown. */
+ dst_reg->s32_min_value = S32_MIN;
+ dst_reg->s32_max_value = S32_MAX;
+ reset_reg64_and_tnum(dst_reg);
+}
+
+static void scalar_min_max_udiv(struct bpf_reg_state *dst_reg,
+ struct bpf_reg_state *src_reg)
+{
+ u64 *dst_umin = &dst_reg->umin_value;
+ u64 *dst_umax = &dst_reg->umax_value;
+ u64 src_val = src_reg->umin_value; /* non-zero, const divisor */
+
+ *dst_umin = div64_u64(*dst_umin, src_val);
+ *dst_umax = div64_u64(*dst_umax, src_val);
+
+ /* Reset other ranges/tnum to unbounded/unknown. */
+ dst_reg->smin_value = S64_MIN;
+ dst_reg->smax_value = S64_MAX;
+ reset_reg32_and_tnum(dst_reg);
+}
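/*
 * Illustrative sketch (not part of the patch): for an unsigned divide by a
 * non-zero constant d, division is monotonic, so the result range is simply
 * [umin/d, umax/d]. A minimal user-space check of that property:
 */
#include <assert.h>
#include <stdint.h>

static void udiv_range(uint64_t umin, uint64_t umax, uint64_t d,
		       uint64_t *rmin, uint64_t *rmax)
{
	/* Mirrors scalar_min_max_udiv(): monotonic, so endpoints suffice. */
	*rmin = umin / d;
	*rmax = umax / d;
}

int main(void)
{
	uint64_t rmin, rmax;

	udiv_range(10, 100, 3, &rmin, &rmax);
	assert(rmin == 3 && rmax == 33);
	/* Every value in [10, 100] divides into [3, 33]. */
	for (uint64_t x = 10; x <= 100; x++)
		assert(x / 3 >= rmin && x / 3 <= rmax);
	return 0;
}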
+
+static void scalar32_min_max_sdiv(struct bpf_reg_state *dst_reg,
+ struct bpf_reg_state *src_reg)
+{
+ s32 *dst_smin = &dst_reg->s32_min_value;
+ s32 *dst_smax = &dst_reg->s32_max_value;
+ s32 src_val = src_reg->s32_min_value; /* non-zero, const divisor */
+ s32 res1, res2;
+
+ /* BPF div specification: S32_MIN / -1 = S32_MIN */
+ if (*dst_smin == S32_MIN && src_val == -1) {
+ /*
+ * If the dividend range contains more than just S32_MIN,
+ * we cannot precisely track the result, so it becomes unbounded.
+ * e.g., [S32_MIN, S32_MIN+10]/(-1),
+ * = {S32_MIN} U [-(S32_MIN+10), -(S32_MIN+1)]
+ * = {S32_MIN} U [S32_MAX-9, S32_MAX] = [S32_MIN, S32_MAX]
+ * Otherwise (if dividend is exactly S32_MIN), result remains S32_MIN.
+ */
+ if (*dst_smax != S32_MIN) {
+ *dst_smin = S32_MIN;
+ *dst_smax = S32_MAX;
+ }
+ goto reset;
+ }
+
+ res1 = *dst_smin / src_val;
+ res2 = *dst_smax / src_val;
+ *dst_smin = min(res1, res2);
+ *dst_smax = max(res1, res2);
+
+reset:
+ /* Reset other ranges/tnum to unbounded/unknown. */
+ dst_reg->u32_min_value = 0;
+ dst_reg->u32_max_value = U32_MAX;
+ reset_reg64_and_tnum(dst_reg);
+}
+
+static void scalar_min_max_sdiv(struct bpf_reg_state *dst_reg,
+ struct bpf_reg_state *src_reg)
+{
+ s64 *dst_smin = &dst_reg->smin_value;
+ s64 *dst_smax = &dst_reg->smax_value;
+ s64 src_val = src_reg->smin_value; /* non-zero, const divisor */
+ s64 res1, res2;
+
+ /* BPF div specification: S64_MIN / -1 = S64_MIN */
+ if (*dst_smin == S64_MIN && src_val == -1) {
+ /*
+ * If the dividend range contains more than just S64_MIN,
+ * we cannot precisely track the result, so it becomes unbounded.
+ * e.g., [S64_MIN, S64_MIN+10]/(-1),
+ * = {S64_MIN} U [-(S64_MIN+10), -(S64_MIN+1)]
+ * = {S64_MIN} U [S64_MAX-9, S64_MAX] = [S64_MIN, S64_MAX]
+ * Otherwise (if dividend is exactly S64_MIN), result remains S64_MIN.
+ */
+ if (*dst_smax != S64_MIN) {
+ *dst_smin = S64_MIN;
+ *dst_smax = S64_MAX;
+ }
+ goto reset;
+ }
+
+ res1 = div64_s64(*dst_smin, src_val);
+ res2 = div64_s64(*dst_smax, src_val);
+ *dst_smin = min(res1, res2);
+ *dst_smax = max(res1, res2);
+
+reset:
+ /* Reset other ranges/tnum to unbounded/unknown. */
+ dst_reg->umin_value = 0;
+ dst_reg->umax_value = U64_MAX;
+ reset_reg32_and_tnum(dst_reg);
+}
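/*
 * Illustrative sketch (not part of the patch): the BPF ISA defines
 * S64_MIN / -1 as S64_MIN rather than trapping, so the two range endpoints
 * cannot simply be divided when the dividend range touches S64_MIN and the
 * divisor is -1. A user-space model of that rule:
 */
#include <assert.h>
#include <stdint.h>

/* BPF semantics for sdiv, mirroring the special case handled above. */
static int64_t bpf_sdiv(int64_t a, int64_t b)
{
	if (a == INT64_MIN && b == -1)
		return INT64_MIN;	/* would overflow in C; defined in BPF */
	return a / b;
}

int main(void)
{
	/* Exactly S64_MIN: result stays S64_MIN, bounds remain precise. */
	assert(bpf_sdiv(INT64_MIN, -1) == INT64_MIN);
	/* [S64_MIN, S64_MIN + 10] / -1 spans both ends of s64 ... */
	assert(bpf_sdiv(INT64_MIN + 1, -1) == INT64_MAX);
	/* ... so the verifier must widen the result to [S64_MIN, S64_MAX]. */
	return 0;
}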
+
+static void scalar32_min_max_umod(struct bpf_reg_state *dst_reg,
+ struct bpf_reg_state *src_reg)
+{
+ u32 *dst_umin = &dst_reg->u32_min_value;
+ u32 *dst_umax = &dst_reg->u32_max_value;
+ u32 src_val = src_reg->u32_min_value; /* non-zero, const divisor */
+ u32 res_max = src_val - 1;
+
+ /*
+ * If dst_umax <= res_max, the result remains unchanged.
+ * e.g., [2, 5] % 10 = [2, 5].
+ */
+ if (*dst_umax <= res_max)
+ return;
+
+ *dst_umin = 0;
+ *dst_umax = min(*dst_umax, res_max);
+
+ /* Reset other ranges/tnum to unbounded/unknown. */
+ dst_reg->s32_min_value = S32_MIN;
+ dst_reg->s32_max_value = S32_MAX;
+ reset_reg64_and_tnum(dst_reg);
+}
+
+static void scalar_min_max_umod(struct bpf_reg_state *dst_reg,
+ struct bpf_reg_state *src_reg)
+{
+ u64 *dst_umin = &dst_reg->umin_value;
+ u64 *dst_umax = &dst_reg->umax_value;
+ u64 src_val = src_reg->umin_value; /* non-zero, const divisor */
+ u64 res_max = src_val - 1;
+
+ /*
+ * If dst_umax <= res_max, the result remains unchanged.
+ * e.g., [2, 5] % 10 = [2, 5].
+ */
+ if (*dst_umax <= res_max)
+ return;
+
+ *dst_umin = 0;
+ *dst_umax = min(*dst_umax, res_max);
+
+ /* Reset other ranges/tnum to unbounded/unknown. */
+ dst_reg->smin_value = S64_MIN;
+ dst_reg->smax_value = S64_MAX;
+ reset_reg32_and_tnum(dst_reg);
+}
+
+static void scalar32_min_max_smod(struct bpf_reg_state *dst_reg,
+ struct bpf_reg_state *src_reg)
+{
+ s32 *dst_smin = &dst_reg->s32_min_value;
+ s32 *dst_smax = &dst_reg->s32_max_value;
+ s32 src_val = src_reg->s32_min_value; /* non-zero, const divisor */
+
+ /*
+ * Safe absolute value calculation:
+ * If src_val == S32_MIN (-2147483648), src_abs becomes 2147483648.
+ * Here use unsigned integer to avoid overflow.
+ */
+ u32 src_abs = (src_val > 0) ? (u32)src_val : -(u32)src_val;
+
+ /*
+ * Calculate the maximum possible absolute value of the result.
+ * Even if src_abs is 2147483648 (S32_MIN), subtracting 1 gives
+ * 2147483647 (S32_MAX), which fits perfectly in s32.
+ */
+ s32 res_max_abs = src_abs - 1;
+
+ /*
+ * If the dividend is already within the result range,
+ * the result remains unchanged. e.g., [-2, 5] % 10 = [-2, 5].
+ */
+ if (*dst_smin >= -res_max_abs && *dst_smax <= res_max_abs)
+ return;
+
+ /* General case: result has the same sign as the dividend. */
+ if (*dst_smin >= 0) {
+ *dst_smin = 0;
+ *dst_smax = min(*dst_smax, res_max_abs);
+ } else if (*dst_smax <= 0) {
+ *dst_smax = 0;
+ *dst_smin = max(*dst_smin, -res_max_abs);
+ } else {
+ *dst_smin = -res_max_abs;
+ *dst_smax = res_max_abs;
+ }
+
+ /* Reset other ranges/tnum to unbounded/unknown. */
+ dst_reg->u32_min_value = 0;
+ dst_reg->u32_max_value = U32_MAX;
+ reset_reg64_and_tnum(dst_reg);
+}
+
+static void scalar_min_max_smod(struct bpf_reg_state *dst_reg,
+ struct bpf_reg_state *src_reg)
+{
+ s64 *dst_smin = &dst_reg->smin_value;
+ s64 *dst_smax = &dst_reg->smax_value;
+ s64 src_val = src_reg->smin_value; /* non-zero, const divisor */
+
+ /*
+ * Safe absolute value calculation:
+ * If src_val == S64_MIN (-2^63), src_abs becomes 2^63.
+ * Here use unsigned integer to avoid overflow.
+ */
+ u64 src_abs = (src_val > 0) ? (u64)src_val : -(u64)src_val;
+
+ /*
+ * Calculate the maximum possible absolute value of the result.
+ * Even if src_abs is 2^63 (S64_MIN), subtracting 1 gives
+ * 2^63 - 1 (S64_MAX), which fits perfectly in s64.
+ */
+ s64 res_max_abs = src_abs - 1;
+
+ /*
+ * If the dividend is already within the result range,
+ * the result remains unchanged. e.g., [-2, 5] % 10 = [-2, 5].
+ */
+ if (*dst_smin >= -res_max_abs && *dst_smax <= res_max_abs)
+ return;
+
+ /* General case: result has the same sign as the dividend. */
+ if (*dst_smin >= 0) {
+ *dst_smin = 0;
+ *dst_smax = min(*dst_smax, res_max_abs);
+ } else if (*dst_smax <= 0) {
+ *dst_smax = 0;
+ *dst_smin = max(*dst_smin, -res_max_abs);
+ } else {
+ *dst_smin = -res_max_abs;
+ *dst_smax = res_max_abs;
+ }
+
+ /* Reset other ranges/tnum to unbounded/unknown. */
+ dst_reg->umin_value = 0;
+ dst_reg->umax_value = U64_MAX;
+ reset_reg32_and_tnum(dst_reg);
+}
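/*
 * Illustrative sketch (not part of the patch): for x % d with a non-zero
 * constant d, the result has the sign of the dividend and magnitude at most
 * |d| - 1, which is exactly what the bounds above encode:
 */
#include <assert.h>
#include <stdint.h>

int main(void)
{
	int64_t d = 10;

	/* Dividend range [-2, 5] already fits within (-|d|, |d|): unchanged. */
	for (int64_t x = -2; x <= 5; x++)
		assert(x % d == x);
	/* Mixed-sign dividend range [-100, 100] -> result in [-9, 9]. */
	for (int64_t x = -100; x <= 100; x++)
		assert(x % d >= -(d - 1) && x % d <= d - 1);
	/* Non-negative dividend -> non-negative result in [0, 9]. */
	for (int64_t x = 0; x <= 100; x++)
		assert(x % d >= 0 && x % d <= d - 1);
	return 0;
}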
+
static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
struct bpf_reg_state *src_reg)
{
@@ -15305,21 +15679,17 @@ static void __scalar64_min_max_lsh(struct bpf_reg_state *dst_reg,
u64 umin_val, u64 umax_val)
{
/* Special case <<32 because it is a common compiler pattern to sign
- * extend subreg by doing <<32 s>>32. In this case if 32bit bounds are
- * positive we know this shift will also be positive so we can track
- * bounds correctly. Otherwise we lose all sign bit information except
- * what we can pick up from var_off. Perhaps we can generalize this
- * later to shifts of any length.
+ * extend subreg by doing <<32 s>>32. smin/smax assignments are correct
+ * because s32 bounds don't flip sign when shifting to the left by
+ * 32bits.
*/
- if (umin_val == 32 && umax_val == 32 && dst_reg->s32_max_value >= 0)
+ if (umin_val == 32 && umax_val == 32) {
dst_reg->smax_value = (s64)dst_reg->s32_max_value << 32;
- else
- dst_reg->smax_value = S64_MAX;
-
- if (umin_val == 32 && umax_val == 32 && dst_reg->s32_min_value >= 0)
dst_reg->smin_value = (s64)dst_reg->s32_min_value << 32;
- else
+ } else {
+ dst_reg->smax_value = S64_MAX;
dst_reg->smin_value = S64_MIN;
+ }
/* If we might shift our top bit out, then we know nothing */
if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
@@ -15462,6 +15832,48 @@ static void scalar_min_max_arsh(struct bpf_reg_state *dst_reg,
__update_reg_bounds(dst_reg);
}
+static void scalar_byte_swap(struct bpf_reg_state *dst_reg, struct bpf_insn *insn)
+{
+ /*
+ * Byte swap operation - update var_off using tnum_bswap.
+ * Three cases:
+ * 1. bswap(16|32|64): opcode=0xd7 (BPF_END | BPF_ALU64 | BPF_TO_LE)
+ * unconditional swap
+ * 2. to_le(16|32|64): opcode=0xd4 (BPF_END | BPF_ALU | BPF_TO_LE)
+ * swap on big-endian, truncation or no-op on little-endian
+ * 3. to_be(16|32|64): opcode=0xdc (BPF_END | BPF_ALU | BPF_TO_BE)
+ * swap on little-endian, truncation or no-op on big-endian
+ */
+
+ bool alu64 = BPF_CLASS(insn->code) == BPF_ALU64;
+ bool to_le = BPF_SRC(insn->code) == BPF_TO_LE;
+ bool is_big_endian;
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ is_big_endian = true;
+#else
+ is_big_endian = false;
+#endif
+ /* Apply bswap for alu64, or when the requested byte order differs from the host's */
+ bool need_bswap = alu64 || (to_le == is_big_endian);
+
+ if (need_bswap) {
+ if (insn->imm == 16)
+ dst_reg->var_off = tnum_bswap16(dst_reg->var_off);
+ else if (insn->imm == 32)
+ dst_reg->var_off = tnum_bswap32(dst_reg->var_off);
+ else if (insn->imm == 64)
+ dst_reg->var_off = tnum_bswap64(dst_reg->var_off);
+ /*
+ * Byteswap scrambles the range, so we must reset bounds.
+ * Bounds will be re-derived from the new tnum later.
+ */
+ __mark_reg_unbounded(dst_reg);
+ }
+ /* For bswap16/32, truncate dst register to match the swapped size */
+ if (insn->imm == 16 || insn->imm == 32)
+ coerce_reg_to_size(dst_reg, insn->imm / 8);
+}
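/*
 * Illustrative sketch (not part of the patch): byte swapping is not
 * monotonic, so min/max bounds cannot be swapped alongside the value; only
 * the known-bits (tnum) information survives, and bounds are re-derived
 * from it later. A small demonstration:
 */
#include <assert.h>
#include <stdint.h>

int main(void)
{
	/* A tight range [0x0100, 0x0101] ... */
	uint16_t lo = 0x0100, hi = 0x0101;

	/*
	 * ... maps to {0x0001, 0x0101} after bswap16. The new value
	 * 0x0001 falls below the old minimum, so the old endpoints no
	 * longer bound the result and the verifier must mark the
	 * register unbounded before resyncing from the tnum.
	 */
	assert(__builtin_bswap16(lo) == 0x0001);
	assert(__builtin_bswap16(hi) == 0x0101);
	return 0;
}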
+
static bool is_safe_to_compute_dst_reg_range(struct bpf_insn *insn,
const struct bpf_reg_state *src_reg)
{
@@ -15488,8 +15900,17 @@ static bool is_safe_to_compute_dst_reg_range(struct bpf_insn *insn,
case BPF_XOR:
case BPF_OR:
case BPF_MUL:
+ case BPF_END:
return true;
+ /*
+ * The range of division and modulo operations is only safe to compute
+ * when the divisor is a constant.
+ */
+ case BPF_DIV:
+ case BPF_MOD:
+ return src_is_const;
+
/* Shift operators range is only computable if shift dimension operand
* is a constant. Shifts greater than 31 or 63 are undefined. This
* includes shifts by a negative number.
@@ -15503,6 +15924,35 @@ static bool is_safe_to_compute_dst_reg_range(struct bpf_insn *insn,
}
}
+static int maybe_fork_scalars(struct bpf_verifier_env *env, struct bpf_insn *insn,
+ struct bpf_reg_state *dst_reg)
+{
+ struct bpf_verifier_state *branch;
+ struct bpf_reg_state *regs;
+ bool alu32;
+
+ if (dst_reg->smin_value == -1 && dst_reg->smax_value == 0)
+ alu32 = false;
+ else if (dst_reg->s32_min_value == -1 && dst_reg->s32_max_value == 0)
+ alu32 = true;
+ else
+ return 0;
+
+ branch = push_stack(env, env->insn_idx + 1, env->insn_idx, false);
+ if (IS_ERR(branch))
+ return PTR_ERR(branch);
+
+ regs = branch->frame[branch->curframe]->regs;
+ if (alu32) {
+ __mark_reg32_known(&regs[insn->dst_reg], 0);
+ __mark_reg32_known(dst_reg, -1ull);
+ } else {
+ __mark_reg_known(&regs[insn->dst_reg], 0);
+ __mark_reg_known(dst_reg, -1ull);
+ }
+ return 0;
+}
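/*
 * Illustrative sketch (not part of the patch): compilers often lower a
 * boolean select to "r = -(cond); r &= mask;", leaving r with range
 * [-1, 0]. maybe_fork_scalars() splits verification into two states where
 * r is exactly 0 or exactly -1, so the subsequent AND/OR yields a constant
 * in each branch instead of a fuzzy range:
 */
#include <assert.h>
#include <stdint.h>

static uint64_t select_mask(int cond, uint64_t mask)
{
	uint64_t r = -(uint64_t)!!cond;	/* r is 0 or ~0ULL, i.e. [-1, 0] */

	return r & mask;		/* exactly 0 or exactly mask */
}

int main(void)
{
	assert(select_mask(0, 0xff00) == 0);
	assert(select_mask(1, 0xff00) == 0xff00);
	return 0;
}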
+
/* WARNING: This function does calculations on 64-bit values, but the actual
* execution may occur on 32-bit values. Therefore, things like bitshifts
* need extra checks in the 32-bit case.
@@ -15513,6 +15963,7 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
struct bpf_reg_state src_reg)
{
u8 opcode = BPF_OP(insn->code);
+ s16 off = insn->off;
bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
int ret;
@@ -15564,12 +16015,54 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
scalar32_min_max_mul(dst_reg, &src_reg);
scalar_min_max_mul(dst_reg, &src_reg);
break;
+ case BPF_DIV:
+ /* BPF div specification: x / 0 = 0 */
+ if ((alu32 && src_reg.u32_min_value == 0) || (!alu32 && src_reg.umin_value == 0)) {
+ ___mark_reg_known(dst_reg, 0);
+ break;
+ }
+ if (alu32)
+ if (off == 1)
+ scalar32_min_max_sdiv(dst_reg, &src_reg);
+ else
+ scalar32_min_max_udiv(dst_reg, &src_reg);
+ else
+ if (off == 1)
+ scalar_min_max_sdiv(dst_reg, &src_reg);
+ else
+ scalar_min_max_udiv(dst_reg, &src_reg);
+ break;
+ case BPF_MOD:
+ /* BPF mod specification: x % 0 = x */
+ if ((alu32 && src_reg.u32_min_value == 0) || (!alu32 && src_reg.umin_value == 0))
+ break;
+ if (alu32)
+ if (off == 1)
+ scalar32_min_max_smod(dst_reg, &src_reg);
+ else
+ scalar32_min_max_umod(dst_reg, &src_reg);
+ else
+ if (off == 1)
+ scalar_min_max_smod(dst_reg, &src_reg);
+ else
+ scalar_min_max_umod(dst_reg, &src_reg);
+ break;
case BPF_AND:
+ if (tnum_is_const(src_reg.var_off)) {
+ ret = maybe_fork_scalars(env, insn, dst_reg);
+ if (ret)
+ return ret;
+ }
dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
scalar32_min_max_and(dst_reg, &src_reg);
scalar_min_max_and(dst_reg, &src_reg);
break;
case BPF_OR:
+ if (tnum_is_const(src_reg.var_off)) {
+ ret = maybe_fork_scalars(env, insn, dst_reg);
+ if (ret)
+ return ret;
+ }
dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
scalar32_min_max_or(dst_reg, &src_reg);
scalar_min_max_or(dst_reg, &src_reg);
@@ -15597,12 +16090,23 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
else
scalar_min_max_arsh(dst_reg, &src_reg);
break;
+ case BPF_END:
+ scalar_byte_swap(dst_reg, insn);
+ break;
default:
break;
}
- /* ALU32 ops are zero extended into 64bit register */
- if (alu32)
+ /*
+ * ALU32 ops are zero extended into 64bit register.
+ *
+ * BPF_END is already handled inside the helper (truncation),
+ * so skip zext here to avoid unexpected zero extension.
+ * e.g., le64: opcode=(BPF_END|BPF_ALU|BPF_TO_LE), imm=0x40
+ * This is a 64bit byte swap operation with alu32==true,
+ * but we should not zero extend the result.
+ */
+ if (alu32 && opcode != BPF_END)
zext_32_to_64(dst_reg);
reg_bounds_sync(dst_reg);
return 0;
@@ -15705,6 +16209,13 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
verbose(env, "verifier internal error: no src_reg\n");
return -EFAULT;
}
+ /*
+ * For alu32 linked register tracking, we need to check dst_reg's
+ * umax_value before the ALU operation. After adjust_scalar_min_max_vals(),
+ * alu32 ops will have zero-extended the result, making umax_value <= U32_MAX.
+ */
+ u64 dst_umax = dst_reg->umax_value;
+
err = adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
if (err)
return err;
@@ -15714,26 +16225,44 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
* r1 += 0x1
* if r2 < 1000 goto ...
* use r1 in memory access
- * So for 64-bit alu remember constant delta between r2 and r1 and
- * update r1 after 'if' condition.
+ * So remember constant delta between r2 and r1 and update r1 after
+ * 'if' condition.
*/
if (env->bpf_capable &&
- BPF_OP(insn->code) == BPF_ADD && !alu32 &&
- dst_reg->id && is_reg_const(src_reg, false)) {
- u64 val = reg_const_value(src_reg, false);
+ (BPF_OP(insn->code) == BPF_ADD || BPF_OP(insn->code) == BPF_SUB) &&
+ dst_reg->id && is_reg_const(src_reg, alu32)) {
+ u64 val = reg_const_value(src_reg, alu32);
+ s32 off;
+
+ if (!alu32 && ((s64)val < S32_MIN || (s64)val > S32_MAX))
+ goto clear_id;
+
+ if (alu32 && (dst_umax > U32_MAX))
+ goto clear_id;
- if ((dst_reg->id & BPF_ADD_CONST) ||
- /* prevent overflow in sync_linked_regs() later */
- val > (u32)S32_MAX) {
+ off = (s32)val;
+
+ if (BPF_OP(insn->code) == BPF_SUB) {
+ /* Negating S32_MIN would overflow */
+ if (off == S32_MIN)
+ goto clear_id;
+ off = -off;
+ }
+
+ if (dst_reg->id & BPF_ADD_CONST) {
/*
* If the register already went through rX += val
* we cannot accumulate another val into rx->off.
*/
+clear_id:
dst_reg->off = 0;
dst_reg->id = 0;
} else {
- dst_reg->id |= BPF_ADD_CONST;
- dst_reg->off = val;
+ if (alu32)
+ dst_reg->id |= BPF_ADD_CONST32;
+ else
+ dst_reg->id |= BPF_ADD_CONST64;
+ dst_reg->off = off;
}
} else {
/*
@@ -15782,7 +16311,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
}
/* check dest operand */
- if (opcode == BPF_NEG &&
+ if ((opcode == BPF_NEG || opcode == BPF_END) &&
regs[insn->dst_reg].type == SCALAR_VALUE) {
err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
err = err ?: adjust_scalar_min_max_vals(env, insn,
@@ -16802,8 +17331,8 @@ static void collect_linked_regs(struct bpf_verifier_state *vstate, u32 id,
/* For all R in linked_regs, copy known_reg range into R
* if R->id == known_reg->id.
*/
-static void sync_linked_regs(struct bpf_verifier_state *vstate, struct bpf_reg_state *known_reg,
- struct linked_regs *linked_regs)
+static void sync_linked_regs(struct bpf_verifier_env *env, struct bpf_verifier_state *vstate,
+ struct bpf_reg_state *known_reg, struct linked_regs *linked_regs)
{
struct bpf_reg_state fake_reg;
struct bpf_reg_state *reg;
@@ -16827,23 +17356,32 @@ static void sync_linked_regs(struct bpf_verifier_state *vstate, struct bpf_reg_s
} else {
s32 saved_subreg_def = reg->subreg_def;
s32 saved_off = reg->off;
+ u32 saved_id = reg->id;
fake_reg.type = SCALAR_VALUE;
- __mark_reg_known(&fake_reg, (s32)reg->off - (s32)known_reg->off);
+ __mark_reg_known(&fake_reg, (s64)reg->off - (s64)known_reg->off);
/* reg = known_reg; reg += delta */
copy_register_state(reg, known_reg);
/*
- * Must preserve off, id and add_const flag,
+ * Must preserve off, id and subreg_def,
* otherwise another sync_linked_regs() will be incorrect.
*/
reg->off = saved_off;
+ reg->id = saved_id;
reg->subreg_def = saved_subreg_def;
scalar32_min_max_add(reg, &fake_reg);
scalar_min_max_add(reg, &fake_reg);
reg->var_off = tnum_add(reg->var_off, fake_reg.var_off);
+ if (known_reg->id & BPF_ADD_CONST32)
+ zext_32_to_64(reg);
+ reg_bounds_sync(reg);
}
+ if (e->is_reg)
+ mark_reg_scratched(env, e->regno);
+ else
+ mark_stack_slot_scratched(env, e->spi);
}
}
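/*
 * Illustrative sketch (not part of the patch): the pattern that linked
 * register tracking targets, written as restricted C. The function and
 * buffer are hypothetical; with this series the delta is also tracked for
 * 32-bit ALU and for BPF_SUB, not just 64-bit BPF_ADD:
 */
static int peek(char buf[64], unsigned int n)
{
	unsigned int i = n;	/* i and n share an id (linked)        */

	i += 1;			/* constant delta +1 recorded on i     */
	if (n < 63)		/* narrows n to [0, 62] and, via the   */
		return buf[i];	/* recorded delta, i to [1, 63]: safe  */
	return 0;		/* for a 64-byte buffer.               */
}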
@@ -17030,13 +17568,15 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
if (BPF_SRC(insn->code) == BPF_X &&
src_reg->type == SCALAR_VALUE && src_reg->id &&
!WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) {
- sync_linked_regs(this_branch, src_reg, &linked_regs);
- sync_linked_regs(other_branch, &other_branch_regs[insn->src_reg], &linked_regs);
+ sync_linked_regs(env, this_branch, src_reg, &linked_regs);
+ sync_linked_regs(env, other_branch, &other_branch_regs[insn->src_reg],
+ &linked_regs);
}
if (dst_reg->type == SCALAR_VALUE && dst_reg->id &&
!WARN_ON_ONCE(dst_reg->id != other_branch_regs[insn->dst_reg].id)) {
- sync_linked_regs(this_branch, dst_reg, &linked_regs);
- sync_linked_regs(other_branch, &other_branch_regs[insn->dst_reg], &linked_regs);
+ sync_linked_regs(env, this_branch, dst_reg, &linked_regs);
+ sync_linked_regs(env, other_branch, &other_branch_regs[insn->dst_reg],
+ &linked_regs);
}
/* if one pointer register is compared to another pointer
@@ -17411,6 +17951,7 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char
switch (env->prog->expected_attach_type) {
case BPF_TRACE_FENTRY:
case BPF_TRACE_FEXIT:
+ case BPF_TRACE_FSESSION:
range = retval_range(0, 0);
break;
case BPF_TRACE_RAW_TP:
@@ -17693,6 +18234,10 @@ static bool verifier_inlines_helper_call(struct bpf_verifier_env *env, s32 imm)
switch (imm) {
#ifdef CONFIG_X86_64
case BPF_FUNC_get_smp_processor_id:
+#ifdef CONFIG_SMP
+ case BPF_FUNC_get_current_task_btf:
+ case BPF_FUNC_get_current_task:
+#endif
return env->prog->jit_requested && bpf_jit_supports_percpu_insn();
#endif
default:
@@ -17737,7 +18282,7 @@ static bool get_call_summary(struct bpf_verifier_env *env, struct bpf_insn *call
if (bpf_pseudo_kfunc_call(call)) {
int err;
- err = fetch_kfunc_meta(env, call, &meta, NULL);
+ err = fetch_kfunc_arg_meta(env, call->imm, call->off, &meta);
if (err < 0)
/* error would be reported later */
return false;
@@ -18245,7 +18790,7 @@ static int visit_insn(int t, struct bpf_verifier_env *env)
} else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
struct bpf_kfunc_call_arg_meta meta;
- ret = fetch_kfunc_meta(env, insn, &meta, NULL);
+ ret = fetch_kfunc_arg_meta(env, insn->imm, insn->off, &meta);
if (ret == 0 && is_iter_next_kfunc(&meta)) {
mark_prune_point(env, t);
/* Checking and saving state checkpoints at iter_next() call
@@ -18948,30 +19493,49 @@ static bool check_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
if (old_id == 0) /* cur_id == 0 as well */
return true;
- for (i = 0; i < BPF_ID_MAP_SIZE; i++) {
- if (!map[i].old) {
- /* Reached an empty slot; haven't seen this id before */
- map[i].old = old_id;
- map[i].cur = cur_id;
- return true;
- }
+ for (i = 0; i < idmap->cnt; i++) {
if (map[i].old == old_id)
return map[i].cur == cur_id;
if (map[i].cur == cur_id)
return false;
}
+
+ /* Reached the end of known mappings; haven't seen this id before */
+ if (idmap->cnt < BPF_ID_MAP_SIZE) {
+ map[idmap->cnt].old = old_id;
+ map[idmap->cnt].cur = cur_id;
+ idmap->cnt++;
+ return true;
+ }
+
/* We ran out of idmap slots, which should be impossible */
WARN_ON_ONCE(1);
return false;
}
-/* Similar to check_ids(), but allocate a unique temporary ID
- * for 'old_id' or 'cur_id' of zero.
- * This makes pairs like '0 vs unique ID', 'unique ID vs 0' valid.
+/*
+ * Compare scalar register IDs for state equivalence.
+ *
+ * When old_id == 0, the old register is independent - not linked to any
+ * other register. Any linking in the current state only adds constraints,
+ * making it more restrictive. Since the old state didn't rely on any ID
+ * relationships for this register, it's always safe to accept cur regardless
+ * of its ID. Hence, return true immediately.
+ *
+ * When old_id != 0 but cur_id == 0, we need to ensure that different
+ * independent registers in cur don't incorrectly satisfy the ID matching
+ * requirements of linked registers in old.
+ *
+ * Example: if old has r6.id=X and r7.id=X (linked), but cur has r6.id=0
+ * and r7.id=0 (both independent), without temp IDs both would map old_id=X
+ * to cur_id=0 and pass. With temp IDs: r6 maps X->temp1, r7 tries to map
+ * X->temp2, but X is already mapped to temp1, so the check fails correctly.
*/
static bool check_scalar_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
{
- old_id = old_id ? old_id : ++idmap->tmp_id_gen;
+ if (!old_id)
+ return true;
+
cur_id = cur_id ? cur_id : ++idmap->tmp_id_gen;
return check_ids(old_id, cur_id, idmap);
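/*
 * Illustrative sketch (not part of the patch): a user-space model of the
 * temp-id trick described above. Two old registers sharing id X must map
 * to the same current id; two independent current registers (id 0) get
 * distinct temporary ids, so they cannot both satisfy X:
 */
#include <assert.h>
#include <stdint.h>

struct pair { uint32_t old, cur; };

static struct pair map[16];
static unsigned int cnt, tmp_gen = 1000;

static int check_ids_model(uint32_t old_id, uint32_t cur_id)
{
	if (!old_id)
		return 1;		/* old reg independent: always safe */
	if (!cur_id)
		cur_id = ++tmp_gen;	/* unique temp id per zero cur_id */
	for (unsigned int i = 0; i < cnt; i++) {
		if (map[i].old == old_id)
			return map[i].cur == cur_id;
		if (map[i].cur == cur_id)
			return 0;
	}
	if (cnt < 16) {
		map[cnt].old = old_id;
		map[cnt].cur = cur_id;
		cnt++;
		return 1;
	}
	return 0;			/* out of slots; model only */
}

int main(void)
{
	/* old: r6.id = r7.id = X; cur: both independent (id 0). */
	assert(check_ids_model(5, 0) == 1);	/* X -> temp1       */
	assert(check_ids_model(5, 0) == 0);	/* X already mapped */
	return 0;
}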
@@ -19045,6 +19609,72 @@ static void clean_verifier_state(struct bpf_verifier_env *env,
 * doesn't mean that the states are DONE. The verifier has to compare
* the callsites
*/
+
+/* Find id in idset and increment its count, or add new entry */
+static void idset_cnt_inc(struct bpf_idset *idset, u32 id)
+{
+ u32 i;
+
+ for (i = 0; i < idset->num_ids; i++) {
+ if (idset->entries[i].id == id) {
+ idset->entries[i].cnt++;
+ return;
+ }
+ }
+ /* New id */
+ if (idset->num_ids < BPF_ID_MAP_SIZE) {
+ idset->entries[idset->num_ids].id = id;
+ idset->entries[idset->num_ids].cnt = 1;
+ idset->num_ids++;
+ }
+}
+
+/* Find id in idset and return its count, or 0 if not found */
+static u32 idset_cnt_get(struct bpf_idset *idset, u32 id)
+{
+ u32 i;
+
+ for (i = 0; i < idset->num_ids; i++) {
+ if (idset->entries[i].id == id)
+ return idset->entries[i].cnt;
+ }
+ return 0;
+}
+
+/*
+ * Clear singular scalar ids in a state.
+ * A register with a non-zero id is called singular if no other register shares
+ * the same base id. Such registers can be treated as independent (id=0).
+ */
+static void clear_singular_ids(struct bpf_verifier_env *env,
+ struct bpf_verifier_state *st)
+{
+ struct bpf_idset *idset = &env->idset_scratch;
+ struct bpf_func_state *func;
+ struct bpf_reg_state *reg;
+
+ idset->num_ids = 0;
+
+ bpf_for_each_reg_in_vstate(st, func, reg, ({
+ if (reg->type != SCALAR_VALUE)
+ continue;
+ if (!reg->id)
+ continue;
+ idset_cnt_inc(idset, reg->id & ~BPF_ADD_CONST);
+ }));
+
+ bpf_for_each_reg_in_vstate(st, func, reg, ({
+ if (reg->type != SCALAR_VALUE)
+ continue;
+ if (!reg->id)
+ continue;
+ if (idset_cnt_get(idset, reg->id & ~BPF_ADD_CONST) == 1) {
+ reg->id = 0;
+ reg->off = 0;
+ }
+ }));
+}
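/*
 * Illustrative sketch (not part of the patch): if only one register in a
 * state carries id X, no "if rX op rY" comparison can ever propagate
 * bounds through that id, so dropping it loses nothing and lets the cached
 * state match more future states. For example, after:
 *
 *	r6 = *(u64 *)(r10 - 8);	// r6 gets id X from spill/fill linkage
 *	r7 = 1;			// no other register shares X
 *
 * the checkpoint can safely store r6 with id 0.
 */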
+
static void clean_live_states(struct bpf_verifier_env *env, int insn,
struct bpf_verifier_state *cur)
{
@@ -19091,11 +19721,9 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
if (exact == EXACT)
return regs_exact(rold, rcur, idmap);
- if (rold->type == NOT_INIT) {
- if (exact == NOT_EXACT || rcur->type == NOT_INIT)
- /* explored state can't have used this */
- return true;
- }
+ if (rold->type == NOT_INIT)
+ /* explored state can't have used this */
+ return true;
/* Enforce that register types have to match exactly, including their
* modifiers (like PTR_MAYBE_NULL, MEM_RDONLY, etc), as a general
@@ -19132,11 +19760,21 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
}
if (!rold->precise && exact == NOT_EXACT)
return true;
- if ((rold->id & BPF_ADD_CONST) != (rcur->id & BPF_ADD_CONST))
- return false;
- if ((rold->id & BPF_ADD_CONST) && (rold->off != rcur->off))
- return false;
- /* Why check_ids() for scalar registers?
+ /*
+ * Linked register tracking uses rold->id to detect relationships.
+ * When rold->id == 0, the register is independent and any linking
+ * in rcur only adds constraints. When rold->id != 0, we must verify
+ * id mapping and (for BPF_ADD_CONST) offset consistency.
+ *
+ * +------------------+-----------+------------------+---------------+
+ * | | rold->id | rold + ADD_CONST | rold->id == 0 |
+ * |------------------+-----------+------------------+---------------|
+ * | rcur->id | range,ids | false | range |
+ * | rcur + ADD_CONST | false | range,ids,off | range |
+ * | rcur->id == 0 | range,ids | false | range |
+ * +------------------+-----------+------------------+---------------+
+ *
+ * Why check_ids() for scalar registers?
*
* Consider the following BPF code:
* 1: r6 = ... unbound scalar, ID=a ...
@@ -19160,9 +19798,22 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
* ---
* Also verify that new value satisfies old value range knowledge.
*/
- return range_within(rold, rcur) &&
- tnum_in(rold->var_off, rcur->var_off) &&
- check_scalar_ids(rold->id, rcur->id, idmap);
+
+ /* ADD_CONST mismatch: different linking semantics */
+ if ((rold->id & BPF_ADD_CONST) && !(rcur->id & BPF_ADD_CONST))
+ return false;
+
+ if (rold->id && !(rold->id & BPF_ADD_CONST) && (rcur->id & BPF_ADD_CONST))
+ return false;
+
+ /* Both have offset linkage: offsets must match */
+ if ((rold->id & BPF_ADD_CONST) && rold->off != rcur->off)
+ return false;
+
+ if (!check_scalar_ids(rold->id, rcur->id, idmap))
+ return false;
+
+ return range_within(rold, rcur) && tnum_in(rold->var_off, rcur->var_off);
case PTR_TO_MAP_KEY:
case PTR_TO_MAP_VALUE:
case PTR_TO_MEM:
@@ -19264,7 +19915,7 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
spi = i / BPF_REG_SIZE;
- if (exact != NOT_EXACT &&
+ if (exact == EXACT &&
(i >= cur->allocated_stack ||
old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
cur->stack[spi].slot_type[i % BPF_REG_SIZE]))
@@ -19470,8 +20121,10 @@ static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_stat
static void reset_idmap_scratch(struct bpf_verifier_env *env)
{
- env->idmap_scratch.tmp_id_gen = env->id_gen;
- memset(&env->idmap_scratch.map, 0, sizeof(env->idmap_scratch.map));
+ struct bpf_idmap *idmap = &env->idmap_scratch;
+
+ idmap->tmp_id_gen = env->id_gen;
+ idmap->cnt = 0;
}
static bool states_equal(struct bpf_verifier_env *env,
@@ -19835,8 +20488,10 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
}
}
if (bpf_calls_callback(env, insn_idx)) {
- if (states_equal(env, &sl->state, cur, RANGE_WITHIN))
+ if (states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
+ loop = true;
goto hit;
+ }
goto skip_inf_loop_check;
}
/* attempt to detect infinite loop to avoid unnecessary doomed work */
@@ -20041,6 +20696,8 @@ miss:
if (env->bpf_capable)
mark_all_scalars_imprecise(env, cur);
+ clear_singular_ids(env, cur);
+
/* add new state to the head of linked list */
new = &new_sl->state;
err = copy_verifier_state(new, cur);
@@ -20611,17 +21268,19 @@ static int do_check(struct bpf_verifier_env *env)
* may skip a nospec patched-in after the jump. This can
* currently never happen because nospec_result is only
* used for the write-ops
- * `*(size*)(dst_reg+off)=src_reg|imm32` which must
- * never skip the following insn. Still, add a warning
- * to document this in case nospec_result is used
- * elsewhere in the future.
+ * `*(size*)(dst_reg+off)=src_reg|imm32` and helper
+ * calls. These must never skip the following insn
+ * (i.e., bpf_insn_successors()'s opcode_info.can_jump
+ * is false). Still, add a warning to document this in
+ * case nospec_result is used elsewhere in the future.
*
* All non-branch instructions have a single
* fall-through edge. For these, nospec_result should
* already work.
*/
- if (verifier_bug_if(BPF_CLASS(insn->code) == BPF_JMP ||
- BPF_CLASS(insn->code) == BPF_JMP32, env,
+ if (verifier_bug_if((BPF_CLASS(insn->code) == BPF_JMP ||
+ BPF_CLASS(insn->code) == BPF_JMP32) &&
+ BPF_OP(insn->code) != BPF_CALL, env,
"speculation barrier after jump instruction may not have the desired effect"))
return -EFAULT;
process_bpf_exit:
@@ -20660,12 +21319,7 @@ static int find_btf_percpu_datasec(struct btf *btf)
* types to look at only module's own BTF types.
*/
n = btf_nr_types(btf);
- if (btf_is_module(btf))
- i = btf_nr_types(btf_vmlinux);
- else
- i = 1;
-
- for(; i < n; i++) {
+ for (i = btf_named_start_id(btf, true); i < n; i++) {
t = btf_type_by_id(btf, i);
if (BTF_INFO_KIND(t->info) != BTF_KIND_DATASEC)
continue;
@@ -20890,20 +21544,6 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
}
}
- if (btf_record_has_field(map->record, BPF_TIMER)) {
- if (is_tracing_prog_type(prog_type)) {
- verbose(env, "tracing progs cannot use bpf_timer yet\n");
- return -EINVAL;
- }
- }
-
- if (btf_record_has_field(map->record, BPF_WORKQUEUE)) {
- if (is_tracing_prog_type(prog_type)) {
- verbose(env, "tracing progs cannot use bpf_wq yet\n");
- return -EINVAL;
- }
- }
-
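/*
 * Illustrative sketch (not part of the patch): with the bpf_timer and
 * bpf_wq restrictions above removed, a tracing program can now arm a timer
 * stored in a map value. Helper signatures are the long-standing ones; the
 * map layout and attach point are hypothetical:
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char LICENSE[] SEC("license") = "GPL";

struct elem {
	struct bpf_timer t;
};

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, struct elem);
} timers SEC(".maps");

static int timer_cb(void *map, int *key, struct bpf_timer *timer)
{
	return 0;	/* one-shot */
}

SEC("fentry/do_nanosleep")
int BPF_PROG(arm_timer)
{
	int key = 0;
	struct elem *e = bpf_map_lookup_elem(&timers, &key);

	if (!e)
		return 0;
	bpf_timer_init(&e->t, &timers, 1 /* CLOCK_MONOTONIC */);
	bpf_timer_set_callback(&e->t, timer_cb);
	bpf_timer_start(&e->t, 0 /* fire now */, 0);
	return 0;
}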
if ((bpf_prog_is_offloaded(prog->aux) || bpf_map_is_offloaded(map)) &&
!bpf_offload_prog_map_match(prog, map)) {
verbose(env, "offload device mismatch between prog and map\n");
@@ -20935,6 +21575,7 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
case BPF_MAP_TYPE_STACK:
case BPF_MAP_TYPE_ARENA:
case BPF_MAP_TYPE_INSN_ARRAY:
+ case BPF_MAP_TYPE_PROG_ARRAY:
break;
default:
verbose(env,
@@ -21141,11 +21782,6 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
} else {
u32 off = insn[1].imm;
- if (off >= BPF_MAX_VAR_OFF) {
- verbose(env, "direct value offset of %u is not allowed\n", off);
- return -EINVAL;
- }
-
if (!map->ops->map_direct_value_addr) {
verbose(env, "no direct value access support for this map type\n");
return -EINVAL;
@@ -22446,6 +23082,12 @@ static int specialize_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_desc
} else if (func_id == special_kfunc_list[KF_bpf_dynptr_from_file]) {
if (!env->insn_aux_data[insn_idx].non_sleepable)
addr = (unsigned long)bpf_dynptr_from_file_sleepable;
+ } else if (func_id == special_kfunc_list[KF_bpf_arena_alloc_pages]) {
+ if (env->insn_aux_data[insn_idx].non_sleepable)
+ addr = (unsigned long)bpf_arena_alloc_pages_non_sleepable;
+ } else if (func_id == special_kfunc_list[KF_bpf_arena_free_pages]) {
+ if (env->insn_aux_data[insn_idx].non_sleepable)
+ addr = (unsigned long)bpf_arena_free_pages_non_sleepable;
}
desc->addr = addr;
return 0;
@@ -22498,8 +23140,7 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
if (!bpf_jit_supports_far_kfunc_call())
insn->imm = BPF_CALL_IMM(desc->addr);
- if (insn->off)
- return 0;
+
if (desc->func_id == special_kfunc_list[KF_bpf_obj_new_impl] ||
desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
@@ -22565,6 +23206,36 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
*cnt = 1;
+ } else if (desc->func_id == special_kfunc_list[KF_bpf_session_is_return] &&
+ env->prog->expected_attach_type == BPF_TRACE_FSESSION) {
+ /*
+ * inline bpf_session_is_return() for fsession:
+ * bool bpf_session_is_return(void *ctx)
+ * {
+ * return (((u64 *)ctx)[-1] >> BPF_TRAMP_IS_RETURN_SHIFT) & 1;
+ * }
+ */
+ insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
+ insn_buf[1] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, BPF_TRAMP_IS_RETURN_SHIFT);
+ insn_buf[2] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1);
+ *cnt = 3;
+ } else if (desc->func_id == special_kfunc_list[KF_bpf_session_cookie] &&
+ env->prog->expected_attach_type == BPF_TRACE_FSESSION) {
+ /*
+ * inline bpf_session_cookie() for fsession:
+ * __u64 *bpf_session_cookie(void *ctx)
+ * {
+ * u64 off = (((u64 *)ctx)[-1] >> BPF_TRAMP_COOKIE_INDEX_SHIFT) & 0xFF;
+ * return &((u64 *)ctx)[-off];
+ * }
+ */
+ insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
+ insn_buf[1] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, BPF_TRAMP_COOKIE_INDEX_SHIFT);
+ insn_buf[2] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF);
+ insn_buf[3] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
+ insn_buf[4] = BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1);
+ insn_buf[5] = BPF_ALU64_IMM(BPF_NEG, BPF_REG_0, 0);
+ *cnt = 6;
}
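/*
 * Illustrative sketch (not part of the patch): the inlined sequences above
 * assume the trampoline packs metadata into the u64 at ctx[-8]: nr_args in
 * the low byte (masked with 0xFF elsewhere in this series), an is-return
 * bit at BPF_TRAMP_IS_RETURN_SHIFT, and a cookie slot index at
 * BPF_TRAMP_COOKIE_INDEX_SHIFT. The exact layout is owned by the
 * trampoline code; decoded in plain C, the three accesses are:
 */
static inline u64 ctx_meta(void *ctx)
{
	return ((u64 *)ctx)[-1];
}

static inline u32 ctx_nr_args(void *ctx)
{
	return ctx_meta(ctx) & 0xFF;
}

static inline bool ctx_is_return(void *ctx)
{
	return (ctx_meta(ctx) >> BPF_TRAMP_IS_RETURN_SHIFT) & 1;
}

static inline u64 *ctx_session_cookie(void *ctx)
{
	u64 idx = (ctx_meta(ctx) >> BPF_TRAMP_COOKIE_INDEX_SHIFT) & 0xFF;

	return &((u64 *)ctx)[-(long)idx];
}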
if (env->insn_aux_data[insn_idx].arg_prog) {
@@ -23278,21 +23949,48 @@ patch_map_ops_generic:
insn = new_prog->insnsi + i + delta;
goto next_insn;
}
+
+ /* Implement bpf_get_current_task() and bpf_get_current_task_btf() inline. */
+ if ((insn->imm == BPF_FUNC_get_current_task || insn->imm == BPF_FUNC_get_current_task_btf) &&
+ verifier_inlines_helper_call(env, insn->imm)) {
+ insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, (u32)(unsigned long)&current_task);
+ insn_buf[1] = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0);
+ insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0);
+ cnt = 3;
+
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ goto next_insn;
+ }
#endif
/* Implement bpf_get_func_arg inline. */
if (prog_type == BPF_PROG_TYPE_TRACING &&
insn->imm == BPF_FUNC_get_func_arg) {
- /* Load nr_args from ctx - 8 */
- insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
- insn_buf[1] = BPF_JMP32_REG(BPF_JGE, BPF_REG_2, BPF_REG_0, 6);
- insn_buf[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 3);
- insn_buf[3] = BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1);
- insn_buf[4] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0);
- insn_buf[5] = BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
- insn_buf[6] = BPF_MOV64_IMM(BPF_REG_0, 0);
- insn_buf[7] = BPF_JMP_A(1);
- insn_buf[8] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL);
- cnt = 9;
+ if (eatype == BPF_TRACE_RAW_TP) {
+ int nr_args = btf_type_vlen(prog->aux->attach_func_proto);
+
+ /* skip 'void *__data' in btf_trace_##name(); store the remaining arg count in reg0 */
+ insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, nr_args - 1);
+ cnt = 1;
+ } else {
+ /* Load nr_args from ctx - 8 */
+ insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
+ insn_buf[1] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF);
+ cnt = 2;
+ }
+ insn_buf[cnt++] = BPF_JMP32_REG(BPF_JGE, BPF_REG_2, BPF_REG_0, 6);
+ insn_buf[cnt++] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 3);
+ insn_buf[cnt++] = BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1);
+ insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0);
+ insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
+ insn_buf[cnt++] = BPF_MOV64_IMM(BPF_REG_0, 0);
+ insn_buf[cnt++] = BPF_JMP_A(1);
+ insn_buf[cnt++] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL);
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
if (!new_prog)
@@ -23308,15 +24006,17 @@ patch_map_ops_generic:
if (prog_type == BPF_PROG_TYPE_TRACING &&
insn->imm == BPF_FUNC_get_func_ret) {
if (eatype == BPF_TRACE_FEXIT ||
+ eatype == BPF_TRACE_FSESSION ||
eatype == BPF_MODIFY_RETURN) {
/* Load nr_args from ctx - 8 */
insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
- insn_buf[1] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
- insn_buf[2] = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1);
- insn_buf[3] = BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
- insn_buf[4] = BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0);
- insn_buf[5] = BPF_MOV64_IMM(BPF_REG_0, 0);
- cnt = 6;
+ insn_buf[1] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF);
+ insn_buf[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
+ insn_buf[3] = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1);
+ insn_buf[4] = BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
+ insn_buf[5] = BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0);
+ insn_buf[6] = BPF_MOV64_IMM(BPF_REG_0, 0);
+ cnt = 7;
} else {
insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, -EOPNOTSUPP);
cnt = 1;
@@ -23335,13 +24035,24 @@ patch_map_ops_generic:
/* Implement get_func_arg_cnt inline. */
if (prog_type == BPF_PROG_TYPE_TRACING &&
insn->imm == BPF_FUNC_get_func_arg_cnt) {
- /* Load nr_args from ctx - 8 */
- insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
+ if (eatype == BPF_TRACE_RAW_TP) {
+ int nr_args = btf_type_vlen(prog->aux->attach_func_proto);
- new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
+ /* skip 'void *__data' in btf_trace_##name(); store the remaining arg count in reg0 */
+ insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, nr_args - 1);
+ cnt = 1;
+ } else {
+ /* Load nr_args from ctx - 8 */
+ insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
+ insn_buf[1] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF);
+ cnt = 2;
+ }
+
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
if (!new_prog)
return -ENOMEM;
+ delta += cnt - 1;
env->prog = prog = new_prog;
insn = new_prog->insnsi + i + delta;
goto next_insn;
@@ -24252,7 +24963,8 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
if (tgt_prog->type == BPF_PROG_TYPE_TRACING &&
prog_extension &&
(tgt_prog->expected_attach_type == BPF_TRACE_FENTRY ||
- tgt_prog->expected_attach_type == BPF_TRACE_FEXIT)) {
+ tgt_prog->expected_attach_type == BPF_TRACE_FEXIT ||
+ tgt_prog->expected_attach_type == BPF_TRACE_FSESSION)) {
/* Program extensions can extend all program types
* except fentry/fexit. The reason is the following.
* The fentry/fexit programs are used for performance
@@ -24267,7 +24979,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
* beyond reasonable stack size. Hence extending fentry
* is not allowed.
*/
- bpf_log(log, "Cannot extend fentry/fexit\n");
+ bpf_log(log, "Cannot extend fentry/fexit/fsession\n");
return -EINVAL;
}
} else {
@@ -24351,6 +25063,12 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
case BPF_LSM_CGROUP:
case BPF_TRACE_FENTRY:
case BPF_TRACE_FEXIT:
+ case BPF_TRACE_FSESSION:
+ if (prog->expected_attach_type == BPF_TRACE_FSESSION &&
+ !bpf_jit_supports_fsession()) {
+ bpf_log(log, "JIT does not support fsession\n");
+ return -EOPNOTSUPP;
+ }
if (!btf_type_is_func(t)) {
bpf_log(log, "attach_btf_id %u is not a function\n",
btf_id);
@@ -24517,6 +25235,7 @@ static bool can_be_sleepable(struct bpf_prog *prog)
case BPF_TRACE_FEXIT:
case BPF_MODIFY_RETURN:
case BPF_TRACE_ITER:
+ case BPF_TRACE_FSESSION:
return true;
default:
return false;
@@ -24598,9 +25317,10 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
tgt_info.tgt_name);
return -EINVAL;
} else if ((prog->expected_attach_type == BPF_TRACE_FEXIT ||
+ prog->expected_attach_type == BPF_TRACE_FSESSION ||
prog->expected_attach_type == BPF_MODIFY_RETURN) &&
btf_id_set_contains(&noreturn_deny, btf_id)) {
- verbose(env, "Attaching fexit/fmod_ret to __noreturn function '%s' is rejected.\n",
+ verbose(env, "Attaching fexit/fsession/fmod_ret to __noreturn function '%s' is rejected.\n",
tgt_info.tgt_name);
return -EINVAL;
}
@@ -24809,6 +25529,12 @@ static void compute_insn_live_regs(struct bpf_verifier_env *env,
case BPF_JMP32:
switch (code) {
case BPF_JA:
+ def = 0;
+ if (BPF_SRC(insn->code) == BPF_X)
+ use = dst;
+ else
+ use = 0;
+ break;
case BPF_JCOND:
def = 0;
use = 0;
@@ -25076,15 +25802,18 @@ dfs_continue:
}
/*
* Assign SCC number only if component has two or more elements,
- * or if component has a self reference.
+ * or if component has a self reference, or if the instruction is a
+ * callback-calling function (an implicit loop).
*/
- assign_scc = stack[stack_sz - 1] != w;
- for (j = 0; j < succ->cnt; ++j) {
+ assign_scc = stack[stack_sz - 1] != w; /* two or more elements? */
+ for (j = 0; j < succ->cnt; ++j) { /* self reference? */
if (succ->items[j] == w) {
assign_scc = true;
break;
}
}
+ if (bpf_calls_callback(env, w)) /* implicit loop? */
+ assign_scc = true;
/* Pop component elements from stack */
do {
t = stack[--stack_sz];
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 0bb8fa927e9e..7ccd84c17792 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -7275,9 +7275,9 @@ BTF_ID_FLAGS(func, scx_bpf_dsq_peek, KF_RCU_PROTECTED | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_iter_scx_dsq_new, KF_ITER_NEW | KF_RCU_PROTECTED)
BTF_ID_FLAGS(func, bpf_iter_scx_dsq_next, KF_ITER_NEXT | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_iter_scx_dsq_destroy, KF_ITER_DESTROY)
-BTF_ID_FLAGS(func, scx_bpf_exit_bstr, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, scx_bpf_error_bstr, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, scx_bpf_dump_bstr, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, scx_bpf_exit_bstr)
+BTF_ID_FLAGS(func, scx_bpf_error_bstr)
+BTF_ID_FLAGS(func, scx_bpf_dump_bstr)
BTF_ID_FLAGS(func, scx_bpf_reenqueue_local___v2)
BTF_ID_FLAGS(func, scx_bpf_cpuperf_cap)
BTF_ID_FLAGS(func, scx_bpf_cpuperf_cur)
@@ -7296,7 +7296,7 @@ BTF_ID_FLAGS(func, scx_bpf_cpu_curr, KF_RET_NULL | KF_RCU_PROTECTED)
BTF_ID_FLAGS(func, scx_bpf_task_cgroup, KF_RCU | KF_ACQUIRE)
#endif
BTF_ID_FLAGS(func, scx_bpf_now)
-BTF_ID_FLAGS(func, scx_bpf_events, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, scx_bpf_events)
BTF_KFUNCS_END(scx_kfunc_ids_any)
static const struct btf_kfunc_id_set scx_kfunc_set_any = {
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index bfa2ec46e075..d7042a09fe46 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -50,6 +50,9 @@ config HAVE_DYNAMIC_FTRACE_WITH_REGS
config HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
bool
+config HAVE_SINGLE_FTRACE_DIRECT_OPS
+ bool
+
config HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS
bool
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index fe28d86f7c35..f7baeb8278ca 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -830,7 +830,7 @@ static int bpf_send_signal_common(u32 sig, enum pid_type type, struct task_struc
info.si_code = SI_KERNEL;
info.si_pid = 0;
info.si_uid = 0;
- info.si_value.sival_ptr = (void *)(unsigned long)value;
+ info.si_value.sival_ptr = (void __user __force *)(unsigned long)value;
siginfo = &info;
}
@@ -1022,7 +1022,7 @@ const struct bpf_func_proto bpf_snprintf_btf_proto = {
.func = bpf_snprintf_btf,
.gpl_only = false,
.ret_type = RET_INTEGER,
- .arg1_type = ARG_PTR_TO_MEM,
+ .arg1_type = ARG_PTR_TO_MEM | MEM_WRITE,
.arg2_type = ARG_CONST_SIZE,
.arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg4_type = ARG_CONST_SIZE,
@@ -1194,7 +1194,7 @@ const struct bpf_func_proto bpf_get_branch_snapshot_proto = {
BPF_CALL_3(get_func_arg, void *, ctx, u32, n, u64 *, value)
{
/* This helper call is inlined by verifier. */
- u64 nr_args = ((u64 *)ctx)[-1];
+ u64 nr_args = ((u64 *)ctx)[-1] & 0xFF;
if ((u64) n >= nr_args)
return -EINVAL;
@@ -1214,7 +1214,7 @@ static const struct bpf_func_proto bpf_get_func_arg_proto = {
BPF_CALL_2(get_func_ret, void *, ctx, u64 *, value)
{
/* This helper call is inlined by verifier. */
- u64 nr_args = ((u64 *)ctx)[-1];
+ u64 nr_args = ((u64 *)ctx)[-1] & 0xFF;
*value = ((u64 *)ctx)[nr_args];
return 0;
@@ -1231,7 +1231,7 @@ static const struct bpf_func_proto bpf_get_func_ret_proto = {
BPF_CALL_1(get_func_arg_cnt, void *, ctx)
{
/* This helper call is inlined by verifier. */
- return ((u64 *)ctx)[-1];
+ return ((u64 *)ctx)[-1] & 0xFF;
}
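/*
 * Illustrative sketch (not part of the patch): program-visible behavior is
 * unchanged; the helpers still report the argument count, now taken from
 * the low byte of the packed metadata word. The attach point below is
 * hypothetical:
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char LICENSE[] SEC("license") = "GPL";

SEC("fentry/do_unlinkat")
int BPF_PROG(count_args)
{
	u64 nr = bpf_get_func_arg_cnt(ctx);	/* low byte of ctx[-1] */
	u64 arg0;

	if (nr > 0 && !bpf_get_func_arg(ctx, 0, &arg0))
		bpf_printk("arg0=%llx of %llu args", arg0, nr);
	return 0;
}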
static const struct bpf_func_proto bpf_get_func_arg_cnt_proto = {
@@ -1286,7 +1286,8 @@ static bool is_kprobe_multi(const struct bpf_prog *prog)
static inline bool is_kprobe_session(const struct bpf_prog *prog)
{
- return prog->expected_attach_type == BPF_TRACE_KPROBE_SESSION;
+ return prog->type == BPF_PROG_TYPE_KPROBE &&
+ prog->expected_attach_type == BPF_TRACE_KPROBE_SESSION;
}
static inline bool is_uprobe_multi(const struct bpf_prog *prog)
@@ -1297,7 +1298,14 @@ static inline bool is_uprobe_multi(const struct bpf_prog *prog)
static inline bool is_uprobe_session(const struct bpf_prog *prog)
{
- return prog->expected_attach_type == BPF_TRACE_UPROBE_SESSION;
+ return prog->type == BPF_PROG_TYPE_KPROBE &&
+ prog->expected_attach_type == BPF_TRACE_UPROBE_SESSION;
+}
+
+static inline bool is_trace_fsession(const struct bpf_prog *prog)
+{
+ return prog->type == BPF_PROG_TYPE_TRACING &&
+ prog->expected_attach_type == BPF_TRACE_FSESSION;
}
static const struct bpf_func_proto *
@@ -1526,7 +1534,7 @@ static const struct bpf_func_proto bpf_read_branch_records_proto = {
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
- .arg2_type = ARG_PTR_TO_MEM_OR_NULL,
+ .arg2_type = ARG_PTR_TO_MEM_OR_NULL | MEM_WRITE,
.arg3_type = ARG_CONST_SIZE_OR_ZERO,
.arg4_type = ARG_ANYTHING,
};
@@ -1661,7 +1669,7 @@ static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = {
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
- .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg2_type = ARG_PTR_TO_UNINIT_MEM,
.arg3_type = ARG_CONST_SIZE_OR_ZERO,
.arg4_type = ARG_ANYTHING,
};
@@ -1734,11 +1742,17 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_d_path:
return &bpf_d_path_proto;
case BPF_FUNC_get_func_arg:
- return bpf_prog_has_trampoline(prog) ? &bpf_get_func_arg_proto : NULL;
+ if (bpf_prog_has_trampoline(prog) ||
+ prog->expected_attach_type == BPF_TRACE_RAW_TP)
+ return &bpf_get_func_arg_proto;
+ return NULL;
case BPF_FUNC_get_func_ret:
return bpf_prog_has_trampoline(prog) ? &bpf_get_func_ret_proto : NULL;
case BPF_FUNC_get_func_arg_cnt:
- return bpf_prog_has_trampoline(prog) ? &bpf_get_func_arg_cnt_proto : NULL;
+ if (bpf_prog_has_trampoline(prog) ||
+ prog->expected_attach_type == BPF_TRACE_RAW_TP)
+ return &bpf_get_func_arg_cnt_proto;
+ return NULL;
case BPF_FUNC_get_attach_cookie:
if (prog->type == BPF_PROG_TYPE_TRACING &&
prog->expected_attach_type == BPF_TRACE_RAW_TP)
@@ -2063,7 +2077,7 @@ void __bpf_trace_run(struct bpf_raw_tp_link *link, u64 *args)
struct bpf_trace_run_ctx run_ctx;
cant_sleep();
- if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
+ if (unlikely(!bpf_prog_get_recursion_context(prog))) {
bpf_prog_inc_misses_counter(prog);
goto out;
}
@@ -2077,7 +2091,7 @@ void __bpf_trace_run(struct bpf_raw_tp_link *link, u64 *args)
bpf_reset_run_ctx(old_run_ctx);
out:
- this_cpu_dec(*(prog->active));
+ bpf_prog_put_recursion_context(prog);
}
#define UNPACK(...) __VA_ARGS__
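The per-program `active` counter is replaced here by bpf_prog_get_recursion_context() / bpf_prog_put_recursion_context(). A rough sketch of the idea, assuming the helpers guard re-entry per execution context (task/softirq/hardirq/NMI) rather than with one per-CPU counter, so an unrelated interrupting context is not falsely flagged as recursion; this body is illustrative, not the patch's implementation:

	/* Illustrative per-context recursion guard. interrupt_context_level()
	 * returns 0..3 for task/softirq/hardirq/NMI; the matching put path
	 * decrements the same slot.
	 */
	static bool get_recursion_context_sketch(int *pcpu_counters)
	{
		int level = interrupt_context_level();

		if (pcpu_counters[level]++) {
			pcpu_counters[level]--;	/* undo: recursion detected */
			return false;
		}
		return true;
	}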
@@ -2564,6 +2578,7 @@ kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link,
old_run_ctx = bpf_set_run_ctx(&run_ctx.session_ctx.run_ctx);
err = bpf_prog_run(link->link.prog, regs);
bpf_reset_run_ctx(old_run_ctx);
+ ftrace_partial_regs_update(fregs, bpf_kprobe_multi_pt_regs_ptr());
rcu_read_unlock();
out:
@@ -3316,7 +3331,7 @@ static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
__bpf_kfunc_start_defs();
-__bpf_kfunc bool bpf_session_is_return(void)
+__bpf_kfunc bool bpf_session_is_return(void *ctx)
{
struct bpf_session_run_ctx *session_ctx;
@@ -3324,7 +3339,7 @@ __bpf_kfunc bool bpf_session_is_return(void)
return session_ctx->is_return;
}
-__bpf_kfunc __u64 *bpf_session_cookie(void)
+__bpf_kfunc __u64 *bpf_session_cookie(void *ctx)
{
struct bpf_session_run_ctx *session_ctx;
@@ -3334,34 +3349,39 @@ __bpf_kfunc __u64 *bpf_session_cookie(void)
__bpf_kfunc_end_defs();
-BTF_KFUNCS_START(kprobe_multi_kfunc_set_ids)
+BTF_KFUNCS_START(session_kfunc_set_ids)
BTF_ID_FLAGS(func, bpf_session_is_return)
BTF_ID_FLAGS(func, bpf_session_cookie)
-BTF_KFUNCS_END(kprobe_multi_kfunc_set_ids)
+BTF_KFUNCS_END(session_kfunc_set_ids)
-static int bpf_kprobe_multi_filter(const struct bpf_prog *prog, u32 kfunc_id)
+static int bpf_session_filter(const struct bpf_prog *prog, u32 kfunc_id)
{
- if (!btf_id_set8_contains(&kprobe_multi_kfunc_set_ids, kfunc_id))
+ if (!btf_id_set8_contains(&session_kfunc_set_ids, kfunc_id))
return 0;
- if (!is_kprobe_session(prog) && !is_uprobe_session(prog))
+ if (!is_kprobe_session(prog) && !is_uprobe_session(prog) && !is_trace_fsession(prog))
return -EACCES;
return 0;
}
-static const struct btf_kfunc_id_set bpf_kprobe_multi_kfunc_set = {
+static const struct btf_kfunc_id_set bpf_session_kfunc_set = {
.owner = THIS_MODULE,
- .set = &kprobe_multi_kfunc_set_ids,
- .filter = bpf_kprobe_multi_filter,
+ .set = &session_kfunc_set_ids,
+ .filter = bpf_session_filter,
};
-static int __init bpf_kprobe_multi_kfuncs_init(void)
+static int __init bpf_trace_kfuncs_init(void)
{
- return register_btf_kfunc_id_set(BPF_PROG_TYPE_KPROBE, &bpf_kprobe_multi_kfunc_set);
+ int err = 0;
+
+ err = err ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_KPROBE, &bpf_session_kfunc_set);
+ err = err ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_session_kfunc_set);
+
+ return err;
}
-late_initcall(bpf_kprobe_multi_kfuncs_init);
+late_initcall(bpf_trace_kfuncs_init);
typedef int (*copy_fn_t)(void *dst, const void *src, u32 size, struct task_struct *tsk);
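For reference, a minimal BPF-side user of the renamed session kfunc set; with BPF_TRACE_FSESSION the same pair becomes callable from BPF_PROG_TYPE_TRACING programs as well. Assumptions: the `void *ctx` argument added above is supplied implicitly by the verifier, so the BPF-source call stays argument-free, and the SEC name/attach target are illustrative:

	extern bool bpf_session_is_return(void) __ksym;
	extern __u64 *bpf_session_cookie(void) __ksym;

	SEC("kprobe.session/do_unlinkat")
	int handle(struct pt_regs *regs)
	{
		__u64 *cookie = bpf_session_cookie();

		if (!cookie)
			return 0;
		if (!bpf_session_is_return()) {
			*cookie = bpf_ktime_get_ns();	/* entry: stash timestamp */
			return 0;
		}
		bpf_printk("latency %llu ns", bpf_ktime_get_ns() - *cookie);
		return 0;
	}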
@@ -3517,7 +3537,7 @@ __bpf_kfunc int bpf_send_signal_task(struct task_struct *task, int sig, enum pid
__bpf_kfunc int bpf_probe_read_user_dynptr(struct bpf_dynptr *dptr, u64 off,
u64 size, const void __user *unsafe_ptr__ign)
{
- return __bpf_dynptr_copy(dptr, off, size, (const void *)unsafe_ptr__ign,
+ return __bpf_dynptr_copy(dptr, off, size, (const void __force *)unsafe_ptr__ign,
copy_user_data_nofault, NULL);
}
@@ -3531,7 +3551,7 @@ __bpf_kfunc int bpf_probe_read_kernel_dynptr(struct bpf_dynptr *dptr, u64 off,
__bpf_kfunc int bpf_probe_read_user_str_dynptr(struct bpf_dynptr *dptr, u64 off,
u64 size, const void __user *unsafe_ptr__ign)
{
- return __bpf_dynptr_copy_str(dptr, off, size, (const void *)unsafe_ptr__ign,
+ return __bpf_dynptr_copy_str(dptr, off, size, (const void __force *)unsafe_ptr__ign,
copy_user_str_nofault, NULL);
}
@@ -3545,14 +3565,14 @@ __bpf_kfunc int bpf_probe_read_kernel_str_dynptr(struct bpf_dynptr *dptr, u64 of
__bpf_kfunc int bpf_copy_from_user_dynptr(struct bpf_dynptr *dptr, u64 off,
u64 size, const void __user *unsafe_ptr__ign)
{
- return __bpf_dynptr_copy(dptr, off, size, (const void *)unsafe_ptr__ign,
+ return __bpf_dynptr_copy(dptr, off, size, (const void __force *)unsafe_ptr__ign,
copy_user_data_sleepable, NULL);
}
__bpf_kfunc int bpf_copy_from_user_str_dynptr(struct bpf_dynptr *dptr, u64 off,
u64 size, const void __user *unsafe_ptr__ign)
{
- return __bpf_dynptr_copy_str(dptr, off, size, (const void *)unsafe_ptr__ign,
+ return __bpf_dynptr_copy_str(dptr, off, size, (const void __force *)unsafe_ptr__ign,
copy_user_str_sleepable, NULL);
}
@@ -3560,7 +3580,7 @@ __bpf_kfunc int bpf_copy_from_user_task_dynptr(struct bpf_dynptr *dptr, u64 off,
u64 size, const void __user *unsafe_ptr__ign,
struct task_struct *tsk)
{
- return __bpf_dynptr_copy(dptr, off, size, (const void *)unsafe_ptr__ign,
+ return __bpf_dynptr_copy(dptr, off, size, (const void __force *)unsafe_ptr__ign,
copy_user_data_sleepable, tsk);
}
@@ -3568,7 +3588,7 @@ __bpf_kfunc int bpf_copy_from_user_task_str_dynptr(struct bpf_dynptr *dptr, u64
u64 size, const void __user *unsafe_ptr__ign,
struct task_struct *tsk)
{
- return __bpf_dynptr_copy_str(dptr, off, size, (const void *)unsafe_ptr__ign,
+ return __bpf_dynptr_copy_str(dptr, off, size, (const void __force *)unsafe_ptr__ign,
copy_user_str_sleepable, tsk);
}
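The `__force` additions throughout this file are sparse-annotation fixes rather than behavioral changes: `unsafe_ptr__ign` is declared `__user`, while the nofault/sleepable copy callbacks take a plain kernel `const void *`, so dropping the address space without `__force` triggers a sparse warning. A minimal illustration:

	/* sparse flags a plain cast that removes an address space. */
	static int addrspace_example(const void __user *uptr)
	{
		const void *warns = (const void *)uptr;		/* sparse warning */
		const void *clean = (const void __force *)uptr;	/* intentional */

		return warns == clean;	/* identical bits at runtime */
	}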
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index aa758efc3731..f9b10c633bdd 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -68,7 +68,6 @@
})
/* hash bits for specific function selection */
-#define FTRACE_HASH_DEFAULT_BITS 10
#define FTRACE_HASH_MAX_BITS 12
#ifdef CONFIG_DYNAMIC_FTRACE
@@ -1210,8 +1209,8 @@ static void __add_hash_entry(struct ftrace_hash *hash,
hash->count++;
}
-static struct ftrace_func_entry *
-add_hash_entry(struct ftrace_hash *hash, unsigned long ip)
+struct ftrace_func_entry *
+add_ftrace_hash_entry_direct(struct ftrace_hash *hash, unsigned long ip, unsigned long direct)
{
struct ftrace_func_entry *entry;
@@ -1220,11 +1219,18 @@ add_hash_entry(struct ftrace_hash *hash, unsigned long ip)
return NULL;
entry->ip = ip;
+ entry->direct = direct;
__add_hash_entry(hash, entry);
return entry;
}
+static struct ftrace_func_entry *
+add_hash_entry(struct ftrace_hash *hash, unsigned long ip)
+{
+ return add_ftrace_hash_entry_direct(hash, ip, 0);
+}
+
static void
free_hash_entry(struct ftrace_hash *hash,
struct ftrace_func_entry *entry)
@@ -1283,7 +1289,7 @@ static void clear_ftrace_mod_list(struct list_head *head)
mutex_unlock(&ftrace_lock);
}
-static void free_ftrace_hash(struct ftrace_hash *hash)
+void free_ftrace_hash(struct ftrace_hash *hash)
{
if (!hash || hash == EMPTY_HASH)
return;
@@ -1323,7 +1329,7 @@ void ftrace_free_filter(struct ftrace_ops *ops)
}
EXPORT_SYMBOL_GPL(ftrace_free_filter);
-static struct ftrace_hash *alloc_ftrace_hash(int size_bits)
+struct ftrace_hash *alloc_ftrace_hash(int size_bits)
{
struct ftrace_hash *hash;
int size;
@@ -1397,7 +1403,7 @@ alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash)
size = 1 << hash->size_bits;
for (i = 0; i < size; i++) {
hlist_for_each_entry(entry, &hash->buckets[i], hlist) {
- if (add_hash_entry(new_hash, entry->ip) == NULL)
+ if (add_ftrace_hash_entry_direct(new_hash, entry->ip, entry->direct) == NULL)
goto free_hash;
}
}
@@ -2068,7 +2074,7 @@ static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops,
*/
if (!ops->ops_func)
return -EBUSY;
- ret = ops->ops_func(ops, FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF);
+ ret = ops->ops_func(ops, rec->ip, FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF);
if (ret)
return ret;
} else if (is_ipmodify) {
@@ -2624,8 +2630,13 @@ unsigned long ftrace_find_rec_direct(unsigned long ip)
static void call_direct_funcs(unsigned long ip, unsigned long pip,
struct ftrace_ops *ops, struct ftrace_regs *fregs)
{
- unsigned long addr = READ_ONCE(ops->direct_call);
+ unsigned long addr;
+#ifdef CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS
+ addr = ftrace_find_rec_direct(ip);
+#else
+ addr = READ_ONCE(ops->direct_call);
+#endif
if (!addr)
return;
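With CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS, one ftrace_ops serves many ip -> trampoline mappings, so the trampoline address can no longer live in a single ops->direct_call field; it is resolved per ip from the shared direct_functions hash. Conceptually, ftrace_find_rec_direct() reduces to this simplified sketch:

	/* Simplified: resolve the direct trampoline for an ip. */
	static unsigned long find_direct_sketch(unsigned long ip)
	{
		struct ftrace_func_entry *entry;

		entry = __ftrace_lookup_ip(direct_functions, ip);
		return entry ? entry->direct : 0;
	}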
@@ -6049,15 +6060,8 @@ int register_ftrace_direct(struct ftrace_ops *ops, unsigned long addr)
if (ftrace_hash_empty(hash))
return -EINVAL;
- /* This is a "raw" address, and this should never happen. */
- if (WARN_ON_ONCE(ftrace_is_jmp(addr)))
- return -EINVAL;
-
mutex_lock(&direct_mutex);
- if (ops->flags & FTRACE_OPS_FL_JMP)
- addr = ftrace_jmp_set(addr);
-
/* Make sure requested entries are not already registered.. */
size = 1 << hash->size_bits;
for (i = 0; i < size; i++) {
@@ -6178,13 +6182,6 @@ __modify_ftrace_direct(struct ftrace_ops *ops, unsigned long addr)
lockdep_assert_held_once(&direct_mutex);
- /* This is a "raw" address, and this should never happen. */
- if (WARN_ON_ONCE(ftrace_is_jmp(addr)))
- return -EINVAL;
-
- if (ops->flags & FTRACE_OPS_FL_JMP)
- addr = ftrace_jmp_set(addr);
-
/* Enable the tmp_ops to have the same functions as the direct ops */
ftrace_ops_init(&tmp_ops);
tmp_ops.func_hash = ops->func_hash;
@@ -6289,6 +6286,368 @@ int modify_ftrace_direct(struct ftrace_ops *ops, unsigned long addr)
return err;
}
EXPORT_SYMBOL_GPL(modify_ftrace_direct);
+
+static unsigned long hash_count(struct ftrace_hash *hash)
+{
+ return hash ? hash->count : 0;
+}
+
+/**
+ * hash_add - merges the entries of two struct ftrace_hash objects and returns the result
+ * @a: struct ftrace_hash object
+ * @b: struct ftrace_hash object
+ *
+ * Returns struct ftrace_hash object on success, NULL on error.
+ */
+static struct ftrace_hash *hash_add(struct ftrace_hash *a, struct ftrace_hash *b)
+{
+ struct ftrace_func_entry *entry;
+ struct ftrace_hash *add;
+ int size;
+
+ size = hash_count(a) + hash_count(b);
+ if (size > 32)
+ size = 32;
+
+ add = alloc_and_copy_ftrace_hash(fls(size), a);
+ if (!add)
+ return NULL;
+
+ size = 1 << b->size_bits;
+ for (int i = 0; i < size; i++) {
+ hlist_for_each_entry(entry, &b->buckets[i], hlist) {
+ if (add_ftrace_hash_entry_direct(add, entry->ip, entry->direct) == NULL) {
+ free_ftrace_hash(add);
+ return NULL;
+ }
+ }
+ }
+ return add;
+}
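On the sizing above: the combined entry count is capped at 32, so the new hash is allocated with fls(size) bits, i.e. at most 2^6 = 64 buckets:

	/* fls() outcomes for the sizing logic above:
	 * count  1 -> fls(1)  = 1 bit  ->  2 buckets
	 * count 20 -> fls(20) = 5 bits -> 32 buckets
	 * count 50 -> capped at 32, fls(32) = 6 bits -> 64 buckets
	 */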
+
+/**
+ * update_ftrace_direct_add - Updates @ops by adding direct
+ * callers provided in @hash
+ * @ops: The address of the struct ftrace_ops object
+ * @hash: The address of the struct ftrace_hash object
+ *
+ * This is used to add custom direct callers (ip -> addr) to @ops,
+ * specified in @hash. The @ops will be either registered or updated.
+ *
+ * Returns: zero on success. Non-zero on error, which includes:
+ * -EINVAL - The @hash is empty
+ */
+int update_ftrace_direct_add(struct ftrace_ops *ops, struct ftrace_hash *hash)
+{
+ struct ftrace_hash *old_direct_functions = NULL;
+ struct ftrace_hash *new_direct_functions;
+ struct ftrace_hash *old_filter_hash;
+ struct ftrace_hash *new_filter_hash = NULL;
+ struct ftrace_func_entry *entry;
+ int err = -EINVAL;
+ int size;
+ bool reg;
+
+ if (!hash_count(hash))
+ return -EINVAL;
+
+ mutex_lock(&direct_mutex);
+
+ /* Make sure requested entries are not already registered. */
+ size = 1 << hash->size_bits;
+ for (int i = 0; i < size; i++) {
+ hlist_for_each_entry(entry, &hash->buckets[i], hlist) {
+ if (__ftrace_lookup_ip(direct_functions, entry->ip))
+ goto out_unlock;
+ }
+ }
+
+ old_filter_hash = ops->func_hash ? ops->func_hash->filter_hash : NULL;
+
+ /* If there's nothing in filter_hash we need to register the ops. */
+ reg = hash_count(old_filter_hash) == 0;
+ if (reg) {
+ if (ops->func || ops->trampoline)
+ goto out_unlock;
+ if (ops->flags & FTRACE_OPS_FL_ENABLED)
+ goto out_unlock;
+ }
+
+ err = -ENOMEM;
+ new_filter_hash = hash_add(old_filter_hash, hash);
+ if (!new_filter_hash)
+ goto out_unlock;
+
+ new_direct_functions = hash_add(direct_functions, hash);
+ if (!new_direct_functions)
+ goto out_unlock;
+
+ old_direct_functions = direct_functions;
+ rcu_assign_pointer(direct_functions, new_direct_functions);
+
+ if (reg) {
+ ops->func = call_direct_funcs;
+ ops->flags |= MULTI_FLAGS;
+ ops->trampoline = FTRACE_REGS_ADDR;
+ ops->local_hash.filter_hash = new_filter_hash;
+
+ err = register_ftrace_function_nolock(ops);
+ if (err) {
+ /* restore old filter on error */
+ ops->local_hash.filter_hash = old_filter_hash;
+
+ /* clean up for a possible later register call */
+ ops->func = NULL;
+ ops->trampoline = 0;
+ } else {
+ new_filter_hash = old_filter_hash;
+ }
+ } else {
+ err = ftrace_update_ops(ops, new_filter_hash, EMPTY_HASH);
+ /*
+ * new_filter_hash is dup-ed, so we need to release it anyway;
+ * old_filter_hash either stays (on error) or is already released
+ */
+ }
+
+ if (err) {
+ /* reset direct_functions and free the new one */
+ rcu_assign_pointer(direct_functions, old_direct_functions);
+ old_direct_functions = new_direct_functions;
+ }
+
+ out_unlock:
+ mutex_unlock(&direct_mutex);
+
+ if (old_direct_functions && old_direct_functions != EMPTY_HASH)
+ call_rcu_tasks(&old_direct_functions->rcu, register_ftrace_direct_cb);
+ free_ftrace_hash(new_filter_hash);
+
+ return err;
+}
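A hedged usage sketch of the new API, combining the helpers made non-static above (error handling trimmed; `my_trampoline` and the ips are placeholders; note that update_ftrace_direct_add() copies the hash, so the caller still frees its own):

	static struct ftrace_ops direct_ops;	/* zeroed: no ->func, not enabled */

	static int attach_two(unsigned long ip1, unsigned long ip2,
			      unsigned long my_trampoline)
	{
		struct ftrace_hash *hash = alloc_ftrace_hash(4);
		int err = -ENOMEM;

		if (!hash)
			return err;
		if (add_ftrace_hash_entry_direct(hash, ip1, my_trampoline) &&
		    add_ftrace_hash_entry_direct(hash, ip2, my_trampoline))
			err = update_ftrace_direct_add(&direct_ops, hash);
		free_ftrace_hash(hash);
		return err;
	}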
+
+/**
+ * hash_sub - subtracts @b from @a and returns the result
+ * @a: struct ftrace_hash object
+ * @b: struct ftrace_hash object
+ *
+ * Returns struct ftrace_hash object on success, NULL on error.
+ */
+static struct ftrace_hash *hash_sub(struct ftrace_hash *a, struct ftrace_hash *b)
+{
+ struct ftrace_func_entry *entry, *del;
+ struct ftrace_hash *sub;
+ int size;
+
+ sub = alloc_and_copy_ftrace_hash(a->size_bits, a);
+ if (!sub)
+ return NULL;
+
+ size = 1 << b->size_bits;
+ for (int i = 0; i < size; i++) {
+ hlist_for_each_entry(entry, &b->buckets[i], hlist) {
+ del = __ftrace_lookup_ip(sub, entry->ip);
+ if (WARN_ON_ONCE(!del)) {
+ free_ftrace_hash(sub);
+ return NULL;
+ }
+ remove_hash_entry(sub, del);
+ kfree(del);
+ }
+ }
+ return sub;
+}
+
+/**
+ * update_ftrace_direct_del - Updates @ops by removing its direct
+ * callers provided in @hash
+ * @ops: The address of the struct ftrace_ops object
+ * @hash: The address of the struct ftrace_hash object
+ *
+ * This is used to delete custom direct callers (ip -> addr) in
+ * @ops specified via @hash. The @ops will be either unregistered
+ * or updated.
+ *
+ * Returns: zero on success. Non-zero on error, which includes:
+ * -EINVAL - The @hash is empty
+ * -EINVAL - The @ops is not registered
+ */
+int update_ftrace_direct_del(struct ftrace_ops *ops, struct ftrace_hash *hash)
+{
+ struct ftrace_hash *old_direct_functions = NULL;
+ struct ftrace_hash *new_direct_functions;
+ struct ftrace_hash *new_filter_hash = NULL;
+ struct ftrace_hash *old_filter_hash;
+ struct ftrace_func_entry *entry;
+ struct ftrace_func_entry *del;
+ unsigned long size;
+ int err = -EINVAL;
+
+ if (!hash_count(hash))
+ return -EINVAL;
+ if (check_direct_multi(ops))
+ return -EINVAL;
+ if (!(ops->flags & FTRACE_OPS_FL_ENABLED))
+ return -EINVAL;
+ if (direct_functions == EMPTY_HASH)
+ return -EINVAL;
+
+ mutex_lock(&direct_mutex);
+
+ old_filter_hash = ops->func_hash ? ops->func_hash->filter_hash : NULL;
+
+ if (!hash_count(old_filter_hash))
+ goto out_unlock;
+
+ /* Make sure requested entries are already registered. */
+ size = 1 << hash->size_bits;
+ for (int i = 0; i < size; i++) {
+ hlist_for_each_entry(entry, &hash->buckets[i], hlist) {
+ del = __ftrace_lookup_ip(direct_functions, entry->ip);
+ if (!del || del->direct != entry->direct)
+ goto out_unlock;
+ }
+ }
+
+ err = -ENOMEM;
+ new_filter_hash = hash_sub(old_filter_hash, hash);
+ if (!new_filter_hash)
+ goto out_unlock;
+
+ new_direct_functions = hash_sub(direct_functions, hash);
+ if (!new_direct_functions)
+ goto out_unlock;
+
+ /* If there's nothing left, we need to unregister the ops. */
+ if (ftrace_hash_empty(new_filter_hash)) {
+ err = unregister_ftrace_function(ops);
+ if (!err) {
+ /* clean up for a possible later register call */
+ ops->func = NULL;
+ ops->trampoline = 0;
+ ftrace_free_filter(ops);
+ ops->func_hash->filter_hash = NULL;
+ }
+ } else {
+ err = ftrace_update_ops(ops, new_filter_hash, EMPTY_HASH);
+ /*
+ * new_filter_hash is dup-ed, so we need to release it anyway;
+ * old_filter_hash either stays (on error) or is already released
+ */
+ }
+
+ if (err) {
+ /* free the new_direct_functions */
+ old_direct_functions = new_direct_functions;
+ } else {
+ old_direct_functions = direct_functions;
+ rcu_assign_pointer(direct_functions, new_direct_functions);
+ }
+
+ out_unlock:
+ mutex_unlock(&direct_mutex);
+
+ if (old_direct_functions && old_direct_functions != EMPTY_HASH)
+ call_rcu_tasks(&old_direct_functions->rcu, register_ftrace_direct_cb);
+ free_ftrace_hash(new_filter_hash);
+
+ return err;
+}
+
+/**
+ * update_ftrace_direct_mod - Updates @ops by modifying its direct
+ * callers provided in @hash
+ * @ops: The address of the struct ftrace_ops object
+ * @hash: The address of the struct ftrace_hash object
+ * @do_direct_lock: If true, lock the direct_mutex
+ *
+ * This is used to modify custom direct callers (ip -> addr) in
+ * @ops specified via @hash.
+ *
+ * This can be called from within ftrace ops_func callback with
+ * direct_mutex already locked, in which case @do_direct_lock
+ * needs to be false.
+ *
+ * Returns: zero on success. Non-zero on error, which includes:
+ * -EINVAL - The @hash is empty
+ * -EINVAL - The @ops is not registered
+ */
+int update_ftrace_direct_mod(struct ftrace_ops *ops, struct ftrace_hash *hash, bool do_direct_lock)
+{
+ struct ftrace_func_entry *entry, *tmp;
+ static struct ftrace_ops tmp_ops = {
+ .func = ftrace_stub,
+ .flags = FTRACE_OPS_FL_STUB,
+ };
+ struct ftrace_hash *orig_hash;
+ unsigned long size, i;
+ int err = -EINVAL;
+
+ if (!hash_count(hash))
+ return -EINVAL;
+ if (check_direct_multi(ops))
+ return -EINVAL;
+ if (!(ops->flags & FTRACE_OPS_FL_ENABLED))
+ return -EINVAL;
+ if (direct_functions == EMPTY_HASH)
+ return -EINVAL;
+
+ /*
+ * We can be called from within ops_func callback with direct_mutex
+ * already taken.
+ */
+ if (do_direct_lock)
+ mutex_lock(&direct_mutex);
+
+ orig_hash = ops->func_hash ? ops->func_hash->filter_hash : NULL;
+ if (!orig_hash)
+ goto unlock;
+
+ /* Enable the tmp_ops to have the same functions as the direct ops */
+ ftrace_ops_init(&tmp_ops);
+ tmp_ops.func_hash = ops->func_hash;
+
+ err = register_ftrace_function_nolock(&tmp_ops);
+ if (err)
+ goto unlock;
+
+ /*
+ * Call __ftrace_hash_update_ipmodify() here, so that we can call
+ * ops->ops_func for the ops. This is needed because the above
+ * register_ftrace_function_nolock() worked on tmp_ops.
+ */
+ err = __ftrace_hash_update_ipmodify(ops, orig_hash, orig_hash, true);
+ if (err)
+ goto out;
+
+ /*
+ * Now the ftrace_ops_list_func() is called to do the direct callers.
+ * We can safely change the direct functions attached to each entry.
+ */
+ mutex_lock(&ftrace_lock);
+
+ size = 1 << hash->size_bits;
+ for (i = 0; i < size; i++) {
+ hlist_for_each_entry(entry, &hash->buckets[i], hlist) {
+ tmp = __ftrace_lookup_ip(direct_functions, entry->ip);
+ if (!tmp)
+ continue;
+ tmp->direct = entry->direct;
+ }
+ }
+
+ mutex_unlock(&ftrace_lock);
+
+out:
+ /* Removing the tmp_ops will add the updated direct callers to the functions */
+ unregister_ftrace_function(&tmp_ops);
+
+unlock:
+ if (do_direct_lock)
+ mutex_unlock(&direct_mutex);
+ return err;
+}
+
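Correspondingly, a caller retargeting already-attached entries can run with or without direct_mutex held; the flag only controls locking. A small sketch (names are placeholders):

	/* From an ops_func callback direct_mutex is already held, so pass
	 * do_direct_lock = false; from ordinary context pass true.
	 */
	static int retarget(struct ftrace_ops *ops, struct ftrace_hash *hash,
			    bool from_ops_func)
	{
		return update_ftrace_direct_mod(ops, hash, !from_ops_func);
	}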
#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
/**
@@ -8709,7 +9068,7 @@ static int prepare_direct_functions_for_ipmodify(struct ftrace_ops *ops)
if (!op->ops_func)
return -EBUSY;
- ret = op->ops_func(op, FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER);
+ ret = op->ops_func(op, ip, FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER);
if (ret)
return ret;
}
@@ -8756,7 +9115,7 @@ static void cleanup_direct_functions_after_ipmodify(struct ftrace_ops *ops)
/* The cleanup is optional, ignore any errors */
if (found_op && op->ops_func)
- op->ops_func(op, FTRACE_OPS_CMD_DISABLE_SHARE_IPMODIFY_PEER);
+ op->ops_func(op, ip, FTRACE_OPS_CMD_DISABLE_SHARE_IPMODIFY_PEER);
}
}
mutex_unlock(&direct_mutex);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index ba36939fda79..60281c4f9e99 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -388,18 +388,13 @@ config DEBUG_INFO_BTF
depends on !DEBUG_INFO_SPLIT && !DEBUG_INFO_REDUCED
depends on !GCC_PLUGIN_RANDSTRUCT || COMPILE_TEST
depends on BPF_SYSCALL
- depends on PAHOLE_VERSION >= 116
- depends on DEBUG_INFO_DWARF4 || PAHOLE_VERSION >= 121
+ depends on PAHOLE_VERSION >= 122
# pahole uses elfutils, which does not have support for Hexagon relocations
depends on !HEXAGON
help
Generate deduplicated BTF type information from DWARF debug info.
- Turning this on requires pahole v1.16 or later (v1.21 or later to
- support DWARF 5), which will convert DWARF type info into equivalent
- deduplicated BTF type info.
-
-config PAHOLE_HAS_SPLIT_BTF
- def_bool PAHOLE_VERSION >= 119
+ Turning this on requires pahole v1.22 or later, which will convert
+ DWARF type info into equivalent deduplicated BTF type info.
config PAHOLE_HAS_BTF_TAG
def_bool PAHOLE_VERSION >= 123
@@ -421,7 +416,7 @@ config PAHOLE_HAS_LANG_EXCLUDE
config DEBUG_INFO_BTF_MODULES
bool "Generate BTF type information for kernel modules"
default y
- depends on DEBUG_INFO_BTF && MODULES && PAHOLE_HAS_SPLIT_BTF
+ depends on DEBUG_INFO_BTF && MODULES
help
Generate compact split BTF type information for kernel modules.
diff --git a/mm/Makefile b/mm/Makefile
index 2d0570a16e5b..bf46fe31dc14 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -106,6 +106,9 @@ obj-$(CONFIG_MEMCG) += memcontrol.o vmpressure.o
ifdef CONFIG_SWAP
obj-$(CONFIG_MEMCG) += swap_cgroup.o
endif
+ifdef CONFIG_BPF_SYSCALL
+obj-$(CONFIG_MEMCG) += bpf_memcontrol.o
+endif
obj-$(CONFIG_CGROUP_HUGETLB) += hugetlb_cgroup.o
obj-$(CONFIG_GUP_TEST) += gup_test.o
obj-$(CONFIG_DMAPOOL_TEST) += dmapool_test.o
diff --git a/mm/bpf_memcontrol.c b/mm/bpf_memcontrol.c
new file mode 100644
index 000000000000..716df49d7647
--- /dev/null
+++ b/mm/bpf_memcontrol.c
@@ -0,0 +1,193 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Memory Controller-related BPF kfuncs and auxiliary code
+ *
+ * Author: Roman Gushchin <roman.gushchin@linux.dev>
+ */
+
+#include <linux/memcontrol.h>
+#include <linux/bpf.h>
+
+__bpf_kfunc_start_defs();
+
+/**
+ * bpf_get_root_mem_cgroup - Returns a pointer to the root memory cgroup
+ *
+ * The function has KF_ACQUIRE semantics, even though the root memory
+ * cgroup is never destroyed after being created and doesn't require
+ * reference counting. It is nevertheless perfectly safe to pass the
+ * result to bpf_put_mem_cgroup().
+ *
+ * Return: A pointer to the root memory cgroup.
+ */
+__bpf_kfunc struct mem_cgroup *bpf_get_root_mem_cgroup(void)
+{
+ if (mem_cgroup_disabled())
+ return NULL;
+
+ /* css_get() is not needed */
+ return root_mem_cgroup;
+}
+
+/**
+ * bpf_get_mem_cgroup - Get a reference to a memory cgroup
+ * @css: pointer to the css structure
+ *
+ * It's fine to pass a css which belongs to any cgroup controller,
+ * e.g. the unified hierarchy's main css.
+ *
+ * Implements KF_ACQUIRE semantics.
+ *
+ * Return: A pointer to a mem_cgroup structure after bumping
+ * the corresponding css's reference counter.
+ */
+__bpf_kfunc struct mem_cgroup *
+bpf_get_mem_cgroup(struct cgroup_subsys_state *css)
+{
+ struct mem_cgroup *memcg = NULL;
+ bool rcu_unlock = false;
+
+ if (mem_cgroup_disabled() || !root_mem_cgroup)
+ return NULL;
+
+ if (root_mem_cgroup->css.ss != css->ss) {
+ struct cgroup *cgroup = css->cgroup;
+ int ssid = root_mem_cgroup->css.ss->id;
+
+ rcu_read_lock();
+ rcu_unlock = true;
+ css = rcu_dereference_raw(cgroup->subsys[ssid]);
+ }
+
+ if (css && css_tryget(css))
+ memcg = container_of(css, struct mem_cgroup, css);
+
+ if (rcu_unlock)
+ rcu_read_unlock();
+
+ return memcg;
+}
+
+/**
+ * bpf_put_mem_cgroup - Put a reference to a memory cgroup
+ * @memcg: memory cgroup to release
+ *
+ * Releases a previously acquired memcg reference.
+ * Implements KF_RELEASE semantics.
+ */
+__bpf_kfunc void bpf_put_mem_cgroup(struct mem_cgroup *memcg)
+{
+ css_put(&memcg->css);
+}
+
+/**
+ * bpf_mem_cgroup_vm_events - Read memory cgroup's vm event counter
+ * @memcg: memory cgroup
+ * @event: event id
+ *
+ * Allows reading memory cgroup event counters.
+ *
+ * Return: The current value of the corresponding events counter.
+ */
+__bpf_kfunc unsigned long bpf_mem_cgroup_vm_events(struct mem_cgroup *memcg,
+ enum vm_event_item event)
+{
+ if (unlikely(!memcg_vm_event_item_valid(event)))
+ return (unsigned long)-1;
+
+ return memcg_events(memcg, event);
+}
+
+/**
+ * bpf_mem_cgroup_usage - Read memory cgroup's usage
+ * @memcg: memory cgroup
+ *
+ * Please note that the root memory cgroup is special and is exempt
+ * from memory accounting. The returned value is a sum of sub-cgroups'
+ * usages and does not reflect the size of the root memory cgroup itself.
+ * If you need to get an approximation, you can use root level statistics:
+ * e.g. NR_FILE_PAGES + NR_ANON_MAPPED.
+ *
+ * Return: The current memory cgroup size in bytes.
+ */
+__bpf_kfunc unsigned long bpf_mem_cgroup_usage(struct mem_cgroup *memcg)
+{
+ return page_counter_read(&memcg->memory) * PAGE_SIZE;
+}
+
+/**
+ * bpf_mem_cgroup_memory_events - Read memory cgroup's memory event value
+ * @memcg: memory cgroup
+ * @event: memory event id
+ *
+ * Return: The current value of the memory event counter.
+ */
+__bpf_kfunc unsigned long bpf_mem_cgroup_memory_events(struct mem_cgroup *memcg,
+ enum memcg_memory_event event)
+{
+ if (unlikely(event >= MEMCG_NR_MEMORY_EVENTS))
+ return (unsigned long)-1;
+
+ return atomic_long_read(&memcg->memory_events[event]);
+}
+
+/**
+ * bpf_mem_cgroup_page_state - Read memory cgroup's page state counter
+ * @memcg: memory cgroup
+ * @idx: counter idx
+ *
+ * Allows reading memory cgroup statistics. The output is in bytes.
+ *
+ * Return: The value of the page state counter in bytes.
+ */
+__bpf_kfunc unsigned long bpf_mem_cgroup_page_state(struct mem_cgroup *memcg, int idx)
+{
+ if (unlikely(!memcg_stat_item_valid(idx)))
+ return (unsigned long)-1;
+
+ return memcg_page_state_output(memcg, idx);
+}
+
+/**
+ * bpf_mem_cgroup_flush_stats - Flush memory cgroup's statistics
+ * @memcg: memory cgroup
+ *
+ * Propagate memory cgroup's statistics up the cgroup tree.
+ */
+__bpf_kfunc void bpf_mem_cgroup_flush_stats(struct mem_cgroup *memcg)
+{
+ mem_cgroup_flush_stats(memcg);
+}
+
+__bpf_kfunc_end_defs();
+
+BTF_KFUNCS_START(bpf_memcontrol_kfuncs)
+BTF_ID_FLAGS(func, bpf_get_root_mem_cgroup, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_get_mem_cgroup, KF_ACQUIRE | KF_RET_NULL | KF_RCU)
+BTF_ID_FLAGS(func, bpf_put_mem_cgroup, KF_RELEASE)
+
+BTF_ID_FLAGS(func, bpf_mem_cgroup_vm_events)
+BTF_ID_FLAGS(func, bpf_mem_cgroup_memory_events)
+BTF_ID_FLAGS(func, bpf_mem_cgroup_usage)
+BTF_ID_FLAGS(func, bpf_mem_cgroup_page_state)
+BTF_ID_FLAGS(func, bpf_mem_cgroup_flush_stats, KF_SLEEPABLE)
+
+BTF_KFUNCS_END(bpf_memcontrol_kfuncs)
+
+static const struct btf_kfunc_id_set bpf_memcontrol_kfunc_set = {
+ .owner = THIS_MODULE,
+ .set = &bpf_memcontrol_kfuncs,
+};
+
+static int __init bpf_memcontrol_init(void)
+{
+ int err;
+
+ err = register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC,
+ &bpf_memcontrol_kfunc_set);
+ if (err)
+ pr_warn("error while registering bpf memcontrol kfuncs: %d\n", err);
+
+ return err;
+}
+late_initcall(bpf_memcontrol_init);
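A minimal BPF-side sketch of these kfuncs (the extern declarations would normally come from vmlinux.h/BTF; the attach point is illustrative):

	extern struct mem_cgroup *bpf_get_root_mem_cgroup(void) __ksym;
	extern unsigned long bpf_mem_cgroup_usage(struct mem_cgroup *memcg) __ksym;
	extern void bpf_put_mem_cgroup(struct mem_cgroup *memcg) __ksym;

	SEC("fentry/try_to_free_pages")	/* illustrative attach point */
	int BPF_PROG(report_usage)
	{
		struct mem_cgroup *memcg = bpf_get_root_mem_cgroup();

		if (!memcg)
			return 0;
		bpf_printk("root usage: %lu bytes", bpf_mem_cgroup_usage(memcg));
		bpf_put_mem_cgroup(memcg);	/* safe even for the root memcg */
		return 0;
	}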
diff --git a/mm/memcontrol-v1.h b/mm/memcontrol-v1.h
index 6358464bb416..a304ad418cdf 100644
--- a/mm/memcontrol-v1.h
+++ b/mm/memcontrol-v1.h
@@ -27,7 +27,6 @@ unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap);
void drain_all_stock(struct mem_cgroup *root_memcg);
unsigned long memcg_events(struct mem_cgroup *memcg, int event);
-unsigned long memcg_page_state_output(struct mem_cgroup *memcg, int item);
int memory_stat_show(struct seq_file *m, void *v);
void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 783b3b008fef..129eed3ff5bb 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -665,6 +665,14 @@ unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
return x;
}
+bool memcg_stat_item_valid(int idx)
+{
+ if ((u32)idx >= MEMCG_NR_STAT)
+ return false;
+
+ return !BAD_STAT_IDX(memcg_stats_index(idx));
+}
+
static int memcg_page_state_unit(int item);
/*
@@ -862,6 +870,14 @@ unsigned long memcg_events(struct mem_cgroup *memcg, int event)
return READ_ONCE(memcg->vmstats->events[i]);
}
+bool memcg_vm_event_item_valid(enum vm_event_item idx)
+{
+ if (idx >= NR_VM_EVENT_ITEMS)
+ return false;
+
+ return !BAD_STAT_IDX(memcg_events_index(idx));
+}
+
#ifdef CONFIG_MEMCG_V1
unsigned long memcg_events_local(struct mem_cgroup *memcg, int event)
{
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 26cfcfdc45eb..178c4738e63b 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -685,6 +685,7 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog,
switch (prog->expected_attach_type) {
case BPF_TRACE_FENTRY:
case BPF_TRACE_FEXIT:
+ case BPF_TRACE_FSESSION:
if (bpf_fentry_test1(1) != 2 ||
bpf_fentry_test2(2, 3) != 5 ||
bpf_fentry_test3(4, 5, 6) != 15 ||
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index 850dd736ccd1..1eb3e060994e 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -40,29 +40,30 @@ static int bpf_sk_storage_del(struct sock *sk, struct bpf_map *map)
if (!sdata)
return -ENOENT;
- bpf_selem_unlink(SELEM(sdata), false);
-
- return 0;
+ return bpf_selem_unlink(SELEM(sdata));
}
/* Called by __sk_destruct() & bpf_sk_storage_clone() */
void bpf_sk_storage_free(struct sock *sk)
{
struct bpf_local_storage *sk_storage;
+ u32 uncharge;
rcu_read_lock_dont_migrate();
sk_storage = rcu_dereference(sk->sk_bpf_storage);
if (!sk_storage)
goto out;
- bpf_local_storage_destroy(sk_storage);
+ uncharge = bpf_local_storage_destroy(sk_storage);
+ if (uncharge)
+ atomic_sub(uncharge, &sk->sk_omem_alloc);
out:
rcu_read_unlock_migrate();
}
static void bpf_sk_storage_map_free(struct bpf_map *map)
{
- bpf_local_storage_map_free(map, &sk_cache, NULL);
+ bpf_local_storage_map_free(map, &sk_cache);
}
static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
@@ -191,7 +192,14 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
}
if (new_sk_storage) {
- bpf_selem_link_map(smap, copy_selem);
+ ret = bpf_selem_link_map(smap, new_sk_storage, copy_selem);
+ if (ret) {
+ bpf_selem_free(copy_selem, true);
+ atomic_sub(smap->elem_size,
+ &newsk->sk_omem_alloc);
+ bpf_map_put(map);
+ goto out;
+ }
bpf_selem_link_storage_nolock(new_sk_storage, copy_selem);
} else {
ret = bpf_local_storage_alloc(newsk, smap, copy_selem, GFP_ATOMIC);
@@ -365,6 +373,7 @@ static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog)
return true;
case BPF_TRACE_FENTRY:
case BPF_TRACE_FEXIT:
+ case BPF_TRACE_FSESSION:
return !!strncmp(prog->aux->attach_func_name, "bpf_sk_storage",
strlen("bpf_sk_storage"));
default:
diff --git a/net/core/filter.c b/net/core/filter.c
index 029e560e32ce..ba019ded773d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4137,7 +4137,7 @@ static const struct bpf_func_proto bpf_xdp_store_bytes_proto = {
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_ANYTHING,
- .arg3_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg4_type = ARG_CONST_SIZE,
};
@@ -6401,7 +6401,7 @@ static const struct bpf_func_proto bpf_xdp_fib_lookup_proto = {
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
- .arg2_type = ARG_PTR_TO_MEM,
+ .arg2_type = ARG_PTR_TO_MEM | MEM_WRITE,
.arg3_type = ARG_CONST_SIZE,
.arg4_type = ARG_ANYTHING,
};
@@ -6456,7 +6456,7 @@ static const struct bpf_func_proto bpf_skb_fib_lookup_proto = {
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
- .arg2_type = ARG_PTR_TO_MEM,
+ .arg2_type = ARG_PTR_TO_MEM | MEM_WRITE,
.arg3_type = ARG_CONST_SIZE,
.arg4_type = ARG_ANYTHING,
};
@@ -8010,9 +8010,9 @@ static const struct bpf_func_proto bpf_tcp_raw_gen_syncookie_ipv4_proto = {
.gpl_only = true, /* __cookie_v4_init_sequence() is GPL */
.pkt_access = true,
.ret_type = RET_INTEGER,
- .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM,
+ .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_RDONLY,
.arg1_size = sizeof(struct iphdr),
- .arg2_type = ARG_PTR_TO_MEM,
+ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg3_type = ARG_CONST_SIZE_OR_ZERO,
};
@@ -8042,9 +8042,9 @@ static const struct bpf_func_proto bpf_tcp_raw_gen_syncookie_ipv6_proto = {
.gpl_only = true, /* __cookie_v6_init_sequence() is GPL */
.pkt_access = true,
.ret_type = RET_INTEGER,
- .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM,
+ .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_RDONLY,
.arg1_size = sizeof(struct ipv6hdr),
- .arg2_type = ARG_PTR_TO_MEM,
+ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg3_type = ARG_CONST_SIZE_OR_ZERO,
};
@@ -8062,9 +8062,9 @@ static const struct bpf_func_proto bpf_tcp_raw_check_syncookie_ipv4_proto = {
.gpl_only = true, /* __cookie_v4_check is GPL */
.pkt_access = true,
.ret_type = RET_INTEGER,
- .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM,
+ .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_RDONLY,
.arg1_size = sizeof(struct iphdr),
- .arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM,
+ .arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_RDONLY,
.arg2_size = sizeof(struct tcphdr),
};
@@ -8086,9 +8086,9 @@ static const struct bpf_func_proto bpf_tcp_raw_check_syncookie_ipv6_proto = {
.gpl_only = true, /* __cookie_v6_check is GPL */
.pkt_access = true,
.ret_type = RET_INTEGER,
- .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM,
+ .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_RDONLY,
.arg1_size = sizeof(struct ipv6hdr),
- .arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM,
+ .arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_RDONLY,
.arg2_size = sizeof(struct tcphdr),
};
#endif /* CONFIG_SYN_COOKIES */
@@ -12023,7 +12023,7 @@ BPF_CALL_1(bpf_skc_to_unix_sock, struct sock *, sk)
* trigger an explicit type generation here.
*/
BTF_TYPE_EMIT(struct unix_sock);
- if (sk && sk_fullsock(sk) && sk->sk_family == AF_UNIX)
+ if (sk && sk_is_unix(sk))
return (unsigned long)sk;
return (unsigned long)NULL;
@@ -12440,11 +12440,11 @@ int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags,
}
BTF_KFUNCS_START(bpf_kfunc_check_set_skb)
-BTF_ID_FLAGS(func, bpf_dynptr_from_skb, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_dynptr_from_skb)
BTF_KFUNCS_END(bpf_kfunc_check_set_skb)
BTF_KFUNCS_START(bpf_kfunc_check_set_skb_meta)
-BTF_ID_FLAGS(func, bpf_dynptr_from_skb_meta, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_dynptr_from_skb_meta)
BTF_KFUNCS_END(bpf_kfunc_check_set_skb_meta)
BTF_KFUNCS_START(bpf_kfunc_check_set_xdp)
@@ -12457,11 +12457,11 @@ BTF_ID_FLAGS(func, bpf_sock_addr_set_sun_path)
BTF_KFUNCS_END(bpf_kfunc_check_set_sock_addr)
BTF_KFUNCS_START(bpf_kfunc_check_set_tcp_reqsk)
-BTF_ID_FLAGS(func, bpf_sk_assign_tcp_reqsk, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_sk_assign_tcp_reqsk)
BTF_KFUNCS_END(bpf_kfunc_check_set_tcp_reqsk)
BTF_KFUNCS_START(bpf_kfunc_check_set_sock_ops)
-BTF_ID_FLAGS(func, bpf_sock_ops_enable_tx_tstamp, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_sock_ops_enable_tx_tstamp)
BTF_KFUNCS_END(bpf_kfunc_check_set_sock_ops)
static const struct btf_kfunc_id_set bpf_kfunc_set_skb = {
@@ -12556,7 +12556,7 @@ __bpf_kfunc int bpf_sock_destroy(struct sock_common *sock)
__bpf_kfunc_end_defs();
BTF_KFUNCS_START(bpf_sk_iter_kfunc_ids)
-BTF_ID_FLAGS(func, bpf_sock_destroy, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_sock_destroy)
BTF_KFUNCS_END(bpf_sk_iter_kfunc_ids)
static int tracing_iter_filter(const struct bpf_prog *prog, u32 kfunc_id)
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 2ac7731e1e0a..ddde93dd8bc6 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -409,22 +409,26 @@ out:
}
EXPORT_SYMBOL_GPL(sk_msg_memcopy_from_iter);
-/* Receive sk_msg from psock->ingress_msg to @msg. */
-int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
- int len, int flags)
+int __sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
+ int len, int flags, int *copied_from_self)
{
struct iov_iter *iter = &msg->msg_iter;
int peek = flags & MSG_PEEK;
struct sk_msg *msg_rx;
int i, copied = 0;
+ bool from_self;
msg_rx = sk_psock_peek_msg(psock);
+ if (copied_from_self)
+ *copied_from_self = 0;
+
while (copied != len) {
struct scatterlist *sge;
if (unlikely(!msg_rx))
break;
+ from_self = msg_rx->sk == sk;
i = msg_rx->sg.start;
do {
struct page *page;
@@ -443,6 +447,9 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
}
copied += copy;
+ if (from_self && copied_from_self)
+ *copied_from_self += copy;
+
if (likely(!peek)) {
sge->offset += copy;
sge->length -= copy;
@@ -451,6 +458,7 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
atomic_sub(copy, &sk->sk_rmem_alloc);
}
msg_rx->sg.size -= copy;
+ sk_psock_msg_len_add(psock, -copy);
if (!sge->length) {
sk_msg_iter_var_next(i);
@@ -487,6 +495,13 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
out:
return copied;
}
+
+/* Receive sk_msg from psock->ingress_msg to @msg. */
+int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
+ int len, int flags)
+{
+ return __sk_msg_recvmsg(sk, psock, msg, len, flags, NULL);
+}
EXPORT_SYMBOL_GPL(sk_msg_recvmsg);
bool sk_msg_is_readable(struct sock *sk)
@@ -616,6 +631,12 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb
if (unlikely(!msg))
return -EAGAIN;
skb_set_owner_r(skb, sk);
+
+ /* This is used in tcp_bpf_recvmsg_parser() to determine whether the
+ * data originates from the socket's own protocol stack. No need to
+ * refcount sk because msg's lifetime is bound to sk via the ingress_msg.
+ */
+ msg->sk = sk;
err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg, take_ref);
if (err < 0)
kfree(msg);
@@ -801,9 +822,11 @@ static void __sk_psock_purge_ingress_msg(struct sk_psock *psock)
list_del(&msg->list);
if (!msg->skb)
atomic_sub(msg->sg.size, &psock->sk->sk_rmem_alloc);
+ sk_psock_msg_len_add(psock, -msg->sg.size);
sk_msg_free(psock->sk, msg);
kfree(msg);
}
+ WARN_ON_ONCE(psock->msg_tot_len);
}
static void __sk_psock_zap_ingress(struct sk_psock *psock)
@@ -909,6 +932,7 @@ int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock,
sk_msg_compute_data_pointers(msg);
msg->sk = sk;
ret = bpf_prog_run_pin_on_cpu(prog, msg);
+ msg->sk = NULL;
ret = sk_psock_map_verd(ret, msg->sk_redir);
psock->apply_bytes = msg->apply_bytes;
if (ret == __SK_REDIRECT) {
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 9100e160113a..fee6d080ee85 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -964,7 +964,7 @@ __bpf_kfunc int bpf_xdp_metadata_rx_vlan_tag(const struct xdp_md *ctx,
__bpf_kfunc_end_defs();
BTF_KFUNCS_START(xdp_metadata_kfunc_ids)
-#define XDP_METADATA_KFUNC(_, __, name, ___) BTF_ID_FLAGS(func, name, KF_TRUSTED_ARGS)
+#define XDP_METADATA_KFUNC(_, __, name, ___) BTF_ID_FLAGS(func, name)
XDP_METADATA_KFUNC_xxx
#undef XDP_METADATA_KFUNC
BTF_KFUNCS_END(xdp_metadata_kfunc_ids)
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index a268e1595b22..ca8a5cb8e569 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -10,6 +10,7 @@
#include <net/inet_common.h>
#include <net/tls.h>
+#include <asm/ioctls.h>
void tcp_eat_skb(struct sock *sk, struct sk_buff *skb)
{
@@ -226,6 +227,7 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
int peek = flags & MSG_PEEK;
struct sk_psock *psock;
struct tcp_sock *tcp;
+ int copied_from_self = 0;
int copied = 0;
u32 seq;
@@ -262,7 +264,7 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
}
msg_bytes_ready:
- copied = sk_msg_recvmsg(sk, psock, msg, len, flags);
+ copied = __sk_msg_recvmsg(sk, psock, msg, len, flags, &copied_from_self);
/* The typical case for EFAULT is the socket was gracefully
* shutdown with a FIN pkt. So check here the other case is
* some error on copy_page_to_iter which would be unexpected.
@@ -277,7 +279,7 @@ msg_bytes_ready:
goto out;
}
}
- seq += copied;
+ seq += copied_from_self;
if (!copied) {
long timeo;
int data;
@@ -331,6 +333,24 @@ unlock:
return copied;
}
+static int tcp_bpf_ioctl(struct sock *sk, int cmd, int *karg)
+{
+ bool slow;
+
+ if (cmd != SIOCINQ)
+ return tcp_ioctl(sk, cmd, karg);
+
+ /* works similarly to tcp_ioctl */
+ if (sk->sk_state == TCP_LISTEN)
+ return -EINVAL;
+
+ slow = lock_sock_fast(sk);
+ *karg = sk_psock_msg_inq(sk);
+ unlock_sock_fast(sk, slow);
+
+ return 0;
+}
+
static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int flags, int *addr_len)
{
@@ -609,6 +629,7 @@ static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS],
prot[TCP_BPF_BASE].close = sock_map_close;
prot[TCP_BPF_BASE].recvmsg = tcp_bpf_recvmsg;
prot[TCP_BPF_BASE].sock_is_readable = sk_msg_is_readable;
+ prot[TCP_BPF_BASE].ioctl = tcp_bpf_ioctl;
prot[TCP_BPF_TX] = prot[TCP_BPF_BASE];
prot[TCP_BPF_TX].sendmsg = tcp_bpf_sendmsg;
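From userspace, the new path is reached with the ordinary SIOCINQ idiom; on a sockmap-managed TCP socket it now reports bytes queued in the psock ingress_msg list rather than the raw TCP receive queue. A sketch:

	#include <sys/ioctl.h>
	#include <linux/sockios.h>

	/* Returns readable bytes, or -1 on error (SIOCINQ == FIONREAD). */
	static int bytes_readable(int fd)
	{
		int n = 0;

		if (ioctl(fd, SIOCINQ, &n) < 0)
			return -1;
		return n;
	}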
diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c
index 0735d820e413..91233e37cd97 100644
--- a/net/ipv4/udp_bpf.c
+++ b/net/ipv4/udp_bpf.c
@@ -5,6 +5,7 @@
#include <net/sock.h>
#include <net/udp.h>
#include <net/inet_common.h>
+#include <asm/ioctls.h>
#include "udp_impl.h"
@@ -111,12 +112,26 @@ enum {
static DEFINE_SPINLOCK(udpv6_prot_lock);
static struct proto udp_bpf_prots[UDP_BPF_NUM_PROTS];
+static int udp_bpf_ioctl(struct sock *sk, int cmd, int *karg)
+{
+ if (cmd != SIOCINQ)
+ return udp_ioctl(sk, cmd, karg);
+
+ /* Since we don't hold the socket lock, sk_receive_queue may still
+ * contain data that BPF is processing at this moment. We only
+ * care about the data in the ingress_msg here.
+ */
+ *karg = sk_msg_first_len(sk);
+ return 0;
+}
+
static void udp_bpf_rebuild_protos(struct proto *prot, const struct proto *base)
{
- *prot = *base;
- prot->close = sock_map_close;
- prot->recvmsg = udp_bpf_recvmsg;
- prot->sock_is_readable = sk_msg_is_readable;
+ *prot = *base;
+ prot->close = sock_map_close;
+ prot->recvmsg = udp_bpf_recvmsg;
+ prot->sock_is_readable = sk_msg_is_readable;
+ prot->ioctl = udp_bpf_ioctl;
}
static void udp_bpf_check_v6_needs_rebuild(struct proto *ops)
diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c
index 4a136fc3a9c0..be654363f53f 100644
--- a/net/netfilter/nf_conntrack_bpf.c
+++ b/net/netfilter/nf_conntrack_bpf.c
@@ -114,8 +114,6 @@ __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
struct nf_conn *ct;
int err;
- if (!opts || !bpf_tuple)
- return ERR_PTR(-EINVAL);
if (!(opts_len == NF_BPF_CT_OPTS_SZ || opts_len == 12))
return ERR_PTR(-EINVAL);
if (opts_len == NF_BPF_CT_OPTS_SZ) {
@@ -299,8 +297,7 @@ bpf_xdp_ct_alloc(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
nfct = __bpf_nf_ct_alloc_entry(dev_net(ctx->rxq->dev), bpf_tuple, tuple__sz,
opts, opts__sz, 10);
if (IS_ERR(nfct)) {
- if (opts)
- opts->error = PTR_ERR(nfct);
+ opts->error = PTR_ERR(nfct);
return NULL;
}
@@ -334,8 +331,7 @@ bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
caller_net = dev_net(ctx->rxq->dev);
nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts, opts__sz);
if (IS_ERR(nfct)) {
- if (opts)
- opts->error = PTR_ERR(nfct);
+ opts->error = PTR_ERR(nfct);
return NULL;
}
return nfct;
@@ -367,8 +363,7 @@ bpf_skb_ct_alloc(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk);
nfct = __bpf_nf_ct_alloc_entry(net, bpf_tuple, tuple__sz, opts, opts__sz, 10);
if (IS_ERR(nfct)) {
- if (opts)
- opts->error = PTR_ERR(nfct);
+ opts->error = PTR_ERR(nfct);
return NULL;
}
@@ -402,8 +397,7 @@ bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
caller_net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk);
nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts, opts__sz);
if (IS_ERR(nfct)) {
- if (opts)
- opts->error = PTR_ERR(nfct);
+ opts->error = PTR_ERR(nfct);
return NULL;
}
return nfct;
@@ -516,10 +510,10 @@ BTF_ID_FLAGS(func, bpf_skb_ct_alloc, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_skb_ct_lookup, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_ct_insert_entry, KF_ACQUIRE | KF_RET_NULL | KF_RELEASE)
BTF_ID_FLAGS(func, bpf_ct_release, KF_RELEASE)
-BTF_ID_FLAGS(func, bpf_ct_set_timeout, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_ct_change_timeout, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_ct_set_status, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_ct_change_status, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_ct_set_timeout)
+BTF_ID_FLAGS(func, bpf_ct_change_timeout)
+BTF_ID_FLAGS(func, bpf_ct_set_status)
+BTF_ID_FLAGS(func, bpf_ct_change_status)
BTF_KFUNCS_END(nf_ct_kfunc_set)
static const struct btf_kfunc_id_set nf_conntrack_kfunc_set = {
diff --git a/net/netfilter/nf_flow_table_bpf.c b/net/netfilter/nf_flow_table_bpf.c
index 4a5f5195f2d2..cbd5b97a6329 100644
--- a/net/netfilter/nf_flow_table_bpf.c
+++ b/net/netfilter/nf_flow_table_bpf.c
@@ -105,7 +105,7 @@ __diag_pop()
__bpf_kfunc_end_defs();
BTF_KFUNCS_START(nf_ft_kfunc_set)
-BTF_ID_FLAGS(func, bpf_xdp_flow_lookup, KF_TRUSTED_ARGS | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_xdp_flow_lookup, KF_RET_NULL)
BTF_KFUNCS_END(nf_ft_kfunc_set)
static const struct btf_kfunc_id_set nf_flow_kfunc_set = {
diff --git a/net/netfilter/nf_nat_bpf.c b/net/netfilter/nf_nat_bpf.c
index 481be15609b1..f9dd85ccea01 100644
--- a/net/netfilter/nf_nat_bpf.c
+++ b/net/netfilter/nf_nat_bpf.c
@@ -55,7 +55,7 @@ __bpf_kfunc int bpf_ct_set_nat_info(struct nf_conn___init *nfct,
__bpf_kfunc_end_defs();
BTF_KFUNCS_START(nf_nat_kfunc_set)
-BTF_ID_FLAGS(func, bpf_ct_set_nat_info, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_ct_set_nat_info)
BTF_KFUNCS_END(nf_nat_kfunc_set)
static const struct btf_kfunc_id_set nf_bpf_nat_kfunc_set = {
diff --git a/net/sched/bpf_qdisc.c b/net/sched/bpf_qdisc.c
index adcb618a2bfc..098ca02aed89 100644
--- a/net/sched/bpf_qdisc.c
+++ b/net/sched/bpf_qdisc.c
@@ -202,6 +202,12 @@ __bpf_kfunc void bpf_kfree_skb(struct sk_buff *skb)
kfree_skb(skb);
}
+__bpf_kfunc void bpf_kfree_skb_dtor(void *skb)
+{
+ bpf_kfree_skb(skb);
+}
+CFI_NOSEAL(bpf_kfree_skb_dtor);
+
/* bpf_qdisc_skb_drop - Drop an skb by adding it to a deferred free list.
* @skb: The skb whose reference to be released and dropped.
* @to_free_list: The list of skbs to be dropped.
@@ -271,14 +277,14 @@ __bpf_kfunc void bpf_qdisc_bstats_update(struct Qdisc *sch, const struct sk_buff
__bpf_kfunc_end_defs();
BTF_KFUNCS_START(qdisc_kfunc_ids)
-BTF_ID_FLAGS(func, bpf_skb_get_hash, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_skb_get_hash)
BTF_ID_FLAGS(func, bpf_kfree_skb, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_qdisc_skb_drop, KF_RELEASE)
-BTF_ID_FLAGS(func, bpf_dynptr_from_skb, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_qdisc_watchdog_schedule, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_qdisc_init_prologue, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_qdisc_reset_destroy_epilogue, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_qdisc_bstats_update, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_dynptr_from_skb)
+BTF_ID_FLAGS(func, bpf_qdisc_watchdog_schedule)
+BTF_ID_FLAGS(func, bpf_qdisc_init_prologue)
+BTF_ID_FLAGS(func, bpf_qdisc_reset_destroy_epilogue)
+BTF_ID_FLAGS(func, bpf_qdisc_bstats_update)
BTF_KFUNCS_END(qdisc_kfunc_ids)
BTF_SET_START(qdisc_common_kfunc_set)
@@ -449,7 +455,7 @@ static struct bpf_struct_ops bpf_Qdisc_ops = {
.owner = THIS_MODULE,
};
-BTF_ID_LIST_SINGLE(bpf_sk_buff_dtor_ids, func, bpf_kfree_skb)
+BTF_ID_LIST_SINGLE(bpf_sk_buff_dtor_ids, func, bpf_kfree_skb_dtor)
static int __init bpf_qdisc_kfunc_init(void)
{
diff --git a/net/xfrm/xfrm_state_bpf.c b/net/xfrm/xfrm_state_bpf.c
index 2248eda741f8..4180c317f9bc 100644
--- a/net/xfrm/xfrm_state_bpf.c
+++ b/net/xfrm/xfrm_state_bpf.c
@@ -68,7 +68,7 @@ bpf_xdp_get_xfrm_state(struct xdp_md *ctx, struct bpf_xfrm_state_opts *opts, u32
struct net *net = dev_net(xdp->rxq->dev);
struct xfrm_state *x;
- if (!opts || opts__sz < sizeof(opts->error))
+ if (opts__sz < sizeof(opts->error))
return NULL;
if (opts__sz != BPF_XFRM_STATE_OPTS_SZ) {
diff --git a/scripts/Makefile.btf b/scripts/Makefile.btf
index db76335dd917..562a04b40e06 100644
--- a/scripts/Makefile.btf
+++ b/scripts/Makefile.btf
@@ -7,14 +7,7 @@ JOBS := $(patsubst -j%,%,$(filter -j%,$(MAKEFLAGS)))
ifeq ($(call test-le, $(pahole-ver), 125),y)
-# pahole 1.18 through 1.21 can't handle zero-sized per-CPU vars
-ifeq ($(call test-le, $(pahole-ver), 121),y)
-pahole-flags-$(call test-ge, $(pahole-ver), 118) += --skip_encoding_btf_vars
-endif
-
-pahole-flags-$(call test-ge, $(pahole-ver), 121) += --btf_gen_floats
-
-pahole-flags-$(call test-ge, $(pahole-ver), 122) += -j$(JOBS)
+pahole-flags-y += --btf_gen_floats -j$(JOBS)
pahole-flags-$(call test-ge, $(pahole-ver), 125) += --skip_encoding_btf_inconsistent_proto --btf_gen_optimized
@@ -25,13 +18,15 @@ pahole-flags-$(call test-ge, $(pahole-ver), 126) = -j$(JOBS) --btf_features=enc
pahole-flags-$(call test-ge, $(pahole-ver), 130) += --btf_features=attributes
-ifneq ($(KBUILD_EXTMOD),)
-module-pahole-flags-$(call test-ge, $(pahole-ver), 128) += --btf_features=distilled_base
-endif
-
endif
pahole-flags-$(CONFIG_PAHOLE_HAS_LANG_EXCLUDE) += --lang_exclude=rust
export PAHOLE_FLAGS := $(pahole-flags-y)
-export MODULE_PAHOLE_FLAGS := $(module-pahole-flags-y)
+
+resolve-btfids-flags-y :=
+resolve-btfids-flags-$(CONFIG_WERROR) += --fatal_warnings
+resolve-btfids-flags-$(if $(KBUILD_EXTMOD),y) += --distill_base
+resolve-btfids-flags-$(if $(KBUILD_VERBOSE),y) += --verbose
+
+export RESOLVE_BTFIDS_FLAGS := $(resolve-btfids-flags-y)
diff --git a/scripts/Makefile.modfinal b/scripts/Makefile.modfinal
index 149e12ff5700..adcbcde16a07 100644
--- a/scripts/Makefile.modfinal
+++ b/scripts/Makefile.modfinal
@@ -42,9 +42,8 @@ quiet_cmd_btf_ko = BTF [M] $@
cmd_btf_ko = \
if [ ! -f $(objtree)/vmlinux ]; then \
printf "Skipping BTF generation for %s due to unavailability of vmlinux\n" $@ 1>&2; \
- else \
- LLVM_OBJCOPY="$(OBJCOPY)" $(PAHOLE) -J $(PAHOLE_FLAGS) $(MODULE_PAHOLE_FLAGS) --btf_base $(objtree)/vmlinux $@; \
- $(RESOLVE_BTFIDS) -b $(objtree)/vmlinux $@; \
+ else \
+ $(CONFIG_SHELL) $(srctree)/scripts/gen-btf.sh --btf_base $(objtree)/vmlinux $@; \
fi;
# Same as newer-prereqs, but allows to exclude specified extra dependencies
diff --git a/scripts/Makefile.vmlinux b/scripts/Makefile.vmlinux
index 276c3134a563..fcae1e432d9a 100644
--- a/scripts/Makefile.vmlinux
+++ b/scripts/Makefile.vmlinux
@@ -71,7 +71,7 @@ targets += vmlinux.unstripped .vmlinux.export.o
vmlinux.unstripped: scripts/link-vmlinux.sh vmlinux.o .vmlinux.export.o $(KBUILD_LDS) FORCE
+$(call if_changed_dep,link_vmlinux)
ifdef CONFIG_DEBUG_INFO_BTF
-vmlinux.unstripped: $(RESOLVE_BTFIDS)
+vmlinux.unstripped: $(RESOLVE_BTFIDS) $(srctree)/scripts/gen-btf.sh
endif
ifdef CONFIG_BUILDTIME_TABLE_SORT
diff --git a/scripts/gen-btf.sh b/scripts/gen-btf.sh
new file mode 100755
index 000000000000..8ca96eb10a69
--- /dev/null
+++ b/scripts/gen-btf.sh
@@ -0,0 +1,147 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2025 Meta Platforms, Inc. and affiliates.
+#
+# This script generates BTF data for the provided ELF file.
+#
+# Kernel BTF generation involves these conceptual steps:
+# 1. pahole generates BTF from DWARF data
+# 2. resolve_btfids applies kernel-specific btf2btf
+# transformations and computes data for the .BTF_ids section
+# 3. the result gets linked/objcopied into the target binary
+#
+# How step (3) should be done differs between vmlinux and
+# kernel modules, which is the primary reason for the existence
+# of this script.
+#
+# For modules the script expects vmlinux passed in as --btf_base.
+# Generated .BTF, .BTF.base and .BTF_ids sections become embedded
+# into the input ELF file with objcopy.
+#
+# For vmlinux the input file remains unchanged and two files are produced:
+# - ${1}.btf.o ready for linking into vmlinux
+# - ${1}.BTF_ids with .BTF_ids data blob
+# This output is consumed by scripts/link-vmlinux.sh
+
+set -e
+
+usage()
+{
+ echo "Usage: $0 [--btf_base <file>] <target ELF file>"
+ exit 1
+}
+
+BTF_BASE=""
+
+while [ $# -gt 0 ]; do
+ case "$1" in
+ --btf_base)
+ BTF_BASE="$2"
+ shift 2
+ ;;
+ -*)
+ echo "Unknown option: $1" >&2
+ usage
+ ;;
+ *)
+ break
+ ;;
+ esac
+done
+
+if [ $# -ne 1 ]; then
+ usage
+fi
+
+ELF_FILE="$1"
+shift
+
+is_enabled() {
+ grep -q "^$1=y" ${objtree}/include/config/auto.conf
+}
+
+case "${KBUILD_VERBOSE}" in
+*1*)
+ set -x
+ ;;
+esac
+
+gen_btf_data()
+{
+ btf1="${ELF_FILE}.BTF.1"
+ ${PAHOLE} -J ${PAHOLE_FLAGS} \
+ ${BTF_BASE:+--btf_base ${BTF_BASE}} \
+ --btf_encode_detached=${btf1} \
+ "${ELF_FILE}"
+
+ ${RESOLVE_BTFIDS} ${RESOLVE_BTFIDS_FLAGS} \
+ ${BTF_BASE:+--btf_base ${BTF_BASE}} \
+ --btf ${btf1} "${ELF_FILE}"
+}
+
+gen_btf_o()
+{
+ btf_data=${ELF_FILE}.btf.o
+
+ # Create ${btf_data} which contains just .BTF section but no symbols. Add
+ # SHF_ALLOC because .BTF will be part of the vmlinux image. --strip-all
+ # deletes all symbols including __start_BTF and __stop_BTF, which will
+ # be redefined in the linker script.
+ echo "" | ${CC} ${CLANG_FLAGS} ${KBUILD_CPPFLAGS} ${KBUILD_CFLAGS} -fno-lto -c -x c -o ${btf_data} -
+ ${OBJCOPY} --add-section .BTF=${ELF_FILE}.BTF \
+ --set-section-flags .BTF=alloc,readonly ${btf_data}
+ ${OBJCOPY} --only-section=.BTF --strip-all ${btf_data}
+
+ # Change e_type to ET_REL so that it can be used to link final vmlinux.
+ # GNU ld 2.35+ and lld do not allow an ET_EXEC input.
+ if is_enabled CONFIG_CPU_BIG_ENDIAN; then
+ et_rel='\0\1'
+ else
+ et_rel='\1\0'
+ fi
+ printf "${et_rel}" | dd of="${btf_data}" conv=notrunc bs=1 seek=16 status=none
+}
+
+embed_btf_data()
+{
+ ${OBJCOPY} --add-section .BTF=${ELF_FILE}.BTF ${ELF_FILE}
+
+ # a module might not have a .BTF_ids or .BTF.base section
+ btf_base="${ELF_FILE}.BTF.base"
+ if [ -f "${btf_base}" ]; then
+ ${OBJCOPY} --add-section .BTF.base=${btf_base} ${ELF_FILE}
+ fi
+ btf_ids="${ELF_FILE}.BTF_ids"
+ if [ -f "${btf_ids}" ]; then
+ ${RESOLVE_BTFIDS} --patch_btfids ${btf_ids} ${ELF_FILE}
+ fi
+}
+
+cleanup()
+{
+ rm -f "${ELF_FILE}.BTF.1"
+ rm -f "${ELF_FILE}.BTF"
+ if [ "${BTFGEN_MODE}" = "module" ]; then
+ rm -f "${ELF_FILE}.BTF.base"
+ rm -f "${ELF_FILE}.BTF_ids"
+ fi
+}
+trap cleanup EXIT
+
+BTFGEN_MODE="vmlinux"
+if [ -n "${BTF_BASE}" ]; then
+ BTFGEN_MODE="module"
+fi
+
+gen_btf_data
+
+case "${BTFGEN_MODE}" in
+vmlinux)
+ gen_btf_o
+ ;;
+module)
+ embed_btf_data
+ ;;
+esac
+
+exit 0
diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index 4ab44c73da4d..16d6a048e07c 100755
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -106,34 +106,6 @@ vmlinux_link()
${kallsymso} ${btf_vmlinux_bin_o} ${arch_vmlinux_o} ${ldlibs}
}
-# generate .BTF typeinfo from DWARF debuginfo
-# ${1} - vmlinux image
-gen_btf()
-{
- local btf_data=${1}.btf.o
-
- info BTF "${btf_data}"
- LLVM_OBJCOPY="${OBJCOPY}" ${PAHOLE} -J ${PAHOLE_FLAGS} ${1}
-
- # Create ${btf_data} which contains just .BTF section but no symbols. Add
- # SHF_ALLOC because .BTF will be part of the vmlinux image. --strip-all
- # deletes all symbols including __start_BTF and __stop_BTF, which will
- # be redefined in the linker script. Add 2>/dev/null to suppress GNU
- # objcopy warnings: "empty loadable segment detected at ..."
- ${OBJCOPY} --only-section=.BTF --set-section-flags .BTF=alloc,readonly \
- --strip-all ${1} "${btf_data}" 2>/dev/null
- # Change e_type to ET_REL so that it can be used to link final vmlinux.
- # GNU ld 2.35+ and lld do not allow an ET_EXEC input.
- if is_enabled CONFIG_CPU_BIG_ENDIAN; then
- et_rel='\0\1'
- else
- et_rel='\1\0'
- fi
- printf "${et_rel}" | dd of="${btf_data}" conv=notrunc bs=1 seek=16 status=none
-
- btf_vmlinux_bin_o=${btf_data}
-}
-
# Create ${2}.o file with all symbols from the ${1} object file
kallsyms()
{
@@ -205,6 +177,7 @@ if is_enabled CONFIG_ARCH_WANTS_PRE_LINK_VMLINUX; then
fi
btf_vmlinux_bin_o=
+btfids_vmlinux=
kallsymso=
strip_debug=
generate_map=
@@ -232,11 +205,14 @@ if is_enabled CONFIG_KALLSYMS || is_enabled CONFIG_DEBUG_INFO_BTF; then
fi
if is_enabled CONFIG_DEBUG_INFO_BTF; then
- if ! gen_btf .tmp_vmlinux1; then
+ info BTF .tmp_vmlinux1
+ if ! ${CONFIG_SHELL} ${srctree}/scripts/gen-btf.sh .tmp_vmlinux1; then
echo >&2 "Failed to generate BTF for vmlinux"
echo >&2 "Try to disable CONFIG_DEBUG_INFO_BTF"
exit 1
fi
+ btf_vmlinux_bin_o=.tmp_vmlinux1.btf.o
+ btfids_vmlinux=.tmp_vmlinux1.BTF_ids
fi
if is_enabled CONFIG_KALLSYMS; then
@@ -289,14 +265,9 @@ fi
vmlinux_link "${VMLINUX}"
-# fill in BTF IDs
if is_enabled CONFIG_DEBUG_INFO_BTF; then
- info BTFIDS "${VMLINUX}"
- RESOLVE_BTFIDS_ARGS=""
- if is_enabled CONFIG_WERROR; then
- RESOLVE_BTFIDS_ARGS=" --fatal_warnings "
- fi
- ${RESOLVE_BTFIDS} ${RESOLVE_BTFIDS_ARGS} "${VMLINUX}"
+ info BTFIDS ${VMLINUX}
+ ${RESOLVE_BTFIDS} --patch_btfids ${btfids_vmlinux} ${VMLINUX}
fi
mksysmap "${VMLINUX}" System.map
diff --git a/tools/bpf/bpftool/Documentation/bpftool-net.rst b/tools/bpf/bpftool/Documentation/bpftool-net.rst
index a9ed8992800f..22da07087e42 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-net.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-net.rst
@@ -24,7 +24,7 @@ NET COMMANDS
============
| **bpftool** **net** { **show** | **list** } [ **dev** *NAME* ]
-| **bpftool** **net attach** *ATTACH_TYPE* *PROG* **dev** *NAME* [ **overwrite** ]
+| **bpftool** **net attach** *ATTACH_TYPE* *PROG* **dev** *NAME* [ **overwrite** | **prepend** ]
| **bpftool** **net detach** *ATTACH_TYPE* **dev** *NAME*
| **bpftool** **net help**
|
@@ -58,11 +58,9 @@ bpftool net { show | list } [ dev *NAME* ]
then all bpf programs attached to non clsact qdiscs, and finally all bpf
programs attached to root and clsact qdisc.
-bpftool net attach *ATTACH_TYPE* *PROG* dev *NAME* [ overwrite ]
+bpftool net attach *ATTACH_TYPE* *PROG* dev *NAME* [ overwrite | prepend ]
Attach bpf program *PROG* to network interface *NAME* with type specified
- by *ATTACH_TYPE*. Previously attached bpf program can be replaced by the
- command used with **overwrite** option. Currently, only XDP-related modes
- are supported for *ATTACH_TYPE*.
+ by *ATTACH_TYPE*.
*ATTACH_TYPE* can be of:
**xdp** - try native XDP and fallback to generic XDP if NIC driver does not support it;
@@ -72,11 +70,18 @@ bpftool net attach *ATTACH_TYPE* *PROG* dev *NAME* [ overwrite ]
**tcx_ingress** - Ingress TCX. runs on ingress net traffic;
**tcx_egress** - Egress TCX. runs on egress net traffic;
+ For XDP-related attach types (**xdp**, **xdpgeneric**, **xdpdrv**,
+ **xdpoffload**), the **overwrite** option can be used to replace a
+ previously attached bpf program.
+
+ For **tcx_ingress** and **tcx_egress** attach types, the **prepend** option
+ can be used to attach the program at the beginning of the chain instead of
+ at the end.
+
bpftool net detach *ATTACH_TYPE* dev *NAME*
Detach bpf program attached to network interface *NAME* with type specified
by *ATTACH_TYPE*. To detach bpf program, same *ATTACH_TYPE* previously used
- for attach must be specified. Currently, only XDP-related modes are
- supported for *ATTACH_TYPE*.
+ for attach must be specified.
bpftool net help
Print short help message.
@@ -192,6 +197,17 @@ EXAMPLES
lo(1) tcx/ingress tc_prog prog_id 29
|
+| **# bpftool net attach tcx_ingress name tc_prog2 dev lo prepend**
+| **# bpftool net**
+|
+
+::
+
+ tc:
+ lo(1) tcx/ingress tc_prog2 prog_id 30
+ lo(1) tcx/ingress tc_prog prog_id 29
+
+|
| **# bpftool net attach tcx_ingress name tc_prog dev lo**
| **# bpftool net detach tcx_ingress dev lo**
| **# bpftool net**
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 5442073a2e42..519ea5cb8ab1 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -130,8 +130,8 @@ include $(FEATURES_DUMP)
endif
endif
-LIBS = $(LIBBPF) -lelf -lz -lcrypto
-LIBS_BOOTSTRAP = $(LIBBPF_BOOTSTRAP) -lelf -lz -lcrypto
+LIBS = $(LIBBPF) -lelf -lcrypto -lz
+LIBS_BOOTSTRAP = $(LIBBPF_BOOTSTRAP) -lelf -lcrypto -lz
ifeq ($(feature-libelf-zstd),1)
LIBS += -lzstd
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 53bcfeb1a76e..a28f0cc522e4 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -1142,7 +1142,14 @@ _bpftool()
return 0
;;
8)
- _bpftool_once_attr 'overwrite'
+ case ${words[3]} in
+ tcx_ingress|tcx_egress)
+ _bpftool_once_attr 'prepend'
+ ;;
+ *)
+ _bpftool_once_attr 'overwrite'
+ ;;
+ esac
return 0
;;
esac
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index e8daf963ecef..8bfcff9e2f63 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -1191,6 +1191,7 @@ const char *bpf_attach_type_input_str(enum bpf_attach_type t)
case BPF_TRACE_FENTRY: return "fentry";
case BPF_TRACE_FEXIT: return "fexit";
case BPF_MODIFY_RETURN: return "mod_ret";
+ case BPF_TRACE_FSESSION: return "fsession";
case BPF_SK_REUSEPORT_SELECT: return "sk_skb_reuseport_select";
case BPF_SK_REUSEPORT_SELECT_OR_MIGRATE: return "sk_skb_reuseport_select_or_migrate";
default: return libbpf_bpf_attach_type_str(t);
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index 993c7d9484a4..2f9e10752e28 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -731,10 +731,10 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h
{ \n\
struct %1$s *skel; \n\
\n\
- skel = skel_alloc(sizeof(*skel)); \n\
+ skel = (struct %1$s *)skel_alloc(sizeof(*skel)); \n\
if (!skel) \n\
goto cleanup; \n\
- skel->ctx.sz = (void *)&skel->links - (void *)skel; \n\
+ skel->ctx.sz = (char *)&skel->links - (char *)skel; \n\
",
obj_name, opts.data_sz);
bpf_object__for_each_map(map, obj) {
@@ -755,7 +755,7 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h
\n\
\"; \n\
\n\
- skel->%1$s = skel_prep_map_data((void *)data, %2$zd,\n\
+ skel->%1$s = (__typeof__(skel->%1$s))skel_prep_map_data((void *)data, %2$zd,\n\
sizeof(data) - 1);\n\
if (!skel->%1$s) \n\
goto cleanup; \n\
@@ -857,7 +857,7 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h
codegen("\
\n\
- skel->%1$s = skel_finalize_map_data(&skel->maps.%1$s.initial_value, \n\
+ skel->%1$s = (__typeof__(skel->%1$s))skel_finalize_map_data(&skel->maps.%1$s.initial_value,\n\
%2$zd, %3$s, skel->maps.%1$s.map_fd);\n\
if (!skel->%1$s) \n\
return -ENOMEM; \n\
diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c
index cfc6f944f7c3..f25d66c8395e 100644
--- a/tools/bpf/bpftool/net.c
+++ b/tools/bpf/bpftool/net.c
@@ -666,10 +666,16 @@ static int get_tcx_type(enum net_attach_type attach_type)
}
}
-static int do_attach_tcx(int progfd, enum net_attach_type attach_type, int ifindex)
+static int do_attach_tcx(int progfd, enum net_attach_type attach_type, int ifindex, bool prepend)
{
int type = get_tcx_type(attach_type);
+ if (prepend) {
+ LIBBPF_OPTS(bpf_prog_attach_opts, opts,
+ .flags = BPF_F_BEFORE
+ );
+ return bpf_prog_attach_opts(progfd, ifindex, type, &opts);
+ }
return bpf_prog_attach(progfd, ifindex, type, 0);
}
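
For reference, the prepend path above boils down to a single libbpf call with BPF_F_BEFORE, which places the program at the head of the tcx chain. A minimal hypothetical call site outside bpftool (prog_fd and ifindex are assumed to be a loaded tcx program and a valid interface index):

    /* Sketch: attach a tcx program at the head of the ingress chain. */
    LIBBPF_OPTS(bpf_prog_attach_opts, opts, .flags = BPF_F_BEFORE);

    if (bpf_prog_attach_opts(prog_fd, ifindex, BPF_TCX_INGRESS, &opts))
            return -1;      /* errno holds the error code */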
@@ -685,6 +691,7 @@ static int do_attach(int argc, char **argv)
enum net_attach_type attach_type;
int progfd, ifindex, err = 0;
bool overwrite = false;
+ bool prepend = false;
/* parse attach args */
if (!REQ_ARGS(5))
@@ -709,9 +716,25 @@ static int do_attach(int argc, char **argv)
if (argc) {
if (is_prefix(*argv, "overwrite")) {
+ if (attach_type != NET_ATTACH_TYPE_XDP &&
+ attach_type != NET_ATTACH_TYPE_XDP_GENERIC &&
+ attach_type != NET_ATTACH_TYPE_XDP_DRIVER &&
+ attach_type != NET_ATTACH_TYPE_XDP_OFFLOAD) {
+ p_err("'overwrite' is only supported for xdp types");
+ err = -EINVAL;
+ goto cleanup;
+ }
overwrite = true;
+ } else if (is_prefix(*argv, "prepend")) {
+ if (attach_type != NET_ATTACH_TYPE_TCX_INGRESS &&
+ attach_type != NET_ATTACH_TYPE_TCX_EGRESS) {
+ p_err("'prepend' is only supported for tcx_ingress/tcx_egress");
+ err = -EINVAL;
+ goto cleanup;
+ }
+ prepend = true;
} else {
- p_err("expected 'overwrite', got: '%s'?", *argv);
+ p_err("expected 'overwrite' or 'prepend', got: '%s'?", *argv);
err = -EINVAL;
goto cleanup;
}
@@ -728,7 +751,7 @@ static int do_attach(int argc, char **argv)
/* attach tcx prog */
case NET_ATTACH_TYPE_TCX_INGRESS:
case NET_ATTACH_TYPE_TCX_EGRESS:
- err = do_attach_tcx(progfd, attach_type, ifindex);
+ err = do_attach_tcx(progfd, attach_type, ifindex, prepend);
break;
default:
break;
@@ -985,7 +1008,7 @@ static int do_help(int argc, char **argv)
fprintf(stderr,
"Usage: %1$s %2$s { show | list } [dev <devname>]\n"
- " %1$s %2$s attach ATTACH_TYPE PROG dev <devname> [ overwrite ]\n"
+ " %1$s %2$s attach ATTACH_TYPE PROG dev <devname> [ overwrite | prepend ]\n"
" %1$s %2$s detach ATTACH_TYPE dev <devname>\n"
" %1$s %2$s help\n"
"\n"
diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile
index ce1b556dfa90..1733a6e93a07 100644
--- a/tools/bpf/resolve_btfids/Makefile
+++ b/tools/bpf/resolve_btfids/Makefile
@@ -70,7 +70,8 @@ HOSTCFLAGS_resolve_btfids += -g \
-I$(srctree)/tools/include/uapi \
-I$(LIBBPF_INCLUDE) \
-I$(SUBCMD_INCLUDE) \
- $(LIBELF_FLAGS)
+ $(LIBELF_FLAGS) \
+ -Wall -Werror
LIBS = $(LIBELF_LIBS) -lz
diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c
index d47191c6e55e..ca7fcd03efb6 100644
--- a/tools/bpf/resolve_btfids/main.c
+++ b/tools/bpf/resolve_btfids/main.c
@@ -71,9 +71,11 @@
#include <fcntl.h>
#include <errno.h>
#include <linux/btf_ids.h>
+#include <linux/kallsyms.h>
#include <linux/rbtree.h>
#include <linux/zalloc.h>
#include <linux/err.h>
+#include <linux/limits.h>
#include <bpf/btf.h>
#include <bpf/libbpf.h>
#include <subcmd/parse-options.h>
@@ -98,6 +100,13 @@
# error "Unknown machine endianness!"
#endif
+enum btf_id_kind {
+ BTF_ID_KIND_NONE,
+ BTF_ID_KIND_SYM,
+ BTF_ID_KIND_SET,
+ BTF_ID_KIND_SET8
+};
+
struct btf_id {
struct rb_node rb_node;
char *name;
@@ -105,17 +114,20 @@ struct btf_id {
int id;
int cnt;
};
+ enum btf_id_kind kind;
int addr_cnt;
- bool is_set;
- bool is_set8;
Elf64_Addr addr[ADDR_CNT];
};
struct object {
const char *path;
- const char *btf;
+ const char *btf_path;
const char *base_btf_path;
+ struct btf *btf;
+ struct btf *base_btf;
+ bool distill_base;
+
struct {
int fd;
Elf *elf;
@@ -140,6 +152,25 @@ struct object {
int nr_typedefs;
};
+#define KF_IMPLICIT_ARGS (1 << 16)
+#define KF_IMPL_SUFFIX "_impl"
+
+struct kfunc {
+ const char *name;
+ u32 btf_id;
+ u32 flags;
+};
+
+struct btf2btf_context {
+ struct btf *btf;
+ u32 *decl_tags;
+ u32 nr_decl_tags;
+ u32 max_decl_tags;
+ struct kfunc *kfuncs;
+ u32 nr_kfuncs;
+ u32 max_kfuncs;
+};
+
static int verbose;
static int warnings;
@@ -194,8 +225,10 @@ static struct btf_id *btf_id__find(struct rb_root *root, const char *name)
return NULL;
}
-static struct btf_id *
-btf_id__add(struct rb_root *root, char *name, bool unique)
+static struct btf_id *__btf_id__add(struct rb_root *root,
+ char *name,
+ enum btf_id_kind kind,
+ bool unique)
{
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
@@ -218,12 +251,23 @@ btf_id__add(struct rb_root *root, char *name, bool unique)
if (id) {
pr_debug("adding symbol %s\n", name);
id->name = name;
+ id->kind = kind;
rb_link_node(&id->rb_node, parent, p);
rb_insert_color(&id->rb_node, root);
}
return id;
}
+static inline struct btf_id *btf_id__add(struct rb_root *root, char *name, enum btf_id_kind kind)
+{
+ return __btf_id__add(root, name, kind, false);
+}
+
+static inline struct btf_id *btf_id__add_unique(struct rb_root *root, char *name, enum btf_id_kind kind)
+{
+ return __btf_id__add(root, name, kind, true);
+}
+
static char *get_id(const char *prefix_end)
{
/*
@@ -257,22 +301,36 @@ static char *get_id(const char *prefix_end)
return id;
}
-static struct btf_id *add_set(struct object *obj, char *name, bool is_set8)
+static struct btf_id *add_set(struct object *obj, char *name, enum btf_id_kind kind)
{
+ int len = strlen(name);
+ int prefixlen;
+ char *id;
+
/*
* __BTF_ID__set__name
* name = ^
* id = ^
*/
- char *id = name + (is_set8 ? sizeof(BTF_SET8 "__") : sizeof(BTF_SET "__")) - 1;
- int len = strlen(name);
+ switch (kind) {
+ case BTF_ID_KIND_SET:
+ prefixlen = sizeof(BTF_SET "__") - 1;
+ break;
+ case BTF_ID_KIND_SET8:
+ prefixlen = sizeof(BTF_SET8 "__") - 1;
+ break;
+ default:
+ pr_err("Unexpected kind %d passed to %s() for symbol %s\n", kind, __func__, name);
+ return NULL;
+ }
+ id = name + prefixlen;
if (id >= name + len) {
pr_err("FAILED to parse set name: %s\n", name);
return NULL;
}
- return btf_id__add(&obj->sets, id, true);
+ return btf_id__add_unique(&obj->sets, id, kind);
}
static struct btf_id *add_symbol(struct rb_root *root, char *name, size_t size)
@@ -285,45 +343,19 @@ static struct btf_id *add_symbol(struct rb_root *root, char *name, size_t size)
return NULL;
}
- return btf_id__add(root, id, false);
+ return btf_id__add(root, id, BTF_ID_KIND_SYM);
}
-/* Older libelf.h and glibc elf.h might not yet define the ELF compression types. */
-#ifndef SHF_COMPRESSED
-#define SHF_COMPRESSED (1 << 11) /* Section with compressed data. */
-#endif
-
-/*
- * The data of compressed section should be aligned to 4
- * (for 32bit) or 8 (for 64 bit) bytes. The binutils ld
- * sets sh_addralign to 1, which makes libelf fail with
- * misaligned section error during the update:
- * FAILED elf_update(WRITE): invalid section alignment
- *
- * While waiting for ld fix, we fix the compressed sections
- * sh_addralign value manualy.
- */
-static int compressed_section_fix(Elf *elf, Elf_Scn *scn, GElf_Shdr *sh)
+static void bswap_32_data(void *data, u32 nr_bytes)
{
- int expected = gelf_getclass(elf) == ELFCLASS32 ? 4 : 8;
-
- if (!(sh->sh_flags & SHF_COMPRESSED))
- return 0;
-
- if (sh->sh_addralign == expected)
- return 0;
-
- pr_debug2(" - fixing wrong alignment sh_addralign %u, expected %u\n",
- sh->sh_addralign, expected);
+ u32 cnt, i;
+ u32 *ptr;
- sh->sh_addralign = expected;
+ cnt = nr_bytes / sizeof(u32);
+ ptr = data;
- if (gelf_update_shdr(scn, sh) == 0) {
- pr_err("FAILED cannot update section header: %s\n",
- elf_errmsg(-1));
- return -1;
- }
- return 0;
+ for (i = 0; i < cnt; i++)
+ ptr[i] = bswap_32(ptr[i]);
}
static int elf_collect(struct object *obj)
@@ -344,7 +376,7 @@ static int elf_collect(struct object *obj)
elf_version(EV_CURRENT);
- elf = elf_begin(fd, ELF_C_RDWR_MMAP, NULL);
+ elf = elf_begin(fd, ELF_C_READ_MMAP_PRIVATE, NULL);
if (!elf) {
close(fd);
pr_err("FAILED cannot create ELF descriptor: %s\n",
@@ -407,21 +439,20 @@ static int elf_collect(struct object *obj)
obj->efile.symbols_shndx = idx;
obj->efile.strtabidx = sh.sh_link;
} else if (!strcmp(name, BTF_IDS_SECTION)) {
+ /*
+ * If target endianness differs from host, we need to bswap32
+ * the .BTF_ids section data on load, because .BTF_ids has
+			 * Elf_Type = ELF_T_BYTE, and so libelf returns the data buffer in
+ * the target endianness. We repeat this on dump.
+ */
+ if (obj->efile.encoding != ELFDATANATIVE) {
+ pr_debug("bswap_32 .BTF_ids data from target to host endianness\n");
+ bswap_32_data(data->d_buf, data->d_size);
+ }
obj->efile.idlist = data;
obj->efile.idlist_shndx = idx;
obj->efile.idlist_addr = sh.sh_addr;
- } else if (!strcmp(name, BTF_BASE_ELF_SEC)) {
- /* If a .BTF.base section is found, do not resolve
- * BTF ids relative to vmlinux; resolve relative
- * to the .BTF.base section instead. btf__parse_split()
- * will take care of this once the base BTF it is
- * passed is NULL.
- */
- obj->base_btf_path = NULL;
}
-
- if (compressed_section_fix(elf, scn, &sh))
- return -1;
}
return 0;
@@ -488,35 +519,31 @@ static int symbols_collect(struct object *obj)
id = add_symbol(&obj->funcs, prefix, sizeof(BTF_FUNC) - 1);
/* set8 */
} else if (!strncmp(prefix, BTF_SET8, sizeof(BTF_SET8) - 1)) {
- id = add_set(obj, prefix, true);
+ id = add_set(obj, prefix, BTF_ID_KIND_SET8);
/*
* SET8 objects store list's count, which is encoded
* in symbol's size, together with 'cnt' field hence
* that - 1.
*/
- if (id) {
+ if (id)
id->cnt = sym.st_size / sizeof(uint64_t) - 1;
- id->is_set8 = true;
- }
/* set */
} else if (!strncmp(prefix, BTF_SET, sizeof(BTF_SET) - 1)) {
- id = add_set(obj, prefix, false);
+ id = add_set(obj, prefix, BTF_ID_KIND_SET);
/*
* SET objects store list's count, which is encoded
* in symbol's size, together with 'cnt' field hence
* that - 1.
*/
- if (id) {
+ if (id)
id->cnt = sym.st_size / sizeof(int) - 1;
- id->is_set = true;
- }
} else {
pr_err("FAILED unsupported prefix %s\n", prefix);
return -1;
}
if (!id)
- return -ENOMEM;
+ return -EINVAL;
if (id->addr_cnt >= ADDR_CNT) {
pr_err("FAILED symbol %s crossed the number of allowed lists\n",
@@ -529,16 +556,10 @@ static int symbols_collect(struct object *obj)
return 0;
}
-static int symbols_resolve(struct object *obj)
+static int load_btf(struct object *obj)
{
- int nr_typedefs = obj->nr_typedefs;
- int nr_structs = obj->nr_structs;
- int nr_unions = obj->nr_unions;
- int nr_funcs = obj->nr_funcs;
- struct btf *base_btf = NULL;
- int err, type_id;
- struct btf *btf;
- __u32 nr_types;
+ struct btf *base_btf = NULL, *btf = NULL;
+ int err;
if (obj->base_btf_path) {
base_btf = btf__parse(obj->base_btf_path, NULL);
@@ -546,18 +567,41 @@ static int symbols_resolve(struct object *obj)
if (err) {
pr_err("FAILED: load base BTF from %s: %s\n",
obj->base_btf_path, strerror(-err));
- return -1;
+ goto out_err;
}
}
- btf = btf__parse_split(obj->btf ?: obj->path, base_btf);
+ btf = btf__parse_split(obj->btf_path ?: obj->path, base_btf);
err = libbpf_get_error(btf);
if (err) {
pr_err("FAILED: load BTF from %s: %s\n",
- obj->btf ?: obj->path, strerror(-err));
- goto out;
+ obj->btf_path ?: obj->path, strerror(-err));
+ goto out_err;
}
+ obj->base_btf = base_btf;
+ obj->btf = btf;
+
+ return 0;
+
+out_err:
+ btf__free(base_btf);
+ btf__free(btf);
+ obj->base_btf = NULL;
+ obj->btf = NULL;
+ return err;
+}
+
+static int symbols_resolve(struct object *obj)
+{
+ int nr_typedefs = obj->nr_typedefs;
+ int nr_structs = obj->nr_structs;
+ int nr_unions = obj->nr_unions;
+ int nr_funcs = obj->nr_funcs;
+ struct btf *btf = obj->btf;
+ int err, type_id;
+ __u32 nr_types;
+
err = -1;
nr_types = btf__type_cnt(btf);
@@ -615,8 +659,6 @@ static int symbols_resolve(struct object *obj)
err = 0;
out:
- btf__free(base_btf);
- btf__free(btf);
return err;
}
@@ -627,7 +669,7 @@ static int id_patch(struct object *obj, struct btf_id *id)
int i;
/* For set, set8, id->id may be 0 */
- if (!id->id && !id->is_set && !id->is_set8) {
+ if (!id->id && id->kind != BTF_ID_KIND_SET && id->kind != BTF_ID_KIND_SET8) {
pr_err("WARN: resolve_btfids: unresolved symbol %s\n", id->name);
warnings++;
}
@@ -680,6 +722,7 @@ static int sets_patch(struct object *obj)
{
Elf_Data *data = obj->efile.idlist;
struct rb_node *next;
+ int cnt;
next = rb_first(&obj->sets);
while (next) {
@@ -699,39 +742,28 @@ static int sets_patch(struct object *obj)
return -1;
}
- if (id->is_set) {
+ switch (id->kind) {
+ case BTF_ID_KIND_SET:
set = data->d_buf + off;
+ cnt = set->cnt;
qsort(set->ids, set->cnt, sizeof(set->ids[0]), cmp_id);
- } else {
+ break;
+ case BTF_ID_KIND_SET8:
set8 = data->d_buf + off;
+ cnt = set8->cnt;
/*
* Make sure id is at the beginning of the pairs
* struct, otherwise the below qsort would not work.
*/
BUILD_BUG_ON((u32 *)set8->pairs != &set8->pairs[0].id);
qsort(set8->pairs, set8->cnt, sizeof(set8->pairs[0]), cmp_id);
-
- /*
- * When ELF endianness does not match endianness of the
- * host, libelf will do the translation when updating
- * the ELF. This, however, corrupts SET8 flags which are
- * already in the target endianness. So, let's bswap
- * them to the host endianness and libelf will then
- * correctly translate everything.
- */
- if (obj->efile.encoding != ELFDATANATIVE) {
- int i;
-
- set8->flags = bswap_32(set8->flags);
- for (i = 0; i < set8->cnt; i++) {
- set8->pairs[i].flags =
- bswap_32(set8->pairs[i].flags);
- }
- }
+ break;
+ default:
+ pr_err("Unexpected btf_id_kind %d for set '%s'\n", id->kind, id->name);
+ return -1;
}
- pr_debug("sorting addr %5lu: cnt %6d [%s]\n",
- off, id->is_set ? set->cnt : set8->cnt, id->name);
+ pr_debug("sorting addr %5lu: cnt %6d [%s]\n", off, cnt, id->name);
next = rb_next(next);
}
@@ -740,8 +772,6 @@ static int sets_patch(struct object *obj)
static int symbols_patch(struct object *obj)
{
- off_t err;
-
if (__symbols_patch(obj, &obj->structs) ||
__symbols_patch(obj, &obj->unions) ||
__symbols_patch(obj, &obj->typedefs) ||
@@ -752,24 +782,665 @@ static int symbols_patch(struct object *obj)
if (sets_patch(obj))
return -1;
- /* Set type to ensure endian translation occurs. */
- obj->efile.idlist->d_type = ELF_T_WORD;
+ return 0;
+}
+
+static int dump_raw_data(const char *out_path, const void *data, u32 size)
+{
+ size_t written;
+ FILE *file;
+
+ file = fopen(out_path, "wb");
+ if (!file) {
+ pr_err("Couldn't open %s for writing\n", out_path);
+ return -1;
+ }
+
+ written = fwrite(data, 1, size, file);
+ if (written != size) {
+ pr_err("Failed to write data to %s\n", out_path);
+ fclose(file);
+ unlink(out_path);
+ return -1;
+ }
+
+ fclose(file);
+ pr_debug("Dumped %lu bytes of data to %s\n", size, out_path);
- elf_flagdata(obj->efile.idlist, ELF_C_SET, ELF_F_DIRTY);
+ return 0;
+}
- err = elf_update(obj->efile.elf, ELF_C_WRITE);
- if (err < 0) {
- pr_err("FAILED elf_update(WRITE): %s\n",
- elf_errmsg(-1));
+static int dump_raw_btf_ids(struct object *obj, const char *out_path)
+{
+ Elf_Data *data = obj->efile.idlist;
+ int err;
+
+ if (!data || !data->d_buf) {
+ pr_debug("%s has no BTF_ids data to dump\n", obj->path);
+ return 0;
+ }
+
+ /*
+ * If target endianness differs from host, we need to bswap32 the
+ * .BTF_ids section data before dumping so that the output is in
+ * target endianness.
+ */
+ if (obj->efile.encoding != ELFDATANATIVE) {
+ pr_debug("bswap_32 .BTF_ids data from host to target endianness\n");
+ bswap_32_data(data->d_buf, data->d_size);
+ }
+
+ err = dump_raw_data(out_path, data->d_buf, data->d_size);
+ if (err)
+ return -1;
+
+ return 0;
+}
+
+static int dump_raw_btf(struct btf *btf, const char *out_path)
+{
+ const void *raw_btf_data;
+ u32 raw_btf_size;
+ int err;
+
+ raw_btf_data = btf__raw_data(btf, &raw_btf_size);
+ if (!raw_btf_data) {
+ pr_err("btf__raw_data() failed\n");
+ return -1;
+ }
+
+ err = dump_raw_data(out_path, raw_btf_data, raw_btf_size);
+ if (err)
+ return -1;
+
+ return 0;
+}
+
+static const struct btf_type *btf_type_skip_qualifiers(const struct btf *btf, s32 type_id)
+{
+ const struct btf_type *t = btf__type_by_id(btf, type_id);
+
+ while (btf_is_mod(t))
+ t = btf__type_by_id(btf, t->type);
+
+ return t;
+}
+
+static int push_decl_tag_id(struct btf2btf_context *ctx, u32 decl_tag_id)
+{
+ u32 *arr = ctx->decl_tags;
+ u32 cap = ctx->max_decl_tags;
+
+ if (ctx->nr_decl_tags + 1 > cap) {
+ cap = max(cap + 256, cap * 2);
+ arr = realloc(arr, sizeof(u32) * cap);
+ if (!arr)
+ return -ENOMEM;
+ ctx->max_decl_tags = cap;
+ ctx->decl_tags = arr;
+ }
+
+ ctx->decl_tags[ctx->nr_decl_tags++] = decl_tag_id;
+
+ return 0;
+}
+
+static int push_kfunc(struct btf2btf_context *ctx, struct kfunc *kfunc)
+{
+ struct kfunc *arr = ctx->kfuncs;
+ u32 cap = ctx->max_kfuncs;
+
+ if (ctx->nr_kfuncs + 1 > cap) {
+ cap = max(cap + 256, cap * 2);
+ arr = realloc(arr, sizeof(struct kfunc) * cap);
+ if (!arr)
+ return -ENOMEM;
+ ctx->max_kfuncs = cap;
+ ctx->kfuncs = arr;
+ }
+
+ ctx->kfuncs[ctx->nr_kfuncs++] = *kfunc;
+
+ return 0;
+}
+
+static int collect_decl_tags(struct btf2btf_context *ctx)
+{
+ const u32 type_cnt = btf__type_cnt(ctx->btf);
+ struct btf *btf = ctx->btf;
+ const struct btf_type *t;
+ int err;
+
+ for (u32 id = 1; id < type_cnt; id++) {
+ t = btf__type_by_id(btf, id);
+ if (!btf_is_decl_tag(t))
+ continue;
+ err = push_decl_tag_id(ctx, id);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+/*
+ * To find a kfunc's flags given its struct btf_id (which carries the ELF
+ * addresses), look for an address that falls within the range of a set8.
+ * If a containing set8 is found, the flags word is located at addr + 4 bytes.
+ * Return 0 (no flags) if none is found.
+ */
+static u32 find_kfunc_flags(struct object *obj, struct btf_id *kfunc_id)
+{
+ const u32 *elf_data_ptr = obj->efile.idlist->d_buf;
+ u64 set_lower_addr, set_upper_addr, addr;
+ struct btf_id *set_id;
+ struct rb_node *next;
+ u32 flags;
+ u64 idx;
+
+ for (next = rb_first(&obj->sets); next; next = rb_next(next)) {
+ set_id = rb_entry(next, struct btf_id, rb_node);
+ if (set_id->kind != BTF_ID_KIND_SET8 || set_id->addr_cnt != 1)
+ continue;
+
+ set_lower_addr = set_id->addr[0];
+ set_upper_addr = set_lower_addr + set_id->cnt * sizeof(u64);
+
+ for (u32 i = 0; i < kfunc_id->addr_cnt; i++) {
+ addr = kfunc_id->addr[i];
+ /*
+ * Lower bound is exclusive to skip the 8-byte header of the set.
+ * Upper bound is inclusive to capture the last entry at offset 8*cnt.
+ */
+ if (set_lower_addr < addr && addr <= set_upper_addr) {
+ pr_debug("found kfunc %s in BTF_ID_FLAGS %s\n",
+ kfunc_id->name, set_id->name);
+ idx = addr - obj->efile.idlist_addr;
+ idx = idx / sizeof(u32) + 1;
+ flags = elf_data_ptr[idx];
+
+ return flags;
+ }
+ }
+ }
+
+ return 0;
+}
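
The address arithmetic above matches the layout of BTF_ID_FLAGS sets, struct btf_id_set8 from include/linux/btf_ids.h: an 8-byte (cnt, flags) header followed by 8-byte (id, flags) pairs, which is why each id's flags word trails it by exactly 4 bytes. Schematically:

    /* Layout assumed by find_kfunc_flags() above: a kfunc's id word lives
     * somewhere inside pairs[], and its flags word immediately follows it,
     * hence the "+ 1" u32 offset once the ELF address is converted to a
     * u32 index into the .BTF_ids data. */
    struct btf_id_set8 {
            u32 cnt;
            u32 flags;
            struct {
                    u32 id;
                    u32 flags;
            } pairs[];
    };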
+
+static int collect_kfuncs(struct object *obj, struct btf2btf_context *ctx)
+{
+ const char *tag_name, *func_name;
+ struct btf *btf = ctx->btf;
+ const struct btf_type *t;
+ u32 flags, func_id;
+ struct kfunc kfunc;
+ struct btf_id *id;
+ int err;
+
+ if (ctx->nr_decl_tags == 0)
+ return 0;
+
+ for (u32 i = 0; i < ctx->nr_decl_tags; i++) {
+ t = btf__type_by_id(btf, ctx->decl_tags[i]);
+ if (btf_kflag(t) || btf_decl_tag(t)->component_idx != -1)
+ continue;
+
+ tag_name = btf__name_by_offset(btf, t->name_off);
+ if (strcmp(tag_name, "bpf_kfunc") != 0)
+ continue;
+
+ func_id = t->type;
+ t = btf__type_by_id(btf, func_id);
+ if (!btf_is_func(t))
+ continue;
+
+ func_name = btf__name_by_offset(btf, t->name_off);
+ if (!func_name)
+ continue;
+
+ id = btf_id__find(&obj->funcs, func_name);
+ if (!id || id->kind != BTF_ID_KIND_SYM)
+ continue;
+
+ flags = find_kfunc_flags(obj, id);
+
+ kfunc.name = id->name;
+ kfunc.btf_id = func_id;
+ kfunc.flags = flags;
+
+ err = push_kfunc(ctx, &kfunc);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int build_btf2btf_context(struct object *obj, struct btf2btf_context *ctx)
+{
+ int err;
+
+ ctx->btf = obj->btf;
+
+ err = collect_decl_tags(ctx);
+ if (err) {
+ pr_err("ERROR: resolve_btfids: failed to collect decl tags from BTF\n");
+ return err;
+ }
+
+ err = collect_kfuncs(obj, ctx);
+ if (err) {
+ pr_err("ERROR: resolve_btfids: failed to collect kfuncs from BTF\n");
+ return err;
+ }
+
+ return 0;
+}
+
+/* Implicit BPF kfunc arguments can only be of particular types */
+static bool is_kf_implicit_arg(const struct btf *btf, const struct btf_param *p)
+{
+ static const char *const kf_implicit_arg_types[] = {
+ "bpf_prog_aux",
+ };
+ const struct btf_type *t;
+ const char *name;
+
+ t = btf_type_skip_qualifiers(btf, p->type);
+ if (!btf_is_ptr(t))
+ return false;
+
+ t = btf_type_skip_qualifiers(btf, t->type);
+ if (!btf_is_struct(t))
+ return false;
+
+ name = btf__name_by_offset(btf, t->name_off);
+ if (!name)
+ return false;
+
+ for (int i = 0; i < ARRAY_SIZE(kf_implicit_arg_types); i++)
+ if (strcmp(name, kf_implicit_arg_types[i]) == 0)
+ return true;
+
+ return false;
+}
+
+/*
+ * For a kfunc with KF_IMPLICIT_ARGS we do the following:
+ * 1. Add a new function with _impl suffix in the name, with the prototype
+ * of the original kfunc.
+ * 2. Add all decl tags except "bpf_kfunc" for the _impl func.
+ * 3. Add a new function prototype with modified list of arguments:
+ * omitting implicit args.
+ * 4. Change the prototype of the original kfunc to the new one.
+ *
+ * This way we transform the BTF associated with the kfunc from
+ * __bpf_kfunc bpf_foo(int arg1, void *implicit_arg);
+ * into
+ * bpf_foo_impl(int arg1, void *implicit_arg);
+ * __bpf_kfunc bpf_foo(int arg1);
+ *
+ * If a kfunc with KF_IMPLICIT_ARGS already has an _impl counterpart
+ * in BTF, then it's a legacy case: an _impl function is declared in the
+ * source code. In this case, we can skip adding an _impl function, but we
+ * still have to add a func prototype that omits implicit args.
+ */
+static int process_kfunc_with_implicit_args(struct btf2btf_context *ctx, struct kfunc *kfunc)
+{
+ s32 idx, new_proto_id, new_func_id, proto_id;
+ const char *param_name, *tag_name;
+ const struct btf_param *params;
+ enum btf_func_linkage linkage;
+ char tmp_name[KSYM_NAME_LEN];
+ struct btf *btf = ctx->btf;
+ int err, len, nr_params;
+ struct btf_type *t;
+
+ t = (struct btf_type *)btf__type_by_id(btf, kfunc->btf_id);
+ if (!t || !btf_is_func(t)) {
+ pr_err("ERROR: resolve_btfids: btf id %d is not a function\n", kfunc->btf_id);
+ return -EINVAL;
+ }
+
+ linkage = btf_vlen(t);
+
+ proto_id = t->type;
+ t = (struct btf_type *)btf__type_by_id(btf, proto_id);
+ if (!t || !btf_is_func_proto(t)) {
+ pr_err("ERROR: resolve_btfids: btf id %d is not a function prototype\n", proto_id);
+ return -EINVAL;
+ }
+
+ len = snprintf(tmp_name, sizeof(tmp_name), "%s%s", kfunc->name, KF_IMPL_SUFFIX);
+ if (len < 0 || len >= sizeof(tmp_name)) {
+ pr_err("ERROR: function name is too long: %s%s\n", kfunc->name, KF_IMPL_SUFFIX);
+ return -E2BIG;
+ }
+
+ if (btf__find_by_name_kind(btf, tmp_name, BTF_KIND_FUNC) > 0) {
+ pr_debug("resolve_btfids: function %s already exists in BTF\n", tmp_name);
+ goto add_new_proto;
+ }
+
+ /* Add a new function with _impl suffix and original prototype */
+ new_func_id = btf__add_func(btf, tmp_name, linkage, proto_id);
+ if (new_func_id < 0) {
+ pr_err("ERROR: resolve_btfids: failed to add func %s to BTF\n", tmp_name);
+ return new_func_id;
+ }
+
+ /* Copy all decl tags except "bpf_kfunc" from the original kfunc to the new one */
+ for (int i = 0; i < ctx->nr_decl_tags; i++) {
+ t = (struct btf_type *)btf__type_by_id(btf, ctx->decl_tags[i]);
+ if (t->type != kfunc->btf_id)
+ continue;
+
+ tag_name = btf__name_by_offset(btf, t->name_off);
+ if (strcmp(tag_name, "bpf_kfunc") == 0)
+ continue;
+
+ idx = btf_decl_tag(t)->component_idx;
+
+ if (btf_kflag(t))
+ err = btf__add_decl_attr(btf, tag_name, new_func_id, idx);
+ else
+ err = btf__add_decl_tag(btf, tag_name, new_func_id, idx);
+
+ if (err < 0) {
+ pr_err("ERROR: resolve_btfids: failed to add decl tag %s for %s\n",
+ tag_name, tmp_name);
+ return -EINVAL;
+ }
+ }
+
+add_new_proto:
+ t = (struct btf_type *)btf__type_by_id(btf, proto_id);
+ new_proto_id = btf__add_func_proto(btf, t->type);
+ if (new_proto_id < 0) {
+ pr_err("ERROR: resolve_btfids: failed to add func proto for %s\n", kfunc->name);
+ return new_proto_id;
+ }
+
+ /* Add non-implicit args to the new prototype */
+ t = (struct btf_type *)btf__type_by_id(btf, proto_id);
+ nr_params = btf_vlen(t);
+ for (int i = 0; i < nr_params; i++) {
+ params = btf_params(t);
+ if (is_kf_implicit_arg(btf, &params[i]))
+ break;
+ param_name = btf__name_by_offset(btf, params[i].name_off);
+ err = btf__add_func_param(btf, param_name, params[i].type);
+ if (err < 0) {
+ pr_err("ERROR: resolve_btfids: failed to add param %s for %s\n",
+ param_name, kfunc->name);
+ return err;
+ }
+ t = (struct btf_type *)btf__type_by_id(btf, proto_id);
+ }
+
+ /* Finally change the prototype of the original kfunc to the new one */
+ t = (struct btf_type *)btf__type_by_id(btf, kfunc->btf_id);
+ t->type = new_proto_id;
+
+ pr_debug("resolve_btfids: updated BTF for kfunc with implicit args %s\n", kfunc->name);
+
+ return 0;
+}
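
Concretely, for an illustrative (hypothetical) kfunc the rewrite above has this source-level effect:

    /* Before: BTF as emitted from the source, implicit argument included. */
    __bpf_kfunc int bpf_foo(int arg1, struct bpf_prog_aux *aux);

    /* After process_kfunc_with_implicit_args():
     *  - bpf_foo_impl keeps the original prototype plus all decl tags
     *    except "bpf_kfunc";
     *  - bpf_foo, the name BPF programs call, loses the implicit arg. */
    int bpf_foo_impl(int arg1, struct bpf_prog_aux *aux);
    __bpf_kfunc int bpf_foo(int arg1);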
+
+static int btf2btf(struct object *obj)
+{
+ struct btf2btf_context ctx = {};
+ int err;
+
+ err = build_btf2btf_context(obj, &ctx);
+ if (err)
+ goto out;
+
+ for (u32 i = 0; i < ctx.nr_kfuncs; i++) {
+ struct kfunc *kfunc = &ctx.kfuncs[i];
+
+ if (!(kfunc->flags & KF_IMPLICIT_ARGS))
+ continue;
+
+ err = process_kfunc_with_implicit_args(&ctx, kfunc);
+ if (err)
+ goto out;
+ }
+
+ err = 0;
+out:
+ free(ctx.decl_tags);
+ free(ctx.kfuncs);
+
+ return err;
+}
+
+/*
+ * Sort types by name in ascending order, which places all
+ * anonymous types before named types.
+ */
+static int cmp_type_names(const void *a, const void *b, void *priv)
+{
+ struct btf *btf = (struct btf *)priv;
+ const struct btf_type *ta = btf__type_by_id(btf, *(__u32 *)a);
+ const struct btf_type *tb = btf__type_by_id(btf, *(__u32 *)b);
+ const char *na, *nb;
+ int r;
+
+ na = btf__str_by_offset(btf, ta->name_off);
+ nb = btf__str_by_offset(btf, tb->name_off);
+ r = strcmp(na, nb);
+ if (r != 0)
+ return r;
+
+ /* preserve original relative order of anonymous or same-named types */
+ return *(__u32 *)a < *(__u32 *)b ? -1 : 1;
+}
+
+static int sort_btf_by_name(struct btf *btf)
+{
+ __u32 *permute_ids = NULL, *id_map = NULL;
+ int nr_types, i, err = 0;
+ __u32 start_id = 0, id;
+
+ if (btf__base_btf(btf))
+ start_id = btf__type_cnt(btf__base_btf(btf));
+ nr_types = btf__type_cnt(btf) - start_id;
+
+ permute_ids = calloc(nr_types, sizeof(*permute_ids));
+ if (!permute_ids) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ id_map = calloc(nr_types, sizeof(*id_map));
+ if (!id_map) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ for (i = 0, id = start_id; i < nr_types; i++, id++)
+ permute_ids[i] = id;
+
+ qsort_r(permute_ids, nr_types, sizeof(*permute_ids), cmp_type_names,
+ btf);
+
+ for (i = 0; i < nr_types; i++) {
+ id = permute_ids[i] - start_id;
+ id_map[id] = i + start_id;
+ }
+
+ err = btf__permute(btf, id_map, nr_types, NULL);
+ if (err)
+ pr_err("FAILED: btf permute: %s\n", strerror(-err));
+
+out:
+ free(permute_ids);
+ free(id_map);
+ return err;
+}
+
+static int finalize_btf(struct object *obj)
+{
+ struct btf *base_btf = obj->base_btf, *btf = obj->btf;
+ int err;
+
+ if (obj->base_btf && obj->distill_base) {
+ err = btf__distill_base(obj->btf, &base_btf, &btf);
+ if (err) {
+ pr_err("FAILED to distill base BTF: %s\n", strerror(errno));
+ goto out_err;
+ }
+
+ btf__free(obj->base_btf);
+ btf__free(obj->btf);
+ obj->base_btf = base_btf;
+ obj->btf = btf;
+ }
+
+ err = sort_btf_by_name(obj->btf);
+ if (err) {
+ pr_err("FAILED to sort BTF: %s\n", strerror(errno));
+ goto out_err;
}
- pr_debug("update %s for %s\n",
- err >= 0 ? "ok" : "failed", obj->path);
- return err < 0 ? -1 : 0;
+ return 0;
+
+out_err:
+ btf__free(base_btf);
+ btf__free(btf);
+ obj->base_btf = NULL;
+ obj->btf = NULL;
+
+ return err;
+}
+
+static inline int make_out_path(char *buf, u32 buf_sz, const char *in_path, const char *suffix)
+{
+ int len = snprintf(buf, buf_sz, "%s%s", in_path, suffix);
+
+ if (len < 0 || len >= buf_sz) {
+ pr_err("Output path is too long: %s%s\n", in_path, suffix);
+ return -E2BIG;
+ }
+
+ return 0;
+}
+
+/*
+ * Patch the .BTF_ids section of an ELF file with data from provided file.
+ * Equivalent to: objcopy --update-section .BTF_ids=<btfids> <elf>
+ *
+ * 1. Find .BTF_ids section in the ELF
+ * 2. Verify that blob file size matches section size
+ * 3. Update section data buffer with blob data
+ * 4. Write the ELF file
+ */
+static int patch_btfids(const char *btfids_path, const char *elf_path)
+{
+ Elf_Scn *scn = NULL;
+ FILE *btfids_file;
+ size_t shdrstrndx;
+ int fd, err = -1;
+ Elf_Data *data;
+ struct stat st;
+ GElf_Shdr sh;
+ char *name;
+ Elf *elf;
+
+ elf_version(EV_CURRENT);
+
+ fd = open(elf_path, O_RDWR, 0666);
+ if (fd < 0) {
+ pr_err("FAILED to open %s: %s\n", elf_path, strerror(errno));
+ return -1;
+ }
+
+ elf = elf_begin(fd, ELF_C_RDWR_MMAP, NULL);
+ if (!elf) {
+ close(fd);
+ pr_err("FAILED cannot create ELF descriptor: %s\n", elf_errmsg(-1));
+ return -1;
+ }
+
+ elf_flagelf(elf, ELF_C_SET, ELF_F_LAYOUT);
+
+ if (elf_getshdrstrndx(elf, &shdrstrndx) != 0) {
+ pr_err("FAILED cannot get shdr str ndx\n");
+ goto out;
+ }
+
+ while ((scn = elf_nextscn(elf, scn)) != NULL) {
+
+ if (gelf_getshdr(scn, &sh) != &sh) {
+ pr_err("FAILED to get section header\n");
+ goto out;
+ }
+
+ name = elf_strptr(elf, shdrstrndx, sh.sh_name);
+ if (!name)
+ continue;
+
+ if (strcmp(name, BTF_IDS_SECTION) == 0)
+ break;
+ }
+
+ if (!scn) {
+ pr_err("FAILED: section %s not found in %s\n", BTF_IDS_SECTION, elf_path);
+ goto out;
+ }
+
+ data = elf_getdata(scn, NULL);
+ if (!data) {
+ pr_err("FAILED to get %s section data from %s\n", BTF_IDS_SECTION, elf_path);
+ goto out;
+ }
+
+ if (stat(btfids_path, &st) < 0) {
+ pr_err("FAILED to stat %s: %s\n", btfids_path, strerror(errno));
+ goto out;
+ }
+
+ if ((size_t)st.st_size != data->d_size) {
+ pr_err("FAILED: size mismatch - %s section in %s is %zu bytes, %s is %zu bytes\n",
+ BTF_IDS_SECTION, elf_path, data->d_size, btfids_path, (size_t)st.st_size);
+ goto out;
+ }
+
+ btfids_file = fopen(btfids_path, "rb");
+ if (!btfids_file) {
+ pr_err("FAILED to open %s: %s\n", btfids_path, strerror(errno));
+ goto out;
+ }
+
+ pr_debug("Copying data from %s to %s section of %s (%zu bytes)\n",
+ btfids_path, BTF_IDS_SECTION, elf_path, data->d_size);
+
+ if (fread(data->d_buf, data->d_size, 1, btfids_file) != 1) {
+ pr_err("FAILED to read %s\n", btfids_path);
+ fclose(btfids_file);
+ goto out;
+ }
+ fclose(btfids_file);
+
+ elf_flagdata(data, ELF_C_SET, ELF_F_DIRTY);
+ if (elf_update(elf, ELF_C_WRITE) < 0) {
+ pr_err("FAILED to update ELF file %s\n", elf_path);
+ goto out;
+ }
+
+ err = 0;
+out:
+ elf_end(elf);
+ close(fd);
+
+ return err;
}
static const char * const resolve_btfids_usage[] = {
"resolve_btfids [<options>] <ELF object>",
+ "resolve_btfids --patch_btfids <.BTF_ids file> <ELF object>",
NULL
};
@@ -786,16 +1457,24 @@ int main(int argc, const char **argv)
.funcs = RB_ROOT,
.sets = RB_ROOT,
};
+ const char *btfids_path = NULL;
bool fatal_warnings = false;
+ bool resolve_btfids = true;
+ char out_path[PATH_MAX];
+
struct option btfid_options[] = {
OPT_INCR('v', "verbose", &verbose,
"be more verbose (show errors, etc)"),
- OPT_STRING(0, "btf", &obj.btf, "BTF data",
- "BTF data"),
+ OPT_STRING(0, "btf", &obj.btf_path, "file",
+ "path to a file with input BTF data"),
OPT_STRING('b', "btf_base", &obj.base_btf_path, "file",
"path of file providing base BTF"),
OPT_BOOLEAN(0, "fatal_warnings", &fatal_warnings,
"turn warnings into errors"),
+ OPT_BOOLEAN(0, "distill_base", &obj.distill_base,
+ "distill --btf_base and emit .BTF.base section data"),
+ OPT_STRING(0, "patch_btfids", &btfids_path, "file",
+ "path to .BTF_ids section data blob to patch into ELF file"),
OPT_END()
};
int err = -1;
@@ -807,6 +1486,9 @@ int main(int argc, const char **argv)
obj.path = argv[0];
+ if (btfids_path)
+ return patch_btfids(btfids_path, obj.path);
+
if (elf_collect(&obj))
goto out;
@@ -816,23 +1498,55 @@ int main(int argc, const char **argv)
*/
if (obj.efile.idlist_shndx == -1 ||
obj.efile.symbols_shndx == -1) {
- pr_debug("Cannot find .BTF_ids or symbols sections, nothing to do\n");
- err = 0;
- goto out;
+ pr_debug("Cannot find .BTF_ids or symbols sections, skip symbols resolution\n");
+ resolve_btfids = false;
}
- if (symbols_collect(&obj))
+ if (resolve_btfids)
+ if (symbols_collect(&obj))
+ goto out;
+
+ if (load_btf(&obj))
goto out;
+ if (btf2btf(&obj))
+ goto out;
+
+ if (finalize_btf(&obj))
+ goto out;
+
+ if (!resolve_btfids)
+ goto dump_btf;
+
if (symbols_resolve(&obj))
goto out;
if (symbols_patch(&obj))
goto out;
+ err = make_out_path(out_path, sizeof(out_path), obj.path, BTF_IDS_SECTION);
+ err = err ?: dump_raw_btf_ids(&obj, out_path);
+ if (err)
+ goto out;
+
+dump_btf:
+ err = make_out_path(out_path, sizeof(out_path), obj.path, BTF_ELF_SEC);
+ err = err ?: dump_raw_btf(obj.btf, out_path);
+ if (err)
+ goto out;
+
+ if (obj.base_btf && obj.distill_base) {
+ err = make_out_path(out_path, sizeof(out_path), obj.path, BTF_BASE_ELF_SEC);
+ err = err ?: dump_raw_btf(obj.base_btf, out_path);
+ if (err)
+ goto out;
+ }
+
if (!(fatal_warnings && warnings))
err = 0;
out:
+ btf__free(obj.base_btf);
+ btf__free(obj.btf);
if (obj.efile.elf) {
elf_end(obj.efile.elf);
close(obj.efile.fd);
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index be7d8e060e10..5e38b4887de6 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -119,6 +119,14 @@ enum bpf_cgroup_iter_order {
BPF_CGROUP_ITER_DESCENDANTS_PRE, /* walk descendants in pre-order. */
BPF_CGROUP_ITER_DESCENDANTS_POST, /* walk descendants in post-order. */
BPF_CGROUP_ITER_ANCESTORS_UP, /* walk ancestors upward. */
+ /*
+ * Walks the immediate children of the specified parent
+ * cgroup_subsys_state. Unlike BPF_CGROUP_ITER_DESCENDANTS_PRE,
+	 * BPF_CGROUP_ITER_DESCENDANTS_POST, and BPF_CGROUP_ITER_ANCESTORS_UP,
+	 * the iterator does not include the specified parent as one of the
+	 * returned iterator elements.
+ */
+ BPF_CGROUP_ITER_CHILDREN,
};
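
A hypothetical user-space sketch of requesting the new traversal order when creating a cgroup iterator link (cgrp_fd and prog_fd are assumed to be valid descriptors; error handling elided):

    union bpf_iter_link_info linfo = {
            .cgroup = {
                    .cgroup_fd = cgrp_fd,
                    .order = BPF_CGROUP_ITER_CHILDREN,
            },
    };
    LIBBPF_OPTS(bpf_link_create_opts, opts,
            .iter_info = &linfo,
            .iter_info_len = sizeof(linfo));

    int link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_ITER, &opts);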
union bpf_iter_link_info {
@@ -918,6 +926,16 @@ union bpf_iter_link_info {
* Number of bytes read from the stream on success, or -1 if an
* error occurred (in which case, *errno* is set appropriately).
*
+ * BPF_PROG_ASSOC_STRUCT_OPS
+ * Description
+ * Associate a BPF program with a struct_ops map. The struct_ops
+ * map is identified by *map_fd* and the BPF program is
+ * identified by *prog_fd*.
+ *
+ * Return
+ * 0 on success or -1 if an error occurred (in which case,
+ * *errno* is set appropriately).
+ *
* NOTES
* eBPF objects (maps and programs) can be shared between processes.
*
@@ -974,6 +992,7 @@ enum bpf_cmd {
BPF_PROG_BIND_MAP,
BPF_TOKEN_CREATE,
BPF_PROG_STREAM_READ_BY_FD,
+ BPF_PROG_ASSOC_STRUCT_OPS,
__MAX_BPF_CMD,
};
@@ -1134,6 +1153,7 @@ enum bpf_attach_type {
BPF_NETKIT_PEER,
BPF_TRACE_KPROBE_SESSION,
BPF_TRACE_UPROBE_SESSION,
+ BPF_TRACE_FSESSION,
__MAX_BPF_ATTACH_TYPE
};
@@ -1373,6 +1393,8 @@ enum {
BPF_NOEXIST = 1, /* create new element if it didn't exist */
BPF_EXIST = 2, /* update existing element */
BPF_F_LOCK = 4, /* spin_lock-ed map_lookup/map_update */
+	BPF_F_CPU	= 8,  /* cpu flag for percpu maps; the upper 32 bits of flags hold the cpu number */
+ BPF_F_ALL_CPUS = 16, /* update value across all CPUs for percpu maps */
};
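
A sketch of how a caller would compose the new flags for a percpu map operation, assuming a local 'cpu' variable; the CPU number rides in the upper half of the 64-bit flags word:

    __u64 one_cpu  = BPF_F_CPU | ((__u64)cpu << 32);  /* update one CPU's slot */
    __u64 all_cpus = BPF_F_ALL_CPUS;                  /* replicate to every CPU */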
/* flags for BPF_MAP_CREATE command */
@@ -1894,6 +1916,12 @@ union bpf_attr {
__u32 prog_fd;
} prog_stream_read;
+ struct {
+ __u32 map_fd;
+ __u32 prog_fd;
+ __u32 flags;
+ } prog_assoc_struct_ops;
+
} __attribute__((aligned(8)));
/* The description below is an attempt at providing documentation to eBPF
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index b66f5fbfbbb2..5846de364209 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -794,6 +794,7 @@ int bpf_link_create(int prog_fd, int target_fd,
case BPF_TRACE_FENTRY:
case BPF_TRACE_FEXIT:
case BPF_MODIFY_RETURN:
+ case BPF_TRACE_FSESSION:
case BPF_LSM_MAC:
attr.link_create.tracing.cookie = OPTS_GET(opts, tracing.cookie, 0);
if (!OPTS_ZEROED(opts, tracing))
@@ -1397,3 +1398,22 @@ int bpf_prog_stream_read(int prog_fd, __u32 stream_id, void *buf, __u32 buf_len,
err = sys_bpf(BPF_PROG_STREAM_READ_BY_FD, &attr, attr_sz);
return libbpf_err_errno(err);
}
+
+int bpf_prog_assoc_struct_ops(int prog_fd, int map_fd,
+ struct bpf_prog_assoc_struct_ops_opts *opts)
+{
+ const size_t attr_sz = offsetofend(union bpf_attr, prog_assoc_struct_ops);
+ union bpf_attr attr;
+ int err;
+
+ if (!OPTS_VALID(opts, bpf_prog_assoc_struct_ops_opts))
+ return libbpf_err(-EINVAL);
+
+ memset(&attr, 0, attr_sz);
+ attr.prog_assoc_struct_ops.map_fd = map_fd;
+ attr.prog_assoc_struct_ops.prog_fd = prog_fd;
+ attr.prog_assoc_struct_ops.flags = OPTS_GET(opts, flags, 0);
+
+ err = sys_bpf(BPF_PROG_ASSOC_STRUCT_OPS, &attr, attr_sz);
+ return libbpf_err_errno(err);
+}
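
A minimal hypothetical call site for the new wrapper (acquisition of prog_fd and map_fd elided):

    LIBBPF_OPTS(bpf_prog_assoc_struct_ops_opts, opts, .flags = 0);

    if (bpf_prog_assoc_struct_ops(prog_fd, map_fd, &opts))
            return -1;      /* errno holds the error code */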
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index e983a3e40d61..2c8e88ddb674 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -289,6 +289,14 @@ LIBBPF_API int bpf_map_lookup_and_delete_batch(int fd, void *in_batch,
* Update spin_lock-ed map elements. This must be
* specified if the map value contains a spinlock.
*
+ * **BPF_F_CPU**
+ *	For percpu maps, update the value on the specified CPU only. The
+ *	CPU number is embedded in the high 32 bits of **opts->elem_flags**.
+ *
+ * **BPF_F_ALL_CPUS**
+ *	For percpu maps, update the value across all CPUs. This flag
+ *	cannot be used together with BPF_F_CPU.
+ *
* @param fd BPF map file descriptor
* @param keys pointer to an array of *count* keys
* @param values pointer to an array of *count* values
@@ -733,6 +741,27 @@ struct bpf_prog_stream_read_opts {
LIBBPF_API int bpf_prog_stream_read(int prog_fd, __u32 stream_id, void *buf, __u32 buf_len,
struct bpf_prog_stream_read_opts *opts);
+struct bpf_prog_assoc_struct_ops_opts {
+ size_t sz;
+ __u32 flags;
+ size_t :0;
+};
+#define bpf_prog_assoc_struct_ops_opts__last_field flags
+
+/**
+ * @brief **bpf_prog_assoc_struct_ops** associates a BPF program with a
+ * struct_ops map.
+ *
+ * @param prog_fd FD for the BPF program
+ * @param map_fd FD for the struct_ops map to be associated with the BPF program
+ * @param opts optional options, can be NULL
+ *
+ * @return 0 on success; negative error code, otherwise (errno is also set to
+ * the error code)
+ */
+LIBBPF_API int bpf_prog_assoc_struct_ops(int prog_fd, int map_fd,
+ struct bpf_prog_assoc_struct_ops_opts *opts);
+
#ifdef __cplusplus
} /* extern "C" */
#endif
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index d4e4e388e625..c145da05a67c 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -315,8 +315,8 @@ enum libbpf_tristate {
___param, sizeof(___param)); \
})
-extern int bpf_stream_vprintk_impl(int stream_id, const char *fmt__str, const void *args,
- __u32 len__sz, void *aux__prog) __weak __ksym;
+extern int bpf_stream_vprintk(int stream_id, const char *fmt__str, const void *args,
+ __u32 len__sz) __weak __ksym;
#define bpf_stream_printk(stream_id, fmt, args...) \
({ \
@@ -328,7 +328,7 @@ extern int bpf_stream_vprintk_impl(int stream_id, const char *fmt__str, const vo
___bpf_fill(___param, args); \
_Pragma("GCC diagnostic pop") \
\
- bpf_stream_vprintk_impl(stream_id, ___fmt, ___param, sizeof(___param), NULL); \
+ bpf_stream_vprintk(stream_id, ___fmt, ___param, sizeof(___param)); \
})
/* Use __bpf_printk when bpf_printk call has 3 or fewer fmt args
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 84a4b0abc8be..83fe79ffcb8f 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -92,6 +92,8 @@ struct btf {
* - for split BTF counts number of types added on top of base BTF.
*/
__u32 nr_types;
+ /* the start IDs of named types in sorted BTF */
+ int named_start_id;
/* if not NULL, points to the base BTF on top of which the current
* split BTF is based
*/
@@ -897,46 +899,105 @@ int btf__resolve_type(const struct btf *btf, __u32 type_id)
return type_id;
}
-__s32 btf__find_by_name(const struct btf *btf, const char *type_name)
+static void btf_check_sorted(struct btf *btf)
{
- __u32 i, nr_types = btf__type_cnt(btf);
+ __u32 i, n, named_start_id = 0;
- if (!strcmp(type_name, "void"))
- return 0;
+ n = btf__type_cnt(btf);
+ for (i = btf->start_id + 1; i < n; i++) {
+ struct btf_type *ta = btf_type_by_id(btf, i - 1);
+ struct btf_type *tb = btf_type_by_id(btf, i);
+ const char *na = btf__str_by_offset(btf, ta->name_off);
+ const char *nb = btf__str_by_offset(btf, tb->name_off);
- for (i = 1; i < nr_types; i++) {
- const struct btf_type *t = btf__type_by_id(btf, i);
- const char *name = btf__name_by_offset(btf, t->name_off);
+ if (strcmp(na, nb) > 0)
+ return;
- if (name && !strcmp(type_name, name))
- return i;
+ if (named_start_id == 0 && na[0] != '\0')
+ named_start_id = i - 1;
+ if (named_start_id == 0 && nb[0] != '\0')
+ named_start_id = i;
}
- return libbpf_err(-ENOENT);
+ if (named_start_id)
+ btf->named_start_id = named_start_id;
+}
+
+static __s32 btf_find_type_by_name_bsearch(const struct btf *btf, const char *name,
+ __s32 start_id)
+{
+ const struct btf_type *t;
+ const char *tname;
+ __s32 l, r, m;
+
+ l = start_id;
+ r = btf__type_cnt(btf) - 1;
+ while (l <= r) {
+ m = l + (r - l) / 2;
+ t = btf_type_by_id(btf, m);
+ tname = btf__str_by_offset(btf, t->name_off);
+ if (strcmp(tname, name) >= 0) {
+ if (l == r)
+ return r;
+ r = m;
+ } else {
+ l = m + 1;
+ }
+ }
+
+ return btf__type_cnt(btf);
}
static __s32 btf_find_by_name_kind(const struct btf *btf, int start_id,
- const char *type_name, __u32 kind)
+ const char *type_name, __s32 kind)
{
- __u32 i, nr_types = btf__type_cnt(btf);
+ __u32 nr_types = btf__type_cnt(btf);
+ const struct btf_type *t;
+ const char *tname;
+ __s32 id;
- if (kind == BTF_KIND_UNKN || !strcmp(type_name, "void"))
- return 0;
+ if (start_id < btf->start_id) {
+ id = btf_find_by_name_kind(btf->base_btf, start_id,
+ type_name, kind);
+ if (id >= 0)
+ return id;
+ start_id = btf->start_id;
+ }
- for (i = start_id; i < nr_types; i++) {
- const struct btf_type *t = btf__type_by_id(btf, i);
- const char *name;
+ if (kind == BTF_KIND_UNKN || strcmp(type_name, "void") == 0)
+ return 0;
- if (btf_kind(t) != kind)
- continue;
- name = btf__name_by_offset(btf, t->name_off);
- if (name && !strcmp(type_name, name))
- return i;
+ if (btf->named_start_id > 0 && type_name[0]) {
+ start_id = max(start_id, btf->named_start_id);
+ id = btf_find_type_by_name_bsearch(btf, type_name, start_id);
+ for (; id < nr_types; id++) {
+ t = btf__type_by_id(btf, id);
+ tname = btf__str_by_offset(btf, t->name_off);
+ if (strcmp(tname, type_name) != 0)
+ return libbpf_err(-ENOENT);
+ if (kind < 0 || btf_kind(t) == kind)
+ return id;
+ }
+ } else {
+ for (id = start_id; id < nr_types; id++) {
+ t = btf_type_by_id(btf, id);
+ if (kind > 0 && btf_kind(t) != kind)
+ continue;
+ tname = btf__str_by_offset(btf, t->name_off);
+ if (strcmp(tname, type_name) == 0)
+ return id;
+ }
}
return libbpf_err(-ENOENT);
}
+/* A kind value of -1 indicates that kind matching should be skipped. */
+__s32 btf__find_by_name(const struct btf *btf, const char *type_name)
+{
+ return btf_find_by_name_kind(btf, 1, type_name, -1);
+}
+
__s32 btf__find_by_name_kind_own(const struct btf *btf, const char *type_name,
__u32 kind)
{
@@ -1006,6 +1067,7 @@ static struct btf *btf_new_empty(struct btf *base_btf)
btf->fd = -1;
btf->ptr_sz = sizeof(void *);
btf->swapped_endian = false;
+ btf->named_start_id = 0;
if (base_btf) {
btf->base_btf = base_btf;
@@ -1057,6 +1119,7 @@ static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf, b
btf->start_id = 1;
btf->start_str_off = 0;
btf->fd = -1;
+ btf->named_start_id = 0;
if (base_btf) {
btf->base_btf = base_btf;
@@ -1091,6 +1154,7 @@ static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf, b
err = err ?: btf_sanity_check(btf);
if (err)
goto done;
+ btf_check_sorted(btf);
done:
if (err) {
@@ -1715,6 +1779,7 @@ static void btf_invalidate_raw_data(struct btf *btf)
free(btf->raw_data_swapped);
btf->raw_data_swapped = NULL;
}
+ btf->named_start_id = 0;
}
/* Ensure BTF is ready to be modified (by splitting into a three memory
@@ -2069,7 +2134,7 @@ int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding
int sz, name_off;
/* non-empty name */
- if (!name || !name[0])
+ if (str_is_empty(name))
return libbpf_err(-EINVAL);
/* byte_sz must be power of 2 */
if (!byte_sz || (byte_sz & (byte_sz - 1)) || byte_sz > 16)
@@ -2117,7 +2182,7 @@ int btf__add_float(struct btf *btf, const char *name, size_t byte_sz)
int sz, name_off;
/* non-empty name */
- if (!name || !name[0])
+ if (str_is_empty(name))
return libbpf_err(-EINVAL);
/* byte_sz must be one of the explicitly allowed values */
@@ -2172,7 +2237,7 @@ static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref
if (!t)
return libbpf_err(-ENOMEM);
- if (name && name[0]) {
+ if (!str_is_empty(name)) {
name_off = btf__add_str(btf, name);
if (name_off < 0)
return name_off;
@@ -2249,7 +2314,7 @@ static int btf_add_composite(struct btf *btf, int kind, const char *name, __u32
if (!t)
return libbpf_err(-ENOMEM);
- if (name && name[0]) {
+ if (!str_is_empty(name)) {
name_off = btf__add_str(btf, name);
if (name_off < 0)
return name_off;
@@ -2350,7 +2415,7 @@ int btf__add_field(struct btf *btf, const char *name, int type_id,
if (!m)
return libbpf_err(-ENOMEM);
- if (name && name[0]) {
+ if (!str_is_empty(name)) {
name_off = btf__add_str(btf, name);
if (name_off < 0)
return name_off;
@@ -2388,7 +2453,7 @@ static int btf_add_enum_common(struct btf *btf, const char *name, __u32 byte_sz,
if (!t)
return libbpf_err(-ENOMEM);
- if (name && name[0]) {
+ if (!str_is_empty(name)) {
name_off = btf__add_str(btf, name);
if (name_off < 0)
return name_off;
@@ -2446,7 +2511,7 @@ int btf__add_enum_value(struct btf *btf, const char *name, __s64 value)
return libbpf_err(-EINVAL);
/* non-empty name */
- if (!name || !name[0])
+ if (str_is_empty(name))
return libbpf_err(-EINVAL);
if (value < INT_MIN || value > UINT_MAX)
return libbpf_err(-E2BIG);
@@ -2523,7 +2588,7 @@ int btf__add_enum64_value(struct btf *btf, const char *name, __u64 value)
return libbpf_err(-EINVAL);
/* non-empty name */
- if (!name || !name[0])
+ if (str_is_empty(name))
return libbpf_err(-EINVAL);
/* decompose and invalidate raw data */
@@ -2563,7 +2628,7 @@ int btf__add_enum64_value(struct btf *btf, const char *name, __u64 value)
*/
int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind)
{
- if (!name || !name[0])
+ if (str_is_empty(name))
return libbpf_err(-EINVAL);
switch (fwd_kind) {
@@ -2599,7 +2664,7 @@ int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind)
*/
int btf__add_typedef(struct btf *btf, const char *name, int ref_type_id)
{
- if (!name || !name[0])
+ if (str_is_empty(name))
return libbpf_err(-EINVAL);
return btf_add_ref_kind(btf, BTF_KIND_TYPEDEF, name, ref_type_id, 0);
@@ -2651,7 +2716,7 @@ int btf__add_restrict(struct btf *btf, int ref_type_id)
*/
int btf__add_type_tag(struct btf *btf, const char *value, int ref_type_id)
{
- if (!value || !value[0])
+ if (str_is_empty(value))
return libbpf_err(-EINVAL);
return btf_add_ref_kind(btf, BTF_KIND_TYPE_TAG, value, ref_type_id, 0);
@@ -2668,7 +2733,7 @@ int btf__add_type_tag(struct btf *btf, const char *value, int ref_type_id)
*/
int btf__add_type_attr(struct btf *btf, const char *value, int ref_type_id)
{
- if (!value || !value[0])
+ if (str_is_empty(value))
return libbpf_err(-EINVAL);
return btf_add_ref_kind(btf, BTF_KIND_TYPE_TAG, value, ref_type_id, 1);
@@ -2687,7 +2752,7 @@ int btf__add_func(struct btf *btf, const char *name,
{
int id;
- if (!name || !name[0])
+ if (str_is_empty(name))
return libbpf_err(-EINVAL);
if (linkage != BTF_FUNC_STATIC && linkage != BTF_FUNC_GLOBAL &&
linkage != BTF_FUNC_EXTERN)
@@ -2773,7 +2838,7 @@ int btf__add_func_param(struct btf *btf, const char *name, int type_id)
if (!p)
return libbpf_err(-ENOMEM);
- if (name && name[0]) {
+ if (!str_is_empty(name)) {
name_off = btf__add_str(btf, name);
if (name_off < 0)
return name_off;
@@ -2808,7 +2873,7 @@ int btf__add_var(struct btf *btf, const char *name, int linkage, int type_id)
int sz, name_off;
/* non-empty name */
- if (!name || !name[0])
+ if (str_is_empty(name))
return libbpf_err(-EINVAL);
if (linkage != BTF_VAR_STATIC && linkage != BTF_VAR_GLOBAL_ALLOCATED &&
linkage != BTF_VAR_GLOBAL_EXTERN)
@@ -2857,7 +2922,7 @@ int btf__add_datasec(struct btf *btf, const char *name, __u32 byte_sz)
int sz, name_off;
/* non-empty name */
- if (!name || !name[0])
+ if (str_is_empty(name))
return libbpf_err(-EINVAL);
if (btf_ensure_modifiable(btf))
@@ -2934,7 +2999,7 @@ static int btf_add_decl_tag(struct btf *btf, const char *value, int ref_type_id,
struct btf_type *t;
int sz, value_off;
- if (!value || !value[0] || component_idx < -1)
+ if (str_is_empty(value) || component_idx < -1)
return libbpf_err(-EINVAL);
if (validate_type_id(ref_type_id))
@@ -4431,11 +4496,14 @@ static bool btf_dedup_identical_types(struct btf_dedup *d, __u32 id1, __u32 id2,
struct btf_type *t1, *t2;
int k1, k2;
recur:
- if (depth <= 0)
- return false;
-
t1 = btf_type_by_id(d->btf, id1);
t2 = btf_type_by_id(d->btf, id2);
+ if (depth <= 0) {
+ pr_debug("Reached depth limit for identical type comparison for '%s'/'%s'\n",
+ btf__name_by_offset(d->btf, t1->name_off),
+ btf__name_by_offset(d->btf, t2->name_off));
+ return false;
+ }
k1 = btf_kind(t1);
k2 = btf_kind(t2);
@@ -4497,8 +4565,16 @@ recur:
for (i = 0, n = btf_vlen(t1); i < n; i++, m1++, m2++) {
if (m1->type == m2->type)
continue;
- if (!btf_dedup_identical_types(d, m1->type, m2->type, depth - 1))
+ if (!btf_dedup_identical_types(d, m1->type, m2->type, depth - 1)) {
+ if (t1->name_off) {
+ pr_debug("%s '%s' size=%d vlen=%d id1[%u] id2[%u] shallow-equal but not identical for field#%d '%s'\n",
+ k1 == BTF_KIND_STRUCT ? "STRUCT" : "UNION",
+ btf__name_by_offset(d->btf, t1->name_off),
+ t1->size, btf_vlen(t1), id1, id2, i,
+ btf__name_by_offset(d->btf, m1->name_off));
+ }
return false;
+ }
}
return true;
}
@@ -4739,8 +4815,16 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
canon_m = btf_members(canon_type);
for (i = 0; i < vlen; i++) {
eq = btf_dedup_is_equiv(d, cand_m->type, canon_m->type);
- if (eq <= 0)
+ if (eq <= 0) {
+ if (cand_type->name_off) {
+ pr_debug("%s '%s' size=%d vlen=%d cand_id[%u] canon_id[%u] shallow-equal but not equiv for field#%d '%s': %d\n",
+ cand_kind == BTF_KIND_STRUCT ? "STRUCT" : "UNION",
+ btf__name_by_offset(d->btf, cand_type->name_off),
+ cand_type->size, vlen, cand_id, canon_id, i,
+ btf__name_by_offset(d->btf, cand_m->name_off), eq);
+ }
return eq;
+ }
cand_m++;
canon_m++;
}
@@ -5868,3 +5952,136 @@ int btf__relocate(struct btf *btf, const struct btf *base_btf)
btf->owns_base = false;
return libbpf_err(err);
}
+
+struct btf_permute {
+ struct btf *btf;
+ __u32 *id_map;
+ __u32 start_offs;
+};
+
+/* Callback function to remap individual type ID references */
+static int btf_permute_remap_type_id(__u32 *type_id, void *ctx)
+{
+ struct btf_permute *p = ctx;
+ __u32 new_id = *type_id;
+
+ /* refer to the base BTF or VOID type */
+ if (new_id < p->btf->start_id)
+ return 0;
+
+ if (new_id >= btf__type_cnt(p->btf))
+ return -EINVAL;
+
+ *type_id = p->id_map[new_id - p->btf->start_id + p->start_offs];
+ return 0;
+}
+
+int btf__permute(struct btf *btf, __u32 *id_map, __u32 id_map_cnt,
+ const struct btf_permute_opts *opts)
+{
+ struct btf_permute p;
+ struct btf_ext *btf_ext;
+ void *nt, *new_types = NULL;
+ __u32 *order_map = NULL;
+ int err = 0, i;
+ __u32 n, id, start_offs = 0;
+
+ if (!OPTS_VALID(opts, btf_permute_opts))
+ return libbpf_err(-EINVAL);
+
+ if (btf__base_btf(btf)) {
+ n = btf->nr_types;
+ } else {
+ if (id_map[0] != 0)
+ return libbpf_err(-EINVAL);
+ n = btf__type_cnt(btf);
+ start_offs = 1;
+ }
+
+ if (id_map_cnt != n)
+ return libbpf_err(-EINVAL);
+
+ /* record the sequence of types */
+ order_map = calloc(id_map_cnt, sizeof(*id_map));
+ if (!order_map) {
+ err = -ENOMEM;
+ goto done;
+ }
+
+ new_types = calloc(btf->hdr->type_len, 1);
+ if (!new_types) {
+ err = -ENOMEM;
+ goto done;
+ }
+
+ if (btf_ensure_modifiable(btf)) {
+ err = -ENOMEM;
+ goto done;
+ }
+
+ for (i = start_offs; i < id_map_cnt; i++) {
+ id = id_map[i];
+ if (id < btf->start_id || id >= btf__type_cnt(btf)) {
+ err = -EINVAL;
+ goto done;
+ }
+ id -= btf->start_id - start_offs;
+ /* cannot be mapped to the same ID */
+ if (order_map[id]) {
+ err = -EINVAL;
+ goto done;
+ }
+ order_map[id] = i + btf->start_id - start_offs;
+ }
+
+ p.btf = btf;
+ p.id_map = id_map;
+ p.start_offs = start_offs;
+ nt = new_types;
+ for (i = start_offs; i < id_map_cnt; i++) {
+ struct btf_field_iter it;
+ const struct btf_type *t;
+ __u32 *type_id;
+ int type_size;
+
+ id = order_map[i];
+ t = btf__type_by_id(btf, id);
+ type_size = btf_type_size(t);
+ memcpy(nt, t, type_size);
+
+ /* fix up referenced IDs for BTF */
+ err = btf_field_iter_init(&it, nt, BTF_FIELD_ITER_IDS);
+ if (err)
+ goto done;
+ while ((type_id = btf_field_iter_next(&it))) {
+ err = btf_permute_remap_type_id(type_id, &p);
+ if (err)
+ goto done;
+ }
+
+ nt += type_size;
+ }
+
+ /* fix up referenced IDs for btf_ext */
+ btf_ext = OPTS_GET(opts, btf_ext, NULL);
+ if (btf_ext) {
+ err = btf_ext_visit_type_ids(btf_ext, btf_permute_remap_type_id, &p);
+ if (err)
+ goto done;
+ }
+
+ for (nt = new_types, i = 0; i < id_map_cnt - start_offs; i++) {
+ btf->type_offs[i] = nt - new_types;
+ nt += btf_type_size(nt);
+ }
+
+ free(order_map);
+ free(btf->types_data);
+ btf->types_data = new_types;
+ return 0;
+
+done:
+ free(order_map);
+ free(new_types);
+ return libbpf_err(err);
+}
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index cc01494d6210..b30008c267c0 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -281,6 +281,48 @@ LIBBPF_API int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts);
*/
LIBBPF_API int btf__relocate(struct btf *btf, const struct btf *base_btf);
+struct btf_permute_opts {
+ size_t sz;
+ /* optional .BTF.ext info along the main BTF info */
+ struct btf_ext *btf_ext;
+ size_t :0;
+};
+#define btf_permute_opts__last_field btf_ext
+
+/**
+ * @brief **btf__permute()** rearranges BTF types in-place according to a specified ID mapping
+ * @param btf BTF object to permute
+ * @param id_map Array mapping original type IDs to new IDs
+ * @param id_map_cnt Number of elements in @id_map
+ * @param opts Optional parameters, including BTF extension data for reference updates
+ * @return 0 on success, negative error code on failure
+ *
+ * **btf__permute()** reorders BTF types based on the provided @id_map array,
+ * updating all internal type references to maintain consistency. The function
+ * operates in-place, modifying the BTF object directly.
+ *
+ * For **base BTF**:
+ * - @id_map must include all types from ID 0 to `btf__type_cnt(btf) - 1`
+ * - @id_map_cnt must be `btf__type_cnt(btf)`
+ * - Mapping is defined as `id_map[original_id] = new_id`
+ * - `id_map[0]` must be 0 (void type cannot be moved)
+ *
+ * For **split BTF**:
+ * - @id_map must include only split types (types added on top of the base BTF)
+ * - @id_map_cnt must be `btf__type_cnt(btf) - btf__type_cnt(btf__base_btf(btf))`
+ * - Mapping is defined as `id_map[original_id - start_id] = new_id`
+ * - `start_id` equals `btf__type_cnt(btf__base_btf(btf))`
+ *
+ * After permutation, all type references within the BTF data and optional
+ * BTF extension (if provided via @opts) are updated automatically.
+ *
+ * On error, returns a negative error code and sets errno:
+ * - `-EINVAL`: Invalid parameters or invalid ID mapping
+ * - `-ENOMEM`: Memory allocation failure
+ */
+LIBBPF_API int btf__permute(struct btf *btf, __u32 *id_map, __u32 id_map_cnt,
+ const struct btf_permute_opts *opts);
+
struct btf_dump;
struct btf_dump_opts {
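For illustration, a minimal caller for the base-BTF case could look like the sketch below (the function and the particular swap are hypothetical); it builds an identity id_map, trades the last two type IDs, and lets btf__permute() rewrite every reference:

#include <errno.h>
#include <stdlib.h>
#include <bpf/btf.h>

/* Swap the positions of the last two types in a base (non-split) BTF. */
static int swap_last_two_types(struct btf *btf)
{
	__u32 i, n = btf__type_cnt(btf);	/* includes VOID at ID 0 */
	__u32 *id_map;
	int err;

	if (n < 3)
		return -EINVAL;
	id_map = calloc(n, sizeof(*id_map));
	if (!id_map)
		return -ENOMEM;
	for (i = 0; i < n; i++)
		id_map[i] = i;			/* identity mapping, id_map[0] == 0 */
	id_map[n - 2] = n - 1;			/* last two types trade IDs */
	id_map[n - 1] = n - 2;
	err = btf__permute(btf, id_map, n, NULL);	/* no .BTF.ext to fix up */
	free(id_map);
	return err;
}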
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index 6388392f49a0..53c6624161d7 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -1762,9 +1762,18 @@ static int btf_dump_get_bitfield_value(struct btf_dump *d,
__u16 left_shift_bits, right_shift_bits;
const __u8 *bytes = data;
__u8 nr_copy_bits;
+ __u8 start_bit, nr_bytes;
__u64 num = 0;
int i;
+ /* Calculate how many bytes cover the bitfield */
+ start_bit = bits_offset % 8;
+ nr_bytes = (start_bit + bit_sz + 7) / 8;
+
+ /* Bound check */
+ if (data + nr_bytes > d->typed_dump->data_end)
+ return -E2BIG;
+
/* Maximum supported bitfield size is 64 bits */
if (t->size > 8) {
pr_warn("unexpected bitfield size %d\n", t->size);
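Worked example for the new bound check: a 7-bit field at bits_offset 13 yields start_bit = 13 % 8 = 5 and nr_bytes = (5 + 7 + 7) / 8 = 2, so the dump is rejected with -E2BIG unless both covering bytes lie below data_end.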
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index f4dfd23148a5..0c8bf0b5cce4 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -115,6 +115,7 @@ static const char * const attach_type_name[] = {
[BPF_TRACE_FENTRY] = "trace_fentry",
[BPF_TRACE_FEXIT] = "trace_fexit",
[BPF_MODIFY_RETURN] = "modify_return",
+ [BPF_TRACE_FSESSION] = "trace_fsession",
[BPF_LSM_MAC] = "lsm_mac",
[BPF_LSM_CGROUP] = "lsm_cgroup",
[BPF_SK_LOOKUP] = "sk_lookup",
@@ -380,7 +381,7 @@ struct reloc_desc {
const struct bpf_core_relo *core_relo; /* used when type == RELO_CORE */
struct {
int map_idx;
- int sym_off;
+ unsigned int sym_off;
/*
* The following two fields can be unionized, as the
* ext_idx field is used for extern symbols, and the
@@ -757,13 +758,14 @@ struct bpf_object {
int arena_map_idx;
void *arena_data;
size_t arena_data_sz;
+ size_t arena_data_off;
void *jumptables_data;
size_t jumptables_data_sz;
struct {
struct bpf_program *prog;
- int sym_off;
+ unsigned int sym_off;
int fd;
} *jumptable_maps;
size_t jumptable_map_cnt;
@@ -2903,7 +2905,7 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
var_extra = btf_var(var);
map_name = btf__name_by_offset(obj->btf, var->name_off);
- if (map_name == NULL || map_name[0] == '\0') {
+ if (str_is_empty(map_name)) {
pr_warn("map #%d: empty name.\n", var_idx);
return -EINVAL;
}
@@ -2991,10 +2993,11 @@ static int init_arena_map_data(struct bpf_object *obj, struct bpf_map *map,
void *data, size_t data_sz)
{
const long page_sz = sysconf(_SC_PAGE_SIZE);
+ const size_t data_alloc_sz = roundup(data_sz, page_sz);
size_t mmap_sz;
mmap_sz = bpf_map_mmap_sz(map);
- if (roundup(data_sz, page_sz) > mmap_sz) {
+ if (data_alloc_sz > mmap_sz) {
pr_warn("elf: sec '%s': declared ARENA map size (%zu) is too small to hold global __arena variables of size %zu\n",
sec_name, mmap_sz, data_sz);
return -E2BIG;
@@ -3006,6 +3009,9 @@ static int init_arena_map_data(struct bpf_object *obj, struct bpf_map *map,
memcpy(obj->arena_data, data, data_sz);
obj->arena_data_sz = data_sz;
+ /* place globals at the end of the arena */
+ obj->arena_data_off = mmap_sz - data_alloc_sz;
+
/* make bpf_map__init_value() work for ARENA maps */
map->mmaped = obj->arena_data;
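Worked example, assuming 4 KiB pages: a 16 KiB arena holding 5000 bytes of globals gets data_alloc_sz = roundup(5000, 4096) = 8192, hence arena_data_off = 16384 - 8192 = 8192; the globals land in the top two pages and the low half of the arena stays free for runtime allocations.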
@@ -4276,7 +4282,7 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
if (!sym_is_extern(sym))
continue;
ext_name = elf_sym_str(obj, sym->st_name);
- if (!ext_name || !ext_name[0])
+ if (str_is_empty(ext_name))
continue;
ext = obj->externs;
@@ -4663,7 +4669,7 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
reloc_desc->type = RELO_DATA;
reloc_desc->insn_idx = insn_idx;
reloc_desc->map_idx = obj->arena_map_idx;
- reloc_desc->sym_off = sym->st_value;
+ reloc_desc->sym_off = sym->st_value + obj->arena_data_off;
map = &obj->maps[obj->arena_map_idx];
pr_debug("prog '%s': found arena map %d (%s, sec %d, off %zu) for insn %u\n",
@@ -5624,7 +5630,8 @@ retry:
return err;
}
if (obj->arena_data) {
- memcpy(map->mmaped, obj->arena_data, obj->arena_data_sz);
+ memcpy(map->mmaped + obj->arena_data_off, obj->arena_data,
+ obj->arena_data_sz);
zfree(&obj->arena_data);
}
}
@@ -6192,7 +6199,7 @@ static void poison_kfunc_call(struct bpf_program *prog, int relo_idx,
insn->imm = POISON_CALL_KFUNC_BASE + ext_idx;
}
-static int find_jt_map(struct bpf_object *obj, struct bpf_program *prog, int sym_off)
+static int find_jt_map(struct bpf_object *obj, struct bpf_program *prog, unsigned int sym_off)
{
size_t i;
@@ -6210,7 +6217,7 @@ static int find_jt_map(struct bpf_object *obj, struct bpf_program *prog, int sym
return -ENOENT;
}
-static int add_jt_map(struct bpf_object *obj, struct bpf_program *prog, int sym_off, int map_fd)
+static int add_jt_map(struct bpf_object *obj, struct bpf_program *prog, unsigned int sym_off, int map_fd)
{
size_t cnt = obj->jumptable_map_cnt;
size_t size = sizeof(obj->jumptable_maps[0]);
@@ -6244,7 +6251,7 @@ static int find_subprog_idx(struct bpf_program *prog, int insn_idx)
static int create_jt_map(struct bpf_object *obj, struct bpf_program *prog, struct reloc_desc *relo)
{
const __u32 jt_entry_size = 8;
- int sym_off = relo->sym_off;
+ unsigned int sym_off = relo->sym_off;
int jt_size = relo->sym_size;
__u32 max_entries = jt_size / jt_entry_size;
__u32 value_size = sizeof(struct bpf_insn_array_value);
@@ -6260,7 +6267,7 @@ static int create_jt_map(struct bpf_object *obj, struct bpf_program *prog, struc
return map_fd;
if (sym_off % jt_entry_size) {
- pr_warn("map '.jumptables': jumptable start %d should be multiple of %u\n",
+ pr_warn("map '.jumptables': jumptable start %u should be multiple of %u\n",
sym_off, jt_entry_size);
return -EINVAL;
}
@@ -6316,7 +6323,7 @@ static int create_jt_map(struct bpf_object *obj, struct bpf_program *prog, struc
* should contain values that fit in u32.
*/
if (insn_off > UINT32_MAX) {
- pr_warn("map '.jumptables': invalid jump table value 0x%llx at offset %d\n",
+ pr_warn("map '.jumptables': invalid jump table value 0x%llx at offset %u\n",
(long long)jt[i], sym_off + i * jt_entry_size);
err = -EINVAL;
goto err_close;
@@ -9853,6 +9860,8 @@ static const struct bpf_sec_def section_defs[] = {
SEC_DEF("fentry.s+", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
SEC_DEF("fmod_ret.s+", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
SEC_DEF("fexit.s+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
+ SEC_DEF("fsession+", TRACING, BPF_TRACE_FSESSION, SEC_ATTACH_BTF, attach_trace),
+ SEC_DEF("fsession.s+", TRACING, BPF_TRACE_FSESSION, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
SEC_DEF("freplace+", EXT, 0, SEC_ATTACH_BTF, attach_trace),
SEC_DEF("lsm+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm),
SEC_DEF("lsm.s+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm),
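A sketch of what an fsession program could look like; the traced function is illustrative, and the session kfuncs are declared explicitly on the assumption that they behave for fsession as they do for kprobe sessions:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

extern bool bpf_session_is_return(void) __ksym __weak;
extern __u64 *bpf_session_cookie(void) __ksym __weak;

char LICENSE[] SEC("license") = "GPL";

SEC("fsession/do_unlinkat")
int BPF_PROG(unlink_session)
{
	__u64 *cookie = bpf_session_cookie();

	if (!cookie)
		return 0;
	if (!bpf_session_is_return()) {
		*cookie = bpf_ktime_get_ns();	/* entry: stash timestamp */
		return 0;
	}
	bpf_printk("do_unlinkat took %llu ns",
		   bpf_ktime_get_ns() - *cookie);	/* exit */
	return 0;
}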
@@ -10913,7 +10922,7 @@ bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name)
}
static int validate_map_op(const struct bpf_map *map, size_t key_sz,
- size_t value_sz, bool check_value_sz)
+ size_t value_sz, bool check_value_sz, __u64 flags)
{
if (!map_is_created(map)) /* map is not yet created */
return -ENOENT;
@@ -10940,6 +10949,20 @@ static int validate_map_op(const struct bpf_map *map, size_t key_sz,
int num_cpu = libbpf_num_possible_cpus();
size_t elem_sz = roundup(map->def.value_size, 8);
+ if (flags & (BPF_F_CPU | BPF_F_ALL_CPUS)) {
+ if ((flags & BPF_F_CPU) && (flags & BPF_F_ALL_CPUS)) {
+ pr_warn("map '%s': BPF_F_CPU and BPF_F_ALL_CPUS are mutually exclusive\n",
+ map->name);
+ return -EINVAL;
+ }
+ if (map->def.value_size != value_sz) {
+ pr_warn("map '%s': unexpected value size %zu provided for either BPF_F_CPU or BPF_F_ALL_CPUS, expected %u\n",
+ map->name, value_sz, map->def.value_size);
+ return -EINVAL;
+ }
+ break;
+ }
+
if (value_sz != num_cpu * elem_sz) {
pr_warn("map '%s': unexpected value size %zu provided for per-CPU map, expected %d * %zu = %zd\n",
map->name, value_sz, num_cpu, elem_sz, num_cpu * elem_sz);
@@ -10964,7 +10987,7 @@ int bpf_map__lookup_elem(const struct bpf_map *map,
{
int err;
- err = validate_map_op(map, key_sz, value_sz, true);
+ err = validate_map_op(map, key_sz, value_sz, true, flags);
if (err)
return libbpf_err(err);
@@ -10977,7 +11000,7 @@ int bpf_map__update_elem(const struct bpf_map *map,
{
int err;
- err = validate_map_op(map, key_sz, value_sz, true);
+ err = validate_map_op(map, key_sz, value_sz, true, flags);
if (err)
return libbpf_err(err);
@@ -10989,7 +11012,7 @@ int bpf_map__delete_elem(const struct bpf_map *map,
{
int err;
- err = validate_map_op(map, key_sz, 0, false /* check_value_sz */);
+ err = validate_map_op(map, key_sz, 0, false /* check_value_sz */, flags);
if (err)
return libbpf_err(err);
@@ -11002,7 +11025,7 @@ int bpf_map__lookup_and_delete_elem(const struct bpf_map *map,
{
int err;
- err = validate_map_op(map, key_sz, value_sz, true);
+ err = validate_map_op(map, key_sz, value_sz, true, flags);
if (err)
return libbpf_err(err);
@@ -11014,7 +11037,7 @@ int bpf_map__get_next_key(const struct bpf_map *map,
{
int err;
- err = validate_map_op(map, key_sz, 0, false /* check_value_sz */);
+ err = validate_map_op(map, key_sz, 0, false /* check_value_sz */, 0);
if (err)
return libbpf_err(err);
@@ -14134,6 +14157,37 @@ int bpf_program__set_attach_target(struct bpf_program *prog,
return 0;
}
+int bpf_program__assoc_struct_ops(struct bpf_program *prog, struct bpf_map *map,
+ struct bpf_prog_assoc_struct_ops_opts *opts)
+{
+ int prog_fd, map_fd;
+
+ prog_fd = bpf_program__fd(prog);
+ if (prog_fd < 0) {
+ pr_warn("prog '%s': can't associate BPF program without FD (was it loaded?)\n",
+ prog->name);
+ return libbpf_err(-EINVAL);
+ }
+
+ if (prog->type == BPF_PROG_TYPE_STRUCT_OPS) {
+ pr_warn("prog '%s': can't associate struct_ops program\n", prog->name);
+ return libbpf_err(-EINVAL);
+ }
+
+ map_fd = bpf_map__fd(map);
+ if (map_fd < 0) {
+ pr_warn("map '%s': can't associate BPF map without FD (was it created?)\n", map->name);
+ return libbpf_err(-EINVAL);
+ }
+
+ if (!bpf_map__is_struct_ops(map)) {
+ pr_warn("map '%s': can't associate non-struct_ops map\n", map->name);
+ return libbpf_err(-EINVAL);
+ }
+
+ return bpf_prog_assoc_struct_ops(prog_fd, map_fd, opts);
+}
+
int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz)
{
int err = 0, n, len, start, end = -1;
@@ -14399,7 +14453,10 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
if (!map_skel->mmaped)
continue;
- *map_skel->mmaped = map->mmaped;
+ if (map->def.type == BPF_MAP_TYPE_ARENA)
+ *map_skel->mmaped = map->mmaped + map->obj->arena_data_off;
+ else
+ *map_skel->mmaped = map->mmaped;
}
return 0;
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 65e68e964b89..dfc37a615578 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -1006,6 +1006,22 @@ LIBBPF_API int
bpf_program__set_attach_target(struct bpf_program *prog, int attach_prog_fd,
const char *attach_func_name);
+struct bpf_prog_assoc_struct_ops_opts; /* defined in bpf.h */
+
+/**
+ * @brief **bpf_program__assoc_struct_ops()** associates a BPF program with a
+ * struct_ops map.
+ *
+ * @param prog BPF program
+ * @param map struct_ops map to be associated with the BPF program
+ * @param opts optional options, can be NULL
+ *
+ * @return 0, on success; negative error code, otherwise
+ */
+LIBBPF_API int
+bpf_program__assoc_struct_ops(struct bpf_program *prog, struct bpf_map *map,
+ struct bpf_prog_assoc_struct_ops_opts *opts);
+
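As a usage sketch (the skeleton, program and map names are made up), the association is done once both objects have FDs, i.e. after the skeleton is loaded:

#include <stdio.h>
#include <bpf/libbpf.h>
#include "my_sched.skel.h"	/* hypothetical skeleton */

static int associate(struct my_sched *skel)
{
	/* prog must be a loaded non-struct_ops program; map must be created */
	int err = bpf_program__assoc_struct_ops(skel->progs.stats_fentry,
						skel->maps.my_sched_ops, NULL);
	if (err)	/* errno is also set by libbpf */
		fprintf(stderr, "assoc_struct_ops failed: %d\n", err);
	return err;
}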
/**
* @brief **bpf_object__find_map_by_name()** returns BPF map of
* the given name, if it exists within the passed BPF object
@@ -1200,12 +1216,13 @@ LIBBPF_API struct bpf_map *bpf_map__inner_map(struct bpf_map *map);
* @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size**
* @param value pointer to memory in which looked up value will be stored
 * @param value_sz size in bytes of value data memory; it has to match BPF map
- * definition's **value_size**. For per-CPU BPF maps value size has to be
- * a product of BPF map value size and number of possible CPUs in the system
- * (could be fetched with **libbpf_num_possible_cpus()**). Note also that for
- * per-CPU values value size has to be aligned up to closest 8 bytes for
- * alignment reasons, so expected size is: `round_up(value_size, 8)
- * * libbpf_num_possible_cpus()`.
+ * definition's **value_size**. For per-CPU BPF maps, value size can be
+ * `value_size` if either **BPF_F_CPU** or **BPF_F_ALL_CPUS** is specified
+ * in **flags**, otherwise a product of BPF map value size and number of
+ * possible CPUs in the system (could be fetched with
+ * **libbpf_num_possible_cpus()**). Note also that for per-CPU values value
+ * size has to be aligned up to closest 8 bytes, so expected size is:
+ * `round_up(value_size, 8) * libbpf_num_possible_cpus()`.
* @param flags extra flags passed to kernel for this operation
* @return 0, on success; negative error, otherwise
*
@@ -1223,13 +1240,7 @@ LIBBPF_API int bpf_map__lookup_elem(const struct bpf_map *map,
* @param key pointer to memory containing bytes of the key
* @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size**
* @param value pointer to memory containing bytes of the value
- * @param value_sz size in byte of value data memory; it has to match BPF map
- * definition's **value_size**. For per-CPU BPF maps value size has to be
- * a product of BPF map value size and number of possible CPUs in the system
- * (could be fetched with **libbpf_num_possible_cpus()**). Note also that for
- * per-CPU values value size has to be aligned up to closest 8 bytes for
- * alignment reasons, so expected size is: `round_up(value_size, 8)
- * * libbpf_num_possible_cpus()`.
+ * @param value_sz refer to **bpf_map__lookup_elem**'s description.
* @param flags extra flags passed to kernel for this operation
* @return 0, on success; negative error, otherwise
*
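A sketch of the new single-CPU form; encoding the target CPU in the upper 32 bits of flags is an assumption based on this series:

static int read_one_cpu(const struct bpf_map *map, __u32 target_cpu, __u32 *value)
{
	__u32 key = 0;
	__u64 flags = BPF_F_CPU | ((__u64)target_cpu << 32);

	/* with BPF_F_CPU, value_sz is plain value_size, not
	 * round_up(value_size, 8) * libbpf_num_possible_cpus()
	 */
	return bpf_map__lookup_elem(map, &key, sizeof(key),
				    value, sizeof(*value), flags);
}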
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 8ed8749907d4..d18fbcea7578 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -451,4 +451,7 @@ LIBBPF_1.7.0 {
global:
bpf_map__set_exclusive_program;
bpf_map__exclusive_program;
+ bpf_prog_assoc_struct_ops;
+ bpf_program__assoc_struct_ops;
+ btf__permute;
} LIBBPF_1.6.0;
diff --git a/tools/sched_ext/README.md b/tools/sched_ext/README.md
index 16a42e4060f6..56a9d1557ac4 100644
--- a/tools/sched_ext/README.md
+++ b/tools/sched_ext/README.md
@@ -65,7 +65,6 @@ It's also recommended that you also include the following Kconfig options:
```
CONFIG_BPF_JIT_ALWAYS_ON=y
CONFIG_BPF_JIT_DEFAULT_ON=y
-CONFIG_PAHOLE_HAS_SPLIT_BTF=y
CONFIG_PAHOLE_HAS_BTF_TAG=y
```
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 19c1638e312a..a3ea98211ea6 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -23,7 +23,6 @@ test_tcpnotify_user
test_libbpf
xdping
test_cpp
-test_progs_verification_cert
*.d
*.subskel.h
*.skel.h
@@ -45,3 +44,6 @@ xdp_synproxy
xdp_hw_metadata
xdp_features
verification_cert.h
+*.BTF
+*.BTF_ids
+*.BTF.base
diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
index a17baf8c6fd7..f7e1e5f5511c 100644
--- a/tools/testing/selftests/bpf/DENYLIST.s390x
+++ b/tools/testing/selftests/bpf/DENYLIST.s390x
@@ -1,4 +1,5 @@
# TEMPORARY
# Alphabetical order
+exe_ctx # execution context check (e.g., hardirq, softirq, etc)
get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace)
stacktrace_build_id # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2 (?)
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 4aa60e83ff19..c6bf4dfb1495 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -4,6 +4,7 @@ include ../../../scripts/Makefile.arch
include ../../../scripts/Makefile.include
CXX ?= $(CROSS_COMPILE)g++
+OBJCOPY ?= $(CROSS_COMPILE)objcopy
CURDIR := $(abspath .)
TOOLSDIR := $(abspath ../../..)
@@ -107,8 +108,6 @@ TEST_PROGS := test_kmod.sh \
test_xdping.sh \
test_bpftool_build.sh \
test_bpftool.sh \
- test_bpftool_map.sh \
- test_bpftool_metadata.sh \
test_doc_build.sh \
test_xsk.sh \
test_xdp_features.sh
@@ -643,6 +642,9 @@ $(TRUNNER_TESTS_HDR): $(TRUNNER_TESTS_DIR)/*.c
) > $$@)
endif
+$(TRUNNER_OUTPUT)/resolve_btfids.test.o: $(RESOLVE_BTFIDS) $(TRUNNER_OUTPUT)/btf_data.bpf.o
+$(TRUNNER_OUTPUT)/resolve_btfids.test.o: private TEST_NEEDS_BTFIDS = 1
+
# compile individual test files
# Note: we cd into output directory to ensure embedded BPF object is found
$(TRUNNER_TEST_OBJS): $(TRUNNER_OUTPUT)/%.test.o: \
@@ -650,6 +652,10 @@ $(TRUNNER_TEST_OBJS): $(TRUNNER_OUTPUT)/%.test.o: \
| $(TRUNNER_OUTPUT)/%.test.d
$$(call msg,TEST-OBJ,$(TRUNNER_BINARY),$$@)
$(Q)cd $$(@D) && $$(CC) -I. $$(CFLAGS) -MMD -MT $$@ -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F)
+ $$(if $$(TEST_NEEDS_BTFIDS), \
+ $$(call msg,BTFIDS,$(TRUNNER_BINARY),$$@) \
+ $(RESOLVE_BTFIDS) --btf $(TRUNNER_OUTPUT)/btf_data.bpf.o $$@; \
+ $(RESOLVE_BTFIDS) --patch_btfids $$@.BTF_ids $$@)
$(TRUNNER_TEST_OBJS:.o=.d): $(TRUNNER_OUTPUT)/%.test.d: \
$(TRUNNER_TESTS_DIR)/%.c \
@@ -695,13 +701,11 @@ $(OUTPUT)/$(TRUNNER_BINARY): | $(TRUNNER_BPF_OBJS)
$(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \
$(TRUNNER_EXTRA_OBJS) $$(BPFOBJ) \
$(TRUNNER_LIB_OBJS) \
- $(RESOLVE_BTFIDS) \
$(TRUNNER_BPFTOOL) \
$(OUTPUT)/veristat \
| $(TRUNNER_BINARY)-extras
$$(call msg,BINARY,,$$@)
$(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) $$(LLVM_LDLIBS) $$(LDFLAGS) $$(LLVM_LDFLAGS) -o $$@
- $(Q)$(RESOLVE_BTFIDS) --btf $(TRUNNER_OUTPUT)/btf_data.bpf.o $$@
$(Q)ln -sf $(if $2,..,.)/tools/build/bpftool/$(USE_BOOTSTRAP)bpftool \
$(OUTPUT)/$(if $2,$2/)bpftool
@@ -716,9 +720,12 @@ $(VERIFICATION_CERT) $(PRIVATE_KEY): $(VERIFY_SIG_SETUP)
$(Q)mkdir -p $(BUILD_DIR)
$(Q)$(VERIFY_SIG_SETUP) genkey $(BUILD_DIR)
+# Generate a header with a C array declaration containing the test_progs_verification_cert bytes
$(VERIFY_SIG_HDR): $(VERIFICATION_CERT)
- $(Q)ln -fs $< test_progs_verification_cert && \
- xxd -i test_progs_verification_cert > $@
+ $(Q)(echo "unsigned char test_progs_verification_cert[] = {"; \
+ hexdump -v -e '12/1 " 0x%02x," "\n"' $< | sed 's/0x ,//g; $$s/,$$//'; \
+ echo "};"; \
+ echo "unsigned int test_progs_verification_cert_len = $$(wc -c < $<);") > $@
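The rule produces output equivalent to the old `xxd -i` invocation, roughly of this shape (byte values and length are illustrative):

unsigned char test_progs_verification_cert[] = {
  0x30, 0x82, 0x03, 0x6f, 0x30, 0x82, 0x02, 0x57, 0xa0, 0x03, 0x02, 0x01,
  /* ... */
};
unsigned int test_progs_verification_cert_len = 879;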
# Define test_progs test runner.
TRUNNER_TESTS_DIR := prog_tests
@@ -741,7 +748,8 @@ TRUNNER_EXTRA_SOURCES := test_progs.c \
json_writer.c \
$(VERIFY_SIG_HDR) \
flow_dissector_load.h \
- ip_check_defrag_frags.h
+ ip_check_defrag_frags.h \
+ bpftool_helpers.c
TRUNNER_LIB_SOURCES := find_bit.c
TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read \
$(OUTPUT)/liburandom_read.so \
@@ -890,10 +898,10 @@ EXTRA_CLEAN := $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \
prog_tests/tests.h map_tests/tests.h verifier/tests.h \
feature bpftool $(TEST_KMOD_TARGETS) \
$(addprefix $(OUTPUT)/,*.o *.d *.skel.h *.lskel.h *.subskel.h \
+ *.BTF *.BTF_ids *.BTF.base \
no_alu32 cpuv4 bpf_gcc \
liburandom_read.so) \
- $(OUTPUT)/FEATURE-DUMP.selftests \
- test_progs_verification_cert
+ $(OUTPUT)/FEATURE-DUMP.selftests
.PHONY: docs docs-clean
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index bd29bb2e6cb5..8368bd3a0665 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -265,6 +265,7 @@ static const struct argp_option opts[] = {
{ "verbose", 'v', NULL, 0, "Verbose debug output"},
{ "affinity", 'a', NULL, 0, "Set consumer/producer thread affinity"},
{ "quiet", 'q', NULL, 0, "Be more quiet"},
+ { "stacktrace", 's', NULL, 0, "Get stack trace"},
{ "prod-affinity", ARG_PROD_AFFINITY_SET, "CPUSET", 0,
"Set of CPUs for producer threads; implies --affinity"},
{ "cons-affinity", ARG_CONS_AFFINITY_SET, "CPUSET", 0,
@@ -350,6 +351,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
case 'q':
env.quiet = true;
break;
+ case 's':
+ env.stacktrace = true;
+ break;
case ARG_PROD_AFFINITY_SET:
env.affinity = true;
if (parse_num_list(arg, &env.prod_cpus.cpus,
diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h
index bea323820ffb..7cf21936e7ed 100644
--- a/tools/testing/selftests/bpf/bench.h
+++ b/tools/testing/selftests/bpf/bench.h
@@ -26,6 +26,7 @@ struct env {
bool list;
bool affinity;
bool quiet;
+ bool stacktrace;
int consumer_cnt;
int producer_cnt;
int nr_cpus;
diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
index 34018fc3927f..aeec9edd3851 100644
--- a/tools/testing/selftests/bpf/benchs/bench_trigger.c
+++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -146,6 +146,7 @@ static void setup_ctx(void)
bpf_program__set_autoload(ctx.skel->progs.trigger_driver, true);
ctx.skel->rodata->batch_iters = args.batch_iters;
+ ctx.skel->rodata->stacktrace = env.stacktrace;
}
static void load_ctx(void)
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
index 83e05e837871..123b7feb6935 100755
--- a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
+++ b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
@@ -49,6 +49,11 @@ for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do
summarize "rb-libbpf nr_prod $b" "$($RUN_RB_BENCH -p$b --rb-batch-cnt 50 rb-libbpf)"
done
+header "Perfbuf, multi-producer"
+for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do
+ summarize "pb-libbpf nr_prod $b" "$($RUN_RB_BENCH -p$b --rb-batch-cnt 50 --rb-sample-rate 50 pb-libbpf)"
+done
+
header "Ringbuf, multi-producer contention in overwrite mode, no consumer"
for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do
summarize "rb-prod nr_prod $b" "$($RUN_BENCH -p$b --rb-batch-cnt 50 --rb-overwrite --rb-bench-producer rb-libbpf)"
diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h
index 2cd9165c7348..4b7210c318dd 100644
--- a/tools/testing/selftests/bpf/bpf_experimental.h
+++ b/tools/testing/selftests/bpf/bpf_experimental.h
@@ -580,11 +580,6 @@ extern void bpf_iter_css_destroy(struct bpf_iter_css *it) __weak __ksym;
extern int bpf_wq_init(struct bpf_wq *wq, void *p__map, unsigned int flags) __weak __ksym;
extern int bpf_wq_start(struct bpf_wq *wq, unsigned int flags) __weak __ksym;
-extern int bpf_wq_set_callback_impl(struct bpf_wq *wq,
- int (callback_fn)(void *map, int *key, void *value),
- unsigned int flags__k, void *aux__ign) __ksym;
-#define bpf_wq_set_callback(timer, cb, flags) \
- bpf_wq_set_callback_impl(timer, cb, flags, NULL)
struct bpf_iter_kmem_cache;
extern int bpf_iter_kmem_cache_new(struct bpf_iter_kmem_cache *it) __weak __ksym;
@@ -615,9 +610,17 @@ extern int bpf_cgroup_read_xattr(struct cgroup *cgroup, const char *name__str,
#define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)
#define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT)
+#define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT)
+
extern bool CONFIG_PREEMPT_RT __kconfig __weak;
#ifdef bpf_target_x86
-extern const int __preempt_count __ksym;
+extern const int __preempt_count __ksym __weak;
+
+struct pcpu_hot___local {
+ int preempt_count;
+} __attribute__((preserve_access_index));
+
+extern struct pcpu_hot___local pcpu_hot __ksym __weak;
#endif
struct task_struct___preempt_rt {
@@ -627,7 +630,19 @@ struct task_struct___preempt_rt {
static inline int get_preempt_count(void)
{
#if defined(bpf_target_x86)
- return *(int *) bpf_this_cpu_ptr(&__preempt_count);
+ /* By default, read the per-CPU __preempt_count. */
+ if (bpf_ksym_exists(&__preempt_count))
+ return *(int *) bpf_this_cpu_ptr(&__preempt_count);
+
+ /*
+ * If __preempt_count does not exist, try to read preempt_count under
+ * struct pcpu_hot. Between v6.1 and v6.14 -- more specifically,
+	 * [64701838bf057, 46e8fff6d45fe), preempt_count was managed
+ * under struct pcpu_hot.
+ */
+ if (bpf_core_field_exists(pcpu_hot.preempt_count))
+ return ((struct pcpu_hot___local *)
+ bpf_this_cpu_ptr(&pcpu_hot))->preempt_count;
#elif defined(bpf_target_arm64)
return bpf_get_current_task_btf()->thread_info.preempt.count;
#endif
@@ -653,4 +668,60 @@ static inline int bpf_in_interrupt(void)
(tsk->softirq_disable_cnt & SOFTIRQ_MASK);
}
+/* Description
+ * Report whether it is in NMI context. Only works on the following archs:
+ * * x86
+ * * arm64
+ */
+static inline int bpf_in_nmi(void)
+{
+ return get_preempt_count() & NMI_MASK;
+}
+
+/* Description
+ * Report whether it is in hard IRQ context. Only works on the following archs:
+ * * x86
+ * * arm64
+ */
+static inline int bpf_in_hardirq(void)
+{
+ return get_preempt_count() & HARDIRQ_MASK;
+}
+
+/* Description
+ * Report whether it is in softirq context. Only works on the following archs:
+ * * x86
+ * * arm64
+ */
+static inline int bpf_in_serving_softirq(void)
+{
+ struct task_struct___preempt_rt *tsk;
+ int pcnt;
+
+ pcnt = get_preempt_count();
+ if (!CONFIG_PREEMPT_RT)
+ return (pcnt & SOFTIRQ_MASK) & SOFTIRQ_OFFSET;
+
+ tsk = (void *) bpf_get_current_task_btf();
+ return (tsk->softirq_disable_cnt & SOFTIRQ_MASK) & SOFTIRQ_OFFSET;
+}
+
+/* Description
+ * Report whether it is in task context. Only works on the following archs:
+ * * x86
+ * * arm64
+ */
+static inline int bpf_in_task(void)
+{
+ struct task_struct___preempt_rt *tsk;
+ int pcnt;
+
+ pcnt = get_preempt_count();
+ if (!CONFIG_PREEMPT_RT)
+ return !(pcnt & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET));
+
+ tsk = (void *) bpf_get_current_task_btf();
+ return !((pcnt & (NMI_MASK | HARDIRQ_MASK)) |
+ ((tsk->softirq_disable_cnt & SOFTIRQ_MASK) & SOFTIRQ_OFFSET));
+}
#endif
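For example, a tracing program might gate work on plain task context like this (the attach point is illustrative):

SEC("fentry/__x64_sys_nanosleep")
int BPF_PROG(task_only)
{
	if (!bpf_in_task())
		return 0;	/* NMI, hardirq or serving-softirq: bail out */

	/* process-context-only work here */
	return 0;
}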
diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h
index e0189254bb6e..7dad01439391 100644
--- a/tools/testing/selftests/bpf/bpf_kfuncs.h
+++ b/tools/testing/selftests/bpf/bpf_kfuncs.h
@@ -79,9 +79,6 @@ extern int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_ptr,
struct bpf_dynptr *sig_ptr,
struct bpf_key *trusted_keyring) __ksym;
-extern bool bpf_session_is_return(void) __ksym __weak;
-extern __u64 *bpf_session_cookie(void) __ksym __weak;
-
struct dentry;
/* Description
* Returns xattr of a dentry
diff --git a/tools/testing/selftests/bpf/bpftool_helpers.c b/tools/testing/selftests/bpf/bpftool_helpers.c
new file mode 100644
index 000000000000..a5824945a4a5
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpftool_helpers.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "bpftool_helpers.h"
+#include <unistd.h>
+#include <string.h>
+#include <stdbool.h>
+
+#define BPFTOOL_PATH_MAX_LEN 64
+#define BPFTOOL_FULL_CMD_MAX_LEN 512
+
+#define BPFTOOL_DEFAULT_PATH "tools/sbin/bpftool"
+
+static int detect_bpftool_path(char *buffer)
+{
+ char tmp[BPFTOOL_PATH_MAX_LEN];
+
+ /* Check default bpftool location (will work if we are running the
+ * default flavor of test_progs)
+ */
+ snprintf(tmp, BPFTOOL_PATH_MAX_LEN, "./%s", BPFTOOL_DEFAULT_PATH);
+ if (access(tmp, X_OK) == 0) {
+ strncpy(buffer, tmp, BPFTOOL_PATH_MAX_LEN);
+ return 0;
+ }
+
+ /* Check alternate bpftool location (will work if we are running a
+ * specific flavor of test_progs, e.g. cpuv4 or no_alu32)
+ */
+ snprintf(tmp, BPFTOOL_PATH_MAX_LEN, "../%s", BPFTOOL_DEFAULT_PATH);
+ if (access(tmp, X_OK) == 0) {
+ strncpy(buffer, tmp, BPFTOOL_PATH_MAX_LEN);
+ return 0;
+ }
+
+ /* Failed to find bpftool binary */
+ return 1;
+}
+
+static int run_command(char *args, char *output_buf, size_t output_max_len)
+{
+ static char bpftool_path[BPFTOOL_PATH_MAX_LEN] = {0};
+ bool suppress_output = !(output_buf && output_max_len);
+ char command[BPFTOOL_FULL_CMD_MAX_LEN];
+ FILE *f;
+ int ret;
+
+ /* Detect and cache bpftool binary location */
+ if (bpftool_path[0] == 0 && detect_bpftool_path(bpftool_path))
+ return 1;
+
+ ret = snprintf(command, BPFTOOL_FULL_CMD_MAX_LEN, "%s %s%s",
+ bpftool_path, args,
+ suppress_output ? " > /dev/null 2>&1" : "");
+
+ f = popen(command, "r");
+ if (!f)
+ return 1;
+
+	if (!suppress_output) {
+		size_t len = fread(output_buf, 1, output_max_len - 1, f);
+
+		output_buf[len] = '\0';	/* keep buffer usable with strstr() */
+	}
+ ret = pclose(f);
+
+ return ret;
+}
+
+int run_bpftool_command(char *args)
+{
+ return run_command(args, NULL, 0);
+}
+
+int get_bpftool_command_output(char *args, char *output_buf, size_t output_max_len)
+{
+ return run_command(args, output_buf, output_max_len);
+}
+
diff --git a/tools/testing/selftests/bpf/bpftool_helpers.h b/tools/testing/selftests/bpf/bpftool_helpers.h
new file mode 100644
index 000000000000..dec1ba201410
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpftool_helpers.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#pragma once
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdbool.h>
+
+#define MAX_BPFTOOL_CMD_LEN (256)
+
+int run_bpftool_command(char *args);
+int get_bpftool_command_output(char *args, char *output_buf, size_t output_max_len);
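Typical use from a test, with hypothetical commands:

#include <stdio.h>
#include "bpftool_helpers.h"

static void show_bpftool_version(void)
{
	char buf[4096];

	if (run_bpftool_command("prog list"))	/* exit status only */
		return;
	if (!get_bpftool_command_output("version", buf, sizeof(buf)))
		printf("bpftool: %s", buf);	/* stdout captured into buf */
}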
diff --git a/tools/testing/selftests/bpf/cgroup_iter_memcg.h b/tools/testing/selftests/bpf/cgroup_iter_memcg.h
new file mode 100644
index 000000000000..3f59b127943b
--- /dev/null
+++ b/tools/testing/selftests/bpf/cgroup_iter_memcg.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#ifndef __CGROUP_ITER_MEMCG_H
+#define __CGROUP_ITER_MEMCG_H
+
+struct memcg_query {
+ /* some node_stat_item's */
+ unsigned long nr_anon_mapped;
+ unsigned long nr_shmem;
+ unsigned long nr_file_pages;
+ unsigned long nr_file_mapped;
+ /* some memcg_stat_item */
+ unsigned long memcg_kmem;
+ /* some vm_event_item */
+ unsigned long pgfault;
+};
+
+#endif /* __CGROUP_ITER_MEMCG_H */
diff --git a/tools/testing/selftests/bpf/map_tests/task_storage_map.c b/tools/testing/selftests/bpf/map_tests/task_storage_map.c
deleted file mode 100644
index a4121d2248ac..000000000000
--- a/tools/testing/selftests/bpf/map_tests/task_storage_map.c
+++ /dev/null
@@ -1,128 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2022. Huawei Technologies Co., Ltd */
-#define _GNU_SOURCE
-#include <sched.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <stdbool.h>
-#include <errno.h>
-#include <string.h>
-#include <pthread.h>
-
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-
-#include "bpf_util.h"
-#include "test_maps.h"
-#include "task_local_storage_helpers.h"
-#include "read_bpf_task_storage_busy.skel.h"
-
-struct lookup_ctx {
- bool start;
- bool stop;
- int pid_fd;
- int map_fd;
- int loop;
-};
-
-static void *lookup_fn(void *arg)
-{
- struct lookup_ctx *ctx = arg;
- long value;
- int i = 0;
-
- while (!ctx->start)
- usleep(1);
-
- while (!ctx->stop && i++ < ctx->loop)
- bpf_map_lookup_elem(ctx->map_fd, &ctx->pid_fd, &value);
- return NULL;
-}
-
-static void abort_lookup(struct lookup_ctx *ctx, pthread_t *tids, unsigned int nr)
-{
- unsigned int i;
-
- ctx->stop = true;
- ctx->start = true;
- for (i = 0; i < nr; i++)
- pthread_join(tids[i], NULL);
-}
-
-void test_task_storage_map_stress_lookup(void)
-{
-#define MAX_NR_THREAD 4096
- unsigned int i, nr = 256, loop = 8192, cpu = 0;
- struct read_bpf_task_storage_busy *skel;
- pthread_t tids[MAX_NR_THREAD];
- struct lookup_ctx ctx;
- cpu_set_t old, new;
- const char *cfg;
- int err;
-
- cfg = getenv("TASK_STORAGE_MAP_NR_THREAD");
- if (cfg) {
- nr = atoi(cfg);
- if (nr > MAX_NR_THREAD)
- nr = MAX_NR_THREAD;
- }
- cfg = getenv("TASK_STORAGE_MAP_NR_LOOP");
- if (cfg)
- loop = atoi(cfg);
- cfg = getenv("TASK_STORAGE_MAP_PIN_CPU");
- if (cfg)
- cpu = atoi(cfg);
-
- skel = read_bpf_task_storage_busy__open_and_load();
- err = libbpf_get_error(skel);
- CHECK(err, "open_and_load", "error %d\n", err);
-
- /* Only for a fully preemptible kernel */
- if (!skel->kconfig->CONFIG_PREEMPTION) {
- printf("%s SKIP (no CONFIG_PREEMPTION)\n", __func__);
- read_bpf_task_storage_busy__destroy(skel);
- skips++;
- return;
- }
-
- /* Save the old affinity setting */
- sched_getaffinity(getpid(), sizeof(old), &old);
-
- /* Pinned on a specific CPU */
- CPU_ZERO(&new);
- CPU_SET(cpu, &new);
- sched_setaffinity(getpid(), sizeof(new), &new);
-
- ctx.start = false;
- ctx.stop = false;
- ctx.pid_fd = sys_pidfd_open(getpid(), 0);
- ctx.map_fd = bpf_map__fd(skel->maps.task);
- ctx.loop = loop;
- for (i = 0; i < nr; i++) {
- err = pthread_create(&tids[i], NULL, lookup_fn, &ctx);
- if (err) {
- abort_lookup(&ctx, tids, i);
- CHECK(err, "pthread_create", "error %d\n", err);
- goto out;
- }
- }
-
- ctx.start = true;
- for (i = 0; i < nr; i++)
- pthread_join(tids[i], NULL);
-
- skel->bss->pid = getpid();
- err = read_bpf_task_storage_busy__attach(skel);
- CHECK(err, "attach", "error %d\n", err);
-
- /* Trigger program */
- sys_gettid();
- skel->bss->pid = 0;
-
- CHECK(skel->bss->busy != 0, "bad bpf_task_storage_busy", "got %d\n", skel->bss->busy);
-out:
- read_bpf_task_storage_busy__destroy(skel);
- /* Restore affinity setting */
- sched_setaffinity(getpid(), sizeof(old), &old);
- printf("%s:PASS\n", __func__);
-}
diff --git a/tools/testing/selftests/bpf/prog_tests/arena_list.c b/tools/testing/selftests/bpf/prog_tests/arena_list.c
index d15867cddde0..4f2866a615ce 100644
--- a/tools/testing/selftests/bpf/prog_tests/arena_list.c
+++ b/tools/testing/selftests/bpf/prog_tests/arena_list.c
@@ -27,17 +27,23 @@ static int list_sum(struct arena_list_head *head)
return sum;
}
-static void test_arena_list_add_del(int cnt)
+static void test_arena_list_add_del(int cnt, bool nonsleepable)
{
LIBBPF_OPTS(bpf_test_run_opts, opts);
struct arena_list *skel;
int expected_sum = (u64)cnt * (cnt - 1) / 2;
int ret, sum;
- skel = arena_list__open_and_load();
- if (!ASSERT_OK_PTR(skel, "arena_list__open_and_load"))
+ skel = arena_list__open();
+ if (!ASSERT_OK_PTR(skel, "arena_list__open"))
return;
+ skel->rodata->nonsleepable = nonsleepable;
+
+ ret = arena_list__load(skel);
+ if (!ASSERT_OK(ret, "arena_list__load"))
+ goto out;
+
skel->bss->cnt = cnt;
ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.arena_list_add), &opts);
ASSERT_OK(ret, "ret_add");
@@ -65,7 +71,11 @@ out:
void test_arena_list(void)
{
if (test__start_subtest("arena_list_1"))
- test_arena_list_add_del(1);
+ test_arena_list_add_del(1, false);
if (test__start_subtest("arena_list_1000"))
- test_arena_list_add_del(1000);
+ test_arena_list_add_del(1000, false);
+ if (test__start_subtest("arena_list_1_nonsleepable"))
+ test_arena_list_add_del(1, true);
+ if (test__start_subtest("arena_list_1000_nonsleepable"))
+ test_arena_list_add_del(1000, true);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_gotox.c b/tools/testing/selftests/bpf/prog_tests/bpf_gotox.c
index d138cc7b1bda..75b0cf2467ab 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_gotox.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_gotox.c
@@ -240,6 +240,208 @@ static void check_nonstatic_global_other_sec(struct bpf_gotox *skel)
bpf_link__destroy(link);
}
+/*
+ * The following subtests use the skeleton only to check whether the
+ * test should be skipped.
+ */
+
+static int create_jt_map(__u32 max_entries)
+{
+ const char *map_name = "jt";
+ __u32 key_size = 4;
+ __u32 value_size = sizeof(struct bpf_insn_array_value);
+
+ return bpf_map_create(BPF_MAP_TYPE_INSN_ARRAY, map_name,
+ key_size, value_size, max_entries, NULL);
+}
+
+static int prog_load(struct bpf_insn *insns, __u32 insn_cnt)
+{
+ return bpf_prog_load(BPF_PROG_TYPE_RAW_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL);
+}
+
+static int __check_ldimm64_off_prog_load(__u32 max_entries, __u32 off)
+{
+ struct bpf_insn insns[] = {
+ BPF_LD_IMM64_RAW(BPF_REG_1, BPF_PSEUDO_MAP_VALUE, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ int map_fd, ret;
+
+ map_fd = create_jt_map(max_entries);
+ if (!ASSERT_GE(map_fd, 0, "create_jt_map"))
+ return -1;
+ if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze")) {
+ close(map_fd);
+ return -1;
+ }
+
+ insns[0].imm = map_fd;
+ insns[1].imm = off;
+
+ ret = prog_load(insns, ARRAY_SIZE(insns));
+ close(map_fd);
+ return ret;
+}
+
+/*
+ * Check that loads from an instruction array map are only allowed with offsets
+ * which are multiples of 8 and do not point to outside of the map.
+ */
+static void check_ldimm64_off_load(struct bpf_gotox *skel __always_unused)
+{
+ const __u32 max_entries = 10;
+ int prog_fd;
+ __u32 off;
+
+ for (off = 0; off < max_entries; off++) {
+ prog_fd = __check_ldimm64_off_prog_load(max_entries, off * 8);
+ if (!ASSERT_GE(prog_fd, 0, "__check_ldimm64_off_prog_load"))
+ return;
+ close(prog_fd);
+ }
+
+ prog_fd = __check_ldimm64_off_prog_load(max_entries, 7 /* not a multiple of 8 */);
+ if (!ASSERT_EQ(prog_fd, -EACCES, "__check_ldimm64_off_prog_load: should be -EACCES")) {
+ close(prog_fd);
+ return;
+ }
+
+ prog_fd = __check_ldimm64_off_prog_load(max_entries, max_entries * 8 /* too large */);
+ if (!ASSERT_EQ(prog_fd, -EACCES, "__check_ldimm64_off_prog_load: should be -EACCES")) {
+ close(prog_fd);
+ return;
+ }
+}
+
+static int __check_ldimm64_gotox_prog_load(struct bpf_insn *insns,
+ __u32 insn_cnt,
+ __u32 off1, __u32 off2)
+{
+ const __u32 values[] = {5, 7, 9, 11, 13, 15};
+ const __u32 max_entries = ARRAY_SIZE(values);
+ struct bpf_insn_array_value val = {};
+ int map_fd, ret, i;
+
+ map_fd = create_jt_map(max_entries);
+ if (!ASSERT_GE(map_fd, 0, "create_jt_map"))
+ return -1;
+
+ for (i = 0; i < max_entries; i++) {
+ val.orig_off = values[i];
+ if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &i, &val, 0), 0,
+ "bpf_map_update_elem")) {
+ close(map_fd);
+ return -1;
+ }
+ }
+
+ if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze")) {
+ close(map_fd);
+ return -1;
+ }
+
+ /* r1 = &map + offset1 */
+ insns[0].imm = map_fd;
+ insns[1].imm = off1;
+
+ /* r1 += off2 */
+ insns[2].imm = off2;
+
+ ret = prog_load(insns, insn_cnt);
+ close(map_fd);
+ return ret;
+}
+
+static void reject_offsets(struct bpf_insn *insns, __u32 insn_cnt, __u32 off1, __u32 off2)
+{
+ int prog_fd;
+
+ prog_fd = __check_ldimm64_gotox_prog_load(insns, insn_cnt, off1, off2);
+ if (!ASSERT_EQ(prog_fd, -EACCES, "__check_ldimm64_gotox_prog_load"))
+ close(prog_fd);
+}
+
+/*
+ * Verify somewhat more complex programs that include indirect jumps
+ * and jump tables loaded with a non-zero offset.
+ */
+static void check_ldimm64_off_gotox(struct bpf_gotox *skel __always_unused)
+{
+ struct bpf_insn insns[] = {
+ /*
+ * The following instructions perform an indirect jump to
+ * labels below. Thus valid offsets in the map are {0,...,5}.
+ * The program rewrites the offsets in the instructions below:
+ * r1 = &map + offset1
+ * r1 += offset2
+ * r1 = *r1
+ * gotox r1
+ */
+ BPF_LD_IMM64_RAW(BPF_REG_1, BPF_PSEUDO_MAP_VALUE, 0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_1, 0, 0, 0),
+
+ /* case 0: */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ /* case 1: */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ /* case 2: */
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ /* case 3: */
+ BPF_MOV64_IMM(BPF_REG_0, 3),
+ BPF_EXIT_INSN(),
+ /* case 4: */
+ BPF_MOV64_IMM(BPF_REG_0, 4),
+ BPF_EXIT_INSN(),
+ /* default: */
+ BPF_MOV64_IMM(BPF_REG_0, 5),
+ BPF_EXIT_INSN(),
+ };
+ int prog_fd, err;
+ __u32 off1, off2;
+
+ /* allow all combinations off1 + off2 < 6 */
+ for (off1 = 0; off1 < 6; off1++) {
+ for (off2 = 0; off1 + off2 < 6; off2++) {
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ prog_fd = __check_ldimm64_gotox_prog_load(insns, ARRAY_SIZE(insns),
+ off1 * 8, off2 * 8);
+ if (!ASSERT_GE(prog_fd, 0, "__check_ldimm64_gotox_prog_load"))
+ return;
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err")) {
+ close(prog_fd);
+ return;
+ }
+
+ if (!ASSERT_EQ(topts.retval, off1 + off2, "test_run_opts retval")) {
+ close(prog_fd);
+ return;
+ }
+
+ close(prog_fd);
+ }
+ }
+
+ /* reject off1 + off2 >= 6 */
+ reject_offsets(insns, ARRAY_SIZE(insns), 8 * 3, 8 * 3);
+ reject_offsets(insns, ARRAY_SIZE(insns), 8 * 7, 8 * 0);
+ reject_offsets(insns, ARRAY_SIZE(insns), 8 * 0, 8 * 7);
+
+ /* reject (off1 + off2) % 8 != 0 */
+ reject_offsets(insns, ARRAY_SIZE(insns), 3, 3);
+ reject_offsets(insns, ARRAY_SIZE(insns), 7, 0);
+ reject_offsets(insns, ARRAY_SIZE(insns), 0, 7);
+}
+
void test_bpf_gotox(void)
{
struct bpf_gotox *skel;
@@ -288,5 +490,11 @@ void test_bpf_gotox(void)
if (test__start_subtest("one-map-two-jumps"))
__subtest(skel, check_one_map_two_jumps);
+ if (test__start_subtest("check-ldimm64-off"))
+ __subtest(skel, check_ldimm64_off_load);
+
+ if (test__start_subtest("check-ldimm64-off-gotox"))
+ __subtest(skel, check_ldimm64_off_gotox);
+
bpf_gotox__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
index dd6512fa652b..215878ea04de 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
@@ -19,6 +19,10 @@ struct {
{ "change_timeout_after_alloc", "kernel function bpf_ct_change_timeout args#0 expected pointer to STRUCT nf_conn but" },
{ "change_status_after_alloc", "kernel function bpf_ct_change_status args#0 expected pointer to STRUCT nf_conn but" },
{ "write_not_allowlisted_field", "no write support to nf_conn at off" },
+ { "lookup_null_bpf_tuple", "Possibly NULL pointer passed to trusted arg1" },
+ { "lookup_null_bpf_opts", "Possibly NULL pointer passed to trusted arg3" },
+ { "xdp_lookup_null_bpf_tuple", "Possibly NULL pointer passed to trusted arg1" },
+ { "xdp_lookup_null_bpf_opts", "Possibly NULL pointer passed to trusted arg3" },
};
enum {
@@ -111,7 +115,6 @@ static void test_bpf_nf_ct(int mode)
if (!ASSERT_OK(err, "bpf_prog_test_run"))
goto end;
- ASSERT_EQ(skel->bss->test_einval_bpf_tuple, -EINVAL, "Test EINVAL for NULL bpf_tuple");
ASSERT_EQ(skel->bss->test_einval_reserved, -EINVAL, "Test EINVAL for reserved not set to 0");
ASSERT_EQ(skel->bss->test_einval_reserved_new, -EINVAL, "Test EINVAL for reserved in new struct not set to 0");
ASSERT_EQ(skel->bss->test_einval_netns_id, -EINVAL, "Test EINVAL for netns_id < -1");
diff --git a/tools/testing/selftests/bpf/prog_tests/bpftool_maps_access.c b/tools/testing/selftests/bpf/prog_tests/bpftool_maps_access.c
new file mode 100644
index 000000000000..e0eb869cb1b4
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpftool_maps_access.c
@@ -0,0 +1,371 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <sys/stat.h>
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include <bpf/libbpf.h>
+#include <bpftool_helpers.h>
+#include <test_progs.h>
+#include <bpf/bpf.h>
+#include "security_bpf_map.skel.h"
+
+#define PROTECTED_MAP_NAME "prot_map"
+#define UNPROTECTED_MAP_NAME "not_prot_map"
+#define BPF_ITER_FILE "bpf_iter_map_elem.bpf.o"
+#define BPFFS_PIN_DIR "/sys/fs/bpf/test_bpftool_map"
+#define INNER_MAP_NAME "inner_map_tt"
+#define OUTER_MAP_NAME "outer_map_tt"
+
+#define MAP_NAME_MAX_LEN 64
+#define PATH_MAX_LEN 128
+
+enum map_protection {
+ PROTECTED,
+ UNPROTECTED
+};
+
+struct test_desc {
+ char *name;
+ enum map_protection protection;
+ struct bpf_map *map;
+ char *map_name;
+ bool pinned;
+ char pin_path[PATH_MAX_LEN];
+ bool write_must_fail;
+};
+
+static struct security_bpf_map *general_setup(void)
+{
+ struct security_bpf_map *skel;
+ uint32_t key, value;
+ int ret, i;
+
+ skel = security_bpf_map__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open and load skeleton"))
+ goto end;
+
+ struct bpf_map *maps[] = {skel->maps.prot_map, skel->maps.not_prot_map};
+
+ ret = security_bpf_map__attach(skel);
+ if (!ASSERT_OK(ret, "attach maps security programs"))
+ goto end_destroy;
+
+	for (i = 0; i < ARRAY_SIZE(maps); i++) {
+ for (key = 0; key < 2; key++) {
+ int ret = bpf_map__update_elem(maps[i], &key,
+ sizeof(key), &key, sizeof(key),
+ 0);
+ if (!ASSERT_OK(ret, "set initial map value"))
+ goto end_destroy;
+ }
+ }
+
+ key = 0;
+ value = 1;
+ ret = bpf_map__update_elem(skel->maps.prot_status_map, &key,
+ sizeof(key), &value, sizeof(value), 0);
+ if (!ASSERT_OK(ret, "configure map protection"))
+ goto end_destroy;
+
+	if (!ASSERT_OK(mkdir(BPFFS_PIN_DIR, 0700), "create bpffs pin dir"))
+ goto end_destroy;
+
+ return skel;
+end_destroy:
+ security_bpf_map__destroy(skel);
+end:
+ return NULL;
+}
+
+static void general_cleanup(struct security_bpf_map *skel)
+{
+ rmdir(BPFFS_PIN_DIR);
+ security_bpf_map__destroy(skel);
+}
+
+static void update_test_desc(struct security_bpf_map *skel,
+ struct test_desc *test)
+{
+ /* Now that the skeleton is loaded, update all missing fields to
+ * have the subtest properly configured
+ */
+ if (test->protection == PROTECTED) {
+ test->map = skel->maps.prot_map;
+ test->map_name = PROTECTED_MAP_NAME;
+ } else {
+ test->map = skel->maps.not_prot_map;
+ test->map_name = UNPROTECTED_MAP_NAME;
+ }
+}
+
+static int test_setup(struct security_bpf_map *skel, struct test_desc *desc)
+{
+ int ret;
+
+ update_test_desc(skel, desc);
+
+ if (desc->pinned) {
+ ret = snprintf(desc->pin_path, PATH_MAX_LEN, "%s/%s", BPFFS_PIN_DIR,
+ desc->name);
+ if (!ASSERT_GT(ret, 0, "format pin path"))
+ return 1;
+ ret = bpf_map__pin(desc->map, desc->pin_path);
+ if (!ASSERT_OK(ret, "pin map"))
+ return 1;
+ }
+
+ return 0;
+}
+
+static void test_cleanup(struct test_desc *desc)
+{
+ if (desc->pinned)
+ bpf_map__unpin(desc->map, NULL);
+}
+
+static int lookup_map_value(char *map_handle)
+{
+ char cmd[MAX_BPFTOOL_CMD_LEN];
+ int ret = 0;
+
+ ret = snprintf(cmd, MAX_BPFTOOL_CMD_LEN, "map lookup %s key 0 0 0 0",
+ map_handle);
+ if (!ASSERT_GT(ret, 0, "format map lookup cmd"))
+ return 1;
+ return run_bpftool_command(cmd);
+}
+
+static int read_map_btf_data(char *map_handle)
+{
+ char cmd[MAX_BPFTOOL_CMD_LEN];
+ int ret = 0;
+
+ ret = snprintf(cmd, MAX_BPFTOOL_CMD_LEN, "btf dump map %s",
+ map_handle);
+ if (!ASSERT_GT(ret, 0, "format map btf dump cmd"))
+ return 1;
+ return run_bpftool_command(cmd);
+}
+
+static int write_map_value(char *map_handle)
+{
+ char cmd[MAX_BPFTOOL_CMD_LEN];
+ int ret = 0;
+
+ ret = snprintf(cmd, MAX_BPFTOOL_CMD_LEN,
+ "map update %s key 0 0 0 0 value 1 1 1 1", map_handle);
+ if (!ASSERT_GT(ret, 0, "format value write cmd"))
+ return 1;
+ return run_bpftool_command(cmd);
+}
+
+static int delete_map_value(char *map_handle)
+{
+ char cmd[MAX_BPFTOOL_CMD_LEN];
+ int ret = 0;
+
+ ret = snprintf(cmd, MAX_BPFTOOL_CMD_LEN,
+ "map delete %s key 0 0 0 0", map_handle);
+ if (!ASSERT_GT(ret, 0, "format value deletion cmd"))
+ return 1;
+ return run_bpftool_command(cmd);
+}
+
+static int iterate_on_map_values(char *map_handle, char *iter_pin_path)
+{
+ char cmd[MAX_BPFTOOL_CMD_LEN];
+ int ret = 0;
+
+ ret = snprintf(cmd, MAX_BPFTOOL_CMD_LEN, "iter pin %s %s map %s",
+ BPF_ITER_FILE, iter_pin_path, map_handle);
+ if (!ASSERT_GT(ret, 0, "format iterator creation cmd"))
+ return 1;
+ ret = run_bpftool_command(cmd);
+ if (ret)
+ return ret;
+	ret = snprintf(cmd, MAX_BPFTOOL_CMD_LEN, "cat %s", iter_pin_path);
+ if (ret < 0)
+ goto cleanup;
+ ret = system(cmd);
+
+cleanup:
+ unlink(iter_pin_path);
+ return ret;
+}
+
+static int create_inner_map(void)
+{
+ char cmd[MAX_BPFTOOL_CMD_LEN];
+ int ret = 0;
+
+ ret = snprintf(
+ cmd, MAX_BPFTOOL_CMD_LEN,
+ "map create %s/%s type array key 4 value 4 entries 4 name %s",
+ BPFFS_PIN_DIR, INNER_MAP_NAME, INNER_MAP_NAME);
+ if (!ASSERT_GT(ret, 0, "format inner map create cmd"))
+ return 1;
+ return run_bpftool_command(cmd);
+}
+
+static int create_outer_map(void)
+{
+ char cmd[MAX_BPFTOOL_CMD_LEN];
+ int ret = 0;
+
+ ret = snprintf(
+ cmd, MAX_BPFTOOL_CMD_LEN,
+ "map create %s/%s type hash_of_maps key 4 value 4 entries 2 name %s inner_map name %s",
+ BPFFS_PIN_DIR, OUTER_MAP_NAME, OUTER_MAP_NAME, INNER_MAP_NAME);
+ if (!ASSERT_GT(ret, 0, "format outer map create cmd"))
+ return 1;
+ return run_bpftool_command(cmd);
+}
+
+static void delete_pinned_map(char *map_name)
+{
+ char pin_path[PATH_MAX_LEN];
+ int ret;
+
+ ret = snprintf(pin_path, PATH_MAX_LEN, "%s/%s", BPFFS_PIN_DIR,
+ map_name);
+ if (ret >= 0)
+ unlink(pin_path);
+}
+
+static int add_outer_map_entry(int key)
+{
+ char cmd[MAX_BPFTOOL_CMD_LEN];
+ int ret = 0;
+
+ ret = snprintf(
+ cmd, MAX_BPFTOOL_CMD_LEN,
+ "map update pinned %s/%s key %d 0 0 0 value name %s",
+ BPFFS_PIN_DIR, OUTER_MAP_NAME, key, INNER_MAP_NAME);
+ if (!ASSERT_GT(ret, 0, "format outer map value addition cmd"))
+ return 1;
+ return run_bpftool_command(cmd);
+}
+
+static void test_basic_access(struct test_desc *desc)
+{
+ char map_handle[MAP_NAME_MAX_LEN];
+ char iter_pin_path[PATH_MAX_LEN];
+ int ret;
+
+ if (desc->pinned)
+ ret = snprintf(map_handle, MAP_NAME_MAX_LEN, "pinned %s",
+ desc->pin_path);
+ else
+ ret = snprintf(map_handle, MAP_NAME_MAX_LEN, "name %s",
+ desc->map_name);
+ if (!ASSERT_GT(ret, 0, "format map handle"))
+ return;
+
+ ret = lookup_map_value(map_handle);
+ ASSERT_OK(ret, "read map value");
+
+ ret = read_map_btf_data(map_handle);
+ ASSERT_OK(ret, "read map btf data");
+
+ ret = write_map_value(map_handle);
+ ASSERT_OK(desc->write_must_fail ? !ret : ret, "write map value");
+
+ ret = delete_map_value(map_handle);
+ ASSERT_OK(desc->write_must_fail ? !ret : ret, "delete map value");
+ /* Restore deleted value */
+ if (!ret)
+ write_map_value(map_handle);
+
+ ret = snprintf(iter_pin_path, PATH_MAX_LEN, "%s/iter", BPFFS_PIN_DIR);
+ if (ASSERT_GT(ret, 0, "format iter pin path")) {
+ ret = iterate_on_map_values(map_handle, iter_pin_path);
+ ASSERT_OK(ret, "iterate on map values");
+ }
+}
+
+static void test_create_nested_maps(void)
+{
+ if (!ASSERT_OK(create_inner_map(), "create inner map"))
+ return;
+ if (!ASSERT_OK(create_outer_map(), "create outer map"))
+ goto end_cleanup_inner;
+ ASSERT_OK(add_outer_map_entry(0), "add a first entry in outer map");
+ ASSERT_OK(add_outer_map_entry(1), "add a second entry in outer map");
+ ASSERT_NEQ(add_outer_map_entry(2), 0, "add a third entry in outer map");
+
+ delete_pinned_map(OUTER_MAP_NAME);
+end_cleanup_inner:
+ delete_pinned_map(INNER_MAP_NAME);
+}
+
+static void test_btf_list(void)
+{
+ ASSERT_OK(run_bpftool_command("btf list"), "list btf data");
+}
+
+static struct test_desc tests[] = {
+ {
+ .name = "unprotected_unpinned",
+ .protection = UNPROTECTED,
+ .map_name = UNPROTECTED_MAP_NAME,
+ .pinned = false,
+ .write_must_fail = false,
+ },
+ {
+ .name = "unprotected_pinned",
+ .protection = UNPROTECTED,
+ .map_name = UNPROTECTED_MAP_NAME,
+ .pinned = true,
+ .write_must_fail = false,
+ },
+ {
+ .name = "protected_unpinned",
+ .protection = PROTECTED,
+		.map_name = PROTECTED_MAP_NAME,
+ .pinned = false,
+ .write_must_fail = true,
+ },
+ {
+ .name = "protected_pinned",
+ .protection = PROTECTED,
+		.map_name = PROTECTED_MAP_NAME,
+ .pinned = true,
+ .write_must_fail = true,
+ }
+};
+
+static const size_t tests_count = ARRAY_SIZE(tests);
+
+void test_bpftool_maps_access(void)
+{
+ struct security_bpf_map *skel;
+ struct test_desc *current;
+ int i;
+
+ skel = general_setup();
+ if (!ASSERT_OK_PTR(skel, "prepare programs"))
+ goto cleanup;
+
+ for (i = 0; i < tests_count; i++) {
+ current = &tests[i];
+ if (!test__start_subtest(current->name))
+ continue;
+ if (ASSERT_OK(test_setup(skel, current), "subtest setup")) {
+ test_basic_access(current);
+ test_cleanup(current);
+ }
+ }
+ if (test__start_subtest("nested_maps"))
+ test_create_nested_maps();
+ if (test__start_subtest("btf_list"))
+ test_btf_list();
+
+cleanup:
+ general_cleanup(skel);
+}
+
diff --git a/tools/testing/selftests/bpf/prog_tests/bpftool_metadata.c b/tools/testing/selftests/bpf/prog_tests/bpftool_metadata.c
new file mode 100644
index 000000000000..408ace90dc7e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpftool_metadata.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <bpftool_helpers.h>
+#include <test_progs.h>
+#include <linux/bpf.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <stdbool.h>
+
+#define BPFFS_DIR "/sys/fs/bpf/test_metadata"
+#define BPFFS_USED BPFFS_DIR "/used"
+#define BPFFS_UNUSED BPFFS_DIR "/unused"
+
+#define BPF_FILE_USED "metadata_used.bpf.o"
+#define BPF_FILE_UNUSED "metadata_unused.bpf.o"
+#define METADATA_MAP_NAME "metadata.rodata"
+
+#define MAX_BPFTOOL_OUTPUT_LEN (64*1024)
+
+#define MAX_TOKENS_TO_CHECK 3
+static char output[MAX_BPFTOOL_OUTPUT_LEN];
+
+struct test_desc {
+ char *name;
+ char *bpf_prog;
+ char *bpffs_path;
+ char *expected_output[MAX_TOKENS_TO_CHECK];
+ char *expected_output_json[MAX_TOKENS_TO_CHECK];
+ char *metadata_map_name;
+};
+
+static int setup(struct test_desc *test)
+{
+ return mkdir(BPFFS_DIR, 0700);
+}
+
+static void cleanup(struct test_desc *test)
+{
+ unlink(test->bpffs_path);
+ rmdir(BPFFS_DIR);
+}
+
+static int check_metadata(char *buf, char * const *tokens, int count)
+{
+ int i;
+
+ for (i = 0; i < count && tokens[i]; i++)
+ if (!strstr(buf, tokens[i]))
+ return 1;
+
+ return 0;
+}
+
+static void run_test(struct test_desc *test)
+{
+ int ret;
+ char cmd[MAX_BPFTOOL_CMD_LEN];
+
+ ret = snprintf(cmd, MAX_BPFTOOL_CMD_LEN, "prog load %s %s",
+ test->bpf_prog, test->bpffs_path);
+ if (!ASSERT_GT(ret, 0, "format prog insert command"))
+ return;
+ ret = run_bpftool_command(cmd);
+ if (!ASSERT_OK(ret, "load program"))
+ return;
+
+ /* Check output with default format */
+ ret = snprintf(cmd, MAX_BPFTOOL_CMD_LEN, "prog show pinned %s",
+ test->bpffs_path);
+ if (!ASSERT_GT(ret, 0, "format pinned prog check command"))
+ return;
+ ret = get_bpftool_command_output(cmd, output,
+ MAX_BPFTOOL_OUTPUT_LEN);
+ if (ASSERT_OK(ret, "get program info")) {
+ ret = check_metadata(output, test->expected_output,
+ ARRAY_SIZE(test->expected_output));
+ ASSERT_OK(ret, "find metadata");
+ }
+
+ /* Check output with json format */
+ ret = snprintf(cmd, MAX_BPFTOOL_CMD_LEN, "prog -j show pinned %s",
+ test->bpffs_path);
+ if (!ASSERT_GT(ret, 0, "format pinned prog check command in json"))
+ return;
+ ret = get_bpftool_command_output(cmd, output,
+ MAX_BPFTOOL_OUTPUT_LEN);
+ if (ASSERT_OK(ret, "get program info in json")) {
+ ret = check_metadata(output, test->expected_output_json,
+ ARRAY_SIZE(test->expected_output_json));
+ ASSERT_OK(ret, "find metadata in json");
+ }
+
+ /* Check that the corresponding map can be found and accessed */
+ ret = snprintf(cmd, MAX_BPFTOOL_CMD_LEN, "map show name %s",
+ test->metadata_map_name);
+ if (!ASSERT_GT(ret, 0, "format map check command"))
+ return;
+ ASSERT_OK(run_bpftool_command(cmd), "access metadata map");
+}
+
+static struct test_desc tests[] = {
+ {
+ .name = "metadata_unused",
+ .bpf_prog = BPF_FILE_UNUSED,
+ .bpffs_path = BPFFS_UNUSED,
+ .expected_output = {
+ "a = \"foo\"",
+ "b = 1"
+ },
+ .expected_output_json = {
+ "\"metadata\":{\"a\":\"foo\",\"b\":1}"
+ },
+ .metadata_map_name = METADATA_MAP_NAME
+ },
+ {
+ .name = "metadata_used",
+ .bpf_prog = BPF_FILE_USED,
+ .bpffs_path = BPFFS_USED,
+ .expected_output = {
+ "a = \"bar\"",
+ "b = 2"
+ },
+ .expected_output_json = {
+ "\"metadata\":{\"a\":\"bar\",\"b\":2}"
+ },
+ .metadata_map_name = METADATA_MAP_NAME
+ }
+};
+static const int tests_count = ARRAY_SIZE(tests);
+
+void test_bpftool_metadata(void)
+{
+ int i;
+
+ for (i = 0; i < tests_count; i++) {
+ if (!test__start_subtest(tests[i].name))
+ continue;
+ if (ASSERT_OK(setup(&tests[i]), "setup bpffs pin dir")) {
+ run_test(&tests[i]);
+ cleanup(&tests[i]);
+ }
+ }
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
index 10cba526d3e6..f1642794f70e 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
@@ -875,8 +875,8 @@ static void test_btf_dump_var_data(struct btf *btf, struct btf_dump *d,
TEST_BTF_DUMP_VAR(btf, d, NULL, str, "cpu_number", int, BTF_F_COMPACT,
"int cpu_number = (int)100", 100);
#endif
- TEST_BTF_DUMP_VAR(btf, d, NULL, str, "bpf_cgrp_storage_busy", int, BTF_F_COMPACT,
- "static int bpf_cgrp_storage_busy = (int)2", 2);
+ TEST_BTF_DUMP_VAR(btf, d, NULL, str, "bpf_bprintf_nest_level", int, BTF_F_COMPACT,
+ "static int bpf_bprintf_nest_level = (int)2", 2);
}
struct btf_dump_string_ctx {
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_permute.c b/tools/testing/selftests/bpf/prog_tests/btf_permute.c
new file mode 100644
index 000000000000..04ade5ad77ac
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_permute.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2026 Xiaomi */
+
+#include <test_progs.h>
+#include <bpf/btf.h>
+#include "btf_helpers.h"
+
+static void permute_base_check(struct btf *btf)
+{
+ VALIDATE_RAW_BTF(
+ btf,
+ "[1] STRUCT 's2' size=4 vlen=1\n"
+ "\t'm' type_id=4 bits_offset=0",
+ "[2] FUNC 'f' type_id=6 linkage=static",
+ "[3] PTR '(anon)' type_id=4",
+ "[4] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[5] STRUCT 's1' size=4 vlen=1\n"
+ "\t'm' type_id=4 bits_offset=0",
+ "[6] FUNC_PROTO '(anon)' ret_type_id=4 vlen=1\n"
+ "\t'p' type_id=3");
+}
+
+/* Ensure btf__permute works as expected in the base-BTF scenario */
+static void test_permute_base(void)
+{
+ struct btf *btf;
+ __u32 permute_ids[7];
+ int err;
+
+ btf = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf, "empty_main_btf"))
+ return;
+
+ btf__add_int(btf, "int", 4, BTF_INT_SIGNED); /* [1] int */
+ btf__add_ptr(btf, 1); /* [2] ptr to int */
+ btf__add_struct(btf, "s1", 4); /* [3] struct s1 { */
+ btf__add_field(btf, "m", 1, 0, 0); /* int m; */
+ /* } */
+ btf__add_struct(btf, "s2", 4); /* [4] struct s2 { */
+ btf__add_field(btf, "m", 1, 0, 0); /* int m; */
+ /* } */
+ btf__add_func_proto(btf, 1); /* [5] int (*)(int *p); */
+ btf__add_func_param(btf, "p", 2);
+ btf__add_func(btf, "f", BTF_FUNC_STATIC, 5); /* [6] int f(int *p); */
+
+ VALIDATE_RAW_BTF(
+ btf,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] PTR '(anon)' type_id=1",
+ "[3] STRUCT 's1' size=4 vlen=1\n"
+ "\t'm' type_id=1 bits_offset=0",
+ "[4] STRUCT 's2' size=4 vlen=1\n"
+ "\t'm' type_id=1 bits_offset=0",
+ "[5] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n"
+ "\t'p' type_id=2",
+ "[6] FUNC 'f' type_id=5 linkage=static");
+
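+	/* permute_ids[i] holds the new type ID assigned to current ID i */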
+ permute_ids[0] = 0; /* [0] -> [0] */
+ permute_ids[1] = 4; /* [1] -> [4] */
+ permute_ids[2] = 3; /* [2] -> [3] */
+ permute_ids[3] = 5; /* [3] -> [5] */
+ permute_ids[4] = 1; /* [4] -> [1] */
+ permute_ids[5] = 6; /* [5] -> [6] */
+ permute_ids[6] = 2; /* [6] -> [2] */
+ err = btf__permute(btf, permute_ids, ARRAY_SIZE(permute_ids), NULL);
+ if (!ASSERT_OK(err, "btf__permute_base"))
+ goto done;
+ permute_base_check(btf);
+
+ /* ids[0] must be 0 for base BTF */
+ permute_ids[0] = 4; /* [0] -> [0] */
+ permute_ids[1] = 0; /* [1] -> [4] */
+ permute_ids[2] = 3; /* [2] -> [3] */
+ permute_ids[3] = 5; /* [3] -> [5] */
+ permute_ids[4] = 1; /* [4] -> [1] */
+ permute_ids[5] = 6; /* [5] -> [6] */
+ permute_ids[6] = 2; /* [6] -> [2] */
+ err = btf__permute(btf, permute_ids, ARRAY_SIZE(permute_ids), NULL);
+ if (!ASSERT_ERR(err, "btf__permute_base"))
+ goto done;
+ /* BTF is not modified */
+ permute_base_check(btf);
+
+	/* id_map_cnt smaller than the number of types is invalid */
+ permute_ids[0] = 0; /* [0] -> [0] */
+ permute_ids[1] = 4; /* [1] -> [4] */
+ permute_ids[2] = 3; /* [2] -> [3] */
+ permute_ids[3] = 5; /* [3] -> [5] */
+ permute_ids[4] = 1; /* [4] -> [1] */
+ permute_ids[5] = 6; /* [5] -> [6] */
+ permute_ids[6] = 2; /* [6] -> [2] */
+ err = btf__permute(btf, permute_ids, ARRAY_SIZE(permute_ids) - 1, NULL);
+ if (!ASSERT_ERR(err, "btf__permute_base"))
+ goto done;
+ /* BTF is not modified */
+ permute_base_check(btf);
+
+	/* Multiple types cannot be mapped to the same ID */
+ permute_ids[0] = 0;
+ permute_ids[1] = 4;
+ permute_ids[2] = 4;
+ permute_ids[3] = 5;
+ permute_ids[4] = 1;
+ permute_ids[5] = 6;
+ permute_ids[6] = 2;
+ err = btf__permute(btf, permute_ids, ARRAY_SIZE(permute_ids), NULL);
+ if (!ASSERT_ERR(err, "btf__permute_base"))
+ goto done;
+ /* BTF is not modified */
+ permute_base_check(btf);
+
+ /* Type ID must be valid */
+ permute_ids[0] = 0;
+ permute_ids[1] = 4;
+ permute_ids[2] = 3;
+ permute_ids[3] = 5;
+ permute_ids[4] = 1;
+ permute_ids[5] = 7;
+ permute_ids[6] = 2;
+ err = btf__permute(btf, permute_ids, ARRAY_SIZE(permute_ids), NULL);
+ if (!ASSERT_ERR(err, "btf__permute_base"))
+ goto done;
+ /* BTF is not modified */
+ permute_base_check(btf);
+
+done:
+ btf__free(btf);
+}
+
+static void permute_split_check(struct btf *btf)
+{
+ VALIDATE_RAW_BTF(
+ btf,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] PTR '(anon)' type_id=1",
+ "[3] STRUCT 's2' size=4 vlen=1\n"
+ "\t'm' type_id=1 bits_offset=0",
+ "[4] FUNC 'f' type_id=5 linkage=static",
+ "[5] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n"
+ "\t'p' type_id=2",
+ "[6] STRUCT 's1' size=4 vlen=1\n"
+ "\t'm' type_id=1 bits_offset=0");
+}
+
+/* Ensure btf__permute works as expected in the split-BTF scenario */
+static void test_permute_split(void)
+{
+ struct btf *split_btf = NULL, *base_btf = NULL;
+ __u32 permute_ids[4];
+ int err, start_id;
+
+ base_btf = btf__new_empty();
+ if (!ASSERT_OK_PTR(base_btf, "empty_main_btf"))
+ return;
+
+ btf__add_int(base_btf, "int", 4, BTF_INT_SIGNED); /* [1] int */
+ btf__add_ptr(base_btf, 1); /* [2] ptr to int */
+ VALIDATE_RAW_BTF(
+ base_btf,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] PTR '(anon)' type_id=1");
+ split_btf = btf__new_empty_split(base_btf);
+ if (!ASSERT_OK_PTR(split_btf, "empty_split_btf"))
+ goto cleanup;
+ btf__add_struct(split_btf, "s1", 4); /* [3] struct s1 { */
+ btf__add_field(split_btf, "m", 1, 0, 0); /* int m; */
+ /* } */
+ btf__add_struct(split_btf, "s2", 4); /* [4] struct s2 { */
+ btf__add_field(split_btf, "m", 1, 0, 0); /* int m; */
+ /* } */
+	btf__add_func_proto(split_btf, 1);		/* [5] int (*)(int *p); */
+ btf__add_func_param(split_btf, "p", 2);
+ btf__add_func(split_btf, "f", BTF_FUNC_STATIC, 5); /* [6] int f(int *p); */
+
+ VALIDATE_RAW_BTF(
+ split_btf,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] PTR '(anon)' type_id=1",
+ "[3] STRUCT 's1' size=4 vlen=1\n"
+ "\t'm' type_id=1 bits_offset=0",
+ "[4] STRUCT 's2' size=4 vlen=1\n"
+ "\t'm' type_id=1 bits_offset=0",
+ "[5] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n"
+ "\t'p' type_id=2",
+ "[6] FUNC 'f' type_id=5 linkage=static");
+
+ start_id = btf__type_cnt(base_btf);
+ permute_ids[3 - start_id] = 6; /* [3] -> [6] */
+ permute_ids[4 - start_id] = 3; /* [4] -> [3] */
+ permute_ids[5 - start_id] = 5; /* [5] -> [5] */
+ permute_ids[6 - start_id] = 4; /* [6] -> [4] */
+ err = btf__permute(split_btf, permute_ids, ARRAY_SIZE(permute_ids), NULL);
+ if (!ASSERT_OK(err, "btf__permute_split"))
+ goto cleanup;
+ permute_split_check(split_btf);
+
+ /*
+	 * For split BTF, id_map_cnt must equal the number of types
+	 * added on top of the base BTF
+ */
+ permute_ids[3 - start_id] = 4;
+ permute_ids[4 - start_id] = 3;
+ permute_ids[5 - start_id] = 5;
+ permute_ids[6 - start_id] = 6;
+ err = btf__permute(split_btf, permute_ids, ARRAY_SIZE(permute_ids) - 1, NULL);
+ if (!ASSERT_ERR(err, "btf__permute_split"))
+ goto cleanup;
+ /* BTF is not modified */
+ permute_split_check(split_btf);
+
+	/* Multiple types cannot be mapped to the same ID */
+ permute_ids[3 - start_id] = 4;
+ permute_ids[4 - start_id] = 3;
+ permute_ids[5 - start_id] = 3;
+ permute_ids[6 - start_id] = 6;
+ err = btf__permute(split_btf, permute_ids, ARRAY_SIZE(permute_ids), NULL);
+ if (!ASSERT_ERR(err, "btf__permute_split"))
+ goto cleanup;
+ /* BTF is not modified */
+ permute_split_check(split_btf);
+
+	/* Cannot map a split type to a base BTF ID */
+ permute_ids[3 - start_id] = 4;
+ permute_ids[4 - start_id] = 2;
+ permute_ids[5 - start_id] = 5;
+ permute_ids[6 - start_id] = 6;
+ err = btf__permute(split_btf, permute_ids, ARRAY_SIZE(permute_ids), NULL);
+ if (!ASSERT_ERR(err, "btf__permute_split"))
+ goto cleanup;
+ /* BTF is not modified */
+ permute_split_check(split_btf);
+
+cleanup:
+ btf__free(split_btf);
+ btf__free(base_btf);
+}
+
+void test_btf_permute(void)
+{
+ if (test__start_subtest("permute_base"))
+ test_permute_base();
+ if (test__start_subtest("permute_split"))
+ test_permute_split();
+}
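
For reference, the shape of the API these subtests exercise, inferred from the call sites above (the opts struct name is an assumption; every call here passes NULL for it):

	/*
	 * Reorder BTF types in place: ids[i] is the new type ID for current
	 * ID i. For base BTF, ids[0] must stay 0 (void); for split BTF the
	 * array covers only the types added on top of the base. On any
	 * invalid mapping the call fails and the BTF is left unmodified.
	 */
	int btf__permute(struct btf *btf, __u32 *ids, __u32 id_map_cnt,
			 const struct btf_permute_opts *opts);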
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c b/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c
index 574d9a0cdc8e..0f88a9d00a22 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c
@@ -190,6 +190,16 @@ static void test_walk_self_only(struct cgroup_iter *skel)
BPF_CGROUP_ITER_SELF_ONLY, "self_only");
}
+static void test_walk_children(struct cgroup_iter *skel)
+{
+ snprintf(expected_output, sizeof(expected_output),
+ PROLOGUE "%8llu\n%8llu\n" EPILOGUE, cg_id[CHILD1],
+ cg_id[CHILD2]);
+
+ read_from_cgroup_iter(skel->progs.cgroup_id_printer, cg_fd[PARENT],
+ BPF_CGROUP_ITER_CHILDREN, "children");
+}
+
static void test_walk_dead_self_only(struct cgroup_iter *skel)
{
DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
@@ -325,6 +335,8 @@ void test_cgroup_iter(void)
test_walk_dead_self_only(skel);
if (test__start_subtest("cgroup_iter__self_only_css_task"))
test_walk_self_only_css_task();
+ if (test__start_subtest("cgroup_iter__children"))
+ test_walk_children(skel);
out:
cgroup_iter__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_iter_memcg.c b/tools/testing/selftests/bpf/prog_tests/cgroup_iter_memcg.c
new file mode 100644
index 000000000000..a5afd16705f0
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_iter_memcg.c
@@ -0,0 +1,223 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <bpf/libbpf.h>
+#include <bpf/btf.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include "cgroup_helpers.h"
+#include "cgroup_iter_memcg.h"
+#include "cgroup_iter_memcg.skel.h"
+
+static int read_stats(struct bpf_link *link)
+{
+ int fd, ret = 0;
+ ssize_t bytes;
+
+ fd = bpf_iter_create(bpf_link__fd(link));
+ if (!ASSERT_OK_FD(fd, "bpf_iter_create"))
+ return 1;
+
+ /*
+ * Invoke iter program by reading from its fd. We're not expecting any
+ * data to be written by the bpf program so the result should be zero.
+	 * Results are read directly through the custom data section
+	 * accessible via skel->data_query->memcg_query.
+ */
+ bytes = read(fd, NULL, 0);
+ if (!ASSERT_EQ(bytes, 0, "read fd"))
+ ret = 1;
+
+ close(fd);
+ return ret;
+}
+
+static void test_anon(struct bpf_link *link, struct memcg_query *memcg_query)
+{
+ void *map;
+ size_t len;
+
+ len = sysconf(_SC_PAGESIZE) * 1024;
+
+ /*
+ * Increase memcg anon usage by mapping and writing
+ * to a new anon region.
+ */
+ map = mmap(NULL, len, PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (!ASSERT_NEQ(map, MAP_FAILED, "mmap anon"))
+ return;
+
+ memset(map, 1, len);
+
+ if (!ASSERT_OK(read_stats(link), "read stats"))
+ goto cleanup;
+
+ ASSERT_GT(memcg_query->nr_anon_mapped, 0, "final anon mapped val");
+
+cleanup:
+ munmap(map, len);
+}
+
+static void test_file(struct bpf_link *link, struct memcg_query *memcg_query)
+{
+ void *map;
+ size_t len;
+ char *path;
+ int fd;
+
+ len = sysconf(_SC_PAGESIZE) * 1024;
+ path = "/tmp/test_cgroup_iter_memcg";
+
+ /*
+ * Increase memcg file usage by creating and writing
+ * to a mapped file.
+ */
+ fd = open(path, O_CREAT | O_RDWR, 0644);
+ if (!ASSERT_OK_FD(fd, "open fd"))
+ return;
+ if (!ASSERT_OK(ftruncate(fd, len), "ftruncate"))
+ goto cleanup_fd;
+
+ map = mmap(NULL, len, PROT_WRITE, MAP_SHARED, fd, 0);
+ if (!ASSERT_NEQ(map, MAP_FAILED, "mmap file"))
+ goto cleanup_fd;
+
+ memset(map, 1, len);
+
+ if (!ASSERT_OK(read_stats(link), "read stats"))
+ goto cleanup_map;
+
+ ASSERT_GT(memcg_query->nr_file_pages, 0, "final file value");
+ ASSERT_GT(memcg_query->nr_file_mapped, 0, "final file mapped value");
+
+cleanup_map:
+ munmap(map, len);
+cleanup_fd:
+ close(fd);
+ unlink(path);
+}
+
+static void test_shmem(struct bpf_link *link, struct memcg_query *memcg_query)
+{
+ size_t len;
+ int fd;
+
+ len = sysconf(_SC_PAGESIZE) * 1024;
+
+ /*
+ * Increase memcg shmem usage by creating and writing
+ * to a shmem object.
+ */
+ fd = shm_open("/tmp_shmem", O_CREAT | O_RDWR, 0644);
+ if (!ASSERT_OK_FD(fd, "shm_open"))
+ return;
+
+ if (!ASSERT_OK(fallocate(fd, 0, 0, len), "fallocate"))
+ goto cleanup;
+
+ if (!ASSERT_OK(read_stats(link), "read stats"))
+ goto cleanup;
+
+ ASSERT_GT(memcg_query->nr_shmem, 0, "final shmem value");
+
+cleanup:
+ close(fd);
+ shm_unlink("/tmp_shmem");
+}
+
+#define NR_PIPES 64
+static void test_kmem(struct bpf_link *link, struct memcg_query *memcg_query)
+{
+ int fds[NR_PIPES][2], i;
+
+ /*
+ * Increase kmem value by creating pipes which will allocate some
+ * kernel buffers.
+ */
+ for (i = 0; i < NR_PIPES; i++) {
+ if (!ASSERT_OK(pipe(fds[i]), "pipe"))
+ goto cleanup;
+ }
+
+ if (!ASSERT_OK(read_stats(link), "read stats"))
+ goto cleanup;
+
+ ASSERT_GT(memcg_query->memcg_kmem, 0, "kmem value");
+
+cleanup:
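+	/* close only the pipe pairs that were successfully created */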
+ for (i = i - 1; i >= 0; i--) {
+ close(fds[i][0]);
+ close(fds[i][1]);
+ }
+}
+
+static void test_pgfault(struct bpf_link *link, struct memcg_query *memcg_query)
+{
+ void *map;
+ size_t len;
+
+ len = sysconf(_SC_PAGESIZE) * 1024;
+
+ /* Create region to use for triggering a page fault. */
+ map = mmap(NULL, len, PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (!ASSERT_NEQ(map, MAP_FAILED, "mmap anon"))
+ return;
+
+ /* Trigger page fault. */
+ memset(map, 1, len);
+
+ if (!ASSERT_OK(read_stats(link), "read stats"))
+ goto cleanup;
+
+ ASSERT_GT(memcg_query->pgfault, 0, "final pgfault val");
+
+cleanup:
+ munmap(map, len);
+}
+
+void test_cgroup_iter_memcg(void)
+{
+ char *cgroup_rel_path = "/cgroup_iter_memcg_test";
+ struct cgroup_iter_memcg *skel;
+ struct bpf_link *link;
+ int cgroup_fd;
+
+ cgroup_fd = cgroup_setup_and_join(cgroup_rel_path);
+ if (!ASSERT_OK_FD(cgroup_fd, "cgroup_setup_and_join"))
+ return;
+
+ skel = cgroup_iter_memcg__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "cgroup_iter_memcg__open_and_load"))
+ goto cleanup_cgroup_fd;
+
+ DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ union bpf_iter_link_info linfo = {
+ .cgroup.cgroup_fd = cgroup_fd,
+ .cgroup.order = BPF_CGROUP_ITER_SELF_ONLY,
+ };
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+
+ link = bpf_program__attach_iter(skel->progs.cgroup_memcg_query, &opts);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_iter"))
+ goto cleanup_skel;
+
+ if (test__start_subtest("cgroup_iter_memcg__anon"))
+ test_anon(link, &skel->data_query->memcg_query);
+ if (test__start_subtest("cgroup_iter_memcg__shmem"))
+ test_shmem(link, &skel->data_query->memcg_query);
+ if (test__start_subtest("cgroup_iter_memcg__file"))
+ test_file(link, &skel->data_query->memcg_query);
+ if (test__start_subtest("cgroup_iter_memcg__kmem"))
+ test_kmem(link, &skel->data_query->memcg_query);
+ if (test__start_subtest("cgroup_iter_memcg__pgfault"))
+ test_pgfault(link, &skel->data_query->memcg_query);
+
+ bpf_link__destroy(link);
+cleanup_skel:
+ cgroup_iter_memcg__destroy(skel);
+cleanup_cgroup_fd:
+ close(cgroup_fd);
+ cleanup_cgroup_environment();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c b/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c
index e442be9dde7e..fb2cea710db3 100644
--- a/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c
@@ -233,7 +233,7 @@ static void subtest_dmabuf_iter_check_lots_of_buffers(struct dmabuf_iter *skel)
while ((bytes_read = read(iter_fd, buf, sizeof(buf))) > 0)
total_bytes_read += bytes_read;
- ASSERT_GT(total_bytes_read, getpagesize(), "total_bytes_read");
+ ASSERT_GT(total_bytes_read, 4096, "total_bytes_read");
close(iter_fd);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/exe_ctx.c b/tools/testing/selftests/bpf/prog_tests/exe_ctx.c
new file mode 100644
index 000000000000..aed6a6ef0876
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/exe_ctx.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2026 Valve Corporation.
+ * Author: Changwoo Min <changwoo@igalia.com>
+ */
+
+#include <test_progs.h>
+#include <sys/syscall.h>
+#include "test_ctx.skel.h"
+
+void test_exe_ctx(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ cpu_set_t old_cpuset, target_cpuset;
+ struct test_ctx *skel;
+ int err, prog_fd;
+
+ /* 1. Pin the current process to CPU 0. */
+ if (sched_getaffinity(0, sizeof(old_cpuset), &old_cpuset) == 0) {
+ CPU_ZERO(&target_cpuset);
+ CPU_SET(0, &target_cpuset);
+ ASSERT_OK(sched_setaffinity(0, sizeof(target_cpuset),
+ &target_cpuset), "setaffinity");
+ }
+
+ skel = test_ctx__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto restore_affinity;
+
+ err = test_ctx__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ /* 2. When we run this, the kernel will execute the BPF prog on CPU 0. */
+ prog_fd = bpf_program__fd(skel->progs.trigger_all_contexts);
+ err = bpf_prog_test_run_opts(prog_fd, &opts);
+ ASSERT_OK(err, "test_run_trigger");
+
+ /* 3. Wait for the local CPU's softirq/tasklet to finish. */
+ for (int i = 0; i < 1000; i++) {
+ if (skel->bss->count_task > 0 &&
+ skel->bss->count_hardirq > 0 &&
+ skel->bss->count_softirq > 0)
+ break;
+ usleep(1000); /* Wait 1ms per iteration, up to 1 sec total */
+ }
+
+ /* On CPU 0, these should now all be non-zero. */
+ ASSERT_GT(skel->bss->count_task, 0, "task_ok");
+ ASSERT_GT(skel->bss->count_hardirq, 0, "hardirq_ok");
+ ASSERT_GT(skel->bss->count_softirq, 0, "softirq_ok");
+
+cleanup:
+ test_ctx__destroy(skel);
+
+restore_affinity:
+ ASSERT_OK(sched_setaffinity(0, sizeof(old_cpuset), &old_cpuset),
+ "restore_affinity");
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/fsession_test.c b/tools/testing/selftests/bpf/prog_tests/fsession_test.c
new file mode 100644
index 000000000000..a299aeb8cc2e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/fsession_test.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 ChinaTelecom */
+#include <test_progs.h>
+#include "fsession_test.skel.h"
+
+static int check_result(struct fsession_test *skel)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err, prog_fd;
+
+ /* Trigger test function calls */
+ prog_fd = bpf_program__fd(skel->progs.test1);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return err;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return topts.retval;
+
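+	/* every result counter in the BSS must have been set to 1 by the programs */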
+ for (int i = 0; i < sizeof(*skel->bss) / sizeof(__u64); i++) {
+ if (!ASSERT_EQ(((__u64 *)skel->bss)[i], 1, "test_result"))
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void test_fsession_basic(void)
+{
+ struct fsession_test *skel = NULL;
+ int err;
+
+ skel = fsession_test__open();
+ if (!ASSERT_OK_PTR(skel, "fsession_test__open"))
+ return;
+
+ err = fsession_test__load(skel);
+ if (err == -EOPNOTSUPP) {
+ test__skip();
+ goto cleanup;
+ }
+ if (!ASSERT_OK(err, "fsession_test__load"))
+ goto cleanup;
+
+ err = fsession_test__attach(skel);
+ if (!ASSERT_OK(err, "fsession_attach"))
+ goto cleanup;
+
+ check_result(skel);
+cleanup:
+ fsession_test__destroy(skel);
+}
+
+static void test_fsession_reattach(void)
+{
+ struct fsession_test *skel = NULL;
+ int err;
+
+ skel = fsession_test__open();
+ if (!ASSERT_OK_PTR(skel, "fsession_test__open"))
+ return;
+
+ err = fsession_test__load(skel);
+ if (err == -EOPNOTSUPP) {
+ test__skip();
+ goto cleanup;
+ }
+ if (!ASSERT_OK(err, "fsession_test__load"))
+ goto cleanup;
+
+ /* first attach */
+ err = fsession_test__attach(skel);
+ if (!ASSERT_OK(err, "fsession_first_attach"))
+ goto cleanup;
+
+ if (check_result(skel))
+ goto cleanup;
+
+ /* detach */
+ fsession_test__detach(skel);
+
+ /* reset counters */
+ memset(skel->bss, 0, sizeof(*skel->bss));
+
+ /* second attach */
+ err = fsession_test__attach(skel);
+ if (!ASSERT_OK(err, "fsession_second_attach"))
+ goto cleanup;
+
+ if (check_result(skel))
+ goto cleanup;
+
+cleanup:
+ fsession_test__destroy(skel);
+}
+
+static void test_fsession_cookie(void)
+{
+ struct fsession_test *skel = NULL;
+ int err;
+
+ skel = fsession_test__open();
+ if (!ASSERT_OK_PTR(skel, "fsession_test__open"))
+ goto cleanup;
+
+ /*
+	 * test_fsession_basic() already covers the session cookie combined
+	 * with bpf_get_func_ip(), so only the cookie without
+	 * bpf_get_func_ip() needs to be checked here
+ */
+ bpf_program__set_autoload(skel->progs.test6, false);
+
+ err = fsession_test__load(skel);
+ if (err == -EOPNOTSUPP) {
+ test__skip();
+ goto cleanup;
+ }
+ if (!ASSERT_OK(err, "fsession_test__load"))
+ goto cleanup;
+
+ err = fsession_test__attach(skel);
+ if (!ASSERT_OK(err, "fsession_attach"))
+ goto cleanup;
+
+ skel->bss->test6_entry_result = 1;
+ skel->bss->test6_exit_result = 1;
+
+ check_result(skel);
+cleanup:
+ fsession_test__destroy(skel);
+}
+
+void test_fsession_test(void)
+{
+ if (test__start_subtest("fsession_test"))
+ test_fsession_basic();
+ if (test__start_subtest("fsession_reattach"))
+ test_fsession_reattach();
+ if (test__start_subtest("fsession_cookie"))
+ test_fsession_cookie();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c b/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c
index 64a9c95d4acf..96b27de05524 100644
--- a/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c
@@ -33,11 +33,15 @@ void test_get_func_args_test(void)
ASSERT_EQ(topts.retval >> 16, 1, "test_run");
ASSERT_EQ(topts.retval & 0xffff, 1234 + 29, "test_run");
+ ASSERT_OK(trigger_module_test_read(1), "trigger_read");
ASSERT_EQ(skel->bss->test1_result, 1, "test1_result");
ASSERT_EQ(skel->bss->test2_result, 1, "test2_result");
ASSERT_EQ(skel->bss->test3_result, 1, "test3_result");
ASSERT_EQ(skel->bss->test4_result, 1, "test4_result");
+ ASSERT_EQ(skel->bss->test5_result, 1, "test5_result");
+ ASSERT_EQ(skel->bss->test6_result, 1, "test6_result");
+ ASSERT_EQ(skel->bss->test7_result, 1, "test7_result");
cleanup:
get_func_args_test__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c b/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c
index c40242dfa8fb..7772a0f288d3 100644
--- a/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c
@@ -46,6 +46,8 @@ static void test_function_entry(void)
ASSERT_EQ(skel->bss->test5_result, 1, "test5_result");
ASSERT_EQ(skel->bss->test7_result, 1, "test7_result");
ASSERT_EQ(skel->bss->test8_result, 1, "test8_result");
+ ASSERT_EQ(skel->bss->test9_entry_result, 1, "test9_entry_result");
+ ASSERT_EQ(skel->bss->test9_exit_result, 1, "test9_exit_result");
cleanup:
get_func_ip_test__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/iters.c b/tools/testing/selftests/bpf/prog_tests/iters.c
index 3cea71f9c500..a539980a2fbe 100644
--- a/tools/testing/selftests/bpf/prog_tests/iters.c
+++ b/tools/testing/selftests/bpf/prog_tests/iters.c
@@ -253,6 +253,11 @@ static void subtest_css_iters(void)
{ "/cg1/cg2" },
{ "/cg1/cg2/cg3" },
{ "/cg1/cg2/cg3/cg4" },
+ { "/cg1/cg5" },
+ { "/cg1/cg5/cg6" },
+ { "/cg1/cg7" },
+ { "/cg1/cg7/cg8" },
+ { "/cg1/cg7/cg8/cg9" },
};
int err, cg_nr = ARRAY_SIZE(cgs);
int i;
@@ -284,7 +289,8 @@ static void subtest_css_iters(void)
ASSERT_EQ(skel->bss->post_order_cnt, cg_nr, "post_order_cnt");
ASSERT_EQ(skel->bss->last_cg_id, get_cgroup_id(cgs[0].path), "last_cg_id");
- ASSERT_EQ(skel->bss->tree_high, cg_nr - 1, "tree_high");
+ ASSERT_EQ(skel->bss->children_cnt, 3, "children_cnt");
+ ASSERT_EQ(skel->bss->tree_high, 3, "tree_high");
iters_css__detach(skel);
cleanup:
cleanup_cgroup_environment();
diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_implicit_args.c b/tools/testing/selftests/bpf/prog_tests/kfunc_implicit_args.c
new file mode 100644
index 000000000000..5e4793c9c29a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/kfunc_implicit_args.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+#include "kfunc_implicit_args.skel.h"
+
+void test_kfunc_implicit_args(void)
+{
+ RUN_TESTS(kfunc_implicit_args);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
index 6cfaa978bc9a..9caef222e528 100644
--- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
@@ -1,4 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <sys/prctl.h>
#include <test_progs.h>
#include "kprobe_multi.skel.h"
#include "trace_helpers.h"
@@ -540,6 +542,46 @@ cleanup:
kprobe_multi_override__destroy(skel);
}
+static void test_override(void)
+{
+ struct kprobe_multi_override *skel = NULL;
+ int err;
+
+ skel = kprobe_multi_override__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "kprobe_multi_empty__open_and_load"))
+ goto cleanup;
+
+ skel->bss->pid = getpid();
+
+ /* no override */
+ err = prctl(0xffff, 0);
+ ASSERT_EQ(err, -1, "err");
+
+ /* kprobe.multi override */
+ skel->links.test_override = bpf_program__attach_kprobe_multi_opts(skel->progs.test_override,
+ SYS_PREFIX "sys_prctl", NULL);
+ if (!ASSERT_OK_PTR(skel->links.test_override, "bpf_program__attach_kprobe_multi_opts"))
+ goto cleanup;
+
+ err = prctl(0xffff, 0);
+ ASSERT_EQ(err, 123, "err");
+
+ bpf_link__destroy(skel->links.test_override);
+ skel->links.test_override = NULL;
+
+ /* kprobe override */
+ skel->links.test_kprobe_override = bpf_program__attach_kprobe(skel->progs.test_kprobe_override,
+ false, SYS_PREFIX "sys_prctl");
+ if (!ASSERT_OK_PTR(skel->links.test_kprobe_override, "bpf_program__attach_kprobe"))
+ goto cleanup;
+
+ err = prctl(0xffff, 0);
+ ASSERT_EQ(err, 123, "err");
+
+cleanup:
+ kprobe_multi_override__destroy(skel);
+}
+
#ifdef __x86_64__
static void test_attach_write_ctx(void)
{
@@ -597,6 +639,8 @@ void test_kprobe_multi_test(void)
test_attach_api_fails();
if (test__start_subtest("attach_override"))
test_attach_override();
+ if (test__start_subtest("override"))
+ test_override();
if (test__start_subtest("session"))
test_session_skel_api();
if (test__start_subtest("session_cookie"))
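
The new "override" subtest expects prctl(0xffff, 0) to return 123 once either program is attached. The BPF side lives in progs/kprobe_multi_override.c, outside this hunk; a minimal sketch of what such a program looks like, with the pid filter taken from the skel->bss->pid assignment above (section name and exact shape are assumptions):

	int pid = 0;	/* filled in from userspace via skel->bss->pid */

	SEC("kprobe.multi")
	int test_override(struct pt_regs *ctx)
	{
		if (bpf_get_current_pid_tgid() >> 32 != pid)
			return 0;
		/* Force the traced sys_prctl to return 123. */
		bpf_override_return(ctx, 123);
		return 0;
	}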
diff --git a/tools/testing/selftests/bpf/prog_tests/map_kptr.c b/tools/testing/selftests/bpf/prog_tests/map_kptr.c
index 8743df599567..f372162c0280 100644
--- a/tools/testing/selftests/bpf/prog_tests/map_kptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/map_kptr.c
@@ -131,6 +131,25 @@ static int kern_sync_rcu_tasks_trace(struct rcu_tasks_trace_gp *rcu)
return 0;
}
+static void wait_for_map_release(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, lopts);
+ struct map_kptr *skel;
+ int ret;
+
+ skel = map_kptr__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "map_kptr__open_and_load"))
+ return;
+
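+	/* poll until only the two expected references to the kptr remain */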
+ do {
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.count_ref), &lopts);
+ ASSERT_OK(ret, "count_ref ret");
+ ASSERT_OK(lopts.retval, "count_ref retval");
+ } while (skel->bss->num_of_refs != 2);
+
+ map_kptr__destroy(skel);
+}
+
void serial_test_map_kptr(void)
{
struct rcu_tasks_trace_gp *skel;
@@ -148,11 +167,15 @@ void serial_test_map_kptr(void)
ASSERT_OK(kern_sync_rcu_tasks_trace(skel), "sync rcu_tasks_trace");
ASSERT_OK(kern_sync_rcu(), "sync rcu");
+ wait_for_map_release();
+
/* Observe refcount dropping to 1 on bpf_map_free_deferred */
test_map_kptr_success(false);
ASSERT_OK(kern_sync_rcu_tasks_trace(skel), "sync rcu_tasks_trace");
ASSERT_OK(kern_sync_rcu(), "sync rcu");
+ wait_for_map_release();
+
/* Observe refcount dropping to 1 on synchronous delete elem */
test_map_kptr_success(true);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/percpu_alloc.c b/tools/testing/selftests/bpf/prog_tests/percpu_alloc.c
index 343da65864d6..a72ae0b29f6e 100644
--- a/tools/testing/selftests/bpf/prog_tests/percpu_alloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/percpu_alloc.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include "cgroup_helpers.h"
#include "percpu_alloc_array.skel.h"
#include "percpu_alloc_cgrp_local_storage.skel.h"
#include "percpu_alloc_fail.skel.h"
@@ -115,6 +116,328 @@ static void test_failure(void) {
RUN_TESTS(percpu_alloc_fail);
}
+static void test_percpu_map_op_cpu_flag(struct bpf_map *map, void *keys, size_t key_sz, u32 entries,
+ int nr_cpus, bool test_batch)
+{
+ size_t value_sz = sizeof(u32), value_sz_cpus, value_sz_total;
+ u32 *values = NULL, *values_percpu = NULL;
+ const u32 value = 0xDEADC0DE;
+ int i, j, cpu, map_fd, err;
+ u64 batch = 0, flags;
+ void *values_row;
+ u32 count, v;
+ LIBBPF_OPTS(bpf_map_batch_opts, batch_opts);
+
+ value_sz_cpus = value_sz * nr_cpus;
+ values = calloc(entries, value_sz_cpus);
+ if (!ASSERT_OK_PTR(values, "calloc values"))
+ return;
+
+ values_percpu = calloc(entries, roundup(value_sz, 8) * nr_cpus);
+ if (!ASSERT_OK_PTR(values_percpu, "calloc values_percpu")) {
+ free(values);
+ return;
+ }
+
+ value_sz_total = value_sz_cpus * entries;
+ memset(values, 0, value_sz_total);
+
+ map_fd = bpf_map__fd(map);
+ flags = BPF_F_CPU | BPF_F_ALL_CPUS;
+ err = bpf_map_lookup_elem_flags(map_fd, keys, values, flags);
+ if (!ASSERT_ERR(err, "bpf_map_lookup_elem_flags cpu|all_cpus"))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, keys, values, flags);
+ if (!ASSERT_ERR(err, "bpf_map_update_elem cpu|all_cpus"))
+ goto out;
+
+ flags = BPF_F_ALL_CPUS;
+ err = bpf_map_lookup_elem_flags(map_fd, keys, values, flags);
+ if (!ASSERT_ERR(err, "bpf_map_lookup_elem_flags all_cpus"))
+ goto out;
+
+ flags = BPF_F_LOCK | BPF_F_CPU;
+ err = bpf_map_lookup_elem_flags(map_fd, keys, values, flags);
+ if (!ASSERT_ERR(err, "bpf_map_lookup_elem_flags BPF_F_LOCK"))
+ goto out;
+
+ flags = BPF_F_LOCK | BPF_F_ALL_CPUS;
+ err = bpf_map_update_elem(map_fd, keys, values, flags);
+ if (!ASSERT_ERR(err, "bpf_map_update_elem BPF_F_LOCK"))
+ goto out;
+
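+	/* the target CPU rides in the upper 32 bits; nr_cpus is one past the last valid index */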
+ flags = (u64)nr_cpus << 32 | BPF_F_CPU;
+ err = bpf_map_update_elem(map_fd, keys, values, flags);
+ if (!ASSERT_EQ(err, -ERANGE, "bpf_map_update_elem -ERANGE"))
+ goto out;
+
+ err = bpf_map__update_elem(map, keys, key_sz, values, value_sz, flags);
+ if (!ASSERT_EQ(err, -ERANGE, "bpf_map__update_elem -ERANGE"))
+ goto out;
+
+ err = bpf_map_lookup_elem_flags(map_fd, keys, values, flags);
+ if (!ASSERT_EQ(err, -ERANGE, "bpf_map_lookup_elem_flags -ERANGE"))
+ goto out;
+
+ err = bpf_map__lookup_elem(map, keys, key_sz, values, value_sz, flags);
+ if (!ASSERT_EQ(err, -ERANGE, "bpf_map__lookup_elem -ERANGE"))
+ goto out;
+
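+	/* for each target cpu: clear the value everywhere, set it on one cpu,
+	 * then verify every other cpu still reads zero
+	 */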
+ for (cpu = 0; cpu < nr_cpus; cpu++) {
+ /* clear value on all cpus */
+ values[0] = 0;
+ flags = BPF_F_ALL_CPUS;
+ for (i = 0; i < entries; i++) {
+ err = bpf_map__update_elem(map, keys + i * key_sz, key_sz, values,
+ value_sz, flags);
+ if (!ASSERT_OK(err, "bpf_map__update_elem all_cpus"))
+ goto out;
+ }
+
+ /* update value on specified cpu */
+ for (i = 0; i < entries; i++) {
+ values[0] = value;
+ flags = (u64)cpu << 32 | BPF_F_CPU;
+ err = bpf_map__update_elem(map, keys + i * key_sz, key_sz, values,
+ value_sz, flags);
+ if (!ASSERT_OK(err, "bpf_map__update_elem specified cpu"))
+ goto out;
+
+ /* lookup then check value on CPUs */
+ for (j = 0; j < nr_cpus; j++) {
+ flags = (u64)j << 32 | BPF_F_CPU;
+ err = bpf_map__lookup_elem(map, keys + i * key_sz, key_sz, values,
+ value_sz, flags);
+ if (!ASSERT_OK(err, "bpf_map__lookup_elem specified cpu"))
+ goto out;
+ if (!ASSERT_EQ(values[0], j != cpu ? 0 : value,
+ "bpf_map__lookup_elem value on specified cpu"))
+ goto out;
+ }
+ }
+ }
+
+ if (!test_batch)
+ goto out;
+
+ count = entries;
+ batch_opts.elem_flags = (u64)nr_cpus << 32 | BPF_F_CPU;
+ err = bpf_map_update_batch(map_fd, keys, values, &count, &batch_opts);
+ if (!ASSERT_EQ(err, -ERANGE, "bpf_map_update_batch -ERANGE"))
+ goto out;
+
+ for (cpu = 0; cpu < nr_cpus; cpu++) {
+ memset(values, 0, value_sz_total);
+
+ /* clear values across all CPUs */
+ count = entries;
+ batch_opts.elem_flags = BPF_F_ALL_CPUS;
+ err = bpf_map_update_batch(map_fd, keys, values, &count, &batch_opts);
+ if (!ASSERT_OK(err, "bpf_map_update_batch all_cpus"))
+ goto out;
+ if (!ASSERT_EQ(count, entries, "bpf_map_update_batch count"))
+ goto out;
+
+ /* update values on specified CPU */
+ for (i = 0; i < entries; i++)
+ values[i] = value;
+
+ count = entries;
+ batch_opts.elem_flags = (u64)cpu << 32 | BPF_F_CPU;
+ err = bpf_map_update_batch(map_fd, keys, values, &count, &batch_opts);
+ if (!ASSERT_OK(err, "bpf_map_update_batch specified cpu"))
+ goto out;
+ if (!ASSERT_EQ(count, entries, "bpf_map_update_batch count"))
+ goto out;
+
+ /* lookup values on specified CPU */
+ batch = 0;
+ count = entries;
+ memset(values, 0, entries * value_sz);
+ err = bpf_map_lookup_batch(map_fd, NULL, &batch, keys, values, &count, &batch_opts);
+ if (!ASSERT_TRUE(!err || err == -ENOENT, "bpf_map_lookup_batch specified cpu"))
+ goto out;
+ if (!ASSERT_EQ(count, entries, "bpf_map_lookup_batch count"))
+ goto out;
+
+ for (i = 0; i < entries; i++)
+ if (!ASSERT_EQ(values[i], value,
+ "bpf_map_lookup_batch value on specified cpu"))
+ goto out;
+
+ /* lookup values from all CPUs */
+ batch = 0;
+ count = entries;
+ batch_opts.elem_flags = 0;
+ memset(values_percpu, 0, roundup(value_sz, 8) * nr_cpus * entries);
+ err = bpf_map_lookup_batch(map_fd, NULL, &batch, keys, values_percpu, &count,
+ &batch_opts);
+ if (!ASSERT_TRUE(!err || err == -ENOENT, "bpf_map_lookup_batch all_cpus"))
+ goto out;
+ if (!ASSERT_EQ(count, entries, "bpf_map_lookup_batch count"))
+ goto out;
+
+ for (i = 0; i < entries; i++) {
+ values_row = (void *) values_percpu +
+ roundup(value_sz, 8) * i * nr_cpus;
+ for (j = 0; j < nr_cpus; j++) {
+ v = *(u32 *) (values_row + roundup(value_sz, 8) * j);
+ if (!ASSERT_EQ(v, j != cpu ? 0 : value,
+ "bpf_map_lookup_batch value all_cpus"))
+ goto out;
+ }
+ }
+ }
+
+out:
+ free(values_percpu);
+ free(values);
+}
+
+static void test_percpu_map_cpu_flag(enum bpf_map_type map_type)
+{
+ struct percpu_alloc_array *skel;
+ size_t key_sz = sizeof(int);
+ int *keys, nr_cpus, i, err;
+ struct bpf_map *map;
+ u32 max_entries;
+
+ nr_cpus = libbpf_num_possible_cpus();
+ if (!ASSERT_GT(nr_cpus, 0, "libbpf_num_possible_cpus"))
+ return;
+
+ max_entries = nr_cpus * 2;
+ keys = calloc(max_entries, key_sz);
+ if (!ASSERT_OK_PTR(keys, "calloc keys"))
+ return;
+
+ for (i = 0; i < max_entries; i++)
+ keys[i] = i;
+
+ skel = percpu_alloc_array__open();
+ if (!ASSERT_OK_PTR(skel, "percpu_alloc_array__open")) {
+ free(keys);
+ return;
+ }
+
+ map = skel->maps.percpu;
+ bpf_map__set_type(map, map_type);
+ bpf_map__set_max_entries(map, max_entries);
+
+ err = percpu_alloc_array__load(skel);
+ if (!ASSERT_OK(err, "test_percpu_alloc__load"))
+ goto out;
+
+ test_percpu_map_op_cpu_flag(map, keys, key_sz, nr_cpus, nr_cpus, true);
+out:
+ percpu_alloc_array__destroy(skel);
+ free(keys);
+}
+
+static void test_percpu_array_cpu_flag(void)
+{
+ test_percpu_map_cpu_flag(BPF_MAP_TYPE_PERCPU_ARRAY);
+}
+
+static void test_percpu_hash_cpu_flag(void)
+{
+ test_percpu_map_cpu_flag(BPF_MAP_TYPE_PERCPU_HASH);
+}
+
+static void test_lru_percpu_hash_cpu_flag(void)
+{
+ test_percpu_map_cpu_flag(BPF_MAP_TYPE_LRU_PERCPU_HASH);
+}
+
+static void test_percpu_cgroup_storage_cpu_flag(void)
+{
+ struct percpu_alloc_array *skel = NULL;
+ struct bpf_cgroup_storage_key key;
+ int cgroup, prog_fd, nr_cpus, err;
+ struct bpf_map *map;
+
+ nr_cpus = libbpf_num_possible_cpus();
+ if (!ASSERT_GT(nr_cpus, 0, "libbpf_num_possible_cpus"))
+ return;
+
+ err = setup_cgroup_environment();
+ if (!ASSERT_OK(err, "setup_cgroup_environment"))
+ return;
+
+ cgroup = create_and_get_cgroup("/cg_percpu");
+ if (!ASSERT_GE(cgroup, 0, "create_and_get_cgroup")) {
+ cleanup_cgroup_environment();
+ return;
+ }
+
+ err = join_cgroup("/cg_percpu");
+ if (!ASSERT_OK(err, "join_cgroup"))
+ goto out;
+
+ skel = percpu_alloc_array__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "percpu_alloc_array__open_and_load"))
+ goto out;
+
+ prog_fd = bpf_program__fd(skel->progs.cgroup_egress);
+ err = bpf_prog_attach(prog_fd, cgroup, BPF_CGROUP_INET_EGRESS, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach"))
+ goto out;
+
+ map = skel->maps.percpu_cgroup_storage;
+ err = bpf_map_get_next_key(bpf_map__fd(map), NULL, &key);
+ if (!ASSERT_OK(err, "bpf_map_get_next_key"))
+ goto out;
+
+ test_percpu_map_op_cpu_flag(map, &key, sizeof(key), 1, nr_cpus, false);
+out:
+ bpf_prog_detach2(-1, cgroup, BPF_CGROUP_INET_EGRESS);
+ close(cgroup);
+ cleanup_cgroup_environment();
+ percpu_alloc_array__destroy(skel);
+}
+
+static void test_map_op_cpu_flag(enum bpf_map_type map_type)
+{
+ u32 max_entries = 1, count = max_entries;
+ u64 flags, batch = 0, val = 0;
+ int err, map_fd, key = 0;
+ LIBBPF_OPTS(bpf_map_batch_opts, batch_opts);
+
+ map_fd = bpf_map_create(map_type, "test_cpu_flag", sizeof(int), sizeof(u64), max_entries,
+ NULL);
+ if (!ASSERT_GE(map_fd, 0, "bpf_map_create"))
+ return;
+
+ flags = BPF_F_ALL_CPUS;
+ err = bpf_map_update_elem(map_fd, &key, &val, flags);
+ ASSERT_ERR(err, "bpf_map_update_elem all_cpus");
+
+ batch_opts.elem_flags = BPF_F_ALL_CPUS;
+ err = bpf_map_update_batch(map_fd, &key, &val, &count, &batch_opts);
+ ASSERT_ERR(err, "bpf_map_update_batch all_cpus");
+
+ flags = BPF_F_CPU;
+ err = bpf_map_lookup_elem_flags(map_fd, &key, &val, flags);
+ ASSERT_ERR(err, "bpf_map_lookup_elem_flags cpu");
+
+ batch_opts.elem_flags = BPF_F_CPU;
+ err = bpf_map_lookup_batch(map_fd, NULL, &batch, &key, &val, &count, &batch_opts);
+ ASSERT_ERR(err, "bpf_map_lookup_batch cpu");
+
+ close(map_fd);
+}
+
+static void test_array_cpu_flag(void)
+{
+ test_map_op_cpu_flag(BPF_MAP_TYPE_ARRAY);
+}
+
+static void test_hash_cpu_flag(void)
+{
+ test_map_op_cpu_flag(BPF_MAP_TYPE_HASH);
+}
+
void test_percpu_alloc(void)
{
if (test__start_subtest("array"))
@@ -125,4 +448,16 @@ void test_percpu_alloc(void)
test_cgrp_local_storage();
if (test__start_subtest("failure_tests"))
test_failure();
+ if (test__start_subtest("cpu_flag_percpu_array"))
+ test_percpu_array_cpu_flag();
+ if (test__start_subtest("cpu_flag_percpu_hash"))
+ test_percpu_hash_cpu_flag();
+ if (test__start_subtest("cpu_flag_lru_percpu_hash"))
+ test_lru_percpu_hash_cpu_flag();
+ if (test__start_subtest("cpu_flag_percpu_cgroup_storage"))
+ test_percpu_cgroup_storage_cpu_flag();
+ if (test__start_subtest("cpu_flag_array"))
+ test_array_cpu_flag();
+ if (test__start_subtest("cpu_flag_hash"))
+ test_hash_cpu_flag();
}
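
The subtests above build the flags word for BPF_F_CPU operations by hand in several places. As a reading aid, the encoding reduces to the following sketch (the helper name is illustrative, not part of libbpf):

	/*
	 * BPF_F_CPU carries the target CPU index in the upper 32 bits of the
	 * flags word; BPF_F_ALL_CPUS instead applies one value to every CPU.
	 */
	static inline __u64 percpu_op_flags(int cpu)
	{
		return (__u64)cpu << 32 | BPF_F_CPU;
	}

	/* e.g. update only CPU 3's copy of an element:
	 * bpf_map_update_elem(map_fd, &key, &val, percpu_op_flags(3));
	 */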
diff --git a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
index 51544372f52e..41dfaaabb73f 100644
--- a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
+++ b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
@@ -101,9 +101,9 @@ static int resolve_symbols(void)
int type_id;
__u32 nr;
- btf = btf__parse_elf("btf_data.bpf.o", NULL);
+ btf = btf__parse_raw("resolve_btfids.test.o.BTF");
if (CHECK(libbpf_get_error(btf), "resolve",
- "Failed to load BTF from btf_data.bpf.o\n"))
+ "Failed to load BTF from resolve_btfids.test.o.BTF\n"))
return -1;
nr = btf__type_cnt(btf);
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c b/tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c
index e4940583924b..e2c867fd5244 100644
--- a/tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c
+++ b/tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c
@@ -5,9 +5,14 @@
#include "sk_bypass_prot_mem.skel.h"
#include "network_helpers.h"
+#ifndef PAGE_SIZE
+#include <unistd.h>
+#define PAGE_SIZE getpagesize()
+#endif
+
#define NR_PAGES 32
#define NR_SOCKETS 2
-#define BUF_TOTAL (NR_PAGES * 4096 / NR_SOCKETS)
+#define BUF_TOTAL (NR_PAGES * PAGE_SIZE / NR_SOCKETS)
#define BUF_SINGLE 1024
#define NR_SEND (BUF_TOTAL / BUF_SINGLE)
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index 1e3e4392dcca..256707e7d20d 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -1,7 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2020 Cloudflare
#include <error.h>
-#include <netinet/tcp.h>
+#include <linux/tcp.h>
+#include <linux/socket.h>
#include <sys/epoll.h>
#include "test_progs.h"
@@ -22,6 +23,15 @@
#define TCP_REPAIR_ON 1
#define TCP_REPAIR_OFF_NO_WP -1 /* Turn off without window probes */
+/*
+ * SOL_TCP is normally defined in <netinet/tcp.h> (glibc), but older glibc
+ * versions lack the copybuf_address field of struct tcp_zerocopy_receive,
+ * which this test needs, so <linux/tcp.h> is included instead. Define
+ * SOL_TCP here until the glibc headers catch up.
+ */
+#ifndef SOL_TCP
+#define SOL_TCP 6
+#endif
+
static int connected_socket_v4(void)
{
struct sockaddr_in addr = {
@@ -536,13 +546,14 @@ out:
}
-static void test_sockmap_skb_verdict_fionread(bool pass_prog)
+static void do_test_sockmap_skb_verdict_fionread(int sotype, bool pass_prog)
{
int err, map, verdict, c0 = -1, c1 = -1, p0 = -1, p1 = -1;
int expected, zero = 0, sent, recvd, avail;
struct test_sockmap_pass_prog *pass = NULL;
struct test_sockmap_drop_prog *drop = NULL;
char buf[256] = "0123456789";
+ int split_len = sizeof(buf) / 2;
if (pass_prog) {
pass = test_sockmap_pass_prog__open_and_load();
@@ -550,7 +561,10 @@ static void test_sockmap_skb_verdict_fionread(bool pass_prog)
return;
verdict = bpf_program__fd(pass->progs.prog_skb_verdict);
map = bpf_map__fd(pass->maps.sock_map_rx);
- expected = sizeof(buf);
+ if (sotype == SOCK_DGRAM)
+			expected = split_len; /* UDP FIONREAD reports only the first datagram, unlike TCP */
+ else
+ expected = sizeof(buf);
} else {
drop = test_sockmap_drop_prog__open_and_load();
if (!ASSERT_OK_PTR(drop, "open_and_load"))
@@ -566,7 +580,7 @@ static void test_sockmap_skb_verdict_fionread(bool pass_prog)
if (!ASSERT_OK(err, "bpf_prog_attach"))
goto out;
- err = create_socket_pairs(AF_INET, SOCK_STREAM, &c0, &c1, &p0, &p1);
+ err = create_socket_pairs(AF_INET, sotype, &c0, &c1, &p0, &p1);
if (!ASSERT_OK(err, "create_socket_pairs()"))
goto out;
@@ -574,8 +588,9 @@ static void test_sockmap_skb_verdict_fionread(bool pass_prog)
if (!ASSERT_OK(err, "bpf_map_update_elem(c1)"))
goto out_close;
- sent = xsend(p1, &buf, sizeof(buf), 0);
- ASSERT_EQ(sent, sizeof(buf), "xsend(p0)");
+ sent = xsend(p1, &buf, split_len, 0);
+ sent += xsend(p1, &buf, sizeof(buf) - split_len, 0);
+ ASSERT_EQ(sent, sizeof(buf), "xsend(p1)");
err = ioctl(c1, FIONREAD, &avail);
ASSERT_OK(err, "ioctl(FIONREAD) error");
ASSERT_EQ(avail, expected, "ioctl(FIONREAD)");
@@ -597,6 +612,12 @@ out:
test_sockmap_drop_prog__destroy(drop);
}
+static void test_sockmap_skb_verdict_fionread(bool pass_prog)
+{
+ do_test_sockmap_skb_verdict_fionread(SOCK_STREAM, pass_prog);
+ do_test_sockmap_skb_verdict_fionread(SOCK_DGRAM, pass_prog);
+}
+
static void test_sockmap_skb_verdict_change_tail(void)
{
struct test_sockmap_change_tail *skel;
@@ -1042,6 +1063,257 @@ close_map:
xclose(map);
}
+/* Reproduce the kernel WARNING hit when copied_seq gets out of sync with rcv_nxt */
+static void test_sockmap_zc(void)
+{
+ int map, err, sent, recvd, zero = 0, one = 1, on = 1;
+ char buf[10] = "0123456789", rcv[11], addr[100];
+ struct test_sockmap_pass_prog *skel = NULL;
+ int c0 = -1, p0 = -1, c1 = -1, p1 = -1;
+ struct tcp_zerocopy_receive zc;
+ socklen_t zc_len = sizeof(zc);
+ struct bpf_program *prog;
+
+ skel = test_sockmap_pass_prog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ if (create_socket_pairs(AF_INET, SOCK_STREAM, &c0, &c1, &p0, &p1))
+ goto end;
+
+ prog = skel->progs.prog_skb_verdict_ingress;
+ map = bpf_map__fd(skel->maps.sock_map_rx);
+
+ err = bpf_prog_attach(bpf_program__fd(prog), map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach"))
+ goto end;
+
+ err = bpf_map_update_elem(map, &zero, &p0, BPF_ANY);
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
+ goto end;
+
+ err = bpf_map_update_elem(map, &one, &p1, BPF_ANY);
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
+ goto end;
+
+ sent = xsend(c0, buf, sizeof(buf), 0);
+ if (!ASSERT_EQ(sent, sizeof(buf), "xsend"))
+ goto end;
+
+ /* trigger tcp_bpf_recvmsg_parser and inc copied_seq of p1 */
+ recvd = recv_timeout(p1, rcv, sizeof(rcv), MSG_DONTWAIT, 1);
+ if (!ASSERT_EQ(recvd, sent, "recv_timeout(p1)"))
+ goto end;
+
+ /* uninstall sockmap of p1 */
+ bpf_map_delete_elem(map, &one);
+
+	/* go through the plain tcp stack so that p1's rcv_nxt falls behind copied_seq */
+ sent = xsend(c1, buf, sizeof(buf) - 1, 0);
+ if (!ASSERT_EQ(sent, sizeof(buf) - 1, "xsend"))
+ goto end;
+
+ err = setsockopt(p1, SOL_SOCKET, SO_ZEROCOPY, &on, sizeof(on));
+ if (!ASSERT_OK(err, "setsockopt"))
+ goto end;
+
+ memset(&zc, 0, sizeof(zc));
+ zc.copybuf_address = (__u64)((unsigned long)addr);
+ zc.copybuf_len = sizeof(addr);
+
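+	/* the zerocopy receive path is where an out-of-sync copied_seq
+	 * would trip the kernel WARNING this test guards against
+	 */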
+ err = getsockopt(p1, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, &zc, &zc_len);
+ if (!ASSERT_OK(err, "getsockopt"))
+ goto end;
+
+end:
+ if (c0 >= 0)
+ close(c0);
+ if (p0 >= 0)
+ close(p0);
+ if (c1 >= 0)
+ close(c1);
+ if (p1 >= 0)
+ close(p1);
+ test_sockmap_pass_prog__destroy(skel);
+}
+
+/* Check that the socket's copied_seq remains correct after leaving the sockmap */
+static void test_sockmap_copied_seq(bool strp)
+{
+ int i, map, err, sent, recvd, zero = 0, one = 1;
+ struct test_sockmap_pass_prog *skel = NULL;
+ int c0 = -1, p0 = -1, c1 = -1, p1 = -1;
+ char buf[10] = "0123456789", rcv[11];
+ struct bpf_program *prog;
+
+ skel = test_sockmap_pass_prog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ if (create_socket_pairs(AF_INET, SOCK_STREAM, &c0, &c1, &p0, &p1))
+ goto end;
+
+ prog = skel->progs.prog_skb_verdict_ingress;
+ map = bpf_map__fd(skel->maps.sock_map_rx);
+
+ err = bpf_prog_attach(bpf_program__fd(prog), map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach verdict"))
+ goto end;
+
+ if (strp) {
+ prog = skel->progs.prog_skb_verdict_ingress_strp;
+ err = bpf_prog_attach(bpf_program__fd(prog), map, BPF_SK_SKB_STREAM_PARSER, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach parser"))
+ goto end;
+ }
+
+ err = bpf_map_update_elem(map, &zero, &p0, BPF_ANY);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(p0)"))
+ goto end;
+
+ err = bpf_map_update_elem(map, &one, &p1, BPF_ANY);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(p1)"))
+ goto end;
+
+	/* just trigger sockmap: data sent by c0 will be received by p1 */
+ sent = xsend(c0, buf, sizeof(buf), 0);
+ if (!ASSERT_EQ(sent, sizeof(buf), "xsend(c0), bpf"))
+ goto end;
+
+ /* do partial read */
+ recvd = recv_timeout(p1, rcv, 1, MSG_DONTWAIT, 1);
+ recvd += recv_timeout(p1, rcv + 1, sizeof(rcv) - 1, MSG_DONTWAIT, 1);
+ if (!ASSERT_EQ(recvd, sent, "recv_timeout(p1), bpf") ||
+ !ASSERT_OK(memcmp(buf, rcv, recvd), "data mismatch"))
+ goto end;
+
+ /* uninstall sockmap of p1 and p0 */
+ err = bpf_map_delete_elem(map, &one);
+ if (!ASSERT_OK(err, "bpf_map_delete_elem(1)"))
+ goto end;
+
+ err = bpf_map_delete_elem(map, &zero);
+ if (!ASSERT_OK(err, "bpf_map_delete_elem(0)"))
+ goto end;
+
+	/* now all sockets are plain sockets again; they should still work */
+ for (i = 0; i < 5; i++) {
+ /* test copied_seq of p1 by running tcp native stack */
+ sent = xsend(c1, buf, sizeof(buf), 0);
+ if (!ASSERT_EQ(sent, sizeof(buf), "xsend(c1), native"))
+ goto end;
+
+ recvd = recv(p1, rcv, sizeof(rcv), MSG_DONTWAIT);
+ if (!ASSERT_EQ(recvd, sent, "recv_timeout(p1), native"))
+ goto end;
+
+		/* p0 previously redirected skbs to p1; also check copied_seq of p0 */
+ sent = xsend(c0, buf, sizeof(buf), 0);
+ if (!ASSERT_EQ(sent, sizeof(buf), "xsend(c0), native"))
+ goto end;
+
+ recvd = recv(p0, rcv, sizeof(rcv), MSG_DONTWAIT);
+ if (!ASSERT_EQ(recvd, sent, "recv_timeout(p0), native"))
+ goto end;
+ }
+
+end:
+ if (c0 >= 0)
+ close(c0);
+ if (p0 >= 0)
+ close(p0);
+ if (c1 >= 0)
+ close(c1);
+ if (p1 >= 0)
+ close(p1);
+ test_sockmap_pass_prog__destroy(skel);
+}
+
+/* Wait until FIONREAD reports at least the expected byte count or the timeout expires */
+static int wait_for_fionread(int fd, int expected, unsigned int timeout_ms)
+{
+ unsigned int elapsed = 0;
+ int avail = 0;
+
+ while (elapsed < timeout_ms) {
+ if (ioctl(fd, FIONREAD, &avail) < 0)
+ return -errno;
+ if (avail >= expected)
+ return avail;
+ usleep(1000);
+ elapsed++;
+ }
+ return avail;
+}
+
+/* Send data to the same socket via both the native stack and BPF redirection */
+static void test_sockmap_multi_channels(int sotype)
+{
+ int map, err, sent, recvd, zero = 0, one = 1, avail = 0, expected;
+ struct test_sockmap_pass_prog *skel = NULL;
+ int c0 = -1, p0 = -1, c1 = -1, p1 = -1;
+ char buf[10] = "0123456789", rcv[11];
+ struct bpf_program *prog;
+
+ skel = test_sockmap_pass_prog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ err = create_socket_pairs(AF_INET, sotype, &c0, &c1, &p0, &p1);
+ if (err)
+ goto end;
+
+ prog = skel->progs.prog_skb_verdict_ingress;
+ map = bpf_map__fd(skel->maps.sock_map_rx);
+
+ err = bpf_prog_attach(bpf_program__fd(prog), map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach verdict"))
+ goto end;
+
+ err = bpf_map_update_elem(map, &zero, &p0, BPF_ANY);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(p0)"))
+ goto end;
+
+ err = bpf_map_update_elem(map, &one, &p1, BPF_ANY);
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
+ goto end;
+
+ /* send data to p1 via native stack */
+ sent = xsend(c1, buf, 2, 0);
+ if (!ASSERT_EQ(sent, 2, "xsend(2)"))
+ goto end;
+
+ avail = wait_for_fionread(p1, 2, IO_TIMEOUT_SEC);
+ ASSERT_EQ(avail, 2, "ioctl(FIONREAD) partial return");
+
+ /* send data to p1 via bpf redirecting */
+ sent = xsend(c0, buf + 2, sizeof(buf) - 2, 0);
+ if (!ASSERT_EQ(sent, sizeof(buf) - 2, "xsend(remain-data)"))
+ goto end;
+
+	/* Poll FIONREAD until the expected bytes arrive; poll_read() is unreliable
+ * here since it may return immediately if prior data is already queued.
+ */
+ expected = sotype == SOCK_DGRAM ? 2 : sizeof(buf);
+ avail = wait_for_fionread(p1, expected, IO_TIMEOUT_SEC);
+ ASSERT_EQ(avail, expected, "ioctl(FIONREAD) full return");
+
+ recvd = recv_timeout(p1, rcv, sizeof(rcv), MSG_DONTWAIT, 1);
+ if (!ASSERT_EQ(recvd, sizeof(buf), "recv_timeout(p1)") ||
+ !ASSERT_OK(memcmp(buf, rcv, recvd), "data mismatch"))
+ goto end;
+end:
+ if (c0 >= 0)
+ close(c0);
+ if (p0 >= 0)
+ close(p0);
+ if (c1 >= 0)
+ close(c1);
+ if (p1 >= 0)
+ close(p1);
+ test_sockmap_pass_prog__destroy(skel);
+}
+
void test_sockmap_basic(void)
{
if (test__start_subtest("sockmap create_update_free"))
@@ -1108,4 +1380,14 @@ void test_sockmap_basic(void)
test_sockmap_skb_verdict_vsock_poll();
if (test__start_subtest("sockmap vsock unconnected"))
test_sockmap_vsock_unconnected();
+ if (test__start_subtest("sockmap with zc"))
+ test_sockmap_zc();
+ if (test__start_subtest("sockmap recover"))
+ test_sockmap_copied_seq(false);
+ if (test__start_subtest("sockmap recover with strp"))
+ test_sockmap_copied_seq(true);
+ if (test__start_subtest("sockmap tcp multi channels"))
+ test_sockmap_multi_channels(SOCK_STREAM);
+ if (test__start_subtest("sockmap udp multi channels"))
+ test_sockmap_multi_channels(SOCK_DGRAM);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c
index c9efdd2a5b18..da42b00e3d1f 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c
@@ -74,11 +74,20 @@ static void test_stacktrace_ips_kprobe_multi(bool retprobe)
load_kallsyms();
- check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 4,
- ksym_get_addr("bpf_testmod_stacktrace_test_3"),
- ksym_get_addr("bpf_testmod_stacktrace_test_2"),
- ksym_get_addr("bpf_testmod_stacktrace_test_1"),
- ksym_get_addr("bpf_testmod_test_read"));
+ if (retprobe) {
+ check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 4,
+ ksym_get_addr("bpf_testmod_stacktrace_test_3"),
+ ksym_get_addr("bpf_testmod_stacktrace_test_2"),
+ ksym_get_addr("bpf_testmod_stacktrace_test_1"),
+ ksym_get_addr("bpf_testmod_test_read"));
+ } else {
+ check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 5,
+ ksym_get_addr("bpf_testmod_stacktrace_test"),
+ ksym_get_addr("bpf_testmod_stacktrace_test_3"),
+ ksym_get_addr("bpf_testmod_stacktrace_test_2"),
+ ksym_get_addr("bpf_testmod_stacktrace_test_1"),
+ ksym_get_addr("bpf_testmod_test_read"));
+ }
cleanup:
stacktrace_ips__destroy(skel);
@@ -128,6 +137,99 @@ cleanup:
stacktrace_ips__destroy(skel);
}
+static void test_stacktrace_ips_kprobe(bool retprobe)
+{
+ LIBBPF_OPTS(bpf_kprobe_opts, opts,
+ .retprobe = retprobe
+ );
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct stacktrace_ips *skel;
+
+ skel = stacktrace_ips__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "stacktrace_ips__open_and_load"))
+ return;
+
+ if (!skel->kconfig->CONFIG_UNWINDER_ORC) {
+ test__skip();
+ goto cleanup;
+ }
+
+ skel->links.kprobe_test = bpf_program__attach_kprobe_opts(
+ skel->progs.kprobe_test,
+ "bpf_testmod_stacktrace_test", &opts);
+ if (!ASSERT_OK_PTR(skel->links.kprobe_test, "bpf_program__attach_kprobe_opts"))
+ goto cleanup;
+
+ trigger_module_test_read(1);
+
+ load_kallsyms();
+
+ if (retprobe) {
+ check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 4,
+ ksym_get_addr("bpf_testmod_stacktrace_test_3"),
+ ksym_get_addr("bpf_testmod_stacktrace_test_2"),
+ ksym_get_addr("bpf_testmod_stacktrace_test_1"),
+ ksym_get_addr("bpf_testmod_test_read"));
+ } else {
+ check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 5,
+ ksym_get_addr("bpf_testmod_stacktrace_test"),
+ ksym_get_addr("bpf_testmod_stacktrace_test_3"),
+ ksym_get_addr("bpf_testmod_stacktrace_test_2"),
+ ksym_get_addr("bpf_testmod_stacktrace_test_1"),
+ ksym_get_addr("bpf_testmod_test_read"));
+ }
+
+cleanup:
+ stacktrace_ips__destroy(skel);
+}
+
+static void test_stacktrace_ips_trampoline(bool retprobe)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct stacktrace_ips *skel;
+
+ skel = stacktrace_ips__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "stacktrace_ips__open_and_load"))
+ return;
+
+ if (!skel->kconfig->CONFIG_UNWINDER_ORC) {
+ test__skip();
+ goto cleanup;
+ }
+
+ if (retprobe) {
+ skel->links.fexit_test = bpf_program__attach_trace(skel->progs.fexit_test);
+ if (!ASSERT_OK_PTR(skel->links.fexit_test, "bpf_program__attach_trace"))
+ goto cleanup;
+ } else {
+ skel->links.fentry_test = bpf_program__attach_trace(skel->progs.fentry_test);
+ if (!ASSERT_OK_PTR(skel->links.fentry_test, "bpf_program__attach_trace"))
+ goto cleanup;
+ }
+
+ trigger_module_test_read(1);
+
+ load_kallsyms();
+
+ if (retprobe) {
+ check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 4,
+ ksym_get_addr("bpf_testmod_stacktrace_test_3"),
+ ksym_get_addr("bpf_testmod_stacktrace_test_2"),
+ ksym_get_addr("bpf_testmod_stacktrace_test_1"),
+ ksym_get_addr("bpf_testmod_test_read"));
+ } else {
+ check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 5,
+ ksym_get_addr("bpf_testmod_stacktrace_test"),
+ ksym_get_addr("bpf_testmod_stacktrace_test_3"),
+ ksym_get_addr("bpf_testmod_stacktrace_test_2"),
+ ksym_get_addr("bpf_testmod_stacktrace_test_1"),
+ ksym_get_addr("bpf_testmod_test_read"));
+ }
+
+cleanup:
+ stacktrace_ips__destroy(skel);
+}
+
static void __test_stacktrace_ips(void)
{
if (test__start_subtest("kprobe_multi"))
@@ -136,6 +238,14 @@ static void __test_stacktrace_ips(void)
test_stacktrace_ips_kprobe_multi(true);
if (test__start_subtest("raw_tp"))
test_stacktrace_ips_raw_tp();
+ if (test__start_subtest("kprobe"))
+ test_stacktrace_ips_kprobe(false);
+ if (test__start_subtest("kretprobe"))
+ test_stacktrace_ips_kprobe(true);
+ if (test__start_subtest("fentry"))
+ test_stacktrace_ips_trampoline(false);
+ if (test__start_subtest("fexit"))
+ test_stacktrace_ips_trampoline(true);
}
#else
static void __test_stacktrace_ips(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c b/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c
index 0f3bf594e7a5..300032a19445 100644
--- a/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c
+++ b/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c
@@ -9,6 +9,7 @@
static const char * const test_cases[] = {
"strcmp",
"strcasecmp",
+ "strncasecmp",
"strchr",
"strchrnul",
"strnchr",
diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
index 0ab36503c3b2..7d534fde0af9 100644
--- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c
+++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
@@ -8,6 +8,7 @@
#include "tailcall_freplace.skel.h"
#include "tc_bpf2bpf.skel.h"
#include "tailcall_fail.skel.h"
+#include "tailcall_sleepable.skel.h"
/* test_tailcall_1 checks basic functionality by patching multiple locations
* in a single program for a single tail call slot with nop->jmp, jmp->nop
@@ -1653,6 +1654,77 @@ static void test_tailcall_failure()
RUN_TESTS(tailcall_fail);
}
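+
+/* noinline plus the empty asm keep the trigger out of line and prevent the
+ * compiler from eliding it, so the uprobe has a stable attach address.
+ */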
+noinline void uprobe_sleepable_trigger(void)
+{
+ asm volatile ("");
+}
+
+static void test_tailcall_sleepable(void)
+{
+ LIBBPF_OPTS(bpf_uprobe_opts, opts);
+ struct tailcall_sleepable *skel;
+ int prog_fd, map_fd;
+ int err, key;
+
+ skel = tailcall_sleepable__open();
+ if (!ASSERT_OK_PTR(skel, "tailcall_sleepable__open"))
+ return;
+
+ /*
+ * Test that we can't load uprobe_normal and uprobe_sleepable_1,
+ * because they share the tailcall map.
+ */
+ bpf_program__set_autoload(skel->progs.uprobe_normal, true);
+ bpf_program__set_autoload(skel->progs.uprobe_sleepable_1, true);
+
+ err = tailcall_sleepable__load(skel);
+ if (!ASSERT_ERR(err, "tailcall_sleepable__load"))
+ goto out;
+
+ tailcall_sleepable__destroy(skel);
+
+ /*
+ * Test that we can tail call from one sleepable program to another.
+ */
+ skel = tailcall_sleepable__open();
+ if (!ASSERT_OK_PTR(skel, "tailcall_sleepable__open"))
+ return;
+
+ bpf_program__set_autoload(skel->progs.uprobe_sleepable_1, true);
+ bpf_program__set_autoload(skel->progs.uprobe_sleepable_2, true);
+
+ err = tailcall_sleepable__load(skel);
+ if (!ASSERT_OK(err, "tailcall_sleepable__load"))
+ goto out;
+
+ /* Add sleepable uprobe_sleepable_2 to jmp_table[0]. */
+ key = 0;
+ prog_fd = bpf_program__fd(skel->progs.uprobe_sleepable_2);
+ map_fd = bpf_map__fd(skel->maps.jmp_table);
+ err = bpf_map_update_elem(map_fd, &key, &prog_fd, BPF_ANY);
+ if (!ASSERT_OK(err, "update jmp_table"))
+ goto out;
+
+ skel->bss->my_pid = getpid();
+
+ /* Attach uprobe_sleepable_1 to uprobe_sleepable_trigger and hit it. */
+ opts.func_name = "uprobe_sleepable_trigger";
+ skel->links.uprobe_sleepable_1 = bpf_program__attach_uprobe_opts(
+ skel->progs.uprobe_sleepable_1,
+ -1,
+ "/proc/self/exe",
+ 0 /* offset */,
+ &opts);
+ if (!ASSERT_OK_PTR(skel->links.uprobe_sleepable_1, "bpf_program__attach_uprobe_opts"))
+ goto out;
+
+ uprobe_sleepable_trigger();
+ ASSERT_EQ(skel->bss->executed, 1, "executed");
+
+out:
+ tailcall_sleepable__destroy(skel);
+}
+
void test_tailcalls(void)
{
if (test__start_subtest("tailcall_1"))
@@ -1707,4 +1779,6 @@ void test_tailcalls(void)
test_tailcall_bpf2bpf_freplace();
if (test__start_subtest("tailcall_failure"))
test_tailcall_failure();
+ if (test__start_subtest("tailcall_sleepable"))
+ test_tailcall_sleepable();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_data.h b/tools/testing/selftests/bpf/prog_tests/task_local_data.h
index 2de38776a2d4..0f86b9275cf9 100644
--- a/tools/testing/selftests/bpf/prog_tests/task_local_data.h
+++ b/tools/testing/selftests/bpf/prog_tests/task_local_data.h
@@ -94,7 +94,7 @@ struct tld_metadata {
};
struct tld_meta_u {
- _Atomic __u8 cnt;
+ _Atomic __u16 cnt;
__u16 size;
struct tld_metadata metadata[];
};
@@ -217,7 +217,7 @@ out:
static tld_key_t __tld_create_key(const char *name, size_t size, bool dyn_data)
{
int err, i, sz, off = 0;
- __u8 cnt;
+ __u16 cnt;
if (!TLD_READ_ONCE(tld_meta_p)) {
err = __tld_init_meta_p();
diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
index 42e822ea352f..7bee33797c71 100644
--- a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
+++ b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
@@ -112,24 +112,24 @@ static void test_recursion(void)
task_ls_recursion__detach(skel);
/* Refer to the comment in BPF_PROG(on_update) for
- * the explanation on the value 201 and 100.
+ * the explanation on the value 200 and 1.
*/
map_fd = bpf_map__fd(skel->maps.map_a);
err = bpf_map_lookup_elem(map_fd, &task_fd, &value);
ASSERT_OK(err, "lookup map_a");
- ASSERT_EQ(value, 201, "map_a value");
- ASSERT_EQ(skel->bss->nr_del_errs, 1, "bpf_task_storage_delete busy");
+ ASSERT_EQ(value, 200, "map_a value");
+ ASSERT_EQ(skel->bss->nr_del_errs, 0, "bpf_task_storage_delete busy");
map_fd = bpf_map__fd(skel->maps.map_b);
err = bpf_map_lookup_elem(map_fd, &task_fd, &value);
ASSERT_OK(err, "lookup map_b");
- ASSERT_EQ(value, 100, "map_b value");
+ ASSERT_EQ(value, 1, "map_b value");
prog_fd = bpf_program__fd(skel->progs.on_update);
memset(&info, 0, sizeof(info));
err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
ASSERT_OK(err, "get prog info");
- ASSERT_EQ(info.recursion_misses, 0, "on_update prog recursion");
+ ASSERT_EQ(info.recursion_misses, 2, "on_update prog recursion");
prog_fd = bpf_program__fd(skel->progs.on_enter);
memset(&info, 0, sizeof(info));
diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_assoc.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_assoc.c
new file mode 100644
index 000000000000..461ded722351
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_assoc.c
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "struct_ops_assoc.skel.h"
+#include "struct_ops_assoc_reuse.skel.h"
+#include "struct_ops_assoc_in_timer.skel.h"
+
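+/* bpf_program__assoc_struct_ops() ties a non-struct_ops program to a single
+ * struct_ops map: associating a second map with the same program must fail,
+ * and struct_ops programs themselves cannot be associated explicitly.
+ */
+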
+static void test_st_ops_assoc(void)
+{
+ struct struct_ops_assoc *skel = NULL;
+ int err, pid;
+
+ skel = struct_ops_assoc__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_assoc__open"))
+ goto out;
+
+ /* cannot explicitly associate struct_ops program */
+ err = bpf_program__assoc_struct_ops(skel->progs.test_1_a,
+ skel->maps.st_ops_map_a, NULL);
+ ASSERT_ERR(err, "bpf_program__assoc_struct_ops(test_1_a, st_ops_map_a)");
+
+ err = bpf_program__assoc_struct_ops(skel->progs.syscall_prog_a,
+ skel->maps.st_ops_map_a, NULL);
+ ASSERT_OK(err, "bpf_program__assoc_struct_ops(syscall_prog_a, st_ops_map_a)");
+
+ err = bpf_program__assoc_struct_ops(skel->progs.sys_enter_prog_a,
+ skel->maps.st_ops_map_a, NULL);
+ ASSERT_OK(err, "bpf_program__assoc_struct_ops(sys_enter_prog_a, st_ops_map_a)");
+
+ err = bpf_program__assoc_struct_ops(skel->progs.syscall_prog_b,
+ skel->maps.st_ops_map_b, NULL);
+ ASSERT_OK(err, "bpf_program__assoc_struct_ops(syscall_prog_b, st_ops_map_b)");
+
+ err = bpf_program__assoc_struct_ops(skel->progs.sys_enter_prog_b,
+ skel->maps.st_ops_map_b, NULL);
+ ASSERT_OK(err, "bpf_program__assoc_struct_ops(sys_enter_prog_b, st_ops_map_b)");
+
+ /* sys_enter_prog_a already associated with map_a */
+ err = bpf_program__assoc_struct_ops(skel->progs.sys_enter_prog_a,
+ skel->maps.st_ops_map_b, NULL);
+ ASSERT_ERR(err, "bpf_program__assoc_struct_ops(sys_enter_prog_a, st_ops_map_b)");
+
+ err = struct_ops_assoc__attach(skel);
+ if (!ASSERT_OK(err, "struct_ops_assoc__attach"))
+ goto out;
+
+ /* run tracing prog that calls .test_1 and checks return */
+ pid = getpid();
+ skel->bss->test_pid = pid;
+ sys_gettid();
+ skel->bss->test_pid = 0;
+
+ ASSERT_EQ(skel->bss->test_err_a, 0, "skel->bss->test_err_a");
+ ASSERT_EQ(skel->bss->test_err_b, 0, "skel->bss->test_err_b");
+
+ /* run syscall_prog that calls .test_1 and checks return */
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.syscall_prog_a), NULL);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
+
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.syscall_prog_b), NULL);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
+
+ ASSERT_EQ(skel->bss->test_err_a, 0, "skel->bss->test_err_a");
+ ASSERT_EQ(skel->bss->test_err_b, 0, "skel->bss->test_err_b");
+
+out:
+ struct_ops_assoc__destroy(skel);
+}
+
+static void test_st_ops_assoc_reuse(void)
+{
+ struct struct_ops_assoc_reuse *skel = NULL;
+ int err;
+
+ skel = struct_ops_assoc_reuse__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_assoc_reuse__open"))
+ goto out;
+
+ err = bpf_program__assoc_struct_ops(skel->progs.syscall_prog_a,
+ skel->maps.st_ops_map_a, NULL);
+ ASSERT_OK(err, "bpf_program__assoc_struct_ops(syscall_prog_a, st_ops_map_a)");
+
+ err = bpf_program__assoc_struct_ops(skel->progs.syscall_prog_b,
+ skel->maps.st_ops_map_b, NULL);
+ ASSERT_OK(err, "bpf_program__assoc_struct_ops(syscall_prog_b, st_ops_map_b)");
+
+ err = struct_ops_assoc_reuse__attach(skel);
+ if (!ASSERT_OK(err, "struct_ops_assoc__attach"))
+ goto out;
+
+ /* run syscall_prog that calls .test_1 and checks return */
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.syscall_prog_a), NULL);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
+
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.syscall_prog_b), NULL);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
+
+ ASSERT_EQ(skel->bss->test_err_a, 0, "skel->bss->test_err_a");
+ ASSERT_EQ(skel->bss->test_err_b, 0, "skel->bss->test_err_b");
+
+out:
+ struct_ops_assoc_reuse__destroy(skel);
+}
+
+static void test_st_ops_assoc_in_timer(void)
+{
+ struct struct_ops_assoc_in_timer *skel = NULL;
+ int err;
+
+ skel = struct_ops_assoc_in_timer__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_assoc_in_timer__open"))
+ goto out;
+
+ err = bpf_program__assoc_struct_ops(skel->progs.syscall_prog,
+ skel->maps.st_ops_map, NULL);
+ ASSERT_OK(err, "bpf_program__assoc_struct_ops");
+
+ err = struct_ops_assoc_in_timer__attach(skel);
+ if (!ASSERT_OK(err, "struct_ops_assoc__attach"))
+ goto out;
+
+ /*
+ * Run .test_1 by calling kfunc bpf_kfunc_multi_st_ops_test_1_prog_arg() and check
+ * the return value. .test_1 will also schedule timer_cb, which runs .test_1 again
+ * immediately.
+ */
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.syscall_prog), NULL);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
+
+ /* Check the return of the kfunc after timer_cb runs */
+ while (!READ_ONCE(skel->bss->timer_cb_run))
+ sched_yield();
+ ASSERT_EQ(skel->bss->timer_test_1_ret, 1234, "skel->bss->timer_test_1_ret");
+ ASSERT_EQ(skel->bss->test_err, 0, "skel->bss->test_err");
+out:
+ struct_ops_assoc_in_timer__destroy(skel);
+}
+
+static void test_st_ops_assoc_in_timer_no_uref(void)
+{
+ struct struct_ops_assoc_in_timer *skel = NULL;
+ struct bpf_link *link;
+ int err;
+
+ skel = struct_ops_assoc_in_timer__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_assoc_in_timer__open"))
+ goto out;
+
+ err = bpf_program__assoc_struct_ops(skel->progs.syscall_prog,
+ skel->maps.st_ops_map, NULL);
+ ASSERT_OK(err, "bpf_program__assoc_struct_ops");
+
+ link = bpf_map__attach_struct_ops(skel->maps.st_ops_map);
+ if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops"))
+ goto out;
+
+ /*
+ * Run .test_1 by calling kfunc bpf_kfunc_multi_st_ops_test_1_prog_arg() and check
+ * the return value. .test_1 will also schedule timer_cb, which runs .test_1 again.
+ * timer_cb runs 500ms after syscall_prog, by which point user space no longer
+ * holds a reference to st_ops_map.
+ */
+ skel->bss->timer_ns = 500000000;
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.syscall_prog), NULL);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
+
+ /* Detach and close struct_ops map to cause it to be freed */
+ bpf_link__destroy(link);
+ close(bpf_program__fd(skel->progs.syscall_prog));
+ close(bpf_map__fd(skel->maps.st_ops_map));
+
+ /* Check the return of the kfunc after timer_cb runs */
+ while (!READ_ONCE(skel->bss->timer_cb_run))
+ sched_yield();
+ ASSERT_EQ(skel->bss->timer_test_1_ret, -1, "skel->bss->timer_test_1_ret");
+ ASSERT_EQ(skel->bss->test_err, 0, "skel->bss->test_err");
+out:
+ struct_ops_assoc_in_timer__destroy(skel);
+}
+
+void test_struct_ops_assoc(void)
+{
+ if (test__start_subtest("st_ops_assoc"))
+ test_st_ops_assoc();
+ if (test__start_subtest("st_ops_assoc_reuse"))
+ test_st_ops_assoc_reuse();
+ if (test__start_subtest("st_ops_assoc_in_timer"))
+ test_st_ops_assoc_in_timer();
+ if (test__start_subtest("st_ops_assoc_in_timer_no_uref"))
+ test_st_ops_assoc_in_timer_no_uref();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_task_local_data.c b/tools/testing/selftests/bpf/prog_tests/test_task_local_data.c
index 9fd6306b455c..9556ad3d986f 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_task_local_data.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_task_local_data.c
@@ -4,7 +4,7 @@
#include <test_progs.h>
#define TLD_FREE_DATA_ON_THREAD_EXIT
-#define TLD_DYN_DATA_SIZE 4096
+#define TLD_DYN_DATA_SIZE (getpagesize() - 8)
#include "task_local_data.h"
struct test_tld_struct {
diff --git a/tools/testing/selftests/bpf/prog_tests/test_xsk.c b/tools/testing/selftests/bpf/prog_tests/test_xsk.c
index 5af28f359cfd..bab4a31621c7 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_xsk.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_xsk.c
@@ -433,7 +433,7 @@ static u32 pkt_nb_frags(u32 frame_size, struct pkt_stream *pkt_stream, struct pk
}
/* Search for the end of the packet in verbatim mode */
- if (!pkt_continues(pkt->options))
+ if (!pkt_continues(pkt->options) || !pkt->valid)
return nb_frags;
next_frag = pkt_stream->current_pkt_nb;
@@ -1090,6 +1090,8 @@ static int __receive_pkts(struct test_spec *test, struct xsk_socket_info *xsk)
xsk_ring_prod__cancel(&umem->fq, nb_frags);
}
frags_processed -= nb_frags;
+ pkt_stream_cancel(pkt_stream);
+ pkts_sent--;
}
if (ifobj->use_fill_ring)
diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c
index 34f9ccce2602..09ff21e1ad2f 100644
--- a/tools/testing/selftests/bpf/prog_tests/timer.c
+++ b/tools/testing/selftests/bpf/prog_tests/timer.c
@@ -1,12 +1,27 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Facebook */
+#include <sched.h>
#include <test_progs.h>
+#include <linux/perf_event.h>
+#include <sys/syscall.h>
#include "timer.skel.h"
#include "timer_failure.skel.h"
#include "timer_interrupt.skel.h"
#define NUM_THR 8
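+/* Thin wrapper over the raw perf_event_open syscall; a hardware event with
+ * sample_period 10000 drives the NMI-context timer tests below.
+ */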
+static int perf_event_open(__u32 type, __u64 config, int pid, int cpu)
+{
+ struct perf_event_attr attr = {
+ .type = type,
+ .config = config,
+ .size = sizeof(struct perf_event_attr),
+ .sample_period = 10000,
+ };
+
+ return syscall(__NR_perf_event_open, &attr, pid, cpu, -1, 0);
+}
+
static void *spin_lock_thread(void *arg)
{
int i, err, prog_fd = *(int *)arg;
@@ -22,13 +37,174 @@ static void *spin_lock_thread(void *arg)
pthread_exit(arg);
}
-static int timer(struct timer *timer_skel)
+
+static int timer_stress_runner(struct timer *timer_skel, bool async_cancel)
{
- int i, err, prog_fd;
+ int i, err = 1, prog_fd;
LIBBPF_OPTS(bpf_test_run_opts, topts);
pthread_t thread_id[NUM_THR];
void *ret;
+ timer_skel->bss->async_cancel = async_cancel;
+ prog_fd = bpf_program__fd(timer_skel->progs.race);
+ for (i = 0; i < NUM_THR; i++) {
+ err = pthread_create(&thread_id[i], NULL,
+ &spin_lock_thread, &prog_fd);
+ if (!ASSERT_OK(err, "pthread_create"))
+ break;
+ }
+
+ while (i) {
+ err = pthread_join(thread_id[--i], &ret);
+ if (ASSERT_OK(err, "pthread_join"))
+ ASSERT_EQ(ret, (void *)&prog_fd, "pthread_join");
+ }
+ return err;
+}
+
+static int timer_stress(struct timer *timer_skel)
+{
+ return timer_stress_runner(timer_skel, false);
+}
+
+static int timer_stress_async_cancel(struct timer *timer_skel)
+{
+ return timer_stress_runner(timer_skel, true);
+}
+
+static void *nmi_cpu_worker(void *arg)
+{
+ volatile __u64 num = 1;
+ int i;
+
+ for (i = 0; i < 500000000; ++i)
+ num *= (i % 7) + 1;
+ (void)num;
+
+ return NULL;
+}
+
+static int run_nmi_test(struct timer *timer_skel, struct bpf_program *prog)
+{
+ struct bpf_link *link = NULL;
+ int pe_fd = -1, pipefd[2] = {-1, -1}, pid = 0, status;
+ char buf = 0;
+ int ret = -1;
+
+ if (!ASSERT_OK(pipe(pipefd), "pipe"))
+ goto cleanup;
+
+ pid = fork();
+ if (pid == 0) {
+ /* Child: spawn multiple threads to consume multiple CPUs */
+ pthread_t threads[NUM_THR];
+ int i;
+
+ close(pipefd[1]);
+ read(pipefd[0], &buf, 1);
+ close(pipefd[0]);
+
+ for (i = 0; i < NUM_THR; i++)
+ pthread_create(&threads[i], NULL, nmi_cpu_worker, NULL);
+ for (i = 0; i < NUM_THR; i++)
+ pthread_join(threads[i], NULL);
+ exit(0);
+ }
+
+ if (!ASSERT_GE(pid, 0, "fork"))
+ goto cleanup;
+
+ /* Open perf event for child process across all CPUs */
+ pe_fd = perf_event_open(PERF_TYPE_HARDWARE,
+ PERF_COUNT_HW_CPU_CYCLES,
+ pid, /* measure child process */
+ -1); /* on any CPU */
+ if (pe_fd < 0) {
+ if (errno == ENOENT || errno == EOPNOTSUPP) {
+ printf("SKIP: no PERF_COUNT_HW_CPU_CYCLES\n");
+ test__skip();
+ ret = EOPNOTSUPP;
+ goto cleanup;
+ }
+ ASSERT_GE(pe_fd, 0, "perf_event_open");
+ goto cleanup;
+ }
+
+ link = bpf_program__attach_perf_event(prog, pe_fd);
+ if (!ASSERT_OK_PTR(link, "attach_perf_event"))
+ goto cleanup;
+ pe_fd = -1; /* Ownership transferred to link */
+
+ /* Signal child to start CPU work */
+ close(pipefd[0]);
+ pipefd[0] = -1;
+ write(pipefd[1], &buf, 1);
+ close(pipefd[1]);
+ pipefd[1] = -1;
+
+ waitpid(pid, &status, 0);
+ pid = 0;
+
+ /* Verify NMI context was hit */
+ ASSERT_GT(timer_skel->bss->test_hits, 0, "test_hits");
+ ret = 0;
+
+cleanup:
+ bpf_link__destroy(link);
+ if (pe_fd >= 0)
+ close(pe_fd);
+ if (pid > 0) {
+ write(pipefd[1], &buf, 1);
+ waitpid(pid, &status, 0);
+ }
+ if (pipefd[0] >= 0)
+ close(pipefd[0]);
+ if (pipefd[1] >= 0)
+ close(pipefd[1]);
+ return ret;
+}
+
+static int timer_stress_nmi_race(struct timer *timer_skel)
+{
+ int err;
+
+ err = run_nmi_test(timer_skel, timer_skel->progs.nmi_race);
+ if (err == EOPNOTSUPP)
+ return 0;
+ return err;
+}
+
+static int timer_stress_nmi_update(struct timer *timer_skel)
+{
+ int err;
+
+ err = run_nmi_test(timer_skel, timer_skel->progs.nmi_update);
+ if (err == EOPNOTSUPP)
+ return 0;
+ if (err)
+ return err;
+ ASSERT_GT(timer_skel->bss->update_hits, 0, "update_hits");
+ return 0;
+}
+
+static int timer_stress_nmi_cancel(struct timer *timer_skel)
+{
+ int err;
+
+ err = run_nmi_test(timer_skel, timer_skel->progs.nmi_cancel);
+ if (err == EOPNOTSUPP)
+ return 0;
+ if (err)
+ return err;
+ ASSERT_GT(timer_skel->bss->cancel_hits, 0, "cancel_hits");
+ return 0;
+}
+
+static int timer(struct timer *timer_skel)
+{
+ int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
err = timer__attach(timer_skel);
if (!ASSERT_OK(err, "timer_attach"))
return err;
@@ -63,25 +239,30 @@ static int timer(struct timer *timer_skel)
/* check that code paths completed */
ASSERT_EQ(timer_skel->bss->ok, 1 | 2 | 4, "ok");
- prog_fd = bpf_program__fd(timer_skel->progs.race);
- for (i = 0; i < NUM_THR; i++) {
- err = pthread_create(&thread_id[i], NULL,
- &spin_lock_thread, &prog_fd);
- if (!ASSERT_OK(err, "pthread_create"))
- break;
- }
+ return 0;
+}
- while (i) {
- err = pthread_join(thread_id[--i], &ret);
- if (ASSERT_OK(err, "pthread_join"))
- ASSERT_EQ(ret, (void *)&prog_fd, "pthread_join");
- }
+static int timer_cancel_async(struct timer *timer_skel)
+{
+ int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ prog_fd = bpf_program__fd(timer_skel->progs.test_async_cancel_succeed);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 0, "test_run");
+
+ usleep(500);
+ /* check that there were no errors in timer execution */
+ ASSERT_EQ(timer_skel->bss->err, 0, "err");
+
+ /* check that code paths completed */
+ ASSERT_EQ(timer_skel->bss->ok, 1 | 2 | 4, "ok");
return 0;
}
-/* TODO: use pid filtering */
-void serial_test_timer(void)
+static void test_timer(int (*timer_test_fn)(struct timer *timer_skel))
{
struct timer *timer_skel = NULL;
int err;
@@ -94,13 +275,48 @@ void serial_test_timer(void)
if (!ASSERT_OK_PTR(timer_skel, "timer_skel_load"))
return;
- err = timer(timer_skel);
+ err = timer_test_fn(timer_skel);
ASSERT_OK(err, "timer");
timer__destroy(timer_skel);
+}
+
+void serial_test_timer(void)
+{
+ test_timer(timer);
RUN_TESTS(timer_failure);
}
+void serial_test_timer_stress(void)
+{
+ test_timer(timer_stress);
+}
+
+void serial_test_timer_stress_async_cancel(void)
+{
+ test_timer(timer_stress_async_cancel);
+}
+
+void serial_test_timer_async_cancel(void)
+{
+ test_timer(timer_cancel_async);
+}
+
+void serial_test_timer_stress_nmi_race(void)
+{
+ test_timer(timer_stress_nmi_race);
+}
+
+void serial_test_timer_stress_nmi_update(void)
+{
+ test_timer(timer_stress_nmi_update);
+}
+
+void serial_test_timer_stress_nmi_cancel(void)
+{
+ test_timer(timer_stress_nmi_cancel);
+}
+
void test_timer_interrupt(void)
{
struct timer_interrupt *skel = NULL;
diff --git a/tools/testing/selftests/bpf/prog_tests/timer_start_deadlock.c b/tools/testing/selftests/bpf/prog_tests/timer_start_deadlock.c
new file mode 100644
index 000000000000..9f1f9aec8888
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/timer_start_deadlock.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include "timer_start_deadlock.skel.h"
+
+void test_timer_start_deadlock(void)
+{
+ struct timer_start_deadlock *skel;
+ int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+
+ skel = timer_start_deadlock__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ err = timer_start_deadlock__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ prog_fd = bpf_program__fd(skel->progs.start_timer);
+
+ /*
+ * Run the syscall program that attempts to deadlock.
+ * If the kernel deadlocks, this call will never return.
+ */
+ err = bpf_prog_test_run_opts(prog_fd, &opts);
+ ASSERT_OK(err, "prog_test_run");
+ ASSERT_EQ(opts.retval, 0, "prog_retval");
+
+ ASSERT_EQ(skel->bss->tp_called, 1, "tp_called");
+cleanup:
+ timer_start_deadlock__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/timer_start_delete_race.c b/tools/testing/selftests/bpf/prog_tests/timer_start_delete_race.c
new file mode 100644
index 000000000000..29a46e96f660
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/timer_start_delete_race.c
@@ -0,0 +1,137 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <pthread.h>
+#include <test_progs.h>
+#include "timer_start_delete_race.skel.h"
+
+/*
+ * Test for race between bpf_timer_start() and map element deletion.
+ *
+ * The race scenario:
+ * - CPU 1: bpf_timer_start() proceeds to bpf_async_process() and is about
+ * to call hrtimer_start() but hasn't yet
+ * - CPU 2: map_delete_elem() calls __bpf_async_cancel_and_free(); since
+ *   the timer is not scheduled yet, hrtimer_try_to_cancel() is a nop,
+ * then calls bpf_async_refcount_put() dropping refcnt to zero
+ * and scheduling call_rcu_tasks_trace()
+ * - CPU 1: continues and calls hrtimer_start()
+ * - After RCU tasks trace grace period: memory is freed
+ * - Timer callback fires on freed memory: UAF!
+ *
+ * This test stresses this race by having two threads:
+ * - Thread 1: repeatedly starts timers
+ * - Thread 2: repeatedly deletes map elements
+ *
+ * KASAN should detect the use-after-free.
+ */
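+
+/* In code order, the interleaving above is roughly (schematic, not the
+ * actual kernel implementation):
+ *
+ *   CPU 1: bpf_timer_start()          CPU 2: map_delete_elem()
+ *     bpf_async_process(t)              __bpf_async_cancel_and_free(t)
+ *                                         hrtimer_try_to_cancel(t)   // nop, not queued yet
+ *                                         bpf_async_refcount_put(t)  // refcnt -> 0, RCU free
+ *     hrtimer_start(&t->timer, ...)   // arms a timer in memory queued for freeing
+ *   ...rcu_tasks_trace grace period elapses, t is freed...
+ *   timer callback fires on freed t   // use-after-free
+ */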
+
+#define ITERATIONS 1000
+
+struct ctx {
+ struct timer_start_delete_race *skel;
+ volatile bool start;
+ volatile bool stop;
+ int errors;
+};
+
+static void *start_timer_thread(void *arg)
+{
+ struct ctx *ctx = arg;
+ cpu_set_t cpuset;
+ int fd, i;
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(0, &cpuset);
+ pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset);
+
+ while (!ctx->start && !ctx->stop)
+ usleep(1);
+ if (ctx->stop)
+ return NULL;
+
+ fd = bpf_program__fd(ctx->skel->progs.start_timer);
+
+ for (i = 0; i < ITERATIONS && !ctx->stop; i++) {
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ int err;
+
+ err = bpf_prog_test_run_opts(fd, &opts);
+ if (err || opts.retval) {
+ ctx->errors++;
+ break;
+ }
+ }
+
+ return NULL;
+}
+
+static void *delete_elem_thread(void *arg)
+{
+ struct ctx *ctx = arg;
+ cpu_set_t cpuset;
+ int fd, i;
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(1, &cpuset);
+ pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset);
+
+ while (!ctx->start && !ctx->stop)
+ usleep(1);
+ if (ctx->stop)
+ return NULL;
+
+ fd = bpf_program__fd(ctx->skel->progs.delete_elem);
+
+ for (i = 0; i < ITERATIONS && !ctx->stop; i++) {
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ int err;
+
+ err = bpf_prog_test_run_opts(fd, &opts);
+ if (err || opts.retval) {
+ ctx->errors++;
+ break;
+ }
+ }
+
+ return NULL;
+}
+
+void test_timer_start_delete_race(void)
+{
+ struct timer_start_delete_race *skel;
+ pthread_t threads[2];
+ struct ctx ctx = {};
+ int err;
+
+ skel = timer_start_delete_race__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ ctx.skel = skel;
+
+ err = pthread_create(&threads[0], NULL, start_timer_thread, &ctx);
+ if (!ASSERT_OK(err, "create start_timer_thread")) {
+ ctx.stop = true;
+ goto cleanup;
+ }
+
+ err = pthread_create(&threads[1], NULL, delete_elem_thread, &ctx);
+ if (!ASSERT_OK(err, "create delete_elem_thread")) {
+ ctx.stop = true;
+ pthread_join(threads[0], NULL);
+ goto cleanup;
+ }
+
+ ctx.start = true;
+
+ pthread_join(threads[0], NULL);
+ pthread_join(threads[1], NULL);
+
+ ASSERT_EQ(ctx.errors, 0, "thread_errors");
+
+ /* Either KASAN catches the UAF, the kernel crashes, or nothing happens */
+cleanup:
+ timer_start_delete_race__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_failure.c b/tools/testing/selftests/bpf/prog_tests/tracing_failure.c
index 10e231965589..f9f9e1cb87bf 100644
--- a/tools/testing/selftests/bpf/prog_tests/tracing_failure.c
+++ b/tools/testing/selftests/bpf/prog_tests/tracing_failure.c
@@ -73,7 +73,7 @@ static void test_tracing_deny(void)
static void test_fexit_noreturns(void)
{
test_tracing_fail_prog("fexit_noreturns",
- "Attaching fexit/fmod_ret to __noreturn function 'do_exit' is rejected.");
+ "Attaching fexit/fsession/fmod_ret to __noreturn function 'do_exit' is rejected.");
}
void test_tracing_failure(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index 4b4b081b46cc..302286a80154 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -6,6 +6,8 @@
#include "verifier_and.skel.h"
#include "verifier_arena.skel.h"
#include "verifier_arena_large.skel.h"
+#include "verifier_arena_globals1.skel.h"
+#include "verifier_arena_globals2.skel.h"
#include "verifier_array_access.skel.h"
#include "verifier_async_cb_context.skel.h"
#include "verifier_basic_stack.skel.h"
@@ -28,9 +30,11 @@
#include "verifier_ctx.skel.h"
#include "verifier_ctx_sk_msg.skel.h"
#include "verifier_d_path.skel.h"
+#include "verifier_default_trusted_ptr.skel.h"
#include "verifier_direct_packet_access.skel.h"
#include "verifier_direct_stack_access_wraparound.skel.h"
#include "verifier_div0.skel.h"
+#include "verifier_div_mod_bounds.skel.h"
#include "verifier_div_overflow.skel.h"
#include "verifier_global_subprogs.skel.h"
#include "verifier_global_ptr_args.skel.h"
@@ -108,6 +112,7 @@
#include "verifier_xdp_direct_packet_access.skel.h"
#include "verifier_bits_iter.skel.h"
#include "verifier_lsm.skel.h"
+#include "verifier_jit_inline.skel.h"
#include "irq.skel.h"
#define MAX_ENTRIES 11
@@ -147,6 +152,8 @@ static void run_tests_aux(const char *skel_name,
void test_verifier_and(void) { RUN(verifier_and); }
void test_verifier_arena(void) { RUN(verifier_arena); }
void test_verifier_arena_large(void) { RUN(verifier_arena_large); }
+void test_verifier_arena_globals1(void) { RUN(verifier_arena_globals1); }
+void test_verifier_arena_globals2(void) { RUN(verifier_arena_globals2); }
void test_verifier_basic_stack(void) { RUN(verifier_basic_stack); }
void test_verifier_bitfield_write(void) { RUN(verifier_bitfield_write); }
void test_verifier_bounds(void) { RUN(verifier_bounds); }
@@ -167,9 +174,11 @@ void test_verifier_const_or(void) { RUN(verifier_const_or); }
void test_verifier_ctx(void) { RUN(verifier_ctx); }
void test_verifier_ctx_sk_msg(void) { RUN(verifier_ctx_sk_msg); }
void test_verifier_d_path(void) { RUN(verifier_d_path); }
+void test_verifier_default_trusted_ptr(void) { RUN_TESTS(verifier_default_trusted_ptr); }
void test_verifier_direct_packet_access(void) { RUN(verifier_direct_packet_access); }
void test_verifier_direct_stack_access_wraparound(void) { RUN(verifier_direct_stack_access_wraparound); }
void test_verifier_div0(void) { RUN(verifier_div0); }
+void test_verifier_div_mod_bounds(void) { RUN(verifier_div_mod_bounds); }
void test_verifier_div_overflow(void) { RUN(verifier_div_overflow); }
void test_verifier_global_subprogs(void) { RUN(verifier_global_subprogs); }
void test_verifier_global_ptr_args(void) { RUN(verifier_global_ptr_args); }
@@ -247,6 +256,7 @@ void test_verifier_bits_iter(void) { RUN(verifier_bits_iter); }
void test_verifier_lsm(void) { RUN(verifier_lsm); }
void test_irq(void) { RUN(irq); }
void test_verifier_mtu(void) { RUN(verifier_mtu); }
+void test_verifier_jit_inline(void) { RUN(verifier_jit_inline); }
static int init_test_val_map(struct bpf_object *obj, char *map_name)
{
diff --git a/tools/testing/selftests/bpf/prog_tests/wq.c b/tools/testing/selftests/bpf/prog_tests/wq.c
index 15c67d23128b..84831eecc935 100644
--- a/tools/testing/selftests/bpf/prog_tests/wq.c
+++ b/tools/testing/selftests/bpf/prog_tests/wq.c
@@ -16,12 +16,12 @@ void serial_test_wq(void)
/* re-run the success test to check if the timer was actually executed */
wq_skel = wq__open_and_load();
- if (!ASSERT_OK_PTR(wq_skel, "wq_skel_load"))
+ if (!ASSERT_OK_PTR(wq_skel, "wq__open_and_load"))
return;
err = wq__attach(wq_skel);
if (!ASSERT_OK(err, "wq_attach"))
- return;
+ goto clean_up;
prog_fd = bpf_program__fd(wq_skel->progs.test_syscall_array_sleepable);
err = bpf_prog_test_run_opts(prog_fd, &topts);
@@ -31,6 +31,7 @@ void serial_test_wq(void)
usleep(50); /* 10 usecs should be enough, but give it extra */
ASSERT_EQ(wq_skel->bss->ok_sleepable, (1 << 1), "ok_sleepable");
+clean_up:
wq__destroy(wq_skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
index df27535995af..ad56e4370ce3 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
@@ -18,7 +18,7 @@ static void test_xdp_with_cpumap_helpers(void)
struct bpf_cpumap_val val = {
.qsize = 192,
};
- int err, prog_fd, prog_redir_fd, map_fd;
+ int err, prog_fd, prog_redir_fd, map_fd, bad_fd;
struct nstoken *nstoken = NULL;
__u32 idx = 0;
@@ -79,7 +79,22 @@ static void test_xdp_with_cpumap_helpers(void)
val.qsize = 192;
val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog);
err = bpf_map_update_elem(map_fd, &idx, &val, 0);
- ASSERT_NEQ(err, 0, "Add non-BPF_XDP_CPUMAP program to cpumap entry");
+ ASSERT_EQ(err, -EINVAL, "Add non-BPF_XDP_CPUMAP program to cpumap entry");
+
+ /* Try to attach non-BPF file descriptor */
+ bad_fd = open("/dev/null", O_RDONLY);
+ ASSERT_GE(bad_fd, 0, "Open /dev/null for non-BPF fd");
+
+ val.bpf_prog.fd = bad_fd;
+ err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+ ASSERT_EQ(err, -EINVAL, "Add non-BPF fd to cpumap entry");
+
+ /* Try to attach nonexistent file descriptor */
+ err = close(bad_fd);
+ ASSERT_EQ(err, 0, "Close non-BPF fd for nonexistent fd");
+
+ err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+ ASSERT_EQ(err, -EBADF, "Add nonexistent fd to cpumap entry");
/* Try to attach BPF_XDP program with frags to cpumap when we have
* already loaded a BPF_XDP program on the map
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_pull_data.c b/tools/testing/selftests/bpf/prog_tests/xdp_pull_data.c
index efa350d04ec5..910dabe95afd 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_pull_data.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_pull_data.c
@@ -114,12 +114,14 @@ static void test_xdp_pull_data_basic(void)
{
u32 pg_sz, max_meta_len, max_data_len;
struct test_xdp_pull_data *skel;
+ int buff_len;
skel = test_xdp_pull_data__open_and_load();
if (!ASSERT_OK_PTR(skel, "test_xdp_pull_data__open_and_load"))
return;
pg_sz = sysconf(_SC_PAGE_SIZE);
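+	/* 1.5 pages keeps the packet multi-buf regardless of page size (e.g. 64k) */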
+ buff_len = pg_sz + pg_sz / 2;
if (find_xdp_sizes(skel, pg_sz))
goto out;
@@ -140,13 +142,13 @@ static void test_xdp_pull_data_basic(void)
run_test(skel, XDP_PASS, pg_sz, 9000, 0, 1025, 1025);
/* multi-buf pkt, empty linear data area, pull requires memmove */
- run_test(skel, XDP_PASS, pg_sz, 9000, 0, 0, PULL_MAX);
+ run_test(skel, XDP_PASS, pg_sz, buff_len, 0, 0, PULL_MAX);
/* multi-buf pkt, no headroom */
- run_test(skel, XDP_PASS, pg_sz, 9000, max_meta_len, 1024, PULL_MAX);
+ run_test(skel, XDP_PASS, pg_sz, buff_len, max_meta_len, 1024, PULL_MAX);
/* multi-buf pkt, no tailroom, pull requires memmove */
- run_test(skel, XDP_PASS, pg_sz, 9000, 0, max_data_len, PULL_MAX);
+ run_test(skel, XDP_PASS, pg_sz, buff_len, 0, max_data_len, PULL_MAX);
/* Test cases with invalid pull length */
@@ -154,18 +156,18 @@ static void test_xdp_pull_data_basic(void)
run_test(skel, XDP_DROP, pg_sz, 2048, 0, 2048, 2049);
/* multi-buf pkt with no space left in linear data area */
- run_test(skel, XDP_DROP, pg_sz, 9000, max_meta_len, max_data_len,
+ run_test(skel, XDP_DROP, pg_sz, buff_len, max_meta_len, max_data_len,
PULL_MAX | PULL_PLUS_ONE);
/* multi-buf pkt, empty linear data area */
- run_test(skel, XDP_DROP, pg_sz, 9000, 0, 0, PULL_MAX | PULL_PLUS_ONE);
+ run_test(skel, XDP_DROP, pg_sz, buff_len, 0, 0, PULL_MAX | PULL_PLUS_ONE);
/* multi-buf pkt, no headroom */
- run_test(skel, XDP_DROP, pg_sz, 9000, max_meta_len, 1024,
+ run_test(skel, XDP_DROP, pg_sz, buff_len, max_meta_len, 1024,
PULL_MAX | PULL_PLUS_ONE);
/* multi-buf pkt, no tailroom */
- run_test(skel, XDP_DROP, pg_sz, 9000, 0, max_data_len,
+ run_test(skel, XDP_DROP, pg_sz, buff_len, 0, max_data_len,
PULL_MAX | PULL_PLUS_ONE);
out:
diff --git a/tools/testing/selftests/bpf/progs/arena_list.c b/tools/testing/selftests/bpf/progs/arena_list.c
index 3a2ddcacbea6..235d8cc95bdd 100644
--- a/tools/testing/selftests/bpf/progs/arena_list.c
+++ b/tools/testing/selftests/bpf/progs/arena_list.c
@@ -30,6 +30,7 @@ struct arena_list_head __arena *list_head;
int list_sum;
int cnt;
bool skip = false;
+const volatile bool nonsleepable = false;
#ifdef __BPF_FEATURE_ADDR_SPACE_CAST
long __arena arena_sum;
@@ -42,6 +43,9 @@ int test_val SEC(".addr_space.1");
int zero;
+void bpf_rcu_read_lock(void) __ksym;
+void bpf_rcu_read_unlock(void) __ksym;
+
SEC("syscall")
int arena_list_add(void *ctx)
{
@@ -71,6 +75,10 @@ int arena_list_del(void *ctx)
struct elem __arena *n;
int sum = 0;
+ /* Take rcu_read_lock to test non-sleepable context */
+ if (nonsleepable)
+ bpf_rcu_read_lock();
+
arena_sum = 0;
list_for_each_entry(n, list_head, node) {
sum += n->value;
@@ -79,6 +87,9 @@ int arena_list_del(void *ctx)
bpf_free(n);
}
list_sum = sum;
+
+ if (nonsleepable)
+ bpf_rcu_read_unlock();
#else
skip = true;
#endif
diff --git a/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c b/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c
index ff189a736ad8..8fc38592a87b 100644
--- a/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c
+++ b/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c
@@ -62,9 +62,9 @@ static int create_attach_counter(__u64 cg_id, __u64 state, __u64 pending)
&init, BPF_NOEXIST);
}
-SEC("fentry/cgroup_attach_task")
-int BPF_PROG(counter, struct cgroup *dst_cgrp, struct task_struct *leader,
- bool threadgroup)
+SEC("tp_btf/cgroup_attach_task")
+int BPF_PROG(counter, struct cgroup *dst_cgrp, const char *path,
+ struct task_struct *task, bool threadgroup)
{
__u64 cg_id = cgroup_id(dst_cgrp);
struct percpu_attach_counter *pcpu_counter = bpf_map_lookup_elem(
diff --git a/tools/testing/selftests/bpf/progs/cgroup_iter_memcg.c b/tools/testing/selftests/bpf/progs/cgroup_iter_memcg.c
new file mode 100644
index 000000000000..59fb70a3cc50
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgroup_iter_memcg.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_core_read.h>
+#include "cgroup_iter_memcg.h"
+
+char _license[] SEC("license") = "GPL";
+
+/* The latest values read are stored here. */
+struct memcg_query memcg_query SEC(".data.query");
+
+SEC("iter.s/cgroup")
+int cgroup_memcg_query(struct bpf_iter__cgroup *ctx)
+{
+ struct cgroup *cgrp = ctx->cgroup;
+ struct cgroup_subsys_state *css;
+ struct mem_cgroup *memcg;
+
+ if (!cgrp)
+ return 1;
+
+ css = &cgrp->self;
+ memcg = bpf_get_mem_cgroup(css);
+ if (!memcg)
+ return 1;
+
+ bpf_mem_cgroup_flush_stats(memcg);
+
+ memcg_query.nr_anon_mapped = bpf_mem_cgroup_page_state(memcg, NR_ANON_MAPPED);
+ memcg_query.nr_shmem = bpf_mem_cgroup_page_state(memcg, NR_SHMEM);
+ memcg_query.nr_file_pages = bpf_mem_cgroup_page_state(memcg, NR_FILE_PAGES);
+ memcg_query.nr_file_mapped = bpf_mem_cgroup_page_state(memcg, NR_FILE_MAPPED);
+ memcg_query.memcg_kmem = bpf_mem_cgroup_page_state(memcg, MEMCG_KMEM);
+ memcg_query.pgfault = bpf_mem_cgroup_vm_events(memcg, PGFAULT);
+
+ bpf_put_mem_cgroup(memcg);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/compute_live_registers.c b/tools/testing/selftests/bpf/progs/compute_live_registers.c
index 6884ab99a421..f05e120f3450 100644
--- a/tools/testing/selftests/bpf/progs/compute_live_registers.c
+++ b/tools/testing/selftests/bpf/progs/compute_live_registers.c
@@ -431,6 +431,47 @@ __naked void subprog1(void)
::: __clobber_all);
}
+#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64)
+
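+/* Indirect jump (gotox) through a two-entry jump table; the __msg lines
+ * below assert the verifier's live-register marks at each instruction.
+ */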
+SEC("socket")
+__log_level(2)
+__msg("2: .1........ (07) r1 += 8")
+__msg("3: .1........ (79) r2 = *(u64 *)(r1 +0)")
+__msg("4: ..2....... (b7) r3 = 1")
+__msg("5: ..23...... (b7) r4 = 2")
+__msg("6: ..234..... (0d) gotox r2")
+__msg("7: ...3...... (bf) r0 = r3")
+__msg("8: 0......... (95) exit")
+__msg("9: ....4..... (bf) r0 = r4")
+__msg("10: 0......... (95) exit")
+__naked
+void gotox(void)
+{
+ asm volatile (
+ ".pushsection .jumptables,\"\",@progbits;"
+"jt0_%=: .quad l0_%= - socket;"
+ ".quad l1_%= - socket;"
+ ".size jt0_%=, 16;"
+ ".global jt0_%=;"
+ ".popsection;"
+
+ "r1 = jt0_%= ll;"
+ "r1 += 8;"
+ "r2 = *(u64 *)(r1 + 0);"
+ "r3 = 1;"
+ "r4 = 2;"
+ ".8byte %[gotox_r2];"
+"l0_%=: r0 = r3;"
+ "exit;"
+"l1_%=: r0 = r4;"
+ "exit;"
+ :
+ : __imm_insn(gotox_r2, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_2, BPF_REG_0, 0, 0))
+ : __clobber_all);
+}
+
+#endif /* __TARGET_ARCH_x86 || __TARGET_ARCH_arm64 */
+
/* to retain debug info for BTF generation */
void kfunc_root(void)
{
diff --git a/tools/testing/selftests/bpf/progs/cpumask_failure.c b/tools/testing/selftests/bpf/progs/cpumask_failure.c
index 8a2fd596c8a3..61c32e91e8c3 100644
--- a/tools/testing/selftests/bpf/progs/cpumask_failure.c
+++ b/tools/testing/selftests/bpf/progs/cpumask_failure.c
@@ -110,7 +110,7 @@ SEC("tp_btf/task_newtask")
__failure __msg("NULL pointer passed to trusted arg0")
int BPF_PROG(test_cpumask_null, struct task_struct *task, u64 clone_flags)
{
- /* NULL passed to KF_TRUSTED_ARGS kfunc. */
+ /* NULL passed to kfunc. */
bpf_cpumask_empty(NULL);
return 0;
diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c
index dda6a8dada82..8f2ae9640886 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c
@@ -1465,7 +1465,7 @@ int xdp_invalid_data_slice2(struct xdp_md *xdp)
}
/* Only supported prog type can create skb-type dynptrs */
-SEC("?raw_tp")
+SEC("?xdp")
__failure __msg("calling kernel function bpf_dynptr_from_skb is not allowed")
int skb_invalid_ctx(void *ctx)
{
diff --git a/tools/testing/selftests/bpf/progs/file_reader.c b/tools/testing/selftests/bpf/progs/file_reader.c
index 4d756b623557..462712ff3b8a 100644
--- a/tools/testing/selftests/bpf/progs/file_reader.c
+++ b/tools/testing/selftests/bpf/progs/file_reader.c
@@ -77,7 +77,7 @@ int on_open_validate_file_read(void *c)
err = 1;
return 0;
}
- bpf_task_work_schedule_signal_impl(task, &work->tw, &arrmap, task_work_callback, NULL);
+ bpf_task_work_schedule_signal(task, &work->tw, &arrmap, task_work_callback);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/free_timer.c b/tools/testing/selftests/bpf/progs/free_timer.c
index 4501ae8fc414..eccb2d47db43 100644
--- a/tools/testing/selftests/bpf/progs/free_timer.c
+++ b/tools/testing/selftests/bpf/progs/free_timer.c
@@ -7,6 +7,16 @@
#define MAX_ENTRIES 8
+/*
+ * clang considers 'sum += 1' as usage but 'sum++' as non-usage. GCC
+ * is more consistent and considers both 'sum += 1' and 'sum++' as
+ * non-usage. This triggers warnings in the functions below.
+ *
+ * Starting with GCC 16, -Wunused-but-set-variable=2 can be used to
+ * mimic clang's behavior.
+ */
+#if !defined(__clang__) && __GNUC__ > 15
+#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
+#endif
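+
+/* Illustrative shape of the construct the pragma covers: an accumulator
+ * that is written in a loop but never read afterwards, e.g.
+ *
+ *	int sum = 0, i;
+ *
+ *	for (i = 0; i < n; i++)
+ *		sum += 1;	// written but never read back
+ */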
+
struct map_value {
struct bpf_timer timer;
};
diff --git a/tools/testing/selftests/bpf/progs/fsession_test.c b/tools/testing/selftests/bpf/progs/fsession_test.c
new file mode 100644
index 000000000000..86e8a2fe467e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/fsession_test.c
@@ -0,0 +1,179 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 ChinaTelecom */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
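+/* An fsession program attaches at both entry and exit of the target;
+ * bpf_session_is_return() tells the two invocations apart, and the extra
+ * trailing BPF_PROG argument carries the return value (0 on entry).
+ */
+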
+__u64 test1_entry_result = 0;
+__u64 test1_exit_result = 0;
+
+SEC("fsession/bpf_fentry_test1")
+int BPF_PROG(test1, int a, int ret)
+{
+ bool is_exit = bpf_session_is_return(ctx);
+
+ if (!is_exit) {
+ test1_entry_result = a == 1 && ret == 0;
+ return 0;
+ }
+
+ test1_exit_result = a == 1 && ret == 2;
+ return 0;
+}
+
+__u64 test2_entry_result = 0;
+__u64 test2_exit_result = 0;
+
+SEC("fsession/bpf_fentry_test3")
+int BPF_PROG(test2, char a, int b, __u64 c, int ret)
+{
+ bool is_exit = bpf_session_is_return(ctx);
+
+ if (!is_exit) {
+ test2_entry_result = a == 4 && b == 5 && c == 6 && ret == 0;
+ return 0;
+ }
+
+ test2_exit_result = a == 4 && b == 5 && c == 6 && ret == 15;
+ return 0;
+}
+
+__u64 test3_entry_result = 0;
+__u64 test3_exit_result = 0;
+
+SEC("fsession/bpf_fentry_test4")
+int BPF_PROG(test3, void *a, char b, int c, __u64 d, int ret)
+{
+ bool is_exit = bpf_session_is_return(ctx);
+
+ if (!is_exit) {
+ test3_entry_result = a == (void *)7 && b == 8 && c == 9 && d == 10 && ret == 0;
+ return 0;
+ }
+
+ test3_exit_result = a == (void *)7 && b == 8 && c == 9 && d == 10 && ret == 34;
+ return 0;
+}
+
+__u64 test4_entry_result = 0;
+__u64 test4_exit_result = 0;
+
+SEC("fsession/bpf_fentry_test5")
+int BPF_PROG(test4, __u64 a, void *b, short c, int d, __u64 e, int ret)
+{
+ bool is_exit = bpf_session_is_return(ctx);
+
+ if (!is_exit) {
+ test4_entry_result = a == 11 && b == (void *)12 && c == 13 && d == 14 &&
+ e == 15 && ret == 0;
+ return 0;
+ }
+
+ test4_exit_result = a == 11 && b == (void *)12 && c == 13 && d == 14 &&
+ e == 15 && ret == 65;
+ return 0;
+}
+
+__u64 test5_entry_result = 0;
+__u64 test5_exit_result = 0;
+
+SEC("fsession/bpf_fentry_test7")
+int BPF_PROG(test5, struct bpf_fentry_test_t *arg, int ret)
+{
+ bool is_exit = bpf_session_is_return(ctx);
+
+ if (!is_exit) {
+ if (!arg)
+ test5_entry_result = ret == 0;
+ return 0;
+ }
+
+ if (!arg)
+ test5_exit_result = 1;
+ return 0;
+}
+
+__u64 test6_entry_result = 0;
+__u64 test6_exit_result = 0;
+SEC("fsession/bpf_fentry_test1")
+int BPF_PROG(test6, int a)
+{
+ __u64 addr = bpf_get_func_ip(ctx);
+
+ if (bpf_session_is_return(ctx))
+ test6_exit_result = (const void *) addr == &bpf_fentry_test1;
+ else
+ test6_entry_result = (const void *) addr == &bpf_fentry_test1;
+ return 0;
+}
+
+__u64 test7_entry_ok = 0;
+__u64 test7_exit_ok = 0;
+SEC("fsession/bpf_fentry_test1")
+int BPF_PROG(test7, int a)
+{
+ volatile __u64 *cookie = bpf_session_cookie(ctx);
+
+ if (!bpf_session_is_return(ctx)) {
+ *cookie = 0xAAAABBBBCCCCDDDDull;
+ test7_entry_ok = *cookie == 0xAAAABBBBCCCCDDDDull;
+ return 0;
+ }
+
+ test7_exit_ok = *cookie == 0xAAAABBBBCCCCDDDDull;
+ return 0;
+}
+
+__u64 test8_entry_ok = 0;
+__u64 test8_exit_ok = 0;
+
+SEC("fsession/bpf_fentry_test1")
+int BPF_PROG(test8, int a)
+{
+ volatile __u64 *cookie = bpf_session_cookie(ctx);
+
+ if (!bpf_session_is_return(ctx)) {
+ *cookie = 0x1111222233334444ull;
+ test8_entry_ok = *cookie == 0x1111222233334444ull;
+ return 0;
+ }
+
+ test8_exit_ok = *cookie == 0x1111222233334444ull;
+ return 0;
+}
+
+__u64 test9_entry_result = 0;
+__u64 test9_exit_result = 0;
+
+SEC("fsession/bpf_fentry_test1")
+int BPF_PROG(test9, int a, int ret)
+{
+ __u64 *cookie = bpf_session_cookie(ctx);
+
+ if (!bpf_session_is_return(ctx)) {
+ test9_entry_result = a == 1 && ret == 0;
+ *cookie = 0x123456ULL;
+ return 0;
+ }
+
+ test9_exit_result = a == 1 && ret == 2 && *cookie == 0x123456ULL;
+ return 0;
+}
+
+__u64 test10_result = 0;
+SEC("fexit/bpf_fentry_test1")
+int BPF_PROG(test10, int a, int ret)
+{
+ test10_result = a == 1 && ret == 2;
+ return 0;
+}
+
+__u64 test11_result = 0;
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(test11, int a)
+{
+ test11_result = a == 1;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/get_func_args_test.c b/tools/testing/selftests/bpf/progs/get_func_args_test.c
index e0f34a55e697..180ba5098ca1 100644
--- a/tools/testing/selftests/bpf/progs/get_func_args_test.c
+++ b/tools/testing/selftests/bpf/progs/get_func_args_test.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/bpf.h>
+#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <errno.h>
@@ -121,3 +121,85 @@ int BPF_PROG(fexit_test, int _a, int *_b, int _ret)
test4_result &= err == 0 && ret == 1234;
return 0;
}
+
+__u64 test5_result = 0;
+SEC("tp_btf/bpf_testmod_fentry_test1_tp")
+int BPF_PROG(tp_test1)
+{
+ __u64 cnt = bpf_get_func_arg_cnt(ctx);
+ __u64 a = 0, z = 0;
+ __s64 err;
+
+ test5_result = cnt == 1;
+
+ err = bpf_get_func_arg(ctx, 0, &a);
+ test5_result &= err == 0 && ((int) a == 1);
+
+ /* not valid argument */
+ err = bpf_get_func_arg(ctx, 1, &z);
+ test5_result &= err == -EINVAL;
+
+ return 0;
+}
+
+__u64 test6_result = 0;
+SEC("tp_btf/bpf_testmod_fentry_test2_tp")
+int BPF_PROG(tp_test2)
+{
+ __u64 cnt = bpf_get_func_arg_cnt(ctx);
+ __u64 a = 0, b = 0, z = 0;
+ __s64 err;
+
+ test6_result = cnt == 2;
+
+ /* valid arguments */
+ err = bpf_get_func_arg(ctx, 0, &a);
+ test6_result &= err == 0 && (int) a == 2;
+
+ err = bpf_get_func_arg(ctx, 1, &b);
+ test6_result &= err == 0 && b == 3;
+
+ /* not valid argument */
+ err = bpf_get_func_arg(ctx, 2, &z);
+ test6_result &= err == -EINVAL;
+
+ return 0;
+}
+
+__u64 test7_result = 0;
+#if defined(bpf_target_x86) || defined(bpf_target_arm64)
+SEC("fsession/bpf_fentry_test1")
+int BPF_PROG(test7)
+{
+ __u64 cnt = bpf_get_func_arg_cnt(ctx);
+ __u64 a = 0, z = 0, ret = 0;
+ __s64 err;
+
+ test7_result = cnt == 1;
+
+ /* valid arguments */
+ err = bpf_get_func_arg(ctx, 0, &a);
+ test7_result &= err == 0 && ((int) a == 1);
+
+ /* not valid argument */
+ err = bpf_get_func_arg(ctx, 1, &z);
+ test7_result &= err == -EINVAL;
+
+ if (bpf_session_is_return(ctx)) {
+ err = bpf_get_func_ret(ctx, &ret);
+ test7_result &= err == 0 && ret == 2;
+ } else {
+ err = bpf_get_func_ret(ctx, &ret);
+ test7_result &= err == 0 && ret == 0;
+ }
+
+ return 0;
+}
+#else
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(test7)
+{
+ test7_result = 1;
+ return 0;
+}
+#endif
diff --git a/tools/testing/selftests/bpf/progs/get_func_ip_test.c b/tools/testing/selftests/bpf/progs/get_func_ip_test.c
index 2011cacdeb18..43ff836a8ed8 100644
--- a/tools/testing/selftests/bpf/progs/get_func_ip_test.c
+++ b/tools/testing/selftests/bpf/progs/get_func_ip_test.c
@@ -103,3 +103,26 @@ int BPF_URETPROBE(test8, int ret)
test8_result = (const void *) addr == (const void *) uprobe_trigger;
return 0;
}
+
+__u64 test9_entry_result = 0;
+__u64 test9_exit_result = 0;
+#if defined(bpf_target_x86) || defined(bpf_target_arm64)
+SEC("fsession/bpf_fentry_test1")
+int BPF_PROG(test9, int a)
+{
+ __u64 addr = bpf_get_func_ip(ctx);
+
+ if (bpf_session_is_return(ctx))
+ test9_exit_result = (const void *) addr == &bpf_fentry_test1;
+ else
+ test9_entry_result = (const void *) addr == &bpf_fentry_test1;
+ return 0;
+}
+#else
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(test9, int a)
+{
+ test9_entry_result = test9_exit_result = 1;
+ return 0;
+}
+#endif
diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c
index 7dd92a303bf6..7f27b517d5d5 100644
--- a/tools/testing/selftests/bpf/progs/iters.c
+++ b/tools/testing/selftests/bpf/progs/iters.c
@@ -1926,4 +1926,144 @@ static int loop1_wrapper(void)
);
}
+/*
+ * This is similar to the test case absent_mark_in_the_middle_state(),
+ * but adapted for use with bpf_loop().
+ */
+SEC("raw_tp")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure __msg("math between fp pointer and register with unbounded min value is not allowed")
+__naked void absent_mark_in_the_middle_state4(void)
+{
+ /*
+ * Equivalent to a C program below:
+ *
+ * int main(void) {
+ * fp[-8] = bpf_get_prandom_u32();
+ * fp[-16] = -32; // used in a memory access below
+ * bpf_loop(7, loop_cb4, fp, 0);
+ * return 0;
+ * }
+ *
+ * int loop_cb4(int i, void *ctx) {
+ * if (unlikely(ctx[-8] > bpf_get_prandom_u32()))
+ * *(u64 *)(fp + ctx[-16]) = 42; // aligned access expected
+ * if (unlikely(fp[-8] > bpf_get_prandom_u32()))
+ * ctx[-16] = -31; // makes said access unaligned
+ * return 0;
+ * }
+ */
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "r8 = r0;"
+ "*(u64 *)(r10 - 8) = r0;"
+ "*(u64 *)(r10 - 16) = -32;"
+ "r1 = 7;"
+ "r2 = loop_cb4 ll;"
+ "r3 = r10;"
+ "r4 = 0;"
+ "call %[bpf_loop];"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_loop),
+ __imm(bpf_get_prandom_u32)
+ : __clobber_all
+ );
+}
+
+__used __naked
+static void loop_cb4(void)
+{
+ asm volatile (
+ "r9 = r2;"
+ "r8 = *(u64 *)(r9 - 8);"
+ "r6 = *(u64 *)(r9 - 16);"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 > r8 goto use_fp16_%=;"
+ "1:"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 > r8 goto update_fp16_%=;"
+ "2:"
+ "r0 = 0;"
+ "exit;"
+ "use_fp16_%=:"
+ "r1 = r10;"
+ "r1 += r6;"
+ "*(u64 *)(r1 + 0) = 42;"
+ "goto 1b;"
+ "update_fp16_%=:"
+ "*(u64 *)(r9 - 16) = -31;"
+ "goto 2b;"
+ :
+ : __imm(bpf_get_prandom_u32)
+ );
+}
+
+SEC("raw_tp")
+__success
+__naked int stack_misc_vs_scalar_in_a_loop(void)
+{
+ asm volatile(
+ "*(u8 *)(r10 - 15) = 1;" /* This marks stack slot fp[-16] as STACK_MISC. */
+ "*(u8 *)(r10 - 23) = 1;"
+ "*(u8 *)(r10 - 31) = 1;"
+ "*(u8 *)(r10 - 39) = 1;"
+ "*(u8 *)(r10 - 47) = 1;"
+ "*(u8 *)(r10 - 55) = 1;"
+ "*(u8 *)(r10 - 63) = 1;"
+ "*(u8 *)(r10 - 71) = 1;"
+ "*(u8 *)(r10 - 79) = 1;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "loop_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto loop_end_%=;"
+
+#define maybe_change_stack_slot(off) \
+ "call %[bpf_get_prandom_u32];" \
+ "if r0 == 42 goto +1;" \
+ "goto +1;" \
+ "*(u64 *)(r10 " #off ") = r0;"
+
+ /*
+ * When comparing verifier states fp[-16] will be
+ * either STACK_MISC or SCALAR. Pruning logic should
+ * consider old STACK_MISC equivalent to current SCALAR
+ * to avoid states explosion.
+ */
+ maybe_change_stack_slot(-16)
+ maybe_change_stack_slot(-24)
+ maybe_change_stack_slot(-32)
+ maybe_change_stack_slot(-40)
+ maybe_change_stack_slot(-48)
+ maybe_change_stack_slot(-56)
+ maybe_change_stack_slot(-64)
+ maybe_change_stack_slot(-72)
+ maybe_change_stack_slot(-80)
+
+#undef maybe_change_stack_slot
+
+ "goto loop_%=;"
+ "loop_end_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_iter_num_new),
+ __imm(bpf_iter_num_next),
+ __imm(bpf_iter_num_destroy),
+ __imm_addr(amap)
+ : __clobber_all
+ );
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/iters_css.c b/tools/testing/selftests/bpf/progs/iters_css.c
index ec1f6c2f590b..5a1d87d186a9 100644
--- a/tools/testing/selftests/bpf/progs/iters_css.c
+++ b/tools/testing/selftests/bpf/progs/iters_css.c
@@ -12,8 +12,7 @@ char _license[] SEC("license") = "GPL";
pid_t target_pid;
u64 root_cg_id, leaf_cg_id;
u64 first_cg_id, last_cg_id;
-
-int pre_order_cnt, post_order_cnt, tree_high;
+int pre_order_cnt, post_order_cnt, children_cnt, tree_high;
struct cgroup *bpf_cgroup_from_id(u64 cgid) __ksym;
void bpf_cgroup_release(struct cgroup *p) __ksym;
@@ -43,7 +42,7 @@ int iter_css_for_each(const void *ctx)
}
root_css = &root_cgrp->self;
leaf_css = &leaf_cgrp->self;
- pre_order_cnt = post_order_cnt = tree_high = 0;
+ pre_order_cnt = post_order_cnt = children_cnt = tree_high = 0;
first_cg_id = last_cg_id = 0;
bpf_rcu_read_lock();
@@ -60,6 +59,10 @@ int iter_css_for_each(const void *ctx)
first_cg_id = cur_cgrp->kn->id;
}
+ bpf_for_each(css, pos, root_css, BPF_CGROUP_ITER_CHILDREN) {
+ children_cnt++;
+ }
+
bpf_for_each(css, pos, leaf_css, BPF_CGROUP_ITER_ANCESTORS_UP)
tree_high++;
diff --git a/tools/testing/selftests/bpf/progs/kfunc_implicit_args.c b/tools/testing/selftests/bpf/progs/kfunc_implicit_args.c
new file mode 100644
index 000000000000..89b6a47e22dd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kfunc_implicit_args.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+extern int bpf_kfunc_implicit_arg(int a) __weak __ksym;
+extern int bpf_kfunc_implicit_arg_impl(int a, struct bpf_prog_aux *aux) __weak __ksym; /* illegal */
+extern int bpf_kfunc_implicit_arg_legacy(int a, int b) __weak __ksym;
+extern int bpf_kfunc_implicit_arg_legacy_impl(int a, int b, struct bpf_prog_aux *aux) __weak __ksym;
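+
+/* The *_impl spellings expose the trailing struct bpf_prog_aux * argument
+ * that the kernel normally supplies implicitly for the short names; the
+ * tests below probe which spellings remain callable from a BPF program.
+ */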
+
+char _license[] SEC("license") = "GPL";
+
+SEC("syscall")
+__retval(5)
+int test_kfunc_implicit_arg(void *ctx)
+{
+ return bpf_kfunc_implicit_arg(5);
+}
+
+SEC("syscall")
+__failure __msg("cannot find address for kernel function bpf_kfunc_implicit_arg_impl")
+int test_kfunc_implicit_arg_impl_illegal(void *ctx)
+{
+ return bpf_kfunc_implicit_arg_impl(5, NULL);
+}
+
+SEC("syscall")
+__retval(7)
+int test_kfunc_implicit_arg_legacy(void *ctx)
+{
+ return bpf_kfunc_implicit_arg_legacy(3, 4);
+}
+
+SEC("syscall")
+__retval(11)
+int test_kfunc_implicit_arg_legacy_impl(void *ctx)
+{
+ return bpf_kfunc_implicit_arg_legacy_impl(5, 6, NULL);
+}
diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi_override.c b/tools/testing/selftests/bpf/progs/kprobe_multi_override.c
index 28f8487c9059..14f39fa6d515 100644
--- a/tools/testing/selftests/bpf/progs/kprobe_multi_override.c
+++ b/tools/testing/selftests/bpf/progs/kprobe_multi_override.c
@@ -5,9 +5,24 @@
char _license[] SEC("license") = "GPL";
+int pid = 0;
+
SEC("kprobe.multi")
int test_override(struct pt_regs *ctx)
{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ bpf_override_return(ctx, 123);
+ return 0;
+}
+
+SEC("kprobe")
+int test_kprobe_override(struct pt_regs *ctx)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
bpf_override_return(ctx, 123);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi_session_cookie.c b/tools/testing/selftests/bpf/progs/kprobe_multi_session_cookie.c
index 0835b5edf685..ad627016e3e5 100644
--- a/tools/testing/selftests/bpf/progs/kprobe_multi_session_cookie.c
+++ b/tools/testing/selftests/bpf/progs/kprobe_multi_session_cookie.c
@@ -1,9 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/bpf.h>
+#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <stdbool.h>
-#include "bpf_kfuncs.h"
char _license[] SEC("license") = "GPL";
@@ -23,16 +22,16 @@ int BPF_PROG(trigger)
return 0;
}
-static int check_cookie(__u64 val, __u64 *result)
+static int check_cookie(struct pt_regs *ctx, __u64 val, __u64 *result)
{
__u64 *cookie;
if (bpf_get_current_pid_tgid() >> 32 != pid)
return 1;
- cookie = bpf_session_cookie();
+ cookie = bpf_session_cookie(ctx);
- if (bpf_session_is_return())
+ if (bpf_session_is_return(ctx))
*result = *cookie == val ? val : 0;
else
*cookie = val;
@@ -42,17 +41,17 @@ static int check_cookie(__u64 val, __u64 *result)
SEC("kprobe.session/bpf_fentry_test1")
int test_kprobe_1(struct pt_regs *ctx)
{
- return check_cookie(1, &test_kprobe_1_result);
+ return check_cookie(ctx, 1, &test_kprobe_1_result);
}
SEC("kprobe.session/bpf_fentry_test1")
int test_kprobe_2(struct pt_regs *ctx)
{
- return check_cookie(2, &test_kprobe_2_result);
+ return check_cookie(ctx, 2, &test_kprobe_2_result);
}
SEC("kprobe.session/bpf_fentry_test1")
int test_kprobe_3(struct pt_regs *ctx)
{
- return check_cookie(3, &test_kprobe_3_result);
+ return check_cookie(ctx, 3, &test_kprobe_3_result);
}
diff --git a/tools/testing/selftests/bpf/progs/local_storage.c b/tools/testing/selftests/bpf/progs/local_storage.c
index 637e75df2e14..d0be77011a84 100644
--- a/tools/testing/selftests/bpf/progs/local_storage.c
+++ b/tools/testing/selftests/bpf/progs/local_storage.c
@@ -62,7 +62,6 @@ SEC("lsm/inode_unlink")
int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim)
{
__u32 pid = bpf_get_current_pid_tgid() >> 32;
- struct bpf_local_storage *local_storage;
struct local_storage *storage;
struct task_struct *task;
bool is_self_unlink;
@@ -88,15 +87,10 @@ int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim)
if (!storage || storage->value)
return 0;
- if (bpf_task_storage_delete(&task_storage_map, task))
+ if (bpf_task_storage_delete(&task_storage_map2, task))
return 0;
- /* Ensure that the task_storage_map is disconnected from the storage.
- * The storage memory should not be freed back to the
- * bpf_mem_alloc.
- */
- local_storage = task->bpf_storage;
- if (!local_storage || local_storage->smap)
+ if (bpf_task_storage_delete(&task_storage_map, task))
return 0;
task_storage_result = 0;
@@ -164,18 +158,9 @@ int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address,
if (bpf_sk_storage_delete(&sk_storage_map2, sk))
return 0;
- storage = bpf_sk_storage_get(&sk_storage_map2, sk, 0,
- BPF_LOCAL_STORAGE_GET_F_CREATE);
- if (!storage)
- return 0;
-
if (bpf_sk_storage_delete(&sk_storage_map, sk))
return 0;
- /* Ensure that the sk_storage_map is disconnected from the storage. */
- if (!sk->sk_bpf_storage || sk->sk_bpf_storage->smap)
- return 0;
-
sk_storage_result = 0;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/map_kptr.c b/tools/testing/selftests/bpf/progs/map_kptr.c
index edaba481db9d..e708ffbe1f61 100644
--- a/tools/testing/selftests/bpf/progs/map_kptr.c
+++ b/tools/testing/selftests/bpf/progs/map_kptr.c
@@ -487,6 +487,24 @@ int test_map_kptr_ref3(struct __sk_buff *ctx)
return 0;
}
+int num_of_refs;
+
+SEC("syscall")
+int count_ref(void *ctx)
+{
+ struct prog_test_ref_kfunc *p;
+ unsigned long arg = 0;
+
+ p = bpf_kfunc_call_test_acquire(&arg);
+ if (!p)
+ return 1;
+
+ num_of_refs = p->cnt.refs.counter;
+
+ bpf_kfunc_call_test_release(p);
+ return 0;
+}
+
SEC("syscall")
int test_ls_map_kptr_ref1(void *ctx)
{
diff --git a/tools/testing/selftests/bpf/progs/map_kptr_fail.c b/tools/testing/selftests/bpf/progs/map_kptr_fail.c
index 4c0ff01f1a96..6443b320c732 100644
--- a/tools/testing/selftests/bpf/progs/map_kptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/map_kptr_fail.c
@@ -272,7 +272,7 @@ int reject_untrusted_xchg(struct __sk_buff *ctx)
SEC("?tc")
__failure
-__msg("invalid kptr access, R2 type=ptr_prog_test_ref_kfunc expected=ptr_prog_test_member")
+__msg("invalid kptr access, R2 type=trusted_ptr_prog_test_ref_kfunc expected=ptr_prog_test_member")
int reject_bad_type_xchg(struct __sk_buff *ctx)
{
struct prog_test_ref_kfunc *ref_ptr;
@@ -291,7 +291,7 @@ int reject_bad_type_xchg(struct __sk_buff *ctx)
}
SEC("?tc")
-__failure __msg("invalid kptr access, R2 type=ptr_prog_test_ref_kfunc")
+__failure __msg("invalid kptr access, R2 type=trusted_ptr_prog_test_ref_kfunc")
int reject_member_of_ref_xchg(struct __sk_buff *ctx)
{
struct prog_test_ref_kfunc *ref_ptr;
diff --git a/tools/testing/selftests/bpf/progs/percpu_alloc_array.c b/tools/testing/selftests/bpf/progs/percpu_alloc_array.c
index 37c2d2608ec0..ed6a2a93d5a5 100644
--- a/tools/testing/selftests/bpf/progs/percpu_alloc_array.c
+++ b/tools/testing/selftests/bpf/progs/percpu_alloc_array.c
@@ -187,4 +187,36 @@ out:
return 0;
}
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, 2);
+ __type(key, int);
+ __type(value, u32);
+} percpu SEC(".maps");
+
+SEC("?fentry/bpf_fentry_test1")
+int BPF_PROG(test_percpu_array, int x)
+{
+ u64 value = 0xDEADC0DE;
+ int key = 0;
+
+ bpf_map_update_elem(&percpu, &key, &value, BPF_ANY);
+ return 0;
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
+ __type(key, struct bpf_cgroup_storage_key);
+ __type(value, u32);
+} percpu_cgroup_storage SEC(".maps");
+
+SEC("cgroup_skb/egress")
+int cgroup_egress(struct __sk_buff *skb)
+{
+ u32 *val = bpf_get_local_storage(&percpu_cgroup_storage, 0);
+
+ *val = 1;
+ return 1;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/rbtree_fail.c b/tools/testing/selftests/bpf/progs/rbtree_fail.c
index 4acb6af2dfe3..70b7baf9304b 100644
--- a/tools/testing/selftests/bpf/progs/rbtree_fail.c
+++ b/tools/testing/selftests/bpf/progs/rbtree_fail.c
@@ -153,7 +153,7 @@ long rbtree_api_add_to_multiple_trees(void *ctx)
}
SEC("?tc")
-__failure __msg("dereference of modified ptr_or_null_ ptr R2 off=16 disallowed")
+__failure __msg("Possibly NULL pointer passed to trusted arg1")
long rbtree_api_use_unchecked_remove_retval(void *ctx)
{
struct bpf_rb_node *res;
diff --git a/tools/testing/selftests/bpf/progs/rcu_read_lock.c b/tools/testing/selftests/bpf/progs/rcu_read_lock.c
index d70c28824bbe..b4e073168fb1 100644
--- a/tools/testing/selftests/bpf/progs/rcu_read_lock.c
+++ b/tools/testing/selftests/bpf/progs/rcu_read_lock.c
@@ -7,6 +7,16 @@
#include "bpf_tracing_net.h"
#include "bpf_misc.h"
+/* clang considers 'sum += 1' as usage but 'sum++' as non-usage. GCC
+ * is more consistent and considers both 'sum += 1' and 'sum++' as
+ * non-usage. This triggers warnings in the functions below.
+ *
+ * Starting with GCC 16 -Wunused-but-set-variable=2 can be used to
+ * mimic clang's behavior. */
+#if !defined(__clang__) && __GNUC__ > 15
+#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
+#endif
+
char _license[] SEC("license") = "GPL";
struct {
diff --git a/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c b/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c
deleted file mode 100644
index 69da05bb6c63..000000000000
--- a/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c
+++ /dev/null
@@ -1,38 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2022. Huawei Technologies Co., Ltd */
-#include "vmlinux.h"
-#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
-
-extern bool CONFIG_PREEMPTION __kconfig __weak;
-extern const int bpf_task_storage_busy __ksym;
-
-char _license[] SEC("license") = "GPL";
-
-int pid = 0;
-int busy = 0;
-
-struct {
- __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
- __uint(map_flags, BPF_F_NO_PREALLOC);
- __type(key, int);
- __type(value, long);
-} task SEC(".maps");
-
-SEC("raw_tp/sys_enter")
-int BPF_PROG(read_bpf_task_storage_busy)
-{
- int *value;
-
- if (!CONFIG_PREEMPTION)
- return 0;
-
- if (bpf_get_current_pid_tgid() >> 32 != pid)
- return 0;
-
- value = bpf_this_cpu_ptr(&bpf_task_storage_busy);
- if (value)
- busy = *value;
-
- return 0;
-}
diff --git a/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c b/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c
index 46d6eb2a3b17..c8f4815c8dfb 100644
--- a/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c
+++ b/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c
@@ -6,7 +6,6 @@
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
-void *local_storage_ptr = NULL;
void *sk_ptr = NULL;
int cookie_found = 0;
__u64 cookie = 0;
@@ -19,21 +18,17 @@ struct {
__type(value, int);
} sk_storage SEC(".maps");
-SEC("fexit/bpf_local_storage_destroy")
-int BPF_PROG(bpf_local_storage_destroy, struct bpf_local_storage *local_storage)
+SEC("fexit/bpf_sk_storage_free")
+int BPF_PROG(bpf_sk_storage_free, struct sock *sk)
{
- struct sock *sk;
-
- if (local_storage_ptr != local_storage)
+ if (sk_ptr != sk)
return 0;
- sk = bpf_core_cast(sk_ptr, struct sock);
if (sk->sk_cookie.counter != cookie)
return 0;
cookie_found++;
omem = sk->sk_omem_alloc.counter;
- local_storage_ptr = NULL;
return 0;
}
@@ -50,7 +45,6 @@ int BPF_PROG(inet6_sock_destruct, struct sock *sk)
if (value && *value == 0xdeadbeef) {
cookie_found++;
sk_ptr = sk;
- local_storage_ptr = sk->sk_bpf_storage;
}
return 0;
diff --git a/tools/testing/selftests/bpf/progs/stacktrace_ips.c b/tools/testing/selftests/bpf/progs/stacktrace_ips.c
index a96c8150d7f5..6830f2978613 100644
--- a/tools/testing/selftests/bpf/progs/stacktrace_ips.c
+++ b/tools/testing/selftests/bpf/progs/stacktrace_ips.c
@@ -31,6 +31,13 @@ int unused(void)
__u32 stack_key;
+SEC("kprobe")
+int kprobe_test(struct pt_regs *ctx)
+{
+ stack_key = bpf_get_stackid(ctx, &stackmap, 0);
+ return 0;
+}
+
SEC("kprobe.multi")
int kprobe_multi_test(struct pt_regs *ctx)
{
@@ -46,4 +53,24 @@ int rawtp_test(void *ctx)
return 0;
}
+SEC("fentry/bpf_testmod_stacktrace_test")
+int fentry_test(struct pt_regs *ctx)
+{
+ /*
+ * Skip 2 bpf_program/trampoline stack entries:
+ * - bpf_prog_bd1f7a949f55fb03_fentry_test
+ * - bpf_trampoline_182536277701
+ */
+ stack_key = bpf_get_stackid(ctx, &stackmap, 2);
+ return 0;
+}
+
+SEC("fexit/bpf_testmod_stacktrace_test")
+int fexit_test(struct pt_regs *ctx)
+{
+ /* Skip 2 bpf_program/trampoline stack entries, check fentry_test. */
+ stack_key = bpf_get_stackid(ctx, &stackmap, 2);
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/stream.c b/tools/testing/selftests/bpf/progs/stream.c
index 4a5bd852f10c..6f999ba951a3 100644
--- a/tools/testing/selftests/bpf/progs/stream.c
+++ b/tools/testing/selftests/bpf/progs/stream.c
@@ -42,6 +42,10 @@ int size;
u64 fault_addr;
void *arena_ptr;
+#define private(name) SEC(".bss." #name) __hidden __attribute__((aligned(8)))
+
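+/* A bpf_spin_lock may not live in the user-mmapable .bss, hence the hidden section. */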
+private(STREAM) struct bpf_spin_lock block;
+
SEC("syscall")
__success __retval(0)
int stream_exhaust(void *ctx)
@@ -234,4 +238,53 @@ int stream_arena_callback_fault(void *ctx)
return 0;
}
+SEC("syscall")
+__arch_x86_64
+__arch_arm64
+__success __retval(0)
+__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}")
+__stderr("Call trace:\n"
+"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
+"|[ \t]+[^\n]+\n)*}}")
+int stream_print_stack_kfunc(void *ctx)
+{
+ return bpf_stream_print_stack(BPF_STDERR);
+}
+
+SEC("syscall")
+__success __retval(-2)
+int stream_print_stack_invalid_id(void *ctx)
+{
+ /* Try to pass an invalid stream ID. */
+ return bpf_stream_print_stack((enum bpf_stream_id)0xbadcafe);
+}
+
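+/* Stream print kfuncs must remain usable while a bpf_spin_lock is held. */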
+SEC("syscall")
+__arch_x86_64
+__arch_arm64
+__success __retval(0)
+__stdout(_STR)
+__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}")
+__stderr("Call trace:\n"
+"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
+"|[ \t]+[^\n]+\n)*}}")
+int stream_print_kfuncs_locked(void *ctx)
+{
+ int ret;
+
+ bpf_spin_lock(&block);
+
+ ret = bpf_stream_printk(BPF_STDOUT, _STR);
+ if (ret)
+ goto out;
+
+ ret = bpf_stream_print_stack(BPF_STDERR);
+
+out:
+ bpf_spin_unlock(&block);
+
+ return ret;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/stream_fail.c b/tools/testing/selftests/bpf/progs/stream_fail.c
index 3662515f0107..8e8249f3521c 100644
--- a/tools/testing/selftests/bpf/progs/stream_fail.c
+++ b/tools/testing/selftests/bpf/progs/stream_fail.c
@@ -10,7 +10,7 @@ SEC("syscall")
__failure __msg("Possibly NULL pointer passed")
int stream_vprintk_null_arg(void *ctx)
{
- bpf_stream_vprintk_impl(BPF_STDOUT, "", NULL, 0, NULL);
+ bpf_stream_vprintk(BPF_STDOUT, "", NULL, 0);
return 0;
}
@@ -18,7 +18,7 @@ SEC("syscall")
__failure __msg("R3 type=scalar expected=")
int stream_vprintk_scalar_arg(void *ctx)
{
- bpf_stream_vprintk_impl(BPF_STDOUT, "", (void *)46, 0, NULL);
+ bpf_stream_vprintk(BPF_STDOUT, "", (void *)46, 0);
return 0;
}
@@ -26,7 +26,7 @@ SEC("syscall")
__failure __msg("arg#1 doesn't point to a const string")
int stream_vprintk_string_arg(void *ctx)
{
- bpf_stream_vprintk_impl(BPF_STDOUT, ctx, NULL, 0, NULL);
+ bpf_stream_vprintk(BPF_STDOUT, ctx, NULL, 0);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c b/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c
index 826e6b6aff7e..bddc4e8579d2 100644
--- a/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c
+++ b/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c
@@ -33,6 +33,8 @@ SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_null1(void *ctx) { return
SEC("syscall") __retval(USER_PTR_ERR)int test_strcmp_null2(void *ctx) { return bpf_strcmp("hello", NULL); }
SEC("syscall") __retval(USER_PTR_ERR) int test_strcasecmp_null1(void *ctx) { return bpf_strcasecmp(NULL, "HELLO"); }
SEC("syscall") __retval(USER_PTR_ERR)int test_strcasecmp_null2(void *ctx) { return bpf_strcasecmp("HELLO", NULL); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strncasecmp_null1(void *ctx) { return bpf_strncasecmp(NULL, "HELLO", 5); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strncasecmp_null2(void *ctx) { return bpf_strncasecmp("HELLO", NULL, 5); }
SEC("syscall") __retval(USER_PTR_ERR)int test_strchr_null(void *ctx) { return bpf_strchr(NULL, 'a'); }
SEC("syscall") __retval(USER_PTR_ERR)int test_strchrnul_null(void *ctx) { return bpf_strchrnul(NULL, 'a'); }
SEC("syscall") __retval(USER_PTR_ERR)int test_strnchr_null(void *ctx) { return bpf_strnchr(NULL, 1, 'a'); }
@@ -57,6 +59,8 @@ SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_user_ptr1(void *ctx) { ret
SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_user_ptr2(void *ctx) { return bpf_strcmp("hello", user_ptr); }
SEC("syscall") __retval(USER_PTR_ERR) int test_strcasecmp_user_ptr1(void *ctx) { return bpf_strcasecmp(user_ptr, "HELLO"); }
SEC("syscall") __retval(USER_PTR_ERR) int test_strcasecmp_user_ptr2(void *ctx) { return bpf_strcasecmp("HELLO", user_ptr); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strncasecmp_user_ptr1(void *ctx) { return bpf_strncasecmp(user_ptr, "HELLO", 5); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strncasecmp_user_ptr2(void *ctx) { return bpf_strncasecmp("HELLO", user_ptr, 5); }
SEC("syscall") __retval(USER_PTR_ERR) int test_strchr_user_ptr(void *ctx) { return bpf_strchr(user_ptr, 'a'); }
SEC("syscall") __retval(USER_PTR_ERR) int test_strchrnul_user_ptr(void *ctx) { return bpf_strchrnul(user_ptr, 'a'); }
SEC("syscall") __retval(USER_PTR_ERR) int test_strnchr_user_ptr(void *ctx) { return bpf_strnchr(user_ptr, 1, 'a'); }
@@ -83,6 +87,8 @@ SEC("syscall") __retval(-EFAULT) int test_strcmp_pagefault1(void *ctx) { return
SEC("syscall") __retval(-EFAULT) int test_strcmp_pagefault2(void *ctx) { return bpf_strcmp("hello", invalid_kern_ptr); }
SEC("syscall") __retval(-EFAULT) int test_strcasecmp_pagefault1(void *ctx) { return bpf_strcasecmp(invalid_kern_ptr, "HELLO"); }
SEC("syscall") __retval(-EFAULT) int test_strcasecmp_pagefault2(void *ctx) { return bpf_strcasecmp("HELLO", invalid_kern_ptr); }
+SEC("syscall") __retval(-EFAULT) int test_strncasecmp_pagefault1(void *ctx) { return bpf_strncasecmp(invalid_kern_ptr, "HELLO", 5); }
+SEC("syscall") __retval(-EFAULT) int test_strncasecmp_pagefault2(void *ctx) { return bpf_strncasecmp("HELLO", invalid_kern_ptr, 5); }
SEC("syscall") __retval(-EFAULT) int test_strchr_pagefault(void *ctx) { return bpf_strchr(invalid_kern_ptr, 'a'); }
SEC("syscall") __retval(-EFAULT) int test_strchrnul_pagefault(void *ctx) { return bpf_strchrnul(invalid_kern_ptr, 'a'); }
SEC("syscall") __retval(-EFAULT) int test_strnchr_pagefault(void *ctx) { return bpf_strnchr(invalid_kern_ptr, 1, 'a'); }
diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c b/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c
index 05e1da1f250f..412c53b87b18 100644
--- a/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c
+++ b/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c
@@ -8,6 +8,7 @@ char long_str[XATTR_SIZE_MAX + 1];
SEC("syscall") int test_strcmp_too_long(void *ctx) { return bpf_strcmp(long_str, long_str); }
SEC("syscall") int test_strcasecmp_too_long(void *ctx) { return bpf_strcasecmp(long_str, long_str); }
+SEC("syscall") int test_strncasecmp_too_long(void *ctx) { return bpf_strncasecmp(long_str, long_str, sizeof(long_str)); }
SEC("syscall") int test_strchr_too_long(void *ctx) { return bpf_strchr(long_str, 'b'); }
SEC("syscall") int test_strchrnul_too_long(void *ctx) { return bpf_strchrnul(long_str, 'b'); }
SEC("syscall") int test_strnchr_too_long(void *ctx) { return bpf_strnchr(long_str, sizeof(long_str), 'b'); }
diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_success.c b/tools/testing/selftests/bpf/progs/string_kfuncs_success.c
index a8513964516b..f65b1226a81a 100644
--- a/tools/testing/selftests/bpf/progs/string_kfuncs_success.c
+++ b/tools/testing/selftests/bpf/progs/string_kfuncs_success.c
@@ -17,6 +17,13 @@ __test(0) int test_strcasecmp_eq2(void *ctx) { return bpf_strcasecmp(str, "HELLO
__test(0) int test_strcasecmp_eq3(void *ctx) { return bpf_strcasecmp(str, "HELLO world"); }
__test(1) int test_strcasecmp_neq1(void *ctx) { return bpf_strcasecmp(str, "hello"); }
__test(1) int test_strcasecmp_neq2(void *ctx) { return bpf_strcasecmp(str, "HELLO"); }
+__test(0) int test_strncasecmp_eq1(void *ctx) { return bpf_strncasecmp(str, "hello world", 11); }
+__test(0) int test_strncasecmp_eq2(void *ctx) { return bpf_strncasecmp(str, "HELLO WORLD", 11); }
+__test(0) int test_strncasecmp_eq3(void *ctx) { return bpf_strncasecmp(str, "HELLO world", 11); }
+__test(0) int test_strncasecmp_eq4(void *ctx) { return bpf_strncasecmp(str, "hello", 5); }
+__test(0) int test_strncasecmp_eq5(void *ctx) { return bpf_strncasecmp(str, "hello world!", 11); }
+__test(-1) int test_strncasecmp_neq1(void *ctx) { return bpf_strncasecmp(str, "hello!", 6); }
+__test(1) int test_strncasecmp_neq2(void *ctx) { return bpf_strncasecmp(str, "abc", 3); }
__test(1) int test_strchr_found(void *ctx) { return bpf_strchr(str, 'e'); }
__test(11) int test_strchr_null(void *ctx) { return bpf_strchr(str, '\0'); }
__test(-ENOENT) int test_strchr_notfound(void *ctx) { return bpf_strchr(str, 'x'); }
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_assoc.c b/tools/testing/selftests/bpf/progs/struct_ops_assoc.c
new file mode 100644
index 000000000000..68842e3f936b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_assoc.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "../test_kmods/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+char _license[] SEC("license") = "GPL";
+
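+/*
+ * Two struct_ops maps implement the same ops below. The
+ * bpf_kfunc_multi_st_ops_test_1_assoc() kfunc must dispatch to the
+ * ->test_1 of whichever map is associated with the calling program.
+ */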
+int test_pid;
+
+/* Programs associated with st_ops_map_a */
+
+#define MAP_A_MAGIC 1234
+int test_err_a;
+
+SEC("struct_ops")
+int BPF_PROG(test_1_a, struct st_ops_args *args)
+{
+ return MAP_A_MAGIC;
+}
+
+SEC("tp_btf/sys_enter")
+int BPF_PROG(sys_enter_prog_a, struct pt_regs *regs, long id)
+{
+ struct st_ops_args args = {};
+ struct task_struct *task;
+ int ret;
+
+ task = bpf_get_current_task_btf();
+ if (!test_pid || task->pid != test_pid)
+ return 0;
+
+ ret = bpf_kfunc_multi_st_ops_test_1_assoc(&args);
+ if (ret != MAP_A_MAGIC)
+ test_err_a++;
+
+ return 0;
+}
+
+SEC("syscall")
+int syscall_prog_a(void *ctx)
+{
+ struct st_ops_args args = {};
+ int ret;
+
+ ret = bpf_kfunc_multi_st_ops_test_1_assoc(&args);
+ if (ret != MAP_A_MAGIC)
+ test_err_a++;
+
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_multi_st_ops st_ops_map_a = {
+ .test_1 = (void *)test_1_a,
+};
+
+/* Programs associated with st_ops_map_b */
+
+#define MAP_B_MAGIC 5678
+int test_err_b;
+
+SEC("struct_ops")
+int BPF_PROG(test_1_b, struct st_ops_args *args)
+{
+ return MAP_B_MAGIC;
+}
+
+SEC("tp_btf/sys_enter")
+int BPF_PROG(sys_enter_prog_b, struct pt_regs *regs, long id)
+{
+ struct st_ops_args args = {};
+ struct task_struct *task;
+ int ret;
+
+ task = bpf_get_current_task_btf();
+ if (!test_pid || task->pid != test_pid)
+ return 0;
+
+ ret = bpf_kfunc_multi_st_ops_test_1_assoc(&args);
+ if (ret != MAP_B_MAGIC)
+ test_err_b++;
+
+ return 0;
+}
+
+SEC("syscall")
+int syscall_prog_b(void *ctx)
+{
+ struct st_ops_args args = {};
+ int ret;
+
+ ret = bpf_kfunc_multi_st_ops_test_1_assoc(&args);
+ if (ret != MAP_B_MAGIC)
+ test_err_b++;
+
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_multi_st_ops st_ops_map_b = {
+ .test_1 = (void *)test_1_b,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_assoc_in_timer.c b/tools/testing/selftests/bpf/progs/struct_ops_assoc_in_timer.c
new file mode 100644
index 000000000000..0bed49e9f217
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_assoc_in_timer.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "../test_kmods/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct elem {
+ struct bpf_timer timer;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} array_map SEC(".maps");
+
+#define MAP_MAGIC 1234
+int recur;
+int test_err;
+int timer_ns;
+int timer_test_1_ret;
+int timer_cb_run;
+
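+/*
+ * timer_cb runs outside any struct_ops program; record whether the kfunc
+ * still resolves the associated struct_ops map from timer context.
+ */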
+__noinline static int timer_cb(void *map, int *key, struct bpf_timer *timer)
+{
+ struct st_ops_args args = {};
+
+ recur++;
+ timer_test_1_ret = bpf_kfunc_multi_st_ops_test_1_assoc(&args);
+ recur--;
+
+ timer_cb_run++;
+
+ return 0;
+}
+
+SEC("struct_ops")
+int BPF_PROG(test_1, struct st_ops_args *args)
+{
+ struct bpf_timer *timer;
+ int key = 0;
+
+ if (!recur) {
+ timer = bpf_map_lookup_elem(&array_map, &key);
+ if (!timer)
+ return 0;
+
+ bpf_timer_init(timer, &array_map, 1);
+ bpf_timer_set_callback(timer, timer_cb);
+ bpf_timer_start(timer, timer_ns, 0);
+ }
+
+ return MAP_MAGIC;
+}
+
+SEC("syscall")
+int syscall_prog(void *ctx)
+{
+ struct st_ops_args args = {};
+ int ret;
+
+ ret = bpf_kfunc_multi_st_ops_test_1_assoc(&args);
+ if (ret != MAP_MAGIC)
+ test_err++;
+
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_multi_st_ops st_ops_map = {
+ .test_1 = (void *)test_1,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_assoc_reuse.c b/tools/testing/selftests/bpf/progs/struct_ops_assoc_reuse.c
new file mode 100644
index 000000000000..396b3e58c729
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_assoc_reuse.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "../test_kmods/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+char _license[] SEC("license") = "GPL";
+
+#define MAP_A_MAGIC 1234
+int test_err_a;
+int recur;
+
+/*
+ * test_1_a is reused by two struct_ops maps, so the association is
+ * ambiguous: the kfunc must fail to resolve the associated struct_ops
+ * (returning -1) rather than call test_1 recursively.
+ */
+SEC("struct_ops")
+int BPF_PROG(test_1_a, struct st_ops_args *args)
+{
+ int ret;
+
+ if (!recur) {
+ recur++;
+ ret = bpf_kfunc_multi_st_ops_test_1_assoc(args);
+ if (ret != -1)
+ test_err_a++;
+ recur--;
+ }
+
+ return MAP_A_MAGIC;
+}
+
+/* Programs associated with st_ops_map_a */
+
+SEC("syscall")
+int syscall_prog_a(void *ctx)
+{
+ struct st_ops_args args = {};
+ int ret;
+
+ ret = bpf_kfunc_multi_st_ops_test_1_assoc(&args);
+ if (ret != MAP_A_MAGIC)
+ test_err_a++;
+
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_multi_st_ops st_ops_map_a = {
+ .test_1 = (void *)test_1_a,
+};
+
+/* Programs associated with st_ops_map_b */
+
+int test_err_b;
+
+SEC("syscall")
+int syscall_prog_b(void *ctx)
+{
+ struct st_ops_args args = {};
+ int ret;
+
+ ret = bpf_kfunc_multi_st_ops_test_1_assoc(&args);
+ if (ret != MAP_A_MAGIC)
+ test_err_b++;
+
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_multi_st_ops st_ops_map_b = {
+ .test_1 = (void *)test_1_a,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__wrong_type.c b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__wrong_type.c
index 6a2dd5367802..c8d217e89eea 100644
--- a/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__wrong_type.c
+++ b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__wrong_type.c
@@ -12,7 +12,7 @@ void bpf_task_release(struct task_struct *p) __ksym;
* reject programs returning a referenced kptr of the wrong type.
*/
SEC("struct_ops/test_return_ref_kptr")
-__failure __msg("At program exit the register R0 is not a known value (ptr_or_null_)")
+__failure __msg("At program exit the register R0 is not a known value (trusted_ptr_or_null_)")
struct task_struct *BPF_PROG(kptr_return_fail__wrong_type, int dummy,
struct task_struct *task, struct cgroup *cgrp)
{
diff --git a/tools/testing/selftests/bpf/progs/tailcall_sleepable.c b/tools/testing/selftests/bpf/progs/tailcall_sleepable.c
new file mode 100644
index 000000000000..d959a9eaaa9c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_sleepable.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "bpf_test_utils.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(__u32));
+ __array(values, void (void));
+} jmp_table SEC(".maps");
+
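+/*
+ * Sleepable and non-sleepable uprobe programs share one prog_array; the
+ * matching prog_tests check which tail-call combinations may be attached.
+ */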
+SEC("?uprobe")
+int uprobe_normal(void *ctx)
+{
+ bpf_tail_call_static(ctx, &jmp_table, 0);
+ return 0;
+}
+
+SEC("?uprobe.s")
+int uprobe_sleepable_1(void *ctx)
+{
+ bpf_tail_call_static(ctx, &jmp_table, 0);
+ return 0;
+}
+
+int executed = 0;
+int my_pid = 0;
+
+SEC("?uprobe.s")
+int uprobe_sleepable_2(void *ctx)
+{
+ int pid = bpf_get_current_pid_tgid() >> 32;
+
+ if (pid != my_pid)
+ return 0;
+
+ executed++;
+ return 0;
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/task_local_data.bpf.h b/tools/testing/selftests/bpf/progs/task_local_data.bpf.h
index 432fff2af844..fed53d63a7e5 100644
--- a/tools/testing/selftests/bpf/progs/task_local_data.bpf.h
+++ b/tools/testing/selftests/bpf/progs/task_local_data.bpf.h
@@ -80,7 +80,7 @@ struct tld_metadata {
};
struct tld_meta_u {
- __u8 cnt;
+ __u16 cnt;
__u16 size;
struct tld_metadata metadata[TLD_MAX_DATA_CNT];
};
diff --git a/tools/testing/selftests/bpf/progs/task_ls_recursion.c b/tools/testing/selftests/bpf/progs/task_ls_recursion.c
index f1853c38aada..b37359432692 100644
--- a/tools/testing/selftests/bpf/progs/task_ls_recursion.c
+++ b/tools/testing/selftests/bpf/progs/task_ls_recursion.c
@@ -36,14 +36,9 @@ int BPF_PROG(on_update)
if (!test_pid || task->pid != test_pid)
return 0;
+ /* This will succeed as there is no real deadlock */
ptr = bpf_task_storage_get(&map_a, task, 0,
BPF_LOCAL_STORAGE_GET_F_CREATE);
- /* ptr will not be NULL when it is called from
- * the bpf_task_storage_get(&map_b,...F_CREATE) in
- * the BPF_PROG(on_enter) below. It is because
- * the value can be found in map_a and the kernel
- * does not need to acquire any spin_lock.
- */
if (ptr) {
int err;
@@ -53,12 +48,7 @@ int BPF_PROG(on_update)
nr_del_errs++;
}
- /* This will still fail because map_b is empty and
- * this BPF_PROG(on_update) has failed to acquire
- * the percpu busy lock => meaning potential
- * deadlock is detected and it will fail to create
- * new storage.
- */
+ /* This will succeed as there is no real deadlock */
ptr = bpf_task_storage_get(&map_b, task, 0,
BPF_LOCAL_STORAGE_GET_F_CREATE);
if (ptr)
diff --git a/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c b/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c
index 986829aaf73a..6ce98fe9f387 100644
--- a/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c
+++ b/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c
@@ -1,15 +1,12 @@
// SPDX-License-Identifier: GPL-2.0
#include "vmlinux.h"
+#include <errno.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
-#ifndef EBUSY
-#define EBUSY 16
-#endif
-
extern bool CONFIG_PREEMPTION __kconfig __weak;
int nr_get_errs = 0;
int nr_del_errs = 0;
@@ -40,7 +37,7 @@ int BPF_PROG(socket_post_create, struct socket *sock, int family, int type,
ret = bpf_task_storage_delete(&task_storage,
bpf_get_current_task_btf());
- if (ret == -EBUSY)
+ if (ret == -EDEADLK || ret == -ETIMEDOUT)
__sync_fetch_and_add(&nr_del_errs, 1);
return 0;
diff --git a/tools/testing/selftests/bpf/progs/task_work.c b/tools/testing/selftests/bpf/progs/task_work.c
index 663a80990f8f..a6009d105158 100644
--- a/tools/testing/selftests/bpf/progs/task_work.c
+++ b/tools/testing/selftests/bpf/progs/task_work.c
@@ -65,8 +65,7 @@ int oncpu_hash_map(struct pt_regs *args)
work = bpf_map_lookup_elem(&hmap, &key);
if (!work)
return 0;
-
- bpf_task_work_schedule_resume_impl(task, &work->tw, &hmap, process_work, NULL);
+ bpf_task_work_schedule_resume(task, &work->tw, &hmap, process_work);
return 0;
}
@@ -80,7 +79,7 @@ int oncpu_array_map(struct pt_regs *args)
work = bpf_map_lookup_elem(&arrmap, &key);
if (!work)
return 0;
- bpf_task_work_schedule_signal_impl(task, &work->tw, &arrmap, process_work, NULL);
+ bpf_task_work_schedule_signal(task, &work->tw, &arrmap, process_work);
return 0;
}
@@ -102,6 +101,6 @@ int oncpu_lru_map(struct pt_regs *args)
work = bpf_map_lookup_elem(&lrumap, &key);
if (!work || work->data[0])
return 0;
- bpf_task_work_schedule_resume_impl(task, &work->tw, &lrumap, process_work, NULL);
+ bpf_task_work_schedule_resume(task, &work->tw, &lrumap, process_work);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/task_work_fail.c b/tools/testing/selftests/bpf/progs/task_work_fail.c
index 1270953fd092..82e4b8913333 100644
--- a/tools/testing/selftests/bpf/progs/task_work_fail.c
+++ b/tools/testing/selftests/bpf/progs/task_work_fail.c
@@ -53,7 +53,7 @@ int mismatch_map(struct pt_regs *args)
work = bpf_map_lookup_elem(&arrmap, &key);
if (!work)
return 0;
- bpf_task_work_schedule_resume_impl(task, &work->tw, &hmap, process_work, NULL);
+ bpf_task_work_schedule_resume(task, &work->tw, &hmap, process_work);
return 0;
}
@@ -65,7 +65,7 @@ int no_map_task_work(struct pt_regs *args)
struct bpf_task_work tw;
task = bpf_get_current_task_btf();
- bpf_task_work_schedule_resume_impl(task, &tw, &hmap, process_work, NULL);
+ bpf_task_work_schedule_resume(task, &tw, &hmap, process_work);
return 0;
}
@@ -76,7 +76,7 @@ int task_work_null(struct pt_regs *args)
struct task_struct *task;
task = bpf_get_current_task_btf();
- bpf_task_work_schedule_resume_impl(task, NULL, &hmap, process_work, NULL);
+ bpf_task_work_schedule_resume(task, NULL, &hmap, process_work);
return 0;
}
@@ -91,6 +91,6 @@ int map_null(struct pt_regs *args)
work = bpf_map_lookup_elem(&arrmap, &key);
if (!work)
return 0;
- bpf_task_work_schedule_resume_impl(task, &work->tw, NULL, process_work, NULL);
+ bpf_task_work_schedule_resume(task, &work->tw, NULL, process_work);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/task_work_stress.c b/tools/testing/selftests/bpf/progs/task_work_stress.c
index 55e555f7f41b..1d4378f351ef 100644
--- a/tools/testing/selftests/bpf/progs/task_work_stress.c
+++ b/tools/testing/selftests/bpf/progs/task_work_stress.c
@@ -51,8 +51,8 @@ int schedule_task_work(void *ctx)
if (!work)
return 0;
}
- err = bpf_task_work_schedule_signal_impl(bpf_get_current_task_btf(), &work->tw, &hmap,
- process_work, NULL);
+ err = bpf_task_work_schedule_signal(bpf_get_current_task_btf(), &work->tw, &hmap,
+ process_work);
if (err)
__sync_fetch_and_add(&schedule_error, 1);
else
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
index f7b330ddd007..076fbf03a126 100644
--- a/tools/testing/selftests/bpf/progs/test_bpf_nf.c
+++ b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
@@ -15,7 +15,6 @@
extern unsigned long CONFIG_HZ __kconfig;
-int test_einval_bpf_tuple = 0;
int test_einval_reserved = 0;
int test_einval_reserved_new = 0;
int test_einval_netns_id = 0;
@@ -99,12 +98,6 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
__builtin_memset(&bpf_tuple, 0, sizeof(bpf_tuple.ipv4));
- ct = lookup_fn(ctx, NULL, 0, &opts_def, sizeof(opts_def));
- if (ct)
- bpf_ct_release(ct);
- else
- test_einval_bpf_tuple = opts_def.error;
-
opts_def.reserved[0] = 1;
ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
sizeof(opts_def));
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c b/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c
index a586f087ffeb..2c156cd166af 100644
--- a/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c
+++ b/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c
@@ -4,6 +4,7 @@
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
struct nf_conn;
@@ -18,6 +19,10 @@ struct nf_conn *bpf_skb_ct_alloc(struct __sk_buff *, struct bpf_sock_tuple *, u3
struct bpf_ct_opts___local *, u32) __ksym;
struct nf_conn *bpf_skb_ct_lookup(struct __sk_buff *, struct bpf_sock_tuple *, u32,
struct bpf_ct_opts___local *, u32) __ksym;
+struct nf_conn *bpf_xdp_ct_alloc(struct xdp_md *, struct bpf_sock_tuple *, u32,
+ struct bpf_ct_opts___local *, u32) __ksym;
+struct nf_conn *bpf_xdp_ct_lookup(struct xdp_md *, struct bpf_sock_tuple *, u32,
+ struct bpf_ct_opts___local *, u32) __ksym;
struct nf_conn *bpf_ct_insert_entry(struct nf_conn *) __ksym;
void bpf_ct_release(struct nf_conn *) __ksym;
void bpf_ct_set_timeout(struct nf_conn *, u32) __ksym;
@@ -146,4 +151,56 @@ int change_status_after_alloc(struct __sk_buff *ctx)
return 0;
}
+SEC("?tc")
+__failure __msg("Possibly NULL pointer passed to trusted arg1")
+int lookup_null_bpf_tuple(struct __sk_buff *ctx)
+{
+ struct bpf_ct_opts___local opts = {};
+ struct nf_conn *ct;
+
+ ct = bpf_skb_ct_lookup(ctx, NULL, 0, &opts, sizeof(opts));
+ if (ct)
+ bpf_ct_release(ct);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("Possibly NULL pointer passed to trusted arg3")
+int lookup_null_bpf_opts(struct __sk_buff *ctx)
+{
+ struct bpf_sock_tuple tup = {};
+ struct nf_conn *ct;
+
+ ct = bpf_skb_ct_lookup(ctx, &tup, sizeof(tup.ipv4), NULL, sizeof(struct bpf_ct_opts___local));
+ if (ct)
+ bpf_ct_release(ct);
+ return 0;
+}
+
+SEC("?xdp")
+__failure __msg("Possibly NULL pointer passed to trusted arg1")
+int xdp_lookup_null_bpf_tuple(struct xdp_md *ctx)
+{
+ struct bpf_ct_opts___local opts = {};
+ struct nf_conn *ct;
+
+ ct = bpf_xdp_ct_lookup(ctx, NULL, 0, &opts, sizeof(opts));
+ if (ct)
+ bpf_ct_release(ct);
+ return 0;
+}
+
+SEC("?xdp")
+__failure __msg("Possibly NULL pointer passed to trusted arg3")
+int xdp_lookup_null_bpf_opts(struct xdp_md *ctx)
+{
+ struct bpf_sock_tuple tup = {};
+ struct nf_conn *ct;
+
+ ct = bpf_xdp_ct_lookup(ctx, &tup, sizeof(tup.ipv4), NULL, sizeof(struct bpf_ct_opts___local));
+ if (ct)
+ bpf_ct_release(ct);
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_btf_decl_tag.c b/tools/testing/selftests/bpf/progs/test_btf_decl_tag.c
index c88ccc53529a..0c3df19626cb 100644
--- a/tools/testing/selftests/bpf/progs/test_btf_decl_tag.c
+++ b/tools/testing/selftests/bpf/progs/test_btf_decl_tag.c
@@ -33,7 +33,7 @@ struct {
} hashmap1 SEC(".maps");
-static __noinline int foo(int x __tag1 __tag2) __tag1 __tag2
+static __noinline __tag1 __tag2 int foo(int x __tag1 __tag2)
{
struct key_t key;
value_t val = {};
diff --git a/tools/testing/selftests/bpf/progs/test_ctx.c b/tools/testing/selftests/bpf/progs/test_ctx.c
new file mode 100644
index 000000000000..7d4995506717
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ctx.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2026 Valve Corporation.
+ * Author: Changwoo Min <changwoo@igalia.com>
+ */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_experimental.h"
+
+char _license[] SEC("license") = "GPL";
+
+extern void bpf_kfunc_trigger_ctx_check(void) __ksym;
+
+int count_hardirq;
+int count_softirq;
+int count_task;
+
+/* Triggered via bpf_prog_test_run from user-space */
+SEC("syscall")
+int trigger_all_contexts(void *ctx)
+{
+ if (bpf_in_task())
+ __sync_fetch_and_add(&count_task, 1);
+
+ /* Trigger the firing of a hardirq and softirq for test. */
+ bpf_kfunc_trigger_ctx_check();
+ return 0;
+}
+
+/* Observer for HardIRQ */
+SEC("fentry/bpf_testmod_test_hardirq_fn")
+int BPF_PROG(on_hardirq)
+{
+ if (bpf_in_hardirq())
+ __sync_fetch_and_add(&count_hardirq, 1);
+ return 0;
+}
+
+/* Observer for SoftIRQ */
+SEC("fentry/bpf_testmod_test_softirq_fn")
+int BPF_PROG(on_softirq)
+{
+ if (bpf_in_serving_softirq())
+ __sync_fetch_and_add(&count_softirq, 1);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c
index 061befb004c2..d249113ed657 100644
--- a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c
+++ b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c
@@ -48,10 +48,9 @@ SEC("?lsm.s/bpf")
__failure __msg("arg#0 expected pointer to stack or const struct bpf_dynptr")
int BPF_PROG(not_ptr_to_stack, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
{
- unsigned long val = 0;
+ static struct bpf_dynptr val;
- return bpf_verify_pkcs7_signature((struct bpf_dynptr *)val,
- (struct bpf_dynptr *)val, NULL);
+ return bpf_verify_pkcs7_signature(&val, &val, NULL);
}
SEC("lsm.s/bpf")
diff --git a/tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c b/tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c
index 0ad1bf1ede8d..967081bbcfe1 100644
--- a/tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c
+++ b/tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c
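+/* Update one element from the fentry trigger; user space reads the
+ * per-CPU values back, optionally selecting a single CPU.
+ */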
@@ -29,7 +29,7 @@ int kfunc_dynptr_nullable_test2(struct __sk_buff *skb)
}
SEC("tc")
-__failure __msg("expected pointer to stack or const struct bpf_dynptr")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
int kfunc_dynptr_nullable_test3(struct __sk_buff *skb)
{
struct bpf_dynptr data;
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_pass_prog.c b/tools/testing/selftests/bpf/progs/test_sockmap_pass_prog.c
index 69aacc96db36..ef9edca184ea 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_pass_prog.c
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_pass_prog.c
@@ -44,4 +44,18 @@ int prog_skb_parser(struct __sk_buff *skb)
return SK_PASS;
}
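+
+/* Redirect every skb to the socket at index 1 of sock_map_rx, on the ingress path. */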
+SEC("sk_skb/stream_verdict")
+int prog_skb_verdict_ingress(struct __sk_buff *skb)
+{
+ int one = 1;
+
+ return bpf_sk_redirect_map(skb, &sock_map_rx, one, BPF_F_INGRESS);
+}
+
+SEC("sk_skb/stream_parser")
+int prog_skb_verdict_ingress_strp(struct __sk_buff *skb)
+{
+ return skb->len;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
index 7330c61b5730..7376df405a6b 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
@@ -23,7 +23,12 @@ static const int cfg_udp_src = 20000;
(((__u64)len & BPF_ADJ_ROOM_ENCAP_L2_MASK) \
<< BPF_ADJ_ROOM_ENCAP_L2_SHIFT)
-#define L2_PAD_SZ (sizeof(struct vxlanhdr) + ETH_HLEN)
+struct vxlanhdr___local {
+ __be32 vx_flags;
+ __be32 vx_vni;
+};
+
+#define L2_PAD_SZ (sizeof(struct vxlanhdr___local) + ETH_HLEN)
#define UDP_PORT 5555
#define MPLS_OVER_UDP_PORT 6635
@@ -154,7 +159,7 @@ static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
l2_len = ETH_HLEN;
if (ext_proto & EXTPROTO_VXLAN) {
udp_dst = VXLAN_UDP_PORT;
- l2_len += sizeof(struct vxlanhdr);
+ l2_len += sizeof(struct vxlanhdr___local);
} else
udp_dst = ETH_OVER_UDP_PORT;
break;
@@ -195,12 +200,12 @@ static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;
if (ext_proto & EXTPROTO_VXLAN) {
- struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;
+ struct vxlanhdr___local *vxlan_hdr = (struct vxlanhdr___local *)l2_hdr;
vxlan_hdr->vx_flags = VXLAN_FLAGS;
vxlan_hdr->vx_vni = VXLAN_VNI;
- l2_hdr += sizeof(struct vxlanhdr);
+ l2_hdr += sizeof(struct vxlanhdr___local);
}
if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
@@ -285,7 +290,7 @@ static __always_inline int __encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
l2_len = ETH_HLEN;
if (ext_proto & EXTPROTO_VXLAN) {
udp_dst = VXLAN_UDP_PORT;
- l2_len += sizeof(struct vxlanhdr);
+ l2_len += sizeof(struct vxlanhdr___local);
} else
udp_dst = ETH_OVER_UDP_PORT;
break;
@@ -325,12 +330,12 @@ static __always_inline int __encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;
if (ext_proto & EXTPROTO_VXLAN) {
- struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;
+ struct vxlanhdr___local *vxlan_hdr = (struct vxlanhdr___local *)l2_hdr;
vxlan_hdr->vx_flags = VXLAN_FLAGS;
vxlan_hdr->vx_vni = VXLAN_VNI;
- l2_hdr += sizeof(struct vxlanhdr);
+ l2_hdr += sizeof(struct vxlanhdr___local);
}
if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
@@ -639,7 +644,7 @@ static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
olen += ETH_HLEN;
break;
case VXLAN_UDP_PORT:
- olen += ETH_HLEN + sizeof(struct vxlanhdr);
+ olen += ETH_HLEN + sizeof(struct vxlanhdr___local);
break;
}
break;
diff --git a/tools/testing/selftests/bpf/progs/timer.c b/tools/testing/selftests/bpf/progs/timer.c
index 4c677c001258..d6d5fefcd9b1 100644
--- a/tools/testing/selftests/bpf/progs/timer.c
+++ b/tools/testing/selftests/bpf/progs/timer.c
@@ -1,13 +1,17 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Facebook */
-#include <linux/bpf.h>
-#include <time.h>
+
+#include <vmlinux.h>
#include <stdbool.h>
#include <errno.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
+#define CLOCK_MONOTONIC 1
+#define CLOCK_BOOTTIME 7
+
char _license[] SEC("license") = "GPL";
+
struct hmap_elem {
int counter;
struct bpf_timer timer;
@@ -59,10 +63,14 @@ __u64 bss_data;
__u64 abs_data;
__u64 err;
__u64 ok;
+__u64 test_hits;
+__u64 update_hits;
+__u64 cancel_hits;
__u64 callback_check = 52;
__u64 callback2_check = 52;
__u64 pinned_callback_check;
__s32 pinned_cpu;
+bool async_cancel = 0;
#define ARRAY 1
#define HTAB 2
@@ -164,6 +172,29 @@ int BPF_PROG2(test1, int, a)
return 0;
}
+static int timer_error(void *map, int *key, struct bpf_timer *timer)
+{
+ err = 42;
+ return 0;
+}
+
+SEC("syscall")
+int test_async_cancel_succeed(void *ctx)
+{
+ struct bpf_timer *arr_timer;
+ int array_key = ARRAY;
+
+ arr_timer = bpf_map_lookup_elem(&array, &array_key);
+ if (!arr_timer)
+ return 0;
+ bpf_timer_init(arr_timer, &array, CLOCK_MONOTONIC);
+ bpf_timer_set_callback(arr_timer, timer_error);
+ bpf_timer_start(arr_timer, 100000 /* 100us */, 0);
+ bpf_timer_cancel_async(arr_timer);
+ ok = 7;
+ return 0;
+}
+
/* callback for prealloc and non-prealloc hashtab timers */
static int timer_cb2(void *map, int *key, struct hmap_elem *val)
{
@@ -399,27 +430,88 @@ static int race_timer_callback(void *race_array, int *race_key, struct bpf_timer
return 0;
}
-SEC("syscall")
-int race(void *ctx)
+/* Callback that updates its own map element */
+static int update_self_callback(void *map, int *key, struct bpf_timer *timer)
+{
+ struct elem init = {};
+
+ bpf_map_update_elem(map, key, &init, BPF_ANY);
+ __sync_fetch_and_add(&update_hits, 1);
+ return 0;
+}
+
+/* Callback that cancels itself using async cancel */
+static int cancel_self_callback(void *map, int *key, struct bpf_timer *timer)
+{
+ bpf_timer_cancel_async(timer);
+ __sync_fetch_and_add(&cancel_hits, 1);
+ return 0;
+}
+
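+/*
+ * One shared body drives the race/update/cancel tests; the mode selects
+ * the callback and the cancel flavor.
+ */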
+enum test_mode {
+ TEST_RACE_SYNC,
+ TEST_RACE_ASYNC,
+ TEST_UPDATE,
+ TEST_CANCEL,
+};
+
+static __always_inline int test_common(enum test_mode mode)
{
struct bpf_timer *timer;
- int err, race_key = 0;
struct elem init;
+ int ret, key = 0;
__builtin_memset(&init, 0, sizeof(struct elem));
- bpf_map_update_elem(&race_array, &race_key, &init, BPF_ANY);
- timer = bpf_map_lookup_elem(&race_array, &race_key);
+ bpf_map_update_elem(&race_array, &key, &init, BPF_ANY);
+ timer = bpf_map_lookup_elem(&race_array, &key);
if (!timer)
- return 1;
+ return 0;
- err = bpf_timer_init(timer, &race_array, CLOCK_MONOTONIC);
- if (err && err != -EBUSY)
- return 1;
+ ret = bpf_timer_init(timer, &race_array, CLOCK_MONOTONIC);
+ if (ret && ret != -EBUSY)
+ return 0;
+
+ if (mode == TEST_RACE_SYNC || mode == TEST_RACE_ASYNC)
+ bpf_timer_set_callback(timer, race_timer_callback);
+ else if (mode == TEST_UPDATE)
+ bpf_timer_set_callback(timer, update_self_callback);
+ else
+ bpf_timer_set_callback(timer, cancel_self_callback);
- bpf_timer_set_callback(timer, race_timer_callback);
bpf_timer_start(timer, 0, 0);
- bpf_timer_cancel(timer);
+
+ if (mode == TEST_RACE_ASYNC)
+ bpf_timer_cancel_async(timer);
+ else if (mode == TEST_RACE_SYNC)
+ bpf_timer_cancel(timer);
return 0;
}
+
+SEC("syscall")
+int race(void *ctx)
+{
+ return test_common(async_cancel ? TEST_RACE_ASYNC : TEST_RACE_SYNC);
+}
+
+SEC("perf_event")
+int nmi_race(void *ctx)
+{
+ __sync_fetch_and_add(&test_hits, 1);
+ return test_common(TEST_RACE_ASYNC);
+}
+
+SEC("perf_event")
+int nmi_update(void *ctx)
+{
+ __sync_fetch_and_add(&test_hits, 1);
+ return test_common(TEST_UPDATE);
+}
+
+SEC("perf_event")
+int nmi_cancel(void *ctx)
+{
+ __sync_fetch_and_add(&test_hits, 1);
+ return test_common(TEST_CANCEL);
+}
diff --git a/tools/testing/selftests/bpf/progs/timer_start_deadlock.c b/tools/testing/selftests/bpf/progs/timer_start_deadlock.c
new file mode 100644
index 000000000000..019518ee18cd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/timer_start_deadlock.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#define CLOCK_MONOTONIC 1
+
+char _license[] SEC("license") = "GPL";
+
+struct elem {
+ struct bpf_timer timer;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} timer_map SEC(".maps");
+
+volatile int in_timer_start;
+volatile int tp_called;
+
+static int timer_cb(void *map, int *key, struct elem *value)
+{
+ return 0;
+}
+
+SEC("tp_btf/hrtimer_cancel")
+int BPF_PROG(tp_hrtimer_cancel, struct hrtimer *hrtimer)
+{
+ struct bpf_timer *timer;
+ int key = 0;
+
+ if (!in_timer_start)
+ return 0;
+
+ tp_called = 1;
+ timer = bpf_map_lookup_elem(&timer_map, &key);
+
+ /*
+ * Call bpf_timer_start() from the tracepoint within hrtimer logic
+ * on the same timer to make sure it doesn't deadlock.
+ */
+ bpf_timer_start(timer, 1000000000, 0);
+ return 0;
+}
+
+SEC("syscall")
+int start_timer(void *ctx)
+{
+ struct bpf_timer *timer;
+ int key = 0;
+
+ timer = bpf_map_lookup_elem(&timer_map, &key);
+	/* No NULL check needed: the verifier proves a constant in-bounds array-map lookup is non-NULL. */
+ bpf_timer_init(timer, &timer_map, CLOCK_MONOTONIC);
+ bpf_timer_set_callback(timer, timer_cb);
+
+ /*
+	 * Call bpf_timer_start() twice, so that the second call performs
+	 * remove_hrtimer() and fires the hrtimer_cancel tracepoint.
+ */
+ in_timer_start = 1;
+ bpf_timer_start(timer, 1000000000, 0);
+ bpf_timer_start(timer, 1000000000, 0);
+ in_timer_start = 0;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/timer_start_delete_race.c b/tools/testing/selftests/bpf/progs/timer_start_delete_race.c
new file mode 100644
index 000000000000..2ff6357f33f9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/timer_start_delete_race.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
+#include <linux/bpf.h>
+#include <time.h>
+#include <bpf/bpf_helpers.h>
+
+#define ITER_CNT 2000
+
+struct map_value {
+ struct bpf_timer timer;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+} timer_map SEC(".maps");
+
+long cb_cnt;
+
+/*
+ * Timer callback that accesses the map value.
+ * If the race bug exists and this runs on freed memory,
+ * KASAN should detect it.
+ */
+static int timer_cb(void *map, int *key, struct map_value *value)
+{
+ __sync_fetch_and_add(&cb_cnt, 1);
+ return 0;
+}
+
+SEC("syscall")
+int start_timer(void *ctx)
+{
+ struct map_value *value;
+ int i;
+
+ for (i = 0; i < ITER_CNT; i++) {
+ int key = 0;
+
+ value = bpf_map_lookup_elem(&timer_map, &key);
+		/* No NULL check needed: the verifier now proves a constant in-bounds array lookup is non-NULL. */
+
+ bpf_timer_init(&value->timer, &timer_map, CLOCK_MONOTONIC);
+ bpf_timer_set_callback(&value->timer, timer_cb);
+ bpf_timer_start(&value->timer, 100000000, 0);
+ }
+ return 0;
+}
+
+SEC("syscall")
+int delete_elem(void *ctx)
+{
+ int i;
+
+ for (i = 0; i < ITER_CNT; i++) {
+ int key = 0;
+
+ bpf_map_delete_elem(&timer_map, &key);
+ }
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c
index 2898b3749d07..4ea0422d1042 100644
--- a/tools/testing/selftests/bpf/progs/trigger_bench.c
+++ b/tools/testing/selftests/bpf/progs/trigger_bench.c
@@ -25,6 +25,34 @@ static __always_inline void inc_counter(void)
__sync_add_and_fetch(&hits[cpu & CPU_MASK].value, 1);
}
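+
+/* Set from user space before load; when non-zero, each trigger also
+ * captures a stack trace, to benchmark bpf_get_stack() per attach mode.
+ */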
+volatile const int stacktrace;
+
+typedef __u64 stack_trace_t[128];
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, stack_trace_t);
+} stack_heap SEC(".maps");
+
+static __always_inline void do_stacktrace(void *ctx)
+{
+ if (!stacktrace)
+ return;
+
+ __u64 *ptr = bpf_map_lookup_elem(&stack_heap, &(__u32){0});
+
+ if (ptr)
+ bpf_get_stack(ctx, ptr, sizeof(stack_trace_t), 0);
+}
+
+static __always_inline void handle(void *ctx)
+{
+ inc_counter();
+ do_stacktrace(ctx);
+}
+
SEC("?uprobe")
int bench_trigger_uprobe(void *ctx)
{
@@ -81,21 +109,21 @@ int trigger_driver_kfunc(void *ctx)
SEC("?kprobe/bpf_get_numa_node_id")
int bench_trigger_kprobe(void *ctx)
{
- inc_counter();
+ handle(ctx);
return 0;
}
SEC("?kretprobe/bpf_get_numa_node_id")
int bench_trigger_kretprobe(void *ctx)
{
- inc_counter();
+ handle(ctx);
return 0;
}
SEC("?kprobe.multi/bpf_get_numa_node_id")
int bench_trigger_kprobe_multi(void *ctx)
{
- inc_counter();
+ handle(ctx);
return 0;
}
@@ -108,7 +136,7 @@ int bench_kprobe_multi_empty(void *ctx)
SEC("?kretprobe.multi/bpf_get_numa_node_id")
int bench_trigger_kretprobe_multi(void *ctx)
{
- inc_counter();
+ handle(ctx);
return 0;
}
@@ -121,34 +149,34 @@ int bench_kretprobe_multi_empty(void *ctx)
SEC("?fentry/bpf_get_numa_node_id")
int bench_trigger_fentry(void *ctx)
{
- inc_counter();
+ handle(ctx);
return 0;
}
SEC("?fexit/bpf_get_numa_node_id")
int bench_trigger_fexit(void *ctx)
{
- inc_counter();
+ handle(ctx);
return 0;
}
SEC("?fmod_ret/bpf_modify_return_test_tp")
int bench_trigger_fmodret(void *ctx)
{
- inc_counter();
+ handle(ctx);
return -22;
}
SEC("?tp/bpf_test_run/bpf_trigger_tp")
int bench_trigger_tp(void *ctx)
{
- inc_counter();
+ handle(ctx);
return 0;
}
SEC("?raw_tp/bpf_trigger_tp")
int bench_trigger_rawtp(void *ctx)
{
- inc_counter();
+ handle(ctx);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/uprobe_multi_session.c b/tools/testing/selftests/bpf/progs/uprobe_multi_session.c
index 30bff90b68dc..6e46bb00ff58 100644
--- a/tools/testing/selftests/bpf/progs/uprobe_multi_session.c
+++ b/tools/testing/selftests/bpf/progs/uprobe_multi_session.c
@@ -1,9 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/bpf.h>
+#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <stdbool.h>
-#include "bpf_kfuncs.h"
#include "bpf_misc.h"
char _license[] SEC("license") = "GPL";
@@ -51,7 +50,7 @@ static int uprobe_multi_check(void *ctx, bool is_return)
SEC("uprobe.session//proc/self/exe:uprobe_multi_func_*")
int uprobe(struct pt_regs *ctx)
{
- return uprobe_multi_check(ctx, bpf_session_is_return());
+ return uprobe_multi_check(ctx, bpf_session_is_return(ctx));
}
static __always_inline bool verify_sleepable_user_copy(void)
@@ -67,5 +66,5 @@ int uprobe_sleepable(struct pt_regs *ctx)
{
if (verify_sleepable_user_copy())
uprobe_multi_sleep_result++;
- return uprobe_multi_check(ctx, bpf_session_is_return());
+ return uprobe_multi_check(ctx, bpf_session_is_return(ctx));
}
diff --git a/tools/testing/selftests/bpf/progs/uprobe_multi_session_cookie.c b/tools/testing/selftests/bpf/progs/uprobe_multi_session_cookie.c
index 5befdf944dc6..b5db196614a9 100644
--- a/tools/testing/selftests/bpf/progs/uprobe_multi_session_cookie.c
+++ b/tools/testing/selftests/bpf/progs/uprobe_multi_session_cookie.c
@@ -1,9 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/bpf.h>
+#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <stdbool.h>
-#include "bpf_kfuncs.h"
char _license[] SEC("license") = "GPL";
@@ -13,16 +12,16 @@ __u64 test_uprobe_1_result = 0;
__u64 test_uprobe_2_result = 0;
__u64 test_uprobe_3_result = 0;
-static int check_cookie(__u64 val, __u64 *result)
+static int check_cookie(struct pt_regs *ctx, __u64 val, __u64 *result)
{
__u64 *cookie;
if (bpf_get_current_pid_tgid() >> 32 != pid)
return 1;
- cookie = bpf_session_cookie();
+ cookie = bpf_session_cookie(ctx);
- if (bpf_session_is_return())
+ if (bpf_session_is_return(ctx))
*result = *cookie == val ? val : 0;
else
*cookie = val;
@@ -32,17 +31,17 @@ static int check_cookie(__u64 val, __u64 *result)
SEC("uprobe.session//proc/self/exe:uprobe_multi_func_1")
int uprobe_1(struct pt_regs *ctx)
{
- return check_cookie(1, &test_uprobe_1_result);
+ return check_cookie(ctx, 1, &test_uprobe_1_result);
}
SEC("uprobe.session//proc/self/exe:uprobe_multi_func_2")
int uprobe_2(struct pt_regs *ctx)
{
- return check_cookie(2, &test_uprobe_2_result);
+ return check_cookie(ctx, 2, &test_uprobe_2_result);
}
SEC("uprobe.session//proc/self/exe:uprobe_multi_func_3")
int uprobe_3(struct pt_regs *ctx)
{
- return check_cookie(3, &test_uprobe_3_result);
+ return check_cookie(ctx, 3, &test_uprobe_3_result);
}
diff --git a/tools/testing/selftests/bpf/progs/uprobe_multi_session_recursive.c b/tools/testing/selftests/bpf/progs/uprobe_multi_session_recursive.c
index 8fbcd69fae22..3ce309248a04 100644
--- a/tools/testing/selftests/bpf/progs/uprobe_multi_session_recursive.c
+++ b/tools/testing/selftests/bpf/progs/uprobe_multi_session_recursive.c
@@ -1,9 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/bpf.h>
+#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <stdbool.h>
-#include "bpf_kfuncs.h"
#include "bpf_misc.h"
char _license[] SEC("license") = "GPL";
@@ -16,11 +15,11 @@ int idx_return = 0;
__u64 test_uprobe_cookie_entry[6];
__u64 test_uprobe_cookie_return[3];
-static int check_cookie(void)
+static int check_cookie(struct pt_regs *ctx)
{
- __u64 *cookie = bpf_session_cookie();
+ __u64 *cookie = bpf_session_cookie(ctx);
- if (bpf_session_is_return()) {
+ if (bpf_session_is_return(ctx)) {
if (idx_return >= ARRAY_SIZE(test_uprobe_cookie_return))
return 1;
test_uprobe_cookie_return[idx_return++] = *cookie;
@@ -40,5 +39,5 @@ int uprobe_recursive(struct pt_regs *ctx)
if (bpf_get_current_pid_tgid() >> 32 != pid)
return 1;
- return check_cookie();
+ return check_cookie(ctx);
}
diff --git a/tools/testing/selftests/bpf/progs/verifier_arena.c b/tools/testing/selftests/bpf/progs/verifier_arena.c
index 7f4827eede3c..c4b8daac4388 100644
--- a/tools/testing/selftests/bpf/progs/verifier_arena.c
+++ b/tools/testing/selftests/bpf/progs/verifier_arena.c
@@ -10,6 +10,8 @@
#include "bpf_experimental.h"
#include "bpf_arena_common.h"
+#define private(name) SEC(".bss." #name) __hidden __attribute__((aligned(8)))
+
struct {
__uint(type, BPF_MAP_TYPE_ARENA);
__uint(map_flags, BPF_F_MMAPABLE);
@@ -21,6 +23,37 @@ struct {
#endif
} arena SEC(".maps");
+SEC("socket")
+__success __retval(0)
+int basic_alloc1_nosleep(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ volatile int __arena *page1, *page2, *no_page;
+
+ page1 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (!page1)
+ return 1;
+ *page1 = 1;
+ page2 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (!page2)
+ return 2;
+ *page2 = 2;
+ no_page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (no_page)
+ return 3;
+ if (*page1 != 1)
+ return 4;
+ if (*page2 != 2)
+ return 5;
+ bpf_arena_free_pages(&arena, (void __arena *)page2, 1);
+ if (*page1 != 1)
+ return 6;
+ if (*page2 != 0 && *page2 != 2) /* use-after-free should return 0 or the stored value */
+ return 7;
+#endif
+ return 0;
+}
+
SEC("syscall")
__success __retval(0)
int basic_alloc1(void *ctx)
@@ -60,6 +93,44 @@ int basic_alloc1(void *ctx)
return 0;
}
+SEC("socket")
+__success __retval(0)
+int basic_alloc2_nosleep(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ volatile char __arena *page1, *page2, *page3, *page4;
+
+ page1 = bpf_arena_alloc_pages(&arena, NULL, 2, NUMA_NO_NODE, 0);
+ if (!page1)
+ return 1;
+ page2 = page1 + __PAGE_SIZE;
+ page3 = page1 + __PAGE_SIZE * 2;
+ page4 = page1 - __PAGE_SIZE;
+ *page1 = 1;
+ *page2 = 2;
+ *page3 = 3;
+ *page4 = 4;
+ if (*page1 != 1)
+ return 1;
+ if (*page2 != 2)
+ return 2;
+ if (*page3 != 0)
+ return 3;
+ if (*page4 != 0)
+ return 4;
+ bpf_arena_free_pages(&arena, (void __arena *)page1, 2);
+ if (*page1 != 0 && *page1 != 1)
+ return 5;
+ if (*page2 != 0 && *page2 != 2)
+ return 6;
+ if (*page3 != 0)
+ return 7;
+ if (*page4 != 0)
+ return 8;
+#endif
+ return 0;
+}
+
SEC("syscall")
__success __retval(0)
int basic_alloc2(void *ctx)
@@ -102,6 +173,19 @@ struct bpf_arena___l {
struct bpf_map map;
} __attribute__((preserve_access_index));
+SEC("socket")
+__success __retval(0) __log_level(2)
+int basic_alloc3_nosleep(void *ctx)
+{
+ struct bpf_arena___l *ar = (struct bpf_arena___l *)&arena;
+ volatile char __arena *pages;
+
+ pages = bpf_arena_alloc_pages(&ar->map, NULL, ar->map.max_entries, NUMA_NO_NODE, 0);
+ if (!pages)
+ return 1;
+ return 0;
+}
+
SEC("syscall")
__success __retval(0) __log_level(2)
int basic_alloc3(void *ctx)
@@ -115,6 +199,38 @@ int basic_alloc3(void *ctx)
return 0;
}
+SEC("socket")
+__success __retval(0)
+int basic_reserve1_nosleep(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ char __arena *page;
+ int ret;
+
+ page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (!page)
+ return 1;
+
+ page += __PAGE_SIZE;
+
+ /* Reserve the second page */
+ ret = bpf_arena_reserve_pages(&arena, page, 1);
+ if (ret)
+ return 2;
+
+ /* Try to explicitly allocate the reserved page. */
+ page = bpf_arena_alloc_pages(&arena, page, 1, NUMA_NO_NODE, 0);
+ if (page)
+ return 3;
+
+ /* Try to implicitly allocate the page (since there are only two of them). */
+ page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (page)
+ return 4;
+#endif
+ return 0;
+}
+
SEC("syscall")
__success __retval(0)
int basic_reserve1(void *ctx)
@@ -147,6 +263,26 @@ int basic_reserve1(void *ctx)
return 0;
}
+SEC("socket")
+__success __retval(0)
+int basic_reserve2_nosleep(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ char __arena *page;
+ int ret;
+
+ page = arena_base(&arena);
+ ret = bpf_arena_reserve_pages(&arena, page, 1);
+ if (ret)
+ return 1;
+
+ page = bpf_arena_alloc_pages(&arena, page, 1, NUMA_NO_NODE, 0);
+ if ((u64)page)
+ return 2;
+#endif
+ return 0;
+}
+
SEC("syscall")
__success __retval(0)
int basic_reserve2(void *ctx)
@@ -168,6 +304,27 @@ int basic_reserve2(void *ctx)
}
/* Reserve the same page twice, should return -EBUSY. */
+SEC("socket")
+__success __retval(0)
+int reserve_twice_nosleep(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ char __arena *page;
+ int ret;
+
+ page = arena_base(&arena);
+
+ ret = bpf_arena_reserve_pages(&arena, page, 1);
+ if (ret)
+ return 1;
+
+ ret = bpf_arena_reserve_pages(&arena, page, 1);
+ if (ret != -EBUSY)
+ return 2;
+#endif
+ return 0;
+}
+
SEC("syscall")
__success __retval(0)
int reserve_twice(void *ctx)
@@ -190,6 +347,36 @@ int reserve_twice(void *ctx)
}
/* Try to reserve past the end of the arena. */
+SEC("socket")
+__success __retval(0)
+int reserve_invalid_region_nosleep(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ char __arena *page;
+ int ret;
+
+ /* Try a NULL pointer. */
+ ret = bpf_arena_reserve_pages(&arena, NULL, 3);
+ if (ret != -EINVAL)
+ return 1;
+
+ page = arena_base(&arena);
+
+ ret = bpf_arena_reserve_pages(&arena, page, 3);
+ if (ret != -EINVAL)
+ return 2;
+
+ ret = bpf_arena_reserve_pages(&arena, page, 4096);
+ if (ret != -EINVAL)
+ return 3;
+
+ ret = bpf_arena_reserve_pages(&arena, page, (1ULL << 32) - 1);
+ if (ret != -EINVAL)
+ return 4;
+#endif
+ return 0;
+}
+
SEC("syscall")
__success __retval(0)
int reserve_invalid_region(void *ctx)
@@ -254,4 +441,40 @@ int iter_maps3(struct bpf_iter__bpf_map *ctx)
return 0;
}
+private(ARENA_TESTS) struct bpf_spin_lock arena_bpf_test_lock;
+
+/* Use the arena kfunc API while under a BPF lock. */
+SEC("syscall")
+__success __retval(0)
+int arena_kfuncs_under_bpf_lock(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ char __arena *page;
+ int ret;
+
+ bpf_spin_lock(&arena_bpf_test_lock);
+
+ /* Get a separate region of the arena. */
+ page = arena_base(&arena);
+ ret = bpf_arena_reserve_pages(&arena, page, 1);
+ if (ret) {
+ bpf_spin_unlock(&arena_bpf_test_lock);
+ return 1;
+ }
+
+ bpf_arena_free_pages(&arena, page, 1);
+
+ page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (!page) {
+ bpf_spin_unlock(&arena_bpf_test_lock);
+ return 2;
+ }
+
+ bpf_arena_free_pages(&arena, page, 1);
+
+ bpf_spin_unlock(&arena_bpf_test_lock);
+#endif
+
+ return 0;
+}
char _license[] SEC("license") = "GPL";
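The unlock-before-every-return choreography in arena_kfuncs_under_bpf_lock() above is load-bearing: the verifier refuses any path that can exit while a bpf_spin_lock is held. A shortened, hypothetical variant shows the shape that would be rejected:

	bpf_spin_lock(&arena_bpf_test_lock);
	page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
	if (!page)
		return 2;	/* rejected: exits with the lock still held */
	bpf_spin_unlock(&arena_bpf_test_lock);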
diff --git a/tools/testing/selftests/bpf/progs/verifier_arena_globals1.c b/tools/testing/selftests/bpf/progs/verifier_arena_globals1.c
new file mode 100644
index 000000000000..83182ddbfb95
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_arena_globals1.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#define BPF_NO_KFUNC_PROTOTYPES
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_experimental.h"
+#include "bpf_arena_common.h"
+#include "bpf_misc.h"
+
+#define ARENA_PAGES (1UL<< (32 - __builtin_ffs(__PAGE_SIZE) + 1))
+#define GLOBAL_PAGES (16)
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARENA);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __uint(max_entries, ARENA_PAGES);
+#ifdef __TARGET_ARCH_arm64
+ __ulong(map_extra, (1ull << 32) | (~0u - __PAGE_SIZE * ARENA_PAGES + 1));
+#else
+ __ulong(map_extra, (1ull << 44) | (~0u - __PAGE_SIZE * ARENA_PAGES + 1));
+#endif
+} arena SEC(".maps");
+
+/*
+ * Global data, to be placed at the end of the arena.
+ */
+volatile char __arena global_data[GLOBAL_PAGES][PAGE_SIZE];
+
+SEC("syscall")
+__success __retval(0)
+int check_reserve1(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ const u8 magic = 0x5a;
+ __u8 __arena *guard, *globals;
+ volatile char __arena *ptr;
+ int i;
+ int ret;
+
+ guard = (void __arena *)arena_base(&arena);
+ globals = (void __arena *)(arena_base(&arena) + (ARENA_PAGES - GLOBAL_PAGES) * PAGE_SIZE);
+
+ /* Reserve the region we've offset the globals by. */
+ ret = bpf_arena_reserve_pages(&arena, guard, ARENA_PAGES - GLOBAL_PAGES);
+ if (ret)
+ return 1;
+
+ /* Make sure the globals are in the expected offset. */
+ ret = bpf_arena_reserve_pages(&arena, globals, 1);
+ if (!ret)
+ return 2;
+
+ /* Verify globals are properly mapped in by libbpf. */
+ for (i = 0; i < GLOBAL_PAGES; i++) {
+ ptr = &global_data[i][PAGE_SIZE / 2];
+
+ *ptr = magic;
+ if (*ptr != magic)
+ return i + 3;
+ }
+#endif
+ return 0;
+}
+
+/*
+ * Relocation check by reading directly into the global data w/o using symbols.
+ */
+SEC("syscall")
+__success __retval(0)
+int check_relocation(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ const u8 magic = 0xfa;
+ u8 __arena *ptr;
+
+ global_data[GLOBAL_PAGES - 1][PAGE_SIZE / 2] = magic;
+ ptr = (u8 __arena *)((u64)(ARENA_PAGES * PAGE_SIZE - PAGE_SIZE / 2));
+ if (*ptr != magic)
+ return 1;
+
+#endif
+ return 0;
+}
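+
+/* The hand-built pointer in check_relocation() targets the same byte the
+ * symbolic store just wrote; the arena-relative arithmetic checks out:
+ *   &global_data[GLOBAL_PAGES - 1][PAGE_SIZE / 2]
+ *     = (ARENA_PAGES - GLOBAL_PAGES) * PAGE_SIZE    start of the globals
+ *     + (GLOBAL_PAGES - 1) * PAGE_SIZE              last global page
+ *     + PAGE_SIZE / 2                               middle of that page
+ *     = ARENA_PAGES * PAGE_SIZE - PAGE_SIZE / 2     the raw pointer above
+ */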
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_arena_globals2.c b/tools/testing/selftests/bpf/progs/verifier_arena_globals2.c
new file mode 100644
index 000000000000..e6bd7b61f9f1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_arena_globals2.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#define BPF_NO_KFUNC_PROTOTYPES
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+#include "bpf_arena_common.h"
+
+#define ARENA_PAGES (32)
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARENA);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __uint(max_entries, ARENA_PAGES);
+#ifdef __TARGET_ARCH_arm64
+ __ulong(map_extra, (1ull << 32) | (~0u - __PAGE_SIZE * ARENA_PAGES + 1));
+#else
+ __ulong(map_extra, (1ull << 44) | (~0u - __PAGE_SIZE * ARENA_PAGES + 1));
+#endif
+} arena SEC(".maps");
+
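For arena maps, map_extra requests the user VM address, and the low-32-bit term above pins the mapping so it ends exactly at the 4 GiB boundary of the slice selected by the high bits (this reading of map_extra is an interpretation, not spelled out in the patch). A host-side sanity check of the arithmetic:

	#include <assert.h>
	#include <stdint.h>

	int main(void)
	{
		/* Illustrative values: 4 KiB pages, 32-page arena as above. */
		const uint64_t page_size = 4096, arena_pages = 32;
		uint64_t lo = (uint32_t)(~0u - page_size * arena_pages + 1);

		/* The arena's last byte lands exactly at the 4 GiB mark. */
		assert(lo + page_size * arena_pages == 1ull << 32);
		return 0;
	}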
+/*
+ * Fill the entire arena with global data.
+ * The offset into the arena should be 0.
+ */
+char __arena global_data[ARENA_PAGES][PAGE_SIZE];
+
+SEC("syscall")
+__success __retval(0)
+int check_reserve2(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ void __arena *guard;
+ int ret;
+
+ guard = (void __arena *)arena_base(&arena);
+
+ /* Make sure the data at offset 0 case is properly handled. */
+ ret = bpf_arena_reserve_pages(&arena, guard, 1);
+ if (!ret)
+ return 1;
+#endif
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_arena_large.c b/tools/testing/selftests/bpf/progs/verifier_arena_large.c
index f19e15400b3e..5f7e7afee169 100644
--- a/tools/testing/selftests/bpf/progs/verifier_arena_large.c
+++ b/tools/testing/selftests/bpf/progs/verifier_arena_large.c
@@ -23,18 +23,31 @@ int big_alloc1(void *ctx)
{
#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
volatile char __arena *page1, *page2, *no_page, *page3;
- void __arena *base;
+ u64 base;
- page1 = base = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ base = (u64)arena_base(&arena);
+
+ page1 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
if (!page1)
return 1;
+
+ if ((u64)page1 != base)
+ return 15;
+
*page1 = 1;
- page2 = bpf_arena_alloc_pages(&arena, base + ARENA_SIZE - PAGE_SIZE * 2,
+ page2 = bpf_arena_alloc_pages(&arena, (void __arena *)(ARENA_SIZE - 2 * PAGE_SIZE),
1, NUMA_NO_NODE, 0);
if (!page2)
return 2;
*page2 = 2;
- no_page = bpf_arena_alloc_pages(&arena, base + ARENA_SIZE - PAGE_SIZE,
+
+ /* Test for the guard region at the end of the arena. */
+ no_page = bpf_arena_alloc_pages(&arena, (void __arena *)(ARENA_SIZE - PAGE_SIZE),
+ 1, NUMA_NO_NODE, 0);
+ if (no_page)
+ return 16;
+
+ no_page = bpf_arena_alloc_pages(&arena, (void __arena *)ARENA_SIZE,
1, NUMA_NO_NODE, 0);
if (no_page)
return 3;
@@ -270,5 +283,34 @@ int big_alloc2(void *ctx)
return 9;
return 0;
}
+
+SEC("socket")
+__success __retval(0)
+int big_alloc3(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ char __arena *pages;
+ u64 i;
+
+ /*
+ * Allocate 2051 pages in one go to check how kmalloc_nolock() handles large requests.
+ * Since kmalloc_nolock() can allocate up to 1024 struct page * at a time, this call should
+ * result in three batches: two batches of 1024 pages each, followed by a final batch of 3
+ * pages.
+ */
+ pages = bpf_arena_alloc_pages(&arena, NULL, 2051, NUMA_NO_NODE, 0);
+ if (!pages)
+ return 0;
+
+ bpf_for(i, 0, 2051)
+ pages[i * PAGE_SIZE] = 123;
+ bpf_for(i, 0, 2051)
+ if (pages[i * PAGE_SIZE] != 123)
+ return i;
+
+ bpf_arena_free_pages(&arena, pages, 2051);
+#endif
+ return 0;
+}
#endif
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_async_cb_context.c b/tools/testing/selftests/bpf/progs/verifier_async_cb_context.c
index 7efa9521105e..39aff82549c9 100644
--- a/tools/testing/selftests/bpf/progs/verifier_async_cb_context.c
+++ b/tools/testing/selftests/bpf/progs/verifier_async_cb_context.c
@@ -96,7 +96,7 @@ int wq_non_sleepable_prog(void *ctx)
if (bpf_wq_init(&val->w, &wq_map, 0) != 0)
return 0;
- if (bpf_wq_set_callback_impl(&val->w, wq_cb, 0, NULL) != 0)
+ if (bpf_wq_set_callback(&val->w, wq_cb, 0) != 0)
return 0;
return 0;
}
@@ -114,7 +114,7 @@ int wq_sleepable_prog(void *ctx)
if (bpf_wq_init(&val->w, &wq_map, 0) != 0)
return 0;
- if (bpf_wq_set_callback_impl(&val->w, wq_cb, 0, NULL) != 0)
+ if (bpf_wq_set_callback(&val->w, wq_cb, 0) != 0)
return 0;
return 0;
}
@@ -156,7 +156,7 @@ int task_work_non_sleepable_prog(void *ctx)
if (!task)
return 0;
- bpf_task_work_schedule_resume_impl(task, &val->tw, &task_work_map, task_work_cb, NULL);
+ bpf_task_work_schedule_resume(task, &val->tw, &task_work_map, task_work_cb);
return 0;
}
@@ -176,6 +176,6 @@ int task_work_sleepable_prog(void *ctx)
if (!task)
return 0;
- bpf_task_work_schedule_resume_impl(task, &val->tw, &task_work_map, task_work_cb, NULL);
+ bpf_task_work_schedule_resume(task, &val->tw, &task_work_map, task_work_cb);
return 0;
}
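With the _impl suffix and the trailing NULL argument gone, the workqueue setup reduces to init, set_callback, start; a condensed sketch of the resulting sequence (bpf_wq_start() is shown for completeness and assumes the same map/value layout as the programs above):

	if (bpf_wq_init(&val->w, &wq_map, 0) != 0)
		return 0;
	if (bpf_wq_set_callback(&val->w, wq_cb, 0) != 0)
		return 0;
	bpf_wq_start(&val->w, 0);	/* queue the callback for execution */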
diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c
index 411a18437d7e..560531404bce 100644
--- a/tools/testing/selftests/bpf/progs/verifier_bounds.c
+++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c
@@ -1477,7 +1477,7 @@ __naked void sub64_full_overflow(void)
SEC("socket")
__description("64-bit subtraction, partial overflow, result in unbounded reg")
__success __log_level(2)
-__msg("3: (1f) r3 -= r2 {{.*}} R3=scalar()")
+__msg("3: (1f) r3 -= r2 {{.*}} R3=scalar(id=1-1)")
__retval(0)
__naked void sub64_partial_overflow(void)
{
diff --git a/tools/testing/selftests/bpf/progs/verifier_bswap.c b/tools/testing/selftests/bpf/progs/verifier_bswap.c
index e61755656e8d..4b779deee767 100644
--- a/tools/testing/selftests/bpf/progs/verifier_bswap.c
+++ b/tools/testing/selftests/bpf/progs/verifier_bswap.c
@@ -48,6 +48,49 @@ __naked void bswap_64(void)
: __clobber_all);
}
+#define BSWAP_RANGE_TEST(name, op, in_value, out_value) \
+ SEC("socket") \
+ __success __log_level(2) \
+ __msg("r0 &= {{.*}}; R0=scalar({{.*}},var_off=(0x0; " #in_value "))") \
+ __msg("r0 = " op " r0 {{.*}}; R0=scalar({{.*}},var_off=(0x0; " #out_value "))") \
+ __naked void name(void) \
+ { \
+ asm volatile ( \
+ "call %[bpf_get_prandom_u32];" \
+ "r0 &= " #in_value ";" \
+ "r0 = " op " r0;" \
+ "r2 = " #out_value " ll;" \
+ "if r0 > r2 goto trap_%=;" \
+ "r0 = 0;" \
+ "exit;" \
+ "trap_%=:" \
+ "r1 = 42;" \
+ "r0 = *(u64 *)(r1 + 0);" \
+ "exit;" \
+ : \
+ : __imm(bpf_get_prandom_u32) \
+ : __clobber_all); \
+ }
+
+BSWAP_RANGE_TEST(bswap16_range, "bswap16", 0x3f00, 0x3f)
+BSWAP_RANGE_TEST(bswap32_range, "bswap32", 0x3f00, 0x3f0000)
+BSWAP_RANGE_TEST(bswap64_range, "bswap64", 0x3f00, 0x3f000000000000)
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+BSWAP_RANGE_TEST(be16_range, "be16", 0x3f00, 0x3f)
+BSWAP_RANGE_TEST(be32_range, "be32", 0x3f00, 0x3f0000)
+BSWAP_RANGE_TEST(be64_range, "be64", 0x3f00, 0x3f000000000000)
+BSWAP_RANGE_TEST(le16_range, "le16", 0x3f00, 0x3f00)
+BSWAP_RANGE_TEST(le32_range, "le32", 0x3f00, 0x3f00)
+BSWAP_RANGE_TEST(le64_range, "le64", 0x3f00, 0x3f00)
+#else
+BSWAP_RANGE_TEST(be16_range, "be16", 0x3f00, 0x3f00)
+BSWAP_RANGE_TEST(be32_range, "be32", 0x3f00, 0x3f00)
+BSWAP_RANGE_TEST(be64_range, "be64", 0x3f00, 0x3f00)
+BSWAP_RANGE_TEST(le16_range, "le16", 0x3f00, 0x3f)
+BSWAP_RANGE_TEST(le32_range, "le32", 0x3f00, 0x3f0000)
+BSWAP_RANGE_TEST(le64_range, "le64", 0x3f00, 0x3f000000000000)
+#endif
+
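The expected masks in these tests fall out of byte-swapping the tnum: the unknown bits move exactly as the value's bytes do, and the le*/be* variants collapse to either a swap or a no-op depending on host endianness. A user-space illustration of the bswap16 case:

	#include <assert.h>
	#include <stdint.h>

	static uint16_t bswap16(uint16_t x)
	{
		return (uint16_t)((x >> 8) | (x << 8));
	}

	int main(void)
	{
		uint16_t mask = 0x3f00;			/* unknown bits before the swap */

		assert(bswap16(mask) == 0x003f);	/* unknown bits after the swap */
		return 0;
	}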
#else
SEC("socket")
diff --git a/tools/testing/selftests/bpf/progs/verifier_default_trusted_ptr.c b/tools/testing/selftests/bpf/progs/verifier_default_trusted_ptr.c
new file mode 100644
index 000000000000..fa3b656ad4fb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_default_trusted_ptr.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2026 Google LLC.
+ */
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#include "bpf_misc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+SEC("syscall")
+__success __retval(0)
+int test_default_trusted_ptr(void *ctx)
+{
+ struct prog_test_member *trusted_ptr;
+
+ trusted_ptr = bpf_kfunc_get_default_trusted_ptr_test();
+ /*
+ * The BPF kfunc bpf_kfunc_get_default_trusted_ptr_test() returns a
+ * PTR_TO_BTF_ID | PTR_TRUSTED, so its result should be accepted when
+ * passed to a BPF kfunc that only accepts KF_TRUSTED_ARGS.
+ */
+ bpf_kfunc_put_default_trusted_ptr_test(trusted_ptr);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_div_mod_bounds.c b/tools/testing/selftests/bpf/progs/verifier_div_mod_bounds.c
new file mode 100644
index 000000000000..4672af0b3268
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_div_mod_bounds.c
@@ -0,0 +1,1149 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <limits.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+/* This file contains unit tests for signed/unsigned division and modulo
+ * operations (mostly with a constant divisor), verifying that the BPF
+ * verifier's range tracking soundly and precisely computes the resulting
+ * bounds.
+ */
+
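The expected ranges throughout this file encode BPF's defined ALU semantics for the corner cases; the zero-divisor and overflow tests below assert exactly the behavior modeled by this host-side sketch:

	#include <limits.h>

	/* Division by zero yields 0; modulo by zero leaves the dividend as is. */
	static long long bpf_sdiv(long long a, long long b)
	{
		if (b == 0)
			return 0;
		if (a == LLONG_MIN && b == -1)
			return LLONG_MIN;	/* overflow wraps back to S64_MIN */
		return a / b;
	}

	static long long bpf_smod(long long a, long long b)
	{
		if (b == 0)
			return a;
		if (a == LLONG_MIN && b == -1)
			return 0;		/* S64_MIN % -1 is defined as 0 */
		return a % b;
	}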
+SEC("socket")
+__description("UDIV32, positive divisor")
+__success __retval(0) __log_level(2)
+__msg("w1 /= 3 {{.*}}; R1=scalar(smin=smin32=0,smax=umax=smax32=umax32=3,var_off=(0x0; 0x3))")
+__naked void udiv32_pos_divisor(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = w0; \
+ w1 &= 8; \
+ w1 |= 1; \
+ w1 /= 3; \
+ if w1 > 3 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("UDIV32, zero divisor")
+__success __retval(0) __log_level(2)
+__msg("w1 /= w2 {{.*}}; R1=0 R2=0")
+__naked void udiv32_zero_divisor(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = w0; \
+ w1 &= 8; \
+ w1 |= 1; \
+ w2 = 0; \
+ w1 /= w2; \
+ if w1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("UDIV64, positive divisor")
+__success __retval(0) __log_level(2)
+__msg("r1 /= 3 {{.*}}; R1=scalar(smin=smin32=0,smax=umax=smax32=umax32=3,var_off=(0x0; 0x3))")
+__naked void udiv64_pos_divisor(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ r1 &= 8; \
+ r1 |= 1; \
+ r1 /= 3; \
+ if r1 > 3 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("UDIV64, zero divisor")
+__success __retval(0) __log_level(2)
+__msg("r1 /= r2 {{.*}}; R1=0 R2=0")
+__naked void udiv64_zero_divisor(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ r1 &= 8; \
+ r1 |= 1; \
+ r2 = 0; \
+ r1 /= r2; \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV32, positive divisor, positive dividend")
+__success __retval(0) __log_level(2)
+__msg("w1 s/= 3 {{.*}}; R1=scalar(smin=umin=smin32=umin32=2,smax=umax=smax32=umax32=3,var_off=(0x2; 0x1))")
+__naked void sdiv32_pos_divisor_1(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = w0; \
+ if w1 s< 8 goto l0_%=; \
+ if w1 s> 10 goto l0_%=; \
+ w1 s/= 3; \
+ if w1 s< 2 goto l1_%=; \
+ if w1 s> 3 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV32, positive divisor, negative dividend")
+__success __retval(0) __log_level(2)
+__msg("w1 s/= 3 {{.*}}; R1=scalar(smin=umin=umin32=0xfffffffd,smax=umax=umax32=0xfffffffe,smin32=-3,smax32=-2,var_off=(0xfffffffc; 0x3))")
+__naked void sdiv32_pos_divisor_2(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = w0; \
+ if w1 s> -8 goto l0_%=; \
+ if w1 s< -10 goto l0_%=; \
+ w1 s/= 3; \
+ if w1 s< -3 goto l1_%=; \
+ if w1 s> -2 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV32, positive divisor, mixed sign dividend")
+__success __retval(0) __log_level(2)
+__msg("w1 s/= 3 {{.*}}; R1=scalar(smin=0,smax=umax=0xffffffff,smin32=-2,smax32=3,var_off=(0x0; 0xffffffff))")
+__naked void sdiv32_pos_divisor_3(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = w0; \
+ if w1 s< -8 goto l0_%=; \
+ if w1 s> 10 goto l0_%=; \
+ w1 s/= 3; \
+ if w1 s< -2 goto l1_%=; \
+ if w1 s> 3 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV32, negative divisor, positive dividend")
+__success __retval(0) __log_level(2)
+__msg("w1 s/= -3 {{.*}}; R1=scalar(smin=umin=umin32=0xfffffffd,smax=umax=umax32=0xfffffffe,smin32=-3,smax32=-2,var_off=(0xfffffffc; 0x3))")
+__naked void sdiv32_neg_divisor_1(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = w0; \
+ if w1 s< 8 goto l0_%=; \
+ if w1 s> 10 goto l0_%=; \
+ w1 s/= -3; \
+ if w1 s< -3 goto l1_%=; \
+ if w1 s> -2 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV32, negative divisor, positive dividend")
+__success __retval(0) __log_level(2)
+__msg("w1 s/= -3 {{.*}}; R1=scalar(smin=umin=smin32=umin32=2,smax=umax=smax32=umax32=3,var_off=(0x2; 0x1))")
+__naked void sdiv32_neg_divisor_2(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = w0; \
+ if w1 s> -8 goto l0_%=; \
+ if w1 s< -10 goto l0_%=; \
+ w1 s/= -3; \
+ if w1 s< 2 goto l1_%=; \
+ if w1 s> 3 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV32, negative divisor, mixed sign dividend")
+__success __retval(0) __log_level(2)
+__msg("w1 s/= -3 {{.*}}; R1=scalar(smin=0,smax=umax=0xffffffff,smin32=-3,smax32=2,var_off=(0x0; 0xffffffff))")
+__naked void sdiv32_neg_divisor_3(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = w0; \
+ if w1 s< -8 goto l0_%=; \
+ if w1 s> 10 goto l0_%=; \
+ w1 s/= -3; \
+ if w1 s< -3 goto l1_%=; \
+ if w1 s> 2 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV32, zero divisor")
+__success __retval(0) __log_level(2)
+__msg("w1 s/= w2 {{.*}}; R1=0 R2=0")
+__naked void sdiv32_zero_divisor(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = w0; \
+ w1 &= 8; \
+ w1 |= 1; \
+ w2 = 0; \
+ w1 s/= w2; \
+ if w1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV32, overflow (S32_MIN/-1)")
+__success __retval(0) __log_level(2)
+__msg("w1 s/= -1 {{.*}}; R1=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))")
+__naked void sdiv32_overflow_1(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = w0; \
+ w2 = %[int_min]; \
+ w2 += 10; \
+ if w1 s> w2 goto l0_%=; \
+ w1 s/= -1; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(int_min, INT_MIN),
+ __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV32, overflow (S32_MIN/-1), constant dividend")
+__success __retval(0) __log_level(2)
+__msg("w1 s/= -1 {{.*}}; R1=0x80000000")
+__naked void sdiv32_overflow_2(void)
+{
+ asm volatile (" \
+ w1 = %[int_min]; \
+ w1 s/= -1; \
+ if w1 != %[int_min] goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm_const(int_min, INT_MIN)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV64, positive divisor, positive dividend")
+__success __retval(0) __log_level(2)
+__msg("r1 s/= 3 {{.*}}; R1=scalar(smin=umin=smin32=umin32=2,smax=umax=smax32=umax32=3,var_off=(0x2; 0x1))")
+__naked void sdiv64_pos_divisor_1(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ if r1 s< 8 goto l0_%=; \
+ if r1 s> 10 goto l0_%=; \
+ r1 s/= 3; \
+ if r1 s< 2 goto l1_%=; \
+ if r1 s> 3 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV64, positive divisor, negative dividend")
+__success __retval(0) __log_level(2)
+__msg("r1 s/= 3 {{.*}}; R1=scalar(smin=smin32=-3,smax=smax32=-2,umin=0xfffffffffffffffd,umax=0xfffffffffffffffe,umin32=0xfffffffd,umax32=0xfffffffe,var_off=(0xfffffffffffffffc; 0x3))")
+__naked void sdiv64_pos_divisor_2(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ if r1 s> -8 goto l0_%=; \
+ if r1 s< -10 goto l0_%=; \
+ r1 s/= 3; \
+ if r1 s< -3 goto l1_%=; \
+ if r1 s> -2 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV64, positive divisor, mixed sign dividend")
+__success __retval(0) __log_level(2)
+__msg("r1 s/= 3 {{.*}}; R1=scalar(smin=smin32=-2,smax=smax32=3)")
+__naked void sdiv64_pos_divisor_3(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ if r1 s< -8 goto l0_%=; \
+ if r1 s> 10 goto l0_%=; \
+ r1 s/= 3; \
+ if r1 s< -2 goto l1_%=; \
+ if r1 s> 3 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV64, negative divisor, positive dividend")
+__success __retval(0) __log_level(2)
+__msg("r1 s/= -3 {{.*}}; R1=scalar(smin=smin32=-3,smax=smax32=-2,umin=0xfffffffffffffffd,umax=0xfffffffffffffffe,umin32=0xfffffffd,umax32=0xfffffffe,var_off=(0xfffffffffffffffc; 0x3))")
+__naked void sdiv64_neg_divisor_1(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ if r1 s< 8 goto l0_%=; \
+ if r1 s> 10 goto l0_%=; \
+ r1 s/= -3; \
+ if r1 s< -3 goto l1_%=; \
+ if r1 s> -2 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV64, negative divisor, positive dividend")
+__success __retval(0) __log_level(2)
+__msg("r1 s/= -3 {{.*}}; R1=scalar(smin=umin=smin32=umin32=2,smax=umax=smax32=umax32=3,var_off=(0x2; 0x1))")
+__naked void sdiv64_neg_divisor_2(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ if r1 s> -8 goto l0_%=; \
+ if r1 s< -10 goto l0_%=; \
+ r1 s/= -3; \
+ if r1 s< 2 goto l1_%=; \
+ if r1 s> 3 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV64, negative divisor, mixed sign dividend")
+__success __retval(0) __log_level(2)
+__msg("r1 s/= -3 {{.*}}; R1=scalar(smin=smin32=-3,smax=smax32=2)")
+__naked void sdiv64_neg_divisor_3(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ if r1 s< -8 goto l0_%=; \
+ if r1 s> 10 goto l0_%=; \
+ r1 s/= -3; \
+ if r1 s< -3 goto l1_%=; \
+ if r1 s> 2 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV64, zero divisor")
+__success __retval(0) __log_level(2)
+__msg("r1 s/= r2 {{.*}}; R1=0 R2=0")
+__naked void sdiv64_zero_divisor(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ r1 &= 8; \
+ r1 |= 1; \
+ r2 = 0; \
+ r1 s/= r2; \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV64, overflow (S64_MIN/-1)")
+__success __retval(0) __log_level(2)
+__msg("r1 s/= -1 {{.*}}; R1=scalar()")
+__naked void sdiv64_overflow_1(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ r1 = r0; \
+ r2 = %[llong_min] ll; \
+ r2 += 10; \
+ if r1 s> r2 goto l0_%=; \
+ r1 s/= -1; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(llong_min, LLONG_MIN),
+ __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV64, overflow (S64_MIN/-1), constant dividend")
+__success __retval(0) __log_level(2)
+__msg("r1 s/= -1 {{.*}}; R1=0x8000000000000000")
+__naked void sdiv64_overflow_2(void)
+{
+ asm volatile (" \
+ r1 = %[llong_min] ll; \
+ r1 s/= -1; \
+ r2 = %[llong_min] ll; \
+ if r1 != r2 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm_const(llong_min, LLONG_MIN)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("UMOD32, positive divisor")
+__success __retval(0) __log_level(2)
+__msg("w1 %= 3 {{.*}}; R1=scalar(smin=smin32=0,smax=umax=smax32=umax32=2,var_off=(0x0; 0x3))")
+__naked void umod32_pos_divisor(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = w0; \
+ w1 &= 8; \
+ w1 |= 1; \
+ w1 %%= 3; \
+ if w1 > 3 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("UMOD32, positive divisor, small dividend")
+__success __retval(0) __log_level(2)
+__msg("w1 %= 10 {{.*}}; R1=scalar(smin=umin=smin32=umin32=1,smax=umax=smax32=umax32=9,var_off=(0x1; 0x8))")
+__naked void umod32_pos_divisor_unchanged(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = w0; \
+ w1 &= 8; \
+ w1 |= 1; \
+ w1 %%= 10; \
+ if w1 < 1 goto l0_%=; \
+ if w1 > 9 goto l0_%=; \
+ if w1 & 1 != 1 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("UMOD32, zero divisor")
+__success __retval(0) __log_level(2)
+__msg("w1 %= w2 {{.*}}; R1=scalar(smin=umin=smin32=umin32=1,smax=umax=smax32=umax32=9,var_off=(0x1; 0x8)) R2=0")
+__naked void umod32_zero_divisor(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = w0; \
+ w1 &= 8; \
+ w1 |= 1; \
+ w2 = 0; \
+ w1 %%= w2; \
+ if w1 < 1 goto l0_%=; \
+ if w1 > 9 goto l0_%=; \
+ if w1 & 1 != 1 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("UMOD64, positive divisor")
+__success __retval(0) __log_level(2)
+__msg("r1 %= 3 {{.*}}; R1=scalar(smin=smin32=0,smax=umax=smax32=umax32=2,var_off=(0x0; 0x3))")
+__naked void umod64_pos_divisor(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ r1 &= 8; \
+ r1 |= 1; \
+ r1 %%= 3; \
+ if r1 > 3 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("UMOD64, positive divisor, small dividend")
+__success __retval(0) __log_level(2)
+__msg("r1 %= 10 {{.*}}; R1=scalar(smin=umin=smin32=umin32=1,smax=umax=smax32=umax32=9,var_off=(0x1; 0x8))")
+__naked void umod64_pos_divisor_unchanged(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ r1 &= 8; \
+ r1 |= 1; \
+ r1 %%= 10; \
+ if r1 < 1 goto l0_%=; \
+ if r1 > 9 goto l0_%=; \
+ if r1 & 1 != 1 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("UMOD64, zero divisor")
+__success __retval(0) __log_level(2)
+__msg("r1 %= r2 {{.*}}; R1=scalar(smin=umin=smin32=umin32=1,smax=umax=smax32=umax32=9,var_off=(0x1; 0x8)) R2=0")
+__naked void umod64_zero_divisor(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ r1 &= 8; \
+ r1 |= 1; \
+ r2 = 0; \
+ r1 %%= r2; \
+ if r1 < 1 goto l0_%=; \
+ if r1 > 9 goto l0_%=; \
+ if r1 & 1 != 1 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, positive divisor, positive dividend")
+__success __retval(0) __log_level(2)
+__msg("w1 s%= 3 {{.*}}; R1=scalar(smin=smin32=0,smax=umax=smax32=umax32=2,var_off=(0x0; 0x3))")
+__naked void smod32_pos_divisor_1(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = w0; \
+ if w1 s< 8 goto l0_%=; \
+ if w1 s> 10 goto l0_%=; \
+ w1 s%%= 3; \
+ if w1 s< 0 goto l1_%=; \
+ if w1 s> 2 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, positive divisor, negative dividend")
+__success __retval(0) __log_level(2)
+__msg("w1 s%= 3 {{.*}}; R1=scalar(smin=0,smax=umax=0xffffffff,smin32=-2,smax32=0,var_off=(0x0; 0xffffffff))")
+__naked void smod32_pos_divisor_2(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = w0; \
+ if w1 s> -8 goto l0_%=; \
+ if w1 s< -10 goto l0_%=; \
+ w1 s%%= 3; \
+ if w1 s< -2 goto l1_%=; \
+ if w1 s> 0 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, positive divisor, mixed sign dividend")
+__success __retval(0) __log_level(2)
+__msg("w1 s%= 3 {{.*}}; R1=scalar(smin=0,smax=umax=0xffffffff,smin32=-2,smax32=2,var_off=(0x0; 0xffffffff))")
+__naked void smod32_pos_divisor_3(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = w0; \
+ if w1 s< -8 goto l0_%=; \
+ if w1 s> 10 goto l0_%=; \
+ w1 s%%= 3; \
+ if w1 s< -2 goto l1_%=; \
+ if w1 s> 2 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, positive divisor, small dividend")
+__success __retval(0) __log_level(2)
+__msg("w1 s%= 11 {{.*}}; R1=scalar(smin=0,smax=umax=0xffffffff,smin32=-8,smax32=10,var_off=(0x0; 0xffffffff))")
+__naked void smod32_pos_divisor_unchanged(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = w0; \
+ if w1 s< -8 goto l0_%=; \
+ if w1 s> 10 goto l0_%=; \
+ w1 s%%= 11; \
+ if w1 s< -8 goto l1_%=; \
+ if w1 s> 10 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, negative divisor, positive dividend")
+__success __retval(0) __log_level(2)
+__msg("w1 s%= -3 {{.*}}; R1=scalar(smin=smin32=0,smax=umax=smax32=umax32=2,var_off=(0x0; 0x3))")
+__naked void smod32_neg_divisor_1(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = w0; \
+ if w1 s< 8 goto l0_%=; \
+ if w1 s> 10 goto l0_%=; \
+ w1 s%%= -3; \
+ if w1 s< 0 goto l1_%=; \
+ if w1 s> 2 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, negative divisor, negative dividend")
+__success __retval(0) __log_level(2)
+__msg("w1 s%= -3 {{.*}}; R1=scalar(smin=0,smax=umax=0xffffffff,smin32=-2,smax32=0,var_off=(0x0; 0xffffffff))")
+__naked void smod32_neg_divisor_2(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = w0; \
+ if w1 s> -8 goto l0_%=; \
+ if w1 s< -10 goto l0_%=; \
+ w1 s%%= -3; \
+ if w1 s< -2 goto l1_%=; \
+ if w1 s> 0 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, negative divisor, mixed sign dividend")
+__success __retval(0) __log_level(2)
+__msg("w1 s%= -3 {{.*}}; R1=scalar(smin=0,smax=umax=0xffffffff,smin32=-2,smax32=2,var_off=(0x0; 0xffffffff))")
+__naked void smod32_neg_divisor_3(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = w0; \
+ if w1 s< -8 goto l0_%=; \
+ if w1 s> 10 goto l0_%=; \
+ w1 s%%= -3; \
+ if w1 s< -2 goto l1_%=; \
+ if w1 s> 2 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, negative divisor, small dividend")
+__success __retval(0) __log_level(2)
+__msg("w1 s%= -11 {{.*}}; R1=scalar(smin=0,smax=umax=0xffffffff,smin32=-8,smax32=10,var_off=(0x0; 0xffffffff))")
+__naked void smod32_neg_divisor_unchanged(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = w0; \
+ if w1 s< -8 goto l0_%=; \
+ if w1 s> 10 goto l0_%=; \
+ w1 s%%= -11; \
+ if w1 s< -8 goto l1_%=; \
+ if w1 s> 10 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, zero divisor")
+__success __retval(0) __log_level(2)
+__msg("w1 s%= w2 {{.*}}; R1=scalar(smin=0,smax=umax=0xffffffff,smin32=-8,smax32=10,var_off=(0x0; 0xffffffff)) R2=0")
+__naked void smod32_zero_divisor(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = w0; \
+ if w1 s< -8 goto l0_%=; \
+ if w1 s> 10 goto l0_%=; \
+ w2 = 0; \
+ w1 s%%= w2; \
+ if w1 s< -8 goto l1_%=; \
+ if w1 s> 10 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, overflow (S32_MIN%-1)")
+__success __retval(0) __log_level(2)
+__msg("w1 s%= -1 {{.*}}; R1=0")
+__naked void smod32_overflow_1(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = w0; \
+ w2 = %[int_min]; \
+ w2 += 10; \
+ if w1 s> w2 goto l0_%=; \
+ w1 s%%= -1; \
+ if w1 != 0 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm_const(int_min, INT_MIN),
+ __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, overflow (S32_MIN%-1), constant dividend")
+__success __retval(0) __log_level(2)
+__msg("w1 s%= -1 {{.*}}; R1=0")
+__naked void smod32_overflow_2(void)
+{
+ asm volatile (" \
+ w1 = %[int_min]; \
+ w1 s%%= -1; \
+ if w1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm_const(int_min, INT_MIN)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, positive divisor, positive dividend")
+__success __retval(0) __log_level(2)
+__msg("r1 s%= 3 {{.*}}; R1=scalar(smin=smin32=0,smax=umax=smax32=umax32=2,var_off=(0x0; 0x3))")
+__naked void smod64_pos_divisor_1(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ if r1 s< 8 goto l0_%=; \
+ if r1 s> 10 goto l0_%=; \
+ r1 s%%= 3; \
+ if r1 s< 0 goto l1_%=; \
+ if r1 s> 2 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, positive divisor, negative dividend")
+__success __retval(0) __log_level(2)
+__msg("r1 s%= 3 {{.*}}; R1=scalar(smin=smin32=-2,smax=smax32=0)")
+__naked void smod64_pos_divisor_2(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ if r1 s> -8 goto l0_%=; \
+ if r1 s< -10 goto l0_%=; \
+ r1 s%%= 3; \
+ if r1 s< -2 goto l1_%=; \
+ if r1 s> 0 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, positive divisor, mixed sign dividend")
+__success __retval(0) __log_level(2)
+__msg("r1 s%= 3 {{.*}}; R1=scalar(smin=smin32=-2,smax=smax32=2)")
+__naked void smod64_pos_divisor_3(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ if r1 s< -8 goto l0_%=; \
+ if r1 s> 10 goto l0_%=; \
+ r1 s%%= 3; \
+ if r1 s< -2 goto l1_%=; \
+ if r1 s> 2 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, positive divisor, small dividend")
+__success __retval(0) __log_level(2)
+__msg("r1 s%= 11 {{.*}}; R1=scalar(smin=smin32=-8,smax=smax32=10)")
+__naked void smod64_pos_divisor_unchanged(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ if r1 s< -8 goto l0_%=; \
+ if r1 s> 10 goto l0_%=; \
+ r1 s%%= 11; \
+ if r1 s< -8 goto l1_%=; \
+ if r1 s> 10 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, negative divisor, positive dividend")
+__success __retval(0) __log_level(2)
+__msg("r1 s%= -3 {{.*}}; R1=scalar(smin=smin32=0,smax=umax=smax32=umax32=2,var_off=(0x0; 0x3))")
+__naked void smod64_neg_divisor_1(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ if r1 s< 8 goto l0_%=; \
+ if r1 s> 10 goto l0_%=; \
+ r1 s%%= -3; \
+ if r1 s< 0 goto l1_%=; \
+ if r1 s> 2 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, negative divisor, negative dividend")
+__success __retval(0) __log_level(2)
+__msg("r1 s%= -3 {{.*}}; R1=scalar(smin=smin32=-2,smax=smax32=0)")
+__naked void smod64_neg_divisor_2(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ if r1 s> -8 goto l0_%=; \
+ if r1 s< -10 goto l0_%=; \
+ r1 s%%= -3; \
+ if r1 s< -2 goto l1_%=; \
+ if r1 s> 0 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, negative divisor, mixed sign dividend")
+__success __retval(0) __log_level(2)
+__msg("r1 s%= -3 {{.*}}; R1=scalar(smin=smin32=-2,smax=smax32=2)")
+__naked void smod64_neg_divisor_3(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ if r1 s< -8 goto l0_%=; \
+ if r1 s> 10 goto l0_%=; \
+ r1 s%%= -3; \
+ if r1 s< -2 goto l1_%=; \
+ if r1 s> 2 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, negative divisor, small dividend")
+__success __retval(0) __log_level(2)
+__msg("r1 s%= -11 {{.*}}; R1=scalar(smin=smin32=-8,smax=smax32=10)")
+__naked void smod64_neg_divisor_unchanged(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ if r1 s< -8 goto l0_%=; \
+ if r1 s> 10 goto l0_%=; \
+ r1 s%%= -11; \
+ if r1 s< -8 goto l1_%=; \
+ if r1 s> 10 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, zero divisor")
+__success __retval(0) __log_level(2)
+__msg("r1 s%= r2 {{.*}}; R1=scalar(smin=smin32=-8,smax=smax32=10) R2=0")
+__naked void smod64_zero_divisor(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ if r1 s< -8 goto l0_%=; \
+ if r1 s> 10 goto l0_%=; \
+ r2 = 0; \
+ r1 s%%= r2; \
+ if r1 s< -8 goto l1_%=; \
+ if r1 s> 10 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, overflow (S64_MIN%-1)")
+__success __retval(0) __log_level(2)
+__msg("r1 s%= -1 {{.*}}; R1=0")
+__naked void smod64_overflow_1(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ r1 = r0; \
+ r2 = %[llong_min] ll; \
+ r2 += 10; \
+ if r1 s> r2 goto l0_%=; \
+ r1 s%%= -1; \
+ if r1 != 0 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm_const(llong_min, LLONG_MIN),
+ __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, overflow (S64_MIN%-1), constant dividend")
+__success __retval(0) __log_level(2)
+__msg("r1 s%= -1 {{.*}}; R1=0")
+__naked void smod64_overflow_2(void)
+{
+ asm volatile (" \
+ r1 = %[llong_min] ll; \
+ r1 s%%= -1; \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm_const(llong_min, LLONG_MIN)
+ : __clobber_all);
+}
diff --git a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c
index 1204fbc58178..e7dae0cf9c17 100644
--- a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c
+++ b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c
@@ -72,7 +72,7 @@ int trusted_task_arg_nonnull_fail1(void *ctx)
SEC("?tp_btf/task_newtask")
__failure __log_level(2)
-__msg("R1 type=ptr_or_null_ expected=ptr_, trusted_ptr_, rcu_ptr_")
+__msg("R1 type=trusted_ptr_or_null_ expected=ptr_, trusted_ptr_, rcu_ptr_")
__msg("Caller passes invalid args into func#1 ('subprog_trusted_task_nonnull')")
int trusted_task_arg_nonnull_fail2(void *ctx)
{
diff --git a/tools/testing/selftests/bpf/progs/verifier_helper_restricted.c b/tools/testing/selftests/bpf/progs/verifier_helper_restricted.c
index 059aa716e3d0..889c9b78b912 100644
--- a/tools/testing/selftests/bpf/progs/verifier_helper_restricted.c
+++ b/tools/testing/selftests/bpf/progs/verifier_helper_restricted.c
@@ -17,17 +17,6 @@ struct {
__type(value, struct val);
} map_spin_lock SEC(".maps");
-struct timer {
- struct bpf_timer t;
-};
-
-struct {
- __uint(type, BPF_MAP_TYPE_ARRAY);
- __uint(max_entries, 1);
- __type(key, int);
- __type(value, struct timer);
-} map_timer SEC(".maps");
-
SEC("kprobe")
__description("bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_KPROBE")
__failure __msg("program of this type cannot use helper bpf_ktime_get_coarse_ns")
@@ -85,106 +74,6 @@ __naked void bpf_prog_type_raw_tracepoint_1(void)
}
SEC("kprobe")
-__description("bpf_timer_init isn restricted in BPF_PROG_TYPE_KPROBE")
-__failure __msg("tracing progs cannot use bpf_timer yet")
-__naked void in_bpf_prog_type_kprobe_2(void)
-{
- asm volatile (" \
- r2 = r10; \
- r2 += -8; \
- r1 = 0; \
- *(u64*)(r2 + 0) = r1; \
- r1 = %[map_timer] ll; \
- call %[bpf_map_lookup_elem]; \
- if r0 == 0 goto l0_%=; \
- r1 = r0; \
- r2 = %[map_timer] ll; \
- r3 = 1; \
-l0_%=: call %[bpf_timer_init]; \
- exit; \
-" :
- : __imm(bpf_map_lookup_elem),
- __imm(bpf_timer_init),
- __imm_addr(map_timer)
- : __clobber_all);
-}
-
-SEC("perf_event")
-__description("bpf_timer_init is forbidden in BPF_PROG_TYPE_PERF_EVENT")
-__failure __msg("tracing progs cannot use bpf_timer yet")
-__naked void bpf_prog_type_perf_event_2(void)
-{
- asm volatile (" \
- r2 = r10; \
- r2 += -8; \
- r1 = 0; \
- *(u64*)(r2 + 0) = r1; \
- r1 = %[map_timer] ll; \
- call %[bpf_map_lookup_elem]; \
- if r0 == 0 goto l0_%=; \
- r1 = r0; \
- r2 = %[map_timer] ll; \
- r3 = 1; \
-l0_%=: call %[bpf_timer_init]; \
- exit; \
-" :
- : __imm(bpf_map_lookup_elem),
- __imm(bpf_timer_init),
- __imm_addr(map_timer)
- : __clobber_all);
-}
-
-SEC("tracepoint")
-__description("bpf_timer_init is forbidden in BPF_PROG_TYPE_TRACEPOINT")
-__failure __msg("tracing progs cannot use bpf_timer yet")
-__naked void in_bpf_prog_type_tracepoint_2(void)
-{
- asm volatile (" \
- r2 = r10; \
- r2 += -8; \
- r1 = 0; \
- *(u64*)(r2 + 0) = r1; \
- r1 = %[map_timer] ll; \
- call %[bpf_map_lookup_elem]; \
- if r0 == 0 goto l0_%=; \
- r1 = r0; \
- r2 = %[map_timer] ll; \
- r3 = 1; \
-l0_%=: call %[bpf_timer_init]; \
- exit; \
-" :
- : __imm(bpf_map_lookup_elem),
- __imm(bpf_timer_init),
- __imm_addr(map_timer)
- : __clobber_all);
-}
-
-SEC("raw_tracepoint")
-__description("bpf_timer_init is forbidden in BPF_PROG_TYPE_RAW_TRACEPOINT")
-__failure __msg("tracing progs cannot use bpf_timer yet")
-__naked void bpf_prog_type_raw_tracepoint_2(void)
-{
- asm volatile (" \
- r2 = r10; \
- r2 += -8; \
- r1 = 0; \
- *(u64*)(r2 + 0) = r1; \
- r1 = %[map_timer] ll; \
- call %[bpf_map_lookup_elem]; \
- if r0 == 0 goto l0_%=; \
- r1 = r0; \
- r2 = %[map_timer] ll; \
- r3 = 1; \
-l0_%=: call %[bpf_timer_init]; \
- exit; \
-" :
- : __imm(bpf_map_lookup_elem),
- __imm(bpf_timer_init),
- __imm_addr(map_timer)
- : __clobber_all);
-}
-
-SEC("kprobe")
__description("bpf_spin_lock is forbidden in BPF_PROG_TYPE_KPROBE")
__failure __msg("tracing progs cannot use bpf_spin_lock yet")
__naked void in_bpf_prog_type_kprobe_3(void)
diff --git a/tools/testing/selftests/bpf/progs/verifier_jit_inline.c b/tools/testing/selftests/bpf/progs/verifier_jit_inline.c
new file mode 100644
index 000000000000..4ea254063646
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_jit_inline.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("fentry/bpf_fentry_test1")
+__success __retval(0)
+__arch_x86_64
+__jited(" addq %gs:{{.*}}, %rax")
+__arch_arm64
+__jited(" mrs x7, SP_EL0")
+int inline_bpf_get_current_task(void)
+{
+ bpf_get_current_task();
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_kfunc_prog_types.c b/tools/testing/selftests/bpf/progs/verifier_kfunc_prog_types.c
index a509cad97e69..1fce7a7e8d03 100644
--- a/tools/testing/selftests/bpf/progs/verifier_kfunc_prog_types.c
+++ b/tools/testing/selftests/bpf/progs/verifier_kfunc_prog_types.c
@@ -32,7 +32,7 @@ static void task_kfunc_load_test(void)
}
SEC("raw_tp")
-__failure __msg("calling kernel function")
+__success
int BPF_PROG(task_kfunc_raw_tp)
{
task_kfunc_load_test();
@@ -86,7 +86,7 @@ static void cgrp_kfunc_load_test(void)
}
SEC("raw_tp")
-__failure __msg("calling kernel function")
+__success
int BPF_PROG(cgrp_kfunc_raw_tp)
{
cgrp_kfunc_load_test();
@@ -138,7 +138,7 @@ static void cpumask_kfunc_load_test(void)
}
SEC("raw_tp")
-__failure __msg("calling kernel function")
+__success
int BPF_PROG(cpumask_kfunc_raw_tp)
{
cpumask_kfunc_load_test();
diff --git a/tools/testing/selftests/bpf/progs/verifier_linked_scalars.c b/tools/testing/selftests/bpf/progs/verifier_linked_scalars.c
index 8f755d2464cf..2ef346c827c2 100644
--- a/tools/testing/selftests/bpf/progs/verifier_linked_scalars.c
+++ b/tools/testing/selftests/bpf/progs/verifier_linked_scalars.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
+#include <limits.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
@@ -18,9 +19,9 @@ __naked void scalars(void)
r4 = r1; \
w2 += 0x7FFFFFFF; \
w4 += 0; \
- if r2 == 0 goto l1; \
+ if r2 == 0 goto l0_%=; \
exit; \
-l1: \
+l0_%=: \
r4 >>= 63; \
r3 = 1; \
r3 -= r4; \
@@ -31,4 +32,335 @@ l1: \
" ::: __clobber_all);
}
+/*
+ * Test that sync_linked_regs() preserves register IDs.
+ *
+ * The sync_linked_regs() function copies bounds from known_reg to linked
+ * registers. When doing so, it must preserve each register's original id
+ * to allow subsequent syncs from the same source to work correctly.
+ *
+ */
+SEC("socket")
+__success
+__naked void sync_linked_regs_preserves_id(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0xff; /* r0 in [0, 255] */ \
+ r1 = r0; /* r0, r1 linked with id 1 */ \
+ r1 += 4; /* r1 has id=1 and off=4 in [4, 259] */ \
+ if r1 < 10 goto l0_%=; \
+ /* r1 in [10, 259], r0 synced to [6, 255] */ \
+ r2 = r0; /* r2 has id=1 and in [6, 255] */ \
+ if r1 < 14 goto l0_%=; \
+ /* r1 in [14, 259], r0 synced to [10, 255] */ \
+ if r0 >= 10 goto l0_%=; \
+ /* Never executed */ \
+ r0 /= 0; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__success
+__naked void scalars_neg(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0xff; \
+ r1 = r0; \
+ r1 += -4; \
+ if r1 s< 0 goto l0_%=; \
+ if r0 != 0 goto l0_%=; \
+ r0 /= 0; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/* Same test but using BPF_SUB instead of BPF_ADD with negative immediate */
+SEC("socket")
+__success
+__naked void scalars_neg_sub(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0xff; \
+ r1 = r0; \
+ r1 -= 4; \
+ if r1 s< 0 goto l0_%=; \
+ if r0 != 0 goto l0_%=; \
+ r0 /= 0; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/* alu32 with negative offset */
+SEC("socket")
+__success
+__naked void scalars_neg_alu32_add(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w0 &= 0xff; \
+ w1 = w0; \
+ w1 += -4; \
+ if w1 s< 0 goto l0_%=; \
+ if w0 != 0 goto l0_%=; \
+ r0 /= 0; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/* alu32 with negative offset using SUB */
+SEC("socket")
+__success
+__naked void scalars_neg_alu32_sub(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w0 &= 0xff; \
+ w1 = w0; \
+ w1 -= 4; \
+ if w1 s< 0 goto l0_%=; \
+ if w0 != 0 goto l0_%=; \
+ r0 /= 0; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/* Positive offset: r1 = r0 + 4, then if r1 >= 6, r0 >= 2, so r0 != 0 */
+SEC("socket")
+__success
+__naked void scalars_pos(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0xff; \
+ r1 = r0; \
+ r1 += 4; \
+ if r1 < 6 goto l0_%=; \
+ if r0 != 0 goto l0_%=; \
+ r0 /= 0; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/* SUB with negative immediate: r1 -= -4 is equivalent to r1 += 4 */
+SEC("socket")
+__success
+__naked void scalars_sub_neg_imm(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0xff; \
+ r1 = r0; \
+ r1 -= -4; \
+ if r1 < 6 goto l0_%=; \
+ if r0 != 0 goto l0_%=; \
+ r0 /= 0; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/* Double ADD clears the ID (can't accumulate offsets) */
+SEC("socket")
+__failure
+__msg("div by zero")
+__naked void scalars_double_add(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0xff; \
+ r1 = r0; \
+ r1 += 2; \
+ r1 += 2; \
+ if r1 < 6 goto l0_%=; \
+ if r0 != 0 goto l0_%=; \
+ r0 /= 0; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/*
+ * Test that sync_linked_regs() correctly handles large offset differences.
+ * r1.off = S32_MIN, r2.off = 1, delta = S32_MIN - 1 requires 64-bit math.
+ */
+SEC("socket")
+__success
+__naked void scalars_sync_delta_overflow(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0xff; \
+ r1 = r0; \
+ r2 = r0; \
+ r1 += %[s32_min]; \
+ r2 += 1; \
+ if r2 s< 100 goto l0_%=; \
+ if r1 s< 0 goto l0_%=; \
+ r0 /= 0; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32),
+ [s32_min]"i"(INT_MIN)
+ : __clobber_all);
+}
+
+/*
+ * Another large delta case: r1.off = S32_MAX, r2.off = -1.
+ * delta = S32_MAX - (-1) = S32_MAX + 1 requires 64-bit math.
+ */
+SEC("socket")
+__success
+__naked void scalars_sync_delta_overflow_large_range(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0xff; \
+ r1 = r0; \
+ r2 = r0; \
+ r1 += %[s32_max]; \
+ r2 += -1; \
+ if r2 s< 0 goto l0_%=; \
+ if r1 s>= 0 goto l0_%=; \
+ r0 /= 0; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32),
+ [s32_max]"i"(INT_MAX)
+ : __clobber_all);
+}
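Both overflow tests exist because the delta between the two linked offsets does not fit in 32 bits. The failure mode is ordinary integer wrapping, shown here as a standalone illustration (not kernel code):

#include <limits.h>
#include <stdio.h>

int main(void)
{
	int off1 = INT_MIN, off2 = 1;

	/* 32-bit subtraction wraps: INT_MIN - 1 comes out as INT_MAX */
	int wrong = (int)((unsigned int)off1 - (unsigned int)off2);
	/* widening to 64 bits first yields the true delta, -2147483649 */
	long long right = (long long)off1 - (long long)off2;

	printf("32-bit: %d, 64-bit: %lld\n", wrong, right);
	return 0;
}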
+
+/*
+ * Test linked scalar tracking with alu32 and large positive offset (0x7FFFFFFF).
+ * After w1 += 0x7FFFFFFF, w1 wraps to negative for any r0 >= 1.
+ * If w1 is signed-negative, then r0 >= 1, so r0 != 0.
+ */
+SEC("socket")
+__success
+__naked void scalars_alu32_big_offset(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w0 &= 0xff; \
+ w1 = w0; \
+ w1 += 0x7FFFFFFF; \
+ if w1 s>= 0 goto l0_%=; \
+ if w0 != 0 goto l0_%=; \
+ r0 /= 0; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__failure
+__msg("div by zero")
+__naked void scalars_alu32_basic(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ w1 += 1; \
+ if r1 > 10 goto 1f; \
+ r0 >>= 32; \
+ if r0 == 0 goto 1f; \
+ r0 /= 0; \
+1: \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/*
+ * Test alu32 linked register tracking with wrapping.
+ * R0 is bounded to [0xffffff00, 0xffffffff] (high 32-bit values)
+ * w1 += 0x100 causes R1 to wrap to [0, 0xff]
+ *
+ * After sync_linked_regs, if bounds are computed correctly:
+ * R0 should be [0x00000000_ffffff00, 0x00000000_ffffff80]
+ * R0 >> 32 == 0, so div by zero is unreachable
+ *
+ * If bounds are computed incorrectly (64-bit underflow):
+ * R0 becomes [0xffffffff_ffffff00, 0xffffffff_ffffff80]
+ * R0 >> 32 == 0xffffffff != 0, so div by zero is reachable
+ */
+SEC("socket")
+__success
+__naked void scalars_alu32_wrap(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w0 |= 0xffffff00; \
+ r1 = r0; \
+ w1 += 0x100; \
+ if r1 > 0x80 goto l0_%=; \
+ r2 = r0; \
+ r2 >>= 32; \
+ if r2 == 0 goto l0_%=; \
+ r0 /= 0; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
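The expected bounds in the comment above are plain modulo-2^32 arithmetic and can be double-checked in a few lines of C (an illustration of the math, not of the verifier itself):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t lo = 0xffffff00u, hi = 0xffffffffu;

	/* w1 += 0x100 wraps [0xffffff00, 0xffffffff] to [0x00, 0xff] */
	printf("r1 in [0x%x, 0x%x]\n", lo + 0x100u, hi + 0x100u);

	/* learning r1 <= 0x80 maps back, in 32-bit space, to
	 * r0 <= 0xffffff80; as a 64-bit value r0 stays non-negative,
	 * so r0 >> 32 == 0 and the division is unreachable */
	printf("r0 max = 0x%x\n", 0x80u - 0x100u);
	return 0;
}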
+
+SEC("socket")
+__success
+void alu32_negative_offset(void)
+{
+ volatile char path[5];
+ volatile int offset = bpf_get_prandom_u32();
+ int off = offset;
+
+ if (off >= 5 && off < 10)
+ path[off - 5] = '.';
+
+	/* Avoid the compiler error: variable 'path' set but not used */
+ __sink(path[0]);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_lsm.c b/tools/testing/selftests/bpf/progs/verifier_lsm.c
index 6af9100a37ff..38e8e9176862 100644
--- a/tools/testing/selftests/bpf/progs/verifier_lsm.c
+++ b/tools/testing/selftests/bpf/progs/verifier_lsm.c
@@ -1,7 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/bpf.h>
+#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
#include "bpf_misc.h"
SEC("lsm/file_permission")
@@ -159,4 +160,32 @@ __naked int disabled_hook_test3(void *ctx)
::: __clobber_all);
}
+SEC("lsm/mmap_file")
+__description("not null checking nullable pointer in bpf_lsm_mmap_file")
+__failure __msg("R1 invalid mem access 'trusted_ptr_or_null_'")
+int BPF_PROG(no_null_check, struct file *file)
+{
+ struct inode *inode;
+
+ inode = file->f_inode;
+ __sink(inode);
+
+ return 0;
+}
+
+SEC("lsm/mmap_file")
+__description("null checking nullable pointer in bpf_lsm_mmap_file")
+__success
+int BPF_PROG(null_check, struct file *file)
+{
+ struct inode *inode;
+
+ if (file) {
+ inode = file->f_inode;
+ __sink(inode);
+ }
+
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c b/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c
index c0ce690ddb68..3072fee9a448 100644
--- a/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c
+++ b/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c
@@ -715,6 +715,51 @@ __naked void ignore_unique_scalar_ids_old(void)
: __clobber_all);
}
+/* Check that two registers with 0 scalar IDs in a verified state can be mapped
+ * to the same scalar ID in current state.
+ */
+SEC("socket")
+__success __log_level(2)
+/* The states should be equivalent on reaching insn 12. */
+__msg("12: safe")
+__msg("processed 17 insns")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void two_nil_old_ids_one_cur_id(void)
+{
+ asm volatile (
+ /* Give unique scalar IDs to r{6,7} */
+ "call %[bpf_ktime_get_ns];"
+ "r0 &= 0xff;"
+ "r6 = r0;"
+ "r6 *= 1;"
+ "call %[bpf_ktime_get_ns];"
+ "r0 &= 0xff;"
+ "r7 = r0;"
+ "r7 *= 1;"
+ "r0 = 0;"
+ /* Maybe make r{6,7} IDs identical */
+ "if r6 > r7 goto l0_%=;"
+ "goto l1_%=;"
+"l0_%=:"
+ "r6 = r7;"
+"l1_%=:"
+ /* Mark r{6,7} precise.
+ * Get here in two states:
+ * - first: r6{.id=0}, r7{.id=0} (cached state)
+ * - second: r6{.id=A}, r7{.id=A}
+ * Verifier considers such states equivalent.
+ * Thus "exit;" would be verified only once.
+ */
+ "r2 = r10;"
+ "r2 += r6;"
+ "r2 += r7;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
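The pairing rule this test and the one below rely on can be modeled with a small table: an old (cached) ID of 0 constrains nothing, a non-zero old ID must map to exactly one current ID, and no two old IDs may share a current ID. A toy approximation of the idea, not the kernel's check_ids()/check_scalar_ids():

#include <stdio.h>

#define MAX_PAIRS 16

static struct { int old, cur; } pairs[MAX_PAIRS];

static int ids_match(int old_id, int cur_id)
{
	int i;

	if (old_id == 0)
		return 1;	/* unlinked old register matches anything */
	for (i = 0; i < MAX_PAIRS; i++) {
		if (!pairs[i].old) {
			pairs[i].old = old_id;
			pairs[i].cur = cur_id;
			return 1;
		}
		if (pairs[i].old == old_id)
			return pairs[i].cur == cur_id;
		if (pairs[i].cur == cur_id)
			return 0;	/* two old IDs, one cur ID: reject */
	}
	return 0;
}

int main(void)
{
	/* old r6.id=0, r7.id=0 vs cur r6.id=5, r7.id=5: equivalent */
	printf("%d\n", ids_match(0, 5) && ids_match(0, 5));
	/* old r6.id=1, r7.id=2 vs cur both id=5: not equivalent */
	printf("%d\n", ids_match(1, 5) && ids_match(2, 5));
	return 0;
}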
+
/* Check that two different scalar IDs in a verified state can't be
* mapped to the same scalar ID in current state.
*/
@@ -723,9 +768,9 @@ __success __log_level(2)
/* The exit instruction should be reachable from two states,
* use two matches and "processed .. insns" to ensure this.
*/
-__msg("13: (95) exit")
-__msg("13: (95) exit")
-__msg("processed 18 insns")
+__msg("15: (95) exit")
+__msg("15: (95) exit")
+__msg("processed 20 insns")
__flag(BPF_F_TEST_STATE_FREQ)
__naked void two_old_ids_one_cur_id(void)
{
@@ -734,9 +779,11 @@ __naked void two_old_ids_one_cur_id(void)
"call %[bpf_ktime_get_ns];"
"r0 &= 0xff;"
"r6 = r0;"
+ "r8 = r0;"
"call %[bpf_ktime_get_ns];"
"r0 &= 0xff;"
"r7 = r0;"
+ "r9 = r0;"
"r0 = 0;"
/* Maybe make r{6,7} IDs identical */
"if r6 > r7 goto l0_%=;"
diff --git a/tools/testing/selftests/bpf/progs/verifier_subreg.c b/tools/testing/selftests/bpf/progs/verifier_subreg.c
index 8613ea160dcd..be328100ba53 100644
--- a/tools/testing/selftests/bpf/progs/verifier_subreg.c
+++ b/tools/testing/selftests/bpf/progs/verifier_subreg.c
@@ -532,6 +532,74 @@ __naked void arsh32_imm_zero_extend_check(void)
}
SEC("socket")
+__description("arsh32 imm sign positive extend check")
+__success __retval(0)
+__log_level(2)
+__msg("2: (57) r6 &= 4095 ; R6=scalar(smin=smin32=0,smax=umax=smax32=umax32=4095,var_off=(0x0; 0xfff))")
+__msg("3: (67) r6 <<= 32 ; R6=scalar(smin=smin32=0,smax=umax=0xfff00000000,smax32=umax32=0,var_off=(0x0; 0xfff00000000))")
+__msg("4: (c7) r6 s>>= 32 ; R6=scalar(smin=smin32=0,smax=umax=smax32=umax32=4095,var_off=(0x0; 0xfff))")
+__naked void arsh32_imm_sign_extend_positive_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r6 = r0; \
+ r6 &= 4095; \
+ r6 <<= 32; \
+ r6 s>>= 32; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("arsh32 imm sign negative extend check")
+__success __retval(0)
+__log_level(2)
+__msg("3: (17) r6 -= 4095 ; R6=scalar(smin=smin32=-4095,smax=smax32=0)")
+__msg("4: (67) r6 <<= 32 ; R6=scalar(smin=0xfffff00100000000,smax=smax32=umax32=0,umax=0xffffffff00000000,smin32=0,var_off=(0x0; 0xffffffff00000000))")
+__msg("5: (c7) r6 s>>= 32 ; R6=scalar(smin=smin32=-4095,smax=smax32=0)")
+__naked void arsh32_imm_sign_extend_negative_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r6 = r0; \
+ r6 &= 4095; \
+ r6 -= 4095; \
+ r6 <<= 32; \
+ r6 s>>= 32; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("arsh32 imm sign extend check")
+__success __retval(0)
+__log_level(2)
+__msg("3: (17) r6 -= 2047 ; R6=scalar(smin=smin32=-2047,smax=smax32=2048)")
+__msg("4: (67) r6 <<= 32 ; R6=scalar(smin=0xfffff80100000000,smax=0x80000000000,umax=0xffffffff00000000,smin32=0,smax32=umax32=0,var_off=(0x0; 0xffffffff00000000))")
+__msg("5: (c7) r6 s>>= 32 ; R6=scalar(smin=smin32=-2047,smax=smax32=2048)")
+__naked void arsh32_imm_sign_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r6 = r0; \
+ r6 &= 4095; \
+ r6 -= 2047; \
+ r6 <<= 32; \
+ r6 s>>= 32; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
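All three tests use the "<<= 32; s>>= 32" pair, the standard idiom for sign-extending the low 32 bits of a register. In C the same transform reduces to a cast; a standalone check of the equivalence (arithmetic right shift for signed types assumed, as on all BPF targets):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	int64_t xs[] = { -2047, 0, 2048, 4095 };

	for (int i = 0; i < 4; i++) {
		int64_t x = xs[i];
		/* r6 <<= 32; r6 s>>= 32; */
		int64_t shifted = (int64_t)((uint64_t)x << 32) >> 32;
		/* equivalent sign extension via a cast */
		int64_t casted = (int32_t)x;

		printf("%lld %lld\n", (long long)shifted, (long long)casted);
	}
	return 0;
}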
+
+SEC("socket")
__description("end16 (to_le) reg zero extend check")
__success __success_unpriv __retval(0)
__naked void le_reg_zero_extend_check_1(void)
@@ -670,4 +738,89 @@ __naked void ldx_w_zero_extend_check(void)
: __clobber_all);
}
+SEC("socket")
+__success __success_unpriv __retval(0)
+__naked void arsh_31_and(void)
+{
+	/* Below is what LLVM generates in cilium's bpf_wireguard.o */
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w2 = w0; \
+ w2 s>>= 31; \
+ w2 &= -134; /* w2 becomes 0 or -134 */ \
+ if w2 s> -1 goto +2; \
+ /* Branch always taken because w2 = -134 */ \
+ if w2 != -136 goto +1; \
+ w0 /= 0; \
+ w0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__success __success_unpriv __retval(0)
+__naked void arsh_63_and(void)
+{
+	/* Copy of arsh_31_and with s/w/r/ */
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r2 = r0; \
+ r2 <<= 32; \
+ r2 s>>= 63; \
+ r2 &= -134; \
+ if r2 s> -1 goto +2; \
+	/* Branch always taken because r2 = -134 */	\
+ if r2 != -136 goto +1; \
+ r0 /= 0; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__success __success_unpriv __retval(0)
+__naked void arsh_31_or(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w2 = w0; \
+ w2 s>>= 31; \
+ w2 |= 134; /* w2 becomes -1 or 134 */ \
+ if w2 s> -1 goto +2; \
+ /* Branch always taken because w2 = -1 */ \
+ if w2 == -1 goto +1; \
+ w0 /= 0; \
+ w0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__success __success_unpriv __retval(0)
+__naked void arsh_63_or(void)
+{
+	/* Copy of arsh_31_or with s/w/r/ */
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r2 = r0; \
+ r2 <<= 32; \
+ r2 s>>= 63; \
+ r2 |= 134; /* r2 becomes -1 or 134 */ \
+ if r2 s> -1 goto +2; \
+	/* Branch always taken because r2 = -1 */	\
+ if r2 == -1 goto +1; \
+ r0 /= 0; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
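The four tests above all hinge on the same compiler idiom: an arithmetic shift by 31 (or 63) broadcasts the sign bit, so the following AND/OR can only produce two values. A small C rendering of that fact (illustrative; assumes arithmetic right shift for signed types):

#include <stdint.h>
#include <stdio.h>

static int32_t mask_and(int32_t x)
{
	int32_t m = x >> 31;	/* 0 if x >= 0, -1 if x < 0 */

	return m & -134;	/* only 0 or -134 are possible */
}

static int32_t mask_or(int32_t x)
{
	int32_t m = x >> 31;

	return m | 134;		/* only 134 or -1 are possible */
}

int main(void)
{
	printf("%d %d\n", mask_and(5), mask_and(-5));	/* 0 -134 */
	printf("%d %d\n", mask_or(5), mask_or(-5));	/* 134 -1 */
	return 0;
}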
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_unpriv.c b/tools/testing/selftests/bpf/progs/verifier_unpriv.c
index 28b4f7035ceb..8ee1243e62a8 100644
--- a/tools/testing/selftests/bpf/progs/verifier_unpriv.c
+++ b/tools/testing/selftests/bpf/progs/verifier_unpriv.c
@@ -950,4 +950,26 @@ l3_%=: r0 = 0; \
" ::: __clobber_all);
}
+SEC("socket")
+__description("unpriv: nospec after dead stack write in helper")
+__success __success_unpriv
+__retval(0)
+/* Dead code sanitizer rewrites the call to `goto -1`. */
+__naked void unpriv_dead_helper_stack_write_nospec_result(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ if r0 != 1 goto l0_%=; \
+ r2 = 0; \
+ r3 = r10; \
+ r3 += -16; \
+ r4 = 4; \
+ r5 = 0; \
+ call %[bpf_skb_load_bytes_relative]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_skb_load_bytes_relative)
+ : __clobber_all);
+}
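The "goto -1" in the comment above is a jump whose target is the instruction itself, which guarantees the sanitized region can never be entered by falling through. A sketch of that encoding using the uapi definitions (hand-rolled here for illustration):

#include <linux/bpf.h>
#include <stdio.h>

int main(void)
{
	/* equivalent to BPF_JMP_IMM(BPF_JA, 0, 0, -1): pc += 1 + (-1) */
	struct bpf_insn ja = {
		.code = BPF_JMP | BPF_JA,
		.off = -1,
	};

	printf("code=0x%02x off=%d\n", ja.code, ja.off);
	return 0;
}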
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_value_illegal_alu.c b/tools/testing/selftests/bpf/progs/verifier_value_illegal_alu.c
index 2129e4353fd9..4d8273c258d5 100644
--- a/tools/testing/selftests/bpf/progs/verifier_value_illegal_alu.c
+++ b/tools/testing/selftests/bpf/progs/verifier_value_illegal_alu.c
@@ -173,14 +173,15 @@ __naked void flow_keys_illegal_variable_offset_alu(void)
asm volatile(" \
r6 = r1; \
r7 = *(u64*)(r6 + %[flow_keys_off]); \
- r8 = 8; \
- r8 /= 1; \
+ call %[bpf_get_prandom_u32]; \
+ r8 = r0; \
r8 &= 8; \
r7 += r8; \
r0 = *(u64*)(r7 + 0); \
exit; \
" :
- : __imm_const(flow_keys_off, offsetof(struct __sk_buff, flow_keys))
+ : __imm_const(flow_keys_off, offsetof(struct __sk_buff, flow_keys)),
+ __imm(bpf_get_prandom_u32)
: __clobber_all);
}
diff --git a/tools/testing/selftests/bpf/progs/verifier_xdp.c b/tools/testing/selftests/bpf/progs/verifier_xdp.c
index 50768ed179b3..7dc9226aeb34 100644
--- a/tools/testing/selftests/bpf/progs/verifier_xdp.c
+++ b/tools/testing/selftests/bpf/progs/verifier_xdp.c
@@ -5,6 +5,14 @@
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, __u64);
+ __uint(map_flags, BPF_F_RDONLY_PROG);
+} map_array_ro SEC(".maps");
+
SEC("xdp")
__description("XDP, using ifindex from netdev")
__success __retval(1)
@@ -21,4 +29,31 @@ l0_%=: exit; \
: __clobber_all);
}
+SEC("xdp")
+__description("XDP, using xdp_store_bytes from RO map")
+__success __retval(0)
+__naked void xdp_store_bytes_from_ro_map(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_ro] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r6; \
+ r2 = 0; \
+ r3 = r0; \
+ r4 = 8; \
+ call %[bpf_xdp_store_bytes]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_xdp_store_bytes),
+ __imm_addr(map_array_ro)
+ : __clobber_all);
+}
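In C the new XDP test corresponds roughly to the sketch below: the point is that a value from a BPF_F_RDONLY_PROG map is read-only to the program yet still valid as the source buffer for bpf_xdp_store_bytes(). This is a hedged equivalent of the asm, not the test itself:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, __u64);
	__uint(map_flags, BPF_F_RDONLY_PROG);
} ro_map SEC(".maps");

SEC("xdp")
int store_from_ro_map(struct xdp_md *ctx)
{
	int key = 0;
	__u64 *val = bpf_map_lookup_elem(&ro_map, &key);

	if (!val)
		return XDP_PASS;
	/* read-only map value used as the *source* of the packet write */
	bpf_xdp_store_bytes(ctx, 0, val, sizeof(*val));
	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";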
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/wq_failures.c b/tools/testing/selftests/bpf/progs/wq_failures.c
index d06f6d40594a..3767f5595bbc 100644
--- a/tools/testing/selftests/bpf/progs/wq_failures.c
+++ b/tools/testing/selftests/bpf/progs/wq_failures.c
@@ -97,7 +97,7 @@ __failure
/* check that the first argument of bpf_wq_set_callback()
* is a correct bpf_wq pointer.
*/
-__msg(": (85) call bpf_wq_set_callback_impl#") /* anchor message */
+__msg(": (85) call bpf_wq_set_callback#") /* anchor message */
__msg("arg#0 doesn't point to a map value")
long test_wrong_wq_pointer(void *ctx)
{
@@ -123,7 +123,7 @@ __failure
/* check that the first argument of bpf_wq_set_callback()
* is a correct bpf_wq pointer.
*/
-__msg(": (85) call bpf_wq_set_callback_impl#") /* anchor message */
+__msg(": (85) call bpf_wq_set_callback#") /* anchor message */
__msg("off 1 doesn't point to 'struct bpf_wq' that is at 0")
long test_wrong_wq_pointer_offset(void *ctx)
{
diff --git a/tools/testing/selftests/bpf/test_bpftool_map.sh b/tools/testing/selftests/bpf/test_bpftool_map.sh
deleted file mode 100755
index 515b1df0501e..000000000000
--- a/tools/testing/selftests/bpf/test_bpftool_map.sh
+++ /dev/null
@@ -1,398 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-
-TESTNAME="bpftool_map"
-BPF_FILE="security_bpf_map.bpf.o"
-BPF_ITER_FILE="bpf_iter_map_elem.bpf.o"
-PROTECTED_MAP_NAME="prot_map"
-NOT_PROTECTED_MAP_NAME="not_prot_map"
-BPF_FS_TMP_PARENT="/tmp"
-BPF_FS_PARENT=$(awk '$3 == "bpf" {print $2; exit}' /proc/mounts)
-BPF_FS_PARENT=${BPF_FS_PARENT:-$BPF_FS_TMP_PARENT}
-# bpftool will mount bpf file system under BPF_DIR if it is not mounted
-# under BPF_FS_PARENT.
-BPF_DIR="$BPF_FS_PARENT/test_$TESTNAME"
-SCRIPT_DIR=$(dirname $(realpath "$0"))
-BPF_FILE_PATH="$SCRIPT_DIR/$BPF_FILE"
-BPF_ITER_FILE_PATH="$SCRIPT_DIR/$BPF_ITER_FILE"
-BPFTOOL_PATH="bpftool"
-# Assume the script is located under tools/testing/selftests/bpf/
-KDIR_ROOT_DIR=$(realpath "$SCRIPT_DIR"/../../../../)
-
-_cleanup()
-{
- set +eu
-
- # If BPF_DIR is a mount point this will not remove the mount point itself.
- [ -d "$BPF_DIR" ] && rm -rf "$BPF_DIR" 2> /dev/null
-
- # Unmount if BPF filesystem was temporarily created.
- if [ "$BPF_FS_PARENT" = "$BPF_FS_TMP_PARENT" ]; then
- # A loop and recursive unmount are required as bpftool might
- # create multiple mounts. For example, a bind mount of the directory
- # to itself. The bind mount is created to change mount propagation
- # flags on an actual mount point.
- max_attempts=3
- attempt=0
- while mountpoint -q "$BPF_DIR" && [ $attempt -lt $max_attempts ]; do
- umount -R "$BPF_DIR" 2>/dev/null
- attempt=$((attempt+1))
- done
-
- # The directory still exists. Remove it now.
- [ -d "$BPF_DIR" ] && rm -rf "$BPF_DIR" 2>/dev/null
- fi
-}
-
-cleanup_skip()
-{
- echo "selftests: $TESTNAME [SKIP]"
- _cleanup
-
- exit $ksft_skip
-}
-
-cleanup()
-{
- if [ "$?" = 0 ]; then
- echo "selftests: $TESTNAME [PASS]"
- else
- echo "selftests: $TESTNAME [FAILED]"
- fi
- _cleanup
-}
-
-check_root_privileges() {
- if [ $(id -u) -ne 0 ]; then
- echo "Need root privileges"
- exit $ksft_skip
- fi
-}
-
-# Function to verify bpftool path.
-# Parameters:
-# $1: bpftool path
-verify_bpftool_path() {
- local bpftool_path="$1"
- if ! "$bpftool_path" version > /dev/null 2>&1; then
- echo "Could not run test without bpftool"
- exit $ksft_skip
- fi
-}
-
-# Function to verify BTF support.
-# The test requires BTF support for fmod_ret programs.
-verify_btf_support() {
- if [ ! -f /sys/kernel/btf/vmlinux ]; then
- echo "Could not run test without BTF support"
- exit $ksft_skip
- fi
-}
-
-# Function to initialize map entries with keys [0..2] and values set to 0.
-# Parameters:
-# $1: Map name
-# $2: bpftool path
-initialize_map_entries() {
- local map_name="$1"
- local bpftool_path="$2"
-
- for key in 0 1 2; do
- "$bpftool_path" map update name "$map_name" key $key 0 0 0 value 0 0 0 $key
- done
-}
-
-# Test read access to the map.
-# Parameters:
-# $1: Name command (name/pinned)
-# $2: Map name
-# $3: bpftool path
-# $4: key
-access_for_read() {
- local name_cmd="$1"
- local map_name="$2"
- local bpftool_path="$3"
- local key="$4"
-
- # Test read access to the map.
- if ! "$bpftool_path" map lookup "$name_cmd" "$map_name" key $key 1>/dev/null; then
- echo " Read access to $key in $map_name failed"
- exit 1
- fi
-
- # Test read access to map's BTF data.
- if ! "$bpftool_path" btf dump map "$name_cmd" "$map_name" 1>/dev/null; then
- echo " Read access to $map_name for BTF data failed"
- exit 1
- fi
-}
-
-# Test write access to the map.
-# Parameters:
-# $1: Name command (name/pinned)
-# $2: Map name
-# $3: bpftool path
-# $4: key
-# $5: Whether write should succeed (true/false)
-access_for_write() {
- local name_cmd="$1"
- local map_name="$2"
- local bpftool_path="$3"
- local key="$4"
- local write_should_succeed="$5"
- local value="1 1 1 1"
-
- if "$bpftool_path" map update "$name_cmd" "$map_name" key $key value \
- $value 2>/dev/null; then
- if [ "$write_should_succeed" = "false" ]; then
- echo " Write access to $key in $map_name succeeded but should have failed"
- exit 1
- fi
- else
- if [ "$write_should_succeed" = "true" ]; then
- echo " Write access to $key in $map_name failed but should have succeeded"
- exit 1
- fi
- fi
-}
-
-# Test entry deletion for the map.
-# Parameters:
-# $1: Name command (name/pinned)
-# $2: Map name
-# $3: bpftool path
-# $4: key
-# $5: Whether write should succeed (true/false)
-access_for_deletion() {
- local name_cmd="$1"
- local map_name="$2"
- local bpftool_path="$3"
- local key="$4"
- local write_should_succeed="$5"
- local value="1 1 1 1"
-
- # Test deletion by key for the map.
- # Before deleting, check the key exists.
- if ! "$bpftool_path" map lookup "$name_cmd" "$map_name" key $key 1>/dev/null; then
- echo " Key $key does not exist in $map_name"
- exit 1
- fi
-
- # Delete by key.
- if "$bpftool_path" map delete "$name_cmd" "$map_name" key $key 2>/dev/null; then
- if [ "$write_should_succeed" = "false" ]; then
- echo " Deletion for $key in $map_name succeeded but should have failed"
- exit 1
- fi
- else
- if [ "$write_should_succeed" = "true" ]; then
- echo " Deletion for $key in $map_name failed but should have succeeded"
- exit 1
- fi
- fi
-
- # After deleting, check the entry existence according to the expected status.
- if "$bpftool_path" map lookup "$name_cmd" "$map_name" key $key 1>/dev/null; then
- if [ "$write_should_succeed" = "true" ]; then
- echo " Key $key for $map_name was not deleted but should have been deleted"
- exit 1
- fi
- else
- if [ "$write_should_succeed" = "false" ]; then
- echo "Key $key for $map_name was deleted but should have not been deleted"
- exit 1
- fi
- fi
-
- # Test creation of map's deleted entry, if deletion was successful.
- # Otherwise, the entry exists.
- if "$bpftool_path" map update "$name_cmd" "$map_name" key $key value \
- $value 2>/dev/null; then
- if [ "$write_should_succeed" = "false" ]; then
- echo " Write access to $key in $map_name succeeded after deletion attempt but should have failed"
- exit 1
- fi
- else
- if [ "$write_should_succeed" = "true" ]; then
- echo " Write access to $key in $map_name failed after deletion attempt but should have succeeded"
- exit 1
- fi
- fi
-}
-
-# Test map elements iterator.
-# Parameters:
-# $1: Name command (name/pinned)
-# $2: Map name
-# $3: bpftool path
-# $4: BPF_DIR
-# $5: bpf iterator object file path
-iterate_map_elem() {
- local name_cmd="$1"
- local map_name="$2"
- local bpftool_path="$3"
- local bpf_dir="$4"
- local bpf_file="$5"
- local pin_path="$bpf_dir/map_iterator"
-
- "$bpftool_path" iter pin "$bpf_file" "$pin_path" map "$name_cmd" "$map_name"
- if [ ! -f "$pin_path" ]; then
- echo " Failed to pin iterator to $pin_path"
- exit 1
- fi
-
- cat "$pin_path" 1>/dev/null
- rm "$pin_path" 2>/dev/null
-}
-
-# Function to test map access with configurable write expectations
-# Parameters:
-# $1: Name command (name/pinned)
-# $2: Map name
-# $3: bpftool path
-# $4: key for rw
-# $5: key to delete
-# $6: Whether write should succeed (true/false)
-# $7: BPF_DIR
-# $8: bpf iterator object file path
-access_map() {
- local name_cmd="$1"
- local map_name="$2"
- local bpftool_path="$3"
- local key_for_rw="$4"
- local key_to_del="$5"
- local write_should_succeed="$6"
- local bpf_dir="$7"
- local bpf_iter_file_path="$8"
-
- access_for_read "$name_cmd" "$map_name" "$bpftool_path" "$key_for_rw"
- access_for_write "$name_cmd" "$map_name" "$bpftool_path" "$key_for_rw" \
- "$write_should_succeed"
- access_for_deletion "$name_cmd" "$map_name" "$bpftool_path" "$key_to_del" \
- "$write_should_succeed"
- iterate_map_elem "$name_cmd" "$map_name" "$bpftool_path" "$bpf_dir" \
- "$bpf_iter_file_path"
-}
-
-# Function to test map access with configurable write expectations
-# Parameters:
-# $1: Map name
-# $2: bpftool path
-# $3: BPF_DIR
-# $4: Whether write should succeed (true/false)
-# $5: bpf iterator object file path
-test_map_access() {
- local map_name="$1"
- local bpftool_path="$2"
- local bpf_dir="$3"
- local pin_path="$bpf_dir/${map_name}_pinned"
- local write_should_succeed="$4"
- local bpf_iter_file_path="$5"
-
- # Test access to the map by name.
- access_map "name" "$map_name" "$bpftool_path" "0 0 0 0" "1 0 0 0" \
- "$write_should_succeed" "$bpf_dir" "$bpf_iter_file_path"
-
- # Pin the map to the BPF filesystem
- "$bpftool_path" map pin name "$map_name" "$pin_path"
- if [ ! -e "$pin_path" ]; then
- echo " Failed to pin $map_name"
- exit 1
- fi
-
- # Test access to the pinned map.
- access_map "pinned" "$pin_path" "$bpftool_path" "0 0 0 0" "2 0 0 0" \
- "$write_should_succeed" "$bpf_dir" "$bpf_iter_file_path"
-}
-
-# Function to test map creation and map-of-maps
-# Parameters:
-# $1: bpftool path
-# $2: BPF_DIR
-test_map_creation_and_map_of_maps() {
- local bpftool_path="$1"
- local bpf_dir="$2"
- local outer_map_name="outer_map_tt"
- local inner_map_name="inner_map_tt"
-
- "$bpftool_path" map create "$bpf_dir/$inner_map_name" type array key 4 \
- value 4 entries 4 name "$inner_map_name"
- if [ ! -f "$bpf_dir/$inner_map_name" ]; then
- echo " Failed to create inner map file at $bpf_dir/$outer_map_name"
- return 1
- fi
-
- "$bpftool_path" map create "$bpf_dir/$outer_map_name" type hash_of_maps \
- key 4 value 4 entries 2 name "$outer_map_name" inner_map name "$inner_map_name"
- if [ ! -f "$bpf_dir/$outer_map_name" ]; then
- echo " Failed to create outer map file at $bpf_dir/$outer_map_name"
- return 1
- fi
-
- # Add entries to the outer map by name and by pinned path.
- "$bpftool_path" map update pinned "$bpf_dir/$outer_map_name" key 0 0 0 0 \
- value pinned "$bpf_dir/$inner_map_name"
- "$bpftool_path" map update name "$outer_map_name" key 1 0 0 0 value \
- name "$inner_map_name"
-
- # The outer map should be full by now.
- # The following map update command is expected to fail.
- if "$bpftool_path" map update name "$outer_map_name" key 2 0 0 0 value name \
- "$inner_map_name" 2>/dev/null; then
- echo " Update for $outer_map_name succeeded but should have failed"
- exit 1
- fi
-}
-
-# Function to test map access with the btf list command
-# Parameters:
-# $1: bpftool path
-test_map_access_with_btf_list() {
- local bpftool_path="$1"
-
- # The btf list command iterates over maps for
- # loaded BPF programs.
- if ! "$bpftool_path" btf list 1>/dev/null; then
- echo " Failed to access btf data"
- exit 1
- fi
-}
-
-set -eu
-
-trap cleanup_skip EXIT
-
-check_root_privileges
-
-verify_bpftool_path "$BPFTOOL_PATH"
-
-verify_btf_support
-
-trap cleanup EXIT
-
-# Load and attach the BPF programs to control maps access.
-"$BPFTOOL_PATH" prog loadall "$BPF_FILE_PATH" "$BPF_DIR" autoattach
-
-initialize_map_entries "$PROTECTED_MAP_NAME" "$BPFTOOL_PATH"
-initialize_map_entries "$NOT_PROTECTED_MAP_NAME" "$BPFTOOL_PATH"
-
-# Activate the map protection mechanism. Protection status is controlled
-# by a value stored in the prot_status_map at index 0.
-"$BPFTOOL_PATH" map update name prot_status_map key 0 0 0 0 value 1 0 0 0
-
-# Test protected map (write should fail).
-test_map_access "$PROTECTED_MAP_NAME" "$BPFTOOL_PATH" "$BPF_DIR" "false" \
- "$BPF_ITER_FILE_PATH"
-
-# Test not protected map (write should succeed).
-test_map_access "$NOT_PROTECTED_MAP_NAME" "$BPFTOOL_PATH" "$BPF_DIR" "true" \
- "$BPF_ITER_FILE_PATH"
-
-test_map_creation_and_map_of_maps "$BPFTOOL_PATH" "$BPF_DIR"
-
-test_map_access_with_btf_list "$BPFTOOL_PATH"
-
-exit 0
diff --git a/tools/testing/selftests/bpf/test_bpftool_metadata.sh b/tools/testing/selftests/bpf/test_bpftool_metadata.sh
deleted file mode 100755
index b5520692f41b..000000000000
--- a/tools/testing/selftests/bpf/test_bpftool_metadata.sh
+++ /dev/null
@@ -1,85 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-
-BPF_FILE_USED="metadata_used.bpf.o"
-BPF_FILE_UNUSED="metadata_unused.bpf.o"
-
-TESTNAME=bpftool_metadata
-BPF_FS=$(awk '$3 == "bpf" {print $2; exit}' /proc/mounts)
-BPF_DIR=$BPF_FS/test_$TESTNAME
-
-_cleanup()
-{
- set +e
- rm -rf $BPF_DIR 2> /dev/null
-}
-
-cleanup_skip()
-{
- echo "selftests: $TESTNAME [SKIP]"
- _cleanup
-
- exit $ksft_skip
-}
-
-cleanup()
-{
- if [ "$?" = 0 ]; then
- echo "selftests: $TESTNAME [PASS]"
- else
- echo "selftests: $TESTNAME [FAILED]"
- fi
- _cleanup
-}
-
-if [ $(id -u) -ne 0 ]; then
- echo "selftests: $TESTNAME [SKIP] Need root privileges"
- exit $ksft_skip
-fi
-
-if [ -z "$BPF_FS" ]; then
- echo "selftests: $TESTNAME [SKIP] Could not run test without bpffs mounted"
- exit $ksft_skip
-fi
-
-if ! bpftool version > /dev/null 2>&1; then
- echo "selftests: $TESTNAME [SKIP] Could not run test without bpftool"
- exit $ksft_skip
-fi
-
-set -e
-
-trap cleanup_skip EXIT
-
-mkdir $BPF_DIR
-
-trap cleanup EXIT
-
-bpftool prog load $BPF_FILE_UNUSED $BPF_DIR/unused
-
-METADATA_PLAIN="$(bpftool prog)"
-echo "$METADATA_PLAIN" | grep 'a = "foo"' > /dev/null
-echo "$METADATA_PLAIN" | grep 'b = 1' > /dev/null
-
-bpftool prog --json | grep '"metadata":{"a":"foo","b":1}' > /dev/null
-
-bpftool map | grep 'metadata.rodata' > /dev/null
-
-rm $BPF_DIR/unused
-
-bpftool prog load $BPF_FILE_USED $BPF_DIR/used
-
-METADATA_PLAIN="$(bpftool prog)"
-echo "$METADATA_PLAIN" | grep 'a = "bar"' > /dev/null
-echo "$METADATA_PLAIN" | grep 'b = 2' > /dev/null
-
-bpftool prog --json | grep '"metadata":{"a":"bar","b":2}' > /dev/null
-
-bpftool map | grep 'metadata.rodata' > /dev/null
-
-rm $BPF_DIR/used
-
-exit 0
diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod-events.h b/tools/testing/selftests/bpf/test_kmods/bpf_testmod-events.h
index aeef86b3da74..45a5e41f3a92 100644
--- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod-events.h
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod-events.h
@@ -63,6 +63,16 @@ BPF_TESTMOD_DECLARE_TRACE(bpf_testmod_test_writable_bare,
sizeof(struct bpf_testmod_test_writable_ctx)
);
+DECLARE_TRACE(bpf_testmod_fentry_test1,
+ TP_PROTO(int a),
+ TP_ARGS(a)
+);
+
+DECLARE_TRACE(bpf_testmod_fentry_test2,
+ TP_PROTO(int a, u64 b),
+ TP_ARGS(a, b)
+);
+
#endif /* _BPF_TESTMOD_EVENTS_H */
#undef TRACE_INCLUDE_PATH
diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
index 1669a7eeda26..186a25ab429a 100644
--- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
@@ -254,6 +254,22 @@ __bpf_kfunc int *bpf_kfunc_ret_rcu_test_nostruct(int rdonly_buf_size)
return NULL;
}
+static struct prog_test_member trusted_ptr;
+
+__bpf_kfunc struct prog_test_member *bpf_kfunc_get_default_trusted_ptr_test(void)
+{
+ return &trusted_ptr;
+}
+
+__bpf_kfunc void bpf_kfunc_put_default_trusted_ptr_test(struct prog_test_member *trusted_ptr)
+{
+ /*
+ * This BPF kfunc doesn't actually have any put/KF_ACQUIRE
+	 * semantics. It simply simulates a BPF kfunc that takes a
+ * struct prog_test_member pointer as an argument.
+ */
+}
+
__bpf_kfunc struct bpf_testmod_ctx *
bpf_testmod_ctx_create(int *err)
{
@@ -285,6 +301,12 @@ __bpf_kfunc void bpf_testmod_ctx_release(struct bpf_testmod_ctx *ctx)
call_rcu(&ctx->rcu, testmod_free_cb);
}
+__bpf_kfunc void bpf_testmod_ctx_release_dtor(void *ctx)
+{
+ bpf_testmod_ctx_release(ctx);
+}
+CFI_NOSEAL(bpf_testmod_ctx_release_dtor);
+
static struct bpf_testmod_ops3 *st_ops3;
static int bpf_testmod_test_3(void)
@@ -390,11 +412,15 @@ __weak noinline struct file *bpf_testmod_return_ptr(int arg)
noinline int bpf_testmod_fentry_test1(int a)
{
+ trace_bpf_testmod_fentry_test1_tp(a);
+
return a + 1;
}
noinline int bpf_testmod_fentry_test2(int a, u64 b)
{
+ trace_bpf_testmod_fentry_test2_tp(a, b);
+
return a + b;
}
@@ -693,9 +719,9 @@ BTF_ID_FLAGS(func, bpf_kfunc_dynptr_test)
BTF_ID_FLAGS(func, bpf_kfunc_nested_acquire_nonzero_offset_test, KF_ACQUIRE)
BTF_ID_FLAGS(func, bpf_kfunc_nested_acquire_zero_offset_test, KF_ACQUIRE)
BTF_ID_FLAGS(func, bpf_kfunc_nested_release_test, KF_RELEASE)
-BTF_ID_FLAGS(func, bpf_kfunc_trusted_vma_test, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_kfunc_trusted_task_test, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_kfunc_trusted_num_test, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_kfunc_trusted_vma_test)
+BTF_ID_FLAGS(func, bpf_kfunc_trusted_task_test)
+BTF_ID_FLAGS(func, bpf_kfunc_trusted_num_test)
BTF_ID_FLAGS(func, bpf_kfunc_rcu_task_test, KF_RCU)
BTF_ID_FLAGS(func, bpf_kfunc_ret_rcu_test, KF_RET_NULL | KF_RCU_PROTECTED)
BTF_ID_FLAGS(func, bpf_kfunc_ret_rcu_test_nostruct, KF_RET_NULL | KF_RCU_PROTECTED)
@@ -703,11 +729,13 @@ BTF_ID_FLAGS(func, bpf_testmod_ctx_create, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_testmod_ctx_release, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_testmod_ops3_call_test_1)
BTF_ID_FLAGS(func, bpf_testmod_ops3_call_test_2)
+BTF_ID_FLAGS(func, bpf_kfunc_get_default_trusted_ptr_test);
+BTF_ID_FLAGS(func, bpf_kfunc_put_default_trusted_ptr_test);
BTF_KFUNCS_END(bpf_testmod_common_kfunc_ids)
BTF_ID_LIST(bpf_testmod_dtor_ids)
BTF_ID(struct, bpf_testmod_ctx)
-BTF_ID(func, bpf_testmod_ctx_release)
+BTF_ID(func, bpf_testmod_ctx_release_dtor)
static const struct btf_kfunc_id_set bpf_testmod_common_kfunc_set = {
.owner = THIS_MODULE,
@@ -1134,6 +1162,38 @@ __bpf_kfunc int bpf_kfunc_st_ops_inc10(struct st_ops_args *args)
}
__bpf_kfunc int bpf_kfunc_multi_st_ops_test_1(struct st_ops_args *args, u32 id);
+__bpf_kfunc int bpf_kfunc_multi_st_ops_test_1_assoc(struct st_ops_args *args, struct bpf_prog_aux *aux);
+
+__bpf_kfunc int bpf_kfunc_implicit_arg(int a, struct bpf_prog_aux *aux);
+__bpf_kfunc int bpf_kfunc_implicit_arg_legacy(int a, int b, struct bpf_prog_aux *aux);
+__bpf_kfunc int bpf_kfunc_implicit_arg_legacy_impl(int a, int b, struct bpf_prog_aux *aux);
+
+/* hook targets */
+noinline void bpf_testmod_test_hardirq_fn(void) { barrier(); }
+noinline void bpf_testmod_test_softirq_fn(void) { barrier(); }
+
+/* Tasklet for SoftIRQ context */
+static void ctx_check_tasklet_fn(struct tasklet_struct *t)
+{
+ bpf_testmod_test_softirq_fn();
+}
+
+DECLARE_TASKLET(ctx_check_tasklet, ctx_check_tasklet_fn);
+
+/* IRQ Work for HardIRQ context */
+static void ctx_check_irq_fn(struct irq_work *work)
+{
+ bpf_testmod_test_hardirq_fn();
+ tasklet_schedule(&ctx_check_tasklet);
+}
+
+static struct irq_work ctx_check_irq = IRQ_WORK_INIT_HARD(ctx_check_irq_fn);
+
+/* The kfunc trigger */
+__bpf_kfunc void bpf_kfunc_trigger_ctx_check(void)
+{
+ irq_work_queue(&ctx_check_irq);
+}
BTF_KFUNCS_START(bpf_testmod_check_kfunc_ids)
BTF_ID_FLAGS(func, bpf_testmod_test_mod_kfunc)
@@ -1157,7 +1217,7 @@ BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass2)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail1)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail2)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail3)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS | KF_RCU)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_RCU)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_destructive, KF_DESTRUCTIVE)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_static_unused_arg)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_offset)
@@ -1171,11 +1231,16 @@ BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_sendmsg, KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_kfunc_call_sock_sendmsg, KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_getsockname, KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_getpeername, KF_SLEEPABLE)
-BTF_ID_FLAGS(func, bpf_kfunc_st_ops_test_prologue, KF_TRUSTED_ARGS | KF_SLEEPABLE)
-BTF_ID_FLAGS(func, bpf_kfunc_st_ops_test_epilogue, KF_TRUSTED_ARGS | KF_SLEEPABLE)
-BTF_ID_FLAGS(func, bpf_kfunc_st_ops_test_pro_epilogue, KF_TRUSTED_ARGS | KF_SLEEPABLE)
-BTF_ID_FLAGS(func, bpf_kfunc_st_ops_inc10, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_kfunc_multi_st_ops_test_1, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_kfunc_st_ops_test_prologue, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_kfunc_st_ops_test_epilogue, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_kfunc_st_ops_test_pro_epilogue, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_kfunc_st_ops_inc10)
+BTF_ID_FLAGS(func, bpf_kfunc_multi_st_ops_test_1)
+BTF_ID_FLAGS(func, bpf_kfunc_multi_st_ops_test_1_assoc, KF_IMPLICIT_ARGS)
+BTF_ID_FLAGS(func, bpf_kfunc_implicit_arg, KF_IMPLICIT_ARGS)
+BTF_ID_FLAGS(func, bpf_kfunc_implicit_arg_legacy, KF_IMPLICIT_ARGS)
+BTF_ID_FLAGS(func, bpf_kfunc_implicit_arg_legacy_impl)
+BTF_ID_FLAGS(func, bpf_kfunc_trigger_ctx_check)
BTF_KFUNCS_END(bpf_testmod_check_kfunc_ids)
static int bpf_testmod_ops_init(struct btf *btf)
@@ -1637,6 +1702,7 @@ static struct bpf_testmod_multi_st_ops *multi_st_ops_find_nolock(u32 id)
return NULL;
}
+/* Call test_1() of the struct_ops map identified by the id */
int bpf_kfunc_multi_st_ops_test_1(struct st_ops_args *args, u32 id)
{
struct bpf_testmod_multi_st_ops *st_ops;
@@ -1652,6 +1718,38 @@ int bpf_kfunc_multi_st_ops_test_1(struct st_ops_args *args, u32 id)
return ret;
}
+/* Call test_1() of the associated struct_ops map */
+int bpf_kfunc_multi_st_ops_test_1_assoc(struct st_ops_args *args, struct bpf_prog_aux *aux)
+{
+ struct bpf_testmod_multi_st_ops *st_ops;
+ int ret = -1;
+
+ st_ops = (struct bpf_testmod_multi_st_ops *)bpf_prog_get_assoc_struct_ops(aux);
+ if (st_ops)
+ ret = st_ops->test_1(args);
+
+ return ret;
+}
+
+int bpf_kfunc_implicit_arg(int a, struct bpf_prog_aux *aux)
+{
+ if (aux && a > 0)
+ return a;
+ return -EINVAL;
+}
+
+int bpf_kfunc_implicit_arg_legacy(int a, int b, struct bpf_prog_aux *aux)
+{
+ if (aux)
+ return a + b;
+ return -EINVAL;
+}
+
+int bpf_kfunc_implicit_arg_legacy_impl(int a, int b, struct bpf_prog_aux *aux)
+{
+ return bpf_kfunc_implicit_arg_legacy(a, b, aux);
+}
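Judging by the KF_IMPLICIT_ARGS flags above and the header change further below, which declares bpf_kfunc_multi_st_ops_test_1_assoc() without its trailing aux parameter, BPF programs call these kfuncs with the implicit argument omitted and the verifier supplies it. A hedged sketch of what a caller might look like (assumed usage; program type and values are illustrative):

#include <vmlinux.h>
#include <bpf/bpf_helpers.h>

/* the trailing 'struct bpf_prog_aux *aux' is implicit, so it is
 * left out of the BPF-side declaration (assumption per the diff) */
extern int bpf_kfunc_implicit_arg(int a) __weak __ksym;

SEC("syscall")
int call_implicit_arg(void *ctx)
{
	/* expected to return 5; aux is filled in by the verifier */
	return bpf_kfunc_implicit_arg(5) == 5 ? 0 : 1;
}

char _license[] SEC("license") = "GPL";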
+
static int multi_st_ops_reg(void *kdata, struct bpf_link *link)
{
struct bpf_testmod_multi_st_ops *st_ops =
@@ -1774,6 +1872,10 @@ static void bpf_testmod_exit(void)
while (refcount_read(&prog_test_struct.cnt) > 1)
msleep(20);
+	/* Clean up irq_work and tasklet */
+ irq_work_sync(&ctx_check_irq);
+ tasklet_kill(&ctx_check_tasklet);
+
bpf_kfunc_close_sock();
sysfs_remove_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
unregister_bpf_testmod_uprobe();
diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h b/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h
index 4df6fa6a92cb..d5c5454e257e 100644
--- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h
@@ -161,6 +161,16 @@ void bpf_kfunc_rcu_task_test(struct task_struct *ptr) __ksym;
struct task_struct *bpf_kfunc_ret_rcu_test(void) __ksym;
int *bpf_kfunc_ret_rcu_test_nostruct(int rdonly_buf_size) __ksym;
-int bpf_kfunc_multi_st_ops_test_1(struct st_ops_args *args, u32 id) __ksym;
+#ifndef __KERNEL__
+extern int bpf_kfunc_multi_st_ops_test_1(struct st_ops_args *args, u32 id) __weak __ksym;
+extern int bpf_kfunc_multi_st_ops_test_1_assoc(struct st_ops_args *args) __weak __ksym;
+#endif
+
+struct prog_test_member *bpf_kfunc_get_default_trusted_ptr_test(void) __ksym;
+void bpf_kfunc_put_default_trusted_ptr_test(struct prog_test_member *trusted_ptr) __ksym;
+
+void bpf_testmod_test_hardirq_fn(void);
+void bpf_testmod_test_softirq_fn(void);
+void bpf_kfunc_trigger_ctx_check(void) __ksym;
#endif /* _BPF_TESTMOD_KFUNC_H */
diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h
index 9437bdd4afa5..a5576b2dfc26 100644
--- a/tools/testing/selftests/bpf/trace_helpers.h
+++ b/tools/testing/selftests/bpf/trace_helpers.h
@@ -4,6 +4,18 @@
#include <bpf/libbpf.h>
+#ifdef __x86_64__
+#define SYS_PREFIX "__x64_"
+#elif defined(__s390x__)
+#define SYS_PREFIX "__s390x_"
+#elif defined(__aarch64__)
+#define SYS_PREFIX "__arm64_"
+#elif defined(__riscv)
+#define SYS_PREFIX "__riscv_"
+#else
+#define SYS_PREFIX ""
+#endif
+
#define __ALIGN_MASK(x, mask) (((x)+(mask))&~(mask))
#define ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1)
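These prefixes match the per-architecture syscall wrapper symbols, so a test can build an attach target that works across architectures. A short illustrative use (the syscall name is arbitrary):

#include <stdio.h>

#include "trace_helpers.h"	/* provides SYS_PREFIX */

int main(void)
{
	/* literal concatenation, e.g. "__x64_sys_nanosleep" on x86-64 */
	const char *kprobe_name = SYS_PREFIX "sys_nanosleep";

	printf("%s\n", kprobe_name);
	return 0;
}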
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index c8d640802cce..9ca83dce100d 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -220,7 +220,7 @@
},
.result_unpriv = REJECT,
.result = REJECT,
- .errstr = "variable ptr_ access var_off=(0x0; 0x7) disallowed",
+ .errstr = "variable trusted_ptr_ access var_off=(0x0; 0x7) disallowed",
},
{
"calls: invalid kfunc call: referenced arg needs refcounted PTR_TO_BTF_ID",
diff --git a/tools/testing/selftests/bpf/verifier/direct_value_access.c b/tools/testing/selftests/bpf/verifier/direct_value_access.c
index c0648dc009b5..e569d119fb60 100644
--- a/tools/testing/selftests/bpf/verifier/direct_value_access.c
+++ b/tools/testing/selftests/bpf/verifier/direct_value_access.c
@@ -81,7 +81,7 @@
},
.fixup_map_array_48b = { 1 },
.result = REJECT,
- .errstr = "direct value offset of 4294967295 is not allowed",
+ .errstr = "invalid access to map value pointer, value_size=48 off=4294967295",
},
{
"direct map access, write test 8",
@@ -141,7 +141,7 @@
},
.fixup_map_array_48b = { 1 },
.result = REJECT,
- .errstr = "direct value offset of 536870912 is not allowed",
+ .errstr = "invalid access to map value pointer, value_size=48 off=536870912",
},
{
"direct map access, write test 13",
diff --git a/tools/testing/selftests/bpf/verifier/precise.c b/tools/testing/selftests/bpf/verifier/precise.c
index 59a020c35647..061d98f6e9bb 100644
--- a/tools/testing/selftests/bpf/verifier/precise.c
+++ b/tools/testing/selftests/bpf/verifier/precise.c
@@ -229,11 +229,11 @@
{
"precise: program doesn't prematurely prune branches",
.insns = {
- BPF_ALU64_IMM(BPF_MOV, BPF_REG_6, 0x400),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_0),
BPF_ALU64_IMM(BPF_MOV, BPF_REG_7, 0),
BPF_ALU64_IMM(BPF_MOV, BPF_REG_8, 0),
BPF_ALU64_IMM(BPF_MOV, BPF_REG_9, 0x80000000),
- BPF_ALU64_IMM(BPF_MOD, BPF_REG_6, 0x401),
BPF_JMP_IMM(BPF_JA, 0, 0, 0),
BPF_JMP_REG(BPF_JLE, BPF_REG_6, BPF_REG_9, 2),
BPF_ALU64_IMM(BPF_MOD, BPF_REG_6, 1),
diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c
index e962f133250c..1be1e353d40a 100644
--- a/tools/testing/selftests/bpf/veristat.c
+++ b/tools/testing/selftests/bpf/veristat.c
@@ -2580,7 +2580,7 @@ static void output_stats(const struct verif_stats *s, enum resfmt fmt, bool last
if (last && fmt == RESFMT_TABLE) {
output_header_underlines();
printf("Done. Processed %d files, %d programs. Skipped %d files, %d programs.\n",
- env.files_processed, env.files_skipped, env.progs_processed, env.progs_skipped);
+ env.files_processed, env.progs_processed, env.files_skipped, env.progs_skipped);
}
}
diff --git a/tools/testing/selftests/hid/progs/hid_bpf_helpers.h b/tools/testing/selftests/hid/progs/hid_bpf_helpers.h
index 531228b849da..80ab60905865 100644
--- a/tools/testing/selftests/hid/progs/hid_bpf_helpers.h
+++ b/tools/testing/selftests/hid/progs/hid_bpf_helpers.h
@@ -116,10 +116,8 @@ extern int hid_bpf_try_input_report(struct hid_bpf_ctx *ctx,
/* bpf_wq implementation */
extern int bpf_wq_init(struct bpf_wq *wq, void *p__map, unsigned int flags) __weak __ksym;
extern int bpf_wq_start(struct bpf_wq *wq, unsigned int flags) __weak __ksym;
-extern int bpf_wq_set_callback_impl(struct bpf_wq *wq,
- int (callback_fn)(void *map, int *key, void *wq),
- unsigned int flags__k, void *aux__ign) __weak __ksym;
-#define bpf_wq_set_callback(timer, cb, flags) \
- bpf_wq_set_callback_impl(timer, cb, flags, NULL)
+extern int bpf_wq_set_callback(struct bpf_wq *wq,
+ int (*callback_fn)(void *, int *, void *),
+ unsigned int flags) __weak __ksym;
#endif /* __HID_BPF_HELPERS_H */