summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/bpf/bpftool/.gitignore2
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-feature.rst19
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-prog.rst22
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst116
-rw-r--r--tools/bpf/bpftool/Makefile36
-rw-r--r--tools/bpf/bpftool/bash-completion/bpftool110
-rw-r--r--tools/bpf/bpftool/btf.c5
-rw-r--r--tools/bpf/bpftool/btf_dumper.c199
-rw-r--r--tools/bpf/bpftool/common.c40
-rw-r--r--tools/bpf/bpftool/feature.c283
-rw-r--r--tools/bpf/bpftool/main.c10
-rw-r--r--tools/bpf/bpftool/main.h12
-rw-r--r--tools/bpf/bpftool/map.c2
-rw-r--r--tools/bpf/bpftool/prog.c458
-rw-r--r--tools/bpf/bpftool/skeleton/profiler.bpf.c119
-rw-r--r--tools/bpf/bpftool/skeleton/profiler.h46
-rw-r--r--tools/bpf/bpftool/struct_ops.c596
-rw-r--r--tools/bpf/runqslower/runqslower.bpf.c4
-rw-r--r--tools/build/feature/Makefile9
-rw-r--r--tools/build/feature/test-clang-bpf-global-var.c4
-rw-r--r--tools/include/uapi/linux/bpf.h324
-rw-r--r--tools/include/uapi/linux/if_link.h6
-rw-r--r--tools/include/uapi/linux/types.h (renamed from tools/testing/selftests/bpf/include/uapi/linux/types.h)0
-rw-r--r--tools/lib/bpf/bpf.c37
-rw-r--r--tools/lib/bpf/bpf.h19
-rw-r--r--tools/lib/bpf/bpf_tracing.h223
-rw-r--r--tools/lib/bpf/btf.c20
-rw-r--r--tools/lib/bpf/btf_dump.c10
-rw-r--r--tools/lib/bpf/libbpf.c324
-rw-r--r--tools/lib/bpf/libbpf.h31
-rw-r--r--tools/lib/bpf/libbpf.map19
-rw-r--r--tools/lib/bpf/libbpf_probes.c1
-rw-r--r--tools/lib/bpf/netlink.c34
-rw-r--r--tools/lib/bpf/xsk.c16
-rw-r--r--tools/scripts/Makefile.include1
-rw-r--r--tools/testing/selftests/.gitignore5
-rw-r--r--tools/testing/selftests/Makefile1
-rw-r--r--tools/testing/selftests/bpf/.gitignore1
-rw-r--r--tools/testing/selftests/bpf/Makefile33
-rw-r--r--tools/testing/selftests/bpf/bpf_tcp_helpers.h2
-rw-r--r--tools/testing/selftests/bpf/bpf_trace_helpers.h120
-rw-r--r--tools/testing/selftests/bpf/config2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c39
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_dump.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_link.c244
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fentry_fexit.c12
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fentry_test.c14
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_test.c69
-rw-r--r--tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/global_data_init.c61
-rw-r--r--tools/testing/selftests/bpf/prog_tests/link_pinning.c105
-rw-r--r--tools/testing/selftests/bpf/prog_tests/modify_return.c65
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c88
-rw-r--r--tools/testing/selftests/bpf/prog_tests/perf_branches.c170
-rw-r--r--tools/testing/selftests/bpf/prog_tests/select_reuseport.c73
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sk_assign.c309
-rw-r--r--tools/testing/selftests/bpf/prog_tests/skb_ctx.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c124
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_listen.c1635
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tcp_rtt.c32
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_lsm.c86
-rw-r--r--tools/testing/selftests/bpf/prog_tests/trampoline_count.c25
-rw-r--r--tools/testing/selftests/bpf/prog_tests/vmlinux.c43
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_attach.c62
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c69
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_dctcp.c18
-rw-r--r--tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c2
-rw-r--r--tools/testing/selftests/bpf/progs/fentry_test.c2
-rw-r--r--tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c2
-rw-r--r--tools/testing/selftests/bpf/progs/fexit_bpf2bpf_simple.c2
-rw-r--r--tools/testing/selftests/bpf/progs/fexit_test.c2
-rw-r--r--tools/testing/selftests/bpf/progs/kfree_skb.c2
-rw-r--r--tools/testing/selftests/bpf/progs/lsm.c48
-rw-r--r--tools/testing/selftests/bpf/progs/modify_return.c49
-rw-r--r--tools/testing/selftests/bpf/progs/sockmap_parse_prog.c1
-rw-r--r--tools/testing/selftests/bpf/progs/test_attach_probe.c3
-rw-r--r--tools/testing/selftests/bpf/progs/test_cgroup_link.c24
-rw-r--r--tools/testing/selftests/bpf/progs/test_get_stack_rawtp_err.c26
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_data.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_link_pinning.c25
-rw-r--r--tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c37
-rw-r--r--tools/testing/selftests/bpf/progs/test_overhead.c7
-rw-r--r--tools/testing/selftests/bpf/progs/test_perf_branches.c50
-rw-r--r--tools/testing/selftests/bpf/progs/test_perf_buffer.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_probe_user.c1
-rw-r--r--tools/testing/selftests/bpf/progs/test_sk_assign.c204
-rw-r--r--tools/testing/selftests/bpf/progs/test_skb_ctx.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_listen.c98
-rw-r--r--tools/testing/selftests/bpf/progs/test_trampoline_count.c3
-rw-r--r--tools/testing/selftests/bpf/progs/test_vmlinux.c84
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c30
-rw-r--r--tools/testing/selftests/bpf/test_bpftool.py178
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool.sh5
-rw-r--r--tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c159
-rw-r--r--tools/testing/selftests/bpf/test_maps.c6
-rw-r--r--tools/testing/selftests/bpf/test_progs.c102
-rw-r--r--tools/testing/selftests/bpf/test_progs.h9
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.c23
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.h1
-rw-r--r--tools/testing/selftests/bpf/verifier/bounds.c57
-rw-r--r--tools/testing/selftests/bpf/verifier/bpf_get_stack.c8
-rw-r--r--tools/testing/selftests/bpf/verifier/ctx.c105
-rw-r--r--tools/testing/selftests/bpf/verifier/ctx_skb.c47
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/blackhole_routes.sh5
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_acl_drops.sh151
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh28
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh55
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh384
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh4
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/extack.sh45
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh13
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh18
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh30
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/router_scale.sh53
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh68
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/sch_ets.sh14
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh533
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh94
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/sch_red_prio.sh5
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh68
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh222
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/sharedbuffer_configuration.py416
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh5
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh4
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/tc_action_hw_stats.sh130
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/tc_flower_restrictions.sh186
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh31
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/vxlan.sh229
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/devlink.sh15
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh121
-rw-r--r--tools/testing/selftests/net/.gitignore5
-rw-r--r--tools/testing/selftests/net/Makefile4
-rw-r--r--tools/testing/selftests/net/config2
-rw-r--r--tools/testing/selftests/net/forwarding/devlink_lib.sh72
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh88
-rwxr-xr-xtools/testing/selftests/net/forwarding/pedit_dsfield.sh238
-rwxr-xr-xtools/testing/selftests/net/forwarding/sch_ets.sh9
-rw-r--r--tools/testing/selftests/net/forwarding/sch_ets_tests.sh10
-rwxr-xr-xtools/testing/selftests/net/forwarding/skbedit_priority.sh168
-rw-r--r--tools/testing/selftests/net/forwarding/tc_common.sh32
-rw-r--r--tools/testing/selftests/net/hwtstamp_config.c (renamed from tools/testing/selftests/networking/timestamping/hwtstamp_config.c)0
-rw-r--r--tools/testing/selftests/net/mptcp/.gitignore1
-rw-r--r--tools/testing/selftests/net/mptcp/Makefile7
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_connect.c71
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh24
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh357
-rwxr-xr-xtools/testing/selftests/net/mptcp/pm_netlink.sh130
-rw-r--r--tools/testing/selftests/net/mptcp/pm_nl_ctl.c616
-rw-r--r--tools/testing/selftests/net/reuseaddr_ports_exhausted.c162
-rwxr-xr-xtools/testing/selftests/net/reuseaddr_ports_exhausted.sh35
-rw-r--r--tools/testing/selftests/net/rxtimestamp.c (renamed from tools/testing/selftests/networking/timestamping/rxtimestamp.c)0
-rw-r--r--tools/testing/selftests/net/timestamping.c (renamed from tools/testing/selftests/networking/timestamping/timestamping.c)0
-rw-r--r--tools/testing/selftests/net/txtimestamp.c (renamed from tools/testing/selftests/networking/timestamping/txtimestamp.c)179
-rwxr-xr-xtools/testing/selftests/net/txtimestamp.sh (renamed from tools/testing/selftests/networking/timestamping/txtimestamp.sh)33
-rw-r--r--tools/testing/selftests/networking/timestamping/.gitignore4
-rw-r--r--tools/testing/selftests/networking/timestamping/Makefile11
-rw-r--r--tools/testing/selftests/networking/timestamping/config2
-rw-r--r--tools/testing/selftests/tc-testing/config6
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/filters/basic.json902
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/red.json185
-rw-r--r--tools/testing/vsock/vsock_test.c77
164 files changed, 13419 insertions, 1024 deletions
diff --git a/tools/bpf/bpftool/.gitignore b/tools/bpf/bpftool/.gitignore
index b13926432b84..8d6e8901ed2b 100644
--- a/tools/bpf/bpftool/.gitignore
+++ b/tools/bpf/bpftool/.gitignore
@@ -1,7 +1,9 @@
*.d
+/_bpftool
/bpftool
bpftool*.8
bpf-helpers.*
FEATURE-DUMP.bpftool
feature
libbpf
+profiler.skel.h
diff --git a/tools/bpf/bpftool/Documentation/bpftool-feature.rst b/tools/bpf/bpftool/Documentation/bpftool-feature.rst
index 4d08f35034a2..b04156cfd7a3 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-feature.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-feature.rst
@@ -19,19 +19,24 @@ SYNOPSIS
FEATURE COMMANDS
================
-| **bpftool** **feature probe** [*COMPONENT*] [**macros** [**prefix** *PREFIX*]]
+| **bpftool** **feature probe** [*COMPONENT*] [**full**] [**macros** [**prefix** *PREFIX*]]
| **bpftool** **feature help**
|
| *COMPONENT* := { **kernel** | **dev** *NAME* }
DESCRIPTION
===========
- **bpftool feature probe** [**kernel**] [**macros** [**prefix** *PREFIX*]]
+ **bpftool feature probe** [**kernel**] [**full**] [**macros** [**prefix** *PREFIX*]]
Probe the running kernel and dump a number of eBPF-related
parameters, such as availability of the **bpf()** system call,
JIT status, eBPF program types availability, eBPF helper
functions availability, and more.
+ By default, bpftool **does not run probes** for
+ **bpf_probe_write_user**\ () and **bpf_trace_printk**\()
+ helpers which print warnings to kernel logs. To enable them
+ and run all probes, the **full** keyword should be used.
+
If the **macros** keyword (but not the **-j** option) is
passed, a subset of the output is dumped as a list of
**#define** macros that are ready to be included in a C
@@ -44,16 +49,12 @@ DESCRIPTION
Keyword **kernel** can be omitted. If no probe target is
specified, probing the kernel is the default behaviour.
- Note that when probed, some eBPF helpers (e.g.
- **bpf_trace_printk**\ () or **bpf_probe_write_user**\ ()) may
- print warnings to kernel logs.
-
- **bpftool feature probe dev** *NAME* [**macros** [**prefix** *PREFIX*]]
+ **bpftool feature probe dev** *NAME* [**full**] [**macros** [**prefix** *PREFIX*]]
Probe network device for supported eBPF features and dump
results to the console.
- The two keywords **macros** and **prefix** have the same
- role as when probing the kernel.
+ The keywords **full**, **macros** and **prefix** have the
+ same role as when probing the kernel.
**bpftool feature help**
Print short help message.
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index 64ddf8a4c518..9f19404f470e 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -30,6 +30,7 @@ PROG COMMANDS
| **bpftool** **prog detach** *PROG* *ATTACH_TYPE* [*MAP*]
| **bpftool** **prog tracelog**
| **bpftool** **prog run** *PROG* **data_in** *FILE* [**data_out** *FILE* [**data_size_out** *L*]] [**ctx_in** *FILE* [**ctx_out** *FILE* [**ctx_size_out** *M*]]] [**repeat** *N*]
+| **bpftool** **prog profile** *PROG* [**duration** *DURATION*] *METRICs*
| **bpftool** **prog help**
|
| *MAP* := { **id** *MAP_ID* | **pinned** *FILE* }
@@ -42,11 +43,15 @@ PROG COMMANDS
| **cgroup/bind4** | **cgroup/bind6** | **cgroup/post_bind4** | **cgroup/post_bind6** |
| **cgroup/connect4** | **cgroup/connect6** | **cgroup/sendmsg4** | **cgroup/sendmsg6** |
| **cgroup/recvmsg4** | **cgroup/recvmsg6** | **cgroup/sysctl** |
-| **cgroup/getsockopt** | **cgroup/setsockopt**
+| **cgroup/getsockopt** | **cgroup/setsockopt** |
+| **struct_ops** | **fentry** | **fexit** | **freplace**
| }
| *ATTACH_TYPE* := {
| **msg_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector**
| }
+| *METRIC* := {
+| **cycles** | **instructions** | **l1d_loads** | **llc_misses**
+| }
DESCRIPTION
@@ -188,6 +193,12 @@ DESCRIPTION
not all of them can take the **ctx_in**/**ctx_out**
arguments. bpftool does not perform checks on program types.
+ **bpftool prog profile** *PROG* [**duration** *DURATION*] *METRICs*
+ Profile *METRICs* for bpf program *PROG* for *DURATION*
+ seconds or until user hits Ctrl-C. *DURATION* is optional.
+ If *DURATION* is not specified, the profiling will run up to
+ UINT_MAX seconds.
+
**bpftool prog help**
Print short help message.
@@ -310,6 +321,15 @@ EXAMPLES
**# rm /sys/fs/bpf/xdp1**
+|
+| **# bpftool prog profile id 337 duration 10 cycles instructions llc_misses**
+
+::
+ 51397 run_cnt
+ 40176203 cycles (83.05%)
+ 42518139 instructions # 1.06 insns per cycle (83.39%)
+ 123 llc_misses # 2.89 LLC misses per million insns (83.15%)
+
SEE ALSO
========
**bpf**\ (2),
diff --git a/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst b/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst
new file mode 100644
index 000000000000..f045cc89dd6d
--- /dev/null
+++ b/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst
@@ -0,0 +1,116 @@
+==================
+bpftool-struct_ops
+==================
+-------------------------------------------------------------------------------
+tool to register/unregister/introspect BPF struct_ops
+-------------------------------------------------------------------------------
+
+:Manual section: 8
+
+SYNOPSIS
+========
+
+ **bpftool** [*OPTIONS*] **struct_ops** *COMMAND*
+
+ *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] }
+
+ *COMMANDS* :=
+ { **show** | **list** | **dump** | **register** | **unregister** | **help** }
+
+STRUCT_OPS COMMANDS
+===================
+
+| **bpftool** **struct_ops { show | list }** [*STRUCT_OPS_MAP*]
+| **bpftool** **struct_ops dump** [*STRUCT_OPS_MAP*]
+| **bpftool** **struct_ops register** *OBJ*
+| **bpftool** **struct_ops unregister** *STRUCT_OPS_MAP*
+| **bpftool** **struct_ops help**
+|
+| *STRUCT_OPS_MAP* := { **id** *STRUCT_OPS_MAP_ID* | **name** *STRUCT_OPS_MAP_NAME* }
+| *OBJ* := /a/file/of/bpf_struct_ops.o
+
+
+DESCRIPTION
+===========
+ **bpftool struct_ops { show | list }** [*STRUCT_OPS_MAP*]
+ Show brief information about the struct_ops in the system.
+ If *STRUCT_OPS_MAP* is specified, it shows information only
+ for the given struct_ops. Otherwise, it lists all struct_ops
+ currently existing in the system.
+
+ Output will start with struct_ops map ID, followed by its map
+ name and its struct_ops's kernel type.
+
+ **bpftool struct_ops dump** [*STRUCT_OPS_MAP*]
+ Dump details information about the struct_ops in the system.
+ If *STRUCT_OPS_MAP* is specified, it dumps information only
+ for the given struct_ops. Otherwise, it dumps all struct_ops
+ currently existing in the system.
+
+ **bpftool struct_ops register** *OBJ*
+ Register bpf struct_ops from *OBJ*. All struct_ops under
+ the ELF section ".struct_ops" will be registered to
+ its kernel subsystem.
+
+ **bpftool struct_ops unregister** *STRUCT_OPS_MAP*
+ Unregister the *STRUCT_OPS_MAP* from the kernel subsystem.
+
+ **bpftool struct_ops help**
+ Print short help message.
+
+OPTIONS
+=======
+ -h, --help
+ Print short generic help message (similar to **bpftool help**).
+
+ -V, --version
+ Print version number (similar to **bpftool version**).
+
+ -j, --json
+ Generate JSON output. For commands that cannot produce JSON, this
+ option has no effect.
+
+ -p, --pretty
+ Generate human-readable JSON output. Implies **-j**.
+
+ -d, --debug
+ Print all logs available, even debug-level information. This
+ includes logs from libbpf as well as from the verifier, when
+ attempting to load programs.
+
+EXAMPLES
+========
+**# bpftool struct_ops show**
+
+::
+
+ 100: dctcp tcp_congestion_ops
+ 105: cubic tcp_congestion_ops
+
+**# bpftool struct_ops unregister id 105**
+
+::
+
+ Unregistered tcp_congestion_ops cubic id 105
+
+**# bpftool struct_ops register bpf_cubic.o**
+
+::
+
+ Registered tcp_congestion_ops cubic id 110
+
+
+SEE ALSO
+========
+ **bpf**\ (2),
+ **bpf-helpers**\ (7),
+ **bpftool**\ (8),
+ **bpftool-prog**\ (8),
+ **bpftool-map**\ (8),
+ **bpftool-cgroup**\ (8),
+ **bpftool-feature**\ (8),
+ **bpftool-net**\ (8),
+ **bpftool-perf**\ (8),
+ **bpftool-btf**\ (8)
+ **bpftool-gen**\ (8)
+
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index c4e810335810..f584d1fdfc64 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -59,10 +59,12 @@ LIBS = $(LIBBPF) -lelf -lz
INSTALL ?= install
RM ?= rm -f
+CLANG ?= clang
FEATURE_USER = .bpftool
-FEATURE_TESTS = libbfd disassembler-four-args reallocarray zlib
-FEATURE_DISPLAY = libbfd disassembler-four-args zlib
+FEATURE_TESTS = libbfd disassembler-four-args reallocarray zlib \
+ clang-bpf-global-var
+FEATURE_DISPLAY = libbfd disassembler-four-args zlib clang-bpf-global-var
check_feat := 1
NON_CHECK_FEAT_TARGETS := clean uninstall doc doc-clean doc-install doc-uninstall
@@ -110,14 +112,39 @@ SRCS += $(BFD_SRCS)
endif
OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o
+_OBJS = $(filter-out $(OUTPUT)prog.o,$(OBJS)) $(OUTPUT)_prog.o
+
+ifeq ($(feature-clang-bpf-global-var),1)
+ __OBJS = $(OBJS)
+else
+ __OBJS = $(_OBJS)
+endif
+
+$(OUTPUT)_prog.o: prog.c
+ $(QUIET_CC)$(COMPILE.c) -MMD -DBPFTOOL_WITHOUT_SKELETONS -o $@ $<
+
+$(OUTPUT)_bpftool: $(_OBJS) $(LIBBPF)
+ $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(_OBJS) $(LIBS)
+
+skeleton/profiler.bpf.o: skeleton/profiler.bpf.c $(LIBBPF)
+ $(QUIET_CLANG)$(CLANG) \
+ -I$(srctree)/tools/include/uapi/ \
+ -I$(LIBBPF_PATH) -I$(srctree)/tools/lib \
+ -g -O2 -target bpf -c $< -o $@
+
+profiler.skel.h: $(OUTPUT)_bpftool skeleton/profiler.bpf.o
+ $(QUIET_GEN)$(OUTPUT)./_bpftool gen skeleton skeleton/profiler.bpf.o > $@
+
+$(OUTPUT)prog.o: prog.c profiler.skel.h
+ $(QUIET_CC)$(COMPILE.c) -MMD -o $@ $<
$(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c
$(QUIET_CC)$(COMPILE.c) -MMD -o $@ $<
$(OUTPUT)feature.o: | zdep
-$(OUTPUT)bpftool: $(OBJS) $(LIBBPF)
- $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJS) $(LIBS)
+$(OUTPUT)bpftool: $(__OBJS) $(LIBBPF)
+ $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(__OBJS) $(LIBS)
$(OUTPUT)%.o: %.c
$(QUIET_CC)$(COMPILE.c) -MMD -o $@ $<
@@ -125,6 +152,7 @@ $(OUTPUT)%.o: %.c
clean: $(LIBBPF)-clean
$(call QUIET_CLEAN, bpftool)
$(Q)$(RM) -- $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d
+ $(Q)$(RM) -- $(OUTPUT)_bpftool profiler.skel.h skeleton/profiler.bpf.o
$(Q)$(RM) -r -- $(OUTPUT)libbpf/
$(call QUIET_CLEAN, core-gen)
$(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.bpftool
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 754d8395e451..45ee99b159e2 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -337,6 +337,7 @@ _bpftool()
local PROG_TYPE='id pinned tag name'
local MAP_TYPE='id pinned name'
+ local METRIC_TYPE='cycles instructions l1d_loads llc_misses'
case $command in
show|list)
[[ $prev != "$command" ]] && return 0
@@ -388,7 +389,7 @@ _bpftool()
_bpftool_get_prog_ids
;;
name)
- _bpftool_get_map_names
+ _bpftool_get_prog_names
;;
pinned)
_filedir
@@ -469,7 +470,8 @@ _bpftool()
cgroup/recvmsg4 cgroup/recvmsg6 \
cgroup/post_bind4 cgroup/post_bind6 \
cgroup/sysctl cgroup/getsockopt \
- cgroup/setsockopt" -- \
+ cgroup/setsockopt struct_ops \
+ fentry fexit freplace" -- \
"$cur" ) )
return 0
;;
@@ -497,9 +499,51 @@ _bpftool()
tracelog)
return 0
;;
+ profile)
+ case $cword in
+ 3)
+ COMPREPLY=( $( compgen -W "$PROG_TYPE" -- "$cur" ) )
+ return 0
+ ;;
+ 4)
+ case $prev in
+ id)
+ _bpftool_get_prog_ids
+ ;;
+ name)
+ _bpftool_get_prog_names
+ ;;
+ pinned)
+ _filedir
+ ;;
+ esac
+ return 0
+ ;;
+ 5)
+ COMPREPLY=( $( compgen -W "$METRIC_TYPE duration" -- "$cur" ) )
+ return 0
+ ;;
+ 6)
+ case $prev in
+ duration)
+ return 0
+ ;;
+ *)
+ COMPREPLY=( $( compgen -W "$METRIC_TYPE" -- "$cur" ) )
+ return 0
+ ;;
+ esac
+ return 0
+ ;;
+ *)
+ COMPREPLY=( $( compgen -W "$METRIC_TYPE" -- "$cur" ) )
+ return 0
+ ;;
+ esac
+ ;;
run)
- if [[ ${#words[@]} -lt 5 ]]; then
- _filedir
+ if [[ ${#words[@]} -eq 4 ]]; then
+ COMPREPLY=( $( compgen -W "$PROG_TYPE" -- "$cur" ) )
return 0
fi
case $prev in
@@ -507,6 +551,10 @@ _bpftool()
_bpftool_get_prog_ids
return 0
;;
+ name)
+ _bpftool_get_prog_names
+ return 0
+ ;;
data_in|data_out|ctx_in|ctx_out)
_filedir
return 0
@@ -524,7 +572,35 @@ _bpftool()
*)
[[ $prev == $object ]] && \
COMPREPLY=( $( compgen -W 'dump help pin attach detach \
- load loadall show list tracelog run' -- "$cur" ) )
+ load loadall show list tracelog run profile' -- "$cur" ) )
+ ;;
+ esac
+ ;;
+ struct_ops)
+ local STRUCT_OPS_TYPE='id name'
+ case $command in
+ show|list|dump|unregister)
+ case $prev in
+ $command)
+ COMPREPLY=( $( compgen -W "$STRUCT_OPS_TYPE" -- "$cur" ) )
+ ;;
+ id)
+ _bpftool_get_map_ids_for_type struct_ops
+ ;;
+ name)
+ _bpftool_get_map_names_for_type struct_ops
+ ;;
+ esac
+ return 0
+ ;;
+ register)
+ _filedir
+ return 0
+ ;;
+ *)
+ [[ $prev == $object ]] && \
+ COMPREPLY=( $( compgen -W 'register unregister show list dump help' \
+ -- "$cur" ) )
;;
esac
;;
@@ -712,11 +788,17 @@ _bpftool()
esac
;;
pin)
- if [[ $prev == "$command" ]]; then
- COMPREPLY=( $( compgen -W "$PROG_TYPE" -- "$cur" ) )
- else
- _filedir
- fi
+ case $prev in
+ $command)
+ COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) )
+ ;;
+ id)
+ _bpftool_get_map_ids
+ ;;
+ name)
+ _bpftool_get_map_names
+ ;;
+ esac
return 0
;;
event_pipe)
@@ -843,7 +925,7 @@ _bpftool()
case $command in
skeleton)
_filedir
- ;;
+ ;;
*)
[[ $prev == $object ]] && \
COMPREPLY=( $( compgen -W 'skeleton help' -- "$cur" ) )
@@ -943,6 +1025,9 @@ _bpftool()
id)
_bpftool_get_prog_ids
;;
+ name)
+ _bpftool_get_prog_names
+ ;;
pinned)
_filedir
;;
@@ -983,11 +1068,12 @@ _bpftool()
probe)
[[ $prev == "prefix" ]] && return 0
if _bpftool_search_list 'macros'; then
- COMPREPLY+=( $( compgen -W 'prefix' -- "$cur" ) )
+ _bpftool_once_attr 'prefix'
else
COMPREPLY+=( $( compgen -W 'macros' -- "$cur" ) )
fi
_bpftool_one_of_list 'kernel dev'
+ _bpftool_once_attr 'full'
return 0
;;
*)
diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index b3745ed711ba..bcaf55b59498 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -389,6 +389,9 @@ static int dump_btf_c(const struct btf *btf,
if (IS_ERR(d))
return PTR_ERR(d);
+ printf("#ifndef __VMLINUX_H__\n");
+ printf("#define __VMLINUX_H__\n");
+ printf("\n");
printf("#ifndef BPF_NO_PRESERVE_ACCESS_INDEX\n");
printf("#pragma clang attribute push (__attribute__((preserve_access_index)), apply_to = record)\n");
printf("#endif\n\n");
@@ -412,6 +415,8 @@ static int dump_btf_c(const struct btf *btf,
printf("#ifndef BPF_NO_PRESERVE_ACCESS_INDEX\n");
printf("#pragma clang attribute pop\n");
printf("#endif\n");
+ printf("\n");
+ printf("#endif /* __VMLINUX_H__ */\n");
done:
btf_dump__free(d);
diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c
index 01cc52b834fa..497807bec675 100644
--- a/tools/bpf/bpftool/btf_dumper.c
+++ b/tools/bpf/bpftool/btf_dumper.c
@@ -4,11 +4,13 @@
#include <ctype.h>
#include <stdio.h> /* for (FILE *) used by json_writer */
#include <string.h>
+#include <unistd.h>
#include <asm/byteorder.h>
#include <linux/bitops.h>
#include <linux/btf.h>
#include <linux/err.h>
#include <bpf/btf.h>
+#include <bpf/bpf.h>
#include "json_writer.h"
#include "main.h"
@@ -22,13 +24,102 @@
static int btf_dumper_do_type(const struct btf_dumper *d, __u32 type_id,
__u8 bit_offset, const void *data);
-static void btf_dumper_ptr(const void *data, json_writer_t *jw,
- bool is_plain_text)
+static int btf_dump_func(const struct btf *btf, char *func_sig,
+ const struct btf_type *func_proto,
+ const struct btf_type *func, int pos, int size);
+
+static int dump_prog_id_as_func_ptr(const struct btf_dumper *d,
+ const struct btf_type *func_proto,
+ __u32 prog_id)
{
- if (is_plain_text)
- jsonw_printf(jw, "%p", *(void **)data);
+ struct bpf_prog_info_linear *prog_info = NULL;
+ const struct btf_type *func_type;
+ const char *prog_name = NULL;
+ struct bpf_func_info *finfo;
+ struct btf *prog_btf = NULL;
+ struct bpf_prog_info *info;
+ int prog_fd, func_sig_len;
+ char prog_str[1024];
+
+ /* Get the ptr's func_proto */
+ func_sig_len = btf_dump_func(d->btf, prog_str, func_proto, NULL, 0,
+ sizeof(prog_str));
+ if (func_sig_len == -1)
+ return -1;
+
+ if (!prog_id)
+ goto print;
+
+ /* Get the bpf_prog's name. Obtain from func_info. */
+ prog_fd = bpf_prog_get_fd_by_id(prog_id);
+ if (prog_fd == -1)
+ goto print;
+
+ prog_info = bpf_program__get_prog_info_linear(prog_fd,
+ 1UL << BPF_PROG_INFO_FUNC_INFO);
+ close(prog_fd);
+ if (IS_ERR(prog_info)) {
+ prog_info = NULL;
+ goto print;
+ }
+ info = &prog_info->info;
+
+ if (!info->btf_id || !info->nr_func_info ||
+ btf__get_from_id(info->btf_id, &prog_btf))
+ goto print;
+ finfo = (struct bpf_func_info *)info->func_info;
+ func_type = btf__type_by_id(prog_btf, finfo->type_id);
+ if (!func_type || !btf_is_func(func_type))
+ goto print;
+
+ prog_name = btf__name_by_offset(prog_btf, func_type->name_off);
+
+print:
+ if (!prog_id)
+ snprintf(&prog_str[func_sig_len],
+ sizeof(prog_str) - func_sig_len, " 0");
+ else if (prog_name)
+ snprintf(&prog_str[func_sig_len],
+ sizeof(prog_str) - func_sig_len,
+ " %s/prog_id:%u", prog_name, prog_id);
else
- jsonw_printf(jw, "%lu", *(unsigned long *)data);
+ snprintf(&prog_str[func_sig_len],
+ sizeof(prog_str) - func_sig_len,
+ " <unknown_prog_name>/prog_id:%u", prog_id);
+
+ prog_str[sizeof(prog_str) - 1] = '\0';
+ jsonw_string(d->jw, prog_str);
+ btf__free(prog_btf);
+ free(prog_info);
+ return 0;
+}
+
+static void btf_dumper_ptr(const struct btf_dumper *d,
+ const struct btf_type *t,
+ const void *data)
+{
+ unsigned long value = *(unsigned long *)data;
+ const struct btf_type *ptr_type;
+ __s32 ptr_type_id;
+
+ if (!d->prog_id_as_func_ptr || value > UINT32_MAX)
+ goto print_ptr_value;
+
+ ptr_type_id = btf__resolve_type(d->btf, t->type);
+ if (ptr_type_id < 0)
+ goto print_ptr_value;
+ ptr_type = btf__type_by_id(d->btf, ptr_type_id);
+ if (!ptr_type || !btf_is_func_proto(ptr_type))
+ goto print_ptr_value;
+
+ if (!dump_prog_id_as_func_ptr(d, ptr_type, value))
+ return;
+
+print_ptr_value:
+ if (d->is_plain_text)
+ jsonw_printf(d->jw, "%p", (void *)value);
+ else
+ jsonw_printf(d->jw, "%lu", value);
}
static int btf_dumper_modifier(const struct btf_dumper *d, __u32 type_id,
@@ -43,9 +134,78 @@ static int btf_dumper_modifier(const struct btf_dumper *d, __u32 type_id,
return btf_dumper_do_type(d, actual_type_id, bit_offset, data);
}
-static void btf_dumper_enum(const void *data, json_writer_t *jw)
+static int btf_dumper_enum(const struct btf_dumper *d,
+ const struct btf_type *t,
+ const void *data)
+{
+ const struct btf_enum *enums = btf_enum(t);
+ __s64 value;
+ __u16 i;
+
+ switch (t->size) {
+ case 8:
+ value = *(__s64 *)data;
+ break;
+ case 4:
+ value = *(__s32 *)data;
+ break;
+ case 2:
+ value = *(__s16 *)data;
+ break;
+ case 1:
+ value = *(__s8 *)data;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ for (i = 0; i < btf_vlen(t); i++) {
+ if (value == enums[i].val) {
+ jsonw_string(d->jw,
+ btf__name_by_offset(d->btf,
+ enums[i].name_off));
+ return 0;
+ }
+ }
+
+ jsonw_int(d->jw, value);
+ return 0;
+}
+
+static bool is_str_array(const struct btf *btf, const struct btf_array *arr,
+ const char *s)
{
- jsonw_printf(jw, "%d", *(int *)data);
+ const struct btf_type *elem_type;
+ const char *end_s;
+
+ if (!arr->nelems)
+ return false;
+
+ elem_type = btf__type_by_id(btf, arr->type);
+ /* Not skipping typedef. typedef to char does not count as
+ * a string now.
+ */
+ while (elem_type && btf_is_mod(elem_type))
+ elem_type = btf__type_by_id(btf, elem_type->type);
+
+ if (!elem_type || !btf_is_int(elem_type) || elem_type->size != 1)
+ return false;
+
+ if (btf_int_encoding(elem_type) != BTF_INT_CHAR &&
+ strcmp("char", btf__name_by_offset(btf, elem_type->name_off)))
+ return false;
+
+ end_s = s + arr->nelems;
+ while (s < end_s) {
+ if (!*s)
+ return true;
+ if (*s <= 0x1f || *s >= 0x7f)
+ return false;
+ s++;
+ }
+
+ /* '\0' is not found */
+ return false;
}
static int btf_dumper_array(const struct btf_dumper *d, __u32 type_id,
@@ -57,6 +217,11 @@ static int btf_dumper_array(const struct btf_dumper *d, __u32 type_id,
int ret = 0;
__u32 i;
+ if (is_str_array(d->btf, arr, data)) {
+ jsonw_string(d->jw, data);
+ return 0;
+ }
+
elem_size = btf__resolve_size(d->btf, arr->type);
if (elem_size < 0)
return elem_size;
@@ -366,10 +531,9 @@ static int btf_dumper_do_type(const struct btf_dumper *d, __u32 type_id,
case BTF_KIND_ARRAY:
return btf_dumper_array(d, type_id, data);
case BTF_KIND_ENUM:
- btf_dumper_enum(data, d->jw);
- return 0;
+ return btf_dumper_enum(d, t, data);
case BTF_KIND_PTR:
- btf_dumper_ptr(data, d->jw, d->is_plain_text);
+ btf_dumper_ptr(d, t, data);
return 0;
case BTF_KIND_UNKN:
jsonw_printf(d->jw, "(unknown)");
@@ -414,10 +578,6 @@ int btf_dumper_type(const struct btf_dumper *d, __u32 type_id,
return -1; \
} while (0)
-static int btf_dump_func(const struct btf *btf, char *func_sig,
- const struct btf_type *func_proto,
- const struct btf_type *func, int pos, int size);
-
static int __btf_dumper_type_only(const struct btf *btf, __u32 type_id,
char *func_sig, int pos, int size)
{
@@ -526,8 +686,15 @@ static int btf_dump_func(const struct btf *btf, char *func_sig,
BTF_PRINT_ARG(", ");
if (arg->type) {
BTF_PRINT_TYPE(arg->type);
- BTF_PRINT_ARG("%s",
- btf__name_by_offset(btf, arg->name_off));
+ if (arg->name_off)
+ BTF_PRINT_ARG("%s",
+ btf__name_by_offset(btf, arg->name_off));
+ else if (pos && func_sig[pos - 1] == ' ')
+ /* Remove unnecessary space for
+ * FUNC_PROTO that does not have
+ * arg->name_off
+ */
+ func_sig[--pos] = '\0';
} else {
BTF_PRINT_ARG("...");
}
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index b75b8ec5469c..f2223dbdfb0a 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -211,39 +211,14 @@ int do_pin_fd(int fd, const char *name)
return err;
}
-int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32))
+int do_pin_any(int argc, char **argv, int (*get_fd)(int *, char ***))
{
- unsigned int id;
- char *endptr;
int err;
int fd;
- if (argc < 3) {
- p_err("too few arguments, id ID and FILE path is required");
- return -1;
- } else if (argc > 3) {
- p_err("too many arguments");
- return -1;
- }
-
- if (!is_prefix(*argv, "id")) {
- p_err("expected 'id' got %s", *argv);
- return -1;
- }
- NEXT_ARG();
-
- id = strtoul(*argv, &endptr, 0);
- if (*endptr) {
- p_err("can't parse %s as ID", *argv);
- return -1;
- }
- NEXT_ARG();
-
- fd = get_fd_by_id(id);
- if (fd < 0) {
- p_err("can't open object by id (%u): %s", id, strerror(errno));
- return -1;
- }
+ fd = get_fd(&argc, &argv);
+ if (fd < 0)
+ return fd;
err = do_pin_fd(fd, *argv);
@@ -597,3 +572,10 @@ int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what)
return 0;
}
+
+int __printf(2, 0)
+print_all_levels(__maybe_unused enum libbpf_print_level level,
+ const char *format, va_list args)
+{
+ return vfprintf(stderr, format, args);
+}
diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c
index 941873d778d8..88718ee6a438 100644
--- a/tools/bpf/bpftool/feature.c
+++ b/tools/bpf/bpftool/feature.c
@@ -112,18 +112,12 @@ print_start_section(const char *json_title, const char *plain_title,
}
}
-static void
-print_end_then_start_section(const char *json_title, const char *plain_title,
- const char *define_comment,
- const char *define_prefix)
+static void print_end_section(void)
{
if (json_output)
jsonw_end_object(json_wtr);
else
printf("\n");
-
- print_start_section(json_title, plain_title, define_comment,
- define_prefix);
}
/* Probing functions */
@@ -520,13 +514,38 @@ probe_map_type(enum bpf_map_type map_type, const char *define_prefix,
}
static void
+probe_helper_for_progtype(enum bpf_prog_type prog_type, bool supported_type,
+ const char *define_prefix, unsigned int id,
+ const char *ptype_name, __u32 ifindex)
+{
+ bool res;
+
+ if (!supported_type)
+ res = false;
+ else
+ res = bpf_probe_helper(id, prog_type, ifindex);
+
+ if (json_output) {
+ if (res)
+ jsonw_string(json_wtr, helper_name[id]);
+ } else if (define_prefix) {
+ printf("#define %sBPF__PROG_TYPE_%s__HELPER_%s %s\n",
+ define_prefix, ptype_name, helper_name[id],
+ res ? "1" : "0");
+ } else {
+ if (res)
+ printf("\n\t- %s", helper_name[id]);
+ }
+}
+
+static void
probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type,
- const char *define_prefix, __u32 ifindex)
+ const char *define_prefix, bool full_mode,
+ __u32 ifindex)
{
const char *ptype_name = prog_type_name[prog_type];
char feat_name[128];
unsigned int id;
- bool res;
if (ifindex)
/* Only test helpers for offload-able program types */
@@ -548,21 +567,19 @@ probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type,
}
for (id = 1; id < ARRAY_SIZE(helper_name); id++) {
- if (!supported_type)
- res = false;
- else
- res = bpf_probe_helper(id, prog_type, ifindex);
-
- if (json_output) {
- if (res)
- jsonw_string(json_wtr, helper_name[id]);
- } else if (define_prefix) {
- printf("#define %sBPF__PROG_TYPE_%s__HELPER_%s %s\n",
- define_prefix, ptype_name, helper_name[id],
- res ? "1" : "0");
- } else {
- if (res)
- printf("\n\t- %s", helper_name[id]);
+ /* Skip helper functions which emit dmesg messages when not in
+ * the full mode.
+ */
+ switch (id) {
+ case BPF_FUNC_trace_printk:
+ case BPF_FUNC_probe_write_user:
+ if (!full_mode)
+ continue;
+ /* fallthrough */
+ default:
+ probe_helper_for_progtype(prog_type, supported_type,
+ define_prefix, id, ptype_name,
+ ifindex);
}
}
@@ -584,13 +601,132 @@ probe_large_insn_limit(const char *define_prefix, __u32 ifindex)
res, define_prefix);
}
+static void
+section_system_config(enum probe_component target, const char *define_prefix)
+{
+ switch (target) {
+ case COMPONENT_KERNEL:
+ case COMPONENT_UNSPEC:
+ if (define_prefix)
+ break;
+
+ print_start_section("system_config",
+ "Scanning system configuration...",
+ NULL, /* define_comment never used here */
+ NULL); /* define_prefix always NULL here */
+ if (check_procfs()) {
+ probe_unprivileged_disabled();
+ probe_jit_enable();
+ probe_jit_harden();
+ probe_jit_kallsyms();
+ probe_jit_limit();
+ } else {
+ p_info("/* procfs not mounted, skipping related probes */");
+ }
+ probe_kernel_image_config();
+ print_end_section();
+ break;
+ default:
+ break;
+ }
+}
+
+static bool section_syscall_config(const char *define_prefix)
+{
+ bool res;
+
+ print_start_section("syscall_config",
+ "Scanning system call availability...",
+ "/*** System call availability ***/",
+ define_prefix);
+ res = probe_bpf_syscall(define_prefix);
+ print_end_section();
+
+ return res;
+}
+
+static void
+section_program_types(bool *supported_types, const char *define_prefix,
+ __u32 ifindex)
+{
+ unsigned int i;
+
+ print_start_section("program_types",
+ "Scanning eBPF program types...",
+ "/*** eBPF program types ***/",
+ define_prefix);
+
+ for (i = BPF_PROG_TYPE_UNSPEC + 1; i < ARRAY_SIZE(prog_type_name); i++)
+ probe_prog_type(i, supported_types, define_prefix, ifindex);
+
+ print_end_section();
+}
+
+static void section_map_types(const char *define_prefix, __u32 ifindex)
+{
+ unsigned int i;
+
+ print_start_section("map_types",
+ "Scanning eBPF map types...",
+ "/*** eBPF map types ***/",
+ define_prefix);
+
+ for (i = BPF_MAP_TYPE_UNSPEC + 1; i < map_type_name_size; i++)
+ probe_map_type(i, define_prefix, ifindex);
+
+ print_end_section();
+}
+
+static void
+section_helpers(bool *supported_types, const char *define_prefix,
+ bool full_mode, __u32 ifindex)
+{
+ unsigned int i;
+
+ print_start_section("helpers",
+ "Scanning eBPF helper functions...",
+ "/*** eBPF helper functions ***/",
+ define_prefix);
+
+ if (define_prefix)
+ printf("/*\n"
+ " * Use %sHAVE_PROG_TYPE_HELPER(prog_type_name, helper_name)\n"
+ " * to determine if <helper_name> is available for <prog_type_name>,\n"
+ " * e.g.\n"
+ " * #if %sHAVE_PROG_TYPE_HELPER(xdp, bpf_redirect)\n"
+ " * // do stuff with this helper\n"
+ " * #elif\n"
+ " * // use a workaround\n"
+ " * #endif\n"
+ " */\n"
+ "#define %sHAVE_PROG_TYPE_HELPER(prog_type, helper) \\\n"
+ " %sBPF__PROG_TYPE_ ## prog_type ## __HELPER_ ## helper\n",
+ define_prefix, define_prefix, define_prefix,
+ define_prefix);
+ for (i = BPF_PROG_TYPE_UNSPEC + 1; i < ARRAY_SIZE(prog_type_name); i++)
+ probe_helpers_for_progtype(i, supported_types[i],
+ define_prefix, full_mode, ifindex);
+
+ print_end_section();
+}
+
+static void section_misc(const char *define_prefix, __u32 ifindex)
+{
+ print_start_section("misc",
+ "Scanning miscellaneous eBPF features...",
+ "/*** eBPF misc features ***/",
+ define_prefix);
+ probe_large_insn_limit(define_prefix, ifindex);
+ print_end_section();
+}
+
static int do_probe(int argc, char **argv)
{
enum probe_component target = COMPONENT_UNSPEC;
const char *define_prefix = NULL;
bool supported_types[128] = {};
+ bool full_mode = false;
__u32 ifindex = 0;
- unsigned int i;
char *ifname;
/* Detection assumes user has sufficient privileges (CAP_SYS_ADMIN).
@@ -629,6 +765,9 @@ static int do_probe(int argc, char **argv)
strerror(errno));
return -1;
}
+ } else if (is_prefix(*argv, "full")) {
+ full_mode = true;
+ NEXT_ARG();
} else if (is_prefix(*argv, "macros") && !define_prefix) {
define_prefix = "";
NEXT_ARG();
@@ -658,97 +797,19 @@ static int do_probe(int argc, char **argv)
jsonw_start_object(json_wtr);
}
- switch (target) {
- case COMPONENT_KERNEL:
- case COMPONENT_UNSPEC:
- if (define_prefix)
- break;
-
- print_start_section("system_config",
- "Scanning system configuration...",
- NULL, /* define_comment never used here */
- NULL); /* define_prefix always NULL here */
- if (check_procfs()) {
- probe_unprivileged_disabled();
- probe_jit_enable();
- probe_jit_harden();
- probe_jit_kallsyms();
- probe_jit_limit();
- } else {
- p_info("/* procfs not mounted, skipping related probes */");
- }
- probe_kernel_image_config();
- if (json_output)
- jsonw_end_object(json_wtr);
- else
- printf("\n");
- break;
- default:
- break;
- }
-
- print_start_section("syscall_config",
- "Scanning system call availability...",
- "/*** System call availability ***/",
- define_prefix);
-
- if (!probe_bpf_syscall(define_prefix))
+ section_system_config(target, define_prefix);
+ if (!section_syscall_config(define_prefix))
/* bpf() syscall unavailable, don't probe other BPF features */
goto exit_close_json;
-
- print_end_then_start_section("program_types",
- "Scanning eBPF program types...",
- "/*** eBPF program types ***/",
- define_prefix);
-
- for (i = BPF_PROG_TYPE_UNSPEC + 1; i < ARRAY_SIZE(prog_type_name); i++)
- probe_prog_type(i, supported_types, define_prefix, ifindex);
-
- print_end_then_start_section("map_types",
- "Scanning eBPF map types...",
- "/*** eBPF map types ***/",
- define_prefix);
-
- for (i = BPF_MAP_TYPE_UNSPEC + 1; i < map_type_name_size; i++)
- probe_map_type(i, define_prefix, ifindex);
-
- print_end_then_start_section("helpers",
- "Scanning eBPF helper functions...",
- "/*** eBPF helper functions ***/",
- define_prefix);
-
- if (define_prefix)
- printf("/*\n"
- " * Use %sHAVE_PROG_TYPE_HELPER(prog_type_name, helper_name)\n"
- " * to determine if <helper_name> is available for <prog_type_name>,\n"
- " * e.g.\n"
- " * #if %sHAVE_PROG_TYPE_HELPER(xdp, bpf_redirect)\n"
- " * // do stuff with this helper\n"
- " * #elif\n"
- " * // use a workaround\n"
- " * #endif\n"
- " */\n"
- "#define %sHAVE_PROG_TYPE_HELPER(prog_type, helper) \\\n"
- " %sBPF__PROG_TYPE_ ## prog_type ## __HELPER_ ## helper\n",
- define_prefix, define_prefix, define_prefix,
- define_prefix);
- for (i = BPF_PROG_TYPE_UNSPEC + 1; i < ARRAY_SIZE(prog_type_name); i++)
- probe_helpers_for_progtype(i, supported_types[i],
- define_prefix, ifindex);
-
- print_end_then_start_section("misc",
- "Scanning miscellaneous eBPF features...",
- "/*** eBPF misc features ***/",
- define_prefix);
- probe_large_insn_limit(define_prefix, ifindex);
+ section_program_types(supported_types, define_prefix, ifindex);
+ section_map_types(define_prefix, ifindex);
+ section_helpers(supported_types, define_prefix, full_mode, ifindex);
+ section_misc(define_prefix, ifindex);
exit_close_json:
- if (json_output) {
- /* End current "section" of probes */
- jsonw_end_object(json_wtr);
+ if (json_output)
/* End root object */
jsonw_end_object(json_wtr);
- }
return 0;
}
@@ -761,7 +822,7 @@ static int do_help(int argc, char **argv)
}
fprintf(stderr,
- "Usage: %s %s probe [COMPONENT] [macros [prefix PREFIX]]\n"
+ "Usage: %s %s probe [COMPONENT] [full] [macros [prefix PREFIX]]\n"
" %s %s help\n"
"\n"
" COMPONENT := { kernel | dev NAME }\n"
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 6d41bbfc6459..466c269eabdd 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -58,7 +58,7 @@ static int do_help(int argc, char **argv)
" %s batch file FILE\n"
" %s version\n"
"\n"
- " OBJECT := { prog | map | cgroup | perf | net | feature | btf | gen }\n"
+ " OBJECT := { prog | map | cgroup | perf | net | feature | btf | gen | struct_ops }\n"
" " HELP_SPEC_OPTIONS "\n"
"",
bin_name, bin_name, bin_name);
@@ -79,13 +79,6 @@ static int do_version(int argc, char **argv)
return 0;
}
-static int __printf(2, 0)
-print_all_levels(__maybe_unused enum libbpf_print_level level,
- const char *format, va_list args)
-{
- return vfprintf(stderr, format, args);
-}
-
int cmd_select(const struct cmd *cmds, int argc, char **argv,
int (*help)(int argc, char **argv))
{
@@ -228,6 +221,7 @@ static const struct cmd cmds[] = {
{ "feature", do_feature },
{ "btf", do_btf },
{ "gen", do_gen },
+ { "struct_ops", do_struct_ops },
{ "version", do_version },
{ 0 }
};
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 4e75b58d3989..86f14ce26fd7 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -14,6 +14,8 @@
#include <linux/hashtable.h>
#include <tools/libc_compat.h>
+#include <bpf/libbpf.h>
+
#include "json_writer.h"
#define ptr_to_u64(ptr) ((__u64)(unsigned long)(ptr))
@@ -76,6 +78,9 @@ static const char * const prog_type_name[] = {
[BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl",
[BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable",
[BPF_PROG_TYPE_CGROUP_SOCKOPT] = "cgroup_sockopt",
+ [BPF_PROG_TYPE_TRACING] = "tracing",
+ [BPF_PROG_TYPE_STRUCT_OPS] = "struct_ops",
+ [BPF_PROG_TYPE_EXT] = "ext",
};
extern const char * const map_type_name[];
@@ -143,7 +148,7 @@ char *get_fdinfo(int fd, const char *key);
int open_obj_pinned(char *path, bool quiet);
int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type);
int mount_bpffs_for_pin(const char *name);
-int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32));
+int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(int *, char ***));
int do_pin_fd(int fd, const char *name);
int do_prog(int argc, char **arg);
@@ -156,6 +161,7 @@ int do_tracelog(int argc, char **arg);
int do_feature(int argc, char **argv);
int do_btf(int argc, char **argv);
int do_gen(int argc, char **argv);
+int do_struct_ops(int argc, char **argv);
int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what);
int prog_parse_fd(int *argc, char ***argv);
@@ -200,6 +206,7 @@ struct btf_dumper {
const struct btf *btf;
json_writer_t *jw;
bool is_plain_text;
+ bool prog_id_as_func_ptr;
};
/* btf_dumper_type - print data along with type information
@@ -226,4 +233,7 @@ struct tcmsg;
int do_xdp_dump(struct ifinfomsg *ifinfo, struct nlattr **tb);
int do_filter_dump(struct tcmsg *ifinfo, struct nlattr **tb, const char *kind,
const char *devname, int ifindex);
+
+int print_all_levels(__maybe_unused enum libbpf_print_level level,
+ const char *format, va_list args);
#endif
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index e6c85680b34d..693a632f6813 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -1384,7 +1384,7 @@ static int do_pin(int argc, char **argv)
{
int err;
- err = do_pin_any(argc, argv, bpf_map_get_fd_by_id);
+ err = do_pin_any(argc, argv, map_parse_fd);
if (!err && json_output)
jsonw_null(json_wtr);
return err;
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index b352ab041160..f6a5974a7b0a 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -4,6 +4,7 @@
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
+#include <signal.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
@@ -11,10 +12,13 @@
#include <time.h>
#include <unistd.h>
#include <net/if.h>
+#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/stat.h>
+#include <sys/syscall.h>
#include <linux/err.h>
+#include <linux/perf_event.h>
#include <linux/sizes.h>
#include <bpf/bpf.h>
@@ -809,7 +813,7 @@ static int do_pin(int argc, char **argv)
{
int err;
- err = do_pin_any(argc, argv, bpf_prog_get_fd_by_id);
+ err = do_pin_any(argc, argv, prog_parse_fd);
if (!err && json_output)
jsonw_null(json_wtr);
return err;
@@ -1243,6 +1247,25 @@ free_data_in:
return err;
}
+static int
+get_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
+ enum bpf_attach_type *expected_attach_type)
+{
+ libbpf_print_fn_t print_backup;
+ int ret;
+
+ ret = libbpf_prog_type_by_name(name, prog_type, expected_attach_type);
+ if (!ret)
+ return ret;
+
+ /* libbpf_prog_type_by_name() failed, let's re-run with debug level */
+ print_backup = libbpf_set_print(print_all_levels);
+ ret = libbpf_prog_type_by_name(name, prog_type, expected_attach_type);
+ libbpf_set_print(print_backup);
+
+ return ret;
+}
+
static int load_with_options(int argc, char **argv, bool first_prog_only)
{
enum bpf_prog_type common_prog_type = BPF_PROG_TYPE_UNSPEC;
@@ -1292,8 +1315,8 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
strcat(type, *argv);
strcat(type, "/");
- err = libbpf_prog_type_by_name(type, &common_prog_type,
- &expected_attach_type);
+ err = get_prog_type_by_name(type, &common_prog_type,
+ &expected_attach_type);
free(type);
if (err < 0)
goto err_free_reuse_maps;
@@ -1392,8 +1415,8 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
if (prog_type == BPF_PROG_TYPE_UNSPEC) {
const char *sec_name = bpf_program__title(pos, false);
- err = libbpf_prog_type_by_name(sec_name, &prog_type,
- &expected_attach_type);
+ err = get_prog_type_by_name(sec_name, &prog_type,
+ &expected_attach_type);
if (err < 0)
goto err_close_obj;
}
@@ -1537,6 +1560,422 @@ static int do_loadall(int argc, char **argv)
return load_with_options(argc, argv, false);
}
+#ifdef BPFTOOL_WITHOUT_SKELETONS
+
+static int do_profile(int argc, char **argv)
+{
+ p_err("bpftool prog profile command is not supported. Please build bpftool with clang >= 10.0.0");
+ return 0;
+}
+
+#else /* BPFTOOL_WITHOUT_SKELETONS */
+
+#include "profiler.skel.h"
+
+struct profile_metric {
+ const char *name;
+ struct bpf_perf_event_value val;
+ struct perf_event_attr attr;
+ bool selected;
+
+ /* calculate ratios like instructions per cycle */
+ const int ratio_metric; /* 0 for N/A, 1 for index 0 (cycles) */
+ const char *ratio_desc;
+ const float ratio_mul;
+} metrics[] = {
+ {
+ .name = "cycles",
+ .attr = {
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_CPU_CYCLES,
+ .exclude_user = 1,
+ },
+ },
+ {
+ .name = "instructions",
+ .attr = {
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_INSTRUCTIONS,
+ .exclude_user = 1,
+ },
+ .ratio_metric = 1,
+ .ratio_desc = "insns per cycle",
+ .ratio_mul = 1.0,
+ },
+ {
+ .name = "l1d_loads",
+ .attr = {
+ .type = PERF_TYPE_HW_CACHE,
+ .config =
+ PERF_COUNT_HW_CACHE_L1D |
+ (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+ (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16),
+ .exclude_user = 1,
+ },
+ },
+ {
+ .name = "llc_misses",
+ .attr = {
+ .type = PERF_TYPE_HW_CACHE,
+ .config =
+ PERF_COUNT_HW_CACHE_LL |
+ (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+ (PERF_COUNT_HW_CACHE_RESULT_MISS << 16),
+ .exclude_user = 1
+ },
+ .ratio_metric = 2,
+ .ratio_desc = "LLC misses per million insns",
+ .ratio_mul = 1e6,
+ },
+};
+
+static __u64 profile_total_count;
+
+#define MAX_NUM_PROFILE_METRICS 4
+
+static int profile_parse_metrics(int argc, char **argv)
+{
+ unsigned int metric_cnt;
+ int selected_cnt = 0;
+ unsigned int i;
+
+ metric_cnt = sizeof(metrics) / sizeof(struct profile_metric);
+
+ while (argc > 0) {
+ for (i = 0; i < metric_cnt; i++) {
+ if (is_prefix(argv[0], metrics[i].name)) {
+ if (!metrics[i].selected)
+ selected_cnt++;
+ metrics[i].selected = true;
+ break;
+ }
+ }
+ if (i == metric_cnt) {
+ p_err("unknown metric %s", argv[0]);
+ return -1;
+ }
+ NEXT_ARG();
+ }
+ if (selected_cnt > MAX_NUM_PROFILE_METRICS) {
+ p_err("too many (%d) metrics, please specify no more than %d metrics at at time",
+ selected_cnt, MAX_NUM_PROFILE_METRICS);
+ return -1;
+ }
+ return selected_cnt;
+}
+
+static void profile_read_values(struct profiler_bpf *obj)
+{
+ __u32 m, cpu, num_cpu = obj->rodata->num_cpu;
+ int reading_map_fd, count_map_fd;
+ __u64 counts[num_cpu];
+ __u32 key = 0;
+ int err;
+
+ reading_map_fd = bpf_map__fd(obj->maps.accum_readings);
+ count_map_fd = bpf_map__fd(obj->maps.counts);
+ if (reading_map_fd < 0 || count_map_fd < 0) {
+ p_err("failed to get fd for map");
+ return;
+ }
+
+ err = bpf_map_lookup_elem(count_map_fd, &key, counts);
+ if (err) {
+ p_err("failed to read count_map: %s", strerror(errno));
+ return;
+ }
+
+ profile_total_count = 0;
+ for (cpu = 0; cpu < num_cpu; cpu++)
+ profile_total_count += counts[cpu];
+
+ for (m = 0; m < ARRAY_SIZE(metrics); m++) {
+ struct bpf_perf_event_value values[num_cpu];
+
+ if (!metrics[m].selected)
+ continue;
+
+ err = bpf_map_lookup_elem(reading_map_fd, &key, values);
+ if (err) {
+ p_err("failed to read reading_map: %s",
+ strerror(errno));
+ return;
+ }
+ for (cpu = 0; cpu < num_cpu; cpu++) {
+ metrics[m].val.counter += values[cpu].counter;
+ metrics[m].val.enabled += values[cpu].enabled;
+ metrics[m].val.running += values[cpu].running;
+ }
+ key++;
+ }
+}
+
+static void profile_print_readings_json(void)
+{
+ __u32 m;
+
+ jsonw_start_array(json_wtr);
+ for (m = 0; m < ARRAY_SIZE(metrics); m++) {
+ if (!metrics[m].selected)
+ continue;
+ jsonw_start_object(json_wtr);
+ jsonw_string_field(json_wtr, "metric", metrics[m].name);
+ jsonw_lluint_field(json_wtr, "run_cnt", profile_total_count);
+ jsonw_lluint_field(json_wtr, "value", metrics[m].val.counter);
+ jsonw_lluint_field(json_wtr, "enabled", metrics[m].val.enabled);
+ jsonw_lluint_field(json_wtr, "running", metrics[m].val.running);
+
+ jsonw_end_object(json_wtr);
+ }
+ jsonw_end_array(json_wtr);
+}
+
+static void profile_print_readings_plain(void)
+{
+ __u32 m;
+
+ printf("\n%18llu %-20s\n", profile_total_count, "run_cnt");
+ for (m = 0; m < ARRAY_SIZE(metrics); m++) {
+ struct bpf_perf_event_value *val = &metrics[m].val;
+ int r;
+
+ if (!metrics[m].selected)
+ continue;
+ printf("%18llu %-20s", val->counter, metrics[m].name);
+
+ r = metrics[m].ratio_metric - 1;
+ if (r >= 0 && metrics[r].selected &&
+ metrics[r].val.counter > 0) {
+ printf("# %8.2f %-30s",
+ val->counter * metrics[m].ratio_mul /
+ metrics[r].val.counter,
+ metrics[m].ratio_desc);
+ } else {
+ printf("%-41s", "");
+ }
+
+ if (val->enabled > val->running)
+ printf("(%4.2f%%)",
+ val->running * 100.0 / val->enabled);
+ printf("\n");
+ }
+}
+
+static void profile_print_readings(void)
+{
+ if (json_output)
+ profile_print_readings_json();
+ else
+ profile_print_readings_plain();
+}
+
+static char *profile_target_name(int tgt_fd)
+{
+ struct bpf_prog_info_linear *info_linear;
+ struct bpf_func_info *func_info;
+ const struct btf_type *t;
+ char *name = NULL;
+ struct btf *btf;
+
+ info_linear = bpf_program__get_prog_info_linear(
+ tgt_fd, 1UL << BPF_PROG_INFO_FUNC_INFO);
+ if (IS_ERR_OR_NULL(info_linear)) {
+ p_err("failed to get info_linear for prog FD %d", tgt_fd);
+ return NULL;
+ }
+
+ if (info_linear->info.btf_id == 0 ||
+ btf__get_from_id(info_linear->info.btf_id, &btf)) {
+ p_err("prog FD %d doesn't have valid btf", tgt_fd);
+ goto out;
+ }
+
+ func_info = (struct bpf_func_info *)(info_linear->info.func_info);
+ t = btf__type_by_id(btf, func_info[0].type_id);
+ if (!t) {
+ p_err("btf %d doesn't have type %d",
+ info_linear->info.btf_id, func_info[0].type_id);
+ goto out;
+ }
+ name = strdup(btf__name_by_offset(btf, t->name_off));
+out:
+ free(info_linear);
+ return name;
+}
+
+static struct profiler_bpf *profile_obj;
+static int profile_tgt_fd = -1;
+static char *profile_tgt_name;
+static int *profile_perf_events;
+static int profile_perf_event_cnt;
+
+static void profile_close_perf_events(struct profiler_bpf *obj)
+{
+ int i;
+
+ for (i = profile_perf_event_cnt - 1; i >= 0; i--)
+ close(profile_perf_events[i]);
+
+ free(profile_perf_events);
+ profile_perf_event_cnt = 0;
+}
+
+static int profile_open_perf_events(struct profiler_bpf *obj)
+{
+ unsigned int cpu, m;
+ int map_fd, pmu_fd;
+
+ profile_perf_events = calloc(
+ sizeof(int), obj->rodata->num_cpu * obj->rodata->num_metric);
+ if (!profile_perf_events) {
+ p_err("failed to allocate memory for perf_event array: %s",
+ strerror(errno));
+ return -1;
+ }
+ map_fd = bpf_map__fd(obj->maps.events);
+ if (map_fd < 0) {
+ p_err("failed to get fd for events map");
+ return -1;
+ }
+
+ for (m = 0; m < ARRAY_SIZE(metrics); m++) {
+ if (!metrics[m].selected)
+ continue;
+ for (cpu = 0; cpu < obj->rodata->num_cpu; cpu++) {
+ pmu_fd = syscall(__NR_perf_event_open, &metrics[m].attr,
+ -1/*pid*/, cpu, -1/*group_fd*/, 0);
+ if (pmu_fd < 0 ||
+ bpf_map_update_elem(map_fd, &profile_perf_event_cnt,
+ &pmu_fd, BPF_ANY) ||
+ ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0)) {
+ p_err("failed to create event %s on cpu %d",
+ metrics[m].name, cpu);
+ return -1;
+ }
+ profile_perf_events[profile_perf_event_cnt++] = pmu_fd;
+ }
+ }
+ return 0;
+}
+
+static void profile_print_and_cleanup(void)
+{
+ profile_close_perf_events(profile_obj);
+ profile_read_values(profile_obj);
+ profile_print_readings();
+ profiler_bpf__destroy(profile_obj);
+
+ close(profile_tgt_fd);
+ free(profile_tgt_name);
+}
+
+static void int_exit(int signo)
+{
+ profile_print_and_cleanup();
+ exit(0);
+}
+
+static int do_profile(int argc, char **argv)
+{
+ int num_metric, num_cpu, err = -1;
+ struct bpf_program *prog;
+ unsigned long duration;
+ char *endptr;
+
+ /* we at least need two args for the prog and one metric */
+ if (!REQ_ARGS(3))
+ return -EINVAL;
+
+ /* parse target fd */
+ profile_tgt_fd = prog_parse_fd(&argc, &argv);
+ if (profile_tgt_fd < 0) {
+ p_err("failed to parse fd");
+ return -1;
+ }
+
+ /* parse profiling optional duration */
+ if (argc > 2 && is_prefix(argv[0], "duration")) {
+ NEXT_ARG();
+ duration = strtoul(*argv, &endptr, 0);
+ if (*endptr)
+ usage();
+ NEXT_ARG();
+ } else {
+ duration = UINT_MAX;
+ }
+
+ num_metric = profile_parse_metrics(argc, argv);
+ if (num_metric <= 0)
+ goto out;
+
+ num_cpu = libbpf_num_possible_cpus();
+ if (num_cpu <= 0) {
+ p_err("failed to identify number of CPUs");
+ goto out;
+ }
+
+ profile_obj = profiler_bpf__open();
+ if (!profile_obj) {
+ p_err("failed to open and/or load BPF object");
+ goto out;
+ }
+
+ profile_obj->rodata->num_cpu = num_cpu;
+ profile_obj->rodata->num_metric = num_metric;
+
+ /* adjust map sizes */
+ bpf_map__resize(profile_obj->maps.events, num_metric * num_cpu);
+ bpf_map__resize(profile_obj->maps.fentry_readings, num_metric);
+ bpf_map__resize(profile_obj->maps.accum_readings, num_metric);
+ bpf_map__resize(profile_obj->maps.counts, 1);
+
+ /* change target name */
+ profile_tgt_name = profile_target_name(profile_tgt_fd);
+ if (!profile_tgt_name)
+ goto out;
+
+ bpf_object__for_each_program(prog, profile_obj->obj) {
+ err = bpf_program__set_attach_target(prog, profile_tgt_fd,
+ profile_tgt_name);
+ if (err) {
+ p_err("failed to set attach target\n");
+ goto out;
+ }
+ }
+
+ set_max_rlimit();
+ err = profiler_bpf__load(profile_obj);
+ if (err) {
+ p_err("failed to load profile_obj");
+ goto out;
+ }
+
+ err = profile_open_perf_events(profile_obj);
+ if (err)
+ goto out;
+
+ err = profiler_bpf__attach(profile_obj);
+ if (err) {
+ p_err("failed to attach profile_obj");
+ goto out;
+ }
+ signal(SIGINT, int_exit);
+
+ sleep(duration);
+ profile_print_and_cleanup();
+ return 0;
+
+out:
+ profile_close_perf_events(profile_obj);
+ if (profile_obj)
+ profiler_bpf__destroy(profile_obj);
+ close(profile_tgt_fd);
+ free(profile_tgt_name);
+ return err;
+}
+
+#endif /* BPFTOOL_WITHOUT_SKELETONS */
+
static int do_help(int argc, char **argv)
{
if (json_output) {
@@ -1560,6 +1999,7 @@ static int do_help(int argc, char **argv)
" [data_out FILE [data_size_out L]] \\\n"
" [ctx_in FILE [ctx_out FILE [ctx_size_out M]]] \\\n"
" [repeat N]\n"
+ " %s %s profile PROG [duration DURATION] METRICs\n"
" %s %s tracelog\n"
" %s %s help\n"
"\n"
@@ -1573,16 +2013,17 @@ static int do_help(int argc, char **argv)
" cgroup/bind4 | cgroup/bind6 | cgroup/post_bind4 |\n"
" cgroup/post_bind6 | cgroup/connect4 | cgroup/connect6 |\n"
" cgroup/sendmsg4 | cgroup/sendmsg6 | cgroup/recvmsg4 |\n"
- " cgroup/recvmsg6 | cgroup/getsockopt |\n"
- " cgroup/setsockopt }\n"
+ " cgroup/recvmsg6 | cgroup/getsockopt | cgroup/setsockopt |\n"
+ " struct_ops | fentry | fexit | freplace }\n"
" ATTACH_TYPE := { msg_verdict | stream_verdict | stream_parser |\n"
" flow_dissector }\n"
+ " METRIC := { cycles | instructions | l1d_loads | llc_misses }\n"
" " HELP_SPEC_OPTIONS "\n"
"",
bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
- bin_name, argv[-2]);
+ bin_name, argv[-2], bin_name, argv[-2]);
return 0;
}
@@ -1599,6 +2040,7 @@ static const struct cmd cmds[] = {
{ "detach", do_detach },
{ "tracelog", do_tracelog },
{ "run", do_run },
+ { "profile", do_profile },
{ 0 }
};
diff --git a/tools/bpf/bpftool/skeleton/profiler.bpf.c b/tools/bpf/bpftool/skeleton/profiler.bpf.c
new file mode 100644
index 000000000000..20034c12f7c5
--- /dev/null
+++ b/tools/bpf/bpftool/skeleton/profiler.bpf.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+#include "profiler.h"
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+/* map of perf event fds, num_cpu * num_metric entries */
+struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, sizeof(int));
+} events SEC(".maps");
+
+/* readings at fentry */
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, sizeof(struct bpf_perf_event_value));
+} fentry_readings SEC(".maps");
+
+/* accumulated readings */
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, sizeof(struct bpf_perf_event_value));
+} accum_readings SEC(".maps");
+
+/* sample counts, one per cpu */
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, sizeof(u64));
+} counts SEC(".maps");
+
+const volatile __u32 num_cpu = 1;
+const volatile __u32 num_metric = 1;
+#define MAX_NUM_MATRICS 4
+
+SEC("fentry/XXX")
+int BPF_PROG(fentry_XXX)
+{
+ struct bpf_perf_event_value *ptrs[MAX_NUM_MATRICS];
+ u32 key = bpf_get_smp_processor_id();
+ u32 i;
+
+ /* look up before reading, to reduce error */
+ for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) {
+ u32 flag = i;
+
+ ptrs[i] = bpf_map_lookup_elem(&fentry_readings, &flag);
+ if (!ptrs[i])
+ return 0;
+ }
+
+ for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) {
+ struct bpf_perf_event_value reading;
+ int err;
+
+ err = bpf_perf_event_read_value(&events, key, &reading,
+ sizeof(reading));
+ if (err)
+ return 0;
+ *(ptrs[i]) = reading;
+ key += num_cpu;
+ }
+
+ return 0;
+}
+
+static inline void
+fexit_update_maps(u32 id, struct bpf_perf_event_value *after)
+{
+ struct bpf_perf_event_value *before, diff, *accum;
+
+ before = bpf_map_lookup_elem(&fentry_readings, &id);
+ /* only account samples with a valid fentry_reading */
+ if (before && before->counter) {
+ struct bpf_perf_event_value *accum;
+
+ diff.counter = after->counter - before->counter;
+ diff.enabled = after->enabled - before->enabled;
+ diff.running = after->running - before->running;
+
+ accum = bpf_map_lookup_elem(&accum_readings, &id);
+ if (accum) {
+ accum->counter += diff.counter;
+ accum->enabled += diff.enabled;
+ accum->running += diff.running;
+ }
+ }
+}
+
+SEC("fexit/XXX")
+int BPF_PROG(fexit_XXX)
+{
+ struct bpf_perf_event_value readings[MAX_NUM_MATRICS];
+ u32 cpu = bpf_get_smp_processor_id();
+ u32 i, one = 1, zero = 0;
+ int err;
+ u64 *count;
+
+ /* read all events before updating the maps, to reduce error */
+ for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) {
+ err = bpf_perf_event_read_value(&events, cpu + i * num_cpu,
+ readings + i, sizeof(*readings));
+ if (err)
+ return 0;
+ }
+ count = bpf_map_lookup_elem(&counts, &zero);
+ if (count) {
+ *count += 1;
+ for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++)
+ fexit_update_maps(i, &readings[i]);
+ }
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/bpf/bpftool/skeleton/profiler.h b/tools/bpf/bpftool/skeleton/profiler.h
new file mode 100644
index 000000000000..1f767e9510f7
--- /dev/null
+++ b/tools/bpf/bpftool/skeleton/profiler.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __PROFILER_H
+#define __PROFILER_H
+
+/* useful typedefs from vmlinux.h */
+
+typedef signed char __s8;
+typedef unsigned char __u8;
+typedef short int __s16;
+typedef short unsigned int __u16;
+typedef int __s32;
+typedef unsigned int __u32;
+typedef long long int __s64;
+typedef long long unsigned int __u64;
+
+typedef __s8 s8;
+typedef __u8 u8;
+typedef __s16 s16;
+typedef __u16 u16;
+typedef __s32 s32;
+typedef __u32 u32;
+typedef __s64 s64;
+typedef __u64 u64;
+
+enum {
+ false = 0,
+ true = 1,
+};
+
+#ifdef __CHECKER__
+#define __bitwise__ __attribute__((bitwise))
+#else
+#define __bitwise__
+#endif
+
+typedef __u16 __bitwise__ __le16;
+typedef __u16 __bitwise__ __be16;
+typedef __u32 __bitwise__ __le32;
+typedef __u32 __bitwise__ __be32;
+typedef __u64 __bitwise__ __le64;
+typedef __u64 __bitwise__ __be64;
+
+typedef __u16 __bitwise__ __sum16;
+typedef __u32 __bitwise__ __wsum;
+
+#endif /* __PROFILER_H */
diff --git a/tools/bpf/bpftool/struct_ops.c b/tools/bpf/bpftool/struct_ops.c
new file mode 100644
index 000000000000..2a7befbd11ad
--- /dev/null
+++ b/tools/bpf/bpftool/struct_ops.c
@@ -0,0 +1,596 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2020 Facebook */
+
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <linux/err.h>
+
+#include <bpf/bpf.h>
+#include <bpf/btf.h>
+#include <bpf/libbpf.h>
+
+#include "json_writer.h"
+#include "main.h"
+
+#define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_"
+
+static const struct btf_type *map_info_type;
+static __u32 map_info_alloc_len;
+static struct btf *btf_vmlinux;
+static __s32 map_info_type_id;
+
+struct res {
+ unsigned int nr_maps;
+ unsigned int nr_errs;
+};
+
+static const struct btf *get_btf_vmlinux(void)
+{
+ if (btf_vmlinux)
+ return btf_vmlinux;
+
+ btf_vmlinux = libbpf_find_kernel_btf();
+ if (IS_ERR(btf_vmlinux))
+ p_err("struct_ops requires kernel CONFIG_DEBUG_INFO_BTF=y");
+
+ return btf_vmlinux;
+}
+
+static const char *get_kern_struct_ops_name(const struct bpf_map_info *info)
+{
+ const struct btf *kern_btf;
+ const struct btf_type *t;
+ const char *st_ops_name;
+
+ kern_btf = get_btf_vmlinux();
+ if (IS_ERR(kern_btf))
+ return "<btf_vmlinux_not_found>";
+
+ t = btf__type_by_id(kern_btf, info->btf_vmlinux_value_type_id);
+ st_ops_name = btf__name_by_offset(kern_btf, t->name_off);
+ st_ops_name += strlen(STRUCT_OPS_VALUE_PREFIX);
+
+ return st_ops_name;
+}
+
+static __s32 get_map_info_type_id(void)
+{
+ const struct btf *kern_btf;
+
+ if (map_info_type_id)
+ return map_info_type_id;
+
+ kern_btf = get_btf_vmlinux();
+ if (IS_ERR(kern_btf)) {
+ map_info_type_id = PTR_ERR(kern_btf);
+ return map_info_type_id;
+ }
+
+ map_info_type_id = btf__find_by_name_kind(kern_btf, "bpf_map_info",
+ BTF_KIND_STRUCT);
+ if (map_info_type_id < 0) {
+ p_err("can't find bpf_map_info from btf_vmlinux");
+ return map_info_type_id;
+ }
+ map_info_type = btf__type_by_id(kern_btf, map_info_type_id);
+
+ /* Ensure map_info_alloc() has at least what the bpftool needs */
+ map_info_alloc_len = map_info_type->size;
+ if (map_info_alloc_len < sizeof(struct bpf_map_info))
+ map_info_alloc_len = sizeof(struct bpf_map_info);
+
+ return map_info_type_id;
+}
+
+/* If the subcmd needs to print out the bpf_map_info,
+ * it should always call map_info_alloc to allocate
+ * a bpf_map_info object instead of allocating it
+ * on the stack.
+ *
+ * map_info_alloc() will take the running kernel's btf
+ * into account. i.e. it will consider the
+ * sizeof(struct bpf_map_info) of the running kernel.
+ *
+ * It will enable the "struct_ops" cmd to print the latest
+ * "struct bpf_map_info".
+ *
+ * [ Recall that "struct_ops" requires the kernel's btf to
+ * be available ]
+ */
+static struct bpf_map_info *map_info_alloc(__u32 *alloc_len)
+{
+ struct bpf_map_info *info;
+
+ if (get_map_info_type_id() < 0)
+ return NULL;
+
+ info = calloc(1, map_info_alloc_len);
+ if (!info)
+ p_err("mem alloc failed");
+ else
+ *alloc_len = map_info_alloc_len;
+
+ return info;
+}
+
+/* It iterates all struct_ops maps of the system.
+ * It returns the fd in "*res_fd" and map_info in "*info".
+ * In the very first iteration, info->id should be 0.
+ * An optional map "*name" filter can be specified.
+ * The filter can be made more flexible in the future.
+ * e.g. filter by kernel-struct-ops-name, regex-name, glob-name, ...etc.
+ *
+ * Return value:
+ * 1: A struct_ops map found. It is returned in "*res_fd" and "*info".
+ * The caller can continue to call get_next in the future.
+ * 0: No struct_ops map is returned.
+ * All struct_ops map has been found.
+ * -1: Error and the caller should abort the iteration.
+ */
+static int get_next_struct_ops_map(const char *name, int *res_fd,
+ struct bpf_map_info *info, __u32 info_len)
+{
+ __u32 id = info->id;
+ int err, fd;
+
+ while (true) {
+ err = bpf_map_get_next_id(id, &id);
+ if (err) {
+ if (errno == ENOENT)
+ return 0;
+ p_err("can't get next map: %s", strerror(errno));
+ return -1;
+ }
+
+ fd = bpf_map_get_fd_by_id(id);
+ if (fd < 0) {
+ if (errno == ENOENT)
+ continue;
+ p_err("can't get map by id (%u): %s",
+ id, strerror(errno));
+ return -1;
+ }
+
+ err = bpf_obj_get_info_by_fd(fd, info, &info_len);
+ if (err) {
+ p_err("can't get map info: %s", strerror(errno));
+ close(fd);
+ return -1;
+ }
+
+ if (info->type == BPF_MAP_TYPE_STRUCT_OPS &&
+ (!name || !strcmp(name, info->name))) {
+ *res_fd = fd;
+ return 1;
+ }
+ close(fd);
+ }
+}
+
+static int cmd_retval(const struct res *res, bool must_have_one_map)
+{
+ if (res->nr_errs || (!res->nr_maps && must_have_one_map))
+ return -1;
+
+ return 0;
+}
+
+/* "data" is the work_func private storage */
+typedef int (*work_func)(int fd, const struct bpf_map_info *info, void *data,
+ struct json_writer *wtr);
+
+/* Find all struct_ops map in the system.
+ * Filter out by "name" (if specified).
+ * Then call "func(fd, info, data, wtr)" on each struct_ops map found.
+ */
+static struct res do_search(const char *name, work_func func, void *data,
+ struct json_writer *wtr)
+{
+ struct bpf_map_info *info;
+ struct res res = {};
+ __u32 info_len;
+ int fd, err;
+
+ info = map_info_alloc(&info_len);
+ if (!info) {
+ res.nr_errs++;
+ return res;
+ }
+
+ if (wtr)
+ jsonw_start_array(wtr);
+ while ((err = get_next_struct_ops_map(name, &fd, info, info_len)) == 1) {
+ res.nr_maps++;
+ err = func(fd, info, data, wtr);
+ if (err)
+ res.nr_errs++;
+ close(fd);
+ }
+ if (wtr)
+ jsonw_end_array(wtr);
+
+ if (err)
+ res.nr_errs++;
+
+ if (!wtr && name && !res.nr_errs && !res.nr_maps)
+ /* It is not printing empty [].
+ * Thus, needs to specifically say nothing found
+ * for "name" here.
+ */
+ p_err("no struct_ops found for %s", name);
+ else if (!wtr && json_output && !res.nr_errs)
+ /* The "func()" above is not writing any json (i.e. !wtr
+ * test here).
+ *
+ * However, "-j" is enabled and there is no errs here,
+ * so call json_null() as the current convention of
+ * other cmds.
+ */
+ jsonw_null(json_wtr);
+
+ free(info);
+ return res;
+}
+
+static struct res do_one_id(const char *id_str, work_func func, void *data,
+ struct json_writer *wtr)
+{
+ struct bpf_map_info *info;
+ struct res res = {};
+ unsigned long id;
+ __u32 info_len;
+ char *endptr;
+ int fd;
+
+ id = strtoul(id_str, &endptr, 0);
+ if (*endptr || !id || id > UINT32_MAX) {
+ p_err("invalid id %s", id_str);
+ res.nr_errs++;
+ return res;
+ }
+
+ fd = bpf_map_get_fd_by_id(id);
+ if (fd == -1) {
+ p_err("can't get map by id (%lu): %s", id, strerror(errno));
+ res.nr_errs++;
+ return res;
+ }
+
+ info = map_info_alloc(&info_len);
+ if (!info) {
+ res.nr_errs++;
+ goto done;
+ }
+
+ if (bpf_obj_get_info_by_fd(fd, info, &info_len)) {
+ p_err("can't get map info: %s", strerror(errno));
+ res.nr_errs++;
+ goto done;
+ }
+
+ if (info->type != BPF_MAP_TYPE_STRUCT_OPS) {
+ p_err("%s id %u is not a struct_ops map", info->name, info->id);
+ res.nr_errs++;
+ goto done;
+ }
+
+ res.nr_maps++;
+
+ if (func(fd, info, data, wtr))
+ res.nr_errs++;
+ else if (!wtr && json_output)
+ /* The "func()" above is not writing any json (i.e. !wtr
+ * test here).
+ *
+ * However, "-j" is enabled and there is no errs here,
+ * so call json_null() as the current convention of
+ * other cmds.
+ */
+ jsonw_null(json_wtr);
+
+done:
+ free(info);
+ close(fd);
+
+ return res;
+}
+
+static struct res do_work_on_struct_ops(const char *search_type,
+ const char *search_term,
+ work_func func, void *data,
+ struct json_writer *wtr)
+{
+ if (search_type) {
+ if (is_prefix(search_type, "id"))
+ return do_one_id(search_term, func, data, wtr);
+ else if (!is_prefix(search_type, "name"))
+ usage();
+ }
+
+ return do_search(search_term, func, data, wtr);
+}
+
+static int __do_show(int fd, const struct bpf_map_info *info, void *data,
+ struct json_writer *wtr)
+{
+ if (wtr) {
+ jsonw_start_object(wtr);
+ jsonw_uint_field(wtr, "id", info->id);
+ jsonw_string_field(wtr, "name", info->name);
+ jsonw_string_field(wtr, "kernel_struct_ops",
+ get_kern_struct_ops_name(info));
+ jsonw_end_object(wtr);
+ } else {
+ printf("%u: %-15s %-32s\n", info->id, info->name,
+ get_kern_struct_ops_name(info));
+ }
+
+ return 0;
+}
+
+static int do_show(int argc, char **argv)
+{
+ const char *search_type = NULL, *search_term = NULL;
+ struct res res;
+
+ if (argc && argc != 2)
+ usage();
+
+ if (argc == 2) {
+ search_type = GET_ARG();
+ search_term = GET_ARG();
+ }
+
+ res = do_work_on_struct_ops(search_type, search_term, __do_show,
+ NULL, json_wtr);
+
+ return cmd_retval(&res, !!search_term);
+}
+
+static int __do_dump(int fd, const struct bpf_map_info *info, void *data,
+ struct json_writer *wtr)
+{
+ struct btf_dumper *d = (struct btf_dumper *)data;
+ const struct btf_type *struct_ops_type;
+ const struct btf *kern_btf = d->btf;
+ const char *struct_ops_name;
+ int zero = 0;
+ void *value;
+
+ /* note: d->jw == wtr */
+
+ kern_btf = d->btf;
+
+ /* The kernel supporting BPF_MAP_TYPE_STRUCT_OPS must have
+ * btf_vmlinux_value_type_id.
+ */
+ struct_ops_type = btf__type_by_id(kern_btf,
+ info->btf_vmlinux_value_type_id);
+ struct_ops_name = btf__name_by_offset(kern_btf,
+ struct_ops_type->name_off);
+ value = calloc(1, info->value_size);
+ if (!value) {
+ p_err("mem alloc failed");
+ return -1;
+ }
+
+ if (bpf_map_lookup_elem(fd, &zero, value)) {
+ p_err("can't lookup struct_ops map %s id %u",
+ info->name, info->id);
+ free(value);
+ return -1;
+ }
+
+ jsonw_start_object(wtr);
+ jsonw_name(wtr, "bpf_map_info");
+ btf_dumper_type(d, map_info_type_id, (void *)info);
+ jsonw_end_object(wtr);
+
+ jsonw_start_object(wtr);
+ jsonw_name(wtr, struct_ops_name);
+ btf_dumper_type(d, info->btf_vmlinux_value_type_id, value);
+ jsonw_end_object(wtr);
+
+ free(value);
+
+ return 0;
+}
+
+static int do_dump(int argc, char **argv)
+{
+ const char *search_type = NULL, *search_term = NULL;
+ json_writer_t *wtr = json_wtr;
+ const struct btf *kern_btf;
+ struct btf_dumper d = {};
+ struct res res;
+
+ if (argc && argc != 2)
+ usage();
+
+ if (argc == 2) {
+ search_type = GET_ARG();
+ search_term = GET_ARG();
+ }
+
+ kern_btf = get_btf_vmlinux();
+ if (IS_ERR(kern_btf))
+ return -1;
+
+ if (!json_output) {
+ wtr = jsonw_new(stdout);
+ if (!wtr) {
+ p_err("can't create json writer");
+ return -1;
+ }
+ jsonw_pretty(wtr, true);
+ }
+
+ d.btf = kern_btf;
+ d.jw = wtr;
+ d.is_plain_text = !json_output;
+ d.prog_id_as_func_ptr = true;
+
+ res = do_work_on_struct_ops(search_type, search_term, __do_dump, &d,
+ wtr);
+
+ if (!json_output)
+ jsonw_destroy(&wtr);
+
+ return cmd_retval(&res, !!search_term);
+}
+
+static int __do_unregister(int fd, const struct bpf_map_info *info, void *data,
+ struct json_writer *wtr)
+{
+ int zero = 0;
+
+ if (bpf_map_delete_elem(fd, &zero)) {
+ p_err("can't unload %s %s id %u: %s",
+ get_kern_struct_ops_name(info), info->name,
+ info->id, strerror(errno));
+ return -1;
+ }
+
+ p_info("Unregistered %s %s id %u",
+ get_kern_struct_ops_name(info), info->name,
+ info->id);
+
+ return 0;
+}
+
+static int do_unregister(int argc, char **argv)
+{
+ const char *search_type, *search_term;
+ struct res res;
+
+ if (argc != 2)
+ usage();
+
+ search_type = GET_ARG();
+ search_term = GET_ARG();
+
+ res = do_work_on_struct_ops(search_type, search_term,
+ __do_unregister, NULL, NULL);
+
+ return cmd_retval(&res, true);
+}
+
+static int do_register(int argc, char **argv)
+{
+ const struct bpf_map_def *def;
+ struct bpf_map_info info = {};
+ __u32 info_len = sizeof(info);
+ int nr_errs = 0, nr_maps = 0;
+ struct bpf_object *obj;
+ struct bpf_link *link;
+ struct bpf_map *map;
+ const char *file;
+
+ if (argc != 1)
+ usage();
+
+ file = GET_ARG();
+
+ obj = bpf_object__open(file);
+ if (IS_ERR_OR_NULL(obj))
+ return -1;
+
+ set_max_rlimit();
+
+ if (bpf_object__load(obj)) {
+ bpf_object__close(obj);
+ return -1;
+ }
+
+ bpf_object__for_each_map(map, obj) {
+ def = bpf_map__def(map);
+ if (def->type != BPF_MAP_TYPE_STRUCT_OPS)
+ continue;
+
+ link = bpf_map__attach_struct_ops(map);
+ if (IS_ERR(link)) {
+ p_err("can't register struct_ops %s: %s",
+ bpf_map__name(map),
+ strerror(-PTR_ERR(link)));
+ nr_errs++;
+ continue;
+ }
+ nr_maps++;
+
+ bpf_link__disconnect(link);
+ bpf_link__destroy(link);
+
+ if (!bpf_obj_get_info_by_fd(bpf_map__fd(map), &info,
+ &info_len))
+ p_info("Registered %s %s id %u",
+ get_kern_struct_ops_name(&info),
+ bpf_map__name(map),
+ info.id);
+ else
+ /* Not p_err. The struct_ops was attached
+ * successfully.
+ */
+ p_info("Registered %s but can't find id: %s",
+ bpf_map__name(map), strerror(errno));
+ }
+
+ bpf_object__close(obj);
+
+ if (nr_errs)
+ return -1;
+
+ if (!nr_maps) {
+ p_err("no struct_ops found in %s", file);
+ return -1;
+ }
+
+ if (json_output)
+ jsonw_null(json_wtr);
+
+ return 0;
+}
+
+static int do_help(int argc, char **argv)
+{
+ if (json_output) {
+ jsonw_null(json_wtr);
+ return 0;
+ }
+
+ fprintf(stderr,
+ "Usage: %s %s { show | list } [STRUCT_OPS_MAP]\n"
+ " %s %s dump [STRUCT_OPS_MAP]\n"
+ " %s %s register OBJ\n"
+ " %s %s unregister STRUCT_OPS_MAP\n"
+ " %s %s help\n"
+ "\n"
+ " OPTIONS := { {-j|--json} [{-p|--pretty}] }\n"
+ " STRUCT_OPS_MAP := [ id STRUCT_OPS_MAP_ID | name STRUCT_OPS_MAP_NAME ]\n",
+ bin_name, argv[-2], bin_name, argv[-2],
+ bin_name, argv[-2], bin_name, argv[-2],
+ bin_name, argv[-2]);
+
+ return 0;
+}
+
+static const struct cmd cmds[] = {
+ { "show", do_show },
+ { "list", do_show },
+ { "register", do_register },
+ { "unregister", do_unregister },
+ { "dump", do_dump },
+ { "help", do_help },
+ { 0 }
+};
+
+int do_struct_ops(int argc, char **argv)
+{
+ int err;
+
+ err = cmd_select(cmds, argc, argv, do_help);
+
+ btf__free(btf_vmlinux);
+ return err;
+}
diff --git a/tools/bpf/runqslower/runqslower.bpf.c b/tools/bpf/runqslower/runqslower.bpf.c
index 48a39f72fadf..1f18a409f044 100644
--- a/tools/bpf/runqslower/runqslower.bpf.c
+++ b/tools/bpf/runqslower/runqslower.bpf.c
@@ -5,9 +5,7 @@
#include "runqslower.h"
#define TASK_RUNNING 0
-
-#define BPF_F_INDEX_MASK 0xffffffffULL
-#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK
+#define BPF_F_CURRENT_CPU 0xffffffffULL
const volatile __u64 min_us = 0;
const volatile pid_t targ_pid = 0;
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index 7ac0d8088565..ab8e89a7009c 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -67,7 +67,8 @@ FILES= \
test-llvm.bin \
test-llvm-version.bin \
test-libaio.bin \
- test-libzstd.bin
+ test-libzstd.bin \
+ test-clang-bpf-global-var.bin
FILES := $(addprefix $(OUTPUT),$(FILES))
@@ -75,6 +76,7 @@ CC ?= $(CROSS_COMPILE)gcc
CXX ?= $(CROSS_COMPILE)g++
PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config
LLVM_CONFIG ?= llvm-config
+CLANG ?= clang
all: $(FILES)
@@ -321,6 +323,11 @@ $(OUTPUT)test-libaio.bin:
$(OUTPUT)test-libzstd.bin:
$(BUILD) -lzstd
+$(OUTPUT)test-clang-bpf-global-var.bin:
+ $(CLANG) -S -g -target bpf -o - $(patsubst %.bin,%.c,$(@F)) | \
+ grep BTF_KIND_VAR
+
+
###############################
clean:
diff --git a/tools/build/feature/test-clang-bpf-global-var.c b/tools/build/feature/test-clang-bpf-global-var.c
new file mode 100644
index 000000000000..221f1481d52e
--- /dev/null
+++ b/tools/build/feature/test-clang-bpf-global-var.c
@@ -0,0 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+volatile int global_value_for_test = 1;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 22f235260a3a..2e29a671d67e 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -73,7 +73,7 @@ struct bpf_insn {
/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */
struct bpf_lpm_trie_key {
__u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */
- __u8 data[0]; /* Arbitrary size */
+ __u8 data[]; /* Arbitrary size */
};
struct bpf_cgroup_storage_key {
@@ -111,6 +111,8 @@ enum bpf_cmd {
BPF_MAP_LOOKUP_AND_DELETE_BATCH,
BPF_MAP_UPDATE_BATCH,
BPF_MAP_DELETE_BATCH,
+ BPF_LINK_CREATE,
+ BPF_LINK_UPDATE,
};
enum bpf_map_type {
@@ -181,6 +183,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_TRACING,
BPF_PROG_TYPE_STRUCT_OPS,
BPF_PROG_TYPE_EXT,
+ BPF_PROG_TYPE_LSM,
};
enum bpf_attach_type {
@@ -210,6 +213,8 @@ enum bpf_attach_type {
BPF_TRACE_RAW_TP,
BPF_TRACE_FENTRY,
BPF_TRACE_FEXIT,
+ BPF_MODIFY_RETURN,
+ BPF_LSM_MAC,
__MAX_BPF_ATTACH_TYPE
};
@@ -325,44 +330,46 @@ enum bpf_attach_type {
#define BPF_PSEUDO_CALL 1
/* flags for BPF_MAP_UPDATE_ELEM command */
-#define BPF_ANY 0 /* create new element or update existing */
-#define BPF_NOEXIST 1 /* create new element if it didn't exist */
-#define BPF_EXIST 2 /* update existing element */
-#define BPF_F_LOCK 4 /* spin_lock-ed map_lookup/map_update */
+enum {
+ BPF_ANY = 0, /* create new element or update existing */
+ BPF_NOEXIST = 1, /* create new element if it didn't exist */
+ BPF_EXIST = 2, /* update existing element */
+ BPF_F_LOCK = 4, /* spin_lock-ed map_lookup/map_update */
+};
/* flags for BPF_MAP_CREATE command */
-#define BPF_F_NO_PREALLOC (1U << 0)
+enum {
+ BPF_F_NO_PREALLOC = (1U << 0),
/* Instead of having one common LRU list in the
* BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list
* which can scale and perform better.
* Note, the LRU nodes (including free nodes) cannot be moved
* across different LRU lists.
*/
-#define BPF_F_NO_COMMON_LRU (1U << 1)
+ BPF_F_NO_COMMON_LRU = (1U << 1),
/* Specify numa node during map creation */
-#define BPF_F_NUMA_NODE (1U << 2)
-
-#define BPF_OBJ_NAME_LEN 16U
+ BPF_F_NUMA_NODE = (1U << 2),
/* Flags for accessing BPF object from syscall side. */
-#define BPF_F_RDONLY (1U << 3)
-#define BPF_F_WRONLY (1U << 4)
+ BPF_F_RDONLY = (1U << 3),
+ BPF_F_WRONLY = (1U << 4),
/* Flag for stack_map, store build_id+offset instead of pointer */
-#define BPF_F_STACK_BUILD_ID (1U << 5)
+ BPF_F_STACK_BUILD_ID = (1U << 5),
/* Zero-initialize hash function seed. This should only be used for testing. */
-#define BPF_F_ZERO_SEED (1U << 6)
+ BPF_F_ZERO_SEED = (1U << 6),
/* Flags for accessing BPF object from program side. */
-#define BPF_F_RDONLY_PROG (1U << 7)
-#define BPF_F_WRONLY_PROG (1U << 8)
+ BPF_F_RDONLY_PROG = (1U << 7),
+ BPF_F_WRONLY_PROG = (1U << 8),
/* Clone map from listener for newly accepted socket */
-#define BPF_F_CLONE (1U << 9)
+ BPF_F_CLONE = (1U << 9),
/* Enable memory-mapping BPF map */
-#define BPF_F_MMAPABLE (1U << 10)
+ BPF_F_MMAPABLE = (1U << 10),
+};
/* Flags for BPF_PROG_QUERY. */
@@ -391,6 +398,8 @@ struct bpf_stack_build_id {
};
};
+#define BPF_OBJ_NAME_LEN 16U
+
union bpf_attr {
struct { /* anonymous struct used by BPF_MAP_CREATE command */
__u32 map_type; /* one of enum bpf_map_type */
@@ -534,7 +543,7 @@ union bpf_attr {
__u32 prog_cnt;
} query;
- struct {
+ struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */
__u64 name;
__u32 prog_fd;
} raw_tracepoint;
@@ -562,6 +571,24 @@ union bpf_attr {
__u64 probe_offset; /* output: probe_offset */
__u64 probe_addr; /* output: probe_addr */
} task_fd_query;
+
+ struct { /* struct used by BPF_LINK_CREATE command */
+ __u32 prog_fd; /* eBPF program to attach */
+ __u32 target_fd; /* object to attach to */
+ __u32 attach_type; /* attach type */
+ __u32 flags; /* extra flags */
+ } link_create;
+
+ struct { /* struct used by BPF_LINK_UPDATE command */
+ __u32 link_fd; /* link fd */
+ /* new program fd to update link with */
+ __u32 new_prog_fd;
+ __u32 flags; /* extra flags */
+ /* expected link's program fd; is specified only if
+ * BPF_F_REPLACE flag is set in flags */
+ __u32 old_prog_fd;
+ } link_update;
+
} __attribute__((aligned(8)));
/* The description below is an attempt at providing documentation to eBPF
@@ -2890,6 +2917,114 @@ union bpf_attr {
* Obtain the 64bit jiffies
* Return
* The 64 bit jiffies
+ *
+ * int bpf_read_branch_records(struct bpf_perf_event_data *ctx, void *buf, u32 size, u64 flags)
+ * Description
+ * For an eBPF program attached to a perf event, retrieve the
+ * branch records (struct perf_branch_entry) associated to *ctx*
+ * and store it in the buffer pointed by *buf* up to size
+ * *size* bytes.
+ * Return
+ * On success, number of bytes written to *buf*. On error, a
+ * negative value.
+ *
+ * The *flags* can be set to **BPF_F_GET_BRANCH_RECORDS_SIZE** to
+ * instead return the number of bytes required to store all the
+ * branch entries. If this flag is set, *buf* may be NULL.
+ *
+ * **-EINVAL** if arguments invalid or **size** not a multiple
+ * of sizeof(struct perf_branch_entry).
+ *
+ * **-ENOENT** if architecture does not support branch records.
+ *
+ * int bpf_get_ns_current_pid_tgid(u64 dev, u64 ino, struct bpf_pidns_info *nsdata, u32 size)
+ * Description
+ * Returns 0 on success, values for *pid* and *tgid* as seen from the current
+ * *namespace* will be returned in *nsdata*.
+ *
+ * On failure, the returned value is one of the following:
+ *
+ * **-EINVAL** if dev and inum supplied don't match dev_t and inode number
+ * with nsfs of current task, or if dev conversion to dev_t lost high bits.
+ *
+ * **-ENOENT** if pidns does not exists for the current task.
+ *
+ * int bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ * Description
+ * Write raw *data* blob into a special BPF perf event held by
+ * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
+ * event must have the following attributes: **PERF_SAMPLE_RAW**
+ * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
+ * **PERF_COUNT_SW_BPF_OUTPUT** as **config**.
+ *
+ * The *flags* are used to indicate the index in *map* for which
+ * the value must be put, masked with **BPF_F_INDEX_MASK**.
+ * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
+ * to indicate that the index of the current CPU core should be
+ * used.
+ *
+ * The value to write, of *size*, is passed through eBPF stack and
+ * pointed by *data*.
+ *
+ * *ctx* is a pointer to in-kernel struct xdp_buff.
+ *
+ * This helper is similar to **bpf_perf_eventoutput**\ () but
+ * restricted to raw_tracepoint bpf programs.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * u64 bpf_get_netns_cookie(void *ctx)
+ * Description
+ * Retrieve the cookie (generated by the kernel) of the network
+ * namespace the input *ctx* is associated with. The network
+ * namespace cookie remains stable for its lifetime and provides
+ * a global identifier that can be assumed unique. If *ctx* is
+ * NULL, then the helper returns the cookie for the initial
+ * network namespace. The cookie itself is very similar to that
+ * of bpf_get_socket_cookie() helper, but for network namespaces
+ * instead of sockets.
+ * Return
+ * A 8-byte long opaque number.
+ *
+ * u64 bpf_get_current_ancestor_cgroup_id(int ancestor_level)
+ * Description
+ * Return id of cgroup v2 that is ancestor of the cgroup associated
+ * with the current task at the *ancestor_level*. The root cgroup
+ * is at *ancestor_level* zero and each step down the hierarchy
+ * increments the level. If *ancestor_level* == level of cgroup
+ * associated with the current task, then return value will be the
+ * same as that of **bpf_get_current_cgroup_id**\ ().
+ *
+ * The helper is useful to implement policies based on cgroups
+ * that are upper in hierarchy than immediate cgroup associated
+ * with the current task.
+ *
+ * The format of returned id and helper limitations are same as in
+ * **bpf_get_current_cgroup_id**\ ().
+ * Return
+ * The id is returned or 0 in case the id could not be retrieved.
+ *
+ * int bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags)
+ * Description
+ * Assign the *sk* to the *skb*. When combined with appropriate
+ * routing configuration to receive the packet towards the socket,
+ * will cause *skb* to be delivered to the specified socket.
+ * Subsequent redirection of *skb* via **bpf_redirect**\ (),
+ * **bpf_clone_redirect**\ () or other methods outside of BPF may
+ * interfere with successful delivery to the socket.
+ *
+ * This operation is only valid from TC ingress path.
+ *
+ * The *flags* argument must be zero.
+ * Return
+ * 0 on success, or a negative errno in case of failure.
+ *
+ * * **-EINVAL** Unsupported flags specified.
+ * * **-ENOENT** Socket is unavailable for assignment.
+ * * **-ENETUNREACH** Socket is unreachable (wrong netns).
+ * * **-EOPNOTSUPP** Unsupported operation, for example a
+ * call from outside of TC ingress.
+ * * **-ESOCKTNOSUPPORT** Socket type not supported (reuseport).
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -3010,7 +3145,13 @@ union bpf_attr {
FN(probe_read_kernel_str), \
FN(tcp_send_ack), \
FN(send_signal_thread), \
- FN(jiffies64),
+ FN(jiffies64), \
+ FN(read_branch_records), \
+ FN(get_ns_current_pid_tgid), \
+ FN(xdp_output), \
+ FN(get_netns_cookie), \
+ FN(get_current_ancestor_cgroup_id), \
+ FN(sk_assign),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -3025,69 +3166,100 @@ enum bpf_func_id {
/* All flags used by eBPF helper functions, placed here. */
/* BPF_FUNC_skb_store_bytes flags. */
-#define BPF_F_RECOMPUTE_CSUM (1ULL << 0)
-#define BPF_F_INVALIDATE_HASH (1ULL << 1)
+enum {
+ BPF_F_RECOMPUTE_CSUM = (1ULL << 0),
+ BPF_F_INVALIDATE_HASH = (1ULL << 1),
+};
/* BPF_FUNC_l3_csum_replace and BPF_FUNC_l4_csum_replace flags.
* First 4 bits are for passing the header field size.
*/
-#define BPF_F_HDR_FIELD_MASK 0xfULL
+enum {
+ BPF_F_HDR_FIELD_MASK = 0xfULL,
+};
/* BPF_FUNC_l4_csum_replace flags. */
-#define BPF_F_PSEUDO_HDR (1ULL << 4)
-#define BPF_F_MARK_MANGLED_0 (1ULL << 5)
-#define BPF_F_MARK_ENFORCE (1ULL << 6)
+enum {
+ BPF_F_PSEUDO_HDR = (1ULL << 4),
+ BPF_F_MARK_MANGLED_0 = (1ULL << 5),
+ BPF_F_MARK_ENFORCE = (1ULL << 6),
+};
/* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */
-#define BPF_F_INGRESS (1ULL << 0)
+enum {
+ BPF_F_INGRESS = (1ULL << 0),
+};
/* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
-#define BPF_F_TUNINFO_IPV6 (1ULL << 0)
+enum {
+ BPF_F_TUNINFO_IPV6 = (1ULL << 0),
+};
/* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */
-#define BPF_F_SKIP_FIELD_MASK 0xffULL
-#define BPF_F_USER_STACK (1ULL << 8)
+enum {
+ BPF_F_SKIP_FIELD_MASK = 0xffULL,
+ BPF_F_USER_STACK = (1ULL << 8),
/* flags used by BPF_FUNC_get_stackid only. */
-#define BPF_F_FAST_STACK_CMP (1ULL << 9)
-#define BPF_F_REUSE_STACKID (1ULL << 10)
+ BPF_F_FAST_STACK_CMP = (1ULL << 9),
+ BPF_F_REUSE_STACKID = (1ULL << 10),
/* flags used by BPF_FUNC_get_stack only. */
-#define BPF_F_USER_BUILD_ID (1ULL << 11)
+ BPF_F_USER_BUILD_ID = (1ULL << 11),
+};
/* BPF_FUNC_skb_set_tunnel_key flags. */
-#define BPF_F_ZERO_CSUM_TX (1ULL << 1)
-#define BPF_F_DONT_FRAGMENT (1ULL << 2)
-#define BPF_F_SEQ_NUMBER (1ULL << 3)
+enum {
+ BPF_F_ZERO_CSUM_TX = (1ULL << 1),
+ BPF_F_DONT_FRAGMENT = (1ULL << 2),
+ BPF_F_SEQ_NUMBER = (1ULL << 3),
+};
/* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and
* BPF_FUNC_perf_event_read_value flags.
*/
-#define BPF_F_INDEX_MASK 0xffffffffULL
-#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK
+enum {
+ BPF_F_INDEX_MASK = 0xffffffffULL,
+ BPF_F_CURRENT_CPU = BPF_F_INDEX_MASK,
/* BPF_FUNC_perf_event_output for sk_buff input context. */
-#define BPF_F_CTXLEN_MASK (0xfffffULL << 32)
+ BPF_F_CTXLEN_MASK = (0xfffffULL << 32),
+};
/* Current network namespace */
-#define BPF_F_CURRENT_NETNS (-1L)
+enum {
+ BPF_F_CURRENT_NETNS = (-1L),
+};
/* BPF_FUNC_skb_adjust_room flags. */
-#define BPF_F_ADJ_ROOM_FIXED_GSO (1ULL << 0)
+enum {
+ BPF_F_ADJ_ROOM_FIXED_GSO = (1ULL << 0),
+ BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 = (1ULL << 1),
+ BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 = (1ULL << 2),
+ BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3),
+ BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4),
+};
-#define BPF_ADJ_ROOM_ENCAP_L2_MASK 0xff
-#define BPF_ADJ_ROOM_ENCAP_L2_SHIFT 56
+enum {
+ BPF_ADJ_ROOM_ENCAP_L2_MASK = 0xff,
+ BPF_ADJ_ROOM_ENCAP_L2_SHIFT = 56,
+};
-#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 (1ULL << 1)
-#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 (1ULL << 2)
-#define BPF_F_ADJ_ROOM_ENCAP_L4_GRE (1ULL << 3)
-#define BPF_F_ADJ_ROOM_ENCAP_L4_UDP (1ULL << 4)
#define BPF_F_ADJ_ROOM_ENCAP_L2(len) (((__u64)len & \
BPF_ADJ_ROOM_ENCAP_L2_MASK) \
<< BPF_ADJ_ROOM_ENCAP_L2_SHIFT)
/* BPF_FUNC_sysctl_get_name flags. */
-#define BPF_F_SYSCTL_BASE_NAME (1ULL << 0)
+enum {
+ BPF_F_SYSCTL_BASE_NAME = (1ULL << 0),
+};
/* BPF_FUNC_sk_storage_get flags */
-#define BPF_SK_STORAGE_GET_F_CREATE (1ULL << 0)
+enum {
+ BPF_SK_STORAGE_GET_F_CREATE = (1ULL << 0),
+};
+
+/* BPF_FUNC_read_branch_records flags. */
+enum {
+ BPF_F_GET_BRANCH_RECORDS_SIZE = (1ULL << 0),
+};
/* Mode for BPF_FUNC_skb_adjust_room helper. */
enum bpf_adj_room_mode {
@@ -3153,6 +3325,7 @@ struct __sk_buff {
__u32 wire_len;
__u32 gso_segs;
__bpf_md_ptr(struct bpf_sock *, sk);
+ __u32 gso_size;
};
struct bpf_tunnel_key {
@@ -3505,13 +3678,14 @@ struct bpf_sock_ops {
};
/* Definitions for bpf_sock_ops_cb_flags */
-#define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0)
-#define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1)
-#define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2)
-#define BPF_SOCK_OPS_RTT_CB_FLAG (1<<3)
-#define BPF_SOCK_OPS_ALL_CB_FLAGS 0xF /* Mask of all currently
- * supported cb flags
- */
+enum {
+ BPF_SOCK_OPS_RTO_CB_FLAG = (1<<0),
+ BPF_SOCK_OPS_RETRANS_CB_FLAG = (1<<1),
+ BPF_SOCK_OPS_STATE_CB_FLAG = (1<<2),
+ BPF_SOCK_OPS_RTT_CB_FLAG = (1<<3),
+/* Mask of all currently supported cb flags */
+ BPF_SOCK_OPS_ALL_CB_FLAGS = 0xF,
+};
/* List of known BPF sock_ops operators.
* New entries can only be added at the end
@@ -3590,8 +3764,10 @@ enum {
BPF_TCP_MAX_STATES /* Leave at the end! */
};
-#define TCP_BPF_IW 1001 /* Set TCP initial congestion window */
-#define TCP_BPF_SNDCWND_CLAMP 1002 /* Set sndcwnd_clamp */
+enum {
+ TCP_BPF_IW = 1001, /* Set TCP initial congestion window */
+ TCP_BPF_SNDCWND_CLAMP = 1002, /* Set sndcwnd_clamp */
+};
struct bpf_perf_event_value {
__u64 counter;
@@ -3599,12 +3775,16 @@ struct bpf_perf_event_value {
__u64 running;
};
-#define BPF_DEVCG_ACC_MKNOD (1ULL << 0)
-#define BPF_DEVCG_ACC_READ (1ULL << 1)
-#define BPF_DEVCG_ACC_WRITE (1ULL << 2)
+enum {
+ BPF_DEVCG_ACC_MKNOD = (1ULL << 0),
+ BPF_DEVCG_ACC_READ = (1ULL << 1),
+ BPF_DEVCG_ACC_WRITE = (1ULL << 2),
+};
-#define BPF_DEVCG_DEV_BLOCK (1ULL << 0)
-#define BPF_DEVCG_DEV_CHAR (1ULL << 1)
+enum {
+ BPF_DEVCG_DEV_BLOCK = (1ULL << 0),
+ BPF_DEVCG_DEV_CHAR = (1ULL << 1),
+};
struct bpf_cgroup_dev_ctx {
/* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */
@@ -3620,8 +3800,10 @@ struct bpf_raw_tracepoint_args {
/* DIRECT: Skip the FIB rules and go to FIB table associated with device
* OUTPUT: Do lookup from egress perspective; default is ingress
*/
-#define BPF_FIB_LOOKUP_DIRECT (1U << 0)
-#define BPF_FIB_LOOKUP_OUTPUT (1U << 1)
+enum {
+ BPF_FIB_LOOKUP_DIRECT = (1U << 0),
+ BPF_FIB_LOOKUP_OUTPUT = (1U << 1),
+};
enum {
BPF_FIB_LKUP_RET_SUCCESS, /* lookup successful */
@@ -3693,9 +3875,11 @@ enum bpf_task_fd_type {
BPF_FD_TYPE_URETPROBE, /* filename + offset */
};
-#define BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG (1U << 0)
-#define BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL (1U << 1)
-#define BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP (1U << 2)
+enum {
+ BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG = (1U << 0),
+ BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL = (1U << 1),
+ BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP = (1U << 2),
+};
struct bpf_flow_keys {
__u16 nhoff;
@@ -3761,4 +3945,8 @@ struct bpf_sockopt {
__s32 retval;
};
+struct bpf_pidns_info {
+ __u32 pid;
+ __u32 tgid;
+};
#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index 024af2d1d0af..ca6665ea758a 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -463,6 +463,7 @@ enum {
IFLA_MACSEC_REPLAY_PROTECT,
IFLA_MACSEC_VALIDATION,
IFLA_MACSEC_PAD,
+ IFLA_MACSEC_OFFLOAD,
__IFLA_MACSEC_MAX,
};
@@ -489,6 +490,7 @@ enum macsec_validation_type {
enum macsec_offload {
MACSEC_OFFLOAD_OFF = 0,
MACSEC_OFFLOAD_PHY = 1,
+ MACSEC_OFFLOAD_MAC = 2,
__MACSEC_OFFLOAD_END,
MACSEC_OFFLOAD_MAX = __MACSEC_OFFLOAD_END - 1,
};
@@ -960,11 +962,12 @@ enum {
#define XDP_FLAGS_SKB_MODE (1U << 1)
#define XDP_FLAGS_DRV_MODE (1U << 2)
#define XDP_FLAGS_HW_MODE (1U << 3)
+#define XDP_FLAGS_REPLACE (1U << 4)
#define XDP_FLAGS_MODES (XDP_FLAGS_SKB_MODE | \
XDP_FLAGS_DRV_MODE | \
XDP_FLAGS_HW_MODE)
#define XDP_FLAGS_MASK (XDP_FLAGS_UPDATE_IF_NOEXIST | \
- XDP_FLAGS_MODES)
+ XDP_FLAGS_MODES | XDP_FLAGS_REPLACE)
/* These are stored into IFLA_XDP_ATTACHED on dump. */
enum {
@@ -984,6 +987,7 @@ enum {
IFLA_XDP_DRV_PROG_ID,
IFLA_XDP_SKB_PROG_ID,
IFLA_XDP_HW_PROG_ID,
+ IFLA_XDP_EXPECTED_FD,
__IFLA_XDP_MAX,
};
diff --git a/tools/testing/selftests/bpf/include/uapi/linux/types.h b/tools/include/uapi/linux/types.h
index 91fa51a9c31d..91fa51a9c31d 100644
--- a/tools/testing/selftests/bpf/include/uapi/linux/types.h
+++ b/tools/include/uapi/linux/types.h
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index c6dafe563176..5cc1b0785d18 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -235,7 +235,8 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
memset(&attr, 0, sizeof(attr));
attr.prog_type = load_attr->prog_type;
attr.expected_attach_type = load_attr->expected_attach_type;
- if (attr.prog_type == BPF_PROG_TYPE_STRUCT_OPS) {
+ if (attr.prog_type == BPF_PROG_TYPE_STRUCT_OPS ||
+ attr.prog_type == BPF_PROG_TYPE_LSM) {
attr.attach_btf_id = load_attr->attach_btf_id;
} else if (attr.prog_type == BPF_PROG_TYPE_TRACING ||
attr.prog_type == BPF_PROG_TYPE_EXT) {
@@ -584,6 +585,40 @@ int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type)
return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
}
+int bpf_link_create(int prog_fd, int target_fd,
+ enum bpf_attach_type attach_type,
+ const struct bpf_link_create_opts *opts)
+{
+ union bpf_attr attr;
+
+ if (!OPTS_VALID(opts, bpf_link_create_opts))
+ return -EINVAL;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.link_create.prog_fd = prog_fd;
+ attr.link_create.target_fd = target_fd;
+ attr.link_create.attach_type = attach_type;
+
+ return sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr));
+}
+
+int bpf_link_update(int link_fd, int new_prog_fd,
+ const struct bpf_link_update_opts *opts)
+{
+ union bpf_attr attr;
+
+ if (!OPTS_VALID(opts, bpf_link_update_opts))
+ return -EINVAL;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.link_update.link_fd = link_fd;
+ attr.link_update.new_prog_fd = new_prog_fd;
+ attr.link_update.flags = OPTS_GET(opts, flags, 0);
+ attr.link_update.old_prog_fd = OPTS_GET(opts, old_prog_fd, 0);
+
+ return sys_bpf(BPF_LINK_UPDATE, &attr, sizeof(attr));
+}
+
int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
__u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt)
{
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index b976e77316cc..46d47afdd887 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -168,6 +168,25 @@ LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type);
LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd,
enum bpf_attach_type type);
+struct bpf_link_create_opts {
+ size_t sz; /* size of this struct for forward/backward compatibility */
+};
+#define bpf_link_create_opts__last_field sz
+
+LIBBPF_API int bpf_link_create(int prog_fd, int target_fd,
+ enum bpf_attach_type attach_type,
+ const struct bpf_link_create_opts *opts);
+
+struct bpf_link_update_opts {
+ size_t sz; /* size of this struct for forward/backward compatibility */
+ __u32 flags; /* extra flags */
+ __u32 old_prog_fd; /* expected old program FD */
+};
+#define bpf_link_update_opts__last_field old_prog_fd
+
+LIBBPF_API int bpf_link_update(int link_fd, int new_prog_fd,
+ const struct bpf_link_update_opts *opts);
+
struct bpf_prog_test_run_attr {
int prog_fd;
int repeat;
diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h
index b0dafe8b4ebc..f3f3c3fb98cb 100644
--- a/tools/lib/bpf/bpf_tracing.h
+++ b/tools/lib/bpf/bpf_tracing.h
@@ -49,7 +49,8 @@
#if defined(bpf_target_x86)
-#ifdef __KERNEL__
+#if defined(__KERNEL__) || defined(__VMLINUX_H__)
+
#define PT_REGS_PARM1(x) ((x)->di)
#define PT_REGS_PARM2(x) ((x)->si)
#define PT_REGS_PARM3(x) ((x)->dx)
@@ -60,7 +61,20 @@
#define PT_REGS_RC(x) ((x)->ax)
#define PT_REGS_SP(x) ((x)->sp)
#define PT_REGS_IP(x) ((x)->ip)
+
+#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), di)
+#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), si)
+#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), dx)
+#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), cx)
+#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), r8)
+#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), sp)
+#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), bp)
+#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), ax)
+#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), sp)
+#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), ip)
+
#else
+
#ifdef __i386__
/* i386 kernel is built with -mregparm=3 */
#define PT_REGS_PARM1(x) ((x)->eax)
@@ -73,7 +87,20 @@
#define PT_REGS_RC(x) ((x)->eax)
#define PT_REGS_SP(x) ((x)->esp)
#define PT_REGS_IP(x) ((x)->eip)
+
+#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), eax)
+#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), edx)
+#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), ecx)
+#define PT_REGS_PARM4_CORE(x) 0
+#define PT_REGS_PARM5_CORE(x) 0
+#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), esp)
+#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), ebp)
+#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), eax)
+#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), esp)
+#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), eip)
+
#else
+
#define PT_REGS_PARM1(x) ((x)->rdi)
#define PT_REGS_PARM2(x) ((x)->rsi)
#define PT_REGS_PARM3(x) ((x)->rdx)
@@ -84,6 +111,18 @@
#define PT_REGS_RC(x) ((x)->rax)
#define PT_REGS_SP(x) ((x)->rsp)
#define PT_REGS_IP(x) ((x)->rip)
+
+#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), rdi)
+#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), rsi)
+#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), rdx)
+#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), rcx)
+#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), r8)
+#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), rsp)
+#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), rbp)
+#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), rax)
+#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), rsp)
+#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), rip)
+
#endif
#endif
@@ -104,6 +143,17 @@ struct pt_regs;
#define PT_REGS_SP(x) (((PT_REGS_S390 *)(x))->gprs[15])
#define PT_REGS_IP(x) (((PT_REGS_S390 *)(x))->psw.addr)
+#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[2])
+#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[3])
+#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[4])
+#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[5])
+#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[6])
+#define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), grps[14])
+#define PT_REGS_FP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[11])
+#define PT_REGS_RC_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[2])
+#define PT_REGS_SP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[15])
+#define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), pdw.addr)
+
#elif defined(bpf_target_arm)
#define PT_REGS_PARM1(x) ((x)->uregs[0])
@@ -117,6 +167,17 @@ struct pt_regs;
#define PT_REGS_SP(x) ((x)->uregs[13])
#define PT_REGS_IP(x) ((x)->uregs[12])
+#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), uregs[0])
+#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), uregs[1])
+#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), uregs[2])
+#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), uregs[3])
+#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), uregs[4])
+#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), uregs[14])
+#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), uregs[11])
+#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), uregs[0])
+#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), uregs[13])
+#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), uregs[12])
+
#elif defined(bpf_target_arm64)
/* arm64 provides struct user_pt_regs instead of struct pt_regs to userspace */
@@ -134,6 +195,17 @@ struct pt_regs;
#define PT_REGS_SP(x) (((PT_REGS_ARM64 *)(x))->sp)
#define PT_REGS_IP(x) (((PT_REGS_ARM64 *)(x))->pc)
+#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[0])
+#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[1])
+#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[2])
+#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[3])
+#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[4])
+#define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[30])
+#define PT_REGS_FP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[29])
+#define PT_REGS_RC_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[0])
+#define PT_REGS_SP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), sp)
+#define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), pc)
+
#elif defined(bpf_target_mips)
#define PT_REGS_PARM1(x) ((x)->regs[4])
@@ -147,6 +219,17 @@ struct pt_regs;
#define PT_REGS_SP(x) ((x)->regs[29])
#define PT_REGS_IP(x) ((x)->cp0_epc)
+#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), regs[4])
+#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), regs[5])
+#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), regs[6])
+#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), regs[7])
+#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), regs[8])
+#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), regs[31])
+#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), regs[30])
+#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), regs[1])
+#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), regs[29])
+#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), cp0_epc)
+
#elif defined(bpf_target_powerpc)
#define PT_REGS_PARM1(x) ((x)->gpr[3])
@@ -158,6 +241,15 @@ struct pt_regs;
#define PT_REGS_SP(x) ((x)->sp)
#define PT_REGS_IP(x) ((x)->nip)
+#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), gpr[3])
+#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), gpr[4])
+#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), gpr[5])
+#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), gpr[6])
+#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), gpr[7])
+#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), gpr[3])
+#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), sp)
+#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), nip)
+
#elif defined(bpf_target_sparc)
#define PT_REGS_PARM1(x) ((x)->u_regs[UREG_I0])
@@ -169,11 +261,22 @@ struct pt_regs;
#define PT_REGS_RC(x) ((x)->u_regs[UREG_I0])
#define PT_REGS_SP(x) ((x)->u_regs[UREG_FP])
+#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I0])
+#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I1])
+#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I2])
+#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I3])
+#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I4])
+#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I7])
+#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I0])
+#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), u_regs[UREG_FP])
+
/* Should this also be a bpf_target check for the sparc case? */
#if defined(__arch64__)
#define PT_REGS_IP(x) ((x)->tpc)
+#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), tpc)
#else
#define PT_REGS_IP(x) ((x)->pc)
+#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), pc)
#endif
#endif
@@ -192,4 +295,122 @@ struct pt_regs;
(void *)(PT_REGS_FP(ctx) + sizeof(ip))); })
#endif
+#define ___bpf_concat(a, b) a ## b
+#define ___bpf_apply(fn, n) ___bpf_concat(fn, n)
+#define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N
+#define ___bpf_narg(...) \
+ ___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
+#define ___bpf_empty(...) \
+ ___bpf_nth(_, ##__VA_ARGS__, N, N, N, N, N, N, N, N, N, N, 0)
+
+#define ___bpf_ctx_cast0() ctx
+#define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), (void *)ctx[0]
+#define ___bpf_ctx_cast2(x, args...) ___bpf_ctx_cast1(args), (void *)ctx[1]
+#define ___bpf_ctx_cast3(x, args...) ___bpf_ctx_cast2(args), (void *)ctx[2]
+#define ___bpf_ctx_cast4(x, args...) ___bpf_ctx_cast3(args), (void *)ctx[3]
+#define ___bpf_ctx_cast5(x, args...) ___bpf_ctx_cast4(args), (void *)ctx[4]
+#define ___bpf_ctx_cast6(x, args...) ___bpf_ctx_cast5(args), (void *)ctx[5]
+#define ___bpf_ctx_cast7(x, args...) ___bpf_ctx_cast6(args), (void *)ctx[6]
+#define ___bpf_ctx_cast8(x, args...) ___bpf_ctx_cast7(args), (void *)ctx[7]
+#define ___bpf_ctx_cast9(x, args...) ___bpf_ctx_cast8(args), (void *)ctx[8]
+#define ___bpf_ctx_cast10(x, args...) ___bpf_ctx_cast9(args), (void *)ctx[9]
+#define ___bpf_ctx_cast11(x, args...) ___bpf_ctx_cast10(args), (void *)ctx[10]
+#define ___bpf_ctx_cast12(x, args...) ___bpf_ctx_cast11(args), (void *)ctx[11]
+#define ___bpf_ctx_cast(args...) \
+ ___bpf_apply(___bpf_ctx_cast, ___bpf_narg(args))(args)
+
+/*
+ * BPF_PROG is a convenience wrapper for generic tp_btf/fentry/fexit and
+ * similar kinds of BPF programs, that accept input arguments as a single
+ * pointer to untyped u64 array, where each u64 can actually be a typed
+ * pointer or integer of different size. Instead of requring user to write
+ * manual casts and work with array elements by index, BPF_PROG macro
+ * allows user to declare a list of named and typed input arguments in the
+ * same syntax as for normal C function. All the casting is hidden and
+ * performed transparently, while user code can just assume working with
+ * function arguments of specified type and name.
+ *
+ * Original raw context argument is preserved as well as 'ctx' argument.
+ * This is useful when using BPF helpers that expect original context
+ * as one of the parameters (e.g., for bpf_perf_event_output()).
+ */
+#define BPF_PROG(name, args...) \
+name(unsigned long long *ctx); \
+static __attribute__((always_inline)) typeof(name(0)) \
+____##name(unsigned long long *ctx, ##args); \
+typeof(name(0)) name(unsigned long long *ctx) \
+{ \
+ _Pragma("GCC diagnostic push") \
+ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
+ return ____##name(___bpf_ctx_cast(args)); \
+ _Pragma("GCC diagnostic pop") \
+} \
+static __attribute__((always_inline)) typeof(name(0)) \
+____##name(unsigned long long *ctx, ##args)
+
+struct pt_regs;
+
+#define ___bpf_kprobe_args0() ctx
+#define ___bpf_kprobe_args1(x) \
+ ___bpf_kprobe_args0(), (void *)PT_REGS_PARM1(ctx)
+#define ___bpf_kprobe_args2(x, args...) \
+ ___bpf_kprobe_args1(args), (void *)PT_REGS_PARM2(ctx)
+#define ___bpf_kprobe_args3(x, args...) \
+ ___bpf_kprobe_args2(args), (void *)PT_REGS_PARM3(ctx)
+#define ___bpf_kprobe_args4(x, args...) \
+ ___bpf_kprobe_args3(args), (void *)PT_REGS_PARM4(ctx)
+#define ___bpf_kprobe_args5(x, args...) \
+ ___bpf_kprobe_args4(args), (void *)PT_REGS_PARM5(ctx)
+#define ___bpf_kprobe_args(args...) \
+ ___bpf_apply(___bpf_kprobe_args, ___bpf_narg(args))(args)
+
+/*
+ * BPF_KPROBE serves the same purpose for kprobes as BPF_PROG for
+ * tp_btf/fentry/fexit BPF programs. It hides the underlying platform-specific
+ * low-level way of getting kprobe input arguments from struct pt_regs, and
+ * provides a familiar typed and named function arguments syntax and
+ * semantics of accessing kprobe input paremeters.
+ *
+ * Original struct pt_regs* context is preserved as 'ctx' argument. This might
+ * be necessary when using BPF helpers like bpf_perf_event_output().
+ */
+#define BPF_KPROBE(name, args...) \
+name(struct pt_regs *ctx); \
+static __attribute__((always_inline)) typeof(name(0)) \
+____##name(struct pt_regs *ctx, ##args); \
+typeof(name(0)) name(struct pt_regs *ctx) \
+{ \
+ _Pragma("GCC diagnostic push") \
+ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
+ return ____##name(___bpf_kprobe_args(args)); \
+ _Pragma("GCC diagnostic pop") \
+} \
+static __attribute__((always_inline)) typeof(name(0)) \
+____##name(struct pt_regs *ctx, ##args)
+
+#define ___bpf_kretprobe_args0() ctx
+#define ___bpf_kretprobe_args1(x) \
+ ___bpf_kretprobe_args0(), (void *)PT_REGS_RC(ctx)
+#define ___bpf_kretprobe_args(args...) \
+ ___bpf_apply(___bpf_kretprobe_args, ___bpf_narg(args))(args)
+
+/*
+ * BPF_KRETPROBE is similar to BPF_KPROBE, except, it only provides optional
+ * return value (in addition to `struct pt_regs *ctx`), but no input
+ * arguments, because they will be clobbered by the time probed function
+ * returns.
+ */
+#define BPF_KRETPROBE(name, args...) \
+name(struct pt_regs *ctx); \
+static __attribute__((always_inline)) typeof(name(0)) \
+____##name(struct pt_regs *ctx, ##args); \
+typeof(name(0)) name(struct pt_regs *ctx) \
+{ \
+ _Pragma("GCC diagnostic push") \
+ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
+ return ____##name(___bpf_kretprobe_args(args)); \
+ _Pragma("GCC diagnostic pop") \
+} \
+static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)
+
#endif
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 3d1c25fc97ae..bfef3d606b54 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -657,22 +657,32 @@ int btf__finalize_data(struct bpf_object *obj, struct btf *btf)
int btf__load(struct btf *btf)
{
- __u32 log_buf_size = BPF_LOG_BUF_SIZE;
+ __u32 log_buf_size = 0;
char *log_buf = NULL;
int err = 0;
if (btf->fd >= 0)
return -EEXIST;
- log_buf = malloc(log_buf_size);
- if (!log_buf)
- return -ENOMEM;
+retry_load:
+ if (log_buf_size) {
+ log_buf = malloc(log_buf_size);
+ if (!log_buf)
+ return -ENOMEM;
- *log_buf = 0;
+ *log_buf = 0;
+ }
btf->fd = bpf_load_btf(btf->data, btf->data_size,
log_buf, log_buf_size, false);
if (btf->fd < 0) {
+ if (!log_buf || errno == ENOSPC) {
+ log_buf_size = max((__u32)BPF_LOG_BUF_SIZE,
+ log_buf_size << 1);
+ free(log_buf);
+ goto retry_load;
+ }
+
err = -errno;
pr_warn("Error loading BTF: %s(%d)\n", strerror(errno), errno);
if (*log_buf)
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index bd09ed1710f1..0c28ee82834b 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -916,13 +916,13 @@ static void btf_dump_emit_enum_def(struct btf_dump *d, __u32 id,
/* enumerators share namespace with typedef idents */
dup_cnt = btf_dump_name_dups(d, d->ident_names, name);
if (dup_cnt > 1) {
- btf_dump_printf(d, "\n%s%s___%zu = %d,",
+ btf_dump_printf(d, "\n%s%s___%zu = %u,",
pfx(lvl + 1), name, dup_cnt,
- (__s32)v->val);
+ (__u32)v->val);
} else {
- btf_dump_printf(d, "\n%s%s = %d,",
+ btf_dump_printf(d, "\n%s%s = %u,",
pfx(lvl + 1), name,
- (__s32)v->val);
+ (__u32)v->val);
}
}
btf_dump_printf(d, "\n%s}", pfx(lvl));
@@ -1030,7 +1030,7 @@ int btf_dump__emit_type_decl(struct btf_dump *d, __u32 id,
if (!OPTS_VALID(opts, btf_dump_emit_type_decl_opts))
return -EINVAL;
- fname = OPTS_GET(opts, field_name, NULL);
+ fname = OPTS_GET(opts, field_name, "");
lvl = OPTS_GET(opts, indent_level, 0);
btf_dump_emit_type_decl(d, id, fname, lvl);
return 0;
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 7469c7dcc15e..ff9174282a8c 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1845,7 +1845,6 @@ resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id)
* type definition, while using only sizeof(void *) space in ELF data section.
*/
static bool get_map_field_int(const char *map_name, const struct btf *btf,
- const struct btf_type *def,
const struct btf_member *m, __u32 *res)
{
const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL);
@@ -1972,19 +1971,19 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
return -EINVAL;
}
if (strcmp(name, "type") == 0) {
- if (!get_map_field_int(map_name, obj->btf, def, m,
+ if (!get_map_field_int(map_name, obj->btf, m,
&map->def.type))
return -EINVAL;
pr_debug("map '%s': found type = %u.\n",
map_name, map->def.type);
} else if (strcmp(name, "max_entries") == 0) {
- if (!get_map_field_int(map_name, obj->btf, def, m,
+ if (!get_map_field_int(map_name, obj->btf, m,
&map->def.max_entries))
return -EINVAL;
pr_debug("map '%s': found max_entries = %u.\n",
map_name, map->def.max_entries);
} else if (strcmp(name, "map_flags") == 0) {
- if (!get_map_field_int(map_name, obj->btf, def, m,
+ if (!get_map_field_int(map_name, obj->btf, m,
&map->def.map_flags))
return -EINVAL;
pr_debug("map '%s': found map_flags = %u.\n",
@@ -1992,8 +1991,7 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
} else if (strcmp(name, "key_size") == 0) {
__u32 sz;
- if (!get_map_field_int(map_name, obj->btf, def, m,
- &sz))
+ if (!get_map_field_int(map_name, obj->btf, m, &sz))
return -EINVAL;
pr_debug("map '%s': found key_size = %u.\n",
map_name, sz);
@@ -2035,8 +2033,7 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
} else if (strcmp(name, "value_size") == 0) {
__u32 sz;
- if (!get_map_field_int(map_name, obj->btf, def, m,
- &sz))
+ if (!get_map_field_int(map_name, obj->btf, m, &sz))
return -EINVAL;
pr_debug("map '%s': found value_size = %u.\n",
map_name, sz);
@@ -2079,8 +2076,7 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
__u32 val;
int err;
- if (!get_map_field_int(map_name, obj->btf, def, m,
- &val))
+ if (!get_map_field_int(map_name, obj->btf, m, &val))
return -EINVAL;
pr_debug("map '%s': found pinning = %u.\n",
map_name, val);
@@ -2284,11 +2280,16 @@ static void bpf_object__sanitize_btf_ext(struct bpf_object *obj)
}
}
-static bool bpf_object__is_btf_mandatory(const struct bpf_object *obj)
+static bool libbpf_needs_btf(const struct bpf_object *obj)
{
return obj->efile.btf_maps_shndx >= 0 ||
- obj->efile.st_ops_shndx >= 0 ||
- obj->nr_extern > 0;
+ obj->efile.st_ops_shndx >= 0 ||
+ obj->nr_extern > 0;
+}
+
+static bool kernel_needs_btf(const struct bpf_object *obj)
+{
+ return obj->efile.st_ops_shndx >= 0;
}
static int bpf_object__init_btf(struct bpf_object *obj,
@@ -2324,7 +2325,7 @@ static int bpf_object__init_btf(struct bpf_object *obj,
}
}
out:
- if (err && bpf_object__is_btf_mandatory(obj)) {
+ if (err && libbpf_needs_btf(obj)) {
pr_warn("BTF is required, but is missing or corrupted.\n");
return err;
}
@@ -2348,7 +2349,7 @@ static int bpf_object__finalize_btf(struct bpf_object *obj)
btf_ext__free(obj->btf_ext);
obj->btf_ext = NULL;
- if (bpf_object__is_btf_mandatory(obj)) {
+ if (libbpf_needs_btf(obj)) {
pr_warn("BTF is required, but is missing or corrupted.\n");
return -ENOENT;
}
@@ -2357,7 +2358,8 @@ static int bpf_object__finalize_btf(struct bpf_object *obj)
static inline bool libbpf_prog_needs_vmlinux_btf(struct bpf_program *prog)
{
- if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
+ if (prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
+ prog->type == BPF_PROG_TYPE_LSM)
return true;
/* BPF_PROG_TYPE_TRACING programs which do not attach to other programs
@@ -2412,7 +2414,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
obj->btf_ext = NULL;
}
- if (bpf_object__is_btf_mandatory(obj))
+ if (kernel_needs_btf(obj))
return err;
}
return 0;
@@ -3868,6 +3870,10 @@ static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf,
if (str_is_empty(targ_name))
continue;
+ t = skip_mods_and_typedefs(targ_btf, i, NULL);
+ if (!btf_is_composite(t) && !btf_is_array(t))
+ continue;
+
targ_essent_len = bpf_core_essential_name_len(targ_name);
if (targ_essent_len != local_essent_len)
continue;
@@ -4846,8 +4852,8 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
{
struct bpf_load_program_attr load_attr;
char *cp, errmsg[STRERR_BUFSIZE];
- int log_buf_size = BPF_LOG_BUF_SIZE;
- char *log_buf;
+ size_t log_buf_size = 0;
+ char *log_buf = NULL;
int btf_fd, ret;
if (!insns || !insns_cnt)
@@ -4861,7 +4867,8 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
load_attr.insns = insns;
load_attr.insns_cnt = insns_cnt;
load_attr.license = license;
- if (prog->type == BPF_PROG_TYPE_STRUCT_OPS) {
+ if (prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
+ prog->type == BPF_PROG_TYPE_LSM) {
load_attr.attach_btf_id = prog->attach_btf_id;
} else if (prog->type == BPF_PROG_TYPE_TRACING ||
prog->type == BPF_PROG_TYPE_EXT) {
@@ -4887,22 +4894,28 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
load_attr.prog_flags = prog->prog_flags;
retry_load:
- log_buf = malloc(log_buf_size);
- if (!log_buf)
- pr_warn("Alloc log buffer for bpf loader error, continue without log\n");
+ if (log_buf_size) {
+ log_buf = malloc(log_buf_size);
+ if (!log_buf)
+ return -ENOMEM;
+
+ *log_buf = 0;
+ }
ret = bpf_load_program_xattr(&load_attr, log_buf, log_buf_size);
if (ret >= 0) {
- if (load_attr.log_level)
+ if (log_buf && load_attr.log_level)
pr_debug("verifier log:\n%s", log_buf);
*pfd = ret;
ret = 0;
goto out;
}
- if (errno == ENOSPC) {
- log_buf_size <<= 1;
+ if (!log_buf || errno == ENOSPC) {
+ log_buf_size = max((size_t)BPF_LOG_BUF_SIZE,
+ log_buf_size << 1);
+
free(log_buf);
goto retry_load;
}
@@ -4945,8 +4958,9 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
{
int err = 0, fd, i, btf_id;
- if (prog->type == BPF_PROG_TYPE_TRACING ||
- prog->type == BPF_PROG_TYPE_EXT) {
+ if ((prog->type == BPF_PROG_TYPE_TRACING ||
+ prog->type == BPF_PROG_TYPE_LSM ||
+ prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) {
btf_id = libbpf_find_attach_btf_id(prog);
if (btf_id <= 0)
return btf_id;
@@ -6185,6 +6199,7 @@ bool bpf_program__is_##NAME(const struct bpf_program *prog) \
} \
BPF_PROG_TYPE_FNS(socket_filter, BPF_PROG_TYPE_SOCKET_FILTER);
+BPF_PROG_TYPE_FNS(lsm, BPF_PROG_TYPE_LSM);
BPF_PROG_TYPE_FNS(kprobe, BPF_PROG_TYPE_KPROBE);
BPF_PROG_TYPE_FNS(sched_cls, BPF_PROG_TYPE_SCHED_CLS);
BPF_PROG_TYPE_FNS(sched_act, BPF_PROG_TYPE_SCHED_ACT);
@@ -6251,6 +6266,8 @@ static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec,
struct bpf_program *prog);
static struct bpf_link *attach_trace(const struct bpf_sec_def *sec,
struct bpf_program *prog);
+static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
+ struct bpf_program *prog);
struct bpf_sec_def {
const char *sec;
@@ -6290,6 +6307,10 @@ static const struct bpf_sec_def section_defs[] = {
.expected_attach_type = BPF_TRACE_FENTRY,
.is_attach_btf = true,
.attach_fn = attach_trace),
+ SEC_DEF("fmod_ret/", TRACING,
+ .expected_attach_type = BPF_MODIFY_RETURN,
+ .is_attach_btf = true,
+ .attach_fn = attach_trace),
SEC_DEF("fexit/", TRACING,
.expected_attach_type = BPF_TRACE_FEXIT,
.is_attach_btf = true,
@@ -6297,6 +6318,10 @@ static const struct bpf_sec_def section_defs[] = {
SEC_DEF("freplace/", EXT,
.is_attach_btf = true,
.attach_fn = attach_trace),
+ SEC_DEF("lsm/", LSM,
+ .is_attach_btf = true,
+ .expected_attach_type = BPF_LSM_MAC,
+ .attach_fn = attach_lsm),
BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP),
BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT),
BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN),
@@ -6559,6 +6584,7 @@ invalid_prog:
}
#define BTF_TRACE_PREFIX "btf_trace_"
+#define BTF_LSM_PREFIX "bpf_lsm_"
#define BTF_MAX_NAME_SIZE 128
static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
@@ -6586,9 +6612,15 @@ static inline int __find_vmlinux_btf_id(struct btf *btf, const char *name,
if (attach_type == BPF_TRACE_RAW_TP)
err = find_btf_by_prefix_kind(btf, BTF_TRACE_PREFIX, name,
BTF_KIND_TYPEDEF);
+ else if (attach_type == BPF_LSM_MAC)
+ err = find_btf_by_prefix_kind(btf, BTF_LSM_PREFIX, name,
+ BTF_KIND_FUNC);
else
err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
+ if (err <= 0)
+ pr_warn("%s is not found in vmlinux BTF\n", name);
+
return err;
}
@@ -6661,8 +6693,6 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog)
err = __find_vmlinux_btf_id(prog->obj->btf_vmlinux,
name + section_defs[i].len,
attach_type);
- if (err <= 0)
- pr_warn("%s is not found in vmlinux BTF\n", name);
return err;
}
pr_warn("failed to identify btf_id based on ELF section name '%s'\n", name);
@@ -6742,6 +6772,17 @@ void *bpf_map__priv(const struct bpf_map *map)
return map ? map->priv : ERR_PTR(-EINVAL);
}
+int bpf_map__set_initial_value(struct bpf_map *map,
+ const void *data, size_t size)
+{
+ if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG ||
+ size != map->def.value_size || map->fd >= 0)
+ return -EINVAL;
+
+ memcpy(map->mmaped, data, size);
+ return 0;
+}
+
bool bpf_map__is_offload_neutral(const struct bpf_map *map)
{
return map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY;
@@ -6932,9 +6973,17 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
struct bpf_link {
int (*detach)(struct bpf_link *link);
int (*destroy)(struct bpf_link *link);
+ char *pin_path; /* NULL, if not pinned */
+ int fd; /* hook FD, -1 if not applicable */
bool disconnected;
};
+/* Replace link's underlying BPF program with the new one */
+int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
+{
+ return bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL);
+}
+
/* Release "ownership" of underlying BPF resource (typically, BPF program
* attached to some BPF hook, e.g., tracepoint, kprobe, etc). Disconnected
* link, when destructed through bpf_link__destroy() call won't attempt to
@@ -6961,26 +7010,109 @@ int bpf_link__destroy(struct bpf_link *link)
err = link->detach(link);
if (link->destroy)
link->destroy(link);
+ if (link->pin_path)
+ free(link->pin_path);
free(link);
return err;
}
-struct bpf_link_fd {
- struct bpf_link link; /* has to be at the top of struct */
- int fd; /* hook FD */
-};
+int bpf_link__fd(const struct bpf_link *link)
+{
+ return link->fd;
+}
+
+const char *bpf_link__pin_path(const struct bpf_link *link)
+{
+ return link->pin_path;
+}
+
+static int bpf_link__detach_fd(struct bpf_link *link)
+{
+ return close(link->fd);
+}
+
+struct bpf_link *bpf_link__open(const char *path)
+{
+ struct bpf_link *link;
+ int fd;
+
+ fd = bpf_obj_get(path);
+ if (fd < 0) {
+ fd = -errno;
+ pr_warn("failed to open link at %s: %d\n", path, fd);
+ return ERR_PTR(fd);
+ }
+
+ link = calloc(1, sizeof(*link));
+ if (!link) {
+ close(fd);
+ return ERR_PTR(-ENOMEM);
+ }
+ link->detach = &bpf_link__detach_fd;
+ link->fd = fd;
+
+ link->pin_path = strdup(path);
+ if (!link->pin_path) {
+ bpf_link__destroy(link);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ return link;
+}
+
+int bpf_link__pin(struct bpf_link *link, const char *path)
+{
+ int err;
+
+ if (link->pin_path)
+ return -EBUSY;
+ err = make_parent_dir(path);
+ if (err)
+ return err;
+ err = check_path(path);
+ if (err)
+ return err;
+
+ link->pin_path = strdup(path);
+ if (!link->pin_path)
+ return -ENOMEM;
+
+ if (bpf_obj_pin(link->fd, link->pin_path)) {
+ err = -errno;
+ zfree(&link->pin_path);
+ return err;
+ }
+
+ pr_debug("link fd=%d: pinned at %s\n", link->fd, link->pin_path);
+ return 0;
+}
+
+int bpf_link__unpin(struct bpf_link *link)
+{
+ int err;
+
+ if (!link->pin_path)
+ return -EINVAL;
+
+ err = unlink(link->pin_path);
+ if (err != 0)
+ return -errno;
+
+ pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path);
+ zfree(&link->pin_path);
+ return 0;
+}
static int bpf_link__detach_perf_event(struct bpf_link *link)
{
- struct bpf_link_fd *l = (void *)link;
int err;
- err = ioctl(l->fd, PERF_EVENT_IOC_DISABLE, 0);
+ err = ioctl(link->fd, PERF_EVENT_IOC_DISABLE, 0);
if (err)
err = -errno;
- close(l->fd);
+ close(link->fd);
return err;
}
@@ -6988,7 +7120,7 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
int pfd)
{
char errmsg[STRERR_BUFSIZE];
- struct bpf_link_fd *link;
+ struct bpf_link *link;
int prog_fd, err;
if (pfd < 0) {
@@ -7006,7 +7138,7 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
link = calloc(1, sizeof(*link));
if (!link)
return ERR_PTR(-ENOMEM);
- link->link.detach = &bpf_link__detach_perf_event;
+ link->detach = &bpf_link__detach_perf_event;
link->fd = pfd;
if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
@@ -7025,7 +7157,7 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
return ERR_PTR(err);
}
- return (struct bpf_link *)link;
+ return link;
}
/*
@@ -7313,18 +7445,11 @@ out:
return link;
}
-static int bpf_link__detach_fd(struct bpf_link *link)
-{
- struct bpf_link_fd *l = (void *)link;
-
- return close(l->fd);
-}
-
struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
const char *tp_name)
{
char errmsg[STRERR_BUFSIZE];
- struct bpf_link_fd *link;
+ struct bpf_link *link;
int prog_fd, pfd;
prog_fd = bpf_program__fd(prog);
@@ -7337,7 +7462,7 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
link = calloc(1, sizeof(*link));
if (!link)
return ERR_PTR(-ENOMEM);
- link->link.detach = &bpf_link__detach_fd;
+ link->detach = &bpf_link__detach_fd;
pfd = bpf_raw_tracepoint_open(tp_name, prog_fd);
if (pfd < 0) {
@@ -7349,7 +7474,7 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
return ERR_PTR(pfd);
}
link->fd = pfd;
- return (struct bpf_link *)link;
+ return link;
}
static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec,
@@ -7360,10 +7485,11 @@ static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec,
return bpf_program__attach_raw_tracepoint(prog, tp_name);
}
-struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog)
+/* Common logic for all BPF program types that attach to a btf_id */
+static struct bpf_link *bpf_program__attach_btf_id(struct bpf_program *prog)
{
char errmsg[STRERR_BUFSIZE];
- struct bpf_link_fd *link;
+ struct bpf_link *link;
int prog_fd, pfd;
prog_fd = bpf_program__fd(prog);
@@ -7376,13 +7502,13 @@ struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog)
link = calloc(1, sizeof(*link));
if (!link)
return ERR_PTR(-ENOMEM);
- link->link.detach = &bpf_link__detach_fd;
+ link->detach = &bpf_link__detach_fd;
pfd = bpf_raw_tracepoint_open(NULL, prog_fd);
if (pfd < 0) {
pfd = -errno;
free(link);
- pr_warn("program '%s': failed to attach to trace: %s\n",
+ pr_warn("program '%s': failed to attach: %s\n",
bpf_program__title(prog, false),
libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
return ERR_PTR(pfd);
@@ -7391,12 +7517,68 @@ struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog)
return (struct bpf_link *)link;
}
+struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog)
+{
+ return bpf_program__attach_btf_id(prog);
+}
+
+struct bpf_link *bpf_program__attach_lsm(struct bpf_program *prog)
+{
+ return bpf_program__attach_btf_id(prog);
+}
+
static struct bpf_link *attach_trace(const struct bpf_sec_def *sec,
struct bpf_program *prog)
{
return bpf_program__attach_trace(prog);
}
+static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
+ struct bpf_program *prog)
+{
+ return bpf_program__attach_lsm(prog);
+}
+
+struct bpf_link *
+bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd)
+{
+ const struct bpf_sec_def *sec_def;
+ enum bpf_attach_type attach_type;
+ char errmsg[STRERR_BUFSIZE];
+ struct bpf_link *link;
+ int prog_fd, link_fd;
+
+ prog_fd = bpf_program__fd(prog);
+ if (prog_fd < 0) {
+ pr_warn("program '%s': can't attach before loaded\n",
+ bpf_program__title(prog, false));
+ return ERR_PTR(-EINVAL);
+ }
+
+ link = calloc(1, sizeof(*link));
+ if (!link)
+ return ERR_PTR(-ENOMEM);
+ link->detach = &bpf_link__detach_fd;
+
+ attach_type = bpf_program__get_expected_attach_type(prog);
+ if (!attach_type) {
+ sec_def = find_sec_def(bpf_program__title(prog, false));
+ if (sec_def)
+ attach_type = sec_def->attach_type;
+ }
+ link_fd = bpf_link_create(prog_fd, cgroup_fd, attach_type, NULL);
+ if (link_fd < 0) {
+ link_fd = -errno;
+ free(link);
+ pr_warn("program '%s': failed to attach to cgroup: %s\n",
+ bpf_program__title(prog, false),
+ libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
+ return ERR_PTR(link_fd);
+ }
+ link->fd = link_fd;
+ return link;
+}
+
struct bpf_link *bpf_program__attach(struct bpf_program *prog)
{
const struct bpf_sec_def *sec_def;
@@ -7410,10 +7592,9 @@ struct bpf_link *bpf_program__attach(struct bpf_program *prog)
static int bpf_link__detach_struct_ops(struct bpf_link *link)
{
- struct bpf_link_fd *l = (void *)link;
__u32 zero = 0;
- if (bpf_map_delete_elem(l->fd, &zero))
+ if (bpf_map_delete_elem(link->fd, &zero))
return -errno;
return 0;
@@ -7422,7 +7603,7 @@ static int bpf_link__detach_struct_ops(struct bpf_link *link)
struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map)
{
struct bpf_struct_ops *st_ops;
- struct bpf_link_fd *link;
+ struct bpf_link *link;
__u32 i, zero = 0;
int err;
@@ -7454,10 +7635,10 @@ struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map)
return ERR_PTR(err);
}
- link->link.detach = bpf_link__detach_struct_ops;
+ link->detach = bpf_link__detach_struct_ops;
link->fd = map->fd;
- return (struct bpf_link *)link;
+ return link;
}
enum bpf_perf_event_ret
@@ -8138,6 +8319,31 @@ void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear)
}
}
+int bpf_program__set_attach_target(struct bpf_program *prog,
+ int attach_prog_fd,
+ const char *attach_func_name)
+{
+ int btf_id;
+
+ if (!prog || attach_prog_fd < 0 || !attach_func_name)
+ return -EINVAL;
+
+ if (attach_prog_fd)
+ btf_id = libbpf_find_prog_btf_id(attach_func_name,
+ attach_prog_fd);
+ else
+ btf_id = __find_vmlinux_btf_id(prog->obj->btf_vmlinux,
+ attach_func_name,
+ prog->expected_attach_type);
+
+ if (btf_id < 0)
+ return btf_id;
+
+ prog->attach_btf_id = btf_id;
+ prog->attach_prog_fd = attach_prog_fd;
+ return 0;
+}
+
int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz)
{
int err = 0, n, len, start, end = -1;
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 3fe12c9d1f92..44df1d3e7287 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -219,6 +219,13 @@ LIBBPF_API void bpf_program__unload(struct bpf_program *prog);
struct bpf_link;
+LIBBPF_API struct bpf_link *bpf_link__open(const char *path);
+LIBBPF_API int bpf_link__fd(const struct bpf_link *link);
+LIBBPF_API const char *bpf_link__pin_path(const struct bpf_link *link);
+LIBBPF_API int bpf_link__pin(struct bpf_link *link, const char *path);
+LIBBPF_API int bpf_link__unpin(struct bpf_link *link);
+LIBBPF_API int bpf_link__update_program(struct bpf_link *link,
+ struct bpf_program *prog);
LIBBPF_API void bpf_link__disconnect(struct bpf_link *link);
LIBBPF_API int bpf_link__destroy(struct bpf_link *link);
@@ -240,11 +247,17 @@ bpf_program__attach_tracepoint(struct bpf_program *prog,
LIBBPF_API struct bpf_link *
bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
const char *tp_name);
-
LIBBPF_API struct bpf_link *
bpf_program__attach_trace(struct bpf_program *prog);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_lsm(struct bpf_program *prog);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd);
+
struct bpf_map;
+
LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map);
+
struct bpf_insn;
/*
@@ -316,6 +329,7 @@ LIBBPF_API int bpf_program__set_socket_filter(struct bpf_program *prog);
LIBBPF_API int bpf_program__set_tracepoint(struct bpf_program *prog);
LIBBPF_API int bpf_program__set_raw_tracepoint(struct bpf_program *prog);
LIBBPF_API int bpf_program__set_kprobe(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_lsm(struct bpf_program *prog);
LIBBPF_API int bpf_program__set_sched_cls(struct bpf_program *prog);
LIBBPF_API int bpf_program__set_sched_act(struct bpf_program *prog);
LIBBPF_API int bpf_program__set_xdp(struct bpf_program *prog);
@@ -334,10 +348,15 @@ LIBBPF_API void
bpf_program__set_expected_attach_type(struct bpf_program *prog,
enum bpf_attach_type type);
+LIBBPF_API int
+bpf_program__set_attach_target(struct bpf_program *prog, int attach_prog_fd,
+ const char *attach_func_name);
+
LIBBPF_API bool bpf_program__is_socket_filter(const struct bpf_program *prog);
LIBBPF_API bool bpf_program__is_tracepoint(const struct bpf_program *prog);
LIBBPF_API bool bpf_program__is_raw_tracepoint(const struct bpf_program *prog);
LIBBPF_API bool bpf_program__is_kprobe(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_lsm(const struct bpf_program *prog);
LIBBPF_API bool bpf_program__is_sched_cls(const struct bpf_program *prog);
LIBBPF_API bool bpf_program__is_sched_act(const struct bpf_program *prog);
LIBBPF_API bool bpf_program__is_xdp(const struct bpf_program *prog);
@@ -398,6 +417,8 @@ typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *);
LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv,
bpf_map_clear_priv_t clear_priv);
LIBBPF_API void *bpf_map__priv(const struct bpf_map *map);
+LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map,
+ const void *data, size_t size);
LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd);
LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries);
LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map);
@@ -435,7 +456,15 @@ struct xdp_link_info {
__u8 attach_mode;
};
+struct bpf_xdp_set_link_opts {
+ size_t sz;
+ __u32 old_fd;
+};
+#define bpf_xdp_set_link_opts__last_field old_fd
+
LIBBPF_API int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags);
+LIBBPF_API int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags,
+ const struct bpf_xdp_set_link_opts *opts);
LIBBPF_API int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags);
LIBBPF_API int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
size_t info_size, __u32 flags);
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index b035122142bb..bb8831605b25 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -235,3 +235,22 @@ LIBBPF_0.0.7 {
btf__align_of;
libbpf_find_kernel_btf;
} LIBBPF_0.0.6;
+
+LIBBPF_0.0.8 {
+ global:
+ bpf_link__fd;
+ bpf_link__open;
+ bpf_link__pin;
+ bpf_link__pin_path;
+ bpf_link__unpin;
+ bpf_link__update_program;
+ bpf_link_create;
+ bpf_link_update;
+ bpf_map__set_initial_value;
+ bpf_program__attach_cgroup;
+ bpf_program__attach_lsm;
+ bpf_program__is_lsm;
+ bpf_program__set_attach_target;
+ bpf_program__set_lsm;
+ bpf_set_link_xdp_fd_opts;
+} LIBBPF_0.0.7;
diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index b782ebef6ac9..2c92059c0c90 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -108,6 +108,7 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns,
case BPF_PROG_TYPE_TRACING:
case BPF_PROG_TYPE_STRUCT_OPS:
case BPF_PROG_TYPE_EXT:
+ case BPF_PROG_TYPE_LSM:
default:
break;
}
diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c
index 431bd25c6cdb..18b5319025e1 100644
--- a/tools/lib/bpf/netlink.c
+++ b/tools/lib/bpf/netlink.c
@@ -132,7 +132,8 @@ done:
return ret;
}
-int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags)
+static int __bpf_set_link_xdp_fd_replace(int ifindex, int fd, int old_fd,
+ __u32 flags)
{
int sock, seq = 0, ret;
struct nlattr *nla, *nla_xdp;
@@ -178,6 +179,14 @@ int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags)
nla->nla_len += nla_xdp->nla_len;
}
+ if (flags & XDP_FLAGS_REPLACE) {
+ nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
+ nla_xdp->nla_type = IFLA_XDP_EXPECTED_FD;
+ nla_xdp->nla_len = NLA_HDRLEN + sizeof(old_fd);
+ memcpy((char *)nla_xdp + NLA_HDRLEN, &old_fd, sizeof(old_fd));
+ nla->nla_len += nla_xdp->nla_len;
+ }
+
req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);
if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
@@ -191,6 +200,29 @@ cleanup:
return ret;
}
+int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags,
+ const struct bpf_xdp_set_link_opts *opts)
+{
+ int old_fd = -1;
+
+ if (!OPTS_VALID(opts, bpf_xdp_set_link_opts))
+ return -EINVAL;
+
+ if (OPTS_HAS(opts, old_fd)) {
+ old_fd = OPTS_GET(opts, old_fd, -1);
+ flags |= XDP_FLAGS_REPLACE;
+ }
+
+ return __bpf_set_link_xdp_fd_replace(ifindex, fd,
+ old_fd,
+ flags);
+}
+
+int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags)
+{
+ return __bpf_set_link_xdp_fd_replace(ifindex, fd, 0, flags);
+}
+
static int __dump_link_nlmsg(struct nlmsghdr *nlh,
libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie)
{
diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index 9807903f121e..f7f4efb70a4c 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -280,7 +280,11 @@ int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,
fill->consumer = map + off.fr.consumer;
fill->flags = map + off.fr.flags;
fill->ring = map + off.fr.desc;
- fill->cached_cons = umem->config.fill_size;
+ fill->cached_prod = *fill->producer;
+ /* cached_cons is "size" bigger than the real consumer pointer
+ * See xsk_prod_nb_free
+ */
+ fill->cached_cons = *fill->consumer + umem->config.fill_size;
map = mmap(NULL, off.cr.desc + umem->config.comp_size * sizeof(__u64),
PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, umem->fd,
@@ -297,6 +301,8 @@ int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,
comp->consumer = map + off.cr.consumer;
comp->flags = map + off.cr.flags;
comp->ring = map + off.cr.desc;
+ comp->cached_prod = *comp->producer;
+ comp->cached_cons = *comp->consumer;
*umem_ptr = umem;
return 0;
@@ -672,6 +678,8 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
rx->consumer = rx_map + off.rx.consumer;
rx->flags = rx_map + off.rx.flags;
rx->ring = rx_map + off.rx.desc;
+ rx->cached_prod = *rx->producer;
+ rx->cached_cons = *rx->consumer;
}
xsk->rx = rx;
@@ -691,7 +699,11 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
tx->consumer = tx_map + off.tx.consumer;
tx->flags = tx_map + off.tx.flags;
tx->ring = tx_map + off.tx.desc;
- tx->cached_cons = xsk->config.tx_size;
+ tx->cached_prod = *tx->producer;
+ /* cached_cons is r->size bigger than the real consumer pointer
+ * See xsk_prod_nb_free
+ */
+ tx->cached_cons = *tx->consumer + xsk->config.tx_size;
}
xsk->tx = tx;
diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include
index 6d2f3a1b2249..a7974638561c 100644
--- a/tools/scripts/Makefile.include
+++ b/tools/scripts/Makefile.include
@@ -106,6 +106,7 @@ ifneq ($(silent),1)
ifneq ($(V),1)
QUIET_CC = @echo ' CC '$@;
QUIET_CC_FPIC = @echo ' CC FPIC '$@;
+ QUIET_CLANG = @echo ' CLANG '$@;
QUIET_AR = @echo ' AR '$@;
QUIET_LINK = @echo ' LINK '$@;
QUIET_MKDIR = @echo ' MKDIR '$@;
diff --git a/tools/testing/selftests/.gitignore b/tools/testing/selftests/.gitignore
index 61df01cdf0b2..304fdf1a21dc 100644
--- a/tools/testing/selftests/.gitignore
+++ b/tools/testing/selftests/.gitignore
@@ -3,4 +3,7 @@ gpiogpio-hammer
gpioinclude/
gpiolsgpio
tpm2/SpaceTest.log
-tpm2/*.pyc
+
+# Python bytecode and cache
+__pycache__/
+*.py[cod]
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index b93fa645ee54..077818d0197f 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -36,7 +36,6 @@ TARGETS += net
TARGETS += net/forwarding
TARGETS += net/mptcp
TARGETS += netfilter
-TARGETS += networking/timestamping
TARGETS += nsfs
TARGETS += pidfd
TARGETS += powerpc
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index ec464859c6b6..2198cd876675 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -31,6 +31,7 @@ test_tcp_check_syncookie_user
test_sysctl
test_hashmap
test_btf_dump
+test_current_pid_tgid_new_ns
xdping
test_cpp
*.skel.h
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 257a1aaaa37d..7729892e0b04 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -20,8 +20,9 @@ CLANG ?= clang
LLC ?= llc
LLVM_OBJCOPY ?= llvm-objcopy
BPF_GCC ?= $(shell command -v bpf-gcc;)
-CFLAGS += -g -Wall -O2 $(GENFLAGS) -I$(CURDIR) -I$(APIDIR) \
+CFLAGS += -g -rdynamic -Wall -O2 $(GENFLAGS) -I$(CURDIR) \
-I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) -I$(TOOLSINCDIR) \
+ -I$(APIDIR) \
-Dbpf_prog_load=bpf_prog_test_load \
-Dbpf_load_program=bpf_test_load_program
LDLIBS += -lcap -lelf -lz -lrt -lpthread
@@ -32,7 +33,8 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
test_sock test_btf test_sockmap get_cgroup_id_user test_socket_cookie \
test_cgroup_storage \
test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \
- test_progs-no_alu32
+ test_progs-no_alu32 \
+ test_current_pid_tgid_new_ns
# Also test bpf-gcc, if present
ifneq ($(BPF_GCC),)
@@ -62,7 +64,8 @@ TEST_PROGS := test_kmod.sh \
test_tc_tunnel.sh \
test_tc_edt.sh \
test_xdping.sh \
- test_bpftool_build.sh
+ test_bpftool_build.sh \
+ test_bpftool.sh
TEST_PROGS_EXTENDED := with_addr.sh \
with_tunnels.sh \
@@ -128,10 +131,13 @@ $(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ)
$(call msg,CC,,$@)
$(CC) -c $(CFLAGS) -o $@ $<
-VMLINUX_BTF_PATHS := $(abspath ../../../../vmlinux) \
- /sys/kernel/btf/vmlinux \
- /boot/vmlinux-$(shell uname -r)
-VMLINUX_BTF:= $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))
+VMLINUX_BTF_PATHS := $(if $(O),$(O)/vmlinux) \
+ $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \
+ ../../../../vmlinux \
+ /sys/kernel/btf/vmlinux \
+ /boot/vmlinux-$(shell uname -r)
+VMLINUX_BTF := $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
+
$(OUTPUT)/runqslower: $(BPFOBJ)
$(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \
OUTPUT=$(SCRATCH_DIR)/ VMLINUX_BTF=$(VMLINUX_BTF) \
@@ -171,6 +177,10 @@ $(BUILD_DIR)/libbpf $(BUILD_DIR)/bpftool $(INCLUDE_DIR):
$(call msg,MKDIR,,$@)
mkdir -p $@
+$(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) | $(BPFTOOL) $(INCLUDE_DIR)
+ $(call msg,GEN,,$@)
+ $(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@
+
# Get Clang's default includes on this system, as opposed to those seen by
# '-target bpf'. This fixes "missing" files on some architectures/distros,
# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc.
@@ -189,8 +199,8 @@ MENDIAN=$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian)
CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG))
BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \
- -I$(INCLUDE_DIR) -I$(CURDIR) -I$(CURDIR)/include/uapi \
- -I$(APIDIR) -I$(abspath $(OUTPUT)/../usr/include)
+ -I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR) \
+ -I$(abspath $(OUTPUT)/../usr/include)
CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \
-Wno-compare-distinct-pointer-types
@@ -209,7 +219,7 @@ define CLANG_BPF_BUILD_RULE
$(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2)
($(CLANG) $3 -O2 -target bpf -emit-llvm \
-c $1 -o - || echo "BPF obj compilation failed") | \
- $(LLC) -mattr=dwarfris -march=bpf -mcpu=probe $4 -filetype=obj -o $2
+ $(LLC) -mattr=dwarfris -march=bpf -mcpu=v3 $4 -filetype=obj -o $2
endef
# Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32
define CLANG_NOALU32_BPF_BUILD_RULE
@@ -223,7 +233,7 @@ define CLANG_NATIVE_BPF_BUILD_RULE
$(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2)
($(CLANG) $3 -O2 -emit-llvm \
-c $1 -o - || echo "BPF obj compilation failed") | \
- $(LLC) -march=bpf -mcpu=probe $4 -filetype=obj -o $2
+ $(LLC) -march=bpf -mcpu=v3 $4 -filetype=obj -o $2
endef
# Build BPF object using GCC
define GCC_BPF_BUILD_RULE
@@ -279,6 +289,7 @@ $(TRUNNER_BPF_PROGS_DIR)$(if $2,-)$2-bpfobjs := y
$(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o: \
$(TRUNNER_BPF_PROGS_DIR)/%.c \
$(TRUNNER_BPF_PROGS_DIR)/*.h \
+ $$(INCLUDE_DIR)/vmlinux.h \
$$(BPFOBJ) | $(TRUNNER_OUTPUT)
$$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \
$(TRUNNER_BPF_CFLAGS), \
diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
index 8f21965ffc6c..5bf2fe9b1efa 100644
--- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
@@ -6,7 +6,7 @@
#include <linux/types.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
-#include "bpf_trace_helpers.h"
+#include <bpf/bpf_tracing.h>
#define BPF_STRUCT_OPS(name, args...) \
SEC("struct_ops/"#name) \
diff --git a/tools/testing/selftests/bpf/bpf_trace_helpers.h b/tools/testing/selftests/bpf/bpf_trace_helpers.h
deleted file mode 100644
index c6f1354d93fb..000000000000
--- a/tools/testing/selftests/bpf/bpf_trace_helpers.h
+++ /dev/null
@@ -1,120 +0,0 @@
-/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
-#ifndef __BPF_TRACE_HELPERS_H
-#define __BPF_TRACE_HELPERS_H
-
-#include <bpf/bpf_helpers.h>
-
-#define ___bpf_concat(a, b) a ## b
-#define ___bpf_apply(fn, n) ___bpf_concat(fn, n)
-#define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N
-#define ___bpf_narg(...) \
- ___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
-#define ___bpf_empty(...) \
- ___bpf_nth(_, ##__VA_ARGS__, N, N, N, N, N, N, N, N, N, N, 0)
-
-#define ___bpf_ctx_cast0() ctx
-#define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), (void *)ctx[0]
-#define ___bpf_ctx_cast2(x, args...) ___bpf_ctx_cast1(args), (void *)ctx[1]
-#define ___bpf_ctx_cast3(x, args...) ___bpf_ctx_cast2(args), (void *)ctx[2]
-#define ___bpf_ctx_cast4(x, args...) ___bpf_ctx_cast3(args), (void *)ctx[3]
-#define ___bpf_ctx_cast5(x, args...) ___bpf_ctx_cast4(args), (void *)ctx[4]
-#define ___bpf_ctx_cast6(x, args...) ___bpf_ctx_cast5(args), (void *)ctx[5]
-#define ___bpf_ctx_cast7(x, args...) ___bpf_ctx_cast6(args), (void *)ctx[6]
-#define ___bpf_ctx_cast8(x, args...) ___bpf_ctx_cast7(args), (void *)ctx[7]
-#define ___bpf_ctx_cast9(x, args...) ___bpf_ctx_cast8(args), (void *)ctx[8]
-#define ___bpf_ctx_cast10(x, args...) ___bpf_ctx_cast9(args), (void *)ctx[9]
-#define ___bpf_ctx_cast11(x, args...) ___bpf_ctx_cast10(args), (void *)ctx[10]
-#define ___bpf_ctx_cast12(x, args...) ___bpf_ctx_cast11(args), (void *)ctx[11]
-#define ___bpf_ctx_cast(args...) \
- ___bpf_apply(___bpf_ctx_cast, ___bpf_narg(args))(args)
-
-/*
- * BPF_PROG is a convenience wrapper for generic tp_btf/fentry/fexit and
- * similar kinds of BPF programs, that accept input arguments as a single
- * pointer to untyped u64 array, where each u64 can actually be a typed
- * pointer or integer of different size. Instead of requring user to write
- * manual casts and work with array elements by index, BPF_PROG macro
- * allows user to declare a list of named and typed input arguments in the
- * same syntax as for normal C function. All the casting is hidden and
- * performed transparently, while user code can just assume working with
- * function arguments of specified type and name.
- *
- * Original raw context argument is preserved as well as 'ctx' argument.
- * This is useful when using BPF helpers that expect original context
- * as one of the parameters (e.g., for bpf_perf_event_output()).
- */
-#define BPF_PROG(name, args...) \
-name(unsigned long long *ctx); \
-static __always_inline typeof(name(0)) \
-____##name(unsigned long long *ctx, ##args); \
-typeof(name(0)) name(unsigned long long *ctx) \
-{ \
- _Pragma("GCC diagnostic push") \
- _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
- return ____##name(___bpf_ctx_cast(args)); \
- _Pragma("GCC diagnostic pop") \
-} \
-static __always_inline typeof(name(0)) \
-____##name(unsigned long long *ctx, ##args)
-
-struct pt_regs;
-
-#define ___bpf_kprobe_args0() ctx
-#define ___bpf_kprobe_args1(x) \
- ___bpf_kprobe_args0(), (void *)PT_REGS_PARM1(ctx)
-#define ___bpf_kprobe_args2(x, args...) \
- ___bpf_kprobe_args1(args), (void *)PT_REGS_PARM2(ctx)
-#define ___bpf_kprobe_args3(x, args...) \
- ___bpf_kprobe_args2(args), (void *)PT_REGS_PARM3(ctx)
-#define ___bpf_kprobe_args4(x, args...) \
- ___bpf_kprobe_args3(args), (void *)PT_REGS_PARM4(ctx)
-#define ___bpf_kprobe_args5(x, args...) \
- ___bpf_kprobe_args4(args), (void *)PT_REGS_PARM5(ctx)
-#define ___bpf_kprobe_args(args...) \
- ___bpf_apply(___bpf_kprobe_args, ___bpf_narg(args))(args)
-
-/*
- * BPF_KPROBE serves the same purpose for kprobes as BPF_PROG for
- * tp_btf/fentry/fexit BPF programs. It hides the underlying platform-specific
- * low-level way of getting kprobe input arguments from struct pt_regs, and
- * provides a familiar typed and named function arguments syntax and
- * semantics of accessing kprobe input paremeters.
- *
- * Original struct pt_regs* context is preserved as 'ctx' argument. This might
- * be necessary when using BPF helpers like bpf_perf_event_output().
- */
-#define BPF_KPROBE(name, args...) \
-name(struct pt_regs *ctx); \
-static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args);\
-typeof(name(0)) name(struct pt_regs *ctx) \
-{ \
- _Pragma("GCC diagnostic push") \
- _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
- return ____##name(___bpf_kprobe_args(args)); \
- _Pragma("GCC diagnostic pop") \
-} \
-static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)
-
-#define ___bpf_kretprobe_args0() ctx
-#define ___bpf_kretprobe_argsN(x, args...) \
- ___bpf_kprobe_args(args), (void *)PT_REGS_RET(ctx)
-#define ___bpf_kretprobe_args(args...) \
- ___bpf_apply(___bpf_kretprobe_args, ___bpf_empty(args))(args)
-
-/*
- * BPF_KRETPROBE is similar to BPF_KPROBE, except, in addition to listing all
- * input kprobe arguments, one last extra argument has to be specified, which
- * captures kprobe return value.
- */
-#define BPF_KRETPROBE(name, args...) \
-name(struct pt_regs *ctx); \
-static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args);\
-typeof(name(0)) name(struct pt_regs *ctx) \
-{ \
- _Pragma("GCC diagnostic push") \
- _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
- return ____##name(___bpf_kretprobe_args(args)); \
- _Pragma("GCC diagnostic pop") \
-} \
-static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)
-#endif
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 5dc109f4c097..60e3ae5d4e48 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -35,3 +35,5 @@ CONFIG_MPLS_ROUTING=m
CONFIG_MPLS_IPTUNNEL=m
CONFIG_IPV6_SIT=m
CONFIG_BPF_JIT=y
+CONFIG_BPF_LSM=y
+CONFIG_SECURITY=y
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
index 8482bbc67eec..9a8f47fc0b91 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
@@ -11,6 +11,7 @@
static const unsigned int total_bytes = 10 * 1024 * 1024;
static const struct timeval timeo_sec = { .tv_sec = 10 };
static const size_t timeo_optlen = sizeof(timeo_sec);
+static int expected_stg = 0xeB9F;
static int stop, duration;
static int settimeo(int fd)
@@ -88,7 +89,7 @@ done:
return NULL;
}
-static void do_test(const char *tcp_ca)
+static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map)
{
struct sockaddr_in6 sa6 = {};
ssize_t nr_recv = 0, bytes = 0;
@@ -126,14 +127,34 @@ static void do_test(const char *tcp_ca)
err = listen(lfd, 1);
if (CHECK(err == -1, "listen", "errno:%d\n", errno))
goto done;
- err = pthread_create(&srv_thread, NULL, server, (void *)(long)lfd);
- if (CHECK(err != 0, "pthread_create", "err:%d\n", err))
- goto done;
+
+ if (sk_stg_map) {
+ err = bpf_map_update_elem(bpf_map__fd(sk_stg_map), &fd,
+ &expected_stg, BPF_NOEXIST);
+ if (CHECK(err, "bpf_map_update_elem(sk_stg_map)",
+ "err:%d errno:%d\n", err, errno))
+ goto done;
+ }
/* connect to server */
err = connect(fd, (struct sockaddr *)&sa6, addrlen);
if (CHECK(err == -1, "connect", "errno:%d\n", errno))
- goto wait_thread;
+ goto done;
+
+ if (sk_stg_map) {
+ int tmp_stg;
+
+ err = bpf_map_lookup_elem(bpf_map__fd(sk_stg_map), &fd,
+ &tmp_stg);
+ if (CHECK(!err || errno != ENOENT,
+ "bpf_map_lookup_elem(sk_stg_map)",
+ "err:%d errno:%d\n", err, errno))
+ goto done;
+ }
+
+ err = pthread_create(&srv_thread, NULL, server, (void *)(long)lfd);
+ if (CHECK(err != 0, "pthread_create", "err:%d errno:%d\n", err, errno))
+ goto done;
/* recv total_bytes */
while (bytes < total_bytes && !READ_ONCE(stop)) {
@@ -149,7 +170,6 @@ static void do_test(const char *tcp_ca)
CHECK(bytes != total_bytes, "recv", "%zd != %u nr_recv:%zd errno:%d\n",
bytes, total_bytes, nr_recv, errno);
-wait_thread:
WRITE_ONCE(stop, 1);
pthread_join(srv_thread, &thread_ret);
CHECK(IS_ERR(thread_ret), "pthread_join", "thread_ret:%ld",
@@ -175,7 +195,7 @@ static void test_cubic(void)
return;
}
- do_test("bpf_cubic");
+ do_test("bpf_cubic", NULL);
bpf_link__destroy(link);
bpf_cubic__destroy(cubic_skel);
@@ -197,7 +217,10 @@ static void test_dctcp(void)
return;
}
- do_test("bpf_dctcp");
+ do_test("bpf_dctcp", dctcp_skel->maps.sk_stg_map);
+ CHECK(dctcp_skel->bss->stg_result != expected_stg,
+ "Unexpected stg_result", "stg_result (%x) != expected_stg (%x)\n",
+ dctcp_skel->bss->stg_result, expected_stg);
bpf_link__destroy(link);
bpf_dctcp__destroy(dctcp_skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
index 7390d3061065..cb33a7ee4e04 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
@@ -125,6 +125,6 @@ void test_btf_dump() {
if (!test__start_subtest(t->name))
continue;
- test_btf_dump_case(i, &btf_dump_test_cases[i]);
+ test_btf_dump_case(i, &btf_dump_test_cases[i]);
}
}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c
index 5b13f2c6c402..70e94e783070 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c
@@ -6,7 +6,7 @@
#define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null"
-char bpf_log_buf[BPF_LOG_BUF_SIZE];
+static char bpf_log_buf[BPF_LOG_BUF_SIZE];
static int prog_load(void)
{
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
index 2ff21dbce179..139f8e82c7c6 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
@@ -6,7 +6,7 @@
#define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null"
-char bpf_log_buf[BPF_LOG_BUF_SIZE];
+static char bpf_log_buf[BPF_LOG_BUF_SIZE];
static int map_fd = -1;
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c
index 9d8cb48b99de..9e96f8d87fea 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c
@@ -8,7 +8,7 @@
#define BAR "/foo/bar/"
#define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null"
-char bpf_log_buf[BPF_LOG_BUF_SIZE];
+static char bpf_log_buf[BPF_LOG_BUF_SIZE];
static int prog_load(int verdict)
{
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
new file mode 100644
index 000000000000..6e04f8d1d15b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "cgroup_helpers.h"
+#include "test_cgroup_link.skel.h"
+
+static __u32 duration = 0;
+#define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null"
+
+static struct test_cgroup_link *skel = NULL;
+
+int ping_and_check(int exp_calls, int exp_alt_calls)
+{
+ skel->bss->calls = 0;
+ skel->bss->alt_calls = 0;
+ CHECK_FAIL(system(PING_CMD));
+ if (CHECK(skel->bss->calls != exp_calls, "call_cnt",
+ "exp %d, got %d\n", exp_calls, skel->bss->calls))
+ return -EINVAL;
+ if (CHECK(skel->bss->alt_calls != exp_alt_calls, "alt_call_cnt",
+ "exp %d, got %d\n", exp_alt_calls, skel->bss->alt_calls))
+ return -EINVAL;
+ return 0;
+}
+
+void test_cgroup_link(void)
+{
+ struct {
+ const char *path;
+ int fd;
+ } cgs[] = {
+ { "/cg1" },
+ { "/cg1/cg2" },
+ { "/cg1/cg2/cg3" },
+ { "/cg1/cg2/cg3/cg4" },
+ };
+ int last_cg = ARRAY_SIZE(cgs) - 1, cg_nr = ARRAY_SIZE(cgs);
+ DECLARE_LIBBPF_OPTS(bpf_link_update_opts, link_upd_opts);
+ struct bpf_link *links[ARRAY_SIZE(cgs)] = {}, *tmp_link;
+ __u32 prog_ids[ARRAY_SIZE(cgs)], prog_cnt = 0, attach_flags;
+ int i = 0, err, prog_fd;
+ bool detach_legacy = false;
+
+ skel = test_cgroup_link__open_and_load();
+ if (CHECK(!skel, "skel_open_load", "failed to open/load skeleton\n"))
+ return;
+ prog_fd = bpf_program__fd(skel->progs.egress);
+
+ err = setup_cgroup_environment();
+ if (CHECK(err, "cg_init", "failed: %d\n", err))
+ goto cleanup;
+
+ for (i = 0; i < cg_nr; i++) {
+ cgs[i].fd = create_and_get_cgroup(cgs[i].path);
+ if (CHECK(cgs[i].fd < 0, "cg_create", "fail: %d\n", cgs[i].fd))
+ goto cleanup;
+ }
+
+ err = join_cgroup(cgs[last_cg].path);
+ if (CHECK(err, "cg_join", "fail: %d\n", err))
+ goto cleanup;
+
+ for (i = 0; i < cg_nr; i++) {
+ links[i] = bpf_program__attach_cgroup(skel->progs.egress,
+ cgs[i].fd);
+ if (CHECK(IS_ERR(links[i]), "cg_attach", "i: %d, err: %ld\n",
+ i, PTR_ERR(links[i])))
+ goto cleanup;
+ }
+
+ ping_and_check(cg_nr, 0);
+
+ /* query the number of effective progs and attach flags in root cg */
+ err = bpf_prog_query(cgs[0].fd, BPF_CGROUP_INET_EGRESS,
+ BPF_F_QUERY_EFFECTIVE, &attach_flags, NULL,
+ &prog_cnt);
+ CHECK_FAIL(err);
+ CHECK_FAIL(attach_flags != BPF_F_ALLOW_MULTI);
+ if (CHECK(prog_cnt != 1, "effect_cnt", "exp %d, got %d\n", 1, prog_cnt))
+ goto cleanup;
+
+ /* query the number of effective progs in last cg */
+ err = bpf_prog_query(cgs[last_cg].fd, BPF_CGROUP_INET_EGRESS,
+ BPF_F_QUERY_EFFECTIVE, NULL, NULL,
+ &prog_cnt);
+ CHECK_FAIL(err);
+ CHECK_FAIL(attach_flags != BPF_F_ALLOW_MULTI);
+ if (CHECK(prog_cnt != cg_nr, "effect_cnt", "exp %d, got %d\n",
+ cg_nr, prog_cnt))
+ goto cleanup;
+
+ /* query the effective prog IDs in last cg */
+ err = bpf_prog_query(cgs[last_cg].fd, BPF_CGROUP_INET_EGRESS,
+ BPF_F_QUERY_EFFECTIVE, &attach_flags,
+ prog_ids, &prog_cnt);
+ CHECK_FAIL(err);
+ CHECK_FAIL(attach_flags != BPF_F_ALLOW_MULTI);
+ if (CHECK(prog_cnt != cg_nr, "effect_cnt", "exp %d, got %d\n",
+ cg_nr, prog_cnt))
+ goto cleanup;
+ for (i = 1; i < prog_cnt; i++) {
+ CHECK(prog_ids[i - 1] != prog_ids[i], "prog_id_check",
+ "idx %d, prev id %d, cur id %d\n",
+ i, prog_ids[i - 1], prog_ids[i]);
+ }
+
+ /* detach bottom program and ping again */
+ bpf_link__destroy(links[last_cg]);
+ links[last_cg] = NULL;
+
+ ping_and_check(cg_nr - 1, 0);
+
+ /* mix in with non link-based multi-attachments */
+ err = bpf_prog_attach(prog_fd, cgs[last_cg].fd,
+ BPF_CGROUP_INET_EGRESS, BPF_F_ALLOW_MULTI);
+ if (CHECK(err, "cg_attach_legacy", "errno=%d\n", errno))
+ goto cleanup;
+ detach_legacy = true;
+
+ links[last_cg] = bpf_program__attach_cgroup(skel->progs.egress,
+ cgs[last_cg].fd);
+ if (CHECK(IS_ERR(links[last_cg]), "cg_attach", "err: %ld\n",
+ PTR_ERR(links[last_cg])))
+ goto cleanup;
+
+ ping_and_check(cg_nr + 1, 0);
+
+ /* detach link */
+ bpf_link__destroy(links[last_cg]);
+ links[last_cg] = NULL;
+
+ /* detach legacy */
+ err = bpf_prog_detach2(prog_fd, cgs[last_cg].fd, BPF_CGROUP_INET_EGRESS);
+ if (CHECK(err, "cg_detach_legacy", "errno=%d\n", errno))
+ goto cleanup;
+ detach_legacy = false;
+
+ /* attach legacy exclusive prog attachment */
+ err = bpf_prog_attach(prog_fd, cgs[last_cg].fd,
+ BPF_CGROUP_INET_EGRESS, 0);
+ if (CHECK(err, "cg_attach_exclusive", "errno=%d\n", errno))
+ goto cleanup;
+ detach_legacy = true;
+
+ /* attempt to mix in with multi-attach bpf_link */
+ tmp_link = bpf_program__attach_cgroup(skel->progs.egress,
+ cgs[last_cg].fd);
+ if (CHECK(!IS_ERR(tmp_link), "cg_attach_fail", "unexpected success!\n")) {
+ bpf_link__destroy(tmp_link);
+ goto cleanup;
+ }
+
+ ping_and_check(cg_nr, 0);
+
+ /* detach */
+ err = bpf_prog_detach2(prog_fd, cgs[last_cg].fd, BPF_CGROUP_INET_EGRESS);
+ if (CHECK(err, "cg_detach_legacy", "errno=%d\n", errno))
+ goto cleanup;
+ detach_legacy = false;
+
+ ping_and_check(cg_nr - 1, 0);
+
+ /* attach back link-based one */
+ links[last_cg] = bpf_program__attach_cgroup(skel->progs.egress,
+ cgs[last_cg].fd);
+ if (CHECK(IS_ERR(links[last_cg]), "cg_attach", "err: %ld\n",
+ PTR_ERR(links[last_cg])))
+ goto cleanup;
+
+ ping_and_check(cg_nr, 0);
+
+ /* check legacy exclusive prog can't be attached */
+ err = bpf_prog_attach(prog_fd, cgs[last_cg].fd,
+ BPF_CGROUP_INET_EGRESS, 0);
+ if (CHECK(!err, "cg_attach_exclusive", "unexpected success")) {
+ bpf_prog_detach2(prog_fd, cgs[last_cg].fd, BPF_CGROUP_INET_EGRESS);
+ goto cleanup;
+ }
+
+ /* replace BPF programs inside their links for all but first link */
+ for (i = 1; i < cg_nr; i++) {
+ err = bpf_link__update_program(links[i], skel->progs.egress_alt);
+ if (CHECK(err, "prog_upd", "link #%d\n", i))
+ goto cleanup;
+ }
+
+ ping_and_check(1, cg_nr - 1);
+
+ /* Attempt program update with wrong expected BPF program */
+ link_upd_opts.old_prog_fd = bpf_program__fd(skel->progs.egress_alt);
+ link_upd_opts.flags = BPF_F_REPLACE;
+ err = bpf_link_update(bpf_link__fd(links[0]),
+ bpf_program__fd(skel->progs.egress_alt),
+ &link_upd_opts);
+ if (CHECK(err == 0 || errno != EPERM, "prog_cmpxchg1",
+ "unexpectedly succeeded, err %d, errno %d\n", err, -errno))
+ goto cleanup;
+
+ /* Compare-exchange single link program from egress to egress_alt */
+ link_upd_opts.old_prog_fd = bpf_program__fd(skel->progs.egress);
+ link_upd_opts.flags = BPF_F_REPLACE;
+ err = bpf_link_update(bpf_link__fd(links[0]),
+ bpf_program__fd(skel->progs.egress_alt),
+ &link_upd_opts);
+ if (CHECK(err, "prog_cmpxchg2", "errno %d\n", -errno))
+ goto cleanup;
+
+ /* ping */
+ ping_and_check(0, cg_nr);
+
+ /* close cgroup FDs before detaching links */
+ for (i = 0; i < cg_nr; i++) {
+ if (cgs[i].fd > 0) {
+ close(cgs[i].fd);
+ cgs[i].fd = -1;
+ }
+ }
+
+ /* BPF programs should still get called */
+ ping_and_check(0, cg_nr);
+
+ /* leave cgroup and remove them, don't detach programs */
+ cleanup_cgroup_environment();
+
+ /* BPF programs should have been auto-detached */
+ ping_and_check(0, 0);
+
+cleanup:
+ if (detach_legacy)
+ bpf_prog_detach2(prog_fd, cgs[last_cg].fd,
+ BPF_CGROUP_INET_EGRESS);
+
+ for (i = 0; i < cg_nr; i++) {
+ if (!IS_ERR(links[i]))
+ bpf_link__destroy(links[i]);
+ }
+ test_cgroup_link__destroy(skel);
+
+ for (i = 0; i < cg_nr; i++) {
+ if (cgs[i].fd > 0)
+ close(cgs[i].fd);
+ }
+ cleanup_cgroup_environment();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
index 235ac4f67f5b..83493bd5745c 100644
--- a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
@@ -1,22 +1,17 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <test_progs.h>
-#include "test_pkt_access.skel.h"
#include "fentry_test.skel.h"
#include "fexit_test.skel.h"
void test_fentry_fexit(void)
{
- struct test_pkt_access *pkt_skel = NULL;
struct fentry_test *fentry_skel = NULL;
struct fexit_test *fexit_skel = NULL;
__u64 *fentry_res, *fexit_res;
__u32 duration = 0, retval;
- int err, pkt_fd, i;
+ int err, prog_fd, i;
- pkt_skel = test_pkt_access__open_and_load();
- if (CHECK(!pkt_skel, "pkt_skel_load", "pkt_access skeleton failed\n"))
- return;
fentry_skel = fentry_test__open_and_load();
if (CHECK(!fentry_skel, "fentry_skel_load", "fentry skeleton failed\n"))
goto close_prog;
@@ -31,8 +26,8 @@ void test_fentry_fexit(void)
if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err))
goto close_prog;
- pkt_fd = bpf_program__fd(pkt_skel->progs.test_pkt_access);
- err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6),
+ prog_fd = bpf_program__fd(fexit_skel->progs.test1);
+ err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
NULL, NULL, &retval, &duration);
CHECK(err || retval, "ipv6",
"err %d errno %d retval %d duration %d\n",
@@ -49,7 +44,6 @@ void test_fentry_fexit(void)
}
close_prog:
- test_pkt_access__destroy(pkt_skel);
fentry_test__destroy(fentry_skel);
fexit_test__destroy(fexit_skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
index 5cc06021f27d..04ebbf1cb390 100644
--- a/tools/testing/selftests/bpf/prog_tests/fentry_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
@@ -1,20 +1,15 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <test_progs.h>
-#include "test_pkt_access.skel.h"
#include "fentry_test.skel.h"
void test_fentry_test(void)
{
- struct test_pkt_access *pkt_skel = NULL;
struct fentry_test *fentry_skel = NULL;
- int err, pkt_fd, i;
+ int err, prog_fd, i;
__u32 duration = 0, retval;
__u64 *result;
- pkt_skel = test_pkt_access__open_and_load();
- if (CHECK(!pkt_skel, "pkt_skel_load", "pkt_access skeleton failed\n"))
- return;
fentry_skel = fentry_test__open_and_load();
if (CHECK(!fentry_skel, "fentry_skel_load", "fentry skeleton failed\n"))
goto cleanup;
@@ -23,10 +18,10 @@ void test_fentry_test(void)
if (CHECK(err, "fentry_attach", "fentry attach failed: %d\n", err))
goto cleanup;
- pkt_fd = bpf_program__fd(pkt_skel->progs.test_pkt_access);
- err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6),
+ prog_fd = bpf_program__fd(fentry_skel->progs.test1);
+ err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
NULL, NULL, &retval, &duration);
- CHECK(err || retval, "ipv6",
+ CHECK(err || retval, "test_run",
"err %d errno %d retval %d duration %d\n",
err, errno, retval, duration);
@@ -39,5 +34,4 @@ void test_fentry_test(void)
cleanup:
fentry_test__destroy(fentry_skel);
- test_pkt_access__destroy(pkt_skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_test.c b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
index d2c3655dd7a3..78d7a2765c27 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
@@ -1,64 +1,37 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <test_progs.h>
+#include "fexit_test.skel.h"
void test_fexit_test(void)
{
- struct bpf_prog_load_attr attr = {
- .file = "./fexit_test.o",
- };
-
- char prog_name[] = "fexit/bpf_fentry_testX";
- struct bpf_object *obj = NULL, *pkt_obj;
- int err, pkt_fd, kfree_skb_fd, i;
- struct bpf_link *link[6] = {};
- struct bpf_program *prog[6];
+ struct fexit_test *fexit_skel = NULL;
+ int err, prog_fd, i;
__u32 duration = 0, retval;
- struct bpf_map *data_map;
- const int zero = 0;
- u64 result[6];
+ __u64 *result;
- err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS,
- &pkt_obj, &pkt_fd);
- if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno))
- return;
- err = bpf_prog_load_xattr(&attr, &obj, &kfree_skb_fd);
- if (CHECK(err, "prog_load fail", "err %d errno %d\n", err, errno))
- goto close_prog;
+ fexit_skel = fexit_test__open_and_load();
+ if (CHECK(!fexit_skel, "fexit_skel_load", "fexit skeleton failed\n"))
+ goto cleanup;
- for (i = 0; i < 6; i++) {
- prog_name[sizeof(prog_name) - 2] = '1' + i;
- prog[i] = bpf_object__find_program_by_title(obj, prog_name);
- if (CHECK(!prog[i], "find_prog", "prog %s not found\n", prog_name))
- goto close_prog;
- link[i] = bpf_program__attach_trace(prog[i]);
- if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n"))
- goto close_prog;
- }
- data_map = bpf_object__find_map_by_name(obj, "fexit_te.bss");
- if (CHECK(!data_map, "find_data_map", "data map not found\n"))
- goto close_prog;
+ err = fexit_test__attach(fexit_skel);
+ if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err))
+ goto cleanup;
- err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6),
+ prog_fd = bpf_program__fd(fexit_skel->progs.test1);
+ err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
NULL, NULL, &retval, &duration);
- CHECK(err || retval, "ipv6",
+ CHECK(err || retval, "test_run",
"err %d errno %d retval %d duration %d\n",
err, errno, retval, duration);
- err = bpf_map_lookup_elem(bpf_map__fd(data_map), &zero, &result);
- if (CHECK(err, "get_result",
- "failed to get output data: %d\n", err))
- goto close_prog;
-
- for (i = 0; i < 6; i++)
- if (CHECK(result[i] != 1, "result", "bpf_fentry_test%d failed err %ld\n",
- i + 1, result[i]))
- goto close_prog;
+ result = (__u64 *)fexit_skel->bss;
+ for (i = 0; i < 6; i++) {
+ if (CHECK(result[i] != 1, "result",
+ "fexit_test%d failed err %lld\n", i + 1, result[i]))
+ goto cleanup;
+ }
-close_prog:
- for (i = 0; i < 6; i++)
- if (!IS_ERR_OR_NULL(link[i]))
- bpf_link__destroy(link[i]);
- bpf_object__close(obj);
- bpf_object__close(pkt_obj);
+cleanup:
+ fexit_test__destroy(fexit_skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
index eba9a970703b..925722217edf 100644
--- a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
+++ b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
@@ -82,6 +82,7 @@ static void get_stack_print_output(void *ctx, int cpu, void *data, __u32 size)
void test_get_stack_raw_tp(void)
{
const char *file = "./test_get_stack_rawtp.o";
+ const char *file_err = "./test_get_stack_rawtp_err.o";
const char *prog_name = "raw_tracepoint/sys_enter";
int i, err, prog_fd, exp_cnt = MAX_CNT_RAWTP;
struct perf_buffer_opts pb_opts = {};
@@ -93,6 +94,10 @@ void test_get_stack_raw_tp(void)
struct bpf_map *map;
cpu_set_t cpu_set;
+ err = bpf_prog_load(file_err, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
+ if (CHECK(err >= 0, "prog_load raw tp", "err %d errno %d\n", err, errno))
+ return;
+
err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/global_data_init.c b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
new file mode 100644
index 000000000000..3bdaa5a40744
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+
+void test_global_data_init(void)
+{
+ const char *file = "./test_global_data.o";
+ int err = -ENOMEM, map_fd, zero = 0;
+ __u8 *buff = NULL, *newval = NULL;
+ struct bpf_object *obj;
+ struct bpf_map *map;
+ __u32 duration = 0;
+ size_t sz;
+
+ obj = bpf_object__open_file(file, NULL);
+ if (CHECK_FAIL(!obj))
+ return;
+
+ map = bpf_object__find_map_by_name(obj, "test_glo.rodata");
+ if (CHECK_FAIL(!map || !bpf_map__is_internal(map)))
+ goto out;
+
+ sz = bpf_map__def(map)->value_size;
+ newval = malloc(sz);
+ if (CHECK_FAIL(!newval))
+ goto out;
+
+ memset(newval, 0, sz);
+ /* wrong size, should fail */
+ err = bpf_map__set_initial_value(map, newval, sz - 1);
+ if (CHECK(!err, "reject set initial value wrong size", "err %d\n", err))
+ goto out;
+
+ err = bpf_map__set_initial_value(map, newval, sz);
+ if (CHECK(err, "set initial value", "err %d\n", err))
+ goto out;
+
+ err = bpf_object__load(obj);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ map_fd = bpf_map__fd(map);
+ if (CHECK_FAIL(map_fd < 0))
+ goto out;
+
+ buff = malloc(sz);
+ if (buff)
+ err = bpf_map_lookup_elem(map_fd, &zero, buff);
+ if (CHECK(!buff || err || memcmp(buff, newval, sz),
+ "compare .rodata map data override",
+ "err %d errno %d\n", err, errno))
+ goto out;
+
+ memset(newval, 1, sz);
+ /* object loaded - should fail */
+ err = bpf_map__set_initial_value(map, newval, sz);
+ CHECK(!err, "reject set initial value after load", "err %d\n", err);
+out:
+ free(buff);
+ free(newval);
+ bpf_object__close(obj);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/link_pinning.c b/tools/testing/selftests/bpf/prog_tests/link_pinning.c
new file mode 100644
index 000000000000..a743288cf384
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/link_pinning.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <test_progs.h>
+#include <sys/stat.h>
+
+#include "test_link_pinning.skel.h"
+
+static int duration = 0;
+
+void test_link_pinning_subtest(struct bpf_program *prog,
+ struct test_link_pinning__bss *bss)
+{
+ const char *link_pin_path = "/sys/fs/bpf/pinned_link_test";
+ struct stat statbuf = {};
+ struct bpf_link *link;
+ int err, i;
+
+ link = bpf_program__attach(prog);
+ if (CHECK(IS_ERR(link), "link_attach", "err: %ld\n", PTR_ERR(link)))
+ goto cleanup;
+
+ bss->in = 1;
+ usleep(1);
+ CHECK(bss->out != 1, "res_check1", "exp %d, got %d\n", 1, bss->out);
+
+ /* pin link */
+ err = bpf_link__pin(link, link_pin_path);
+ if (CHECK(err, "link_pin", "err: %d\n", err))
+ goto cleanup;
+
+ CHECK(strcmp(link_pin_path, bpf_link__pin_path(link)), "pin_path1",
+ "exp %s, got %s\n", link_pin_path, bpf_link__pin_path(link));
+
+ /* check that link was pinned */
+ err = stat(link_pin_path, &statbuf);
+ if (CHECK(err, "stat_link", "err %d errno %d\n", err, errno))
+ goto cleanup;
+
+ bss->in = 2;
+ usleep(1);
+ CHECK(bss->out != 2, "res_check2", "exp %d, got %d\n", 2, bss->out);
+
+ /* destroy link, pinned link should keep program attached */
+ bpf_link__destroy(link);
+ link = NULL;
+
+ bss->in = 3;
+ usleep(1);
+ CHECK(bss->out != 3, "res_check3", "exp %d, got %d\n", 3, bss->out);
+
+ /* re-open link from BPFFS */
+ link = bpf_link__open(link_pin_path);
+ if (CHECK(IS_ERR(link), "link_open", "err: %ld\n", PTR_ERR(link)))
+ goto cleanup;
+
+ CHECK(strcmp(link_pin_path, bpf_link__pin_path(link)), "pin_path2",
+ "exp %s, got %s\n", link_pin_path, bpf_link__pin_path(link));
+
+ /* unpin link from BPFFS, program still attached */
+ err = bpf_link__unpin(link);
+ if (CHECK(err, "link_unpin", "err: %d\n", err))
+ goto cleanup;
+
+ /* still active, as we have FD open now */
+ bss->in = 4;
+ usleep(1);
+ CHECK(bss->out != 4, "res_check4", "exp %d, got %d\n", 4, bss->out);
+
+ bpf_link__destroy(link);
+ link = NULL;
+
+ /* Validate it's finally detached.
+ * Actual detachment might get delayed a bit, so there is no reliable
+ * way to validate it immediately here, let's count up for long enough
+ * and see if eventually output stops being updated
+ */
+ for (i = 5; i < 10000; i++) {
+ bss->in = i;
+ usleep(1);
+ if (bss->out == i - 1)
+ break;
+ }
+ CHECK(i == 10000, "link_attached", "got to iteration #%d\n", i);
+
+cleanup:
+ if (!IS_ERR(link))
+ bpf_link__destroy(link);
+}
+
+void test_link_pinning(void)
+{
+ struct test_link_pinning* skel;
+
+ skel = test_link_pinning__open_and_load();
+ if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+ return;
+
+ if (test__start_subtest("pin_raw_tp"))
+ test_link_pinning_subtest(skel->progs.raw_tp_prog, skel->bss);
+ if (test__start_subtest("pin_tp_btf"))
+ test_link_pinning_subtest(skel->progs.tp_btf_prog, skel->bss);
+
+ test_link_pinning__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/modify_return.c b/tools/testing/selftests/bpf/prog_tests/modify_return.c
new file mode 100644
index 000000000000..97fec70c600b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/modify_return.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include <test_progs.h>
+#include "modify_return.skel.h"
+
+#define LOWER(x) ((x) & 0xffff)
+#define UPPER(x) ((x) >> 16)
+
+
+static void run_test(__u32 input_retval, __u16 want_side_effect, __s16 want_ret)
+{
+ struct modify_return *skel = NULL;
+ int err, prog_fd;
+ __u32 duration = 0, retval;
+ __u16 side_effect;
+ __s16 ret;
+
+ skel = modify_return__open_and_load();
+ if (CHECK(!skel, "skel_load", "modify_return skeleton failed\n"))
+ goto cleanup;
+
+ err = modify_return__attach(skel);
+ if (CHECK(err, "modify_return", "attach failed: %d\n", err))
+ goto cleanup;
+
+ skel->bss->input_retval = input_retval;
+ prog_fd = bpf_program__fd(skel->progs.fmod_ret_test);
+ err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, 0,
+ &retval, &duration);
+
+ CHECK(err, "test_run", "err %d errno %d\n", err, errno);
+
+ side_effect = UPPER(retval);
+ ret = LOWER(retval);
+
+ CHECK(ret != want_ret, "test_run",
+ "unexpected ret: %d, expected: %d\n", ret, want_ret);
+ CHECK(side_effect != want_side_effect, "modify_return",
+ "unexpected side_effect: %d\n", side_effect);
+
+ CHECK(skel->bss->fentry_result != 1, "modify_return",
+ "fentry failed\n");
+ CHECK(skel->bss->fexit_result != 1, "modify_return",
+ "fexit failed\n");
+ CHECK(skel->bss->fmod_ret_result != 1, "modify_return",
+ "fmod_ret failed\n");
+
+cleanup:
+ modify_return__destroy(skel);
+}
+
+void test_modify_return(void)
+{
+ run_test(0 /* input_retval */,
+ 1 /* want_side_effect */,
+ 4 /* want_ret */);
+ run_test(-EINVAL /* input_retval */,
+ 0 /* want_side_effect */,
+ -EINVAL /* want_ret */);
+}
+
diff --git a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
new file mode 100644
index 000000000000..542240e16564
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Carlos Neira cneirabustos@gmail.com */
+#include <test_progs.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+
+struct bss {
+ __u64 dev;
+ __u64 ino;
+ __u64 pid_tgid;
+ __u64 user_pid_tgid;
+};
+
+void test_ns_current_pid_tgid(void)
+{
+ const char *probe_name = "raw_tracepoint/sys_enter";
+ const char *file = "test_ns_current_pid_tgid.o";
+ int err, key = 0, duration = 0;
+ struct bpf_link *link = NULL;
+ struct bpf_program *prog;
+ struct bpf_map *bss_map;
+ struct bpf_object *obj;
+ struct bss bss;
+ struct stat st;
+ __u64 id;
+
+ obj = bpf_object__open_file(file, NULL);
+ if (CHECK(IS_ERR(obj), "obj_open", "err %ld\n", PTR_ERR(obj)))
+ return;
+
+ err = bpf_object__load(obj);
+ if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno))
+ goto cleanup;
+
+ bss_map = bpf_object__find_map_by_name(obj, "test_ns_.bss");
+ if (CHECK(!bss_map, "find_bss_map", "failed\n"))
+ goto cleanup;
+
+ prog = bpf_object__find_program_by_title(obj, probe_name);
+ if (CHECK(!prog, "find_prog", "prog '%s' not found\n",
+ probe_name))
+ goto cleanup;
+
+ memset(&bss, 0, sizeof(bss));
+ pid_t tid = syscall(SYS_gettid);
+ pid_t pid = getpid();
+
+ id = (__u64) tid << 32 | pid;
+ bss.user_pid_tgid = id;
+
+ if (CHECK_FAIL(stat("/proc/self/ns/pid", &st))) {
+ perror("Failed to stat /proc/self/ns/pid");
+ goto cleanup;
+ }
+
+ bss.dev = st.st_dev;
+ bss.ino = st.st_ino;
+
+ err = bpf_map_update_elem(bpf_map__fd(bss_map), &key, &bss, 0);
+ if (CHECK(err, "setting_bss", "failed to set bss : %d\n", err))
+ goto cleanup;
+
+ link = bpf_program__attach_raw_tracepoint(prog, "sys_enter");
+ if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n",
+ PTR_ERR(link))) {
+ link = NULL;
+ goto cleanup;
+ }
+
+ /* trigger some syscalls */
+ usleep(1);
+
+ err = bpf_map_lookup_elem(bpf_map__fd(bss_map), &key, &bss);
+ if (CHECK(err, "set_bss", "failed to get bss : %d\n", err))
+ goto cleanup;
+
+ if (CHECK(id != bss.pid_tgid, "Compare user pid/tgid vs. bpf pid/tgid",
+ "User pid/tgid %llu BPF pid/tgid %llu\n", id, bss.pid_tgid))
+ goto cleanup;
+cleanup:
+ if (!link) {
+ bpf_link__destroy(link);
+ link = NULL;
+ }
+ bpf_object__close(obj);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_branches.c b/tools/testing/selftests/bpf/prog_tests/perf_branches.c
new file mode 100644
index 000000000000..e35c444902a7
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/perf_branches.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <pthread.h>
+#include <sched.h>
+#include <sys/socket.h>
+#include <test_progs.h>
+#include "bpf/libbpf_internal.h"
+#include "test_perf_branches.skel.h"
+
+static void check_good_sample(struct test_perf_branches *skel)
+{
+ int written_global = skel->bss->written_global_out;
+ int required_size = skel->bss->required_size_out;
+ int written_stack = skel->bss->written_stack_out;
+ int pbe_size = sizeof(struct perf_branch_entry);
+ int duration = 0;
+
+ if (CHECK(!skel->bss->valid, "output not valid",
+ "no valid sample from prog"))
+ return;
+
+ /*
+ * It's hard to validate the contents of the branch entries b/c it
+ * would require some kind of disassembler and also encoding the
+ * valid jump instructions for supported architectures. So just check
+ * the easy stuff for now.
+ */
+ CHECK(required_size <= 0, "read_branches_size", "err %d\n", required_size);
+ CHECK(written_stack < 0, "read_branches_stack", "err %d\n", written_stack);
+ CHECK(written_stack % pbe_size != 0, "read_branches_stack",
+ "stack bytes written=%d not multiple of struct size=%d\n",
+ written_stack, pbe_size);
+ CHECK(written_global < 0, "read_branches_global", "err %d\n", written_global);
+ CHECK(written_global % pbe_size != 0, "read_branches_global",
+ "global bytes written=%d not multiple of struct size=%d\n",
+ written_global, pbe_size);
+ CHECK(written_global < written_stack, "read_branches_size",
+ "written_global=%d < written_stack=%d\n", written_global, written_stack);
+}
+
+static void check_bad_sample(struct test_perf_branches *skel)
+{
+ int written_global = skel->bss->written_global_out;
+ int required_size = skel->bss->required_size_out;
+ int written_stack = skel->bss->written_stack_out;
+ int duration = 0;
+
+ if (CHECK(!skel->bss->valid, "output not valid",
+ "no valid sample from prog"))
+ return;
+
+ CHECK((required_size != -EINVAL && required_size != -ENOENT),
+ "read_branches_size", "err %d\n", required_size);
+ CHECK((written_stack != -EINVAL && written_stack != -ENOENT),
+ "read_branches_stack", "written %d\n", written_stack);
+ CHECK((written_global != -EINVAL && written_global != -ENOENT),
+ "read_branches_global", "written %d\n", written_global);
+}
+
+static void test_perf_branches_common(int perf_fd,
+ void (*cb)(struct test_perf_branches *))
+{
+ struct test_perf_branches *skel;
+ int err, i, duration = 0;
+ bool detached = false;
+ struct bpf_link *link;
+ volatile int j = 0;
+ cpu_set_t cpu_set;
+
+ skel = test_perf_branches__open_and_load();
+ if (CHECK(!skel, "test_perf_branches_load",
+ "perf_branches skeleton failed\n"))
+ return;
+
+ /* attach perf_event */
+ link = bpf_program__attach_perf_event(skel->progs.perf_branches, perf_fd);
+ if (CHECK(IS_ERR(link), "attach_perf_event", "err %ld\n", PTR_ERR(link)))
+ goto out_destroy_skel;
+
+ /* generate some branches on cpu 0 */
+ CPU_ZERO(&cpu_set);
+ CPU_SET(0, &cpu_set);
+ err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
+ if (CHECK(err, "set_affinity", "cpu #0, err %d\n", err))
+ goto out_destroy;
+ /* spin the loop for a while (random high number) */
+ for (i = 0; i < 1000000; ++i)
+ ++j;
+
+ test_perf_branches__detach(skel);
+ detached = true;
+
+ cb(skel);
+out_destroy:
+ bpf_link__destroy(link);
+out_destroy_skel:
+ if (!detached)
+ test_perf_branches__detach(skel);
+ test_perf_branches__destroy(skel);
+}
+
+static void test_perf_branches_hw(void)
+{
+ struct perf_event_attr attr = {0};
+ int duration = 0;
+ int pfd;
+
+ /* create perf event */
+ attr.size = sizeof(attr);
+ attr.type = PERF_TYPE_HARDWARE;
+ attr.config = PERF_COUNT_HW_CPU_CYCLES;
+ attr.freq = 1;
+ attr.sample_freq = 4000;
+ attr.sample_type = PERF_SAMPLE_BRANCH_STACK;
+ attr.branch_sample_type = PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_ANY;
+ pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
+
+ /*
+ * Some setups don't support branch records (virtual machines, !x86),
+ * so skip test in this case.
+ */
+ if (pfd == -1) {
+ if (errno == ENOENT || errno == EOPNOTSUPP) {
+ printf("%s:SKIP:no PERF_SAMPLE_BRANCH_STACK\n",
+ __func__);
+ test__skip();
+ return;
+ }
+ if (CHECK(pfd < 0, "perf_event_open", "err %d errno %d\n",
+ pfd, errno))
+ return;
+ }
+
+ test_perf_branches_common(pfd, check_good_sample);
+
+ close(pfd);
+}
+
+/*
+ * Tests negative case -- run bpf_read_branch_records() on improperly configured
+ * perf event.
+ */
+static void test_perf_branches_no_hw(void)
+{
+ struct perf_event_attr attr = {0};
+ int duration = 0;
+ int pfd;
+
+ /* create perf event */
+ attr.size = sizeof(attr);
+ attr.type = PERF_TYPE_SOFTWARE;
+ attr.config = PERF_COUNT_SW_CPU_CLOCK;
+ attr.freq = 1;
+ attr.sample_freq = 4000;
+ pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
+ if (CHECK(pfd < 0, "perf_event_open", "err %d\n", pfd))
+ return;
+
+ test_perf_branches_common(pfd, check_bad_sample);
+
+ close(pfd);
+}
+
+void test_perf_branches(void)
+{
+ if (test__start_subtest("perf_branches_hw"))
+ test_perf_branches_hw();
+ if (test__start_subtest("perf_branches_no_hw"))
+ test_perf_branches_no_hw();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
index 0800036ed654..821b4146b7b6 100644
--- a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
+++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
@@ -36,6 +36,7 @@ static int result_map, tmp_index_ovr_map, linum_map, data_check_map;
static __u32 expected_results[NR_RESULTS];
static int sk_fds[REUSEPORT_ARRAY_SIZE];
static int reuseport_array = -1, outer_map = -1;
+static enum bpf_map_type inner_map_type;
static int select_by_skb_data_prog;
static int saved_tcp_syncookie = -1;
static struct bpf_object *obj;
@@ -63,13 +64,15 @@ static union sa46 {
} \
})
-static int create_maps(void)
+static int create_maps(enum bpf_map_type inner_type)
{
struct bpf_create_map_attr attr = {};
+ inner_map_type = inner_type;
+
/* Creating reuseport_array */
attr.name = "reuseport_array";
- attr.map_type = BPF_MAP_TYPE_REUSEPORT_SOCKARRAY;
+ attr.map_type = inner_type;
attr.key_size = sizeof(__u32);
attr.value_size = sizeof(__u32);
attr.max_entries = REUSEPORT_ARRAY_SIZE;
@@ -506,11 +509,6 @@ static void test_syncookie(int type, sa_family_t family)
.pass_on_failure = 0,
};
- if (type != SOCK_STREAM) {
- test__skip();
- return;
- }
-
/*
* +1 for TCP-SYN and
* +1 for the TCP-ACK (ack the syncookie)
@@ -728,12 +726,36 @@ static void cleanup_per_test(bool no_inner_map)
static void cleanup(void)
{
- if (outer_map != -1)
+ if (outer_map != -1) {
close(outer_map);
- if (reuseport_array != -1)
+ outer_map = -1;
+ }
+
+ if (reuseport_array != -1) {
close(reuseport_array);
- if (obj)
+ reuseport_array = -1;
+ }
+
+ if (obj) {
bpf_object__close(obj);
+ obj = NULL;
+ }
+
+ memset(expected_results, 0, sizeof(expected_results));
+}
+
+static const char *maptype_str(enum bpf_map_type type)
+{
+ switch (type) {
+ case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
+ return "reuseport_sockarray";
+ case BPF_MAP_TYPE_SOCKMAP:
+ return "sockmap";
+ case BPF_MAP_TYPE_SOCKHASH:
+ return "sockhash";
+ default:
+ return "unknown";
+ }
}
static const char *family_str(sa_family_t family)
@@ -760,7 +782,7 @@ static const char *sotype_str(int sotype)
}
}
-#define TEST_INIT(fn, ...) { fn, #fn, __VA_ARGS__ }
+#define TEST_INIT(fn_, ...) { .fn = fn_, .name = #fn_, __VA_ARGS__ }
static void test_config(int sotype, sa_family_t family, bool inany)
{
@@ -768,12 +790,15 @@ static void test_config(int sotype, sa_family_t family, bool inany)
void (*fn)(int sotype, sa_family_t family);
const char *name;
bool no_inner_map;
+ int need_sotype;
} tests[] = {
- TEST_INIT(test_err_inner_map, true /* no_inner_map */),
+ TEST_INIT(test_err_inner_map,
+ .no_inner_map = true),
TEST_INIT(test_err_skb_data),
TEST_INIT(test_err_sk_select_port),
TEST_INIT(test_pass),
- TEST_INIT(test_syncookie),
+ TEST_INIT(test_syncookie,
+ .need_sotype = SOCK_STREAM),
TEST_INIT(test_pass_on_err),
TEST_INIT(test_detach_bpf),
};
@@ -781,7 +806,11 @@ static void test_config(int sotype, sa_family_t family, bool inany)
const struct test *t;
for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
- snprintf(s, sizeof(s), "%s/%s %s %s",
+ if (t->need_sotype && t->need_sotype != sotype)
+ continue; /* test not compatible with socket type */
+
+ snprintf(s, sizeof(s), "%s %s/%s %s %s",
+ maptype_str(inner_map_type),
family_str(family), sotype_str(sotype),
inany ? "INANY" : "LOOPBACK", t->name);
@@ -816,13 +845,20 @@ static void test_all(void)
test_config(c->sotype, c->family, c->inany);
}
-void test_select_reuseport(void)
+void test_map_type(enum bpf_map_type mt)
{
- if (create_maps())
+ if (create_maps(mt))
goto out;
if (prepare_bpf_obj())
goto out;
+ test_all();
+out:
+ cleanup();
+}
+
+void test_select_reuseport(void)
+{
saved_tcp_fo = read_int_sysctl(TCP_FO_SYSCTL);
if (saved_tcp_fo < 0)
goto out;
@@ -835,8 +871,9 @@ void test_select_reuseport(void)
if (disable_syncookie())
goto out;
- test_all();
+ test_map_type(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
+ test_map_type(BPF_MAP_TYPE_SOCKMAP);
+ test_map_type(BPF_MAP_TYPE_SOCKHASH);
out:
- cleanup();
restore_sysctls();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_assign.c b/tools/testing/selftests/bpf/prog_tests/sk_assign.c
new file mode 100644
index 000000000000..d572e1a2c297
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sk_assign.c
@@ -0,0 +1,309 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+// Copyright (c) 2019 Cloudflare
+// Copyright (c) 2020 Isovalent, Inc.
+/*
+ * Test that the socket assign program is able to redirect traffic towards a
+ * socket, regardless of whether the port or address destination of the traffic
+ * matches the port.
+ */
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "test_progs.h"
+
+#define BIND_PORT 1234
+#define CONNECT_PORT 4321
+#define TEST_DADDR (0xC0A80203)
+#define NS_SELF "/proc/self/ns/net"
+
+static const struct timeval timeo_sec = { .tv_sec = 3 };
+static const size_t timeo_optlen = sizeof(timeo_sec);
+static int stop, duration;
+
+static bool
+configure_stack(void)
+{
+ char tc_cmd[BUFSIZ];
+
+ /* Move to a new networking namespace */
+ if (CHECK_FAIL(unshare(CLONE_NEWNET)))
+ return false;
+
+ /* Configure necessary links, routes */
+ if (CHECK_FAIL(system("ip link set dev lo up")))
+ return false;
+ if (CHECK_FAIL(system("ip route add local default dev lo")))
+ return false;
+ if (CHECK_FAIL(system("ip -6 route add local default dev lo")))
+ return false;
+
+ /* Load qdisc, BPF program */
+ if (CHECK_FAIL(system("tc qdisc add dev lo clsact")))
+ return false;
+ sprintf(tc_cmd, "%s %s %s %s", "tc filter add dev lo ingress bpf",
+ "direct-action object-file ./test_sk_assign.o",
+ "section classifier/sk_assign_test",
+ (env.verbosity < VERBOSE_VERY) ? " 2>/dev/null" : "");
+ if (CHECK(system(tc_cmd), "BPF load failed;",
+ "run with -vv for more info\n"))
+ return false;
+
+ return true;
+}
+
+static int
+start_server(const struct sockaddr *addr, socklen_t len, int type)
+{
+ int fd;
+
+ fd = socket(addr->sa_family, type, 0);
+ if (CHECK_FAIL(fd == -1))
+ goto out;
+ if (CHECK_FAIL(setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeo_sec,
+ timeo_optlen)))
+ goto close_out;
+ if (CHECK_FAIL(bind(fd, addr, len) == -1))
+ goto close_out;
+ if (type == SOCK_STREAM && CHECK_FAIL(listen(fd, 128) == -1))
+ goto close_out;
+
+ goto out;
+close_out:
+ close(fd);
+ fd = -1;
+out:
+ return fd;
+}
+
+static int
+connect_to_server(const struct sockaddr *addr, socklen_t len, int type)
+{
+ int fd = -1;
+
+ fd = socket(addr->sa_family, type, 0);
+ if (CHECK_FAIL(fd == -1))
+ goto out;
+ if (CHECK_FAIL(setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &timeo_sec,
+ timeo_optlen)))
+ goto close_out;
+ if (CHECK_FAIL(connect(fd, addr, len)))
+ goto close_out;
+
+ goto out;
+close_out:
+ close(fd);
+ fd = -1;
+out:
+ return fd;
+}
+
+static in_port_t
+get_port(int fd)
+{
+ struct sockaddr_storage ss;
+ socklen_t slen = sizeof(ss);
+ in_port_t port = 0;
+
+ if (CHECK_FAIL(getsockname(fd, (struct sockaddr *)&ss, &slen)))
+ return port;
+
+ switch (ss.ss_family) {
+ case AF_INET:
+ port = ((struct sockaddr_in *)&ss)->sin_port;
+ break;
+ case AF_INET6:
+ port = ((struct sockaddr_in6 *)&ss)->sin6_port;
+ break;
+ default:
+ CHECK(1, "Invalid address family", "%d\n", ss.ss_family);
+ }
+ return port;
+}
+
+static ssize_t
+rcv_msg(int srv_client, int type)
+{
+ struct sockaddr_storage ss;
+ char buf[BUFSIZ];
+ socklen_t slen;
+
+ if (type == SOCK_STREAM)
+ return read(srv_client, &buf, sizeof(buf));
+ else
+ return recvfrom(srv_client, &buf, sizeof(buf), 0,
+ (struct sockaddr *)&ss, &slen);
+}
+
+static int
+run_test(int server_fd, const struct sockaddr *addr, socklen_t len, int type)
+{
+ int client = -1, srv_client = -1;
+ char buf[] = "testing";
+ in_port_t port;
+ int ret = 1;
+
+ client = connect_to_server(addr, len, type);
+ if (client == -1) {
+ perror("Cannot connect to server");
+ goto out;
+ }
+
+ if (type == SOCK_STREAM) {
+ srv_client = accept(server_fd, NULL, NULL);
+ if (CHECK_FAIL(srv_client == -1)) {
+ perror("Can't accept connection");
+ goto out;
+ }
+ } else {
+ srv_client = server_fd;
+ }
+ if (CHECK_FAIL(write(client, buf, sizeof(buf)) != sizeof(buf))) {
+ perror("Can't write on client");
+ goto out;
+ }
+ if (CHECK_FAIL(rcv_msg(srv_client, type) != sizeof(buf))) {
+ perror("Can't read on server");
+ goto out;
+ }
+
+ port = get_port(srv_client);
+ if (CHECK_FAIL(!port))
+ goto out;
+ /* SOCK_STREAM is connected via accept(), so the server's local address
+ * will be the CONNECT_PORT rather than the BIND port that corresponds
+ * to the listen socket. SOCK_DGRAM on the other hand is connectionless
+ * so we can't really do the same check there; the server doesn't ever
+ * create a socket with CONNECT_PORT.
+ */
+ if (type == SOCK_STREAM &&
+ CHECK(port != htons(CONNECT_PORT), "Expected", "port %u but got %u",
+ CONNECT_PORT, ntohs(port)))
+ goto out;
+ else if (type == SOCK_DGRAM &&
+ CHECK(port != htons(BIND_PORT), "Expected",
+ "port %u but got %u", BIND_PORT, ntohs(port)))
+ goto out;
+
+ ret = 0;
+out:
+ close(client);
+ if (srv_client != server_fd)
+ close(srv_client);
+ if (ret)
+ WRITE_ONCE(stop, 1);
+ return ret;
+}
+
+static void
+prepare_addr(struct sockaddr *addr, int family, __u16 port, bool rewrite_addr)
+{
+ struct sockaddr_in *addr4;
+ struct sockaddr_in6 *addr6;
+
+ switch (family) {
+ case AF_INET:
+ addr4 = (struct sockaddr_in *)addr;
+ memset(addr4, 0, sizeof(*addr4));
+ addr4->sin_family = family;
+ addr4->sin_port = htons(port);
+ if (rewrite_addr)
+ addr4->sin_addr.s_addr = htonl(TEST_DADDR);
+ else
+ addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ break;
+ case AF_INET6:
+ addr6 = (struct sockaddr_in6 *)addr;
+ memset(addr6, 0, sizeof(*addr6));
+ addr6->sin6_family = family;
+ addr6->sin6_port = htons(port);
+ addr6->sin6_addr = in6addr_loopback;
+ if (rewrite_addr)
+ addr6->sin6_addr.s6_addr32[3] = htonl(TEST_DADDR);
+ break;
+ default:
+ fprintf(stderr, "Invalid family %d", family);
+ }
+}
+
+struct test_sk_cfg {
+ const char *name;
+ int family;
+ struct sockaddr *addr;
+ socklen_t len;
+ int type;
+ bool rewrite_addr;
+};
+
+#define TEST(NAME, FAMILY, TYPE, REWRITE) \
+{ \
+ .name = NAME, \
+ .family = FAMILY, \
+ .addr = (FAMILY == AF_INET) ? (struct sockaddr *)&addr4 \
+ : (struct sockaddr *)&addr6, \
+ .len = (FAMILY == AF_INET) ? sizeof(addr4) : sizeof(addr6), \
+ .type = TYPE, \
+ .rewrite_addr = REWRITE, \
+}
+
+void test_sk_assign(void)
+{
+ struct sockaddr_in addr4;
+ struct sockaddr_in6 addr6;
+ struct test_sk_cfg tests[] = {
+ TEST("ipv4 tcp port redir", AF_INET, SOCK_STREAM, false),
+ TEST("ipv4 tcp addr redir", AF_INET, SOCK_STREAM, true),
+ TEST("ipv6 tcp port redir", AF_INET6, SOCK_STREAM, false),
+ TEST("ipv6 tcp addr redir", AF_INET6, SOCK_STREAM, true),
+ TEST("ipv4 udp port redir", AF_INET, SOCK_DGRAM, false),
+ TEST("ipv4 udp addr redir", AF_INET, SOCK_DGRAM, true),
+ TEST("ipv6 udp port redir", AF_INET6, SOCK_DGRAM, false),
+ TEST("ipv6 udp addr redir", AF_INET6, SOCK_DGRAM, true),
+ };
+ int server = -1;
+ int self_net;
+
+ self_net = open(NS_SELF, O_RDONLY);
+ if (CHECK_FAIL(self_net < 0)) {
+ perror("Unable to open "NS_SELF);
+ return;
+ }
+
+ if (!configure_stack()) {
+ perror("configure_stack");
+ goto cleanup;
+ }
+
+ for (int i = 0; i < ARRAY_SIZE(tests) && !READ_ONCE(stop); i++) {
+ struct test_sk_cfg *test = &tests[i];
+ const struct sockaddr *addr;
+
+ if (!test__start_subtest(test->name))
+ continue;
+ prepare_addr(test->addr, test->family, BIND_PORT, false);
+ addr = (const struct sockaddr *)test->addr;
+ server = start_server(addr, test->len, test->type);
+ if (server == -1)
+ goto cleanup;
+
+ /* connect to unbound ports */
+ prepare_addr(test->addr, test->family, CONNECT_PORT,
+ test->rewrite_addr);
+ if (run_test(server, addr, test->len, test->type))
+ goto close;
+
+ close(server);
+ server = -1;
+ }
+
+close:
+ close(server);
+cleanup:
+ if (CHECK_FAIL(setns(self_net, CLONE_NEWNET)))
+ perror("Failed to setns("NS_SELF")");
+ close(self_net);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
index c6d6b685a946..4538bd08203f 100644
--- a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
+++ b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
@@ -14,6 +14,7 @@ void test_skb_ctx(void)
.wire_len = 100,
.gso_segs = 8,
.mark = 9,
+ .gso_size = 10,
};
struct bpf_prog_test_run_attr tattr = {
.data_in = &pkt_v4,
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
new file mode 100644
index 000000000000..06b86addc181
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Cloudflare
+/*
+ * Tests for sockmap/sockhash holding kTLS sockets.
+ */
+
+#include "test_progs.h"
+
+#define MAX_TEST_NAME 80
+#define TCP_ULP 31
+
+static int tcp_server(int family)
+{
+ int err, s;
+
+ s = socket(family, SOCK_STREAM, 0);
+ if (CHECK_FAIL(s == -1)) {
+ perror("socket");
+ return -1;
+ }
+
+ err = listen(s, SOMAXCONN);
+ if (CHECK_FAIL(err)) {
+ perror("listen");
+ return -1;
+ }
+
+ return s;
+}
+
+static int disconnect(int fd)
+{
+ struct sockaddr unspec = { AF_UNSPEC };
+
+ return connect(fd, &unspec, sizeof(unspec));
+}
+
+/* Disconnect (unhash) a kTLS socket after removing it from sockmap. */
+static void test_sockmap_ktls_disconnect_after_delete(int family, int map)
+{
+ struct sockaddr_storage addr = {0};
+ socklen_t len = sizeof(addr);
+ int err, cli, srv, zero = 0;
+
+ srv = tcp_server(family);
+ if (srv == -1)
+ return;
+
+ err = getsockname(srv, (struct sockaddr *)&addr, &len);
+ if (CHECK_FAIL(err)) {
+ perror("getsockopt");
+ goto close_srv;
+ }
+
+ cli = socket(family, SOCK_STREAM, 0);
+ if (CHECK_FAIL(cli == -1)) {
+ perror("socket");
+ goto close_srv;
+ }
+
+ err = connect(cli, (struct sockaddr *)&addr, len);
+ if (CHECK_FAIL(err)) {
+ perror("connect");
+ goto close_cli;
+ }
+
+ err = bpf_map_update_elem(map, &zero, &cli, 0);
+ if (CHECK_FAIL(err)) {
+ perror("bpf_map_update_elem");
+ goto close_cli;
+ }
+
+ err = setsockopt(cli, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls"));
+ if (CHECK_FAIL(err)) {
+ perror("setsockopt(TCP_ULP)");
+ goto close_cli;
+ }
+
+ err = bpf_map_delete_elem(map, &zero);
+ if (CHECK_FAIL(err)) {
+ perror("bpf_map_delete_elem");
+ goto close_cli;
+ }
+
+ err = disconnect(cli);
+ if (CHECK_FAIL(err))
+ perror("disconnect");
+
+close_cli:
+ close(cli);
+close_srv:
+ close(srv);
+}
+
+static void run_tests(int family, enum bpf_map_type map_type)
+{
+ char test_name[MAX_TEST_NAME];
+ int map;
+
+ map = bpf_create_map(map_type, sizeof(int), sizeof(int), 1, 0);
+ if (CHECK_FAIL(map == -1)) {
+ perror("bpf_map_create");
+ return;
+ }
+
+ snprintf(test_name, MAX_TEST_NAME,
+ "sockmap_ktls disconnect_after_delete %s %s",
+ family == AF_INET ? "IPv4" : "IPv6",
+ map_type == BPF_MAP_TYPE_SOCKMAP ? "SOCKMAP" : "SOCKHASH");
+ if (!test__start_subtest(test_name))
+ return;
+
+ test_sockmap_ktls_disconnect_after_delete(family, map);
+
+ close(map);
+}
+
+void test_sockmap_ktls(void)
+{
+ run_tests(AF_INET, BPF_MAP_TYPE_SOCKMAP);
+ run_tests(AF_INET, BPF_MAP_TYPE_SOCKHASH);
+ run_tests(AF_INET6, BPF_MAP_TYPE_SOCKMAP);
+ run_tests(AF_INET6, BPF_MAP_TYPE_SOCKHASH);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
new file mode 100644
index 000000000000..d7d65a700799
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -0,0 +1,1635 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Cloudflare
+/*
+ * Test suite for SOCKMAP/SOCKHASH holding listening sockets.
+ * Covers:
+ * 1. BPF map operations - bpf_map_{update,lookup delete}_elem
+ * 2. BPF redirect helpers - bpf_{sk,msg}_redirect_map
+ * 3. BPF reuseport helper - bpf_sk_select_reuseport
+ */
+
+#include <linux/compiler.h>
+#include <errno.h>
+#include <error.h>
+#include <limits.h>
+#include <netinet/in.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/select.h>
+#include <unistd.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_util.h"
+#include "test_progs.h"
+#include "test_sockmap_listen.skel.h"
+
+#define IO_TIMEOUT_SEC 30
+#define MAX_STRERR_LEN 256
+#define MAX_TEST_NAME 80
+
+#define _FAIL(errnum, fmt...) \
+ ({ \
+ error_at_line(0, (errnum), __func__, __LINE__, fmt); \
+ CHECK_FAIL(true); \
+ })
+#define FAIL(fmt...) _FAIL(0, fmt)
+#define FAIL_ERRNO(fmt...) _FAIL(errno, fmt)
+#define FAIL_LIBBPF(err, msg) \
+ ({ \
+ char __buf[MAX_STRERR_LEN]; \
+ libbpf_strerror((err), __buf, sizeof(__buf)); \
+ FAIL("%s: %s", (msg), __buf); \
+ })
+
+/* Wrappers that fail the test on error and report it. */
+
+#define xaccept_nonblock(fd, addr, len) \
+ ({ \
+ int __ret = \
+ accept_timeout((fd), (addr), (len), IO_TIMEOUT_SEC); \
+ if (__ret == -1) \
+ FAIL_ERRNO("accept"); \
+ __ret; \
+ })
+
+#define xbind(fd, addr, len) \
+ ({ \
+ int __ret = bind((fd), (addr), (len)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("bind"); \
+ __ret; \
+ })
+
+#define xclose(fd) \
+ ({ \
+ int __ret = close((fd)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("close"); \
+ __ret; \
+ })
+
+#define xconnect(fd, addr, len) \
+ ({ \
+ int __ret = connect((fd), (addr), (len)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("connect"); \
+ __ret; \
+ })
+
+#define xgetsockname(fd, addr, len) \
+ ({ \
+ int __ret = getsockname((fd), (addr), (len)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("getsockname"); \
+ __ret; \
+ })
+
+#define xgetsockopt(fd, level, name, val, len) \
+ ({ \
+ int __ret = getsockopt((fd), (level), (name), (val), (len)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("getsockopt(" #name ")"); \
+ __ret; \
+ })
+
+#define xlisten(fd, backlog) \
+ ({ \
+ int __ret = listen((fd), (backlog)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("listen"); \
+ __ret; \
+ })
+
+#define xsetsockopt(fd, level, name, val, len) \
+ ({ \
+ int __ret = setsockopt((fd), (level), (name), (val), (len)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("setsockopt(" #name ")"); \
+ __ret; \
+ })
+
+#define xsend(fd, buf, len, flags) \
+ ({ \
+ ssize_t __ret = send((fd), (buf), (len), (flags)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("send"); \
+ __ret; \
+ })
+
+#define xrecv_nonblock(fd, buf, len, flags) \
+ ({ \
+ ssize_t __ret = recv_timeout((fd), (buf), (len), (flags), \
+ IO_TIMEOUT_SEC); \
+ if (__ret == -1) \
+ FAIL_ERRNO("recv"); \
+ __ret; \
+ })
+
+#define xsocket(family, sotype, flags) \
+ ({ \
+ int __ret = socket(family, sotype, flags); \
+ if (__ret == -1) \
+ FAIL_ERRNO("socket"); \
+ __ret; \
+ })
+
+#define xbpf_map_delete_elem(fd, key) \
+ ({ \
+ int __ret = bpf_map_delete_elem((fd), (key)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("map_delete"); \
+ __ret; \
+ })
+
+#define xbpf_map_lookup_elem(fd, key, val) \
+ ({ \
+ int __ret = bpf_map_lookup_elem((fd), (key), (val)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("map_lookup"); \
+ __ret; \
+ })
+
+#define xbpf_map_update_elem(fd, key, val, flags) \
+ ({ \
+ int __ret = bpf_map_update_elem((fd), (key), (val), (flags)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("map_update"); \
+ __ret; \
+ })
+
+#define xbpf_prog_attach(prog, target, type, flags) \
+ ({ \
+ int __ret = \
+ bpf_prog_attach((prog), (target), (type), (flags)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("prog_attach(" #type ")"); \
+ __ret; \
+ })
+
+#define xbpf_prog_detach2(prog, target, type) \
+ ({ \
+ int __ret = bpf_prog_detach2((prog), (target), (type)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("prog_detach2(" #type ")"); \
+ __ret; \
+ })
+
+#define xpthread_create(thread, attr, func, arg) \
+ ({ \
+ int __ret = pthread_create((thread), (attr), (func), (arg)); \
+ errno = __ret; \
+ if (__ret) \
+ FAIL_ERRNO("pthread_create"); \
+ __ret; \
+ })
+
+#define xpthread_join(thread, retval) \
+ ({ \
+ int __ret = pthread_join((thread), (retval)); \
+ errno = __ret; \
+ if (__ret) \
+ FAIL_ERRNO("pthread_join"); \
+ __ret; \
+ })
+
+static int poll_read(int fd, unsigned int timeout_sec)
+{
+ struct timeval timeout = { .tv_sec = timeout_sec };
+ fd_set rfds;
+ int r;
+
+ FD_ZERO(&rfds);
+ FD_SET(fd, &rfds);
+
+ r = select(fd + 1, &rfds, NULL, NULL, &timeout);
+ if (r == 0)
+ errno = ETIME;
+
+ return r == 1 ? 0 : -1;
+}
+
+static int accept_timeout(int fd, struct sockaddr *addr, socklen_t *len,
+ unsigned int timeout_sec)
+{
+ if (poll_read(fd, timeout_sec))
+ return -1;
+
+ return accept(fd, addr, len);
+}
+
+static int recv_timeout(int fd, void *buf, size_t len, int flags,
+ unsigned int timeout_sec)
+{
+ if (poll_read(fd, timeout_sec))
+ return -1;
+
+ return recv(fd, buf, len, flags);
+}
+
+static void init_addr_loopback4(struct sockaddr_storage *ss, socklen_t *len)
+{
+ struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss));
+
+ addr4->sin_family = AF_INET;
+ addr4->sin_port = 0;
+ addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ *len = sizeof(*addr4);
+}
+
+static void init_addr_loopback6(struct sockaddr_storage *ss, socklen_t *len)
+{
+ struct sockaddr_in6 *addr6 = memset(ss, 0, sizeof(*ss));
+
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_port = 0;
+ addr6->sin6_addr = in6addr_loopback;
+ *len = sizeof(*addr6);
+}
+
+static void init_addr_loopback(int family, struct sockaddr_storage *ss,
+ socklen_t *len)
+{
+ switch (family) {
+ case AF_INET:
+ init_addr_loopback4(ss, len);
+ return;
+ case AF_INET6:
+ init_addr_loopback6(ss, len);
+ return;
+ default:
+ FAIL("unsupported address family %d", family);
+ }
+}
+
+static inline struct sockaddr *sockaddr(struct sockaddr_storage *ss)
+{
+ return (struct sockaddr *)ss;
+}
+
+static int enable_reuseport(int s, int progfd)
+{
+ int err, one = 1;
+
+ err = xsetsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
+ if (err)
+ return -1;
+ err = xsetsockopt(s, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &progfd,
+ sizeof(progfd));
+ if (err)
+ return -1;
+
+ return 0;
+}
+
+static int socket_loopback_reuseport(int family, int sotype, int progfd)
+{
+ struct sockaddr_storage addr;
+ socklen_t len;
+ int err, s;
+
+ init_addr_loopback(family, &addr, &len);
+
+ s = xsocket(family, sotype, 0);
+ if (s == -1)
+ return -1;
+
+ if (progfd >= 0)
+ enable_reuseport(s, progfd);
+
+ err = xbind(s, sockaddr(&addr), len);
+ if (err)
+ goto close;
+
+ if (sotype & SOCK_DGRAM)
+ return s;
+
+ err = xlisten(s, SOMAXCONN);
+ if (err)
+ goto close;
+
+ return s;
+close:
+ xclose(s);
+ return -1;
+}
+
+static int socket_loopback(int family, int sotype)
+{
+ return socket_loopback_reuseport(family, sotype, -1);
+}
+
+static void test_insert_invalid(int family, int sotype, int mapfd)
+{
+ u32 key = 0;
+ u64 value;
+ int err;
+
+ value = -1;
+ err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
+ if (!err || errno != EINVAL)
+ FAIL_ERRNO("map_update: expected EINVAL");
+
+ value = INT_MAX;
+ err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
+ if (!err || errno != EBADF)
+ FAIL_ERRNO("map_update: expected EBADF");
+}
+
+static void test_insert_opened(int family, int sotype, int mapfd)
+{
+ u32 key = 0;
+ u64 value;
+ int err, s;
+
+ s = xsocket(family, sotype, 0);
+ if (s == -1)
+ return;
+
+ errno = 0;
+ value = s;
+ err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
+ if (!err || errno != EOPNOTSUPP)
+ FAIL_ERRNO("map_update: expected EOPNOTSUPP");
+
+ xclose(s);
+}
+
+static void test_insert_bound(int family, int sotype, int mapfd)
+{
+ struct sockaddr_storage addr;
+ socklen_t len;
+ u32 key = 0;
+ u64 value;
+ int err, s;
+
+ init_addr_loopback(family, &addr, &len);
+
+ s = xsocket(family, sotype, 0);
+ if (s == -1)
+ return;
+
+ err = xbind(s, sockaddr(&addr), len);
+ if (err)
+ goto close;
+
+ errno = 0;
+ value = s;
+ err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
+ if (!err || errno != EOPNOTSUPP)
+ FAIL_ERRNO("map_update: expected EOPNOTSUPP");
+close:
+ xclose(s);
+}
+
+static void test_insert(int family, int sotype, int mapfd)
+{
+ u64 value;
+ u32 key;
+ int s;
+
+ s = socket_loopback(family, sotype);
+ if (s < 0)
+ return;
+
+ key = 0;
+ value = s;
+ xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
+ xclose(s);
+}
+
+static void test_delete_after_insert(int family, int sotype, int mapfd)
+{
+ u64 value;
+ u32 key;
+ int s;
+
+ s = socket_loopback(family, sotype);
+ if (s < 0)
+ return;
+
+ key = 0;
+ value = s;
+ xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
+ xbpf_map_delete_elem(mapfd, &key);
+ xclose(s);
+}
+
+static void test_delete_after_close(int family, int sotype, int mapfd)
+{
+ int err, s;
+ u64 value;
+ u32 key;
+
+ s = socket_loopback(family, sotype);
+ if (s < 0)
+ return;
+
+ key = 0;
+ value = s;
+ xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
+
+ xclose(s);
+
+ errno = 0;
+ err = bpf_map_delete_elem(mapfd, &key);
+ if (!err || (errno != EINVAL && errno != ENOENT))
+ /* SOCKMAP and SOCKHASH return different error codes */
+ FAIL_ERRNO("map_delete: expected EINVAL/EINVAL");
+}
+
+static void test_lookup_after_insert(int family, int sotype, int mapfd)
+{
+ u64 cookie, value;
+ socklen_t len;
+ u32 key;
+ int s;
+
+ s = socket_loopback(family, sotype);
+ if (s < 0)
+ return;
+
+ key = 0;
+ value = s;
+ xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
+
+ len = sizeof(cookie);
+ xgetsockopt(s, SOL_SOCKET, SO_COOKIE, &cookie, &len);
+
+ xbpf_map_lookup_elem(mapfd, &key, &value);
+
+ if (value != cookie) {
+ FAIL("map_lookup: have %#llx, want %#llx",
+ (unsigned long long)value, (unsigned long long)cookie);
+ }
+
+ xclose(s);
+}
+
+static void test_lookup_after_delete(int family, int sotype, int mapfd)
+{
+ int err, s;
+ u64 value;
+ u32 key;
+
+ s = socket_loopback(family, sotype);
+ if (s < 0)
+ return;
+
+ key = 0;
+ value = s;
+ xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
+ xbpf_map_delete_elem(mapfd, &key);
+
+ errno = 0;
+ err = bpf_map_lookup_elem(mapfd, &key, &value);
+ if (!err || errno != ENOENT)
+ FAIL_ERRNO("map_lookup: expected ENOENT");
+
+ xclose(s);
+}
+
+static void test_lookup_32_bit_value(int family, int sotype, int mapfd)
+{
+ u32 key, value32;
+ int err, s;
+
+ s = socket_loopback(family, sotype);
+ if (s < 0)
+ return;
+
+ mapfd = bpf_create_map(BPF_MAP_TYPE_SOCKMAP, sizeof(key),
+ sizeof(value32), 1, 0);
+ if (mapfd < 0) {
+ FAIL_ERRNO("map_create");
+ goto close;
+ }
+
+ key = 0;
+ value32 = s;
+ xbpf_map_update_elem(mapfd, &key, &value32, BPF_NOEXIST);
+
+ errno = 0;
+ err = bpf_map_lookup_elem(mapfd, &key, &value32);
+ if (!err || errno != ENOSPC)
+ FAIL_ERRNO("map_lookup: expected ENOSPC");
+
+ xclose(mapfd);
+close:
+ xclose(s);
+}
+
+static void test_update_existing(int family, int sotype, int mapfd)
+{
+ int s1, s2;
+ u64 value;
+ u32 key;
+
+ s1 = socket_loopback(family, sotype);
+ if (s1 < 0)
+ return;
+
+ s2 = socket_loopback(family, sotype);
+ if (s2 < 0)
+ goto close_s1;
+
+ key = 0;
+ value = s1;
+ xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
+
+ value = s2;
+ xbpf_map_update_elem(mapfd, &key, &value, BPF_EXIST);
+ xclose(s2);
+close_s1:
+ xclose(s1);
+}
+
+/* Exercise the code path where we destroy child sockets that never
+ * got accept()'ed, aka orphans, when parent socket gets closed.
+ */
+static void test_destroy_orphan_child(int family, int sotype, int mapfd)
+{
+ struct sockaddr_storage addr;
+ socklen_t len;
+ int err, s, c;
+ u64 value;
+ u32 key;
+
+ s = socket_loopback(family, sotype);
+ if (s < 0)
+ return;
+
+ len = sizeof(addr);
+ err = xgetsockname(s, sockaddr(&addr), &len);
+ if (err)
+ goto close_srv;
+
+ key = 0;
+ value = s;
+ xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
+
+ c = xsocket(family, sotype, 0);
+ if (c == -1)
+ goto close_srv;
+
+ xconnect(c, sockaddr(&addr), len);
+ xclose(c);
+close_srv:
+ xclose(s);
+}
+
+/* Perform a passive open after removing listening socket from SOCKMAP
+ * to ensure that callbacks get restored properly.
+ */
+static void test_clone_after_delete(int family, int sotype, int mapfd)
+{
+ struct sockaddr_storage addr;
+ socklen_t len;
+ int err, s, c;
+ u64 value;
+ u32 key;
+
+ s = socket_loopback(family, sotype);
+ if (s < 0)
+ return;
+
+ len = sizeof(addr);
+ err = xgetsockname(s, sockaddr(&addr), &len);
+ if (err)
+ goto close_srv;
+
+ key = 0;
+ value = s;
+ xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
+ xbpf_map_delete_elem(mapfd, &key);
+
+ c = xsocket(family, sotype, 0);
+ if (c < 0)
+ goto close_srv;
+
+ xconnect(c, sockaddr(&addr), len);
+ xclose(c);
+close_srv:
+ xclose(s);
+}
+
+/* Check that child socket that got created while parent was in a
+ * SOCKMAP, but got accept()'ed only after the parent has been removed
+ * from SOCKMAP, gets cloned without parent psock state or callbacks.
+ */
+static void test_accept_after_delete(int family, int sotype, int mapfd)
+{
+ struct sockaddr_storage addr;
+ const u32 zero = 0;
+ int err, s, c, p;
+ socklen_t len;
+ u64 value;
+
+ s = socket_loopback(family, sotype | SOCK_NONBLOCK);
+ if (s == -1)
+ return;
+
+ len = sizeof(addr);
+ err = xgetsockname(s, sockaddr(&addr), &len);
+ if (err)
+ goto close_srv;
+
+ value = s;
+ err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
+ if (err)
+ goto close_srv;
+
+ c = xsocket(family, sotype, 0);
+ if (c == -1)
+ goto close_srv;
+
+ /* Create child while parent is in sockmap */
+ err = xconnect(c, sockaddr(&addr), len);
+ if (err)
+ goto close_cli;
+
+ /* Remove parent from sockmap */
+ err = xbpf_map_delete_elem(mapfd, &zero);
+ if (err)
+ goto close_cli;
+
+ p = xaccept_nonblock(s, NULL, NULL);
+ if (p == -1)
+ goto close_cli;
+
+ /* Check that child sk_user_data is not set */
+ value = p;
+ xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
+
+ xclose(p);
+close_cli:
+ xclose(c);
+close_srv:
+ xclose(s);
+}
+
+/* Check that child socket that got created and accepted while parent
+ * was in a SOCKMAP is cloned without parent psock state or callbacks.
+ */
+static void test_accept_before_delete(int family, int sotype, int mapfd)
+{
+ struct sockaddr_storage addr;
+ const u32 zero = 0, one = 1;
+ int err, s, c, p;
+ socklen_t len;
+ u64 value;
+
+ s = socket_loopback(family, sotype | SOCK_NONBLOCK);
+ if (s == -1)
+ return;
+
+ len = sizeof(addr);
+ err = xgetsockname(s, sockaddr(&addr), &len);
+ if (err)
+ goto close_srv;
+
+ value = s;
+ err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
+ if (err)
+ goto close_srv;
+
+ c = xsocket(family, sotype, 0);
+ if (c == -1)
+ goto close_srv;
+
+ /* Create & accept child while parent is in sockmap */
+ err = xconnect(c, sockaddr(&addr), len);
+ if (err)
+ goto close_cli;
+
+ p = xaccept_nonblock(s, NULL, NULL);
+ if (p == -1)
+ goto close_cli;
+
+ /* Check that child sk_user_data is not set */
+ value = p;
+ xbpf_map_update_elem(mapfd, &one, &value, BPF_NOEXIST);
+
+ xclose(p);
+close_cli:
+ xclose(c);
+close_srv:
+ xclose(s);
+}
+
+struct connect_accept_ctx {
+ int sockfd;
+ unsigned int done;
+ unsigned int nr_iter;
+};
+
+static bool is_thread_done(struct connect_accept_ctx *ctx)
+{
+ return READ_ONCE(ctx->done);
+}
+
+static void *connect_accept_thread(void *arg)
+{
+ struct connect_accept_ctx *ctx = arg;
+ struct sockaddr_storage addr;
+ int family, socktype;
+ socklen_t len;
+ int err, i, s;
+
+ s = ctx->sockfd;
+
+ len = sizeof(addr);
+ err = xgetsockname(s, sockaddr(&addr), &len);
+ if (err)
+ goto done;
+
+ len = sizeof(family);
+ err = xgetsockopt(s, SOL_SOCKET, SO_DOMAIN, &family, &len);
+ if (err)
+ goto done;
+
+ len = sizeof(socktype);
+ err = xgetsockopt(s, SOL_SOCKET, SO_TYPE, &socktype, &len);
+ if (err)
+ goto done;
+
+ for (i = 0; i < ctx->nr_iter; i++) {
+ int c, p;
+
+ c = xsocket(family, socktype, 0);
+ if (c < 0)
+ break;
+
+ err = xconnect(c, (struct sockaddr *)&addr, sizeof(addr));
+ if (err) {
+ xclose(c);
+ break;
+ }
+
+ p = xaccept_nonblock(s, NULL, NULL);
+ if (p < 0) {
+ xclose(c);
+ break;
+ }
+
+ xclose(p);
+ xclose(c);
+ }
+done:
+ WRITE_ONCE(ctx->done, 1);
+ return NULL;
+}
+
+static void test_syn_recv_insert_delete(int family, int sotype, int mapfd)
+{
+ struct connect_accept_ctx ctx = { 0 };
+ struct sockaddr_storage addr;
+ socklen_t len;
+ u32 zero = 0;
+ pthread_t t;
+ int err, s;
+ u64 value;
+
+ s = socket_loopback(family, sotype | SOCK_NONBLOCK);
+ if (s < 0)
+ return;
+
+ len = sizeof(addr);
+ err = xgetsockname(s, sockaddr(&addr), &len);
+ if (err)
+ goto close;
+
+ ctx.sockfd = s;
+ ctx.nr_iter = 1000;
+
+ err = xpthread_create(&t, NULL, connect_accept_thread, &ctx);
+ if (err)
+ goto close;
+
+ value = s;
+ while (!is_thread_done(&ctx)) {
+ err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
+ if (err)
+ break;
+
+ err = xbpf_map_delete_elem(mapfd, &zero);
+ if (err)
+ break;
+ }
+
+ xpthread_join(t, NULL);
+close:
+ xclose(s);
+}
+
+static void *listen_thread(void *arg)
+{
+ struct sockaddr unspec = { AF_UNSPEC };
+ struct connect_accept_ctx *ctx = arg;
+ int err, i, s;
+
+ s = ctx->sockfd;
+
+ for (i = 0; i < ctx->nr_iter; i++) {
+ err = xlisten(s, 1);
+ if (err)
+ break;
+ err = xconnect(s, &unspec, sizeof(unspec));
+ if (err)
+ break;
+ }
+
+ WRITE_ONCE(ctx->done, 1);
+ return NULL;
+}
+
+static void test_race_insert_listen(int family, int socktype, int mapfd)
+{
+ struct connect_accept_ctx ctx = { 0 };
+ const u32 zero = 0;
+ const int one = 1;
+ pthread_t t;
+ int err, s;
+ u64 value;
+
+ s = xsocket(family, socktype, 0);
+ if (s < 0)
+ return;
+
+ err = xsetsockopt(s, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
+ if (err)
+ goto close;
+
+ ctx.sockfd = s;
+ ctx.nr_iter = 10000;
+
+ err = pthread_create(&t, NULL, listen_thread, &ctx);
+ if (err)
+ goto close;
+
+ value = s;
+ while (!is_thread_done(&ctx)) {
+ err = bpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
+ /* Expecting EOPNOTSUPP before listen() */
+ if (err && errno != EOPNOTSUPP) {
+ FAIL_ERRNO("map_update");
+ break;
+ }
+
+ err = bpf_map_delete_elem(mapfd, &zero);
+ /* Expecting no entry after unhash on connect(AF_UNSPEC) */
+ if (err && errno != EINVAL && errno != ENOENT) {
+ FAIL_ERRNO("map_delete");
+ break;
+ }
+ }
+
+ xpthread_join(t, NULL);
+close:
+ xclose(s);
+}
+
+static void zero_verdict_count(int mapfd)
+{
+ unsigned int zero = 0;
+ int key;
+
+ key = SK_DROP;
+ xbpf_map_update_elem(mapfd, &key, &zero, BPF_ANY);
+ key = SK_PASS;
+ xbpf_map_update_elem(mapfd, &key, &zero, BPF_ANY);
+}
+
+enum redir_mode {
+ REDIR_INGRESS,
+ REDIR_EGRESS,
+};
+
+static const char *redir_mode_str(enum redir_mode mode)
+{
+ switch (mode) {
+ case REDIR_INGRESS:
+ return "ingress";
+ case REDIR_EGRESS:
+ return "egress";
+ default:
+ return "unknown";
+ }
+}
+
+static void redir_to_connected(int family, int sotype, int sock_mapfd,
+ int verd_mapfd, enum redir_mode mode)
+{
+ const char *log_prefix = redir_mode_str(mode);
+ struct sockaddr_storage addr;
+ int s, c0, c1, p0, p1;
+ unsigned int pass;
+ socklen_t len;
+ int err, n;
+ u64 value;
+ u32 key;
+ char b;
+
+ zero_verdict_count(verd_mapfd);
+
+ s = socket_loopback(family, sotype | SOCK_NONBLOCK);
+ if (s < 0)
+ return;
+
+ len = sizeof(addr);
+ err = xgetsockname(s, sockaddr(&addr), &len);
+ if (err)
+ goto close_srv;
+
+ c0 = xsocket(family, sotype, 0);
+ if (c0 < 0)
+ goto close_srv;
+ err = xconnect(c0, sockaddr(&addr), len);
+ if (err)
+ goto close_cli0;
+
+ p0 = xaccept_nonblock(s, NULL, NULL);
+ if (p0 < 0)
+ goto close_cli0;
+
+ c1 = xsocket(family, sotype, 0);
+ if (c1 < 0)
+ goto close_peer0;
+ err = xconnect(c1, sockaddr(&addr), len);
+ if (err)
+ goto close_cli1;
+
+ p1 = xaccept_nonblock(s, NULL, NULL);
+ if (p1 < 0)
+ goto close_cli1;
+
+ key = 0;
+ value = p0;
+ err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+ if (err)
+ goto close_peer1;
+
+ key = 1;
+ value = p1;
+ err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+ if (err)
+ goto close_peer1;
+
+ n = write(mode == REDIR_INGRESS ? c1 : p1, "a", 1);
+ if (n < 0)
+ FAIL_ERRNO("%s: write", log_prefix);
+ if (n == 0)
+ FAIL("%s: incomplete write", log_prefix);
+ if (n < 1)
+ goto close_peer1;
+
+ key = SK_PASS;
+ err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
+ if (err)
+ goto close_peer1;
+ if (pass != 1)
+ FAIL("%s: want pass count 1, have %d", log_prefix, pass);
+
+ n = read(c0, &b, 1);
+ if (n < 0)
+ FAIL_ERRNO("%s: read", log_prefix);
+ if (n == 0)
+ FAIL("%s: incomplete read", log_prefix);
+
+close_peer1:
+ xclose(p1);
+close_cli1:
+ xclose(c1);
+close_peer0:
+ xclose(p0);
+close_cli0:
+ xclose(c0);
+close_srv:
+ xclose(s);
+}
+
+static void test_skb_redir_to_connected(struct test_sockmap_listen *skel,
+ struct bpf_map *inner_map, int family,
+ int sotype)
+{
+ int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+ int parser = bpf_program__fd(skel->progs.prog_skb_parser);
+ int verdict_map = bpf_map__fd(skel->maps.verdict_map);
+ int sock_map = bpf_map__fd(inner_map);
+ int err;
+
+ err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0);
+ if (err)
+ return;
+ err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (err)
+ goto detach;
+
+ redir_to_connected(family, sotype, sock_map, verdict_map,
+ REDIR_INGRESS);
+
+ xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT);
+detach:
+ xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER);
+}
+
+static void test_msg_redir_to_connected(struct test_sockmap_listen *skel,
+ struct bpf_map *inner_map, int family,
+ int sotype)
+{
+ int verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
+ int verdict_map = bpf_map__fd(skel->maps.verdict_map);
+ int sock_map = bpf_map__fd(inner_map);
+ int err;
+
+ err = xbpf_prog_attach(verdict, sock_map, BPF_SK_MSG_VERDICT, 0);
+ if (err)
+ return;
+
+ redir_to_connected(family, sotype, sock_map, verdict_map, REDIR_EGRESS);
+
+ xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT);
+}
+
+static void redir_to_listening(int family, int sotype, int sock_mapfd,
+ int verd_mapfd, enum redir_mode mode)
+{
+ const char *log_prefix = redir_mode_str(mode);
+ struct sockaddr_storage addr;
+ int s, c, p, err, n;
+ unsigned int drop;
+ socklen_t len;
+ u64 value;
+ u32 key;
+
+ zero_verdict_count(verd_mapfd);
+
+ s = socket_loopback(family, sotype | SOCK_NONBLOCK);
+ if (s < 0)
+ return;
+
+ len = sizeof(addr);
+ err = xgetsockname(s, sockaddr(&addr), &len);
+ if (err)
+ goto close_srv;
+
+ c = xsocket(family, sotype, 0);
+ if (c < 0)
+ goto close_srv;
+ err = xconnect(c, sockaddr(&addr), len);
+ if (err)
+ goto close_cli;
+
+ p = xaccept_nonblock(s, NULL, NULL);
+ if (p < 0)
+ goto close_cli;
+
+ key = 0;
+ value = s;
+ err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+ if (err)
+ goto close_peer;
+
+ key = 1;
+ value = p;
+ err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+ if (err)
+ goto close_peer;
+
+ n = write(mode == REDIR_INGRESS ? c : p, "a", 1);
+ if (n < 0 && errno != EACCES)
+ FAIL_ERRNO("%s: write", log_prefix);
+ if (n == 0)
+ FAIL("%s: incomplete write", log_prefix);
+ if (n < 1)
+ goto close_peer;
+
+ key = SK_DROP;
+ err = xbpf_map_lookup_elem(verd_mapfd, &key, &drop);
+ if (err)
+ goto close_peer;
+ if (drop != 1)
+ FAIL("%s: want drop count 1, have %d", log_prefix, drop);
+
+close_peer:
+ xclose(p);
+close_cli:
+ xclose(c);
+close_srv:
+ xclose(s);
+}
+
+static void test_skb_redir_to_listening(struct test_sockmap_listen *skel,
+ struct bpf_map *inner_map, int family,
+ int sotype)
+{
+ int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+ int parser = bpf_program__fd(skel->progs.prog_skb_parser);
+ int verdict_map = bpf_map__fd(skel->maps.verdict_map);
+ int sock_map = bpf_map__fd(inner_map);
+ int err;
+
+ err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0);
+ if (err)
+ return;
+ err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (err)
+ goto detach;
+
+ redir_to_listening(family, sotype, sock_map, verdict_map,
+ REDIR_INGRESS);
+
+ xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT);
+detach:
+ xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER);
+}
+
+static void test_msg_redir_to_listening(struct test_sockmap_listen *skel,
+ struct bpf_map *inner_map, int family,
+ int sotype)
+{
+ int verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
+ int verdict_map = bpf_map__fd(skel->maps.verdict_map);
+ int sock_map = bpf_map__fd(inner_map);
+ int err;
+
+ err = xbpf_prog_attach(verdict, sock_map, BPF_SK_MSG_VERDICT, 0);
+ if (err)
+ return;
+
+ redir_to_listening(family, sotype, sock_map, verdict_map, REDIR_EGRESS);
+
+ xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT);
+}
+
+static void test_reuseport_select_listening(int family, int sotype,
+ int sock_map, int verd_map,
+ int reuseport_prog)
+{
+ struct sockaddr_storage addr;
+ unsigned int pass;
+ int s, c, err;
+ socklen_t len;
+ u64 value;
+ u32 key;
+
+ zero_verdict_count(verd_map);
+
+ s = socket_loopback_reuseport(family, sotype | SOCK_NONBLOCK,
+ reuseport_prog);
+ if (s < 0)
+ return;
+
+ len = sizeof(addr);
+ err = xgetsockname(s, sockaddr(&addr), &len);
+ if (err)
+ goto close_srv;
+
+ key = 0;
+ value = s;
+ err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
+ if (err)
+ goto close_srv;
+
+ c = xsocket(family, sotype, 0);
+ if (c < 0)
+ goto close_srv;
+ err = xconnect(c, sockaddr(&addr), len);
+ if (err)
+ goto close_cli;
+
+ if (sotype == SOCK_STREAM) {
+ int p;
+
+ p = xaccept_nonblock(s, NULL, NULL);
+ if (p < 0)
+ goto close_cli;
+ xclose(p);
+ } else {
+ char b = 'a';
+ ssize_t n;
+
+ n = xsend(c, &b, sizeof(b), 0);
+ if (n == -1)
+ goto close_cli;
+
+ n = xrecv_nonblock(s, &b, sizeof(b), 0);
+ if (n == -1)
+ goto close_cli;
+ }
+
+ key = SK_PASS;
+ err = xbpf_map_lookup_elem(verd_map, &key, &pass);
+ if (err)
+ goto close_cli;
+ if (pass != 1)
+ FAIL("want pass count 1, have %d", pass);
+
+close_cli:
+ xclose(c);
+close_srv:
+ xclose(s);
+}
+
+static void test_reuseport_select_connected(int family, int sotype,
+ int sock_map, int verd_map,
+ int reuseport_prog)
+{
+ struct sockaddr_storage addr;
+ int s, c0, c1, p0, err;
+ unsigned int drop;
+ socklen_t len;
+ u64 value;
+ u32 key;
+
+ zero_verdict_count(verd_map);
+
+ s = socket_loopback_reuseport(family, sotype, reuseport_prog);
+ if (s < 0)
+ return;
+
+ /* Populate sock_map[0] to avoid ENOENT on first connection */
+ key = 0;
+ value = s;
+ err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
+ if (err)
+ goto close_srv;
+
+ len = sizeof(addr);
+ err = xgetsockname(s, sockaddr(&addr), &len);
+ if (err)
+ goto close_srv;
+
+ c0 = xsocket(family, sotype, 0);
+ if (c0 < 0)
+ goto close_srv;
+
+ err = xconnect(c0, sockaddr(&addr), len);
+ if (err)
+ goto close_cli0;
+
+ if (sotype == SOCK_STREAM) {
+ p0 = xaccept_nonblock(s, NULL, NULL);
+ if (p0 < 0)
+ goto close_cli0;
+ } else {
+ p0 = xsocket(family, sotype, 0);
+ if (p0 < 0)
+ goto close_cli0;
+
+ len = sizeof(addr);
+ err = xgetsockname(c0, sockaddr(&addr), &len);
+ if (err)
+ goto close_cli0;
+
+ err = xconnect(p0, sockaddr(&addr), len);
+ if (err)
+ goto close_cli0;
+ }
+
+ /* Update sock_map[0] to redirect to a connected socket */
+ key = 0;
+ value = p0;
+ err = xbpf_map_update_elem(sock_map, &key, &value, BPF_EXIST);
+ if (err)
+ goto close_peer0;
+
+ c1 = xsocket(family, sotype, 0);
+ if (c1 < 0)
+ goto close_peer0;
+
+ len = sizeof(addr);
+ err = xgetsockname(s, sockaddr(&addr), &len);
+ if (err)
+ goto close_srv;
+
+ errno = 0;
+ err = connect(c1, sockaddr(&addr), len);
+ if (sotype == SOCK_DGRAM) {
+ char b = 'a';
+ ssize_t n;
+
+ n = xsend(c1, &b, sizeof(b), 0);
+ if (n == -1)
+ goto close_cli1;
+
+ n = recv_timeout(c1, &b, sizeof(b), 0, IO_TIMEOUT_SEC);
+ err = n == -1;
+ }
+ if (!err || errno != ECONNREFUSED)
+ FAIL_ERRNO("connect: expected ECONNREFUSED");
+
+ key = SK_DROP;
+ err = xbpf_map_lookup_elem(verd_map, &key, &drop);
+ if (err)
+ goto close_cli1;
+ if (drop != 1)
+ FAIL("want drop count 1, have %d", drop);
+
+close_cli1:
+ xclose(c1);
+close_peer0:
+ xclose(p0);
+close_cli0:
+ xclose(c0);
+close_srv:
+ xclose(s);
+}
+
+/* Check that redirecting across reuseport groups is not allowed. */
+static void test_reuseport_mixed_groups(int family, int sotype, int sock_map,
+ int verd_map, int reuseport_prog)
+{
+ struct sockaddr_storage addr;
+ int s1, s2, c, err;
+ unsigned int drop;
+ socklen_t len;
+ u64 value;
+ u32 key;
+
+ zero_verdict_count(verd_map);
+
+ /* Create two listeners, each in its own reuseport group */
+ s1 = socket_loopback_reuseport(family, sotype, reuseport_prog);
+ if (s1 < 0)
+ return;
+
+ s2 = socket_loopback_reuseport(family, sotype, reuseport_prog);
+ if (s2 < 0)
+ goto close_srv1;
+
+ key = 0;
+ value = s1;
+ err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
+ if (err)
+ goto close_srv2;
+
+ key = 1;
+ value = s2;
+ err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
+
+ /* Connect to s2, reuseport BPF selects s1 via sock_map[0] */
+ len = sizeof(addr);
+ err = xgetsockname(s2, sockaddr(&addr), &len);
+ if (err)
+ goto close_srv2;
+
+ c = xsocket(family, sotype, 0);
+ if (c < 0)
+ goto close_srv2;
+
+ err = connect(c, sockaddr(&addr), len);
+ if (sotype == SOCK_DGRAM) {
+ char b = 'a';
+ ssize_t n;
+
+ n = xsend(c, &b, sizeof(b), 0);
+ if (n == -1)
+ goto close_cli;
+
+ n = recv_timeout(c, &b, sizeof(b), 0, IO_TIMEOUT_SEC);
+ err = n == -1;
+ }
+ if (!err || errno != ECONNREFUSED) {
+ FAIL_ERRNO("connect: expected ECONNREFUSED");
+ goto close_cli;
+ }
+
+ /* Expect drop, can't redirect outside of reuseport group */
+ key = SK_DROP;
+ err = xbpf_map_lookup_elem(verd_map, &key, &drop);
+ if (err)
+ goto close_cli;
+ if (drop != 1)
+ FAIL("want drop count 1, have %d", drop);
+
+close_cli:
+ xclose(c);
+close_srv2:
+ xclose(s2);
+close_srv1:
+ xclose(s1);
+}
+
+#define TEST(fn, ...) \
+ { \
+ fn, #fn, __VA_ARGS__ \
+ }
+
+static void test_ops_cleanup(const struct bpf_map *map)
+{
+ const struct bpf_map_def *def;
+ int err, mapfd;
+ u32 key;
+
+ def = bpf_map__def(map);
+ mapfd = bpf_map__fd(map);
+
+ for (key = 0; key < def->max_entries; key++) {
+ err = bpf_map_delete_elem(mapfd, &key);
+ if (err && errno != EINVAL && errno != ENOENT)
+ FAIL_ERRNO("map_delete: expected EINVAL/ENOENT");
+ }
+}
+
+static const char *family_str(sa_family_t family)
+{
+ switch (family) {
+ case AF_INET:
+ return "IPv4";
+ case AF_INET6:
+ return "IPv6";
+ default:
+ return "unknown";
+ }
+}
+
+static const char *map_type_str(const struct bpf_map *map)
+{
+ const struct bpf_map_def *def;
+
+ def = bpf_map__def(map);
+ if (IS_ERR(def))
+ return "invalid";
+
+ switch (def->type) {
+ case BPF_MAP_TYPE_SOCKMAP:
+ return "sockmap";
+ case BPF_MAP_TYPE_SOCKHASH:
+ return "sockhash";
+ default:
+ return "unknown";
+ }
+}
+
+static const char *sotype_str(int sotype)
+{
+ switch (sotype) {
+ case SOCK_DGRAM:
+ return "UDP";
+ case SOCK_STREAM:
+ return "TCP";
+ default:
+ return "unknown";
+ }
+}
+
+static void test_ops(struct test_sockmap_listen *skel, struct bpf_map *map,
+ int family, int sotype)
+{
+ const struct op_test {
+ void (*fn)(int family, int sotype, int mapfd);
+ const char *name;
+ int sotype;
+ } tests[] = {
+ /* insert */
+ TEST(test_insert_invalid),
+ TEST(test_insert_opened),
+ TEST(test_insert_bound, SOCK_STREAM),
+ TEST(test_insert),
+ /* delete */
+ TEST(test_delete_after_insert),
+ TEST(test_delete_after_close),
+ /* lookup */
+ TEST(test_lookup_after_insert),
+ TEST(test_lookup_after_delete),
+ TEST(test_lookup_32_bit_value),
+ /* update */
+ TEST(test_update_existing),
+ /* races with insert/delete */
+ TEST(test_destroy_orphan_child, SOCK_STREAM),
+ TEST(test_syn_recv_insert_delete, SOCK_STREAM),
+ TEST(test_race_insert_listen, SOCK_STREAM),
+ /* child clone */
+ TEST(test_clone_after_delete, SOCK_STREAM),
+ TEST(test_accept_after_delete, SOCK_STREAM),
+ TEST(test_accept_before_delete, SOCK_STREAM),
+ };
+ const char *family_name, *map_name, *sotype_name;
+ const struct op_test *t;
+ char s[MAX_TEST_NAME];
+ int map_fd;
+
+ family_name = family_str(family);
+ map_name = map_type_str(map);
+ sotype_name = sotype_str(sotype);
+ map_fd = bpf_map__fd(map);
+
+ for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+ snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name,
+ sotype_name, t->name);
+
+ if (t->sotype != 0 && t->sotype != sotype)
+ continue;
+
+ if (!test__start_subtest(s))
+ continue;
+
+ t->fn(family, sotype, map_fd);
+ test_ops_cleanup(map);
+ }
+}
+
+static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
+ int family, int sotype)
+{
+ const struct redir_test {
+ void (*fn)(struct test_sockmap_listen *skel,
+ struct bpf_map *map, int family, int sotype);
+ const char *name;
+ } tests[] = {
+ TEST(test_skb_redir_to_connected),
+ TEST(test_skb_redir_to_listening),
+ TEST(test_msg_redir_to_connected),
+ TEST(test_msg_redir_to_listening),
+ };
+ const char *family_name, *map_name;
+ const struct redir_test *t;
+ char s[MAX_TEST_NAME];
+
+ family_name = family_str(family);
+ map_name = map_type_str(map);
+
+ for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+ snprintf(s, sizeof(s), "%s %s %s", map_name, family_name,
+ t->name);
+
+ if (!test__start_subtest(s))
+ continue;
+
+ t->fn(skel, map, family, sotype);
+ }
+}
+
+static void test_reuseport(struct test_sockmap_listen *skel,
+ struct bpf_map *map, int family, int sotype)
+{
+ const struct reuseport_test {
+ void (*fn)(int family, int sotype, int socket_map,
+ int verdict_map, int reuseport_prog);
+ const char *name;
+ int sotype;
+ } tests[] = {
+ TEST(test_reuseport_select_listening),
+ TEST(test_reuseport_select_connected),
+ TEST(test_reuseport_mixed_groups),
+ };
+ int socket_map, verdict_map, reuseport_prog;
+ const char *family_name, *map_name, *sotype_name;
+ const struct reuseport_test *t;
+ char s[MAX_TEST_NAME];
+
+ family_name = family_str(family);
+ map_name = map_type_str(map);
+ sotype_name = sotype_str(sotype);
+
+ socket_map = bpf_map__fd(map);
+ verdict_map = bpf_map__fd(skel->maps.verdict_map);
+ reuseport_prog = bpf_program__fd(skel->progs.prog_reuseport);
+
+ for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+ snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name,
+ sotype_name, t->name);
+
+ if (t->sotype != 0 && t->sotype != sotype)
+ continue;
+
+ if (!test__start_subtest(s))
+ continue;
+
+ t->fn(family, sotype, socket_map, verdict_map, reuseport_prog);
+ }
+}
+
+static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
+ int family)
+{
+ test_ops(skel, map, family, SOCK_STREAM);
+ test_ops(skel, map, family, SOCK_DGRAM);
+ test_redir(skel, map, family, SOCK_STREAM);
+ test_reuseport(skel, map, family, SOCK_STREAM);
+ test_reuseport(skel, map, family, SOCK_DGRAM);
+}
+
+void test_sockmap_listen(void)
+{
+ struct test_sockmap_listen *skel;
+
+ skel = test_sockmap_listen__open_and_load();
+ if (!skel) {
+ FAIL("skeleton open/load failed");
+ return;
+ }
+
+ skel->bss->test_sockmap = true;
+ run_tests(skel, skel->maps.sock_map, AF_INET);
+ run_tests(skel, skel->maps.sock_map, AF_INET6);
+
+ skel->bss->test_sockmap = false;
+ run_tests(skel, skel->maps.sock_hash, AF_INET);
+ run_tests(skel, skel->maps.sock_hash, AF_INET6);
+
+ test_sockmap_listen__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
index f4cd60d6fba2..e56b52ab41da 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
@@ -188,7 +188,7 @@ static int start_server(void)
};
int fd;
- fd = socket(AF_INET, SOCK_STREAM, 0);
+ fd = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK, 0);
if (fd < 0) {
log_err("Failed to create server socket");
return -1;
@@ -205,6 +205,7 @@ static int start_server(void)
static pthread_mutex_t server_started_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t server_started = PTHREAD_COND_INITIALIZER;
+static volatile bool server_done = false;
static void *server_thread(void *arg)
{
@@ -222,23 +223,24 @@ static void *server_thread(void *arg)
if (CHECK_FAIL(err < 0)) {
perror("Failed to listed on socket");
- return NULL;
+ return ERR_PTR(err);
}
- client_fd = accept(fd, (struct sockaddr *)&addr, &len);
+ while (true) {
+ client_fd = accept(fd, (struct sockaddr *)&addr, &len);
+ if (client_fd == -1 && errno == EAGAIN) {
+ usleep(50);
+ continue;
+ }
+ break;
+ }
if (CHECK_FAIL(client_fd < 0)) {
perror("Failed to accept client");
- return NULL;
+ return ERR_PTR(err);
}
- /* Wait for the next connection (that never arrives)
- * to keep this thread alive to prevent calling
- * close() on client_fd.
- */
- if (CHECK_FAIL(accept(fd, (struct sockaddr *)&addr, &len) >= 0)) {
- perror("Unexpected success in second accept");
- return NULL;
- }
+ while (!server_done)
+ usleep(50);
close(client_fd);
@@ -249,6 +251,7 @@ void test_tcp_rtt(void)
{
int server_fd, cgroup_fd;
pthread_t tid;
+ void *server_res;
cgroup_fd = test__join_cgroup("/tcp_rtt");
if (CHECK_FAIL(cgroup_fd < 0))
@@ -267,6 +270,11 @@ void test_tcp_rtt(void)
pthread_mutex_unlock(&server_started_mtx);
CHECK_FAIL(run_test(cgroup_fd, server_fd));
+
+ server_done = true;
+ CHECK_FAIL(pthread_join(tid, &server_res));
+ CHECK_FAIL(IS_ERR(server_res));
+
close_server_fd:
close(server_fd);
close_cgroup_fd:
diff --git a/tools/testing/selftests/bpf/prog_tests/test_lsm.c b/tools/testing/selftests/bpf/prog_tests/test_lsm.c
new file mode 100644
index 000000000000..1e4c258de09d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_lsm.c
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (C) 2020 Google LLC.
+ */
+
+#include <test_progs.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <malloc.h>
+#include <stdlib.h>
+
+#include "lsm.skel.h"
+
+char *CMD_ARGS[] = {"true", NULL};
+
+int heap_mprotect(void)
+{
+ void *buf;
+ long sz;
+ int ret;
+
+ sz = sysconf(_SC_PAGESIZE);
+ if (sz < 0)
+ return sz;
+
+ buf = memalign(sz, 2 * sz);
+ if (buf == NULL)
+ return -ENOMEM;
+
+ ret = mprotect(buf, sz, PROT_READ | PROT_WRITE | PROT_EXEC);
+ free(buf);
+ return ret;
+}
+
+int exec_cmd(int *monitored_pid)
+{
+ int child_pid, child_status;
+
+ child_pid = fork();
+ if (child_pid == 0) {
+ *monitored_pid = getpid();
+ execvp(CMD_ARGS[0], CMD_ARGS);
+ return -EINVAL;
+ } else if (child_pid > 0) {
+ waitpid(child_pid, &child_status, 0);
+ return child_status;
+ }
+
+ return -EINVAL;
+}
+
+void test_test_lsm(void)
+{
+ struct lsm *skel = NULL;
+ int err, duration = 0;
+
+ skel = lsm__open_and_load();
+ if (CHECK(!skel, "skel_load", "lsm skeleton failed\n"))
+ goto close_prog;
+
+ err = lsm__attach(skel);
+ if (CHECK(err, "attach", "lsm attach failed: %d\n", err))
+ goto close_prog;
+
+ err = exec_cmd(&skel->bss->monitored_pid);
+ if (CHECK(err < 0, "exec_cmd", "err %d errno %d\n", err, errno))
+ goto close_prog;
+
+ CHECK(skel->bss->bprm_count != 1, "bprm_count", "bprm_count = %d\n",
+ skel->bss->bprm_count);
+
+ skel->bss->monitored_pid = getpid();
+
+ err = heap_mprotect();
+ if (CHECK(errno != EPERM, "heap_mprotect", "want errno=EPERM, got %d\n",
+ errno))
+ goto close_prog;
+
+ CHECK(skel->bss->mprotect_count != 1, "mprotect_count",
+ "mprotect_count = %d\n", skel->bss->mprotect_count);
+
+close_prog:
+ lsm__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
index 1f6ccdaed1ac..781c8d11604b 100644
--- a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
+++ b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
@@ -55,31 +55,40 @@ void test_trampoline_count(void)
/* attach 'allowed' 40 trampoline programs */
for (i = 0; i < MAX_TRAMP_PROGS; i++) {
obj = bpf_object__open_file(object, NULL);
- if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj)))
+ if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) {
+ obj = NULL;
goto cleanup;
+ }
err = bpf_object__load(obj);
if (CHECK(err, "obj_load", "err %d\n", err))
goto cleanup;
inst[i].obj = obj;
+ obj = NULL;
if (rand() % 2) {
- link = load(obj, fentry_name);
- if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link)))
+ link = load(inst[i].obj, fentry_name);
+ if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link))) {
+ link = NULL;
goto cleanup;
+ }
inst[i].link_fentry = link;
} else {
- link = load(obj, fexit_name);
- if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link)))
+ link = load(inst[i].obj, fexit_name);
+ if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link))) {
+ link = NULL;
goto cleanup;
+ }
inst[i].link_fexit = link;
}
}
/* and try 1 extra.. */
obj = bpf_object__open_file(object, NULL);
- if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj)))
+ if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) {
+ obj = NULL;
goto cleanup;
+ }
err = bpf_object__load(obj);
if (CHECK(err, "obj_load", "err %d\n", err))
@@ -104,7 +113,9 @@ void test_trampoline_count(void)
cleanup_extra:
bpf_object__close(obj);
cleanup:
- while (--i) {
+ if (i >= MAX_TRAMP_PROGS)
+ i = MAX_TRAMP_PROGS - 1;
+ for (; i >= 0; i--) {
bpf_link__destroy(inst[i].link_fentry);
bpf_link__destroy(inst[i].link_fexit);
bpf_object__close(inst[i].obj);
diff --git a/tools/testing/selftests/bpf/prog_tests/vmlinux.c b/tools/testing/selftests/bpf/prog_tests/vmlinux.c
new file mode 100644
index 000000000000..72310cfc6474
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/vmlinux.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <test_progs.h>
+#include <time.h>
+#include "test_vmlinux.skel.h"
+
+#define MY_TV_NSEC 1337
+
+static void nsleep()
+{
+ struct timespec ts = { .tv_nsec = MY_TV_NSEC };
+
+ (void)syscall(__NR_nanosleep, &ts, NULL);
+}
+
+void test_vmlinux(void)
+{
+ int duration = 0, err;
+ struct test_vmlinux* skel;
+ struct test_vmlinux__bss *bss;
+
+ skel = test_vmlinux__open_and_load();
+ if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+ return;
+ bss = skel->bss;
+
+ err = test_vmlinux__attach(skel);
+ if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+ goto cleanup;
+
+ /* trigger everything */
+ nsleep();
+
+ CHECK(!bss->tp_called, "tp", "not called\n");
+ CHECK(!bss->raw_tp_called, "raw_tp", "not called\n");
+ CHECK(!bss->tp_btf_called, "tp_btf", "not called\n");
+ CHECK(!bss->kprobe_called, "kprobe", "not called\n");
+ CHECK(!bss->fentry_called, "fentry", "not called\n");
+
+cleanup:
+ test_vmlinux__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c
new file mode 100644
index 000000000000..05b294d6b923
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+
+#define IFINDEX_LO 1
+#define XDP_FLAGS_REPLACE (1U << 4)
+
+void test_xdp_attach(void)
+{
+ struct bpf_object *obj1, *obj2, *obj3;
+ const char *file = "./test_xdp.o";
+ int err, fd1, fd2, fd3;
+ __u32 duration = 0;
+ DECLARE_LIBBPF_OPTS(bpf_xdp_set_link_opts, opts,
+ .old_fd = -1);
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj1, &fd1);
+ if (CHECK_FAIL(err))
+ return;
+ err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj2, &fd2);
+ if (CHECK_FAIL(err))
+ goto out_1;
+ err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj3, &fd3);
+ if (CHECK_FAIL(err))
+ goto out_2;
+
+ err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd1, XDP_FLAGS_REPLACE,
+ &opts);
+ if (CHECK(err, "load_ok", "initial load failed"))
+ goto out_close;
+
+ err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd2, XDP_FLAGS_REPLACE,
+ &opts);
+ if (CHECK(!err, "load_fail", "load with expected id didn't fail"))
+ goto out;
+
+ opts.old_fd = fd1;
+ err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd2, 0, &opts);
+ if (CHECK(err, "replace_ok", "replace valid old_fd failed"))
+ goto out;
+
+ err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd3, 0, &opts);
+ if (CHECK(!err, "replace_fail", "replace invalid old_fd didn't fail"))
+ goto out;
+
+ err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, 0, &opts);
+ if (CHECK(!err, "remove_fail", "remove invalid old_fd didn't fail"))
+ goto out;
+
+ opts.old_fd = fd2;
+ err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, 0, &opts);
+ if (CHECK(err, "remove_ok", "remove valid old_fd failed"))
+ goto out;
+
+out:
+ bpf_set_link_xdp_fd(IFINDEX_LO, -1, 0);
+out_close:
+ bpf_object__close(obj3);
+out_2:
+ bpf_object__close(obj2);
+out_1:
+ bpf_object__close(obj1);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
index 6b56bdc73ebc..a0f688c37023 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
@@ -4,17 +4,51 @@
#include "test_xdp.skel.h"
#include "test_xdp_bpf2bpf.skel.h"
+struct meta {
+ int ifindex;
+ int pkt_len;
+};
+
+static void on_sample(void *ctx, int cpu, void *data, __u32 size)
+{
+ int duration = 0;
+ struct meta *meta = (struct meta *)data;
+ struct ipv4_packet *trace_pkt_v4 = data + sizeof(*meta);
+
+ if (CHECK(size < sizeof(pkt_v4) + sizeof(*meta),
+ "check_size", "size %u < %zu\n",
+ size, sizeof(pkt_v4) + sizeof(*meta)))
+ return;
+
+ if (CHECK(meta->ifindex != if_nametoindex("lo"), "check_meta_ifindex",
+ "meta->ifindex = %d\n", meta->ifindex))
+ return;
+
+ if (CHECK(meta->pkt_len != sizeof(pkt_v4), "check_meta_pkt_len",
+ "meta->pkt_len = %zd\n", sizeof(pkt_v4)))
+ return;
+
+ if (CHECK(memcmp(trace_pkt_v4, &pkt_v4, sizeof(pkt_v4)),
+ "check_packet_content", "content not the same\n"))
+ return;
+
+ *(bool *)ctx = true;
+}
+
void test_xdp_bpf2bpf(void)
{
__u32 duration = 0, retval, size;
char buf[128];
int err, pkt_fd, map_fd;
+ bool passed = false;
struct iphdr *iph = (void *)buf + sizeof(struct ethhdr);
struct iptnl_info value4 = {.family = AF_INET};
struct test_xdp *pkt_skel = NULL;
struct test_xdp_bpf2bpf *ftrace_skel = NULL;
struct vip key4 = {.protocol = 6, .family = AF_INET};
- DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts);
+ struct bpf_program *prog;
+ struct perf_buffer *pb = NULL;
+ struct perf_buffer_opts pb_opts = {};
/* Load XDP program to introspect */
pkt_skel = test_xdp__open_and_load();
@@ -27,11 +61,21 @@ void test_xdp_bpf2bpf(void)
bpf_map_update_elem(map_fd, &key4, &value4, 0);
/* Load trace program */
- opts.attach_prog_fd = pkt_fd,
- ftrace_skel = test_xdp_bpf2bpf__open_opts(&opts);
+ ftrace_skel = test_xdp_bpf2bpf__open();
if (CHECK(!ftrace_skel, "__open", "ftrace skeleton failed\n"))
goto out;
+ /* Demonstrate the bpf_program__set_attach_target() API rather than
+ * the load with options, i.e. opts.attach_prog_fd.
+ */
+ prog = ftrace_skel->progs.trace_on_entry;
+ bpf_program__set_expected_attach_type(prog, BPF_TRACE_FENTRY);
+ bpf_program__set_attach_target(prog, pkt_fd, "_xdp_tx_iptunnel");
+
+ prog = ftrace_skel->progs.trace_on_exit;
+ bpf_program__set_expected_attach_type(prog, BPF_TRACE_FEXIT);
+ bpf_program__set_attach_target(prog, pkt_fd, "_xdp_tx_iptunnel");
+
err = test_xdp_bpf2bpf__load(ftrace_skel);
if (CHECK(err, "__load", "ftrace skeleton failed\n"))
goto out;
@@ -40,6 +84,14 @@ void test_xdp_bpf2bpf(void)
if (CHECK(err, "ftrace_attach", "ftrace attach failed: %d\n", err))
goto out;
+ /* Set up perf buffer */
+ pb_opts.sample_cb = on_sample;
+ pb_opts.ctx = &passed;
+ pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map),
+ 1, &pb_opts);
+ if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
+ goto out;
+
/* Run test program */
err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4),
buf, &size, &retval, &duration);
@@ -50,6 +102,15 @@ void test_xdp_bpf2bpf(void)
err, errno, retval, size))
goto out;
+ /* Make sure bpf_xdp_output() was triggered and it sent the expected
+ * data to the perf ring buffer.
+ */
+ err = perf_buffer__poll(pb, 100);
+ if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err))
+ goto out;
+
+ CHECK_FAIL(!passed);
+
/* Verify test results */
if (CHECK(ftrace_skel->bss->test_result_fentry != if_nametoindex("lo"),
"result", "fentry failed err %llu\n",
@@ -60,6 +121,8 @@ void test_xdp_bpf2bpf(void)
"fexit failed err %llu\n", ftrace_skel->bss->test_result_fexit);
out:
+ if (pb)
+ perf_buffer__free(pb);
test_xdp__destroy(pkt_skel);
test_xdp_bpf2bpf__destroy(ftrace_skel);
}
diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp.c b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
index b631fb5032d2..3fb4260570b1 100644
--- a/tools/testing/selftests/bpf/progs/bpf_dctcp.c
+++ b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
@@ -6,14 +6,24 @@
* the kernel BPF logic.
*/
+#include <stddef.h>
#include <linux/bpf.h>
#include <linux/types.h>
#include <bpf/bpf_helpers.h>
-#include "bpf_trace_helpers.h"
+#include <bpf/bpf_tracing.h>
#include "bpf_tcp_helpers.h"
char _license[] SEC("license") = "GPL";
+int stg_result = 0;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, int);
+} sk_stg_map SEC(".maps");
+
#define DCTCP_MAX_ALPHA 1024U
struct dctcp {
@@ -43,12 +53,18 @@ void BPF_PROG(dctcp_init, struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
struct dctcp *ca = inet_csk_ca(sk);
+ int *stg;
ca->prior_rcv_nxt = tp->rcv_nxt;
ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA);
ca->loss_cwnd = 0;
ca->ce_state = 0;
+ stg = bpf_sk_storage_get(&sk_stg_map, (void *)tp, NULL, 0);
+ if (stg) {
+ stg_result = *stg;
+ bpf_sk_storage_delete(&sk_stg_map, (void *)tp);
+ }
dctcp_reset(tp, ca);
}
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
index d4a02fe44a12..31975c96e2c9 100644
--- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
@@ -13,7 +13,7 @@ enum e1 {
enum e2 {
C = 100,
- D = -100,
+ D = 4294967295,
E = 0,
};
diff --git a/tools/testing/selftests/bpf/progs/fentry_test.c b/tools/testing/selftests/bpf/progs/fentry_test.c
index 38d3a82144ca..9365b686f84b 100644
--- a/tools/testing/selftests/bpf/progs/fentry_test.c
+++ b/tools/testing/selftests/bpf/progs/fentry_test.c
@@ -2,7 +2,7 @@
/* Copyright (c) 2019 Facebook */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-#include "bpf_trace_helpers.h"
+#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c
index c329fccf9842..98e1efe14549 100644
--- a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c
@@ -5,7 +5,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
-#include "bpf_trace_helpers.h"
+#include <bpf/bpf_tracing.h>
struct sk_buff {
unsigned int len;
diff --git a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf_simple.c b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf_simple.c
index 92f3fa47cf40..85c0b516d6ee 100644
--- a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf_simple.c
+++ b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf_simple.c
@@ -2,7 +2,7 @@
/* Copyright (c) 2019 Facebook */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-#include "bpf_trace_helpers.h"
+#include <bpf/bpf_tracing.h>
struct sk_buff {
unsigned int len;
diff --git a/tools/testing/selftests/bpf/progs/fexit_test.c b/tools/testing/selftests/bpf/progs/fexit_test.c
index 348109b9ea07..bd1e17d8024c 100644
--- a/tools/testing/selftests/bpf/progs/fexit_test.c
+++ b/tools/testing/selftests/bpf/progs/fexit_test.c
@@ -2,7 +2,7 @@
/* Copyright (c) 2019 Facebook */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-#include "bpf_trace_helpers.h"
+#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/kfree_skb.c b/tools/testing/selftests/bpf/progs/kfree_skb.c
index 8f48a909f079..a46a264ce24e 100644
--- a/tools/testing/selftests/bpf/progs/kfree_skb.c
+++ b/tools/testing/selftests/bpf/progs/kfree_skb.c
@@ -4,7 +4,7 @@
#include <stdbool.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
-#include "bpf_trace_helpers.h"
+#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
struct {
diff --git a/tools/testing/selftests/bpf/progs/lsm.c b/tools/testing/selftests/bpf/progs/lsm.c
new file mode 100644
index 000000000000..a4e3c223028d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/lsm.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <errno.h>
+
+char _license[] SEC("license") = "GPL";
+
+int monitored_pid = 0;
+int mprotect_count = 0;
+int bprm_count = 0;
+
+SEC("lsm/file_mprotect")
+int BPF_PROG(test_int_hook, struct vm_area_struct *vma,
+ unsigned long reqprot, unsigned long prot, int ret)
+{
+ if (ret != 0)
+ return ret;
+
+ __u32 pid = bpf_get_current_pid_tgid() >> 32;
+ int is_heap = 0;
+
+ is_heap = (vma->vm_start >= vma->vm_mm->start_brk &&
+ vma->vm_end <= vma->vm_mm->brk);
+
+ if (is_heap && monitored_pid == pid) {
+ mprotect_count++;
+ ret = -EPERM;
+ }
+
+ return ret;
+}
+
+SEC("lsm/bprm_committed_creds")
+int BPF_PROG(test_void_hook, struct linux_binprm *bprm)
+{
+ __u32 pid = bpf_get_current_pid_tgid() >> 32;
+
+ if (monitored_pid == pid)
+ bprm_count++;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/modify_return.c b/tools/testing/selftests/bpf/progs/modify_return.c
new file mode 100644
index 000000000000..8b7466a15c6b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/modify_return.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+static int sequence = 0;
+__s32 input_retval = 0;
+
+__u64 fentry_result = 0;
+SEC("fentry/bpf_modify_return_test")
+int BPF_PROG(fentry_test, int a, __u64 b)
+{
+ sequence++;
+ fentry_result = (sequence == 1);
+ return 0;
+}
+
+__u64 fmod_ret_result = 0;
+SEC("fmod_ret/bpf_modify_return_test")
+int BPF_PROG(fmod_ret_test, int a, int *b, int ret)
+{
+ sequence++;
+ /* This is the first fmod_ret program, the ret passed should be 0 */
+ fmod_ret_result = (sequence == 2 && ret == 0);
+ return input_retval;
+}
+
+__u64 fexit_result = 0;
+SEC("fexit/bpf_modify_return_test")
+int BPF_PROG(fexit_test, int a, __u64 b, int ret)
+{
+ sequence++;
+ /* If the input_reval is non-zero a successful modification should have
+ * occurred.
+ */
+ if (input_retval)
+ fexit_result = (sequence == 3 && ret == input_retval);
+ else
+ fexit_result = (sequence == 3 && ret == 4);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
index a5c6d5903b22..ca283af80d4e 100644
--- a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
+++ b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
@@ -12,7 +12,6 @@ int bpf_prog1(struct __sk_buff *skb)
__u32 lport = skb->local_port;
__u32 rport = skb->remote_port;
__u8 *d = data;
- __u32 len = (__u32) data_end - (__u32) data;
int err;
if (data + 10 > data_end) {
diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe.c b/tools/testing/selftests/bpf/progs/test_attach_probe.c
index dd8fae6660ab..8056a4c6d918 100644
--- a/tools/testing/selftests/bpf/progs/test_attach_probe.c
+++ b/tools/testing/selftests/bpf/progs/test_attach_probe.c
@@ -4,6 +4,7 @@
#include <linux/ptrace.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
int kprobe_res = 0;
int kretprobe_res = 0;
@@ -18,7 +19,7 @@ int handle_kprobe(struct pt_regs *ctx)
}
SEC("kretprobe/sys_nanosleep")
-int handle_kretprobe(struct pt_regs *ctx)
+int BPF_KRETPROBE(handle_kretprobe)
{
kretprobe_res = 2;
return 0;
diff --git a/tools/testing/selftests/bpf/progs/test_cgroup_link.c b/tools/testing/selftests/bpf/progs/test_cgroup_link.c
new file mode 100644
index 000000000000..77e47b9e4446
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_cgroup_link.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+int calls = 0;
+int alt_calls = 0;
+
+SEC("cgroup_skb/egress1")
+int egress(struct __sk_buff *skb)
+{
+ __sync_fetch_and_add(&calls, 1);
+ return 1;
+}
+
+SEC("cgroup_skb/egress2")
+int egress_alt(struct __sk_buff *skb)
+{
+ __sync_fetch_and_add(&alt_calls, 1);
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
+
diff --git a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp_err.c b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp_err.c
new file mode 100644
index 000000000000..8941a41c2a55
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp_err.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+#define MAX_STACK_RAWTP 10
+
+SEC("raw_tracepoint/sys_enter")
+int bpf_prog2(void *ctx)
+{
+ __u64 stack[MAX_STACK_RAWTP];
+ int error;
+
+ /* set all the flags which should return -EINVAL */
+ error = bpf_get_stack(ctx, stack, 0, -1);
+ if (error < 0)
+ goto loop;
+
+ return error;
+loop:
+ while (1) {
+ error++;
+ }
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_global_data.c b/tools/testing/selftests/bpf/progs/test_global_data.c
index dd7a4d3dbc0d..1319be1c54ba 100644
--- a/tools/testing/selftests/bpf/progs/test_global_data.c
+++ b/tools/testing/selftests/bpf/progs/test_global_data.c
@@ -68,7 +68,7 @@ static struct foo struct3 = {
bpf_map_update_elem(&result_##map, &key, var, 0); \
} while (0)
-SEC("static_data_load")
+SEC("classifier/static_data_load")
int load_static_data(struct __sk_buff *skb)
{
static const __u64 bar = ~0;
diff --git a/tools/testing/selftests/bpf/progs/test_link_pinning.c b/tools/testing/selftests/bpf/progs/test_link_pinning.c
new file mode 100644
index 000000000000..bbf2a5264dc0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_link_pinning.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+int in = 0;
+int out = 0;
+
+SEC("raw_tp/sys_enter")
+int raw_tp_prog(const void *ctx)
+{
+ out = in;
+ return 0;
+}
+
+SEC("tp_btf/sys_enter")
+int tp_btf_prog(const void *ctx)
+{
+ out = in;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c b/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c
new file mode 100644
index 000000000000..1dca70a6de2f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Carlos Neira cneirabustos@gmail.com */
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <bpf/bpf_helpers.h>
+
+static volatile struct {
+ __u64 dev;
+ __u64 ino;
+ __u64 pid_tgid;
+ __u64 user_pid_tgid;
+} res;
+
+SEC("raw_tracepoint/sys_enter")
+int trace(void *ctx)
+{
+ __u64 ns_pid_tgid, expected_pid;
+ struct bpf_pidns_info nsdata;
+ __u32 key = 0;
+
+ if (bpf_get_ns_current_pid_tgid(res.dev, res.ino, &nsdata,
+ sizeof(struct bpf_pidns_info)))
+ return 0;
+
+ ns_pid_tgid = (__u64)nsdata.tgid << 32 | nsdata.pid;
+ expected_pid = res.user_pid_tgid;
+
+ if (expected_pid != ns_pid_tgid)
+ return 0;
+
+ res.pid_tgid = ns_pid_tgid;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_overhead.c b/tools/testing/selftests/bpf/progs/test_overhead.c
index bfe9fbcb9684..56a50b25cd33 100644
--- a/tools/testing/selftests/bpf/progs/test_overhead.c
+++ b/tools/testing/selftests/bpf/progs/test_overhead.c
@@ -6,7 +6,6 @@
#include <linux/ptrace.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
-#include "bpf_trace_helpers.h"
struct task_struct;
@@ -17,11 +16,9 @@ int BPF_KPROBE(prog1, struct task_struct *tsk, const char *buf, bool exec)
}
SEC("kretprobe/__set_task_comm")
-int BPF_KRETPROBE(prog2,
- struct task_struct *tsk, const char *buf, bool exec,
- int ret)
+int BPF_KRETPROBE(prog2, int ret)
{
- return !PT_REGS_PARM1(ctx) && ret;
+ return ret;
}
SEC("raw_tp/task_rename")
diff --git a/tools/testing/selftests/bpf/progs/test_perf_branches.c b/tools/testing/selftests/bpf/progs/test_perf_branches.c
new file mode 100644
index 000000000000..a1ccc831c882
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_perf_branches.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+
+#include <stddef.h>
+#include <linux/ptrace.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+int valid = 0;
+int required_size_out = 0;
+int written_stack_out = 0;
+int written_global_out = 0;
+
+struct {
+ __u64 _a;
+ __u64 _b;
+ __u64 _c;
+} fpbe[30] = {0};
+
+SEC("perf_event")
+int perf_branches(void *ctx)
+{
+ __u64 entries[4 * 3] = {0};
+ int required_size, written_stack, written_global;
+
+ /* write to stack */
+ written_stack = bpf_read_branch_records(ctx, entries, sizeof(entries), 0);
+ /* ignore spurious events */
+ if (!written_stack)
+ return 1;
+
+ /* get required size */
+ required_size = bpf_read_branch_records(ctx, NULL, 0,
+ BPF_F_GET_BRANCH_RECORDS_SIZE);
+
+ written_global = bpf_read_branch_records(ctx, fpbe, sizeof(fpbe), 0);
+ /* ignore spurious events */
+ if (!written_global)
+ return 1;
+
+ required_size_out = required_size;
+ written_stack_out = written_stack;
+ written_global_out = written_global;
+ valid = 1;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_perf_buffer.c b/tools/testing/selftests/bpf/progs/test_perf_buffer.c
index ebfcc9f50c35..ad59c4c9aba8 100644
--- a/tools/testing/selftests/bpf/progs/test_perf_buffer.c
+++ b/tools/testing/selftests/bpf/progs/test_perf_buffer.c
@@ -4,7 +4,7 @@
#include <linux/ptrace.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-#include "bpf_trace_helpers.h"
+#include <bpf/bpf_tracing.h>
struct {
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
diff --git a/tools/testing/selftests/bpf/progs/test_probe_user.c b/tools/testing/selftests/bpf/progs/test_probe_user.c
index d556b1572cc6..89b3532ccc75 100644
--- a/tools/testing/selftests/bpf/progs/test_probe_user.c
+++ b/tools/testing/selftests/bpf/progs/test_probe_user.c
@@ -7,7 +7,6 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
-#include "bpf_trace_helpers.h"
static struct sockaddr_in old;
diff --git a/tools/testing/selftests/bpf/progs/test_sk_assign.c b/tools/testing/selftests/bpf/progs/test_sk_assign.c
new file mode 100644
index 000000000000..8f530843b4da
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sk_assign.c
@@ -0,0 +1,204 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Cloudflare Ltd.
+// Copyright (c) 2020 Isovalent, Inc.
+
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/pkt_cls.h>
+#include <linux/tcp.h>
+#include <sys/socket.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+int _version SEC("version") = 1;
+char _license[] SEC("license") = "GPL";
+
+/* Fill 'tuple' with L3 info, and attempt to find L4. On fail, return NULL. */
+static inline struct bpf_sock_tuple *
+get_tuple(struct __sk_buff *skb, bool *ipv4, bool *tcp)
+{
+ void *data_end = (void *)(long)skb->data_end;
+ void *data = (void *)(long)skb->data;
+ struct bpf_sock_tuple *result;
+ struct ethhdr *eth;
+ __u64 tuple_len;
+ __u8 proto = 0;
+ __u64 ihl_len;
+
+ eth = (struct ethhdr *)(data);
+ if (eth + 1 > data_end)
+ return NULL;
+
+ if (eth->h_proto == bpf_htons(ETH_P_IP)) {
+ struct iphdr *iph = (struct iphdr *)(data + sizeof(*eth));
+
+ if (iph + 1 > data_end)
+ return NULL;
+ if (iph->ihl != 5)
+ /* Options are not supported */
+ return NULL;
+ ihl_len = iph->ihl * 4;
+ proto = iph->protocol;
+ *ipv4 = true;
+ result = (struct bpf_sock_tuple *)&iph->saddr;
+ } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
+ struct ipv6hdr *ip6h = (struct ipv6hdr *)(data + sizeof(*eth));
+
+ if (ip6h + 1 > data_end)
+ return NULL;
+ ihl_len = sizeof(*ip6h);
+ proto = ip6h->nexthdr;
+ *ipv4 = false;
+ result = (struct bpf_sock_tuple *)&ip6h->saddr;
+ } else {
+ return (struct bpf_sock_tuple *)data;
+ }
+
+ if (proto != IPPROTO_TCP && proto != IPPROTO_UDP)
+ return NULL;
+
+ *tcp = (proto == IPPROTO_TCP);
+ return result;
+}
+
+static inline int
+handle_udp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
+{
+ struct bpf_sock_tuple ln = {0};
+ struct bpf_sock *sk;
+ size_t tuple_len;
+ int ret;
+
+ tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6);
+ if ((void *)tuple + tuple_len > (void *)(long)skb->data_end)
+ return TC_ACT_SHOT;
+
+ sk = bpf_sk_lookup_udp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0);
+ if (sk)
+ goto assign;
+
+ if (ipv4) {
+ if (tuple->ipv4.dport != bpf_htons(4321))
+ return TC_ACT_OK;
+
+ ln.ipv4.daddr = bpf_htonl(0x7f000001);
+ ln.ipv4.dport = bpf_htons(1234);
+
+ sk = bpf_sk_lookup_udp(skb, &ln, sizeof(ln.ipv4),
+ BPF_F_CURRENT_NETNS, 0);
+ } else {
+ if (tuple->ipv6.dport != bpf_htons(4321))
+ return TC_ACT_OK;
+
+ /* Upper parts of daddr are already zero. */
+ ln.ipv6.daddr[3] = bpf_htonl(0x1);
+ ln.ipv6.dport = bpf_htons(1234);
+
+ sk = bpf_sk_lookup_udp(skb, &ln, sizeof(ln.ipv6),
+ BPF_F_CURRENT_NETNS, 0);
+ }
+
+ /* workaround: We can't do a single socket lookup here, because then
+ * the compiler will likely spill tuple_len to the stack. This makes it
+ * lose all bounds information in the verifier, which then rejects the
+ * call as unsafe.
+ */
+ if (!sk)
+ return TC_ACT_SHOT;
+
+assign:
+ ret = bpf_sk_assign(skb, sk, 0);
+ bpf_sk_release(sk);
+ return ret;
+}
+
+static inline int
+handle_tcp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
+{
+ struct bpf_sock_tuple ln = {0};
+ struct bpf_sock *sk;
+ size_t tuple_len;
+ int ret;
+
+ tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6);
+ if ((void *)tuple + tuple_len > (void *)(long)skb->data_end)
+ return TC_ACT_SHOT;
+
+ sk = bpf_skc_lookup_tcp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0);
+ if (sk) {
+ if (sk->state != BPF_TCP_LISTEN)
+ goto assign;
+ bpf_sk_release(sk);
+ }
+
+ if (ipv4) {
+ if (tuple->ipv4.dport != bpf_htons(4321))
+ return TC_ACT_OK;
+
+ ln.ipv4.daddr = bpf_htonl(0x7f000001);
+ ln.ipv4.dport = bpf_htons(1234);
+
+ sk = bpf_skc_lookup_tcp(skb, &ln, sizeof(ln.ipv4),
+ BPF_F_CURRENT_NETNS, 0);
+ } else {
+ if (tuple->ipv6.dport != bpf_htons(4321))
+ return TC_ACT_OK;
+
+ /* Upper parts of daddr are already zero. */
+ ln.ipv6.daddr[3] = bpf_htonl(0x1);
+ ln.ipv6.dport = bpf_htons(1234);
+
+ sk = bpf_skc_lookup_tcp(skb, &ln, sizeof(ln.ipv6),
+ BPF_F_CURRENT_NETNS, 0);
+ }
+
+ /* workaround: We can't do a single socket lookup here, because then
+ * the compiler will likely spill tuple_len to the stack. This makes it
+ * lose all bounds information in the verifier, which then rejects the
+ * call as unsafe.
+ */
+ if (!sk)
+ return TC_ACT_SHOT;
+
+ if (sk->state != BPF_TCP_LISTEN) {
+ bpf_sk_release(sk);
+ return TC_ACT_SHOT;
+ }
+
+assign:
+ ret = bpf_sk_assign(skb, sk, 0);
+ bpf_sk_release(sk);
+ return ret;
+}
+
+SEC("classifier/sk_assign_test")
+int bpf_sk_assign_test(struct __sk_buff *skb)
+{
+ struct bpf_sock_tuple *tuple, ln = {0};
+ bool ipv4 = false;
+ bool tcp = false;
+ int tuple_len;
+ int ret = 0;
+
+ tuple = get_tuple(skb, &ipv4, &tcp);
+ if (!tuple)
+ return TC_ACT_SHOT;
+
+ /* Note that the verifier socket return type for bpf_skc_lookup_tcp()
+ * differs from bpf_sk_lookup_udp(), so even though the C-level type is
+ * the same here, if we try to share the implementations they will
+ * fail to verify because we're crossing pointer types.
+ */
+ if (tcp)
+ ret = handle_tcp(skb, tuple, ipv4);
+ else
+ ret = handle_udp(skb, tuple, ipv4);
+
+ return ret == 0 ? TC_ACT_OK : TC_ACT_SHOT;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_skb_ctx.c b/tools/testing/selftests/bpf/progs/test_skb_ctx.c
index 202de3938494..b02ea589ce7e 100644
--- a/tools/testing/selftests/bpf/progs/test_skb_ctx.c
+++ b/tools/testing/selftests/bpf/progs/test_skb_ctx.c
@@ -23,6 +23,8 @@ int process(struct __sk_buff *skb)
return 1;
if (skb->gso_segs != 8)
return 1;
+ if (skb->gso_size != 10)
+ return 1;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
new file mode 100644
index 000000000000..a3a366c57ce1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Cloudflare
+
+#include <errno.h>
+#include <stdbool.h>
+#include <linux/bpf.h>
+
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 2);
+ __type(key, __u32);
+ __type(value, __u64);
+} sock_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKHASH);
+ __uint(max_entries, 2);
+ __type(key, __u32);
+ __type(value, __u64);
+} sock_hash SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 2);
+ __type(key, int);
+ __type(value, unsigned int);
+} verdict_map SEC(".maps");
+
+static volatile bool test_sockmap; /* toggled by user-space */
+
+SEC("sk_skb/stream_parser")
+int prog_skb_parser(struct __sk_buff *skb)
+{
+ return skb->len;
+}
+
+SEC("sk_skb/stream_verdict")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+ unsigned int *count;
+ __u32 zero = 0;
+ int verdict;
+
+ if (test_sockmap)
+ verdict = bpf_sk_redirect_map(skb, &sock_map, zero, 0);
+ else
+ verdict = bpf_sk_redirect_hash(skb, &sock_hash, &zero, 0);
+
+ count = bpf_map_lookup_elem(&verdict_map, &verdict);
+ if (count)
+ (*count)++;
+
+ return verdict;
+}
+
+SEC("sk_msg")
+int prog_msg_verdict(struct sk_msg_md *msg)
+{
+ unsigned int *count;
+ __u32 zero = 0;
+ int verdict;
+
+ if (test_sockmap)
+ verdict = bpf_msg_redirect_map(msg, &sock_map, zero, 0);
+ else
+ verdict = bpf_msg_redirect_hash(msg, &sock_hash, &zero, 0);
+
+ count = bpf_map_lookup_elem(&verdict_map, &verdict);
+ if (count)
+ (*count)++;
+
+ return verdict;
+}
+
+SEC("sk_reuseport")
+int prog_reuseport(struct sk_reuseport_md *reuse)
+{
+ unsigned int *count;
+ int err, verdict;
+ __u32 zero = 0;
+
+ if (test_sockmap)
+ err = bpf_sk_select_reuseport(reuse, &sock_map, &zero, 0);
+ else
+ err = bpf_sk_select_reuseport(reuse, &sock_hash, &zero, 0);
+ verdict = err ? SK_DROP : SK_PASS;
+
+ count = bpf_map_lookup_elem(&verdict_map, &verdict);
+ if (count)
+ (*count)++;
+
+ return verdict;
+}
+
+int _version SEC("version") = 1;
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_trampoline_count.c b/tools/testing/selftests/bpf/progs/test_trampoline_count.c
index e51e6e3a81c2..f030e469d05b 100644
--- a/tools/testing/selftests/bpf/progs/test_trampoline_count.c
+++ b/tools/testing/selftests/bpf/progs/test_trampoline_count.c
@@ -2,7 +2,8 @@
#include <stdbool.h>
#include <stddef.h>
#include <linux/bpf.h>
-#include "bpf_trace_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
struct task_struct;
diff --git a/tools/testing/selftests/bpf/progs/test_vmlinux.c b/tools/testing/selftests/bpf/progs/test_vmlinux.c
new file mode 100644
index 000000000000..5611b564d3b1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_vmlinux.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include "vmlinux.h"
+#include <asm/unistd.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+#define MY_TV_NSEC 1337
+
+bool tp_called = false;
+bool raw_tp_called = false;
+bool tp_btf_called = false;
+bool kprobe_called = false;
+bool fentry_called = false;
+
+SEC("tp/syscalls/sys_enter_nanosleep")
+int handle__tp(struct trace_event_raw_sys_enter *args)
+{
+ struct __kernel_timespec *ts;
+
+ if (args->id != __NR_nanosleep)
+ return 0;
+
+ ts = (void *)args->args[0];
+ if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC)
+ return 0;
+
+ tp_called = true;
+ return 0;
+}
+
+SEC("raw_tp/sys_enter")
+int BPF_PROG(handle__raw_tp, struct pt_regs *regs, long id)
+{
+ struct __kernel_timespec *ts;
+
+ if (id != __NR_nanosleep)
+ return 0;
+
+ ts = (void *)PT_REGS_PARM1_CORE(regs);
+ if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC)
+ return 0;
+
+ raw_tp_called = true;
+ return 0;
+}
+
+SEC("tp_btf/sys_enter")
+int BPF_PROG(handle__tp_btf, struct pt_regs *regs, long id)
+{
+ struct __kernel_timespec *ts;
+
+ if (id != __NR_nanosleep)
+ return 0;
+
+ ts = (void *)PT_REGS_PARM1_CORE(regs);
+ if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC)
+ return 0;
+
+ tp_btf_called = true;
+ return 0;
+}
+
+SEC("kprobe/hrtimer_nanosleep")
+int BPF_KPROBE(handle__kprobe,
+ ktime_t rqtp, enum hrtimer_mode mode, clockid_t clockid)
+{
+ if (rqtp == MY_TV_NSEC)
+ kprobe_called = true;
+ return 0;
+}
+
+SEC("fentry/hrtimer_nanosleep")
+int BPF_PROG(handle__fentry,
+ ktime_t rqtp, enum hrtimer_mode mode, clockid_t clockid)
+{
+ if (rqtp == MY_TV_NSEC)
+ fentry_called = true;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
index cb8a04ab7a78..a038e827f850 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
@@ -1,7 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
+#include <bpf/bpf_tracing.h>
#include <bpf/bpf_helpers.h>
-#include "bpf_trace_helpers.h"
+
+char _license[] SEC("license") = "GPL";
struct net_device {
/* Structure does not need to contain all entries,
@@ -27,16 +29,38 @@ struct xdp_buff {
struct xdp_rxq_info *rxq;
} __attribute__((preserve_access_index));
+struct meta {
+ int ifindex;
+ int pkt_len;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+} perf_buf_map SEC(".maps");
+
__u64 test_result_fentry = 0;
-SEC("fentry/_xdp_tx_iptunnel")
+SEC("fentry/FUNC")
int BPF_PROG(trace_on_entry, struct xdp_buff *xdp)
{
+ struct meta meta;
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+
+ meta.ifindex = xdp->rxq->dev->ifindex;
+ meta.pkt_len = data_end - data;
+ bpf_xdp_output(xdp, &perf_buf_map,
+ ((__u64) meta.pkt_len << 32) |
+ BPF_F_CURRENT_CPU,
+ &meta, sizeof(meta));
+
test_result_fentry = xdp->rxq->dev->ifindex;
return 0;
}
__u64 test_result_fexit = 0;
-SEC("fexit/_xdp_tx_iptunnel")
+SEC("fexit/FUNC")
int BPF_PROG(trace_on_exit, struct xdp_buff *xdp, int ret)
{
test_result_fexit = ret;
diff --git a/tools/testing/selftests/bpf/test_bpftool.py b/tools/testing/selftests/bpf/test_bpftool.py
new file mode 100644
index 000000000000..4fed2dc25c0a
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_bpftool.py
@@ -0,0 +1,178 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2020 SUSE LLC.
+
+import collections
+import functools
+import json
+import os
+import socket
+import subprocess
+import unittest
+
+
+# Add the source tree of bpftool and /usr/local/sbin to PATH
+cur_dir = os.path.dirname(os.path.realpath(__file__))
+bpftool_dir = os.path.abspath(os.path.join(cur_dir, "..", "..", "..", "..",
+ "tools", "bpf", "bpftool"))
+os.environ["PATH"] = bpftool_dir + ":/usr/local/sbin:" + os.environ["PATH"]
+
+
+class IfaceNotFoundError(Exception):
+ pass
+
+
+class UnprivilegedUserError(Exception):
+ pass
+
+
+def _bpftool(args, json=True):
+ _args = ["bpftool"]
+ if json:
+ _args.append("-j")
+ _args.extend(args)
+
+ return subprocess.check_output(_args)
+
+
+def bpftool(args):
+ return _bpftool(args, json=False).decode("utf-8")
+
+
+def bpftool_json(args):
+ res = _bpftool(args)
+ return json.loads(res)
+
+
+def get_default_iface():
+ for iface in socket.if_nameindex():
+ if iface[1] != "lo":
+ return iface[1]
+ raise IfaceNotFoundError("Could not find any network interface to probe")
+
+
+def default_iface(f):
+ @functools.wraps(f)
+ def wrapper(*args, **kwargs):
+ iface = get_default_iface()
+ return f(*args, iface, **kwargs)
+ return wrapper
+
+
+class TestBpftool(unittest.TestCase):
+ @classmethod
+ def setUpClass(cls):
+ if os.getuid() != 0:
+ raise UnprivilegedUserError(
+ "This test suite needs root privileges")
+
+ @default_iface
+ def test_feature_dev_json(self, iface):
+ unexpected_helpers = [
+ "bpf_probe_write_user",
+ "bpf_trace_printk",
+ ]
+ expected_keys = [
+ "syscall_config",
+ "program_types",
+ "map_types",
+ "helpers",
+ "misc",
+ ]
+
+ res = bpftool_json(["feature", "probe", "dev", iface])
+ # Check if the result has all expected keys.
+ self.assertCountEqual(res.keys(), expected_keys)
+ # Check if unexpected helpers are not included in helpers probes
+ # result.
+ for helpers in res["helpers"].values():
+ for unexpected_helper in unexpected_helpers:
+ self.assertNotIn(unexpected_helper, helpers)
+
+ def test_feature_kernel(self):
+ test_cases = [
+ bpftool_json(["feature", "probe", "kernel"]),
+ bpftool_json(["feature", "probe"]),
+ bpftool_json(["feature"]),
+ ]
+ unexpected_helpers = [
+ "bpf_probe_write_user",
+ "bpf_trace_printk",
+ ]
+ expected_keys = [
+ "syscall_config",
+ "system_config",
+ "program_types",
+ "map_types",
+ "helpers",
+ "misc",
+ ]
+
+ for tc in test_cases:
+ # Check if the result has all expected keys.
+ self.assertCountEqual(tc.keys(), expected_keys)
+ # Check if unexpected helpers are not included in helpers probes
+ # result.
+ for helpers in tc["helpers"].values():
+ for unexpected_helper in unexpected_helpers:
+ self.assertNotIn(unexpected_helper, helpers)
+
+ def test_feature_kernel_full(self):
+ test_cases = [
+ bpftool_json(["feature", "probe", "kernel", "full"]),
+ bpftool_json(["feature", "probe", "full"]),
+ ]
+ expected_helpers = [
+ "bpf_probe_write_user",
+ "bpf_trace_printk",
+ ]
+
+ for tc in test_cases:
+ # Check if expected helpers are included at least once in any
+ # helpers list for any program type. Unfortunately we cannot assume
+ # that they will be included in all program types or a specific
+ # subset of programs. It depends on the kernel version and
+ # configuration.
+ found_helpers = False
+
+ for helpers in tc["helpers"].values():
+ if all(expected_helper in helpers
+ for expected_helper in expected_helpers):
+ found_helpers = True
+ break
+
+ self.assertTrue(found_helpers)
+
+ def test_feature_kernel_full_vs_not_full(self):
+ full_res = bpftool_json(["feature", "probe", "full"])
+ not_full_res = bpftool_json(["feature", "probe"])
+ not_full_set = set()
+ full_set = set()
+
+ for helpers in full_res["helpers"].values():
+ for helper in helpers:
+ full_set.add(helper)
+
+ for helpers in not_full_res["helpers"].values():
+ for helper in helpers:
+ not_full_set.add(helper)
+
+ self.assertCountEqual(full_set - not_full_set,
+ {"bpf_probe_write_user", "bpf_trace_printk"})
+ self.assertCountEqual(not_full_set - full_set, set())
+
+ def test_feature_macros(self):
+ expected_patterns = [
+ r"/\*\*\* System call availability \*\*\*/",
+ r"#define HAVE_BPF_SYSCALL",
+ r"/\*\*\* eBPF program types \*\*\*/",
+ r"#define HAVE.*PROG_TYPE",
+ r"/\*\*\* eBPF map types \*\*\*/",
+ r"#define HAVE.*MAP_TYPE",
+ r"/\*\*\* eBPF helper functions \*\*\*/",
+ r"#define HAVE.*HELPER",
+ r"/\*\*\* eBPF misc features \*\*\*/",
+ ]
+
+ res = bpftool(["feature", "probe", "macros"])
+ for pattern in expected_patterns:
+ self.assertRegex(res, pattern)
diff --git a/tools/testing/selftests/bpf/test_bpftool.sh b/tools/testing/selftests/bpf/test_bpftool.sh
new file mode 100755
index 000000000000..66690778e36d
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_bpftool.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2020 SUSE LLC.
+
+python3 -m unittest -v test_bpftool.TestBpftool
diff --git a/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c b/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c
new file mode 100644
index 000000000000..ed253f252cd0
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Carlos Neira cneirabustos@gmail.com */
+#define _GNU_SOURCE
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sched.h>
+#include <sys/wait.h>
+#include <sys/mount.h>
+#include "test_progs.h"
+
+#define CHECK_NEWNS(condition, tag, format...) ({ \
+ int __ret = !!(condition); \
+ if (__ret) { \
+ printf("%s:FAIL:%s ", __func__, tag); \
+ printf(format); \
+ } else { \
+ printf("%s:PASS:%s\n", __func__, tag); \
+ } \
+ __ret; \
+})
+
+struct bss {
+ __u64 dev;
+ __u64 ino;
+ __u64 pid_tgid;
+ __u64 user_pid_tgid;
+};
+
+int main(int argc, char **argv)
+{
+ pid_t pid;
+ int exit_code = 1;
+ struct stat st;
+
+ printf("Testing bpf_get_ns_current_pid_tgid helper in new ns\n");
+
+ if (stat("/proc/self/ns/pid", &st)) {
+ perror("stat failed on /proc/self/ns/pid ns\n");
+ printf("%s:FAILED\n", argv[0]);
+ return exit_code;
+ }
+
+ if (CHECK_NEWNS(unshare(CLONE_NEWPID | CLONE_NEWNS),
+ "unshare CLONE_NEWPID | CLONE_NEWNS", "error errno=%d\n", errno))
+ return exit_code;
+
+ pid = fork();
+ if (pid == -1) {
+ perror("Fork() failed\n");
+ printf("%s:FAILED\n", argv[0]);
+ return exit_code;
+ }
+
+ if (pid > 0) {
+ int status;
+
+ usleep(5);
+ waitpid(pid, &status, 0);
+ return 0;
+ } else {
+
+ pid = fork();
+ if (pid == -1) {
+ perror("Fork() failed\n");
+ printf("%s:FAILED\n", argv[0]);
+ return exit_code;
+ }
+
+ if (pid > 0) {
+ int status;
+ waitpid(pid, &status, 0);
+ return 0;
+ } else {
+ if (CHECK_NEWNS(mount("none", "/proc", NULL, MS_PRIVATE|MS_REC, NULL),
+ "Unmounting proc", "Cannot umount proc! errno=%d\n", errno))
+ return exit_code;
+
+ if (CHECK_NEWNS(mount("proc", "/proc", "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL),
+ "Mounting proc", "Cannot mount proc! errno=%d\n", errno))
+ return exit_code;
+
+ const char *probe_name = "raw_tracepoint/sys_enter";
+ const char *file = "test_ns_current_pid_tgid.o";
+ struct bpf_link *link = NULL;
+ struct bpf_program *prog;
+ struct bpf_map *bss_map;
+ struct bpf_object *obj;
+ int exit_code = 1;
+ int err, key = 0;
+ struct bss bss;
+ struct stat st;
+ __u64 id;
+
+ obj = bpf_object__open_file(file, NULL);
+ if (CHECK_NEWNS(IS_ERR(obj), "obj_open", "err %ld\n", PTR_ERR(obj)))
+ return exit_code;
+
+ err = bpf_object__load(obj);
+ if (CHECK_NEWNS(err, "obj_load", "err %d errno %d\n", err, errno))
+ goto cleanup;
+
+ bss_map = bpf_object__find_map_by_name(obj, "test_ns_.bss");
+ if (CHECK_NEWNS(!bss_map, "find_bss_map", "failed\n"))
+ goto cleanup;
+
+ prog = bpf_object__find_program_by_title(obj, probe_name);
+ if (CHECK_NEWNS(!prog, "find_prog", "prog '%s' not found\n",
+ probe_name))
+ goto cleanup;
+
+ memset(&bss, 0, sizeof(bss));
+ pid_t tid = syscall(SYS_gettid);
+ pid_t pid = getpid();
+
+ id = (__u64) tid << 32 | pid;
+ bss.user_pid_tgid = id;
+
+ if (CHECK_NEWNS(stat("/proc/self/ns/pid", &st),
+ "stat new ns", "Failed to stat /proc/self/ns/pid errno=%d\n", errno))
+ goto cleanup;
+
+ bss.dev = st.st_dev;
+ bss.ino = st.st_ino;
+
+ err = bpf_map_update_elem(bpf_map__fd(bss_map), &key, &bss, 0);
+ if (CHECK_NEWNS(err, "setting_bss", "failed to set bss : %d\n", err))
+ goto cleanup;
+
+ link = bpf_program__attach_raw_tracepoint(prog, "sys_enter");
+ if (CHECK_NEWNS(IS_ERR(link), "attach_raw_tp", "err %ld\n",
+ PTR_ERR(link))) {
+ link = NULL;
+ goto cleanup;
+ }
+
+ /* trigger some syscalls */
+ usleep(1);
+
+ err = bpf_map_lookup_elem(bpf_map__fd(bss_map), &key, &bss);
+ if (CHECK_NEWNS(err, "set_bss", "failed to get bss : %d\n", err))
+ goto cleanup;
+
+ if (CHECK_NEWNS(id != bss.pid_tgid, "Compare user pid/tgid vs. bpf pid/tgid",
+ "User pid/tgid %llu BPF pid/tgid %llu\n", id, bss.pid_tgid))
+ goto cleanup;
+
+ exit_code = 0;
+ printf("%s:PASS\n", argv[0]);
+cleanup:
+ if (!link) {
+ bpf_link__destroy(link);
+ link = NULL;
+ }
+ bpf_object__close(obj);
+ }
+ }
+}
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 02eae1e864c2..c6766b2cff85 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -756,11 +756,7 @@ static void test_sockmap(unsigned int tasks, void *data)
/* Test update without programs */
for (i = 0; i < 6; i++) {
err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY);
- if (i < 2 && !err) {
- printf("Allowed update sockmap '%i:%i' not in ESTABLISHED\n",
- i, sfd[i]);
- goto out_sockmap;
- } else if (i >= 2 && err) {
+ if (err) {
printf("Failed noprog update sockmap '%i:%i'\n",
i, sfd[i]);
goto out_sockmap;
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index bab1e6f1d8f1..b521e0a512b6 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -1,11 +1,16 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2017 Facebook
*/
+#define _GNU_SOURCE
#include "test_progs.h"
#include "cgroup_helpers.h"
#include "bpf_rlimit.h"
#include <argp.h>
+#include <pthread.h>
+#include <sched.h>
+#include <signal.h>
#include <string.h>
+#include <execinfo.h> /* backtrace */
/* defined in test_progs.h */
struct test_env env = {};
@@ -27,6 +32,20 @@ struct prog_test_def {
int old_error_cnt;
};
+/* Override C runtime library's usleep() implementation to ensure nanosleep()
+ * is always called. Usleep is frequently used in selftests as a way to
+ * trigger kprobe and tracepoints.
+ */
+int usleep(useconds_t usec)
+{
+ struct timespec ts = {
+ .tv_sec = usec / 1000000,
+ .tv_nsec = (usec % 1000000) * 1000,
+ };
+
+ return syscall(__NR_nanosleep, &ts, NULL);
+}
+
static bool should_run(struct test_selector *sel, int num, const char *name)
{
int i;
@@ -74,6 +93,34 @@ static void skip_account(void)
}
}
+static void stdio_restore(void);
+
+/* A bunch of tests set custom affinity per-thread and/or per-process. Reset
+ * it after each test/sub-test.
+ */
+static void reset_affinity() {
+
+ cpu_set_t cpuset;
+ int i, err;
+
+ CPU_ZERO(&cpuset);
+ for (i = 0; i < env.nr_cpus; i++)
+ CPU_SET(i, &cpuset);
+
+ err = sched_setaffinity(0, sizeof(cpuset), &cpuset);
+ if (err < 0) {
+ stdio_restore();
+ fprintf(stderr, "Failed to reset process affinity: %d!\n", err);
+ exit(-1);
+ }
+ err = pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset);
+ if (err < 0) {
+ stdio_restore();
+ fprintf(stderr, "Failed to reset thread affinity: %d!\n", err);
+ exit(-1);
+ }
+}
+
void test__end_subtest()
{
struct prog_test_def *test = env.test;
@@ -91,6 +138,8 @@ void test__end_subtest()
test->test_num, test->subtest_num,
test->subtest_name, sub_error_cnt ? "FAIL" : "OK");
+ reset_affinity();
+
free(test->subtest_name);
test->subtest_name = NULL;
}
@@ -196,7 +245,7 @@ int bpf_find_map(const char *test, struct bpf_object *obj, const char *name)
map = bpf_object__find_map_by_name(obj, name);
if (!map) {
- printf("%s:FAIL:map '%s' not found\n", test, name);
+ fprintf(stdout, "%s:FAIL:map '%s' not found\n", test, name);
test__fail();
return -1;
}
@@ -367,7 +416,7 @@ static int libbpf_print_fn(enum libbpf_print_level level,
{
if (env.verbosity < VERBOSE_VERY && level == LIBBPF_DEBUG)
return 0;
- vprintf(format, args);
+ vfprintf(stdout, format, args);
return 0;
}
@@ -408,7 +457,7 @@ err:
int parse_num_list(const char *s, struct test_selector *sel)
{
- int i, set_len = 0, num, start = 0, end = -1;
+ int i, set_len = 0, new_len, num, start = 0, end = -1;
bool *set = NULL, *tmp, parsing_end = false;
char *next;
@@ -443,18 +492,19 @@ int parse_num_list(const char *s, struct test_selector *sel)
return -EINVAL;
if (end + 1 > set_len) {
- set_len = end + 1;
- tmp = realloc(set, set_len);
+ new_len = end + 1;
+ tmp = realloc(set, new_len);
if (!tmp) {
free(set);
return -ENOMEM;
}
+ for (i = set_len; i < start; i++)
+ tmp[i] = false;
set = tmp;
+ set_len = new_len;
}
- for (i = start; i <= end; i++) {
+ for (i = start; i <= end; i++)
set[i] = true;
- }
-
}
if (!set)
@@ -613,10 +663,27 @@ int cd_flavor_subdir(const char *exec_name)
if (!flavor)
return 0;
flavor++;
- printf("Switching to flavor '%s' subdirectory...\n", flavor);
+ fprintf(stdout, "Switching to flavor '%s' subdirectory...\n", flavor);
return chdir(flavor);
}
+#define MAX_BACKTRACE_SZ 128
+void crash_handler(int signum)
+{
+ void *bt[MAX_BACKTRACE_SZ];
+ size_t sz;
+
+ sz = backtrace(bt, ARRAY_SIZE(bt));
+
+ if (env.test)
+ dump_test_log(env.test, true);
+ if (env.stdout)
+ stdio_restore();
+
+ fprintf(stderr, "Caught signal #%d!\nStack trace:\n", signum);
+ backtrace_symbols_fd(bt, sz, STDERR_FILENO);
+}
+
int main(int argc, char **argv)
{
static const struct argp argp = {
@@ -624,8 +691,14 @@ int main(int argc, char **argv)
.parser = parse_arg,
.doc = argp_program_doc,
};
+ struct sigaction sigact = {
+ .sa_handler = crash_handler,
+ .sa_flags = SA_RESETHAND,
+ };
int err, i;
+ sigaction(SIGSEGV, &sigact, NULL);
+
err = argp_parse(&argp, argc, argv, 0, NULL, &env);
if (err)
return err;
@@ -639,6 +712,12 @@ int main(int argc, char **argv)
srand(time(NULL));
env.jit_enabled = is_jit_enabled();
+ env.nr_cpus = libbpf_num_possible_cpus();
+ if (env.nr_cpus < 0) {
+ fprintf(stderr, "Failed to get number of CPUs: %d!\n",
+ env.nr_cpus);
+ return -1;
+ }
stdio_hijack();
for (i = 0; i < prog_test_cnt; i++) {
@@ -669,12 +748,13 @@ int main(int argc, char **argv)
test->test_num, test->test_name,
test->error_cnt ? "FAIL" : "OK");
+ reset_affinity();
if (test->need_cgroup_cleanup)
cleanup_cgroup_environment();
}
stdio_restore();
- printf("Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
- env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt);
+ fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
+ env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt);
free(env.test_selector.blacklist.strs);
free(env.test_selector.whitelist.strs);
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index bcfa9ef23fda..f4aff6b8284b 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -71,6 +71,7 @@ struct test_env {
FILE *stderr;
char *log_buf;
size_t log_cnt;
+ int nr_cpus;
int succ_cnt; /* successful tests */
int sub_succ_cnt; /* successful sub-tests */
@@ -109,10 +110,10 @@ extern struct ipv6_packet pkt_v6;
int __save_errno = errno; \
if (__ret) { \
test__fail(); \
- printf("%s:FAIL:%s ", __func__, tag); \
- printf(format); \
+ fprintf(stdout, "%s:FAIL:%s ", __func__, tag); \
+ fprintf(stdout, ##format); \
} else { \
- printf("%s:PASS:%s %d nsec\n", \
+ fprintf(stdout, "%s:PASS:%s %d nsec\n", \
__func__, tag, duration); \
} \
errno = __save_errno; \
@@ -124,7 +125,7 @@ extern struct ipv6_packet pkt_v6;
int __save_errno = errno; \
if (__ret) { \
test__fail(); \
- printf("%s:FAIL:%d\n", __func__, __LINE__); \
+ fprintf(stdout, "%s:FAIL:%d\n", __func__, __LINE__); \
} \
errno = __save_errno; \
__ret; \
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index 7f989b3e4e22..4d0e913bbb22 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -4,12 +4,15 @@
#include <string.h>
#include <assert.h>
#include <errno.h>
+#include <fcntl.h>
#include <poll.h>
#include <unistd.h>
#include <linux/perf_event.h>
#include <sys/mman.h>
#include "trace_helpers.h"
+#define DEBUGFS "/sys/kernel/debug/tracing/"
+
#define MAX_SYMS 300000
static struct ksym syms[MAX_SYMS];
static int sym_cnt;
@@ -86,3 +89,23 @@ long ksym_get_addr(const char *name)
return 0;
}
+
+void read_trace_pipe(void)
+{
+ int trace_fd;
+
+ trace_fd = open(DEBUGFS "trace_pipe", O_RDONLY, 0);
+ if (trace_fd < 0)
+ return;
+
+ while (1) {
+ static char buf[4096];
+ ssize_t sz;
+
+ sz = read(trace_fd, buf, sizeof(buf) - 1);
+ if (sz > 0) {
+ buf[sz] = 0;
+ puts(buf);
+ }
+ }
+}
diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h
index 0383c9b8adc1..25ef597dd03f 100644
--- a/tools/testing/selftests/bpf/trace_helpers.h
+++ b/tools/testing/selftests/bpf/trace_helpers.h
@@ -12,5 +12,6 @@ struct ksym {
int load_kallsyms(void);
struct ksym *ksym_search(long key);
long ksym_get_addr(const char *name);
+void read_trace_pipe(void);
#endif
diff --git a/tools/testing/selftests/bpf/verifier/bounds.c b/tools/testing/selftests/bpf/verifier/bounds.c
index d55f476f2237..4d0d09574bf4 100644
--- a/tools/testing/selftests/bpf/verifier/bounds.c
+++ b/tools/testing/selftests/bpf/verifier/bounds.c
@@ -257,17 +257,15 @@
* [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff]
*/
BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
- /* no-op or OOB pointer computation */
+ /* error on OOB pointer computation */
BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- /* potentially OOB access */
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
/* exit */
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.fixup_map_hash_8b = { 3 },
/* not actually fully unbounded, but the bound is very high */
- .errstr = "R0 unbounded memory access",
+ .errstr = "value 72057594021150720 makes map_value pointer be out of bounds",
.result = REJECT
},
{
@@ -299,17 +297,15 @@
* [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff]
*/
BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
- /* no-op or OOB pointer computation */
+ /* error on OOB pointer computation */
BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- /* potentially OOB access */
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
/* exit */
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.fixup_map_hash_8b = { 3 },
/* not actually fully unbounded, but the bound is very high */
- .errstr = "R0 unbounded memory access",
+ .errstr = "value 72057594021150720 makes map_value pointer be out of bounds",
.result = REJECT
},
{
@@ -411,16 +407,14 @@
BPF_ALU32_IMM(BPF_RSH, BPF_REG_1, 31),
/* r1 = 0xffff'fffe (NOT 0!) */
BPF_ALU32_IMM(BPF_SUB, BPF_REG_1, 2),
- /* computes OOB pointer */
+ /* error on computing OOB pointer */
BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- /* OOB access */
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
/* exit */
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.fixup_map_hash_8b = { 3 },
- .errstr = "R0 invalid mem access",
+ .errstr = "math between map_value pointer and 4294967294 is not allowed",
.result = REJECT,
},
{
@@ -506,3 +500,42 @@
.errstr = "map_value pointer and 1000000000000",
.result = REJECT
},
+{
+ "bounds check mixed 32bit and 64bit arithmatic. test1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_1, -1),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 32),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
+ /* r1 = 0xffffFFFF00000001 */
+ BPF_JMP32_IMM(BPF_JGT, BPF_REG_1, 1, 3),
+ /* check ALU64 op keeps 32bit bounds */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
+ BPF_JMP32_IMM(BPF_JGT, BPF_REG_1, 2, 1),
+ BPF_JMP_A(1),
+ /* invalid ldx if bounds are lost above */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, -1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT
+},
+{
+ "bounds check mixed 32bit and 64bit arithmatic. test2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_1, -1),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 32),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
+ /* r1 = 0xffffFFFF00000001 */
+ BPF_MOV64_IMM(BPF_REG_2, 3),
+ /* r1 = 0x2 */
+ BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 1),
+ /* check ALU32 op zero extends 64bit bounds */
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 1),
+ BPF_JMP_A(1),
+ /* invalid ldx if bounds are lost above */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, -1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT
+},
diff --git a/tools/testing/selftests/bpf/verifier/bpf_get_stack.c b/tools/testing/selftests/bpf/verifier/bpf_get_stack.c
index f24d50f09dbe..69b048cf46d9 100644
--- a/tools/testing/selftests/bpf/verifier/bpf_get_stack.c
+++ b/tools/testing/selftests/bpf/verifier/bpf_get_stack.c
@@ -9,17 +9,17 @@
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 28),
BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_9, sizeof(struct test_val)),
+ BPF_MOV64_IMM(BPF_REG_9, sizeof(struct test_val)/2),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
- BPF_MOV64_IMM(BPF_REG_3, sizeof(struct test_val)),
+ BPF_MOV64_IMM(BPF_REG_3, sizeof(struct test_val)/2),
BPF_MOV64_IMM(BPF_REG_4, 256),
BPF_EMIT_CALL(BPF_FUNC_get_stack),
BPF_MOV64_IMM(BPF_REG_1, 0),
BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
BPF_ALU64_IMM(BPF_LSH, BPF_REG_8, 32),
BPF_ALU64_IMM(BPF_ARSH, BPF_REG_8, 32),
- BPF_JMP_REG(BPF_JSLT, BPF_REG_1, BPF_REG_8, 16),
+ BPF_JMP_REG(BPF_JSGT, BPF_REG_1, BPF_REG_8, 16),
BPF_ALU64_REG(BPF_SUB, BPF_REG_9, BPF_REG_8),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_8),
@@ -29,7 +29,7 @@
BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_1),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
- BPF_MOV64_IMM(BPF_REG_5, sizeof(struct test_val)),
+ BPF_MOV64_IMM(BPF_REG_5, sizeof(struct test_val)/2),
BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_5),
BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 4),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
diff --git a/tools/testing/selftests/bpf/verifier/ctx.c b/tools/testing/selftests/bpf/verifier/ctx.c
index 92762c08f5e3..93d6b1641481 100644
--- a/tools/testing/selftests/bpf/verifier/ctx.c
+++ b/tools/testing/selftests/bpf/verifier/ctx.c
@@ -91,3 +91,108 @@
.result = REJECT,
.errstr = "variable ctx access var_off=(0x0; 0x4)",
},
+{
+ "pass ctx or null check, 1: ctx",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_netns_cookie),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+ .expected_attach_type = BPF_CGROUP_UDP6_SENDMSG,
+ .result = ACCEPT,
+},
+{
+ "pass ctx or null check, 2: null",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_netns_cookie),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+ .expected_attach_type = BPF_CGROUP_UDP6_SENDMSG,
+ .result = ACCEPT,
+},
+{
+ "pass ctx or null check, 3: 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_netns_cookie),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+ .expected_attach_type = BPF_CGROUP_UDP6_SENDMSG,
+ .result = REJECT,
+ .errstr = "R1 type=inv expected=ctx",
+},
+{
+ "pass ctx or null check, 4: ctx - const",
+ .insns = {
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -612),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_netns_cookie),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+ .expected_attach_type = BPF_CGROUP_UDP6_SENDMSG,
+ .result = REJECT,
+ .errstr = "dereference of modified ctx ptr",
+},
+{
+ "pass ctx or null check, 5: null (connect)",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_netns_cookie),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+ .expected_attach_type = BPF_CGROUP_INET4_CONNECT,
+ .result = ACCEPT,
+},
+{
+ "pass ctx or null check, 6: null (bind)",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_netns_cookie),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
+ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .result = ACCEPT,
+},
+{
+ "pass ctx or null check, 7: ctx (bind)",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_socket_cookie),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
+ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .result = ACCEPT,
+},
+{
+ "pass ctx or null check, 8: null (bind)",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_socket_cookie),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
+ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .result = REJECT,
+ .errstr = "R1 type=inv expected=ctx",
+},
diff --git a/tools/testing/selftests/bpf/verifier/ctx_skb.c b/tools/testing/selftests/bpf/verifier/ctx_skb.c
index d438193804b2..2e16b8e268f2 100644
--- a/tools/testing/selftests/bpf/verifier/ctx_skb.c
+++ b/tools/testing/selftests/bpf/verifier/ctx_skb.c
@@ -1011,6 +1011,53 @@
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
},
{
+ "read gso_size from CGROUP_SKB",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, gso_size)),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+},
+{
+ "read gso_size from CGROUP_SKB",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, gso_size)),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+},
+{
+ "write gso_size from CGROUP_SKB",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, gso_size)),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .result_unpriv = REJECT,
+ .errstr = "invalid bpf_context access off=176 size=4",
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+},
+{
+ "read gso_size from CLS",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, gso_size)),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+},
+{
"check wire_len is not readable by sockets",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
diff --git a/tools/testing/selftests/drivers/net/mlxsw/blackhole_routes.sh b/tools/testing/selftests/drivers/net/mlxsw/blackhole_routes.sh
index 5ba5bef44d5b..bdffe698e1d1 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/blackhole_routes.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/blackhole_routes.sh
@@ -45,6 +45,7 @@ ALL_TESTS="
blackhole_ipv6
"
NUM_NETIFS=4
+: ${TIMEOUT:=20000} # ms
source $lib_dir/tc_common.sh
source $lib_dir/lib.sh
@@ -123,7 +124,7 @@ blackhole_ipv4()
skip_hw dst_ip 198.51.100.1 src_ip 192.0.2.1 ip_proto icmp \
action pass
- ip -4 route show 198.51.100.0/30 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload ip -4 route show 198.51.100.0/30
check_err $? "route not marked as offloaded when should"
ping_do $h1 198.51.100.1
@@ -147,7 +148,7 @@ blackhole_ipv6()
skip_hw dst_ip 2001:db8:2::1 src_ip 2001:db8:1::1 \
ip_proto icmpv6 action pass
- ip -6 route show 2001:db8:2::/120 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload ip -6 route show 2001:db8:2::/120
check_err $? "route not marked as offloaded when should"
ping6_do $h1 2001:db8:2::1
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_acl_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_acl_drops.sh
new file mode 100755
index 000000000000..26044e397157
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_acl_drops.sh
@@ -0,0 +1,151 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test devlink-trap ACL drops functionality over mlxsw.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ ingress_flow_action_drop_test
+ egress_flow_action_drop_test
+"
+NUM_NETIFS=4
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+ simple_if_init $h1
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ simple_if_init $h2
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2
+}
+
+switch_create()
+{
+ ip link add dev br0 type bridge vlan_filtering 1 mcast_snooping 0
+
+ ip link set dev $swp1 master br0
+ ip link set dev $swp2 master br0
+
+ ip link set dev br0 up
+ ip link set dev $swp1 up
+ ip link set dev $swp2 up
+
+ tc qdisc add dev $swp1 clsact
+ tc qdisc add dev $swp2 clsact
+}
+
+switch_destroy()
+{
+ tc qdisc del dev $swp2 clsact
+ tc qdisc del dev $swp1 clsact
+
+ ip link set dev $swp2 down
+ ip link set dev $swp1 down
+
+ ip link del dev br0
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ h1mac=$(mac_get $h1)
+ h2mac=$(mac_get $h2)
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ingress_flow_action_drop_test()
+{
+ local mz_pid
+
+ tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \
+ flower src_mac $h1mac action pass
+
+ tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 \
+ flower dst_ip 192.0.2.2 action drop
+
+ $MZ $h1 -c 0 -p 100 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -d 1msec -q &
+ mz_pid=$!
+
+ RET=0
+
+ devlink_trap_drop_test ingress_flow_action_drop acl_drops $swp2 101
+
+ log_test "ingress_flow_action_drop"
+
+ tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+ devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101
+}
+
+egress_flow_action_drop_test()
+{
+ local mz_pid
+
+ tc filter add dev $swp2 egress protocol ip pref 2 handle 102 \
+ flower src_mac $h1mac action pass
+
+ tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \
+ flower dst_ip 192.0.2.2 action drop
+
+ $MZ $h1 -c 0 -p 100 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -d 1msec -q &
+ mz_pid=$!
+
+ RET=0
+
+ devlink_trap_drop_test egress_flow_action_drop acl_drops $swp2 102
+
+ log_test "egress_flow_action_drop"
+
+ tc filter del dev $swp2 egress protocol ip pref 1 handle 101 flower
+
+ devlink_trap_drop_cleanup $mz_pid $swp2 ip 2 102
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh
index 58cdbfb608e9..e7aecb065409 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh
@@ -107,11 +107,11 @@ source_mac_is_multicast_test()
RET=0
- devlink_trap_drop_test $trap_name $group_name $swp2
+ devlink_trap_drop_test $trap_name $group_name $swp2 101
log_test "Source MAC is multicast"
- devlink_trap_drop_cleanup $mz_pid $swp2 ip
+ devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101
}
__vlan_tag_mismatch_test()
@@ -132,7 +132,7 @@ __vlan_tag_mismatch_test()
$MZ $h1 "$opt" -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $swp2
+ devlink_trap_drop_test $trap_name $group_name $swp2 101
# Add PVID and make sure packets are no longer dropped.
bridge vlan add vid 1 dev $swp1 pvid untagged master
@@ -148,7 +148,7 @@ __vlan_tag_mismatch_test()
devlink_trap_action_set $trap_name "drop"
- devlink_trap_drop_cleanup $mz_pid $swp2 ip
+ devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101
}
vlan_tag_mismatch_untagged_test()
@@ -193,7 +193,7 @@ ingress_vlan_filter_test()
$MZ $h1 -Q $vid -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $swp2
+ devlink_trap_drop_test $trap_name $group_name $swp2 101
# Add the VLAN on the bridge port and make sure packets are no longer
# dropped.
@@ -212,7 +212,7 @@ ingress_vlan_filter_test()
log_test "Ingress VLAN filter"
- devlink_trap_drop_cleanup $mz_pid $swp2 ip
+ devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101
bridge vlan del vid $vid dev $swp1 master
bridge vlan del vid $vid dev $swp2 master
@@ -237,7 +237,7 @@ __ingress_stp_filter_test()
$MZ $h1 -Q $vid -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $swp2
+ devlink_trap_drop_test $trap_name $group_name $swp2 101
# Change STP state to forwarding and make sure packets are no longer
# dropped.
@@ -254,7 +254,7 @@ __ingress_stp_filter_test()
devlink_trap_action_set $trap_name "drop"
- devlink_trap_drop_cleanup $mz_pid $swp2 ip
+ devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101
bridge vlan del vid $vid dev $swp1 master
bridge vlan del vid $vid dev $swp2 master
@@ -308,7 +308,7 @@ port_list_is_empty_uc_test()
$MZ $h1 -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $swp2
+ devlink_trap_drop_test $trap_name $group_name $swp2 101
# Allow packets to be flooded to one port.
ip link set dev $swp2 type bridge_slave flood on
@@ -326,7 +326,7 @@ port_list_is_empty_uc_test()
log_test "Port list is empty - unicast"
- devlink_trap_drop_cleanup $mz_pid $swp2 ip
+ devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101
ip link set dev $swp1 type bridge_slave flood on
}
@@ -354,7 +354,7 @@ port_list_is_empty_mc_test()
$MZ $h1 -c 0 -p 100 -a own -b $dmac -t ip -B $dip -d 1msec -q &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $swp2
+ devlink_trap_drop_test $trap_name $group_name $swp2 101
# Allow packets to be flooded to one port.
ip link set dev $swp2 type bridge_slave mcast_flood on
@@ -372,7 +372,7 @@ port_list_is_empty_mc_test()
log_test "Port list is empty - multicast"
- devlink_trap_drop_cleanup $mz_pid $swp2 ip
+ devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101
ip link set dev $swp1 type bridge_slave mcast_flood on
}
@@ -401,7 +401,7 @@ port_loopback_filter_uc_test()
$MZ $h1 -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $swp2
+ devlink_trap_drop_test $trap_name $group_name $swp2 101
# Allow packets to be flooded.
ip link set dev $swp2 type bridge_slave flood on
@@ -419,7 +419,7 @@ port_loopback_filter_uc_test()
log_test "Port loopback filter - unicast"
- devlink_trap_drop_cleanup $mz_pid $swp2 ip
+ devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101
}
port_loopback_filter_test()
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
index d88d8e47d11b..616f47d86a61 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
@@ -176,11 +176,11 @@ non_ip_test()
00:00 de:ad:be:ef" &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $rp2
+ devlink_trap_drop_test $trap_name $group_name $rp2 101
log_test "Non IP"
- devlink_trap_drop_cleanup $mz_pid $rp2 "ip"
+ devlink_trap_drop_cleanup $mz_pid $rp2 "ip" 1 101
}
__uc_dip_over_mc_dmac_test()
@@ -206,11 +206,11 @@ __uc_dip_over_mc_dmac_test()
-B $dip -d 1msec -q &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $rp2
+ devlink_trap_drop_test $trap_name $group_name $rp2 101
log_test "Unicast destination IP over multicast destination MAC: $desc"
- devlink_trap_drop_cleanup $mz_pid $rp2 $proto
+ devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101
}
uc_dip_over_mc_dmac_test()
@@ -242,11 +242,11 @@ __sip_is_loopback_test()
-b $rp1mac -B $dip -d 1msec -q &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $rp2
+ devlink_trap_drop_test $trap_name $group_name $rp2 101
log_test "Source IP is loopback address: $desc"
- devlink_trap_drop_cleanup $mz_pid $rp2 $proto
+ devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101
}
sip_is_loopback_test()
@@ -277,11 +277,11 @@ __dip_is_loopback_test()
-B $dip -d 1msec -q &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $rp2
+ devlink_trap_drop_test $trap_name $group_name $rp2 101
log_test "Destination IP is loopback address: $desc"
- devlink_trap_drop_cleanup $mz_pid $rp2 $proto
+ devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101
}
dip_is_loopback_test()
@@ -313,11 +313,11 @@ __sip_is_mc_test()
-b $rp1mac -B $dip -d 1msec -q &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $rp2
+ devlink_trap_drop_test $trap_name $group_name $rp2 101
log_test "Source IP is multicast: $desc"
- devlink_trap_drop_cleanup $mz_pid $rp2 $proto
+ devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101
}
sip_is_mc_test()
@@ -345,11 +345,11 @@ ipv4_sip_is_limited_bc_test()
-B $h2_ipv4 -d 1msec -q &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $rp2
+ devlink_trap_drop_test $trap_name $group_name $rp2 101
log_test "IPv4 source IP is limited broadcast"
- devlink_trap_drop_cleanup $mz_pid $rp2 "ip"
+ devlink_trap_drop_cleanup $mz_pid $rp2 "ip" 1 101
}
ipv4_payload_get()
@@ -399,11 +399,11 @@ __ipv4_header_corrupted_test()
$MZ $h1 -c 0 -d 1msec -a $h1mac -b $rp1mac -q p=$payload &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $rp2
+ devlink_trap_drop_test $trap_name $group_name $rp2 101
log_test "IP header corrupted: $desc: IPv4"
- devlink_trap_drop_cleanup $mz_pid $rp2 "ip"
+ devlink_trap_drop_cleanup $mz_pid $rp2 "ip" 1 101
}
ipv6_payload_get()
@@ -446,11 +446,11 @@ __ipv6_header_corrupted_test()
$MZ $h1 -c 0 -d 1msec -a $h1mac -b $rp1mac -q p=$payload &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $rp2
+ devlink_trap_drop_test $trap_name $group_name $rp2 101
log_test "IP header corrupted: $desc: IPv6"
- devlink_trap_drop_cleanup $mz_pid $rp2 "ip"
+ devlink_trap_drop_cleanup $mz_pid $rp2 "ip" 1 101
}
ip_header_corrupted_test()
@@ -485,11 +485,11 @@ ipv6_mc_dip_reserved_scope_test()
"33:33:00:00:00:00" -B $dip -d 1msec -q &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $rp2
+ devlink_trap_drop_test $trap_name $group_name $rp2 101
log_test "IPv6 multicast destination IP reserved scope"
- devlink_trap_drop_cleanup $mz_pid $rp2 "ipv6"
+ devlink_trap_drop_cleanup $mz_pid $rp2 "ipv6" 1 101
}
ipv6_mc_dip_interface_local_scope_test()
@@ -511,11 +511,11 @@ ipv6_mc_dip_interface_local_scope_test()
"33:33:00:00:00:00" -B $dip -d 1msec -q &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $rp2
+ devlink_trap_drop_test $trap_name $group_name $rp2 101
log_test "IPv6 multicast destination IP interface-local scope"
- devlink_trap_drop_cleanup $mz_pid $rp2 "ipv6"
+ devlink_trap_drop_cleanup $mz_pid $rp2 "ipv6" 1 101
}
__blackhole_route_test()
@@ -542,10 +542,10 @@ __blackhole_route_test()
-B $dip -d 1msec -q &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $rp2
+ devlink_trap_drop_test $trap_name $group_name $rp2 101
log_test "Blackhole route: IPv$flags"
- devlink_trap_drop_cleanup $mz_pid $rp2 $proto
+ devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101
ip -$flags route del blackhole $subnet
}
@@ -641,13 +641,9 @@ erif_disabled_test()
mz_pid=$!
sleep 5
- # In order to see this trap we need a route that points to disabled RIF.
- # When ipv6 address is flushed, there is a delay and the routes are
- # deleted before the RIF and we cannot get state that we have route
- # to disabled RIF.
- # Delete IPv6 address first and then check this trap with flushing IPv4.
- ip -6 add flush dev br0
- ip -4 add flush dev br0
+ # Unlinking the port from the bridge will disable the RIF associated
+ # with br0 as it is no longer an upper of any mlxsw port.
+ ip link set dev $rp1 nomaster
t1_packets=$(devlink_trap_rx_packets_get $trap_name)
t1_bytes=$(devlink_trap_rx_bytes_get $trap_name)
@@ -659,7 +655,6 @@ erif_disabled_test()
log_test "Egress RIF disabled"
kill $mz_pid && wait $mz_pid &> /dev/null
- ip link set dev $rp1 nomaster
__addr_add_del $rp1 add 192.0.2.2/24 2001:db8:1::2/64
ip link del dev br0 type bridge
devlink_trap_action_set $trap_name "drop"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh
new file mode 100755
index 000000000000..47edf099a17e
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh
@@ -0,0 +1,384 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test devlink-trap policer functionality over mlxsw.
+
+# +---------------------------------+
+# | H1 (vrf) |
+# | + $h1 |
+# | | 192.0.2.1/24 |
+# | | |
+# | | default via 192.0.2.2 |
+# +----|----------------------------+
+# |
+# +----|----------------------------------------------------------------------+
+# | SW | |
+# | + $rp1 |
+# | 192.0.2.2/24 |
+# | |
+# | 198.51.100.2/24 |
+# | + $rp2 |
+# | | |
+# +----|----------------------------------------------------------------------+
+# |
+# +----|----------------------------+
+# | | default via 198.51.100.2 |
+# | | |
+# | | 198.51.100.1/24 |
+# | + $h2 |
+# | H2 (vrf) |
+# +---------------------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ rate_limits_test
+ burst_limits_test
+ rate_test
+ burst_test
+"
+NUM_NETIFS=4
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24
+ mtu_set $h1 10000
+
+ ip -4 route add default vrf v$h1 nexthop via 192.0.2.2
+}
+
+h1_destroy()
+{
+ ip -4 route del default vrf v$h1 nexthop via 192.0.2.2
+
+ mtu_restore $h1
+ simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+ simple_if_init $h2 198.51.100.1/24
+ mtu_set $h2 10000
+
+ ip -4 route add default vrf v$h2 nexthop via 198.51.100.2
+}
+
+h2_destroy()
+{
+ ip -4 route del default vrf v$h2 nexthop via 198.51.100.2
+
+ mtu_restore $h2
+ simple_if_fini $h2 198.51.100.1/24
+}
+
+router_create()
+{
+ ip link set dev $rp1 up
+ ip link set dev $rp2 up
+
+ __addr_add_del $rp1 add 192.0.2.2/24
+ __addr_add_del $rp2 add 198.51.100.2/24
+ mtu_set $rp1 10000
+ mtu_set $rp2 10000
+
+ ip -4 route add blackhole 198.51.100.100
+
+ devlink trap set $DEVLINK_DEV trap blackhole_route action trap
+}
+
+router_destroy()
+{
+ devlink trap set $DEVLINK_DEV trap blackhole_route action drop
+
+ ip -4 route del blackhole 198.51.100.100
+
+ mtu_restore $rp2
+ mtu_restore $rp1
+ __addr_add_del $rp2 del 198.51.100.2/24
+ __addr_add_del $rp1 del 192.0.2.2/24
+
+ ip link set dev $rp2 down
+ ip link set dev $rp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rp1=${NETIFS[p2]}
+
+ rp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ rp1_mac=$(mac_get $rp1)
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ router_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ router_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+
+ # Reload to ensure devlink-trap settings are back to default.
+ devlink_reload
+}
+
+rate_limits_test()
+{
+ RET=0
+
+ devlink trap policer set $DEVLINK_DEV policer 1 rate 0 &> /dev/null
+ check_fail $? "Policer rate was changed to rate lower than limit"
+ devlink trap policer set $DEVLINK_DEV policer 1 \
+ rate 2000000001 &> /dev/null
+ check_fail $? "Policer rate was changed to rate higher than limit"
+
+ devlink trap policer set $DEVLINK_DEV policer 1 rate 1
+ check_err $? "Failed to set policer rate to minimum"
+ devlink trap policer set $DEVLINK_DEV policer 1 rate 2000000000
+ check_err $? "Failed to set policer rate to maximum"
+
+ log_test "Trap policer rate limits"
+}
+
+burst_limits_test()
+{
+ RET=0
+
+ devlink trap policer set $DEVLINK_DEV policer 1 burst 0 &> /dev/null
+ check_fail $? "Policer burst size was changed to 0"
+ devlink trap policer set $DEVLINK_DEV policer 1 burst 17 &> /dev/null
+ check_fail $? "Policer burst size was changed to burst size that is not power of 2"
+ devlink trap policer set $DEVLINK_DEV policer 1 burst 8 &> /dev/null
+ check_fail $? "Policer burst size was changed to burst size lower than limit"
+ devlink trap policer set $DEVLINK_DEV policer 1 \
+ burst $((2**25)) &> /dev/null
+ check_fail $? "Policer burst size was changed to burst size higher than limit"
+
+ devlink trap policer set $DEVLINK_DEV policer 1 burst 16
+ check_err $? "Failed to set policer burst size to minimum"
+ devlink trap policer set $DEVLINK_DEV policer 1 burst $((2**24))
+ check_err $? "Failed to set policer burst size to maximum"
+
+ log_test "Trap policer burst size limits"
+}
+
+trap_rate_get()
+{
+ local t0 t1
+
+ t0=$(devlink_trap_rx_packets_get blackhole_route)
+ sleep 10
+ t1=$(devlink_trap_rx_packets_get blackhole_route)
+
+ echo $(((t1 - t0) / 10))
+}
+
+policer_drop_rate_get()
+{
+ local id=$1; shift
+ local t0 t1
+
+ t0=$(devlink_trap_policer_rx_dropped_get $id)
+ sleep 10
+ t1=$(devlink_trap_policer_rx_dropped_get $id)
+
+ echo $(((t1 - t0) / 10))
+}
+
+__rate_test()
+{
+ local rate pct drop_rate
+ local id=$1; shift
+
+ RET=0
+
+ devlink trap policer set $DEVLINK_DEV policer $id rate 1000 burst 16
+ devlink trap group set $DEVLINK_DEV group l3_drops policer $id
+
+ # Send packets at highest possible rate and make sure they are dropped
+ # by the policer. Make sure measured received rate is about 1000 pps
+ log_info "=== Tx rate: Highest, Policer rate: 1000 pps ==="
+
+ start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac
+
+ sleep 5 # Take measurements when rate is stable
+
+ rate=$(trap_rate_get)
+ pct=$((100 * (rate - 1000) / 1000))
+ ((-5 <= pct && pct <= 5))
+ check_err $? "Expected rate 1000 pps, got $rate pps, which is $pct% off. Required accuracy is +-5%"
+ log_info "Expected rate 1000 pps, measured rate $rate pps"
+
+ drop_rate=$(policer_drop_rate_get $id)
+ (( drop_rate > 0 ))
+ check_err $? "Expected non-zero policer drop rate, got 0"
+ log_info "Measured policer drop rate of $drop_rate pps"
+
+ stop_traffic
+
+ # Send packets at a rate of 1000 pps and make sure they are not dropped
+ # by the policer
+ log_info "=== Tx rate: 1000 pps, Policer rate: 1000 pps ==="
+
+ start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac -d 1msec
+
+ sleep 5 # Take measurements when rate is stable
+
+ drop_rate=$(policer_drop_rate_get $id)
+ (( drop_rate == 0 ))
+ check_err $? "Expected zero policer drop rate, got a drop rate of $drop_rate pps"
+ log_info "Measured policer drop rate of $drop_rate pps"
+
+ stop_traffic
+
+ # Unbind the policer and send packets at highest possible rate. Make
+ # sure they are not dropped by the policer and that the measured
+ # received rate is higher than 1000 pps
+ log_info "=== Tx rate: Highest, Policer rate: No policer ==="
+
+ devlink trap group set $DEVLINK_DEV group l3_drops nopolicer
+
+ start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac
+
+ rate=$(trap_rate_get)
+ (( rate > 1000 ))
+ check_err $? "Expected rate higher than 1000 pps, got $rate pps"
+ log_info "Measured rate $rate pps"
+
+ drop_rate=$(policer_drop_rate_get $id)
+ (( drop_rate == 0 ))
+ check_err $? "Expected zero policer drop rate, got a drop rate of $drop_rate pps"
+ log_info "Measured policer drop rate of $drop_rate pps"
+
+ stop_traffic
+
+ log_test "Trap policer rate"
+}
+
+rate_test()
+{
+ local id
+
+ for id in $(devlink_trap_policer_ids_get); do
+ echo
+ log_info "Running rate test for policer $id"
+ __rate_test $id
+ done
+}
+
+__burst_test()
+{
+ local t0_rx t0_drop t1_rx t1_drop rx drop
+ local id=$1; shift
+
+ RET=0
+
+ devlink trap policer set $DEVLINK_DEV policer $id rate 1000 burst 32
+ devlink trap group set $DEVLINK_DEV group l3_drops policer $id
+
+ # Send a burst of 64 packets and make sure that about 32 are received
+ # and the rest are dropped by the policer
+ log_info "=== Tx burst size: 64, Policer burst size: 32 pps ==="
+
+ t0_rx=$(devlink_trap_rx_packets_get blackhole_route)
+ t0_drop=$(devlink_trap_policer_rx_dropped_get $id)
+
+ start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac -c 64
+
+ t1_rx=$(devlink_trap_rx_packets_get blackhole_route)
+ t1_drop=$(devlink_trap_policer_rx_dropped_get $id)
+
+ rx=$((t1_rx - t0_rx))
+ pct=$((100 * (rx - 32) / 32))
+ ((-20 <= pct && pct <= 20))
+ check_err $? "Expected burst size of 32 packets, got $rx packets, which is $pct% off. Required accuracy is +-20%"
+ log_info "Expected burst size of 32 packets, measured burst size of $rx packets"
+
+ drop=$((t1_drop - t0_drop))
+ (( drop > 0 ))
+ check_err $? "Expected non-zero policer drops, got 0"
+ log_info "Measured policer drops of $drop packets"
+
+ # Send a burst of 16 packets and make sure that 16 are received
+ # and that none are dropped by the policer
+ log_info "=== Tx burst size: 16, Policer burst size: 32 pps ==="
+
+ t0_rx=$(devlink_trap_rx_packets_get blackhole_route)
+ t0_drop=$(devlink_trap_policer_rx_dropped_get $id)
+
+ start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac -c 16
+
+ t1_rx=$(devlink_trap_rx_packets_get blackhole_route)
+ t1_drop=$(devlink_trap_policer_rx_dropped_get $id)
+
+ rx=$((t1_rx - t0_rx))
+ (( rx == 16 ))
+ check_err $? "Expected burst size of 16 packets, got $rx packets"
+ log_info "Expected burst size of 16 packets, measured burst size of $rx packets"
+
+ drop=$((t1_drop - t0_drop))
+ (( drop == 0 ))
+ check_err $? "Expected zero policer drops, got $drop"
+ log_info "Measured policer drops of $drop packets"
+
+ # Unbind the policer and send a burst of 64 packets. Make sure that
+ # 64 packets are received and that none are dropped by the policer
+ log_info "=== Tx burst size: 64, Policer burst size: No policer ==="
+
+ devlink trap group set $DEVLINK_DEV group l3_drops nopolicer
+
+ t0_rx=$(devlink_trap_rx_packets_get blackhole_route)
+ t0_drop=$(devlink_trap_policer_rx_dropped_get $id)
+
+ start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac -c 64
+
+ t1_rx=$(devlink_trap_rx_packets_get blackhole_route)
+ t1_drop=$(devlink_trap_policer_rx_dropped_get $id)
+
+ rx=$((t1_rx - t0_rx))
+ (( rx == 64 ))
+ check_err $? "Expected burst size of 64 packets, got $rx packets"
+ log_info "Expected burst size of 64 packets, measured burst size of $rx packets"
+
+ drop=$((t1_drop - t0_drop))
+ (( drop == 0 ))
+ check_err $? "Expected zero policer drops, got $drop"
+ log_info "Measured policer drops of $drop packets"
+
+ log_test "Trap policer burst size"
+}
+
+burst_test()
+{
+ local id
+
+ for id in $(devlink_trap_policer_ids_get); do
+ echo
+ log_info "Running burst size test for policer $id"
+ __burst_test $id
+ done
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh
index fd19161dd4ec..e11a416323cf 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh
@@ -314,11 +314,11 @@ overlay_smac_is_mc_test()
-B 192.0.2.17 -t udp sp=12345,dp=$VXPORT,p=$payload -q &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $swp1
+ devlink_trap_drop_test $trap_name $group_name $swp1 101
log_test "Overlay source MAC is multicast"
- devlink_trap_drop_cleanup $mz_pid $swp1 "ip"
+ devlink_trap_drop_cleanup $mz_pid $swp1 "ip" 1 101
}
trap cleanup EXIT
diff --git a/tools/testing/selftests/drivers/net/mlxsw/extack.sh b/tools/testing/selftests/drivers/net/mlxsw/extack.sh
index d72d8488a3b2..7a0a99c1d22f 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/extack.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/extack.sh
@@ -8,7 +8,8 @@ lib_dir=$(dirname $0)/../../../net/forwarding
ALL_TESTS="
netdev_pre_up_test
vxlan_vlan_add_test
- port_vlan_add_test
+ vxlan_bridge_create_test
+ bridge_create_test
"
NUM_NETIFS=2
source $lib_dir/lib.sh
@@ -106,32 +107,56 @@ vxlan_vlan_add_test()
ip link del dev br1
}
-port_vlan_add_test()
+vxlan_bridge_create_test()
{
RET=0
- ip link add name br1 up type bridge vlan_filtering 1 mcast_snooping 0
-
# Unsupported configuration: mlxsw demands VXLAN with "noudpcsum".
ip link add name vx1 up type vxlan id 1000 \
local 192.0.2.17 remote 192.0.2.18 \
dstport 4789 tos inherit ttl 100
- ip link set dev $swp1 master br1
- check_err $?
-
- bridge vlan del dev $swp1 vid 1
+ # Test with VLAN-aware bridge.
+ ip link add name br1 up type bridge vlan_filtering 1 mcast_snooping 0
ip link set dev vx1 master br1
+
+ ip link set dev $swp1 master br1 2>&1 > /dev/null \
+ | grep -q mlxsw_spectrum
check_err $?
- bridge vlan add dev $swp1 vid 1 pvid untagged 2>&1 >/dev/null \
+ # Test with VLAN-unaware bridge.
+ ip link set dev br1 type bridge vlan_filtering 0
+
+ ip link set dev $swp1 master br1 2>&1 > /dev/null \
| grep -q mlxsw_spectrum
check_err $?
- log_test "extack - map VLAN at port"
+ log_test "extack - bridge creation with VXLAN"
+ ip link del dev br1
ip link del dev vx1
+}
+
+bridge_create_test()
+{
+ RET=0
+
+ ip link add name br1 up type bridge vlan_filtering 1
+ ip link add name br2 up type bridge vlan_filtering 1
+
+ ip link set dev $swp1 master br1
+ check_err $?
+
+ # Only one VLAN-aware bridge is supported, so this should fail with
+ # an extack.
+ ip link set dev $swp2 master br2 2>&1 > /dev/null \
+ | grep -q mlxsw_spectrum
+ check_err $?
+
+ log_test "extack - multiple VLAN-aware bridges creation"
+
+ ip link del dev br2
ip link del dev br1
}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh b/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh
new file mode 100644
index 000000000000..cbe50f260a40
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+##############################################################################
+# Defines
+
+if [[ ! -v MLXSW_CHIP ]]; then
+ MLXSW_CHIP=$(devlink -j dev info $DEVLINK_DEV | jq -r '.[][]["driver"]')
+ if [ -z "$MLXSW_CHIP" ]; then
+ echo "SKIP: Device $DEVLINK_DEV doesn't support devlink info command"
+ exit 1
+ fi
+fi
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh
index eff6393ce974..71066bc4b886 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh
@@ -114,23 +114,12 @@ ping_ipv4()
ping_test $h1 192.0.2.2
}
-wait_for_packets()
-{
- local t0=$1; shift
- local prio_observe=$1; shift
-
- local t1=$(ethtool_stats_get $swp1 rx_frames_prio_$prio_observe)
- local delta=$((t1 - t0))
- echo $delta
- ((delta >= 10))
-}
-
__test_defprio()
{
local prio_install=$1; shift
local prio_observe=$1; shift
- local delta
local key
+ local t1
local i
RET=0
@@ -139,9 +128,10 @@ __test_defprio()
local t0=$(ethtool_stats_get $swp1 rx_frames_prio_$prio_observe)
mausezahn -q $h1 -d 100m -c 10 -t arp reply
- delta=$(busywait "$HIT_TIMEOUT" wait_for_packets $t0 $prio_observe)
+ t1=$(busywait "$HIT_TIMEOUT" until_counter_is ">= $((t0 + 10))" \
+ ethtool_stats_get $swp1 rx_frames_prio_$prio_observe)
- check_err $? "Default priority $prio_install/$prio_observe: Expected to capture 10 packets, got $delta."
+ check_err $? "Default priority $prio_install/$prio_observe: Expected to capture 10 packets, got $((t1 - t0))."
log_test "Default priority $prio_install/$prio_observe"
defprio_uninstall $swp1 $prio_install
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh
index c745ce3befee..4cb2aa65278a 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh
@@ -31,6 +31,7 @@ ALL_TESTS="
ping_ipv4
test_update
test_no_update
+ test_pedit_norewrite
test_dscp_leftover
"
@@ -56,6 +57,11 @@ zero()
echo 0
}
+three()
+{
+ echo 3
+}
+
h1_create()
{
simple_if_init $h1 192.0.2.1/28
@@ -103,6 +109,9 @@ switch_create()
simple_if_init $swp1 192.0.2.2/28
__simple_if_init $swp2 v$swp1 192.0.2.17/28
+ tc qdisc add dev $swp1 clsact
+ tc qdisc add dev $swp2 clsact
+
lldptool -T -i $swp1 -V APP $(dscp_map 0) >/dev/null
lldptool -T -i $swp2 -V APP $(dscp_map 0) >/dev/null
lldpad_app_wait_set $swp1
@@ -115,6 +124,9 @@ switch_destroy()
lldptool -T -i $swp1 -V APP -d $(dscp_map 0) >/dev/null
lldpad_app_wait_del
+ tc qdisc del dev $swp2 clsact
+ tc qdisc del dev $swp1 clsact
+
__simple_if_fini $swp2 192.0.2.17/28
simple_if_fini $swp1 192.0.2.2/28
}
@@ -223,18 +235,36 @@ __test_update()
test_update()
{
+ echo "Test net.ipv4.ip_forward_update_priority=1"
__test_update 1 reprioritize
}
test_no_update()
{
+ echo "Test net.ipv4.ip_forward_update_priority=0"
__test_update 0 echo
}
+# Test that when DSCP is updated in pedit, the DSCP rewrite is turned off.
+test_pedit_norewrite()
+{
+ echo "Test no DSCP rewrite after DSCP is updated by pedit"
+
+ tc filter add dev $swp1 ingress handle 101 pref 1 prot ip flower \
+ action pedit ex munge ip dsfield set $((3 << 2)) retain 0xfc \
+ action skbedit priority 3
+
+ __test_update 0 three
+
+ tc filter del dev $swp1 ingress pref 1
+}
+
# Test that when the last APP rule is removed, the prio->DSCP map is properly
# set to zeroes, and that the last APP rule does not stay active in the ASIC.
test_dscp_leftover()
{
+ echo "Test that last removed DSCP rule is deconfigured correctly"
+
lldptool -T -i $swp2 -V APP -d $(dscp_map 0) >/dev/null
lldpad_app_wait_del
diff --git a/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh
index d231649b4f01..e93878d42596 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh
@@ -2,16 +2,15 @@
# SPDX-License-Identifier: GPL-2.0
ROUTER_NUM_NETIFS=4
+: ${TIMEOUT:=20000} # ms
router_h1_create()
{
simple_if_init $h1 192.0.1.1/24
- ip route add 193.0.0.0/8 via 192.0.1.2 dev $h1
}
router_h1_destroy()
{
- ip route del 193.0.0.0/8 via 192.0.1.2 dev $h1
simple_if_fini $h1 192.0.1.1/24
}
@@ -64,13 +63,15 @@ router_setup_prepare()
router_create
}
-router_offload_validate()
+wait_for_routes()
{
- local route_count=$1
- local offloaded_count
+ local t0=$1; shift
+ local route_count=$1; shift
- offloaded_count=$(ip route | grep -o 'offload' | wc -l)
- [[ $offloaded_count -ge $route_count ]]
+ local t1=$(ip route | grep -o 'offload' | wc -l)
+ local delta=$((t1 - t0))
+ echo $delta
+ [[ $delta -ge $route_count ]]
}
router_routes_create()
@@ -90,8 +91,8 @@ router_routes_create()
break 3
fi
- echo route add 193.${i}.${j}.${k}/32 via \
- 192.0.2.1 dev $rp2 >> $ROUTE_FILE
+ echo route add 193.${i}.${j}.${k}/32 dev $rp2 \
+ >> $ROUTE_FILE
((count++))
done
done
@@ -111,45 +112,19 @@ router_test()
{
local route_count=$1
local should_fail=$2
- local count=0
+ local delta
RET=0
+ local t0=$(ip route | grep -o 'offload' | wc -l)
router_routes_create $route_count
+ delta=$(busywait "$TIMEOUT" wait_for_routes $t0 $route_count)
- router_offload_validate $route_count
- check_err_fail $should_fail $? "Offload of $route_count routes"
+ check_err_fail $should_fail $? "Offload routes: Expected $route_count, got $delta."
if [[ $RET -ne 0 ]] || [[ $should_fail -eq 1 ]]; then
return
fi
- tc filter add dev $h2 ingress protocol ip pref 1 flower \
- skip_sw dst_ip 193.0.0.0/8 action drop
-
- for i in {0..255}
- do
- for j in {0..255}
- do
- for k in {0..255}
- do
- if [[ $count -eq $route_count ]]; then
- break 3
- fi
-
- $MZ $h1 -c 1 -p 64 -a $h1mac -b $rp1mac \
- -A 192.0.1.1 -B 193.${i}.${j}.${k} \
- -t ip -q
- ((count++))
- done
- done
- done
-
- tc_check_packets "dev $h2 ingress" 1 $route_count
- check_err $? "Offload mismatch"
-
- tc filter del dev $h2 ingress protocol ip pref 1 flower \
- skip_sw dst_ip 193.0.0.0/8 action drop
-
router_routes_destroy
}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
index 5c39e5f6a480..f4031002d5e9 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
@@ -32,6 +32,7 @@ ALL_TESTS="
devlink_reload_test
"
NUM_NETIFS=2
+: ${TIMEOUT:=20000} # ms
source $lib_dir/lib.sh
source $lib_dir/devlink_lib.sh
@@ -360,20 +361,24 @@ vlan_rif_refcount_test()
ip link add link br0 name br0.10 up type vlan id 10
ip -6 address add 2001:db8:1::1/64 dev br0.10
- ip -6 route get fibmatch 2001:db8:1::2 dev br0.10 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip -6 route get fibmatch 2001:db8:1::2 dev br0.10
check_err $? "vlan rif was not created before adding port to vlan"
bridge vlan add vid 10 dev $swp1
- ip -6 route get fibmatch 2001:db8:1::2 dev br0.10 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip -6 route get fibmatch 2001:db8:1::2 dev br0.10
check_err $? "vlan rif was destroyed after adding port to vlan"
bridge vlan del vid 10 dev $swp1
- ip -6 route get fibmatch 2001:db8:1::2 dev br0.10 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip -6 route get fibmatch 2001:db8:1::2 dev br0.10
check_err $? "vlan rif was destroyed after removing port from vlan"
ip link set dev $swp1 nomaster
- ip -6 route get fibmatch 2001:db8:1::2 dev br0.10 | grep -q offload
- check_fail $? "vlan rif was not destroyed after unlinking port from bridge"
+ busywait "$TIMEOUT" not wait_for_offload \
+ ip -6 route get fibmatch 2001:db8:1::2 dev br0.10
+ check_err $? "vlan rif was not destroyed after unlinking port from bridge"
log_test "vlan rif refcount"
@@ -401,22 +406,28 @@ subport_rif_refcount_test()
ip -6 address add 2001:db8:1::1/64 dev bond1
ip -6 address add 2001:db8:2::1/64 dev bond1.10
- ip -6 route get fibmatch 2001:db8:1::2 dev bond1 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip -6 route get fibmatch 2001:db8:1::2 dev bond1
check_err $? "subport rif was not created on lag device"
- ip -6 route get fibmatch 2001:db8:2::2 dev bond1.10 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip -6 route get fibmatch 2001:db8:2::2 dev bond1.10
check_err $? "subport rif was not created on vlan device"
ip link set dev $swp1 nomaster
- ip -6 route get fibmatch 2001:db8:1::2 dev bond1 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip -6 route get fibmatch 2001:db8:1::2 dev bond1
check_err $? "subport rif of lag device was destroyed when should not"
- ip -6 route get fibmatch 2001:db8:2::2 dev bond1.10 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip -6 route get fibmatch 2001:db8:2::2 dev bond1.10
check_err $? "subport rif of vlan device was destroyed when should not"
ip link set dev $swp2 nomaster
- ip -6 route get fibmatch 2001:db8:1::2 dev bond1 | grep -q offload
- check_fail $? "subport rif of lag device was not destroyed when should"
- ip -6 route get fibmatch 2001:db8:2::2 dev bond1.10 | grep -q offload
- check_fail $? "subport rif of vlan device was not destroyed when should"
+ busywait "$TIMEOUT" not wait_for_offload \
+ ip -6 route get fibmatch 2001:db8:1::2 dev bond1
+ check_err $? "subport rif of lag device was not destroyed when should"
+ busywait "$TIMEOUT" not wait_for_offload \
+ ip -6 route get fibmatch 2001:db8:2::2 dev bond1.10
+ check_err $? "subport rif of vlan device was not destroyed when should"
log_test "subport rif refcount"
@@ -575,7 +586,8 @@ bridge_extern_learn_test()
bridge fdb add de:ad:be:ef:13:37 dev $swp1 master extern_learn
- bridge fdb show brport $swp1 | grep de:ad:be:ef:13:37 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ bridge fdb show brport $swp1 de:ad:be:ef:13:37
check_err $? "fdb entry not marked as offloaded when should"
log_test "externally learned fdb entry"
@@ -595,9 +607,11 @@ neigh_offload_test()
ip -6 neigh add 2001:db8:1::2 lladdr de:ad:be:ef:13:37 nud perm \
dev $swp1
- ip -4 neigh show dev $swp1 | grep 192.0.2.2 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip -4 neigh show dev $swp1 192.0.2.2
check_err $? "ipv4 neigh entry not marked as offloaded when should"
- ip -6 neigh show dev $swp1 | grep 2001:db8:1::2 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip -6 neigh show dev $swp1 2001:db8:1::2
check_err $? "ipv6 neigh entry not marked as offloaded when should"
log_test "neighbour offload indication"
@@ -623,25 +637,31 @@ nexthop_offload_test()
ip -6 route add 2001:db8:2::/64 vrf v$swp1 \
nexthop via 2001:db8:1::2 dev $swp1
- ip -4 route show 198.51.100.0/24 vrf v$swp1 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip -4 route show 198.51.100.0/24 vrf v$swp1
check_err $? "ipv4 nexthop not marked as offloaded when should"
- ip -6 route show 2001:db8:2::/64 vrf v$swp1 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip -6 route show 2001:db8:2::/64 vrf v$swp1
check_err $? "ipv6 nexthop not marked as offloaded when should"
ip link set dev $swp2 down
sleep 1
- ip -4 route show 198.51.100.0/24 vrf v$swp1 | grep -q offload
- check_fail $? "ipv4 nexthop marked as offloaded when should not"
- ip -6 route show 2001:db8:2::/64 vrf v$swp1 | grep -q offload
- check_fail $? "ipv6 nexthop marked as offloaded when should not"
+ busywait "$TIMEOUT" not wait_for_offload \
+ ip -4 route show 198.51.100.0/24 vrf v$swp1
+ check_err $? "ipv4 nexthop marked as offloaded when should not"
+ busywait "$TIMEOUT" not wait_for_offload \
+ ip -6 route show 2001:db8:2::/64 vrf v$swp1
+ check_err $? "ipv6 nexthop marked as offloaded when should not"
ip link set dev $swp2 up
setup_wait
- ip -4 route show 198.51.100.0/24 vrf v$swp1 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip -4 route show 198.51.100.0/24 vrf v$swp1
check_err $? "ipv4 nexthop not marked as offloaded after neigh add"
- ip -6 route show 2001:db8:2::/64 vrf v$swp1 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip -6 route show 2001:db8:2::/64 vrf v$swp1
check_err $? "ipv6 nexthop not marked as offloaded after neigh add"
log_test "nexthop offload indication"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh
index c9fc4d4885c1..94c37124a840 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh
@@ -56,11 +56,19 @@ switch_destroy()
}
# Callback from sch_ets_tests.sh
-get_stats()
+collect_stats()
{
- local band=$1; shift
+ local -a streams=("$@")
+ local stream
- ethtool_stats_get "$h2" rx_octets_prio_$band
+ # Wait for qdisc counter update so that we don't get it mid-way through.
+ busywait_for_counter 1000 +1 \
+ qdisc_parent_stats_get $swp2 10:$((${streams[0]} + 1)) .bytes \
+ > /dev/null
+
+ for stream in ${streams[@]}; do
+ qdisc_parent_stats_get $swp2 10:$((stream + 1)) .bytes
+ done
}
bail_on_lldpad
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
new file mode 100644
index 000000000000..0d347d48c112
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
@@ -0,0 +1,533 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# This test sends a >1Gbps stream of traffic from H1, to the switch, which
+# forwards it to a 1Gbps port. This 1Gbps stream is then looped back to the
+# switch and forwarded to the port under test $swp3, which is also 1Gbps.
+#
+# This way, $swp3 should be 100% filled with traffic without any of it spilling
+# to the backlog. Any extra packets sent should almost 1:1 go to backlog. That
+# is what H2 is used for--it sends the extra traffic to create backlog.
+#
+# A RED Qdisc is installed on $swp3. The configuration is such that the minimum
+# and maximum size are 1 byte apart, so there is a very clear border under which
+# no marking or dropping takes place, and above which everything is marked or
+# dropped.
+#
+# The test uses the buffer build-up behavior to test the installed RED.
+#
+# In order to test WRED, $swp3 actually contains RED under PRIO, with two
+# different configurations. Traffic is prioritized using 802.1p and relies on
+# the implicit mlxsw configuration, where packet priority is taken 1:1 from the
+# 802.1p marking.
+#
+# +--------------------------+ +--------------------------+
+# | H1 | | H2 |
+# | + $h1.10 | | + $h2.10 |
+# | | 192.0.2.1/28 | | | 192.0.2.2/28 |
+# | | | | | |
+# | | $h1.11 + | | | $h2.11 + |
+# | | 192.0.2.17/28 | | | | 192.0.2.18/28 | |
+# | | | | | | | |
+# | \______ ______/ | | \______ ______/ |
+# | \ / | | \ / |
+# | + $h1 | | + $h2 |
+# +-------------|------------+ +-------------|------------+
+# | >1Gbps |
+# +-------------|------------------------------------------------|------------+
+# | SW + $swp1 + $swp2 |
+# | _______/ \___________ ___________/ \_______ |
+# | / \ / \ |
+# | +-|-----------------+ | +-|-----------------+ | |
+# | | + $swp1.10 | | | + $swp2.10 | | |
+# | | | | .-------------+ $swp5.10 | | |
+# | | BR1_10 | | | | | | |
+# | | | | | | BR2_10 | | |
+# | | + $swp2.10 | | | | | | |
+# | +-|-----------------+ | | | + $swp3.10 | | |
+# | | | | +-|-----------------+ | |
+# | | +-----------------|-+ | | +-----------------|-+ |
+# | | | $swp1.11 + | | | | $swp2.11 + | |
+# | | | | | .-----------------+ $swp5.11 | |
+# | | | BR1_11 | | | | | | |
+# | | | | | | | | BR2_11 | |
+# | | | $swp2.11 + | | | | | | |
+# | | +-----------------|-+ | | | | $swp3.11 + | |
+# | | | | | | +-----------------|-+ |
+# | \_______ ___________/ | | \___________ _______/ |
+# | \ / \ / \ / |
+# | + $swp4 + $swp5 + $swp3 |
+# +-------------|----------------------|-------------------------|------------+
+# | | | 1Gbps
+# \________1Gbps_________/ |
+# +----------------------------|------------+
+# | H3 + $h3 |
+# | _____________________/ \_______ |
+# | / \ |
+# | | | |
+# | + $h3.10 $h3.11 + |
+# | 192.0.2.3/28 192.0.2.19/28 |
+# +-----------------------------------------+
+
+NUM_NETIFS=8
+CHECK_TC="yes"
+lib_dir=$(dirname $0)/../../../net/forwarding
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+source qos_lib.sh
+
+ipaddr()
+{
+ local host=$1; shift
+ local vlan=$1; shift
+
+ echo 192.0.2.$((16 * (vlan - 10) + host))
+}
+
+host_create()
+{
+ local dev=$1; shift
+ local host=$1; shift
+
+ simple_if_init $dev
+ mtu_set $dev 10000
+
+ vlan_create $dev 10 v$dev $(ipaddr $host 10)/28
+ ip link set dev $dev.10 type vlan egress 0:0
+
+ vlan_create $dev 11 v$dev $(ipaddr $host 11)/28
+ ip link set dev $dev.11 type vlan egress 0:1
+}
+
+host_destroy()
+{
+ local dev=$1; shift
+
+ vlan_destroy $dev 11
+ vlan_destroy $dev 10
+ mtu_restore $dev
+ simple_if_fini $dev
+}
+
+h1_create()
+{
+ host_create $h1 1
+}
+
+h1_destroy()
+{
+ host_destroy $h1
+}
+
+h2_create()
+{
+ host_create $h2 2
+
+ # Some of the tests in this suite use multicast traffic. As this traffic
+ # enters BR2_10 resp. BR2_11, it is flooded to all other ports. Thus
+ # e.g. traffic ingressing through $swp2 is flooded to $swp3 (the
+ # intended destination) and $swp5 (which is intended as ingress for
+ # another stream of traffic).
+ #
+ # This is generally not a problem, but if the $swp5 throughput is lower
+ # than $swp2 throughput, there will be a build-up at $swp5. That may
+ # cause packets to fail to queue up at $swp3 due to shared buffer
+ # quotas, and the test to spuriously fail.
+ #
+ # Prevent this by setting the speed of $h2 to 1Gbps.
+
+ ethtool -s $h2 speed 1000 autoneg off
+}
+
+h2_destroy()
+{
+ ethtool -s $h2 autoneg on
+ host_destroy $h2
+}
+
+h3_create()
+{
+ host_create $h3 3
+ ethtool -s $h3 speed 1000 autoneg off
+}
+
+h3_destroy()
+{
+ ethtool -s $h3 autoneg on
+ host_destroy $h3
+}
+
+switch_create()
+{
+ local intf
+ local vlan
+
+ ip link add dev br1_10 type bridge
+ ip link add dev br1_11 type bridge
+
+ ip link add dev br2_10 type bridge
+ ip link add dev br2_11 type bridge
+
+ for intf in $swp1 $swp2 $swp3 $swp4 $swp5; do
+ ip link set dev $intf up
+ mtu_set $intf 10000
+ done
+
+ for intf in $swp1 $swp4; do
+ for vlan in 10 11; do
+ vlan_create $intf $vlan
+ ip link set dev $intf.$vlan master br1_$vlan
+ ip link set dev $intf.$vlan up
+ done
+ done
+
+ for intf in $swp2 $swp3 $swp5; do
+ for vlan in 10 11; do
+ vlan_create $intf $vlan
+ ip link set dev $intf.$vlan master br2_$vlan
+ ip link set dev $intf.$vlan up
+ done
+ done
+
+ ip link set dev $swp4.10 type vlan egress 0:0
+ ip link set dev $swp4.11 type vlan egress 0:1
+ for intf in $swp1 $swp2 $swp5; do
+ for vlan in 10 11; do
+ ip link set dev $intf.$vlan type vlan ingress 0:0 1:1
+ done
+ done
+
+ for intf in $swp2 $swp3 $swp4 $swp5; do
+ ethtool -s $intf speed 1000 autoneg off
+ done
+
+ ip link set dev br1_10 up
+ ip link set dev br1_11 up
+ ip link set dev br2_10 up
+ ip link set dev br2_11 up
+
+ local size=$(devlink_pool_size_thtype 0 | cut -d' ' -f 1)
+ devlink_port_pool_th_set $swp3 8 $size
+}
+
+switch_destroy()
+{
+ local intf
+ local vlan
+
+ devlink_port_pool_th_restore $swp3 8
+
+ tc qdisc del dev $swp3 root 2>/dev/null
+
+ ip link set dev br2_11 down
+ ip link set dev br2_10 down
+ ip link set dev br1_11 down
+ ip link set dev br1_10 down
+
+ for intf in $swp5 $swp4 $swp3 $swp2; do
+ ethtool -s $intf autoneg on
+ done
+
+ for intf in $swp5 $swp3 $swp2 $swp4 $swp1; do
+ for vlan in 11 10; do
+ ip link set dev $intf.$vlan down
+ ip link set dev $intf.$vlan nomaster
+ vlan_destroy $intf $vlan
+ done
+
+ mtu_restore $intf
+ ip link set dev $intf down
+ done
+
+ ip link del dev br2_11
+ ip link del dev br2_10
+ ip link del dev br1_11
+ ip link del dev br1_10
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ swp4=${NETIFS[p7]}
+ swp5=${NETIFS[p8]}
+
+ h3_mac=$(mac_get $h3)
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ h3_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h3_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1.10 $(ipaddr 3 10) " from host 1, vlan 10"
+ ping_test $h1.11 $(ipaddr 3 11) " from host 1, vlan 11"
+ ping_test $h2.10 $(ipaddr 3 10) " from host 2, vlan 10"
+ ping_test $h2.11 $(ipaddr 3 11) " from host 2, vlan 11"
+}
+
+get_tc()
+{
+ local vlan=$1; shift
+
+ echo $((vlan - 10))
+}
+
+get_qdisc_handle()
+{
+ local vlan=$1; shift
+
+ local tc=$(get_tc $vlan)
+ local band=$((8 - tc))
+
+ # Handle is 107: for TC1, 108: for TC0.
+ echo "10$band:"
+}
+
+get_qdisc_backlog()
+{
+ local vlan=$1; shift
+
+ qdisc_stats_get $swp3 $(get_qdisc_handle $vlan) .backlog
+}
+
+get_mc_transmit_queue()
+{
+ local vlan=$1; shift
+
+ local tc=$(($(get_tc $vlan) + 8))
+ ethtool_stats_get $swp3 tc_transmit_queue_tc_$tc
+}
+
+get_nmarked()
+{
+ local vlan=$1; shift
+
+ ethtool_stats_get $swp3 ecn_marked
+}
+
+get_qdisc_npackets()
+{
+ local vlan=$1; shift
+
+ busywait_for_counter 1100 +1 \
+ qdisc_stats_get $swp3 $(get_qdisc_handle $vlan) .packets
+}
+
+# This sends traffic in an attempt to build a backlog of $size. Returns 0 on
+# success. After 10 failed attempts it bails out and returns 1. It dumps the
+# backlog size to stdout.
+build_backlog()
+{
+ local vlan=$1; shift
+ local size=$1; shift
+ local proto=$1; shift
+
+ local tc=$((vlan - 10))
+ local band=$((8 - tc))
+ local cur=-1
+ local i=0
+
+ while :; do
+ local cur=$(busywait 1100 until_counter_is "> $cur" \
+ get_qdisc_backlog $vlan)
+ local diff=$((size - cur))
+ local pkts=$(((diff + 7999) / 8000))
+
+ if ((cur >= size)); then
+ echo $cur
+ return 0
+ elif ((i++ > 10)); then
+ echo $cur
+ return 1
+ fi
+
+ $MZ $h2.$vlan -p 8000 -a own -b $h3_mac \
+ -A $(ipaddr 2 $vlan) -B $(ipaddr 3 $vlan) \
+ -t $proto -q -c $pkts "$@"
+ done
+}
+
+check_marking()
+{
+ local vlan=$1; shift
+ local cond=$1; shift
+
+ local npackets_0=$(get_qdisc_npackets $vlan)
+ local nmarked_0=$(get_nmarked $vlan)
+ sleep 5
+ local npackets_1=$(get_qdisc_npackets $vlan)
+ local nmarked_1=$(get_nmarked $vlan)
+
+ local nmarked_d=$((nmarked_1 - nmarked_0))
+ local npackets_d=$((npackets_1 - npackets_0))
+ local pct=$((100 * nmarked_d / npackets_d))
+
+ echo $pct
+ ((pct $cond))
+}
+
+ecn_test_common()
+{
+ local name=$1; shift
+ local vlan=$1; shift
+ local limit=$1; shift
+ local backlog
+ local pct
+
+ # Build the below-the-limit backlog using UDP. We could use TCP just
+ # fine, but this way we get a proof that UDP is accepted when queue
+ # length is below the limit. The main stream is using TCP, and if the
+ # limit is misconfigured, we would see this traffic being ECN marked.
+ RET=0
+ backlog=$(build_backlog $vlan $((2 * limit / 3)) udp)
+ check_err $? "Could not build the requested backlog"
+ pct=$(check_marking $vlan "== 0")
+ check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
+ log_test "TC $((vlan - 10)): $name backlog < limit"
+
+ # Now push TCP, because non-TCP traffic would be early-dropped after the
+ # backlog crosses the limit, and we want to make sure that the backlog
+ # is above the limit.
+ RET=0
+ backlog=$(build_backlog $vlan $((3 * limit / 2)) tcp tos=0x01)
+ check_err $? "Could not build the requested backlog"
+ pct=$(check_marking $vlan ">= 95")
+ check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected >= 95."
+ log_test "TC $((vlan - 10)): $name backlog > limit"
+}
+
+do_ecn_test()
+{
+ local vlan=$1; shift
+ local limit=$1; shift
+ local name=ECN
+
+ start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \
+ $h3_mac tos=0x01
+ sleep 1
+
+ ecn_test_common "$name" $vlan $limit
+
+ # Up there we saw that UDP gets accepted when backlog is below the
+ # limit. Now that it is above, it should all get dropped, and backlog
+ # building should fail.
+ RET=0
+ build_backlog $vlan $((2 * limit)) udp >/dev/null
+ check_fail $? "UDP traffic went into backlog instead of being early-dropped"
+ log_test "TC $((vlan - 10)): $name backlog > limit: UDP early-dropped"
+
+ stop_traffic
+ sleep 1
+}
+
+do_ecn_nodrop_test()
+{
+ local vlan=$1; shift
+ local limit=$1; shift
+ local name="ECN nodrop"
+
+ start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \
+ $h3_mac tos=0x01
+ sleep 1
+
+ ecn_test_common "$name" $vlan $limit
+
+ # Up there we saw that UDP gets accepted when backlog is below the
+ # limit. Now that it is above, in nodrop mode, make sure it goes to
+ # backlog as well.
+ RET=0
+ build_backlog $vlan $((2 * limit)) udp >/dev/null
+ check_err $? "UDP traffic was early-dropped instead of getting into backlog"
+ log_test "TC $((vlan - 10)): $name backlog > limit: UDP not dropped"
+
+ stop_traffic
+ sleep 1
+}
+
+do_red_test()
+{
+ local vlan=$1; shift
+ local limit=$1; shift
+ local backlog
+ local pct
+
+ # Use ECN-capable TCP to verify there's no marking even though the queue
+ # is above limit.
+ start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \
+ $h3_mac tos=0x01
+
+ # Pushing below the queue limit should work.
+ RET=0
+ backlog=$(build_backlog $vlan $((2 * limit / 3)) tcp tos=0x01)
+ check_err $? "Could not build the requested backlog"
+ pct=$(check_marking $vlan "== 0")
+ check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
+ log_test "TC $((vlan - 10)): RED backlog < limit"
+
+ # Pushing above should not.
+ RET=0
+ backlog=$(build_backlog $vlan $((3 * limit / 2)) tcp tos=0x01)
+ check_fail $? "Traffic went into backlog instead of being early-dropped"
+ pct=$(check_marking $vlan "== 0")
+ check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
+ local diff=$((limit - backlog))
+ pct=$((100 * diff / limit))
+ ((0 <= pct && pct <= 5))
+ check_err $? "backlog $backlog / $limit expected <= 5% distance"
+ log_test "TC $((vlan - 10)): RED backlog > limit"
+
+ stop_traffic
+ sleep 1
+}
+
+do_mc_backlog_test()
+{
+ local vlan=$1; shift
+ local limit=$1; shift
+ local backlog
+ local pct
+
+ RET=0
+
+ start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) bc
+ start_tcp_traffic $h2.$vlan $(ipaddr 2 $vlan) $(ipaddr 3 $vlan) bc
+
+ qbl=$(busywait 5000 until_counter_is ">= 500000" \
+ get_qdisc_backlog $vlan)
+ check_err $? "Could not build MC backlog"
+
+ # Verify that we actually see the backlog on BUM TC. Do a busywait as
+ # well, performance blips might cause false fail.
+ local ebl
+ ebl=$(busywait 5000 until_counter_is ">= 500000" \
+ get_mc_transmit_queue $vlan)
+ check_err $? "MC backlog reported by qdisc not visible in ethtool"
+
+ stop_traffic
+ stop_traffic
+
+ log_test "TC $((vlan - 10)): Qdisc reports MC backlog"
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
new file mode 100755
index 000000000000..1c36c576613b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
@@ -0,0 +1,94 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+ ping_ipv4
+ ecn_test
+ ecn_nodrop_test
+ red_test
+ mc_backlog_test
+"
+: ${QDISC:=ets}
+source sch_red_core.sh
+
+# do_ecn_test first build 2/3 of the requested backlog and expects no marking,
+# and then builds 3/2 of it and does expect marking. The values of $BACKLOG1 and
+# $BACKLOG2 are far enough not to overlap, so that we can assume that if we do
+# see (do not see) marking, it is actually due to the configuration of that one
+# TC, and not due to configuration of the other TC leaking over.
+BACKLOG1=200000
+BACKLOG2=500000
+
+install_qdisc()
+{
+ local -a args=("$@")
+
+ tc qdisc add dev $swp3 root handle 10: $QDISC \
+ bands 8 priomap 7 6 5 4 3 2 1 0
+ tc qdisc add dev $swp3 parent 10:8 handle 108: red \
+ limit 1000000 min $BACKLOG1 max $((BACKLOG1 + 1)) \
+ probability 1.0 avpkt 8000 burst 38 "${args[@]}"
+ tc qdisc add dev $swp3 parent 10:7 handle 107: red \
+ limit 1000000 min $BACKLOG2 max $((BACKLOG2 + 1)) \
+ probability 1.0 avpkt 8000 burst 63 "${args[@]}"
+ sleep 1
+}
+
+uninstall_qdisc()
+{
+ tc qdisc del dev $swp3 parent 10:7
+ tc qdisc del dev $swp3 parent 10:8
+ tc qdisc del dev $swp3 root
+}
+
+ecn_test()
+{
+ install_qdisc ecn
+
+ do_ecn_test 10 $BACKLOG1
+ do_ecn_test 11 $BACKLOG2
+
+ uninstall_qdisc
+}
+
+ecn_nodrop_test()
+{
+ install_qdisc ecn nodrop
+
+ do_ecn_nodrop_test 10 $BACKLOG1
+ do_ecn_nodrop_test 11 $BACKLOG2
+
+ uninstall_qdisc
+}
+
+red_test()
+{
+ install_qdisc
+
+ do_red_test 10 $BACKLOG1
+ do_red_test 11 $BACKLOG2
+
+ uninstall_qdisc
+}
+
+mc_backlog_test()
+{
+ install_qdisc
+
+ # Note that the backlog numbers here do not correspond to RED
+ # configuration, but are arbitrary.
+ do_mc_backlog_test 10 $BACKLOG1
+ do_mc_backlog_test 11 $BACKLOG2
+
+ uninstall_qdisc
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+bail_on_lldpad
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_prio.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_prio.sh
new file mode 100755
index 000000000000..76820a0e9a1b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_prio.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+QDISC=prio
+source sch_red_ets.sh
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh
new file mode 100755
index 000000000000..558667ea11ec
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+ ping_ipv4
+ ecn_test
+ ecn_nodrop_test
+ red_test
+ mc_backlog_test
+"
+source sch_red_core.sh
+
+BACKLOG=300000
+
+install_qdisc()
+{
+ local -a args=("$@")
+
+ tc qdisc add dev $swp3 root handle 108: red \
+ limit 1000000 min $BACKLOG max $((BACKLOG + 1)) \
+ probability 1.0 avpkt 8000 burst 38 "${args[@]}"
+ sleep 1
+}
+
+uninstall_qdisc()
+{
+ tc qdisc del dev $swp3 root
+}
+
+ecn_test()
+{
+ install_qdisc ecn
+ do_ecn_test 10 $BACKLOG
+ uninstall_qdisc
+}
+
+ecn_nodrop_test()
+{
+ install_qdisc ecn nodrop
+ do_ecn_nodrop_test 10 $BACKLOG
+ uninstall_qdisc
+}
+
+red_test()
+{
+ install_qdisc
+ do_red_test 10 $BACKLOG
+ uninstall_qdisc
+}
+
+mc_backlog_test()
+{
+ install_qdisc
+ # Note that the backlog value here does not correspond to RED
+ # configuration, but is arbitrary.
+ do_mc_backlog_test 10 $BACKLOG
+ uninstall_qdisc
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+bail_on_lldpad
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh
new file mode 100755
index 000000000000..58f3a05f08af
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh
@@ -0,0 +1,222 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+ port_pool_test
+ port_tc_ip_test
+ port_tc_arp_test
+"
+
+NUM_NETIFS=2
+source ../../../net/forwarding/lib.sh
+source ../../../net/forwarding/devlink_lib.sh
+source mlxsw_lib.sh
+
+SB_POOL_ING=0
+SB_POOL_EGR_CPU=10
+
+SB_ITC_CPU_IP=3
+SB_ITC_CPU_ARP=2
+SB_ITC=0
+
+h1_create()
+{
+ simple_if_init $h1 192.0.1.1/24
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.1.1/24
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.1.2/24
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 192.0.1.2/24
+}
+
+sb_occ_pool_check()
+{
+ local dl_port=$1; shift
+ local pool=$1; shift
+ local exp_max_occ=$1
+ local max_occ
+ local err=0
+
+ max_occ=$(devlink sb -j occupancy show $dl_port \
+ | jq -e ".[][][\"pool\"][\"$pool\"][\"max\"]")
+
+ if [[ "$max_occ" -ne "$exp_max_occ" ]]; then
+ err=1
+ fi
+
+ echo $max_occ
+ return $err
+}
+
+sb_occ_itc_check()
+{
+ local dl_port=$1; shift
+ local itc=$1; shift
+ local exp_max_occ=$1
+ local max_occ
+ local err=0
+
+ max_occ=$(devlink sb -j occupancy show $dl_port \
+ | jq -e ".[][][\"itc\"][\"$itc\"][\"max\"]")
+
+ if [[ "$max_occ" -ne "$exp_max_occ" ]]; then
+ err=1
+ fi
+
+ echo $max_occ
+ return $err
+}
+
+sb_occ_etc_check()
+{
+ local dl_port=$1; shift
+ local etc=$1; shift
+ local exp_max_occ=$1; shift
+ local max_occ
+ local err=0
+
+ max_occ=$(devlink sb -j occupancy show $dl_port \
+ | jq -e ".[][][\"etc\"][\"$etc\"][\"max\"]")
+
+ if [[ "$max_occ" -ne "$exp_max_occ" ]]; then
+ err=1
+ fi
+
+ echo $max_occ
+ return $err
+}
+
+port_pool_test()
+{
+ local exp_max_occ=288
+ local max_occ
+
+ devlink sb occupancy clearmax $DEVLINK_DEV
+
+ $MZ $h1 -c 1 -p 160 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \
+ -t ip -q
+
+ devlink sb occupancy snapshot $DEVLINK_DEV
+
+ RET=0
+ max_occ=$(sb_occ_pool_check $dl_port1 $SB_POOL_ING $exp_max_occ)
+ check_err $? "Expected iPool($SB_POOL_ING) max occupancy to be $exp_max_occ, but got $max_occ"
+ log_test "physical port's($h1) ingress pool"
+
+ RET=0
+ max_occ=$(sb_occ_pool_check $dl_port2 $SB_POOL_ING $exp_max_occ)
+ check_err $? "Expected iPool($SB_POOL_ING) max occupancy to be $exp_max_occ, but got $max_occ"
+ log_test "physical port's($h2) ingress pool"
+
+ RET=0
+ max_occ=$(sb_occ_pool_check $cpu_dl_port $SB_POOL_EGR_CPU $exp_max_occ)
+ check_err $? "Expected ePool($SB_POOL_EGR_CPU) max occupancy to be $exp_max_occ, but got $max_occ"
+ log_test "CPU port's egress pool"
+}
+
+port_tc_ip_test()
+{
+ local exp_max_occ=288
+ local max_occ
+
+ devlink sb occupancy clearmax $DEVLINK_DEV
+
+ $MZ $h1 -c 1 -p 160 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \
+ -t ip -q
+
+ devlink sb occupancy snapshot $DEVLINK_DEV
+
+ RET=0
+ max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ)
+ check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ"
+ log_test "physical port's($h1) ingress TC - IP packet"
+
+ RET=0
+ max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ)
+ check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ"
+ log_test "physical port's($h2) ingress TC - IP packet"
+
+ RET=0
+ max_occ=$(sb_occ_etc_check $cpu_dl_port $SB_ITC_CPU_IP $exp_max_occ)
+ check_err $? "Expected egress TC($SB_ITC_CPU_IP) max occupancy to be $exp_max_occ, but got $max_occ"
+ log_test "CPU port's egress TC - IP packet"
+}
+
+port_tc_arp_test()
+{
+ local exp_max_occ=96
+ local max_occ
+
+ if [[ $MLXSW_CHIP != "mlxsw_spectrum" ]]; then
+ exp_max_occ=144
+ fi
+
+ devlink sb occupancy clearmax $DEVLINK_DEV
+
+ $MZ $h1 -c 1 -p 160 -a $h1mac -A 192.0.1.1 -t arp -q
+
+ devlink sb occupancy snapshot $DEVLINK_DEV
+
+ RET=0
+ max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ)
+ check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ"
+ log_test "physical port's($h1) ingress TC - ARP packet"
+
+ RET=0
+ max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ)
+ check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ"
+ log_test "physical port's($h2) ingress TC - ARP packet"
+
+ RET=0
+ max_occ=$(sb_occ_etc_check $cpu_dl_port $SB_ITC_CPU_ARP $exp_max_occ)
+ check_err $? "Expected egress TC($SB_ITC_IP2ME) max occupancy to be $exp_max_occ, but got $max_occ"
+ log_test "CPU port's egress TC - ARP packet"
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ h2=${NETIFS[p2]}
+
+ h1mac=$(mac_get $h1)
+ h2mac=$(mac_get $h2)
+
+ dl_port1=$(devlink_port_by_netdev $h1)
+ dl_port2=$(devlink_port_by_netdev $h2)
+
+ cpu_dl_port=$(devlink_cpu_port_get)
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer_configuration.py b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer_configuration.py
new file mode 100755
index 000000000000..0d4b9327c9b3
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer_configuration.py
@@ -0,0 +1,416 @@
+#!/usr/bin/python
+# SPDX-License-Identifier: GPL-2.0
+
+import subprocess
+import json as j
+import random
+
+
+class SkipTest(Exception):
+ pass
+
+
+class RandomValuePicker:
+ """
+ Class for storing shared buffer configuration. Can handle 3 different
+ objects, pool, tcbind and portpool. Provide an interface to get random
+ values for a specific object type as the follow:
+ 1. Pool:
+ - random size
+
+ 2. TcBind:
+ - random pool number
+ - random threshold
+
+ 3. PortPool:
+ - random threshold
+ """
+ def __init__(self, pools):
+ self._pools = []
+ for pool in pools:
+ self._pools.append(pool)
+
+ def _cell_size(self):
+ return self._pools[0]["cell_size"]
+
+ def _get_static_size(self, th):
+ # For threshold of 16, this works out to be about 12MB on Spectrum-1,
+ # and about 17MB on Spectrum-2.
+ return th * 8000 * self._cell_size()
+
+ def _get_size(self):
+ return self._get_static_size(16)
+
+ def _get_thtype(self):
+ return "static"
+
+ def _get_th(self, pool):
+ # Threshold value could be any integer between 3 to 16
+ th = random.randint(3, 16)
+ if pool["thtype"] == "dynamic":
+ return th
+ else:
+ return self._get_static_size(th)
+
+ def _get_pool(self, direction):
+ ing_pools = []
+ egr_pools = []
+ for pool in self._pools:
+ if pool["type"] == "ingress":
+ ing_pools.append(pool)
+ else:
+ egr_pools.append(pool)
+ if direction == "ingress":
+ arr = ing_pools
+ else:
+ arr = egr_pools
+ return arr[random.randint(0, len(arr) - 1)]
+
+ def get_value(self, objid):
+ if isinstance(objid, Pool):
+ if objid["pool"] in [4, 8, 9, 10]:
+ # The threshold type of pools 4, 8, 9 and 10 cannot be changed
+ raise SkipTest()
+ else:
+ return (self._get_size(), self._get_thtype())
+ if isinstance(objid, TcBind):
+ if objid["tc"] >= 8:
+ # Multicast TCs cannot be changed
+ raise SkipTest()
+ else:
+ pool = self._get_pool(objid["type"])
+ th = self._get_th(pool)
+ pool_n = pool["pool"]
+ return (pool_n, th)
+ if isinstance(objid, PortPool):
+ pool_n = objid["pool"]
+ pool = self._pools[pool_n]
+ assert pool["pool"] == pool_n
+ th = self._get_th(pool)
+ return (th,)
+
+
+class RecordValuePickerException(Exception):
+ pass
+
+
+class RecordValuePicker:
+ """
+ Class for storing shared buffer configuration. Can handle 2 different
+ objects, pool and tcbind. Provide an interface to get the stored values per
+ object type.
+ """
+ def __init__(self, objlist):
+ self._recs = []
+ for item in objlist:
+ self._recs.append({"objid": item, "value": item.var_tuple()})
+
+ def get_value(self, objid):
+ if isinstance(objid, Pool) and objid["pool"] in [4, 8, 9, 10]:
+ # The threshold type of pools 4, 8, 9 and 10 cannot be changed
+ raise SkipTest()
+ if isinstance(objid, TcBind) and objid["tc"] >= 8:
+ # Multicast TCs cannot be changed
+ raise SkipTest()
+ for rec in self._recs:
+ if rec["objid"].weak_eq(objid):
+ return rec["value"]
+ raise RecordValuePickerException()
+
+
+def run_cmd(cmd, json=False):
+ out = subprocess.check_output(cmd, shell=True)
+ if json:
+ return j.loads(out)
+ return out
+
+
+def run_json_cmd(cmd):
+ return run_cmd(cmd, json=True)
+
+
+def log_test(test_name, err_msg=None):
+ if err_msg:
+ print("\t%s" % err_msg)
+ print("TEST: %-80s [FAIL]" % test_name)
+ else:
+ print("TEST: %-80s [ OK ]" % test_name)
+
+
+class CommonItem(dict):
+ varitems = []
+
+ def var_tuple(self):
+ ret = []
+ self.varitems.sort()
+ for key in self.varitems:
+ ret.append(self[key])
+ return tuple(ret)
+
+ def weak_eq(self, other):
+ for key in self:
+ if key in self.varitems:
+ continue
+ if self[key] != other[key]:
+ return False
+ return True
+
+
+class CommonList(list):
+ def get_by(self, by_obj):
+ for item in self:
+ if item.weak_eq(by_obj):
+ return item
+ return None
+
+ def del_by(self, by_obj):
+ for item in self:
+ if item.weak_eq(by_obj):
+ self.remove(item)
+
+
+class Pool(CommonItem):
+ varitems = ["size", "thtype"]
+
+ def dl_set(self, dlname, size, thtype):
+ run_cmd("devlink sb pool set {} sb {} pool {} size {} thtype {}".format(dlname, self["sb"],
+ self["pool"],
+ size, thtype))
+
+
+class PoolList(CommonList):
+ pass
+
+
+def get_pools(dlname, direction=None):
+ d = run_json_cmd("devlink sb pool show -j")
+ pools = PoolList()
+ for pooldict in d["pool"][dlname]:
+ if not direction or direction == pooldict["type"]:
+ pools.append(Pool(pooldict))
+ return pools
+
+
+def do_check_pools(dlname, pools, vp):
+ for pool in pools:
+ pre_pools = get_pools(dlname)
+ try:
+ (size, thtype) = vp.get_value(pool)
+ except SkipTest:
+ continue
+ pool.dl_set(dlname, size, thtype)
+ post_pools = get_pools(dlname)
+ pool = post_pools.get_by(pool)
+
+ err_msg = None
+ if pool["size"] != size:
+ err_msg = "Incorrect pool size (got {}, expected {})".format(pool["size"], size)
+ if pool["thtype"] != thtype:
+ err_msg = "Incorrect pool threshold type (got {}, expected {})".format(pool["thtype"], thtype)
+
+ pre_pools.del_by(pool)
+ post_pools.del_by(pool)
+ if pre_pools != post_pools:
+ err_msg = "Other pool setup changed as well"
+ log_test("pool {} of sb {} set verification".format(pool["pool"],
+ pool["sb"]), err_msg)
+
+
+def check_pools(dlname, pools):
+ # Save defaults
+ record_vp = RecordValuePicker(pools)
+
+ # For each pool, set random size and static threshold type
+ do_check_pools(dlname, pools, RandomValuePicker(pools))
+
+ # Restore defaults
+ do_check_pools(dlname, pools, record_vp)
+
+
+class TcBind(CommonItem):
+ varitems = ["pool", "threshold"]
+
+ def __init__(self, port, d):
+ super(TcBind, self).__init__(d)
+ self["dlportname"] = port.name
+
+ def dl_set(self, pool, th):
+ run_cmd("devlink sb tc bind set {} sb {} tc {} type {} pool {} th {}".format(self["dlportname"],
+ self["sb"],
+ self["tc"],
+ self["type"],
+ pool, th))
+
+
+class TcBindList(CommonList):
+ pass
+
+
+def get_tcbinds(ports, verify_existence=False):
+ d = run_json_cmd("devlink sb tc bind show -j -n")
+ tcbinds = TcBindList()
+ for port in ports:
+ err_msg = None
+ if port.name not in d["tc_bind"] or len(d["tc_bind"][port.name]) == 0:
+ err_msg = "No tc bind for port"
+ else:
+ for tcbinddict in d["tc_bind"][port.name]:
+ tcbinds.append(TcBind(port, tcbinddict))
+ if verify_existence:
+ log_test("tc bind existence for port {} verification".format(port.name), err_msg)
+ return tcbinds
+
+
+def do_check_tcbind(ports, tcbinds, vp):
+ for tcbind in tcbinds:
+ pre_tcbinds = get_tcbinds(ports)
+ try:
+ (pool, th) = vp.get_value(tcbind)
+ except SkipTest:
+ continue
+ tcbind.dl_set(pool, th)
+ post_tcbinds = get_tcbinds(ports)
+ tcbind = post_tcbinds.get_by(tcbind)
+
+ err_msg = None
+ if tcbind["pool"] != pool:
+ err_msg = "Incorrect pool (got {}, expected {})".format(tcbind["pool"], pool)
+ if tcbind["threshold"] != th:
+ err_msg = "Incorrect threshold (got {}, expected {})".format(tcbind["threshold"], th)
+
+ pre_tcbinds.del_by(tcbind)
+ post_tcbinds.del_by(tcbind)
+ if pre_tcbinds != post_tcbinds:
+ err_msg = "Other tc bind setup changed as well"
+ log_test("tc bind {}-{} of sb {} set verification".format(tcbind["dlportname"],
+ tcbind["tc"],
+ tcbind["sb"]), err_msg)
+
+
+def check_tcbind(dlname, ports, pools):
+ tcbinds = get_tcbinds(ports, verify_existence=True)
+
+ # Save defaults
+ record_vp = RecordValuePicker(tcbinds)
+
+ # Bind each port and unicast TC (TCs < 8) to a random pool and a random
+ # threshold
+ do_check_tcbind(ports, tcbinds, RandomValuePicker(pools))
+
+ # Restore defaults
+ do_check_tcbind(ports, tcbinds, record_vp)
+
+
+class PortPool(CommonItem):
+ varitems = ["threshold"]
+
+ def __init__(self, port, d):
+ super(PortPool, self).__init__(d)
+ self["dlportname"] = port.name
+
+ def dl_set(self, th):
+ run_cmd("devlink sb port pool set {} sb {} pool {} th {}".format(self["dlportname"],
+ self["sb"],
+ self["pool"], th))
+
+
+class PortPoolList(CommonList):
+ pass
+
+
+def get_portpools(ports, verify_existence=False):
+ d = run_json_cmd("devlink sb port pool -j -n")
+ portpools = PortPoolList()
+ for port in ports:
+ err_msg = None
+ if port.name not in d["port_pool"] or len(d["port_pool"][port.name]) == 0:
+ err_msg = "No port pool for port"
+ else:
+ for portpooldict in d["port_pool"][port.name]:
+ portpools.append(PortPool(port, portpooldict))
+ if verify_existence:
+ log_test("port pool existence for port {} verification".format(port.name), err_msg)
+ return portpools
+
+
+def do_check_portpool(ports, portpools, vp):
+ for portpool in portpools:
+ pre_portpools = get_portpools(ports)
+ (th,) = vp.get_value(portpool)
+ portpool.dl_set(th)
+ post_portpools = get_portpools(ports)
+ portpool = post_portpools.get_by(portpool)
+
+ err_msg = None
+ if portpool["threshold"] != th:
+ err_msg = "Incorrect threshold (got {}, expected {})".format(portpool["threshold"], th)
+
+ pre_portpools.del_by(portpool)
+ post_portpools.del_by(portpool)
+ if pre_portpools != post_portpools:
+ err_msg = "Other port pool setup changed as well"
+ log_test("port pool {}-{} of sb {} set verification".format(portpool["dlportname"],
+ portpool["pool"],
+ portpool["sb"]), err_msg)
+
+
+def check_portpool(dlname, ports, pools):
+ portpools = get_portpools(ports, verify_existence=True)
+
+ # Save defaults
+ record_vp = RecordValuePicker(portpools)
+
+ # For each port pool, set a random threshold
+ do_check_portpool(ports, portpools, RandomValuePicker(pools))
+
+ # Restore defaults
+ do_check_portpool(ports, portpools, record_vp)
+
+
+class Port:
+ def __init__(self, name):
+ self.name = name
+
+
+class PortList(list):
+ pass
+
+
+def get_ports(dlname):
+ d = run_json_cmd("devlink port show -j")
+ ports = PortList()
+ for name in d["port"]:
+ if name.find(dlname) == 0 and d["port"][name]["flavour"] == "physical":
+ ports.append(Port(name))
+ return ports
+
+
+def get_device():
+ devices_info = run_json_cmd("devlink -j dev info")["info"]
+ for d in devices_info:
+ if "mlxsw_spectrum" in devices_info[d]["driver"]:
+ return d
+ return None
+
+
+class UnavailableDevlinkNameException(Exception):
+ pass
+
+
+def test_sb_configuration():
+ # Use static seed
+ random.seed(0)
+
+ dlname = get_device()
+ if not dlname:
+ raise UnavailableDevlinkNameException()
+
+ ports = get_ports(dlname)
+ pools = get_pools(dlname)
+
+ check_pools(dlname, pools)
+ check_tcbind(dlname, ports, pools)
+ check_portpool(dlname, ports, pools)
+
+
+test_sb_configuration()
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
index 7b2acba82a49..fd583a171db7 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
@@ -8,8 +8,9 @@ source $lib_dir/lib.sh
source $lib_dir/tc_common.sh
source $lib_dir/devlink_lib.sh
-if [ "$DEVLINK_VIDDID" != "15b3:cf6c" ]; then
- echo "SKIP: test is tailored for Mellanox Spectrum-2"
+if [[ "$DEVLINK_VIDDID" != "15b3:cf6c" && \
+ "$DEVLINK_VIDDID" != "15b3:cf70" ]]; then
+ echo "SKIP: test is tailored for Mellanox Spectrum-2 and Spectrum-3"
exit 1
fi
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh
index a0795227216e..efd798a85931 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh
@@ -8,9 +8,9 @@ tc_flower_get_target()
# The driver associates a counter with each tc filter, which means the
# number of supported filters is bounded by the number of available
# counters.
- # Currently, the driver supports 12K (12,288) flow counters and six of
+ # Currently, the driver supports 30K (30,720) flow counters and six of
# these are used for multicast routing.
- local target=12282
+ local target=30714
if ((! should_fail)); then
echo $target
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_action_hw_stats.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_action_hw_stats.sh
new file mode 100755
index 000000000000..20ed98fe5a60
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_action_hw_stats.sh
@@ -0,0 +1,130 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ default_hw_stats_test
+ immediate_hw_stats_test
+ delayed_hw_stats_test
+ disabled_hw_stats_test
+"
+NUM_NETIFS=2
+
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24
+}
+
+switch_create()
+{
+ simple_if_init $swp1 192.0.2.2/24
+ tc qdisc add dev $swp1 clsact
+}
+
+switch_destroy()
+{
+ tc qdisc del dev $swp1 clsact
+ simple_if_fini $swp1 192.0.2.2/24
+}
+
+hw_stats_test()
+{
+ RET=0
+
+ local name=$1
+ local action_hw_stats=$2
+ local occ_delta=$3
+ local expected_packet_count=$4
+
+ local orig_occ=$(devlink_resource_get "counters" "flow" | jq '.["occ"]')
+
+ tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+ skip_sw dst_ip 192.0.2.2 action drop $action_hw_stats
+ check_err $? "Failed to add rule with $name hw_stats"
+
+ local new_occ=$(devlink_resource_get "counters" "flow" | jq '.["occ"]')
+ local expected_occ=$((orig_occ + occ_delta))
+ [ "$new_occ" == "$expected_occ" ]
+ check_err $? "Expected occupancy of $expected_occ, got $new_occ"
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $swp1mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $swp1 ingress" 101 $expected_packet_count
+ check_err $? "Did not match incoming packet"
+
+ tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+ log_test "$name hw_stats"
+}
+
+default_hw_stats_test()
+{
+ hw_stats_test "default" "" 2 1
+}
+
+immediate_hw_stats_test()
+{
+ hw_stats_test "immediate" "hw_stats immediate" 2 1
+}
+
+delayed_hw_stats_test()
+{
+ RET=0
+
+ tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+ skip_sw dst_ip 192.0.2.2 action drop hw_stats delayed
+ check_fail $? "Unexpected success in adding rule with delayed hw_stats"
+
+ log_test "delayed hw_stats"
+}
+
+disabled_hw_stats_test()
+{
+ hw_stats_test "disabled" "hw_stats disabled" 0 0
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ h1mac=$(mac_get $h1)
+ swp1mac=$(mac_get $swp1)
+
+ vrf_prepare
+
+ h1_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+check_tc_action_hw_stats_support
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_restrictions.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_restrictions.sh
new file mode 100755
index 000000000000..68c80d0ec1ec
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_restrictions.sh
@@ -0,0 +1,186 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ shared_block_drop_test
+ egress_redirect_test
+ multi_mirror_test
+"
+NUM_NETIFS=2
+
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+
+switch_create()
+{
+ simple_if_init $swp1 192.0.2.1/24
+ simple_if_init $swp2 192.0.2.2/24
+}
+
+switch_destroy()
+{
+ simple_if_fini $swp2 192.0.2.2/24
+ simple_if_fini $swp1 192.0.2.1/24
+}
+
+shared_block_drop_test()
+{
+ RET=0
+
+ # It is forbidden in mlxsw driver to have mixed-bound
+ # shared block with a drop rule.
+
+ tc qdisc add dev $swp1 ingress_block 22 clsact
+ check_err $? "Failed to create clsact with ingress block"
+
+ tc filter add block 22 protocol ip pref 1 handle 101 flower \
+ skip_sw dst_ip 192.0.2.2 action drop
+ check_err $? "Failed to add drop rule to ingress bound block"
+
+ tc qdisc add dev $swp2 ingress_block 22 clsact
+ check_err $? "Failed to create another clsact with ingress shared block"
+
+ tc qdisc del dev $swp2 clsact
+
+ tc qdisc add dev $swp2 egress_block 22 clsact
+ check_fail $? "Incorrect success to create another clsact with egress shared block"
+
+ tc filter del block 22 protocol ip pref 1 handle 101 flower
+
+ tc qdisc add dev $swp2 egress_block 22 clsact
+ check_err $? "Failed to create another clsact with egress shared block after blocker drop rule removed"
+
+ tc filter add block 22 protocol ip pref 1 handle 101 flower \
+ skip_sw dst_ip 192.0.2.2 action drop
+ check_fail $? "Incorrect success to add drop rule to mixed bound block"
+
+ tc qdisc del dev $swp1 clsact
+
+ tc qdisc add dev $swp1 egress_block 22 clsact
+ check_err $? "Failed to create another clsact with egress shared block"
+
+ tc filter add block 22 protocol ip pref 1 handle 101 flower \
+ skip_sw dst_ip 192.0.2.2 action drop
+ check_err $? "Failed to add drop rule to egress bound shared block"
+
+ tc filter del block 22 protocol ip pref 1 handle 101 flower
+
+ tc qdisc del dev $swp2 clsact
+ tc qdisc del dev $swp1 clsact
+
+ log_test "shared block drop"
+}
+
+egress_redirect_test()
+{
+ RET=0
+
+ # It is forbidden in mlxsw driver to have mirred redirect on
+ # egress-bound block.
+
+ tc qdisc add dev $swp1 ingress_block 22 clsact
+ check_err $? "Failed to create clsact with ingress block"
+
+ tc filter add block 22 protocol ip pref 1 handle 101 flower \
+ skip_sw dst_ip 192.0.2.2 \
+ action mirred egress redirect dev $swp2
+ check_err $? "Failed to add redirect rule to ingress bound block"
+
+ tc qdisc add dev $swp2 ingress_block 22 clsact
+ check_err $? "Failed to create another clsact with ingress shared block"
+
+ tc qdisc del dev $swp2 clsact
+
+ tc qdisc add dev $swp2 egress_block 22 clsact
+ check_fail $? "Incorrect success to create another clsact with egress shared block"
+
+ tc filter del block 22 protocol ip pref 1 handle 101 flower
+
+ tc qdisc add dev $swp2 egress_block 22 clsact
+ check_err $? "Failed to create another clsact with egress shared block after blocker redirect rule removed"
+
+ tc filter add block 22 protocol ip pref 1 handle 101 flower \
+ skip_sw dst_ip 192.0.2.2 \
+ action mirred egress redirect dev $swp2
+ check_fail $? "Incorrect success to add redirect rule to mixed bound block"
+
+ tc qdisc del dev $swp1 clsact
+
+ tc qdisc add dev $swp1 egress_block 22 clsact
+ check_err $? "Failed to create another clsact with egress shared block"
+
+ tc filter add block 22 protocol ip pref 1 handle 101 flower \
+ skip_sw dst_ip 192.0.2.2 \
+ action mirred egress redirect dev $swp2
+ check_fail $? "Incorrect success to add redirect rule to egress bound shared block"
+
+ tc qdisc del dev $swp2 clsact
+
+ tc filter add block 22 protocol ip pref 1 handle 101 flower \
+ skip_sw dst_ip 192.0.2.2 \
+ action mirred egress redirect dev $swp2
+ check_fail $? "Incorrect success to add redirect rule to egress bound block"
+
+ tc qdisc del dev $swp1 clsact
+
+ log_test "shared block drop"
+}
+
+multi_mirror_test()
+{
+ RET=0
+
+ # It is forbidden in mlxsw driver to have multiple mirror
+ # actions in a single rule.
+
+ tc qdisc add dev $swp1 clsact
+
+ tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+ skip_sw dst_ip 192.0.2.2 \
+ action mirred egress mirror dev $swp2
+ check_err $? "Failed to add rule with single mirror action"
+
+ tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+ tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+ skip_sw dst_ip 192.0.2.2 \
+ action mirred egress mirror dev $swp2 \
+ action mirred egress mirror dev $swp1
+ check_fail $? "Incorrect success to add rule with two mirror actions"
+
+ tc qdisc del dev $swp1 clsact
+
+ log_test "multi mirror"
+}
+
+setup_prepare()
+{
+ swp1=${NETIFS[p1]}
+ swp2=${NETIFS[p2]}
+
+ vrf_prepare
+
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+
+ vrf_cleanup
+}
+
+check_tc_shblock_support
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh
index a6d733d2a4b4..cc0f07e72cf2 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh
@@ -2,9 +2,9 @@
# SPDX-License-Identifier: GPL-2.0
# Test for resource limit of offloaded flower rules. The test adds a given
-# number of flower matches for different IPv6 addresses, then generates traffic,
-# and ensures each was hit exactly once. This file contains functions to set up
-# a testing topology and run the test, and is meant to be sourced from a test
+# number of flower matches for different IPv6 addresses, then check the offload
+# indication for all of the tc flower rules. This file contains functions to set
+# up a testing topology and run the test, and is meant to be sourced from a test
# script that calls the testing routine with a given number of rules.
TC_FLOWER_NUM_NETIFS=2
@@ -94,22 +94,15 @@ __tc_flower_test()
tc_flower_rules_create $count $should_fail
- for ((i = 0; i < count; ++i)); do
- $MZ $h1 -q -c 1 -t ip -p 20 -b bc -6 \
- -A 2001:db8:2::1 \
- -B $(tc_flower_addr $i)
- done
-
- MISMATCHES=$(
- tc -j -s filter show dev $h2 ingress |
- jq -r '[ .[] | select(.kind == "flower") | .options |
- values as $rule | .actions[].stats.packets |
- select(. != 1) | "\(.) on \($rule.keys.dst_ip)" ] |
- join(", ")'
- )
-
- test -z "$MISMATCHES"
- check_err $? "Expected to capture 1 packet for each IP, but got $MISMATCHES"
+ offload_count=$(tc -j -s filter show dev $h2 ingress |
+ jq -r '[ .[] | select(.kind == "flower") |
+ .options | .in_hw ]' | jq .[] | wc -l)
+ [[ $((offload_count - 1)) -eq $count ]]
+ if [[ $should_fail -eq 0 ]]; then
+ check_err $? "Offload mismatch"
+ else
+ check_err_fail $should_fail $? "Offload more than expacted"
+ fi
}
tc_flower_test()
diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh
index 4632f51af7ab..729a86cc4ede 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh
@@ -9,6 +9,7 @@ lib_dir=$(dirname $0)/../../../net/forwarding
ALL_TESTS="sanitization_test offload_indication_test \
sanitization_vlan_aware_test offload_indication_vlan_aware_test"
NUM_NETIFS=2
+: ${TIMEOUT:=20000} # ms
source $lib_dir/lib.sh
setup_prepare()
@@ -470,8 +471,8 @@ offload_indication_fdb_flood_test()
bridge fdb append 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.2
- bridge fdb show brport vxlan0 | grep 00:00:00:00:00:00 \
- | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb 00:00:00:00:00:00 \
+ bridge fdb show brport vxlan0
check_err $?
bridge fdb del 00:00:00:00:00:00 dev vxlan0 self
@@ -486,11 +487,11 @@ offload_indication_fdb_bridge_test()
bridge fdb add de:ad:be:ef:13:37 dev vxlan0 self master static \
dst 198.51.100.2
- bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep self \
- | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+ de:ad:be:ef:13:37 self bridge fdb show brport vxlan0
check_err $?
- bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep -v self \
- | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+ de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan0
check_err $?
log_test "vxlan entry offload indication - initial state"
@@ -500,9 +501,9 @@ offload_indication_fdb_bridge_test()
RET=0
bridge fdb del de:ad:be:ef:13:37 dev vxlan0 master
- bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep self \
- | grep -q offload
- check_fail $?
+ busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb \
+ de:ad:be:ef:13:37 self bridge fdb show brport vxlan0
+ check_err $?
log_test "vxlan entry offload indication - after removal from bridge"
@@ -511,11 +512,11 @@ offload_indication_fdb_bridge_test()
RET=0
bridge fdb add de:ad:be:ef:13:37 dev vxlan0 master static
- bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep self \
- | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+ de:ad:be:ef:13:37 self bridge fdb show brport vxlan0
check_err $?
- bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep -v self \
- | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+ de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan0
check_err $?
log_test "vxlan entry offload indication - after re-add to bridge"
@@ -525,9 +526,9 @@ offload_indication_fdb_bridge_test()
RET=0
bridge fdb del de:ad:be:ef:13:37 dev vxlan0 self
- bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep -v self \
- | grep -q offload
- check_fail $?
+ busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb \
+ de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan0
+ check_err $?
log_test "vxlan entry offload indication - after removal from vxlan"
@@ -536,11 +537,11 @@ offload_indication_fdb_bridge_test()
RET=0
bridge fdb add de:ad:be:ef:13:37 dev vxlan0 self dst 198.51.100.2
- bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep self \
- | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+ de:ad:be:ef:13:37 self bridge fdb show brport vxlan0
check_err $?
- bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep -v self \
- | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+ de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan0
check_err $?
log_test "vxlan entry offload indication - after re-add to vxlan"
@@ -558,27 +559,32 @@ offload_indication_decap_route_test()
{
RET=0
- ip route show table local | grep 198.51.100.1 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip route show table local 198.51.100.1
check_err $?
ip link set dev vxlan0 down
- ip route show table local | grep 198.51.100.1 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip route show table local 198.51.100.1
check_err $?
ip link set dev vxlan1 down
- ip route show table local | grep 198.51.100.1 | grep -q offload
- check_fail $?
+ busywait "$TIMEOUT" not wait_for_offload \
+ ip route show table local 198.51.100.1
+ check_err $?
log_test "vxlan decap route - vxlan device down"
RET=0
ip link set dev vxlan1 up
- ip route show table local | grep 198.51.100.1 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip route show table local 198.51.100.1
check_err $?
ip link set dev vxlan0 up
- ip route show table local | grep 198.51.100.1 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip route show table local 198.51.100.1
check_err $?
log_test "vxlan decap route - vxlan device up"
@@ -586,11 +592,13 @@ offload_indication_decap_route_test()
RET=0
ip address delete 198.51.100.1/32 dev lo
- ip route show table local | grep 198.51.100.1 | grep -q offload
- check_fail $?
+ busywait "$TIMEOUT" not wait_for_offload \
+ ip route show table local 198.51.100.1
+ check_err $?
ip address add 198.51.100.1/32 dev lo
- ip route show table local | grep 198.51.100.1 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip route show table local 198.51.100.1
check_err $?
log_test "vxlan decap route - add local route"
@@ -598,16 +606,19 @@ offload_indication_decap_route_test()
RET=0
ip link set dev $swp1 nomaster
- ip route show table local | grep 198.51.100.1 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip route show table local 198.51.100.1
check_err $?
ip link set dev $swp2 nomaster
- ip route show table local | grep 198.51.100.1 | grep -q offload
- check_fail $?
+ busywait "$TIMEOUT" not wait_for_offload \
+ ip route show table local 198.51.100.1
+ check_err $?
ip link set dev $swp1 master br0
ip link set dev $swp2 master br1
- ip route show table local | grep 198.51.100.1 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip route show table local 198.51.100.1
check_err $?
log_test "vxlan decap route - local ports enslavement"
@@ -615,12 +626,14 @@ offload_indication_decap_route_test()
RET=0
ip link del dev br0
- ip route show table local | grep 198.51.100.1 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip route show table local 198.51.100.1
check_err $?
ip link del dev br1
- ip route show table local | grep 198.51.100.1 | grep -q offload
- check_fail $?
+ busywait "$TIMEOUT" not wait_for_offload \
+ ip route show table local 198.51.100.1
+ check_err $?
log_test "vxlan decap route - bridge device deletion"
@@ -632,16 +645,19 @@ offload_indication_decap_route_test()
ip link set dev $swp2 master br1
ip link set dev vxlan0 master br0
ip link set dev vxlan1 master br1
- ip route show table local | grep 198.51.100.1 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip route show table local 198.51.100.1
check_err $?
ip link del dev vxlan0
- ip route show table local | grep 198.51.100.1 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip route show table local 198.51.100.1
check_err $?
ip link del dev vxlan1
- ip route show table local | grep 198.51.100.1 | grep -q offload
- check_fail $?
+ busywait "$TIMEOUT" not wait_for_offload \
+ ip route show table local 198.51.100.1
+ check_err $?
log_test "vxlan decap route - vxlan device deletion"
@@ -656,12 +672,15 @@ check_fdb_offloaded()
local mac=00:11:22:33:44:55
local zmac=00:00:00:00:00:00
- bridge fdb show dev vxlan0 | grep $mac | grep self | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $mac self \
+ bridge fdb show dev vxlan0
check_err $?
- bridge fdb show dev vxlan0 | grep $mac | grep master | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $mac master \
+ bridge fdb show dev vxlan0
check_err $?
- bridge fdb show dev vxlan0 | grep $zmac | grep self | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $zmac self \
+ bridge fdb show dev vxlan0
check_err $?
}
@@ -672,13 +691,15 @@ check_vxlan_fdb_not_offloaded()
bridge fdb show dev vxlan0 | grep $mac | grep -q self
check_err $?
- bridge fdb show dev vxlan0 | grep $mac | grep self | grep -q offload
- check_fail $?
+ busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb $mac self \
+ bridge fdb show dev vxlan0
+ check_err $?
bridge fdb show dev vxlan0 | grep $zmac | grep -q self
check_err $?
- bridge fdb show dev vxlan0 | grep $zmac | grep self | grep -q offload
- check_fail $?
+ busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb $zmac self \
+ bridge fdb show dev vxlan0
+ check_err $?
}
check_bridge_fdb_not_offloaded()
@@ -688,8 +709,9 @@ check_bridge_fdb_not_offloaded()
bridge fdb show dev vxlan0 | grep $mac | grep -q master
check_err $?
- bridge fdb show dev vxlan0 | grep $mac | grep master | grep -q offload
- check_fail $?
+ busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb $mac master \
+ bridge fdb show dev vxlan0
+ check_err $?
}
__offload_indication_join_vxlan_first()
@@ -771,12 +793,14 @@ __offload_indication_join_vxlan_last()
ip link set dev $swp1 master br0
- bridge fdb show dev vxlan0 | grep $zmac | grep self | grep -q offload
- check_fail $?
+ busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb $zmac self \
+ bridge fdb show dev vxlan0
+ check_err $?
ip link set dev vxlan0 master br0
- bridge fdb show dev vxlan0 | grep $zmac | grep self | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $zmac self \
+ bridge fdb show dev vxlan0
check_err $?
log_test "offload indication - attach vxlan last"
@@ -854,20 +878,26 @@ sanitization_vlan_aware_test()
bridge vlan del vid 10 dev vxlan20
bridge vlan add vid 20 dev vxlan20 pvid untagged
- # Test that offloading of an unsupported tunnel fails when it is
- # triggered by addition of VLAN to a local port
- RET=0
+ # Test that when two VXLAN tunnels with conflicting configurations
+ # (i.e., different TTL) are enslaved to the same VLAN-aware bridge,
+ # then the enslavement of a port to the bridge is denied.
- # TOS must be set to inherit
- ip link set dev vxlan10 type vxlan tos 42
+ # Use the offload indication of the local route to ensure the VXLAN
+ # configuration was correctly rollbacked.
+ ip address add 198.51.100.1/32 dev lo
- ip link set dev $swp1 master br0
- bridge vlan add vid 10 dev $swp1 &> /dev/null
+ ip link set dev vxlan10 type vxlan ttl 10
+ ip link set dev $swp1 master br0 &> /dev/null
check_fail $?
- log_test "vlan-aware - failed vlan addition to a local port"
+ busywait "$TIMEOUT" not wait_for_offload \
+ ip route show table local 198.51.100.1
+ check_err $?
+
+ log_test "vlan-aware - failed enslavement to bridge due to conflict"
- ip link set dev vxlan10 type vxlan tos inherit
+ ip link set dev vxlan10 type vxlan ttl 20
+ ip address del 198.51.100.1/32 dev lo
ip link del dev vxlan20
ip link del dev vxlan10
@@ -924,11 +954,11 @@ offload_indication_vlan_aware_fdb_test()
bridge fdb add de:ad:be:ef:13:37 dev vxlan10 self master static \
dst 198.51.100.2 vlan 10
- bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep self \
- | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+ de:ad:be:ef:13:37 self bridge fdb show brport vxlan10
check_err $?
- bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep -v self \
- | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+ de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan10
check_err $?
log_test "vxlan entry offload indication - initial state"
@@ -938,9 +968,9 @@ offload_indication_vlan_aware_fdb_test()
RET=0
bridge fdb del de:ad:be:ef:13:37 dev vxlan10 master vlan 10
- bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep self \
- | grep -q offload
- check_fail $?
+ busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb \
+ de:ad:be:ef:13:37 self bridge fdb show brport vxlan10
+ check_err $?
log_test "vxlan entry offload indication - after removal from bridge"
@@ -949,11 +979,11 @@ offload_indication_vlan_aware_fdb_test()
RET=0
bridge fdb add de:ad:be:ef:13:37 dev vxlan10 master static vlan 10
- bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep self \
- | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+ de:ad:be:ef:13:37 self bridge fdb show brport vxlan10
check_err $?
- bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep -v self \
- | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+ de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan10
check_err $?
log_test "vxlan entry offload indication - after re-add to bridge"
@@ -963,9 +993,9 @@ offload_indication_vlan_aware_fdb_test()
RET=0
bridge fdb del de:ad:be:ef:13:37 dev vxlan10 self
- bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep -v self \
- | grep -q offload
- check_fail $?
+ busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb \
+ de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan10
+ check_err $?
log_test "vxlan entry offload indication - after removal from vxlan"
@@ -974,11 +1004,11 @@ offload_indication_vlan_aware_fdb_test()
RET=0
bridge fdb add de:ad:be:ef:13:37 dev vxlan10 self dst 198.51.100.2
- bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep self \
- | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+ de:ad:be:ef:13:37 self bridge fdb show brport vxlan10
check_err $?
- bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep -v self \
- | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+ de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan10
check_err $?
log_test "vxlan entry offload indication - after re-add to vxlan"
@@ -990,28 +1020,31 @@ offload_indication_vlan_aware_decap_route_test()
{
RET=0
- ip route show table local | grep 198.51.100.1 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip route show table local 198.51.100.1
check_err $?
# Toggle PVID flag on one VxLAN device and make sure route is still
# marked as offloaded
bridge vlan add vid 10 dev vxlan10 untagged
- ip route show table local | grep 198.51.100.1 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip route show table local 198.51.100.1
check_err $?
# Toggle PVID flag on second VxLAN device and make sure route is no
# longer marked as offloaded
bridge vlan add vid 20 dev vxlan20 untagged
- ip route show table local | grep 198.51.100.1 | grep -q offload
- check_fail $?
+ busywait "$TIMEOUT" not wait_for_offload \
+ ip route show table local 198.51.100.1
+ check_err $?
# Toggle PVID flag back and make sure route is marked as offloaded
bridge vlan add vid 10 dev vxlan10 pvid untagged
bridge vlan add vid 20 dev vxlan20 pvid untagged
- ip route show table local | grep 198.51.100.1 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload ip route show table local 198.51.100.1
check_err $?
log_test "vxlan decap route - vni map/unmap"
@@ -1064,35 +1097,33 @@ offload_indication_vlan_aware_l3vni_test()
ip link set dev vxlan0 master br0
bridge vlan add dev vxlan0 vid 10 pvid untagged
- # No local port or router port is member in the VLAN, so tunnel should
- # not be offloaded
- bridge fdb show brport vxlan0 | grep $zmac | grep self \
- | grep -q offload
- check_fail $? "vxlan tunnel offloaded when should not"
+ busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $zmac self \
+ bridge fdb show brport vxlan0
+ check_err $? "vxlan tunnel not offloaded when should"
# Configure a VLAN interface and make sure tunnel is offloaded
ip link add link br0 name br10 up type vlan id 10
sysctl_set net.ipv6.conf.br10.disable_ipv6 0
ip -6 address add 2001:db8:1::1/64 dev br10
- bridge fdb show brport vxlan0 | grep $zmac | grep self \
- | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $zmac self \
+ bridge fdb show brport vxlan0
check_err $? "vxlan tunnel not offloaded when should"
# Unlink the VXLAN device, make sure tunnel is no longer offloaded,
# then add it back to the bridge and make sure it is offloaded
ip link set dev vxlan0 nomaster
- bridge fdb show brport vxlan0 | grep $zmac | grep self \
- | grep -q offload
- check_fail $? "vxlan tunnel offloaded after unlinked from bridge"
+ busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb $zmac self \
+ bridge fdb show brport vxlan0
+ check_err $? "vxlan tunnel offloaded after unlinked from bridge"
ip link set dev vxlan0 master br0
- bridge fdb show brport vxlan0 | grep $zmac | grep self \
- | grep -q offload
- check_fail $? "vxlan tunnel offloaded despite no matching vid"
+ busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb $zmac self \
+ bridge fdb show brport vxlan0
+ check_err $? "vxlan tunnel offloaded despite no matching vid"
bridge vlan add dev vxlan0 vid 10 pvid untagged
- bridge fdb show brport vxlan0 | grep $zmac | grep self \
- | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $zmac self \
+ bridge fdb show brport vxlan0
check_err $? "vxlan tunnel not offloaded after adding vid"
log_test "vxlan - l3 vni"
diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
index 025a84c2ab5a..9f9741444549 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
@@ -141,6 +141,16 @@ regions_test()
check_region_snapshot_count dummy post-first-delete 2
+ devlink region new $DL_HANDLE/dummy snapshot 25
+ check_err $? "Failed to create a new snapshot with id 25"
+
+ check_region_snapshot_count dummy post-first-request 3
+
+ devlink region del $DL_HANDLE/dummy snapshot 25
+ check_err $? "Failed to delete snapshot with id 25"
+
+ check_region_snapshot_count dummy post-second-delete 2
+
log_test "regions test"
}
@@ -367,6 +377,11 @@ dummy_reporter_test()
{
RET=0
+ check_reporter_info dummy healthy 0 0 0 true
+
+ devlink health set $DL_HANDLE reporter dummy auto_recover false
+ check_err $? "Failed to dummy reporter auto_recover option"
+
check_reporter_info dummy healthy 0 0 0 false
local BREAK_MSG="foo bar"
diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh
index f101ab9441e2..dbd1e014ba17 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh
@@ -16,6 +16,8 @@ ALL_TESTS="
trap_group_action_test
bad_trap_group_test
trap_group_stats_test
+ trap_policer_test
+ trap_policer_bind_test
port_del_test
dev_del_test
"
@@ -23,6 +25,7 @@ NETDEVSIM_PATH=/sys/bus/netdevsim/
DEV_ADDR=1337
DEV=netdevsim${DEV_ADDR}
DEVLINK_DEV=netdevsim/${DEV}
+DEBUGFS_DIR=/sys/kernel/debug/netdevsim/$DEV/
SLEEP_TIME=1
NETDEV=""
NUM_NETIFS=0
@@ -103,6 +106,11 @@ trap_metadata_test()
for trap_name in $(devlink_traps_get); do
devlink_trap_metadata_test $trap_name "input_port"
check_err $? "Input port not reported as metadata of trap $trap_name"
+ if [ $trap_name == "ingress_flow_action_drop" ] ||
+ [ $trap_name == "egress_flow_action_drop" ]; then
+ devlink_trap_metadata_test $trap_name "flow_action_cookie"
+ check_err $? "Flow action cookie not reported as metadata of trap $trap_name"
+ fi
done
log_test "Trap metadata"
@@ -251,6 +259,119 @@ trap_group_stats_test()
log_test "Trap group statistics"
}
+trap_policer_test()
+{
+ local packets_t0
+ local packets_t1
+
+ if [ $(devlink_trap_policers_num_get) -eq 0 ]; then
+ check_err 1 "Failed to dump policers"
+ fi
+
+ devlink trap policer set $DEVLINK_DEV policer 1337 &> /dev/null
+ check_fail $? "Did not get an error for setting a non-existing policer"
+ devlink trap policer show $DEVLINK_DEV policer 1337 &> /dev/null
+ check_fail $? "Did not get an error for getting a non-existing policer"
+
+ devlink trap policer set $DEVLINK_DEV policer 1 rate 2000 burst 16
+ check_err $? "Failed to set valid parameters for a valid policer"
+ if [ $(devlink_trap_policer_rate_get 1) -ne 2000 ]; then
+ check_err 1 "Policer rate was not changed"
+ fi
+ if [ $(devlink_trap_policer_burst_get 1) -ne 16 ]; then
+ check_err 1 "Policer burst size was not changed"
+ fi
+
+ devlink trap policer set $DEVLINK_DEV policer 1 rate 0 &> /dev/null
+ check_fail $? "Policer rate was changed to rate lower than limit"
+ devlink trap policer set $DEVLINK_DEV policer 1 rate 9000 &> /dev/null
+ check_fail $? "Policer rate was changed to rate higher than limit"
+ devlink trap policer set $DEVLINK_DEV policer 1 burst 2 &> /dev/null
+ check_fail $? "Policer burst size was changed to burst size lower than limit"
+ devlink trap policer set $DEVLINK_DEV policer 1 rate 65537 &> /dev/null
+ check_fail $? "Policer burst size was changed to burst size higher than limit"
+ echo "y" > $DEBUGFS_DIR/fail_trap_policer_set
+ devlink trap policer set $DEVLINK_DEV policer 1 rate 3000 &> /dev/null
+ check_fail $? "Managed to set policer rate when should not"
+ echo "n" > $DEBUGFS_DIR/fail_trap_policer_set
+ if [ $(devlink_trap_policer_rate_get 1) -ne 2000 ]; then
+ check_err 1 "Policer rate was changed to an invalid value"
+ fi
+ if [ $(devlink_trap_policer_burst_get 1) -ne 16 ]; then
+ check_err 1 "Policer burst size was changed to an invalid value"
+ fi
+
+ packets_t0=$(devlink_trap_policer_rx_dropped_get 1)
+ sleep .5
+ packets_t1=$(devlink_trap_policer_rx_dropped_get 1)
+ if [ ! $packets_t1 -gt $packets_t0 ]; then
+ check_err 1 "Policer drop counter was not incremented"
+ fi
+
+ echo "y"> $DEBUGFS_DIR/fail_trap_policer_counter_get
+ devlink -s trap policer show $DEVLINK_DEV policer 1 &> /dev/null
+ check_fail $? "Managed to read policer drop counter when should not"
+ echo "n"> $DEBUGFS_DIR/fail_trap_policer_counter_get
+ devlink -s trap policer show $DEVLINK_DEV policer 1 &> /dev/null
+ check_err $? "Did not manage to read policer drop counter when should"
+
+ log_test "Trap policer"
+}
+
+trap_group_check_policer()
+{
+ local group_name=$1; shift
+
+ devlink -j -p trap group show $DEVLINK_DEV group $group_name \
+ | jq -e '.[][][]["policer"]' &> /dev/null
+}
+
+trap_policer_bind_test()
+{
+ devlink trap group set $DEVLINK_DEV group l2_drops policer 1
+ check_err $? "Failed to bind a valid policer"
+ if [ $(devlink_trap_group_policer_get "l2_drops") -ne 1 ]; then
+ check_err 1 "Bound policer was not changed"
+ fi
+
+ devlink trap group set $DEVLINK_DEV group l2_drops policer 1337 \
+ &> /dev/null
+ check_fail $? "Did not get an error for binding a non-existing policer"
+ if [ $(devlink_trap_group_policer_get "l2_drops") -ne 1 ]; then
+ check_err 1 "Bound policer was changed when should not"
+ fi
+
+ devlink trap group set $DEVLINK_DEV group l2_drops policer 0
+ check_err $? "Failed to unbind a policer when using ID 0"
+ trap_group_check_policer "l2_drops"
+ check_fail $? "Trap group has a policer after unbinding with ID 0"
+
+ devlink trap group set $DEVLINK_DEV group l2_drops policer 1
+ check_err $? "Failed to bind a valid policer"
+
+ devlink trap group set $DEVLINK_DEV group l2_drops nopolicer
+ check_err $? "Failed to unbind a policer when using 'nopolicer' keyword"
+ trap_group_check_policer "l2_drops"
+ check_fail $? "Trap group has a policer after unbinding with 'nopolicer' keyword"
+
+ devlink trap group set $DEVLINK_DEV group l2_drops policer 1
+ check_err $? "Failed to bind a valid policer"
+
+ echo "y"> $DEBUGFS_DIR/fail_trap_group_set
+ devlink trap group set $DEVLINK_DEV group l2_drops policer 2 \
+ &> /dev/null
+ check_fail $? "Managed to bind a policer when should not"
+ echo "n"> $DEBUGFS_DIR/fail_trap_group_set
+ devlink trap group set $DEVLINK_DEV group l2_drops policer 2
+ check_err $? "Did not manage to bind a policer when should"
+
+ devlink trap group set $DEVLINK_DEV group l2_drops action drop \
+ policer 1337 &> /dev/null
+ check_fail $? "Did not get an error for partially modified trap group"
+
+ log_test "Trap policer binding"
+}
+
port_del_test()
{
local group_name
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index ecc52d4c034d..997c65dcad68 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -23,3 +23,8 @@ so_txtime
tcp_fastopen_backup_key
nettest
fin_ack_lat
+reuseaddr_ports_exhausted
+hwtstamp_config
+rxtimestamp
+timestamping
+txtimestamp
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 4c1bd03ffa1c..3f386eb9e7d7 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -14,6 +14,8 @@ TEST_PROGS += tcp_fastopen_backup_key.sh fcnal-test.sh l2tp.sh traceroute.sh
TEST_PROGS += fin_ack_lat.sh fib_nexthop_multiprefix.sh fib_nexthops.sh
TEST_PROGS += altnames.sh icmp_redirect.sh ip6_gre_headroom.sh
TEST_PROGS += route_localnet.sh
+TEST_PROGS += reuseaddr_ports_exhausted.sh
+TEST_PROGS += txtimestamp.sh
TEST_PROGS_EXTENDED := in_netns.sh
TEST_GEN_FILES = socket nettest
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
@@ -22,6 +24,8 @@ TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag
TEST_GEN_FILES += so_txtime ipv6_flowlabel ipv6_flowlabel_mgr
TEST_GEN_FILES += tcp_fastopen_backup_key
TEST_GEN_FILES += fin_ack_lat
+TEST_GEN_FILES += reuseaddr_ports_exhausted
+TEST_GEN_FILES += hwtstamp_config rxtimestamp timestamping txtimestamp
TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index b8503a8119b0..3b42c06b5985 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -12,6 +12,7 @@ CONFIG_IPV6_VTI=y
CONFIG_DUMMY=y
CONFIG_BRIDGE=y
CONFIG_VLAN_8021Q=y
+CONFIG_IFB=y
CONFIG_NETFILTER=y
CONFIG_NETFILTER_ADVANCED=y
CONFIG_NF_CONNTRACK=m
@@ -27,5 +28,6 @@ CONFIG_NFT_CHAIN_NAT_IPV6=m
CONFIG_NFT_CHAIN_NAT_IPV4=m
CONFIG_NET_SCH_FQ=m
CONFIG_NET_SCH_ETF=m
+CONFIG_NET_SCH_NETEM=y
CONFIG_TEST_BLACKHOLE_DEV=m
CONFIG_KALLSYMS=y
diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh
index 40b076983239..155d48bd4d9e 100644
--- a/tools/testing/selftests/net/forwarding/devlink_lib.sh
+++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh
@@ -35,6 +35,12 @@ if [ $? -ne 0 ]; then
exit 1
fi
+devlink dev help 2>&1 | grep info &> /dev/null
+if [ $? -ne 0 ]; then
+ echo "SKIP: iproute2 too old, missing devlink dev info support"
+ exit 1
+fi
+
##############################################################################
# Devlink helpers
@@ -373,6 +379,7 @@ devlink_trap_drop_test()
local trap_name=$1; shift
local group_name=$1; shift
local dev=$1; shift
+ local handle=$1; shift
# This is the common part of all the tests. It checks that stats are
# initially idle, then non-idle after changing the trap action and
@@ -397,7 +404,7 @@ devlink_trap_drop_test()
devlink_trap_group_stats_idle_test $group_name
check_err $? "Trap group stats not idle after setting action to drop"
- tc_check_packets "dev $dev egress" 101 0
+ tc_check_packets "dev $dev egress" $handle 0
check_err $? "Packets were not dropped"
}
@@ -406,7 +413,68 @@ devlink_trap_drop_cleanup()
local mz_pid=$1; shift
local dev=$1; shift
local proto=$1; shift
+ local pref=$1; shift
+ local handle=$1; shift
kill $mz_pid && wait $mz_pid &> /dev/null
- tc filter del dev $dev egress protocol $proto pref 1 handle 101 flower
+ tc filter del dev $dev egress protocol $proto pref $pref handle $handle flower
+}
+
+devlink_trap_policers_num_get()
+{
+ devlink -j -p trap policer show | jq '.[]["'$DEVLINK_DEV'"] | length'
+}
+
+devlink_trap_policer_rate_get()
+{
+ local policer_id=$1; shift
+
+ devlink -j -p trap policer show $DEVLINK_DEV policer $policer_id \
+ | jq '.[][][]["rate"]'
+}
+
+devlink_trap_policer_burst_get()
+{
+ local policer_id=$1; shift
+
+ devlink -j -p trap policer show $DEVLINK_DEV policer $policer_id \
+ | jq '.[][][]["burst"]'
+}
+
+devlink_trap_policer_rx_dropped_get()
+{
+ local policer_id=$1; shift
+
+ devlink -j -p -s trap policer show $DEVLINK_DEV policer $policer_id \
+ | jq '.[][][]["stats"]["rx"]["dropped"]'
+}
+
+devlink_trap_group_policer_get()
+{
+ local group_name=$1; shift
+
+ devlink -j -p trap group show $DEVLINK_DEV group $group_name \
+ | jq '.[][][]["policer"]'
+}
+
+devlink_trap_policer_ids_get()
+{
+ devlink -j -p trap policer show \
+ | jq '.[]["'$DEVLINK_DEV'"][]["policer"]'
+}
+
+devlink_port_by_netdev()
+{
+ local if_name=$1
+
+ devlink -j port show $if_name | jq -e '.[] | keys' | jq -r '.[]'
+}
+
+devlink_cpu_port_get()
+{
+ local cpu_dl_port_num=$(devlink port list | grep "$DEVLINK_DEV" |
+ grep cpu | cut -d/ -f3 | cut -d: -f1 |
+ sed -n '1p')
+
+ echo "$DEVLINK_DEV/$cpu_dl_port_num"
}
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 2f5da414aaa7..977fc2b326a2 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -60,6 +60,15 @@ check_tc_chain_support()
fi
}
+check_tc_action_hw_stats_support()
+{
+ tc actions help 2>&1 | grep -q hw_stats
+ if [[ $? -ne 0 ]]; then
+ echo "SKIP: iproute2 too old; tc is missing action hw_stats support"
+ exit 1
+ fi
+}
+
if [[ "$(id -u)" -ne 0 ]]; then
echo "SKIP: need root privileges"
exit 0
@@ -248,13 +257,40 @@ busywait()
done
}
+not()
+{
+ "$@"
+ [[ $? != 0 ]]
+}
+
+grep_bridge_fdb()
+{
+ local addr=$1; shift
+ local word
+ local flag
+
+ if [ "$1" == "self" ] || [ "$1" == "master" ]; then
+ word=$1; shift
+ if [ "$1" == "-v" ]; then
+ flag=$1; shift
+ fi
+ fi
+
+ $@ | grep $addr | grep $flag "$word"
+}
+
+wait_for_offload()
+{
+ "$@" | grep -q offload
+}
+
until_counter_is()
{
- local value=$1; shift
+ local expr=$1; shift
local current=$("$@")
echo $((current))
- ((current >= value))
+ ((current $expr))
}
busywait_for_counter()
@@ -263,7 +299,7 @@ busywait_for_counter()
local delta=$1; shift
local base=$("$@")
- busywait "$timeout" until_counter_is $((base + delta)) "$@"
+ busywait "$timeout" until_counter_is ">= $((base + delta))" "$@"
}
setup_wait_dev()
@@ -599,6 +635,17 @@ tc_rule_stats_get()
| jq ".[1].options.actions[].stats$selector"
}
+tc_rule_handle_stats_get()
+{
+ local id=$1; shift
+ local handle=$1; shift
+ local selector=${1:-.packets}; shift
+
+ tc -j -s filter show $id \
+ | jq ".[] | select(.options.handle == $handle) | \
+ .options.actions[0].stats$selector"
+}
+
ethtool_stats_get()
{
local dev=$1; shift
@@ -607,6 +654,26 @@ ethtool_stats_get()
ethtool -S $dev | grep "^ *$stat:" | head -n 1 | cut -d: -f2
}
+qdisc_stats_get()
+{
+ local dev=$1; shift
+ local handle=$1; shift
+ local selector=$1; shift
+
+ tc -j -s qdisc show dev "$dev" \
+ | jq '.[] | select(.handle == "'"$handle"'") | '"$selector"
+}
+
+qdisc_parent_stats_get()
+{
+ local dev=$1; shift
+ local parent=$1; shift
+ local selector=$1; shift
+
+ tc -j -s qdisc show dev "$dev" invisible \
+ | jq '.[] | select(.parent == "'"$parent"'") | '"$selector"
+}
+
humanize()
{
local speed=$1; shift
@@ -1132,18 +1199,29 @@ flood_test()
flood_multicast_test $br_port $host1_if $host2_if
}
-start_traffic()
+__start_traffic()
{
+ local proto=$1; shift
local h_in=$1; shift # Where the traffic egresses the host
local sip=$1; shift
local dip=$1; shift
local dmac=$1; shift
$MZ $h_in -p 8000 -A $sip -B $dip -c 0 \
- -a own -b $dmac -t udp -q &
+ -a own -b $dmac -t "$proto" -q "$@" &
sleep 1
}
+start_traffic()
+{
+ __start_traffic udp "$@"
+}
+
+start_tcp_traffic()
+{
+ __start_traffic tcp "$@"
+}
+
stop_traffic()
{
# Suppress noise from killing mausezahn.
diff --git a/tools/testing/selftests/net/forwarding/pedit_dsfield.sh b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh
new file mode 100755
index 000000000000..b50081855913
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh
@@ -0,0 +1,238 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test sends traffic from H1 to H2. Either on ingress of $swp1, or on
+# egress of $swp2, the traffic is acted upon by a pedit action. An ingress
+# filter installed on $h2 verifies that the packet looks like expected.
+#
+# +----------------------+ +----------------------+
+# | H1 | | H2 |
+# | + $h1 | | $h2 + |
+# | | 192.0.2.1/28 | | 192.0.2.2/28 | |
+# +----|-----------------+ +----------------|-----+
+# | |
+# +----|----------------------------------------------------------------|-----+
+# | SW | | |
+# | +-|----------------------------------------------------------------|-+ |
+# | | + $swp1 BR $swp2 + | |
+# | +--------------------------------------------------------------------+ |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ test_ip_dsfield
+ test_ip_dscp
+ test_ip_ecn
+ test_ip_dscp_ecn
+"
+
+NUM_NETIFS=4
+source lib.sh
+source tc_common.sh
+
+: ${HIT_TIMEOUT:=2000} # ms
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/28 2001:db8:1::2/64
+ tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2 192.0.2.2/28 2001:db8:1::2/64
+}
+
+switch_create()
+{
+ ip link add name br1 up type bridge vlan_filtering 1
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+ ip link set dev $swp2 master br1
+ ip link set dev $swp2 up
+
+ tc qdisc add dev $swp1 clsact
+ tc qdisc add dev $swp2 clsact
+}
+
+switch_destroy()
+{
+ tc qdisc del dev $swp2 clsact
+ tc qdisc del dev $swp1 clsact
+
+ ip link set dev $swp2 nomaster
+ ip link set dev $swp1 nomaster
+ ip link del dev br1
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ h2mac=$(mac_get $h2)
+
+ vrf_prepare
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.2
+}
+
+do_test_pedit_dsfield_common()
+{
+ local pedit_locus=$1; shift
+ local pedit_action=$1; shift
+ local mz_flags=$1; shift
+
+ RET=0
+
+ # TOS 125: DSCP 31, ECN 1. Used for testing that the relevant part is
+ # overwritten when zero is selected.
+ $MZ $mz_flags $h1 -c 10 -d 20msec -p 100 \
+ -a own -b $h2mac -q -t tcp tos=0x7d,sp=54321,dp=12345
+
+ local pkts
+ pkts=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= 10" \
+ tc_rule_handle_stats_get "dev $h2 ingress" 101)
+ check_err $? "Expected to get 10 packets, but got $pkts."
+ log_test "$pedit_locus pedit $pedit_action"
+}
+
+do_test_pedit_dsfield()
+{
+ local pedit_locus=$1; shift
+ local pedit_action=$1; shift
+ local match_prot=$1; shift
+ local match_flower=$1; shift
+ local mz_flags=$1; shift
+ local saddr=$1; shift
+ local daddr=$1; shift
+
+ tc filter add $pedit_locus handle 101 pref 1 \
+ flower action pedit ex munge $pedit_action
+ tc filter add dev $h2 ingress handle 101 pref 1 prot $match_prot \
+ flower skip_hw $match_flower action pass
+
+ do_test_pedit_dsfield_common "$pedit_locus" "$pedit_action" "$mz_flags"
+
+ tc filter del dev $h2 ingress pref 1
+ tc filter del $pedit_locus pref 1
+}
+
+do_test_ip_dsfield()
+{
+ local locus=$1; shift
+ local dsfield
+
+ for dsfield in 0 1 2 3 128 252 253 254 255; do
+ do_test_pedit_dsfield "$locus" \
+ "ip dsfield set $dsfield" \
+ ip "ip_tos $dsfield" \
+ "-A 192.0.2.1 -B 192.0.2.2"
+ done
+}
+
+test_ip_dsfield()
+{
+ do_test_ip_dsfield "dev $swp1 ingress"
+ do_test_ip_dsfield "dev $swp2 egress"
+}
+
+do_test_ip_dscp()
+{
+ local locus=$1; shift
+ local dscp
+
+ for dscp in 0 1 2 3 32 61 62 63; do
+ do_test_pedit_dsfield "$locus" \
+ "ip dsfield set $((dscp << 2)) retain 0xfc" \
+ ip "ip_tos $(((dscp << 2) | 1))" \
+ "-A 192.0.2.1 -B 192.0.2.2"
+ done
+}
+
+test_ip_dscp()
+{
+ do_test_ip_dscp "dev $swp1 ingress"
+ do_test_ip_dscp "dev $swp2 egress"
+}
+
+do_test_ip_ecn()
+{
+ local locus=$1; shift
+ local ecn
+
+ for ecn in 0 1 2 3; do
+ do_test_pedit_dsfield "$locus" \
+ "ip dsfield set $ecn retain 0x03" \
+ ip "ip_tos $((124 | $ecn))" \
+ "-A 192.0.2.1 -B 192.0.2.2"
+ done
+}
+
+test_ip_ecn()
+{
+ do_test_ip_ecn "dev $swp1 ingress"
+ do_test_ip_ecn "dev $swp2 egress"
+}
+
+do_test_ip_dscp_ecn()
+{
+ local locus=$1; shift
+
+ tc filter add $locus handle 101 pref 1 \
+ flower action pedit ex munge ip dsfield set 124 retain 0xfc \
+ action pedit ex munge ip dsfield set 1 retain 0x03
+ tc filter add dev $h2 ingress handle 101 pref 1 prot ip \
+ flower skip_hw ip_tos 125 action pass
+
+ do_test_pedit_dsfield_common "$locus" "set DSCP + set ECN" \
+ "-A 192.0.2.1 -B 192.0.2.2"
+
+ tc filter del dev $h2 ingress pref 1
+ tc filter del $locus pref 1
+}
+
+test_ip_dscp_ecn()
+{
+ do_test_ip_dscp_ecn "dev $swp1 ingress"
+ do_test_ip_dscp_ecn "dev $swp2 egress"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/sch_ets.sh b/tools/testing/selftests/net/forwarding/sch_ets.sh
index 40e0ad1bc4f2..e60c8b4818cc 100755
--- a/tools/testing/selftests/net/forwarding/sch_ets.sh
+++ b/tools/testing/selftests/net/forwarding/sch_ets.sh
@@ -34,11 +34,14 @@ switch_destroy()
}
# Callback from sch_ets_tests.sh
-get_stats()
+collect_stats()
{
- local stream=$1; shift
+ local -a streams=("$@")
+ local stream
- link_stats_get $h2.1$stream rx bytes
+ for stream in ${streams[@]}; do
+ qdisc_parent_stats_get $swp2 10:$((stream + 1)) .bytes
+ done
}
ets_run
diff --git a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh
index 3c3b204d47e8..cdf689e99458 100644
--- a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh
+++ b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh
@@ -2,7 +2,7 @@
# Global interface:
# $put -- port under test (e.g. $swp2)
-# get_stats($band) -- A function to collect stats for band
+# collect_stats($streams...) -- A function to get stats for individual streams
# ets_start_traffic($band) -- Start traffic for this band
# ets_change_qdisc($op, $dev, $nstrict, $quanta...) -- Add or change qdisc
@@ -94,15 +94,11 @@ __ets_dwrr_test()
sleep 10
- t0=($(for stream in ${streams[@]}; do
- get_stats $stream
- done))
+ t0=($(collect_stats "${streams[@]}"))
sleep 10
- t1=($(for stream in ${streams[@]}; do
- get_stats $stream
- done))
+ t1=($(collect_stats "${streams[@]}"))
d=($(for ((i = 0; i < ${#streams[@]}; i++)); do
echo $((${t1[$i]} - ${t0[$i]}))
done))
diff --git a/tools/testing/selftests/net/forwarding/skbedit_priority.sh b/tools/testing/selftests/net/forwarding/skbedit_priority.sh
new file mode 100755
index 000000000000..e3bd8a6bb8b4
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/skbedit_priority.sh
@@ -0,0 +1,168 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test sends traffic from H1 to H2. Either on ingress of $swp1, or on
+# egress of $swp2, the traffic is acted upon by an action skbedit priority. The
+# new priority should be taken into account when classifying traffic on the PRIO
+# qdisc at $swp2. The test verifies that for different priority values, the
+# traffic ends up in expected PRIO band.
+#
+# +----------------------+ +----------------------+
+# | H1 | | H2 |
+# | + $h1 | | $h2 + |
+# | | 192.0.2.1/28 | | 192.0.2.2/28 | |
+# +----|-----------------+ +----------------|-----+
+# | |
+# +----|----------------------------------------------------------------|-----+
+# | SW | | |
+# | +-|----------------------------------------------------------------|-+ |
+# | | + $swp1 BR $swp2 + | |
+# | | PRIO | |
+# | +--------------------------------------------------------------------+ |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ test_ingress
+ test_egress
+"
+
+NUM_NETIFS=4
+source lib.sh
+
+: ${HIT_TIMEOUT:=2000} # ms
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/28
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/28
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 192.0.2.2/28
+}
+
+switch_create()
+{
+ ip link add name br1 up type bridge vlan_filtering 1
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+ ip link set dev $swp2 master br1
+ ip link set dev $swp2 up
+
+ tc qdisc add dev $swp1 clsact
+ tc qdisc add dev $swp2 clsact
+ tc qdisc add dev $swp2 root handle 10: \
+ prio bands 8 priomap 7 6 5 4 3 2 1 0
+}
+
+switch_destroy()
+{
+ tc qdisc del dev $swp2 root
+ tc qdisc del dev $swp2 clsact
+ tc qdisc del dev $swp1 clsact
+
+ ip link set dev $swp2 nomaster
+ ip link set dev $swp1 nomaster
+ ip link del dev br1
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ h2mac=$(mac_get $h2)
+
+ vrf_prepare
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.2
+}
+
+test_skbedit_priority_one()
+{
+ local locus=$1; shift
+ local prio=$1; shift
+ local classid=$1; shift
+
+ RET=0
+
+ tc filter add $locus handle 101 pref 1 \
+ flower action skbedit priority $prio
+
+ local pkt0=$(qdisc_parent_stats_get $swp2 $classid .packets)
+ local pkt2=$(tc_rule_handle_stats_get "$locus" 101)
+ $MZ $h1 -t udp "sp=54321,dp=12345" -c 10 -d 20msec -p 100 \
+ -a own -b $h2mac -A 192.0.2.1 -B 192.0.2.2 -q
+
+ local pkt1
+ pkt1=$(busywait "$HIT_TIMEOUT" until_counter_is ">= $((pkt0 + 10))" \
+ qdisc_parent_stats_get $swp2 $classid .packets)
+ check_err $? "Expected to get 10 packets on class $classid, but got $((pkt1 - pkt0))."
+
+ local pkt3=$(tc_rule_handle_stats_get "$locus" 101)
+ ((pkt3 >= pkt2 + 10))
+ check_err $? "Expected to get 10 packets on skbedit rule but got $((pkt3 - pkt2))."
+
+ log_test "$locus skbedit priority $prio -> classid $classid"
+
+ tc filter del $locus pref 1
+}
+
+test_ingress()
+{
+ local prio
+
+ for prio in {0..7}; do
+ test_skbedit_priority_one "dev $swp1 ingress" \
+ $prio 10:$((8 - prio))
+ done
+}
+
+test_egress()
+{
+ local prio
+
+ for prio in {0..7}; do
+ test_skbedit_priority_one "dev $swp2 egress" \
+ $prio 10:$((8 - prio))
+ done
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_common.sh b/tools/testing/selftests/net/forwarding/tc_common.sh
index 64f652633585..0e18e8be6e2a 100644
--- a/tools/testing/selftests/net/forwarding/tc_common.sh
+++ b/tools/testing/selftests/net/forwarding/tc_common.sh
@@ -6,39 +6,14 @@ CHECK_TC="yes"
# Can be overridden by the configuration file. See lib.sh
TC_HIT_TIMEOUT=${TC_HIT_TIMEOUT:=1000} # ms
-__tc_check_packets()
-{
- local id=$1
- local handle=$2
- local count=$3
- local operator=$4
-
- start_time="$(date -u +%s%3N)"
- while true
- do
- cmd_jq "tc -j -s filter show $id" \
- ".[] | select(.options.handle == $handle) | \
- select(.options.actions[0].stats.packets $operator $count)" \
- &> /dev/null
- ret=$?
- if [[ $ret -eq 0 ]]; then
- return $ret
- fi
- current_time="$(date -u +%s%3N)"
- diff=$(expr $current_time - $start_time)
- if [ "$diff" -gt "$TC_HIT_TIMEOUT" ]; then
- return 1
- fi
- done
-}
-
tc_check_packets()
{
local id=$1
local handle=$2
local count=$3
- __tc_check_packets "$id" "$handle" "$count" "=="
+ busywait "$TC_HIT_TIMEOUT" until_counter_is "== $count" \
+ tc_rule_handle_stats_get "$id" "$handle" > /dev/null
}
tc_check_packets_hitting()
@@ -46,5 +21,6 @@ tc_check_packets_hitting()
local id=$1
local handle=$2
- __tc_check_packets "$id" "$handle" 0 ">"
+ busywait "$TC_HIT_TIMEOUT" until_counter_is "> 0" \
+ tc_rule_handle_stats_get "$id" "$handle" > /dev/null
}
diff --git a/tools/testing/selftests/networking/timestamping/hwtstamp_config.c b/tools/testing/selftests/net/hwtstamp_config.c
index e1fdee841021..e1fdee841021 100644
--- a/tools/testing/selftests/networking/timestamping/hwtstamp_config.c
+++ b/tools/testing/selftests/net/hwtstamp_config.c
diff --git a/tools/testing/selftests/net/mptcp/.gitignore b/tools/testing/selftests/net/mptcp/.gitignore
index d72f07642738..ea13b255a99d 100644
--- a/tools/testing/selftests/net/mptcp/.gitignore
+++ b/tools/testing/selftests/net/mptcp/.gitignore
@@ -1,2 +1,3 @@
mptcp_connect
+pm_nl_ctl
*.pcap
diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile
index ba450e62dc5b..f50976ee7d44 100644
--- a/tools/testing/selftests/net/mptcp/Makefile
+++ b/tools/testing/selftests/net/mptcp/Makefile
@@ -1,12 +1,13 @@
# SPDX-License-Identifier: GPL-2.0
top_srcdir = ../../../../..
+KSFT_KHDR_INSTALL := 1
-CFLAGS = -Wall -Wl,--no-as-needed -O2 -g
+CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include
-TEST_PROGS := mptcp_connect.sh
+TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh
-TEST_GEN_FILES = mptcp_connect
+TEST_GEN_FILES = mptcp_connect pm_nl_ctl
TEST_FILES := settings
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index 99579c0223c1..cedee5b952ba 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -34,8 +34,8 @@ extern int optind;
#define TCP_ULP 31
#endif
+static int poll_timeout = 10 * 1000;
static bool listen_mode;
-static int poll_timeout;
enum cfg_mode {
CFG_MODE_POLL,
@@ -50,11 +50,21 @@ static int cfg_sock_proto = IPPROTO_MPTCP;
static bool tcpulp_audit;
static int pf = AF_INET;
static int cfg_sndbuf;
+static int cfg_rcvbuf;
+static bool cfg_join;
static void die_usage(void)
{
- fprintf(stderr, "Usage: mptcp_connect [-6] [-u] [-s MPTCP|TCP] [-p port] -m mode]"
- "[ -l ] [ -t timeout ] connect_address\n");
+ fprintf(stderr, "Usage: mptcp_connect [-6] [-u] [-s MPTCP|TCP] [-p port] [-m mode]"
+ "[-l] connect_address\n");
+ fprintf(stderr, "\t-6 use ipv6\n");
+ fprintf(stderr, "\t-t num -- set poll timeout to num\n");
+ fprintf(stderr, "\t-S num -- set SO_SNDBUF to num\n");
+ fprintf(stderr, "\t-R num -- set SO_RCVBUF to num\n");
+ fprintf(stderr, "\t-p num -- use port num\n");
+ fprintf(stderr, "\t-m [MPTCP|TCP] -- use tcp or mptcp sockets\n");
+ fprintf(stderr, "\t-s [mmap|poll] -- use poll (default) or mmap\n");
+ fprintf(stderr, "\t-u -- check mptcp ulp\n");
exit(1);
}
@@ -97,6 +107,17 @@ static void xgetaddrinfo(const char *node, const char *service,
}
}
+static void set_rcvbuf(int fd, unsigned int size)
+{
+ int err;
+
+ err = setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &size, sizeof(size));
+ if (err) {
+ perror("set SO_RCVBUF");
+ exit(1);
+ }
+}
+
static void set_sndbuf(int fd, unsigned int size)
{
int err;
@@ -230,6 +251,7 @@ static int sock_connect_mptcp(const char * const remoteaddr,
static size_t do_rnd_write(const int fd, char *buf, const size_t len)
{
+ static bool first = true;
unsigned int do_w;
ssize_t bw;
@@ -237,10 +259,19 @@ static size_t do_rnd_write(const int fd, char *buf, const size_t len)
if (do_w == 0 || do_w > len)
do_w = len;
+ if (cfg_join && first && do_w > 100)
+ do_w = 100;
+
bw = write(fd, buf, do_w);
if (bw < 0)
perror("write");
+ /* let the join handshake complete, before going on */
+ if (cfg_join && first) {
+ usleep(200000);
+ first = false;
+ }
+
return bw;
}
@@ -365,8 +396,11 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd)
break;
/* ... but we still receive.
- * Close our write side.
+ * Close our write side, ev. give some time
+ * for address notification
*/
+ if (cfg_join)
+ usleep(400000);
shutdown(peerfd, SHUT_WR);
} else {
if (errno == EINTR)
@@ -383,6 +417,10 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd)
}
}
+ /* leave some time for late join/announce */
+ if (cfg_join)
+ usleep(400000);
+
close(peerfd);
return 0;
}
@@ -638,7 +676,7 @@ static void maybe_close(int fd)
{
unsigned int r = rand();
- if (r & 1)
+ if (!cfg_join && (r & 1))
close(fd);
}
@@ -704,6 +742,8 @@ int main_loop(void)
check_getpeername_connect(fd);
+ if (cfg_rcvbuf)
+ set_rcvbuf(fd, cfg_rcvbuf);
if (cfg_sndbuf)
set_sndbuf(fd, cfg_sndbuf);
@@ -745,7 +785,7 @@ int parse_mode(const char *mode)
return 0;
}
-int parse_sndbuf(const char *size)
+static int parse_int(const char *size)
{
unsigned long s;
@@ -765,17 +805,19 @@ int parse_sndbuf(const char *size)
die_usage();
}
- cfg_sndbuf = s;
-
- return 0;
+ return (int)s;
}
static void parse_opts(int argc, char **argv)
{
int c;
- while ((c = getopt(argc, argv, "6lp:s:hut:m:b:")) != -1) {
+ while ((c = getopt(argc, argv, "6jlp:s:hut:m:S:R:")) != -1) {
switch (c) {
+ case 'j':
+ cfg_join = true;
+ cfg_mode = CFG_MODE_POLL;
+ break;
case 'l':
listen_mode = true;
break;
@@ -802,8 +844,11 @@ static void parse_opts(int argc, char **argv)
case 'm':
cfg_mode = parse_mode(optarg);
break;
- case 'b':
- cfg_sndbuf = parse_sndbuf(optarg);
+ case 'S':
+ cfg_sndbuf = parse_int(optarg);
+ break;
+ case 'R':
+ cfg_rcvbuf = parse_int(optarg);
break;
}
}
@@ -831,6 +876,8 @@ int main(int argc, char *argv[])
if (fd < 0)
return 1;
+ if (cfg_rcvbuf)
+ set_rcvbuf(fd, cfg_rcvbuf);
if (cfg_sndbuf)
set_sndbuf(fd, cfg_sndbuf);
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index d573a0feb98d..acf02e156d20 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -3,7 +3,7 @@
time_start=$(date +%s)
-optstring="b:d:e:l:r:h4cm:"
+optstring="S:R:d:e:l:r:h4cm:"
ret=0
sin=""
sout=""
@@ -19,6 +19,7 @@ tc_loss=$((RANDOM%101))
tc_reorder=""
testmode=""
sndbuf=0
+rcvbuf=0
options_log=true
if [ $tc_loss -eq 100 ];then
@@ -39,7 +40,8 @@ usage() {
echo -e "\t-e: ethtool features to disable, e.g.: \"-e tso -e gso\" (default: randomly disable any of tso/gso/gro)"
echo -e "\t-4: IPv4 only: disable IPv6 tests (default: test both IPv4 and IPv6)"
echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)"
- echo -e "\t-b: set sndbuf value (default: use kernel default)"
+ echo -e "\t-S: set sndbuf value (default: use kernel default)"
+ echo -e "\t-R: set rcvbuf value (default: use kernel default)"
echo -e "\t-m: test mode (poll, sendfile; default: poll)"
}
@@ -73,11 +75,19 @@ while getopts "$optstring" option;do
"c")
capture=true
;;
- "b")
+ "S")
if [ $OPTARG -ge 0 ];then
sndbuf="$OPTARG"
else
- echo "-s requires numeric argument, got \"$OPTARG\"" 1>&2
+ echo "-S requires numeric argument, got \"$OPTARG\"" 1>&2
+ exit 1
+ fi
+ ;;
+ "R")
+ if [ $OPTARG -ge 0 ];then
+ rcvbuf="$OPTARG"
+ else
+ echo "-R requires numeric argument, got \"$OPTARG\"" 1>&2
exit 1
fi
;;
@@ -342,8 +352,12 @@ do_transfer()
port=$((10000+$TEST_COUNT))
TEST_COUNT=$((TEST_COUNT+1))
+ if [ "$rcvbuf" -gt 0 ]; then
+ extra_args="$extra_args -R $rcvbuf"
+ fi
+
if [ "$sndbuf" -gt 0 ]; then
- extra_args="$extra_args -b $sndbuf"
+ extra_args="$extra_args -S $sndbuf"
fi
if [ -n "$testmode" ]; then
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
new file mode 100755
index 000000000000..dd42c2f692d0
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -0,0 +1,357 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ret=0
+sin=""
+sout=""
+cin=""
+cout=""
+ksft_skip=4
+timeout=30
+capture=0
+
+TEST_COUNT=0
+
+init()
+{
+ capout=$(mktemp)
+
+ rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
+
+ ns1="ns1-$rndh"
+ ns2="ns2-$rndh"
+
+ for netns in "$ns1" "$ns2";do
+ ip netns add $netns || exit $ksft_skip
+ ip -net $netns link set lo up
+ ip netns exec $netns sysctl -q net.mptcp.enabled=1
+ ip netns exec $netns sysctl -q net.ipv4.conf.all.rp_filter=0
+ ip netns exec $netns sysctl -q net.ipv4.conf.default.rp_filter=0
+ done
+
+ # ns1 ns2
+ # ns1eth1 ns2eth1
+ # ns1eth2 ns2eth2
+ # ns1eth3 ns2eth3
+ # ns1eth4 ns2eth4
+
+ for i in `seq 1 4`; do
+ ip link add ns1eth$i netns "$ns1" type veth peer name ns2eth$i netns "$ns2"
+ ip -net "$ns1" addr add 10.0.$i.1/24 dev ns1eth$i
+ ip -net "$ns1" addr add dead:beef:$i::1/64 dev ns1eth$i nodad
+ ip -net "$ns1" link set ns1eth$i up
+
+ ip -net "$ns2" addr add 10.0.$i.2/24 dev ns2eth$i
+ ip -net "$ns2" addr add dead:beef:$i::2/64 dev ns2eth$i nodad
+ ip -net "$ns2" link set ns2eth$i up
+
+ # let $ns2 reach any $ns1 address from any interface
+ ip -net "$ns2" route add default via 10.0.$i.1 dev ns2eth$i metric 10$i
+ done
+}
+
+cleanup_partial()
+{
+ rm -f "$capout"
+
+ for netns in "$ns1" "$ns2"; do
+ ip netns del $netns
+ done
+}
+
+cleanup()
+{
+ rm -f "$cin" "$cout"
+ rm -f "$sin" "$sout"
+ cleanup_partial
+}
+
+reset()
+{
+ cleanup_partial
+ init
+}
+
+for arg in "$@"; do
+ if [ "$arg" = "-c" ]; then
+ capture=1
+ fi
+done
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+
+check_transfer()
+{
+ in=$1
+ out=$2
+ what=$3
+
+ cmp "$in" "$out" > /dev/null 2>&1
+ if [ $? -ne 0 ] ;then
+ echo "[ FAIL ] $what does not match (in, out):"
+ print_file_err "$in"
+ print_file_err "$out"
+
+ return 1
+ fi
+
+ return 0
+}
+
+do_ping()
+{
+ listener_ns="$1"
+ connector_ns="$2"
+ connect_addr="$3"
+
+ ip netns exec ${connector_ns} ping -q -c 1 $connect_addr >/dev/null
+ if [ $? -ne 0 ] ; then
+ echo "$listener_ns -> $connect_addr connectivity [ FAIL ]" 1>&2
+ ret=1
+ fi
+}
+
+do_transfer()
+{
+ listener_ns="$1"
+ connector_ns="$2"
+ cl_proto="$3"
+ srv_proto="$4"
+ connect_addr="$5"
+
+ port=$((10000+$TEST_COUNT))
+ TEST_COUNT=$((TEST_COUNT+1))
+
+ :> "$cout"
+ :> "$sout"
+ :> "$capout"
+
+ if [ $capture -eq 1 ]; then
+ if [ -z $SUDO_USER ] ; then
+ capuser=""
+ else
+ capuser="-Z $SUDO_USER"
+ fi
+
+ capfile="mp_join-${listener_ns}.pcap"
+
+ echo "Capturing traffic for test $TEST_COUNT into $capfile"
+ ip netns exec ${listener_ns} tcpdump -i any -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 &
+ cappid=$!
+
+ sleep 1
+ fi
+
+ ip netns exec ${listener_ns} ./mptcp_connect -j -t $timeout -l -p $port -s ${srv_proto} 0.0.0.0 < "$sin" > "$sout" &
+ spid=$!
+
+ sleep 1
+
+ ip netns exec ${connector_ns} ./mptcp_connect -j -t $timeout -p $port -s ${cl_proto} $connect_addr < "$cin" > "$cout" &
+ cpid=$!
+
+ wait $cpid
+ retc=$?
+ wait $spid
+ rets=$?
+
+ if [ $capture -eq 1 ]; then
+ sleep 1
+ kill $cappid
+ fi
+
+ if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
+ echo " client exit code $retc, server $rets" 1>&2
+ echo "\nnetns ${listener_ns} socket stat for $port:" 1>&2
+ ip netns exec ${listener_ns} ss -nita 1>&2 -o "sport = :$port"
+ echo "\nnetns ${connector_ns} socket stat for $port:" 1>&2
+ ip netns exec ${connector_ns} ss -nita 1>&2 -o "dport = :$port"
+
+ cat "$capout"
+ return 1
+ fi
+
+ check_transfer $sin $cout "file received by client"
+ retc=$?
+ check_transfer $cin $sout "file received by server"
+ rets=$?
+
+ if [ $retc -eq 0 ] && [ $rets -eq 0 ];then
+ cat "$capout"
+ return 0
+ fi
+
+ cat "$capout"
+ return 1
+}
+
+make_file()
+{
+ name=$1
+ who=$2
+
+ SIZE=1
+
+ dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null
+ echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name"
+
+ echo "Created $name (size $SIZE KB) containing data sent by $who"
+}
+
+run_tests()
+{
+ listener_ns="$1"
+ connector_ns="$2"
+ connect_addr="$3"
+ lret=0
+
+ do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr}
+ lret=$?
+ if [ $lret -ne 0 ]; then
+ ret=$lret
+ return
+ fi
+}
+
+chk_join_nr()
+{
+ local msg="$1"
+ local syn_nr=$2
+ local syn_ack_nr=$3
+ local ack_nr=$4
+ local count
+ local dump_stats
+
+ printf "%-36s %s" "$msg" "syn"
+ count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinSynRx | awk '{print $2}'`
+ [ -z "$count" ] && count=0
+ if [ "$count" != "$syn_nr" ]; then
+ echo "[fail] got $count JOIN[s] syn expected $syn_nr"
+ ret=1
+ dump_stats=1
+ else
+ echo -n "[ ok ]"
+ fi
+
+ echo -n " - synack"
+ count=`ip netns exec $ns2 nstat -as | grep MPTcpExtMPJoinSynAckRx | awk '{print $2}'`
+ [ -z "$count" ] && count=0
+ if [ "$count" != "$syn_ack_nr" ]; then
+ echo "[fail] got $count JOIN[s] synack expected $syn_ack_nr"
+ ret=1
+ dump_stats=1
+ else
+ echo -n "[ ok ]"
+ fi
+
+ echo -n " - ack"
+ count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinAckRx | awk '{print $2}'`
+ [ -z "$count" ] && count=0
+ if [ "$count" != "$ack_nr" ]; then
+ echo "[fail] got $count JOIN[s] ack expected $ack_nr"
+ ret=1
+ dump_stats=1
+ else
+ echo "[ ok ]"
+ fi
+ if [ "${dump_stats}" = 1 ]; then
+ echo Server ns stats
+ ip netns exec $ns1 nstat -as | grep MPTcp
+ echo Client ns stats
+ ip netns exec $ns2 nstat -as | grep MPTcp
+ fi
+}
+
+sin=$(mktemp)
+sout=$(mktemp)
+cin=$(mktemp)
+cout=$(mktemp)
+init
+make_file "$cin" "client"
+make_file "$sin" "server"
+trap cleanup EXIT
+
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "no JOIN" "0" "0" "0"
+
+# subflow limted by client
+reset
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "single subflow, limited by client" 0 0 0
+
+# subflow limted by server
+reset
+ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "single subflow, limited by server" 1 1 0
+
+# subflow
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "single subflow" 1 1 1
+
+# multiple subflows
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+ip netns exec $ns2 ./pm_nl_ctl limits 0 2
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "multiple subflows" 2 2 2
+
+# multiple subflows limited by serverf
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl limits 0 2
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "multiple subflows, limited by server" 2 2 1
+
+# add_address, unused
+reset
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "unused signal address" 0 0 0
+
+# accept and use add_addr
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "signal address" 1 1 1
+
+# accept and use add_addr with an additional subflow
+# note: signal address in server ns and local addresses in client ns must
+# belong to different subnets or one of the listed local address could be
+# used for 'add_addr' subflow
+reset
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+ip netns exec $ns2 ./pm_nl_ctl limits 1 2
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "subflow and signal" 2 2 2
+
+# accept and use add_addr with additional subflows
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 3
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ip netns exec $ns2 ./pm_nl_ctl limits 1 3
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "multiple subflows and signal" 3 3 3
+
+exit $ret
diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh
new file mode 100755
index 000000000000..9172746b6cf0
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh
@@ -0,0 +1,130 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ksft_skip=4
+ret=0
+
+usage() {
+ echo "Usage: $0 [ -h ]"
+}
+
+
+while getopts "$optstring" option;do
+ case "$option" in
+ "h")
+ usage $0
+ exit 0
+ ;;
+ "?")
+ usage $0
+ exit 1
+ ;;
+ esac
+done
+
+sec=$(date +%s)
+rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
+ns1="ns1-$rndh"
+err=$(mktemp)
+ret=0
+
+cleanup()
+{
+ rm -f $out
+ ip netns del $ns1
+}
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+trap cleanup EXIT
+
+ip netns add $ns1 || exit $ksft_skip
+ip -net $ns1 link set lo up
+ip netns exec $ns1 sysctl -q net.mptcp.enabled=1
+
+check()
+{
+ local cmd="$1"
+ local expected="$2"
+ local msg="$3"
+ local out=`$cmd 2>$err`
+ local cmd_ret=$?
+
+ printf "%-50s %s" "$msg"
+ if [ $cmd_ret -ne 0 ]; then
+ echo "[FAIL] command execution '$cmd' stderr "
+ cat $err
+ ret=1
+ elif [ "$out" = "$expected" ]; then
+ echo "[ OK ]"
+ else
+ echo -n "[FAIL] "
+ echo "expected '$expected' got '$out'"
+ ret=1
+ fi
+}
+
+check "ip netns exec $ns1 ./pm_nl_ctl dump" "" "defaults addr list"
+check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0
+subflows 0" "defaults limits"
+
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.2 flags subflow dev lo
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.3 flags signal,backup
+check "ip netns exec $ns1 ./pm_nl_ctl get 1" "id 1 flags 10.0.1.1" "simple add/get addr"
+
+check "ip netns exec $ns1 ./pm_nl_ctl dump" \
+"id 1 flags 10.0.1.1
+id 2 flags subflow dev lo 10.0.1.2
+id 3 flags signal,backup 10.0.1.3" "dump addrs"
+
+ip netns exec $ns1 ./pm_nl_ctl del 2
+check "ip netns exec $ns1 ./pm_nl_ctl get 2" "" "simple del addr"
+check "ip netns exec $ns1 ./pm_nl_ctl dump" \
+"id 1 flags 10.0.1.1
+id 3 flags signal,backup 10.0.1.3" "dump addrs after del"
+
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.3
+check "ip netns exec $ns1 ./pm_nl_ctl get 4" "" "duplicate addr"
+
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.4 id 10 flags signal
+check "ip netns exec $ns1 ./pm_nl_ctl get 4" "id 4 flags signal 10.0.1.4" "id addr increment"
+
+for i in `seq 5 9`; do
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.$i flags signal >/dev/null 2>&1
+done
+check "ip netns exec $ns1 ./pm_nl_ctl get 9" "id 9 flags signal 10.0.1.9" "hard addr limit"
+check "ip netns exec $ns1 ./pm_nl_ctl get 10" "" "above hard addr limit"
+
+for i in `seq 9 256`; do
+ ip netns exec $ns1 ./pm_nl_ctl del $i
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.9
+done
+check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags 10.0.1.1
+id 3 flags signal,backup 10.0.1.3
+id 4 flags signal 10.0.1.4
+id 5 flags signal 10.0.1.5
+id 6 flags signal 10.0.1.6
+id 7 flags signal 10.0.1.7
+id 8 flags signal 10.0.1.8" "id limit"
+
+ip netns exec $ns1 ./pm_nl_ctl flush
+check "ip netns exec $ns1 ./pm_nl_ctl dump" "" "flush addrs"
+
+ip netns exec $ns1 ./pm_nl_ctl limits 9 1
+check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0
+subflows 0" "rcv addrs above hard limit"
+
+ip netns exec $ns1 ./pm_nl_ctl limits 1 9
+check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0
+subflows 0" "subflows above hard limit"
+
+ip netns exec $ns1 ./pm_nl_ctl limits 8 8
+check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 8
+subflows 8" "set limits"
+
+exit $ret
diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
new file mode 100644
index 000000000000..b24a2f17d415
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
@@ -0,0 +1,616 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <errno.h>
+#include <error.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include <arpa/inet.h>
+#include <net/if.h>
+
+#include <linux/rtnetlink.h>
+#include <linux/genetlink.h>
+
+#include "linux/mptcp.h"
+
+#ifndef MPTCP_PM_NAME
+#define MPTCP_PM_NAME "mptcp_pm"
+#endif
+
+static void syntax(char *argv[])
+{
+ fprintf(stderr, "%s add|get|del|flush|dump|accept [<args>]\n", argv[0]);
+ fprintf(stderr, "\tadd [flags signal|subflow|backup] [id <nr>] [dev <name>] <ip>\n");
+ fprintf(stderr, "\tdel <id>\n");
+ fprintf(stderr, "\tget <id>\n");
+ fprintf(stderr, "\tflush\n");
+ fprintf(stderr, "\tdump\n");
+ fprintf(stderr, "\tlimits [<rcv addr max> <subflow max>]\n");
+ exit(0);
+}
+
+static int init_genl_req(char *data, int family, int cmd, int version)
+{
+ struct nlmsghdr *nh = (void *)data;
+ struct genlmsghdr *gh;
+ int off = 0;
+
+ nh->nlmsg_type = family;
+ nh->nlmsg_flags = NLM_F_REQUEST;
+ nh->nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
+ off += NLMSG_ALIGN(sizeof(*nh));
+
+ gh = (void *)(data + off);
+ gh->cmd = cmd;
+ gh->version = version;
+ off += NLMSG_ALIGN(sizeof(*gh));
+ return off;
+}
+
+static void nl_error(struct nlmsghdr *nh)
+{
+ struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(nh);
+ int len = nh->nlmsg_len - sizeof(*nh);
+ uint32_t off;
+
+ if (len < sizeof(struct nlmsgerr))
+ error(1, 0, "netlink error message truncated %d min %ld", len,
+ sizeof(struct nlmsgerr));
+
+ if (!err->error) {
+ /* check messages from kernel */
+ struct rtattr *attrs = (struct rtattr *)NLMSG_DATA(nh);
+
+ while (RTA_OK(attrs, len)) {
+ if (attrs->rta_type == NLMSGERR_ATTR_MSG)
+ fprintf(stderr, "netlink ext ack msg: %s\n",
+ (char *)RTA_DATA(attrs));
+ if (attrs->rta_type == NLMSGERR_ATTR_OFFS) {
+ memcpy(&off, RTA_DATA(attrs), 4);
+ fprintf(stderr, "netlink err off %d\n",
+ (int)off);
+ }
+ attrs = RTA_NEXT(attrs, len);
+ }
+ } else {
+ fprintf(stderr, "netlink error %d", err->error);
+ }
+}
+
+/* do a netlink command and, if max > 0, fetch the reply */
+static int do_nl_req(int fd, struct nlmsghdr *nh, int len, int max)
+{
+ struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
+ socklen_t addr_len;
+ void *data = nh;
+ int rem, ret;
+ int err = 0;
+
+ nh->nlmsg_len = len;
+ ret = sendto(fd, data, len, 0, (void *)&nladdr, sizeof(nladdr));
+ if (ret != len)
+ error(1, errno, "send netlink: %uB != %uB\n", ret, len);
+ if (max == 0)
+ return 0;
+
+ addr_len = sizeof(nladdr);
+ rem = ret = recvfrom(fd, data, max, 0, (void *)&nladdr, &addr_len);
+ if (ret < 0)
+ error(1, errno, "recv netlink: %uB\n", ret);
+
+ /* Beware: the NLMSG_NEXT macro updates the 'rem' argument */
+ for (; NLMSG_OK(nh, rem); nh = NLMSG_NEXT(nh, rem)) {
+ if (nh->nlmsg_type == NLMSG_ERROR) {
+ nl_error(nh);
+ err = 1;
+ }
+ }
+ if (err)
+ error(1, 0, "bailing out due to netlink error[s]");
+ return ret;
+}
+
+static int genl_parse_getfamily(struct nlmsghdr *nlh)
+{
+ struct genlmsghdr *ghdr = NLMSG_DATA(nlh);
+ int len = nlh->nlmsg_len;
+ struct rtattr *attrs;
+
+ if (nlh->nlmsg_type != GENL_ID_CTRL)
+ error(1, errno, "Not a controller message, len=%d type=0x%x\n",
+ nlh->nlmsg_len, nlh->nlmsg_type);
+
+ len -= NLMSG_LENGTH(GENL_HDRLEN);
+
+ if (len < 0)
+ error(1, errno, "wrong controller message len %d\n", len);
+
+ if (ghdr->cmd != CTRL_CMD_NEWFAMILY)
+ error(1, errno, "Unknown controller command %d\n", ghdr->cmd);
+
+ attrs = (struct rtattr *) ((char *) ghdr + GENL_HDRLEN);
+ while (RTA_OK(attrs, len)) {
+ if (attrs->rta_type == CTRL_ATTR_FAMILY_ID)
+ return *(__u16 *)RTA_DATA(attrs);
+ attrs = RTA_NEXT(attrs, len);
+ }
+
+ error(1, errno, "can't find CTRL_ATTR_FAMILY_ID attr");
+ return -1;
+}
+
+static int resolve_mptcp_pm_netlink(int fd)
+{
+ char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+ NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
+ 1024];
+ struct nlmsghdr *nh;
+ struct rtattr *rta;
+ int namelen;
+ int off = 0;
+
+ memset(data, 0, sizeof(data));
+ nh = (void *)data;
+ off = init_genl_req(data, GENL_ID_CTRL, CTRL_CMD_GETFAMILY, 0);
+
+ rta = (void *)(data + off);
+ namelen = strlen(MPTCP_PM_NAME) + 1;
+ rta->rta_type = CTRL_ATTR_FAMILY_NAME;
+ rta->rta_len = RTA_LENGTH(namelen);
+ memcpy(RTA_DATA(rta), MPTCP_PM_NAME, namelen);
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ do_nl_req(fd, nh, off, sizeof(data));
+ return genl_parse_getfamily((void *)data);
+}
+
+int add_addr(int fd, int pm_family, int argc, char *argv[])
+{
+ char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+ NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
+ 1024];
+ struct rtattr *rta, *nest;
+ struct nlmsghdr *nh;
+ u_int16_t family;
+ u_int32_t flags;
+ int nest_start;
+ u_int8_t id;
+ int off = 0;
+ int arg;
+
+ memset(data, 0, sizeof(data));
+ nh = (void *)data;
+ off = init_genl_req(data, pm_family, MPTCP_PM_CMD_ADD_ADDR,
+ MPTCP_PM_VER);
+
+ if (argc < 3)
+ syntax(argv);
+
+ nest_start = off;
+ nest = (void *)(data + off);
+ nest->rta_type = NLA_F_NESTED | MPTCP_PM_ATTR_ADDR;
+ nest->rta_len = RTA_LENGTH(0);
+ off += NLMSG_ALIGN(nest->rta_len);
+
+ /* addr data */
+ rta = (void *)(data + off);
+ if (inet_pton(AF_INET, argv[2], RTA_DATA(rta))) {
+ family = AF_INET;
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4;
+ rta->rta_len = RTA_LENGTH(4);
+ } else if (inet_pton(AF_INET6, argv[2], RTA_DATA(rta))) {
+ family = AF_INET6;
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6;
+ rta->rta_len = RTA_LENGTH(16);
+ } else
+ error(1, errno, "can't parse ip %s", argv[2]);
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ /* family */
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY;
+ rta->rta_len = RTA_LENGTH(2);
+ memcpy(RTA_DATA(rta), &family, 2);
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ for (arg = 3; arg < argc; arg++) {
+ if (!strcmp(argv[arg], "flags")) {
+ char *tok, *str;
+
+ /* flags */
+ flags = 0;
+ if (++arg >= argc)
+ error(1, 0, " missing flags value");
+
+ /* do not support flag list yet */
+ for (str = argv[arg]; (tok = strtok(str, ","));
+ str = NULL) {
+ if (!strcmp(tok, "subflow"))
+ flags |= MPTCP_PM_ADDR_FLAG_SUBFLOW;
+ else if (!strcmp(tok, "signal"))
+ flags |= MPTCP_PM_ADDR_FLAG_SIGNAL;
+ else if (!strcmp(tok, "backup"))
+ flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
+ else
+ error(1, errno,
+ "unknown flag %s", argv[arg]);
+ }
+
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_FLAGS;
+ rta->rta_len = RTA_LENGTH(4);
+ memcpy(RTA_DATA(rta), &flags, 4);
+ off += NLMSG_ALIGN(rta->rta_len);
+ } else if (!strcmp(argv[arg], "id")) {
+ if (++arg >= argc)
+ error(1, 0, " missing id value");
+
+ id = atoi(argv[arg]);
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_ID;
+ rta->rta_len = RTA_LENGTH(1);
+ memcpy(RTA_DATA(rta), &id, 1);
+ off += NLMSG_ALIGN(rta->rta_len);
+ } else if (!strcmp(argv[arg], "dev")) {
+ int32_t ifindex;
+
+ if (++arg >= argc)
+ error(1, 0, " missing dev name");
+
+ ifindex = if_nametoindex(argv[arg]);
+ if (!ifindex)
+ error(1, errno, "unknown device %s", argv[arg]);
+
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_IF_IDX;
+ rta->rta_len = RTA_LENGTH(4);
+ memcpy(RTA_DATA(rta), &ifindex, 4);
+ off += NLMSG_ALIGN(rta->rta_len);
+ } else
+ error(1, 0, "unknown keyword %s", argv[arg]);
+ }
+ nest->rta_len = off - nest_start;
+
+ do_nl_req(fd, nh, off, 0);
+ return 0;
+}
+
+int del_addr(int fd, int pm_family, int argc, char *argv[])
+{
+ char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+ NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
+ 1024];
+ struct rtattr *rta, *nest;
+ struct nlmsghdr *nh;
+ int nest_start;
+ u_int8_t id;
+ int off = 0;
+
+ memset(data, 0, sizeof(data));
+ nh = (void *)data;
+ off = init_genl_req(data, pm_family, MPTCP_PM_CMD_DEL_ADDR,
+ MPTCP_PM_VER);
+
+ /* the only argument is the address id */
+ if (argc != 3)
+ syntax(argv);
+
+ id = atoi(argv[2]);
+
+ nest_start = off;
+ nest = (void *)(data + off);
+ nest->rta_type = NLA_F_NESTED | MPTCP_PM_ATTR_ADDR;
+ nest->rta_len = RTA_LENGTH(0);
+ off += NLMSG_ALIGN(nest->rta_len);
+
+ /* build a dummy addr with only the ID set */
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_ID;
+ rta->rta_len = RTA_LENGTH(1);
+ memcpy(RTA_DATA(rta), &id, 1);
+ off += NLMSG_ALIGN(rta->rta_len);
+ nest->rta_len = off - nest_start;
+
+ do_nl_req(fd, nh, off, 0);
+ return 0;
+}
+
+static void print_addr(struct rtattr *attrs, int len)
+{
+ uint16_t family = 0;
+ char str[1024];
+ uint32_t flags;
+ uint8_t id;
+
+ while (RTA_OK(attrs, len)) {
+ if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_FAMILY)
+ memcpy(&family, RTA_DATA(attrs), 2);
+ if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_ADDR4) {
+ if (family != AF_INET)
+ error(1, errno, "wrong IP (v4) for family %d",
+ family);
+ inet_ntop(AF_INET, RTA_DATA(attrs), str, sizeof(str));
+ printf("%s", str);
+ }
+ if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_ADDR6) {
+ if (family != AF_INET6)
+ error(1, errno, "wrong IP (v6) for family %d",
+ family);
+ inet_ntop(AF_INET6, RTA_DATA(attrs), str, sizeof(str));
+ printf("%s", str);
+ }
+ if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_ID) {
+ memcpy(&id, RTA_DATA(attrs), 1);
+ printf("id %d ", id);
+ }
+ if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_FLAGS) {
+ memcpy(&flags, RTA_DATA(attrs), 4);
+
+ printf("flags ");
+ if (flags & MPTCP_PM_ADDR_FLAG_SIGNAL) {
+ printf("signal");
+ flags &= ~MPTCP_PM_ADDR_FLAG_SIGNAL;
+ if (flags)
+ printf(",");
+ }
+
+ if (flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) {
+ printf("subflow");
+ flags &= ~MPTCP_PM_ADDR_FLAG_SUBFLOW;
+ if (flags)
+ printf(",");
+ }
+
+ if (flags & MPTCP_PM_ADDR_FLAG_BACKUP) {
+ printf("backup");
+ flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP;
+ if (flags)
+ printf(",");
+ }
+
+ /* bump unknown flags, if any */
+ if (flags)
+ printf("0x%x", flags);
+ printf(" ");
+ }
+ if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_IF_IDX) {
+ char name[IF_NAMESIZE], *ret;
+ int32_t ifindex;
+
+ memcpy(&ifindex, RTA_DATA(attrs), 4);
+ ret = if_indextoname(ifindex, name);
+ if (ret)
+ printf("dev %s ", ret);
+ else
+ printf("dev unknown/%d", ifindex);
+ }
+
+ attrs = RTA_NEXT(attrs, len);
+ }
+ printf("\n");
+}
+
+static void print_addrs(struct nlmsghdr *nh, int pm_family, int total_len)
+{
+ struct rtattr *attrs;
+
+ for (; NLMSG_OK(nh, total_len); nh = NLMSG_NEXT(nh, total_len)) {
+ int len = nh->nlmsg_len;
+
+ if (nh->nlmsg_type == NLMSG_DONE)
+ break;
+ if (nh->nlmsg_type == NLMSG_ERROR)
+ nl_error(nh);
+ if (nh->nlmsg_type != pm_family)
+ continue;
+
+ len -= NLMSG_LENGTH(GENL_HDRLEN);
+ attrs = (struct rtattr *) ((char *) NLMSG_DATA(nh) +
+ GENL_HDRLEN);
+ while (RTA_OK(attrs, len)) {
+ if (attrs->rta_type ==
+ (MPTCP_PM_ATTR_ADDR | NLA_F_NESTED))
+ print_addr((void *)RTA_DATA(attrs),
+ attrs->rta_len);
+ attrs = RTA_NEXT(attrs, len);
+ }
+ }
+}
+
+int get_addr(int fd, int pm_family, int argc, char *argv[])
+{
+ char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+ NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
+ 1024];
+ struct rtattr *rta, *nest;
+ struct nlmsghdr *nh;
+ int nest_start;
+ u_int8_t id;
+ int off = 0;
+
+ memset(data, 0, sizeof(data));
+ nh = (void *)data;
+ off = init_genl_req(data, pm_family, MPTCP_PM_CMD_GET_ADDR,
+ MPTCP_PM_VER);
+
+ /* the only argument is the address id */
+ if (argc != 3)
+ syntax(argv);
+
+ id = atoi(argv[2]);
+
+ nest_start = off;
+ nest = (void *)(data + off);
+ nest->rta_type = NLA_F_NESTED | MPTCP_PM_ATTR_ADDR;
+ nest->rta_len = RTA_LENGTH(0);
+ off += NLMSG_ALIGN(nest->rta_len);
+
+ /* build a dummy addr with only the ID set */
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_ID;
+ rta->rta_len = RTA_LENGTH(1);
+ memcpy(RTA_DATA(rta), &id, 1);
+ off += NLMSG_ALIGN(rta->rta_len);
+ nest->rta_len = off - nest_start;
+
+ print_addrs(nh, pm_family, do_nl_req(fd, nh, off, sizeof(data)));
+ return 0;
+}
+
+int dump_addrs(int fd, int pm_family, int argc, char *argv[])
+{
+ char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+ NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
+ 1024];
+ pid_t pid = getpid();
+ struct nlmsghdr *nh;
+ int off = 0;
+
+ memset(data, 0, sizeof(data));
+ nh = (void *)data;
+ off = init_genl_req(data, pm_family, MPTCP_PM_CMD_GET_ADDR,
+ MPTCP_PM_VER);
+ nh->nlmsg_flags |= NLM_F_DUMP;
+ nh->nlmsg_seq = 1;
+ nh->nlmsg_pid = pid;
+ nh->nlmsg_len = off;
+
+ print_addrs(nh, pm_family, do_nl_req(fd, nh, off, sizeof(data)));
+ return 0;
+}
+
+int flush_addrs(int fd, int pm_family, int argc, char *argv[])
+{
+ char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+ NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
+ 1024];
+ struct nlmsghdr *nh;
+ int off = 0;
+
+ memset(data, 0, sizeof(data));
+ nh = (void *)data;
+ off = init_genl_req(data, pm_family, MPTCP_PM_CMD_FLUSH_ADDRS,
+ MPTCP_PM_VER);
+
+ do_nl_req(fd, nh, off, 0);
+ return 0;
+}
+
+static void print_limits(struct nlmsghdr *nh, int pm_family, int total_len)
+{
+ struct rtattr *attrs;
+ uint32_t max;
+
+ for (; NLMSG_OK(nh, total_len); nh = NLMSG_NEXT(nh, total_len)) {
+ int len = nh->nlmsg_len;
+
+ if (nh->nlmsg_type == NLMSG_DONE)
+ break;
+ if (nh->nlmsg_type == NLMSG_ERROR)
+ nl_error(nh);
+ if (nh->nlmsg_type != pm_family)
+ continue;
+
+ len -= NLMSG_LENGTH(GENL_HDRLEN);
+ attrs = (struct rtattr *) ((char *) NLMSG_DATA(nh) +
+ GENL_HDRLEN);
+ while (RTA_OK(attrs, len)) {
+ int type = attrs->rta_type;
+
+ if (type != MPTCP_PM_ATTR_RCV_ADD_ADDRS &&
+ type != MPTCP_PM_ATTR_SUBFLOWS)
+ goto next;
+
+ memcpy(&max, RTA_DATA(attrs), 4);
+ printf("%s %u\n", type == MPTCP_PM_ATTR_SUBFLOWS ?
+ "subflows" : "accept", max);
+
+next:
+ attrs = RTA_NEXT(attrs, len);
+ }
+ }
+}
+
+int get_set_limits(int fd, int pm_family, int argc, char *argv[])
+{
+ char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+ NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
+ 1024];
+ uint32_t rcv_addr = 0, subflows = 0;
+ int cmd, len = sizeof(data);
+ struct nlmsghdr *nh;
+ int off = 0;
+
+ /* limit */
+ if (argc == 4) {
+ rcv_addr = atoi(argv[2]);
+ subflows = atoi(argv[3]);
+ cmd = MPTCP_PM_CMD_SET_LIMITS;
+ } else {
+ cmd = MPTCP_PM_CMD_GET_LIMITS;
+ }
+
+ memset(data, 0, sizeof(data));
+ nh = (void *)data;
+ off = init_genl_req(data, pm_family, cmd, MPTCP_PM_VER);
+
+ /* limit */
+ if (cmd == MPTCP_PM_CMD_SET_LIMITS) {
+ struct rtattr *rta = (void *)(data + off);
+
+ rta->rta_type = MPTCP_PM_ATTR_RCV_ADD_ADDRS;
+ rta->rta_len = RTA_LENGTH(4);
+ memcpy(RTA_DATA(rta), &rcv_addr, 4);
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ATTR_SUBFLOWS;
+ rta->rta_len = RTA_LENGTH(4);
+ memcpy(RTA_DATA(rta), &subflows, 4);
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ /* do not expect a reply */
+ len = 0;
+ }
+
+ len = do_nl_req(fd, nh, off, len);
+ if (cmd == MPTCP_PM_CMD_GET_LIMITS)
+ print_limits(nh, pm_family, len);
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ int fd, pm_family;
+
+ if (argc < 2)
+ syntax(argv);
+
+ fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
+ if (fd == -1)
+ error(1, errno, "socket netlink");
+
+ pm_family = resolve_mptcp_pm_netlink(fd);
+
+ if (!strcmp(argv[1], "add"))
+ return add_addr(fd, pm_family, argc, argv);
+ else if (!strcmp(argv[1], "del"))
+ return del_addr(fd, pm_family, argc, argv);
+ else if (!strcmp(argv[1], "flush"))
+ return flush_addrs(fd, pm_family, argc, argv);
+ else if (!strcmp(argv[1], "get"))
+ return get_addr(fd, pm_family, argc, argv);
+ else if (!strcmp(argv[1], "dump"))
+ return dump_addrs(fd, pm_family, argc, argv);
+ else if (!strcmp(argv[1], "limits"))
+ return get_set_limits(fd, pm_family, argc, argv);
+
+ fprintf(stderr, "unknown sub-command: %s", argv[1]);
+ syntax(argv);
+ return 0;
+}
diff --git a/tools/testing/selftests/net/reuseaddr_ports_exhausted.c b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c
new file mode 100644
index 000000000000..7b01b7c2ec10
--- /dev/null
+++ b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Check if we can fully utilize 4-tuples for connect().
+ *
+ * Rules to bind sockets to the same port when all ephemeral ports are
+ * exhausted.
+ *
+ * 1. if there are TCP_LISTEN sockets on the port, fail to bind.
+ * 2. if there are sockets without SO_REUSEADDR, fail to bind.
+ * 3. if SO_REUSEADDR is disabled, fail to bind.
+ * 4. if SO_REUSEADDR is enabled and SO_REUSEPORT is disabled,
+ * succeed to bind.
+ * 5. if SO_REUSEADDR and SO_REUSEPORT are enabled and
+ * there is no socket having the both options and the same EUID,
+ * succeed to bind.
+ * 6. fail to bind.
+ *
+ * Author: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
+ */
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include "../kselftest_harness.h"
+
+struct reuse_opts {
+ int reuseaddr[2];
+ int reuseport[2];
+};
+
+struct reuse_opts unreusable_opts[12] = {
+ {0, 0, 0, 0},
+ {0, 0, 0, 1},
+ {0, 0, 1, 0},
+ {0, 0, 1, 1},
+ {0, 1, 0, 0},
+ {0, 1, 0, 1},
+ {0, 1, 1, 0},
+ {0, 1, 1, 1},
+ {1, 0, 0, 0},
+ {1, 0, 0, 1},
+ {1, 0, 1, 0},
+ {1, 0, 1, 1},
+};
+
+struct reuse_opts reusable_opts[4] = {
+ {1, 1, 0, 0},
+ {1, 1, 0, 1},
+ {1, 1, 1, 0},
+ {1, 1, 1, 1},
+};
+
+int bind_port(struct __test_metadata *_metadata, int reuseaddr, int reuseport)
+{
+ struct sockaddr_in local_addr;
+ int len = sizeof(local_addr);
+ int fd, ret;
+
+ fd = socket(AF_INET, SOCK_STREAM, 0);
+ ASSERT_NE(-1, fd) TH_LOG("failed to open socket.");
+
+ ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &reuseaddr, sizeof(int));
+ ASSERT_EQ(0, ret) TH_LOG("failed to setsockopt: SO_REUSEADDR.");
+
+ ret = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &reuseport, sizeof(int));
+ ASSERT_EQ(0, ret) TH_LOG("failed to setsockopt: SO_REUSEPORT.");
+
+ local_addr.sin_family = AF_INET;
+ local_addr.sin_addr.s_addr = inet_addr("127.0.0.1");
+ local_addr.sin_port = 0;
+
+ if (bind(fd, (struct sockaddr *)&local_addr, len) == -1) {
+ close(fd);
+ return -1;
+ }
+
+ return fd;
+}
+
+TEST(reuseaddr_ports_exhausted_unreusable)
+{
+ struct reuse_opts *opts;
+ int i, j, fd[2];
+
+ for (i = 0; i < 12; i++) {
+ opts = &unreusable_opts[i];
+
+ for (j = 0; j < 2; j++)
+ fd[j] = bind_port(_metadata, opts->reuseaddr[j], opts->reuseport[j]);
+
+ ASSERT_NE(-1, fd[0]) TH_LOG("failed to bind.");
+ EXPECT_EQ(-1, fd[1]) TH_LOG("should fail to bind.");
+
+ for (j = 0; j < 2; j++)
+ if (fd[j] != -1)
+ close(fd[j]);
+ }
+}
+
+TEST(reuseaddr_ports_exhausted_reusable_same_euid)
+{
+ struct reuse_opts *opts;
+ int i, j, fd[2];
+
+ for (i = 0; i < 4; i++) {
+ opts = &reusable_opts[i];
+
+ for (j = 0; j < 2; j++)
+ fd[j] = bind_port(_metadata, opts->reuseaddr[j], opts->reuseport[j]);
+
+ ASSERT_NE(-1, fd[0]) TH_LOG("failed to bind.");
+
+ if (opts->reuseport[0] && opts->reuseport[1]) {
+ EXPECT_EQ(-1, fd[1]) TH_LOG("should fail to bind because both sockets succeed to be listened.");
+ } else {
+ EXPECT_NE(-1, fd[1]) TH_LOG("should succeed to bind to connect to different destinations.");
+ }
+
+ for (j = 0; j < 2; j++)
+ if (fd[j] != -1)
+ close(fd[j]);
+ }
+}
+
+TEST(reuseaddr_ports_exhausted_reusable_different_euid)
+{
+ struct reuse_opts *opts;
+ int i, j, ret, fd[2];
+ uid_t euid[2] = {10, 20};
+
+ for (i = 0; i < 4; i++) {
+ opts = &reusable_opts[i];
+
+ for (j = 0; j < 2; j++) {
+ ret = seteuid(euid[j]);
+ ASSERT_EQ(0, ret) TH_LOG("failed to seteuid: %d.", euid[j]);
+
+ fd[j] = bind_port(_metadata, opts->reuseaddr[j], opts->reuseport[j]);
+
+ ret = seteuid(0);
+ ASSERT_EQ(0, ret) TH_LOG("failed to seteuid: 0.");
+ }
+
+ ASSERT_NE(-1, fd[0]) TH_LOG("failed to bind.");
+ EXPECT_NE(-1, fd[1]) TH_LOG("should succeed to bind because one socket can be bound in each euid.");
+
+ if (fd[1] != -1) {
+ ret = listen(fd[0], 5);
+ ASSERT_EQ(0, ret) TH_LOG("failed to listen.");
+
+ ret = listen(fd[1], 5);
+ EXPECT_EQ(-1, ret) TH_LOG("should fail to listen because only one uid reserves the port in TCP_LISTEN.");
+ }
+
+ for (j = 0; j < 2; j++)
+ if (fd[j] != -1)
+ close(fd[j]);
+ }
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/reuseaddr_ports_exhausted.sh b/tools/testing/selftests/net/reuseaddr_ports_exhausted.sh
new file mode 100755
index 000000000000..20e3a2913d06
--- /dev/null
+++ b/tools/testing/selftests/net/reuseaddr_ports_exhausted.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run tests when all ephemeral ports are exhausted.
+#
+# Author: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
+
+set +x
+set -e
+
+readonly NETNS="ns-$(mktemp -u XXXXXX)"
+
+setup() {
+ ip netns add "${NETNS}"
+ ip -netns "${NETNS}" link set lo up
+ ip netns exec "${NETNS}" \
+ sysctl -w net.ipv4.ip_local_port_range="32768 32768" \
+ > /dev/null 2>&1
+ ip netns exec "${NETNS}" \
+ sysctl -w net.ipv4.ip_autobind_reuse=1 > /dev/null 2>&1
+}
+
+cleanup() {
+ ip netns del "${NETNS}"
+}
+
+trap cleanup EXIT
+setup
+
+do_test() {
+ ip netns exec "${NETNS}" ./reuseaddr_ports_exhausted
+}
+
+do_test
+echo "tests done"
diff --git a/tools/testing/selftests/networking/timestamping/rxtimestamp.c b/tools/testing/selftests/net/rxtimestamp.c
index 6dee9e636a95..6dee9e636a95 100644
--- a/tools/testing/selftests/networking/timestamping/rxtimestamp.c
+++ b/tools/testing/selftests/net/rxtimestamp.c
diff --git a/tools/testing/selftests/networking/timestamping/timestamping.c b/tools/testing/selftests/net/timestamping.c
index aca3491174a1..aca3491174a1 100644
--- a/tools/testing/selftests/networking/timestamping/timestamping.c
+++ b/tools/testing/selftests/net/timestamping.c
diff --git a/tools/testing/selftests/networking/timestamping/txtimestamp.c b/tools/testing/selftests/net/txtimestamp.c
index 7e386be47120..011b0da6b033 100644
--- a/tools/testing/selftests/networking/timestamping/txtimestamp.c
+++ b/tools/testing/selftests/net/txtimestamp.c
@@ -41,6 +41,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <sys/epoll.h>
#include <sys/ioctl.h>
#include <sys/select.h>
#include <sys/socket.h>
@@ -49,6 +50,10 @@
#include <time.h>
#include <unistd.h>
+#define NSEC_PER_USEC 1000L
+#define USEC_PER_SEC 1000000L
+#define NSEC_PER_SEC 1000000000LL
+
/* command line parameters */
static int cfg_proto = SOCK_STREAM;
static int cfg_ipproto = IPPROTO_TCP;
@@ -61,12 +66,16 @@ static int cfg_delay_snd;
static int cfg_delay_ack;
static bool cfg_show_payload;
static bool cfg_do_pktinfo;
+static bool cfg_busy_poll;
+static int cfg_sleep_usec = 50 * 1000;
static bool cfg_loop_nodata;
-static bool cfg_no_delay;
static bool cfg_use_cmsg;
static bool cfg_use_pf_packet;
+static bool cfg_use_epoll;
+static bool cfg_epollet;
static bool cfg_do_listen;
static uint16_t dest_port = 9000;
+static bool cfg_print_nsec;
static struct sockaddr_in daddr;
static struct sockaddr_in6 daddr6;
@@ -75,11 +84,48 @@ static struct timespec ts_usr;
static int saved_tskey = -1;
static int saved_tskey_type = -1;
+struct timing_event {
+ int64_t min;
+ int64_t max;
+ int64_t total;
+ int count;
+};
+
+static struct timing_event usr_enq;
+static struct timing_event usr_snd;
+static struct timing_event usr_ack;
+
static bool test_failed;
+static int64_t timespec_to_ns64(struct timespec *ts)
+{
+ return ts->tv_sec * NSEC_PER_SEC + ts->tv_nsec;
+}
+
static int64_t timespec_to_us64(struct timespec *ts)
{
- return ts->tv_sec * 1000 * 1000 + ts->tv_nsec / 1000;
+ return ts->tv_sec * USEC_PER_SEC + ts->tv_nsec / NSEC_PER_USEC;
+}
+
+static void init_timing_event(struct timing_event *te)
+{
+ te->min = INT64_MAX;
+ te->max = 0;
+ te->total = 0;
+ te->count = 0;
+}
+
+static void add_timing_event(struct timing_event *te,
+ struct timespec *t_start, struct timespec *t_end)
+{
+ int64_t ts_delta = timespec_to_ns64(t_end) - timespec_to_ns64(t_start);
+
+ te->count++;
+ if (ts_delta < te->min)
+ te->min = ts_delta;
+ if (ts_delta > te->max)
+ te->max = ts_delta;
+ te->total += ts_delta;
}
static void validate_key(int tskey, int tstype)
@@ -113,25 +159,43 @@ static void validate_timestamp(struct timespec *cur, int min_delay)
start64 = timespec_to_us64(&ts_usr);
if (cur64 < start64 + min_delay || cur64 > start64 + max_delay) {
- fprintf(stderr, "ERROR: delay %lu expected between %d and %d\n",
+ fprintf(stderr, "ERROR: %lu us expected between %d and %d\n",
cur64 - start64, min_delay, max_delay);
test_failed = true;
}
}
+static void __print_ts_delta_formatted(int64_t ts_delta)
+{
+ if (cfg_print_nsec)
+ fprintf(stderr, "%lu ns", ts_delta);
+ else
+ fprintf(stderr, "%lu us", ts_delta / NSEC_PER_USEC);
+}
+
static void __print_timestamp(const char *name, struct timespec *cur,
uint32_t key, int payload_len)
{
+ int64_t ts_delta;
+
if (!(cur->tv_sec | cur->tv_nsec))
return;
- fprintf(stderr, " %s: %lu s %lu us (seq=%u, len=%u)",
- name, cur->tv_sec, cur->tv_nsec / 1000,
- key, payload_len);
-
- if (cur != &ts_usr)
- fprintf(stderr, " (USR %+" PRId64 " us)",
- timespec_to_us64(cur) - timespec_to_us64(&ts_usr));
+ if (cfg_print_nsec)
+ fprintf(stderr, " %s: %lu s %lu ns (seq=%u, len=%u)",
+ name, cur->tv_sec, cur->tv_nsec,
+ key, payload_len);
+ else
+ fprintf(stderr, " %s: %lu s %lu us (seq=%u, len=%u)",
+ name, cur->tv_sec, cur->tv_nsec / NSEC_PER_USEC,
+ key, payload_len);
+
+ if (cur != &ts_usr) {
+ ts_delta = timespec_to_ns64(cur) - timespec_to_ns64(&ts_usr);
+ fprintf(stderr, " (USR +");
+ __print_ts_delta_formatted(ts_delta);
+ fprintf(stderr, ")");
+ }
fprintf(stderr, "\n");
}
@@ -155,14 +219,17 @@ static void print_timestamp(struct scm_timestamping *tss, int tstype,
case SCM_TSTAMP_SCHED:
tsname = " ENQ";
validate_timestamp(&tss->ts[0], 0);
+ add_timing_event(&usr_enq, &ts_usr, &tss->ts[0]);
break;
case SCM_TSTAMP_SND:
tsname = " SND";
validate_timestamp(&tss->ts[0], cfg_delay_snd);
+ add_timing_event(&usr_snd, &ts_usr, &tss->ts[0]);
break;
case SCM_TSTAMP_ACK:
tsname = " ACK";
validate_timestamp(&tss->ts[0], cfg_delay_ack);
+ add_timing_event(&usr_ack, &ts_usr, &tss->ts[0]);
break;
default:
error(1, 0, "unknown timestamp type: %u",
@@ -171,6 +238,21 @@ static void print_timestamp(struct scm_timestamping *tss, int tstype,
__print_timestamp(tsname, &tss->ts[0], tskey, payload_len);
}
+static void print_timing_event(char *name, struct timing_event *te)
+{
+ if (!te->count)
+ return;
+
+ fprintf(stderr, " %s: count=%d", name, te->count);
+ fprintf(stderr, ", avg=");
+ __print_ts_delta_formatted((int64_t)(te->total / te->count));
+ fprintf(stderr, ", min=");
+ __print_ts_delta_formatted(te->min);
+ fprintf(stderr, ", max=");
+ __print_ts_delta_formatted(te->max);
+ fprintf(stderr, "\n");
+}
+
/* TODO: convert to check_and_print payload once API is stable */
static void print_payload(char *data, int len)
{
@@ -198,6 +280,17 @@ static void print_pktinfo(int family, int ifindex, void *saddr, void *daddr)
daddr ? inet_ntop(family, daddr, da, sizeof(da)) : "unknown");
}
+static void __epoll(int epfd)
+{
+ struct epoll_event events;
+ int ret;
+
+ memset(&events, 0, sizeof(events));
+ ret = epoll_wait(epfd, &events, 1, cfg_poll_timeout);
+ if (ret != 1)
+ error(1, errno, "epoll_wait");
+}
+
static void __poll(int fd)
{
struct pollfd pollfd;
@@ -391,7 +484,11 @@ static void do_test(int family, unsigned int report_opt)
struct msghdr msg;
struct iovec iov;
char *buf;
- int fd, i, val = 1, total_len;
+ int fd, i, val = 1, total_len, epfd = 0;
+
+ init_timing_event(&usr_enq);
+ init_timing_event(&usr_snd);
+ init_timing_event(&usr_ack);
total_len = cfg_payload_len;
if (cfg_use_pf_packet || cfg_proto == SOCK_RAW) {
@@ -418,6 +515,20 @@ static void do_test(int family, unsigned int report_opt)
if (fd < 0)
error(1, errno, "socket");
+ if (cfg_use_epoll) {
+ struct epoll_event ev;
+
+ memset(&ev, 0, sizeof(ev));
+ ev.data.fd = fd;
+ if (cfg_epollet)
+ ev.events |= EPOLLET;
+ epfd = epoll_create(1);
+ if (epfd <= 0)
+ error(1, errno, "epoll_create");
+ if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev))
+ error(1, errno, "epoll_ctl");
+ }
+
/* reset expected key on each new socket */
saved_tskey = -1;
@@ -525,19 +636,28 @@ static void do_test(int family, unsigned int report_opt)
error(1, errno, "send");
/* wait for all errors to be queued, else ACKs arrive OOO */
- if (!cfg_no_delay)
- usleep(50 * 1000);
+ if (cfg_sleep_usec)
+ usleep(cfg_sleep_usec);
- __poll(fd);
+ if (!cfg_busy_poll) {
+ if (cfg_use_epoll)
+ __epoll(epfd);
+ else
+ __poll(fd);
+ }
while (!recv_errmsg(fd)) {}
}
+ print_timing_event("USR-ENQ", &usr_enq);
+ print_timing_event("USR-SND", &usr_snd);
+ print_timing_event("USR-ACK", &usr_ack);
+
if (close(fd))
error(1, errno, "close");
free(buf);
- usleep(100 * 1000);
+ usleep(100 * NSEC_PER_USEC);
}
static void __attribute__((noreturn)) usage(const char *filepath)
@@ -547,18 +667,22 @@ static void __attribute__((noreturn)) usage(const char *filepath)
" -4: only IPv4\n"
" -6: only IPv6\n"
" -h: show this message\n"
+ " -b: busy poll to read from error queue\n"
" -c N: number of packets for each test\n"
" -C: use cmsg to set tstamp recording options\n"
- " -D: no delay between packets\n"
- " -F: poll() waits forever for an event\n"
+ " -e: use level-triggered epoll() instead of poll()\n"
+ " -E: use event-triggered epoll() instead of poll()\n"
+ " -F: poll()/epoll() waits forever for an event\n"
" -I: request PKTINFO\n"
" -l N: send N bytes at a time\n"
" -L listen on hostname and port\n"
" -n: set no-payload option\n"
+ " -N: print timestamps and durations in nsec (instead of usec)\n"
" -p N: connect to port N\n"
" -P: use PF_PACKET\n"
" -r: use raw\n"
" -R: use raw (IP_HDRINCL)\n"
+ " -S N: usec to sleep before reading error queue\n"
" -u: use udp\n"
" -v: validate SND delay (usec)\n"
" -V: validate ACK delay (usec)\n"
@@ -572,7 +696,8 @@ static void parse_opt(int argc, char **argv)
int proto_count = 0;
int c;
- while ((c = getopt(argc, argv, "46c:CDFhIl:Lnp:PrRuv:V:x")) != -1) {
+ while ((c = getopt(argc, argv,
+ "46bc:CeEFhIl:LnNp:PrRS:uv:V:x")) != -1) {
switch (c) {
case '4':
do_ipv6 = 0;
@@ -580,15 +705,21 @@ static void parse_opt(int argc, char **argv)
case '6':
do_ipv4 = 0;
break;
+ case 'b':
+ cfg_busy_poll = true;
+ break;
case 'c':
cfg_num_pkts = strtoul(optarg, NULL, 10);
break;
case 'C':
cfg_use_cmsg = true;
break;
- case 'D':
- cfg_no_delay = true;
+ case 'e':
+ cfg_use_epoll = true;
break;
+ case 'E':
+ cfg_use_epoll = true;
+ cfg_epollet = true;
case 'F':
cfg_poll_timeout = -1;
break;
@@ -604,6 +735,9 @@ static void parse_opt(int argc, char **argv)
case 'n':
cfg_loop_nodata = true;
break;
+ case 'N':
+ cfg_print_nsec = true;
+ break;
case 'p':
dest_port = strtoul(optarg, NULL, 10);
break;
@@ -623,6 +757,9 @@ static void parse_opt(int argc, char **argv)
cfg_proto = SOCK_RAW;
cfg_ipproto = IPPROTO_RAW;
break;
+ case 'S':
+ cfg_sleep_usec = strtoul(optarg, NULL, 10);
+ break;
case 'u':
proto_count++;
cfg_proto = SOCK_DGRAM;
@@ -653,6 +790,8 @@ static void parse_opt(int argc, char **argv)
error(1, 0, "pass -P, -r, -R or -u, not multiple");
if (cfg_do_pktinfo && cfg_use_pf_packet)
error(1, 0, "cannot ask for pktinfo over pf_packet");
+ if (cfg_busy_poll && cfg_use_epoll)
+ error(1, 0, "pass epoll or busy_poll, not both");
if (optind != argc - 1)
error(1, 0, "missing required hostname argument");
diff --git a/tools/testing/selftests/networking/timestamping/txtimestamp.sh b/tools/testing/selftests/net/txtimestamp.sh
index df0d86ca72b7..eea6f5193693 100755
--- a/tools/testing/selftests/networking/timestamping/txtimestamp.sh
+++ b/tools/testing/selftests/net/txtimestamp.sh
@@ -43,15 +43,40 @@ run_test_tcpudpraw() {
}
run_test_all() {
+ setup
run_test_tcpudpraw # setsockopt
run_test_tcpudpraw -C # cmsg
run_test_tcpudpraw -n # timestamp w/o data
+ echo "OK. All tests passed"
+}
+
+run_test_one() {
+ setup
+ ./txtimestamp $@
+}
+
+usage() {
+ echo "Usage: $0 [ -r | --run ] <txtimestamp args> | [ -h | --help ]"
+ echo " (no args) Run all tests"
+ echo " -r|--run Run an individual test with arguments"
+ echo " -h|--help Help"
+}
+
+main() {
+ if [[ $# -eq 0 ]]; then
+ run_test_all
+ else
+ if [[ "$1" = "-r" || "$1" == "--run" ]]; then
+ shift
+ run_test_one $@
+ else
+ usage
+ fi
+ fi
}
if [[ "$(ip netns identify)" == "root" ]]; then
- ../../net/in_netns.sh $0 $@
+ ./in_netns.sh $0 $@
else
- setup
- run_test_all
- echo "OK. All tests passed"
+ main $@
fi
diff --git a/tools/testing/selftests/networking/timestamping/.gitignore b/tools/testing/selftests/networking/timestamping/.gitignore
deleted file mode 100644
index d9355035e746..000000000000
--- a/tools/testing/selftests/networking/timestamping/.gitignore
+++ /dev/null
@@ -1,4 +0,0 @@
-timestamping
-rxtimestamp
-txtimestamp
-hwtstamp_config
diff --git a/tools/testing/selftests/networking/timestamping/Makefile b/tools/testing/selftests/networking/timestamping/Makefile
deleted file mode 100644
index 1de8bd8ccf5d..000000000000
--- a/tools/testing/selftests/networking/timestamping/Makefile
+++ /dev/null
@@ -1,11 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-CFLAGS += -I../../../../../usr/include
-
-TEST_GEN_FILES := hwtstamp_config rxtimestamp timestamping txtimestamp
-TEST_PROGS := txtimestamp.sh
-
-all: $(TEST_PROGS)
-
-top_srcdir = ../../../../..
-KSFT_KHDR_INSTALL := 1
-include ../../lib.mk
diff --git a/tools/testing/selftests/networking/timestamping/config b/tools/testing/selftests/networking/timestamping/config
deleted file mode 100644
index a13e3169b0a4..000000000000
--- a/tools/testing/selftests/networking/timestamping/config
+++ /dev/null
@@ -1,2 +0,0 @@
-CONFIG_IFB=y
-CONFIG_NET_SCH_NETEM=y
diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config
index c03af4600281..c33a7aac27ff 100644
--- a/tools/testing/selftests/tc-testing/config
+++ b/tools/testing/selftests/tc-testing/config
@@ -31,6 +31,7 @@ CONFIG_NET_EMATCH_U32=m
CONFIG_NET_EMATCH_META=m
CONFIG_NET_EMATCH_TEXT=m
CONFIG_NET_EMATCH_IPSET=m
+CONFIG_NET_EMATCH_CANID=m
CONFIG_NET_EMATCH_IPT=m
CONFIG_NET_CLS_ACT=y
CONFIG_NET_ACT_POLICE=m
@@ -58,3 +59,8 @@ CONFIG_NET_IFE_SKBPRIO=m
CONFIG_NET_IFE_SKBTCINDEX=m
CONFIG_NET_SCH_FIFO=y
CONFIG_NET_SCH_ETS=m
+
+#
+## Network testing
+#
+CONFIG_CAN=m
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json b/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json
index 98a20faf3198..e788c114a484 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json
@@ -372,5 +372,907 @@
"teardown": [
"$TC qdisc del dev $DEV1 ingress"
]
+ },
+ {
+ "id": "bae4",
+ "name": "Add basic filter with u32 ematch u8/zero offset and default action",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0x11 0x0f at 0)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(01000000/0f000000 at 0\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "e6cb",
+ "name": "Add basic filter with u32 ematch u8/zero offset and invalid value >0xFF",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0x1122 0x0f at 0)' classid 1:1",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(11220000/0f000000 at 0\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "7727",
+ "name": "Add basic filter with u32 ematch u8/positive offset and default action",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0x77 0x1f at 12)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(17000000/1f000000 at 12\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "a429",
+ "name": "Add basic filter with u32 ematch u8/invalid mask >0xFF",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0x77 0xff00 at 12)' classid 1:1",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(77000000/ff000000 at 12\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "8373",
+ "name": "Add basic filter with u32 ematch u8/missing offset",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0x77 0xff at)' classid 1:1",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(77000000 at 12\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "ab8e",
+ "name": "Add basic filter with u32 ematch u8/missing AT keyword",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0x77 0xff 0)' classid 1:1",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(77000000 at 12\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "712d",
+ "name": "Add basic filter with u32 ematch u8/missing value",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 at 12)' classid 1:1",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(at 12\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "350f",
+ "name": "Add basic filter with u32 ematch u8/non-numeric value",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 zero 0xff at 0)' classid 1:1",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(00000000/ff000000 at 0\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "e28f",
+ "name": "Add basic filter with u32 ematch u8/non-numeric mask",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0x11 mask at 0)' classid 1:1",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(11000000/00000000 at 0\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "6d5f",
+ "name": "Add basic filter with u32 ematch u8/negative offset and default action",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0xaa 0xf0 at -14)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(0000a000/0000f000 at -16\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "12dc",
+ "name": "Add basic filter with u32 ematch u8/nexthdr+ offset and default action",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0xaa 0xf0 at nexthdr+0)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(a0000000/f0000000 at nexthdr\\+0\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "1d85",
+ "name": "Add basic filter with u32 ematch u16/zero offset and default action",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 0x1122 0xffff at 0)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(11220000/ffff0000 at 0\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "3672",
+ "name": "Add basic filter with u32 ematch u16/zero offset and invalid value >0xFFFF",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 0x112233 0xffff at 0)' classid 1:1",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(11223300/ffff0000 at 0\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "7fb0",
+ "name": "Add basic filter with u32 ematch u16/positive offset and default action",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 0x7788 0x1fff at 12)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(17880000/1fff0000 at 12\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "19af",
+ "name": "Add basic filter with u32 ematch u16/invalid mask >0xFFFF",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 0x7788 0xffffffff at 12)' classid 1:1",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(77880000/ffffffff at 12\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "446d",
+ "name": "Add basic filter with u32 ematch u16/missing offset",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 0x7788 0xffff at)' classid 1:1",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(77880000 at 12\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "151b",
+ "name": "Add basic filter with u32 ematch u16/missing AT keyword",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 0x7788 0xffff 0)' classid 1:1",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(77880000/ffff0000 at 0\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "bb23",
+ "name": "Add basic filter with u32 ematch u16/missing value",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 at 12)' classid 1:1",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(at 12\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "decc",
+ "name": "Add basic filter with u32 ematch u16/non-numeric value",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 zero 0xffff at 0)' classid 1:1",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(00000000/ffff0000 at 0\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "e988",
+ "name": "Add basic filter with u32 ematch u16/non-numeric mask",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0x1122 mask at 0)' classid 1:1",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(11220000/00000000 at 0\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "07d8",
+ "name": "Add basic filter with u32 ematch u16/negative offset and default action",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 0xaabb 0xffff at -12)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(aabb0000/ffff0000 at -12\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "f474",
+ "name": "Add basic filter with u32 ematch u16/nexthdr+ offset and default action",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 0xaabb 0xf0f0 at nexthdr+0)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(a0b00000/f0f00000 at nexthdr\\+0\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "47a0",
+ "name": "Add basic filter with u32 ematch u32/zero offset and default action",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u32 0xaabbccdd 0xffffffff at 0)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(aabbccdd/ffffffff at 0\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "849f",
+ "name": "Add basic filter with u32 ematch u32/positive offset and default action",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u32 0x11227788 0x1ffff0f0 at 12)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(11227080/1ffff0f0 at 12\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "d288",
+ "name": "Add basic filter with u32 ematch u32/missing offset",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u32 0x11227788 0xffffffff at)' classid 1:1",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(11227788/ffffffff at 12\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "4998",
+ "name": "Add basic filter with u32 ematch u32/missing AT keyword",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u32 0x77889900 0xfffff0f0 0)' classid 1:1",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(77889900/fffff0f0 at 0\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "1f0a",
+ "name": "Add basic filter with u32 ematch u32/missing value",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u32 at 12)' classid 1:1",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(at 12\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "848e",
+ "name": "Add basic filter with u32 ematch u32/non-numeric value",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u32 zero 0xffff at 0)' classid 1:1",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(00000000/ffff0000 at 0\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "f748",
+ "name": "Add basic filter with u32 ematch u32/non-numeric mask",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u32 0x11223344 mask at 0)' classid 1:1",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(11223344/00000000 at 0\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "55a6",
+ "name": "Add basic filter with u32 ematch u32/negative offset and default action",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u32 0xaabbccdd 0xff00ff00 at -12)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(aa00cc00/ff00ff00 at -12\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "7282",
+ "name": "Add basic filter with u32 ematch u32/nexthdr+ offset and default action",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u32 0xaabbccdd 0xffffffff at nexthdr+0)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(aabbccdd/ffffffff at nexthdr\\+0\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "b2b6",
+ "name": "Add basic filter with canid ematch and single SFF",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(sff 1)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(sff 0x1\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "f67f",
+ "name": "Add basic filter with canid ematch and single SFF with mask",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(sff 0xaabb:0x00ff)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(sff 0x2BB:0xFF\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "bd5c",
+ "name": "Add basic filter with canid ematch and multiple SFF",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(sff 1 sff 2 sff 3)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(sff 0x1 sff 0x2 sff 0x3\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "83c7",
+ "name": "Add basic filter with canid ematch and multiple SFF with masks",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(sff 0xaa:0x01 sff 0xbb:0x02 sff 0xcc:0x03)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(sff 0xAA:0x1 sff 0xBB:0x2 sff 0xCC:0x3\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "a8f5",
+ "name": "Add basic filter with canid ematch and single EFF",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(eff 1)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(eff 0x1\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "98ae",
+ "name": "Add basic filter with canid ematch and single EFF with mask",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(eff 0xaabb:0xf1f1)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(eff 0xAABB:0xF1F1\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "6056",
+ "name": "Add basic filter with canid ematch and multiple EFF",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(eff 1 eff 2 eff 3)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(eff 0x1 eff 0x2 eff 0x3\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "d188",
+ "name": "Add basic filter with canid ematch and multiple EFF with masks",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(eff 0xaa:0x01 eff 0xbb:0x02 eff 0xcc:0x03)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(eff 0xAA:0x1 eff 0xBB:0x2 eff 0xCC:0x3\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "25d1",
+ "name": "Add basic filter with canid ematch and a combination of SFF/EFF",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(sff 0x01 eff 0x02)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(eff 0x2 sff 0x1\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "b438",
+ "name": "Add basic filter with canid ematch and a combination of SFF/EFF with masks",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(sff 0x01:0xf eff 0x02:0xf)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(eff 0x2:0xF sff 0x1:0xF\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/red.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/red.json
new file mode 100644
index 000000000000..0703a2a255eb
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/red.json
@@ -0,0 +1,185 @@
+[
+ {
+ "id": "8b6e",
+ "name": "Create RED with no flags",
+ "category": [
+ "qdisc",
+ "red"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red limit 1M avpkt 1500 min 100K max 300K",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb $",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "342e",
+ "name": "Create RED with adaptive flag",
+ "category": [
+ "qdisc",
+ "red"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red adaptive limit 1M avpkt 1500 min 100K max 300K",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb adaptive $",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "2d4b",
+ "name": "Create RED with ECN flag",
+ "category": [
+ "qdisc",
+ "red"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red ecn limit 1M avpkt 1500 min 100K max 300K",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb ecn $",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "650f",
+ "name": "Create RED with flags ECN, adaptive",
+ "category": [
+ "qdisc",
+ "red"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red ecn adaptive limit 1M avpkt 1500 min 100K max 300K",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb ecn adaptive $",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "5f15",
+ "name": "Create RED with flags ECN, harddrop",
+ "category": [
+ "qdisc",
+ "red"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red ecn harddrop limit 1M avpkt 1500 min 100K max 300K",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb ecn harddrop $",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "53e8",
+ "name": "Create RED with flags ECN, nodrop",
+ "category": [
+ "qdisc",
+ "red"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red ecn nodrop limit 1M avpkt 1500 min 100K max 300K",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb ecn nodrop $",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "d091",
+ "name": "Fail to create RED with only nodrop flag",
+ "category": [
+ "qdisc",
+ "red"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red nodrop limit 1M avpkt 1500 min 100K max 300K",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc red",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "af8e",
+ "name": "Create RED with flags ECN, nodrop, harddrop",
+ "category": [
+ "qdisc",
+ "red"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red ecn harddrop nodrop limit 1M avpkt 1500 min 100K max 300K",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb ecn harddrop nodrop $",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ }
+]
diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index 1d8b93f1af31..5a4fb80fa832 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -55,6 +55,78 @@ static void test_stream_connection_reset(const struct test_opts *opts)
close(fd);
}
+static void test_stream_bind_only_client(const struct test_opts *opts)
+{
+ union {
+ struct sockaddr sa;
+ struct sockaddr_vm svm;
+ } addr = {
+ .svm = {
+ .svm_family = AF_VSOCK,
+ .svm_port = 1234,
+ .svm_cid = opts->peer_cid,
+ },
+ };
+ int ret;
+ int fd;
+
+ /* Wait for the server to be ready */
+ control_expectln("BIND");
+
+ fd = socket(AF_VSOCK, SOCK_STREAM, 0);
+
+ timeout_begin(TIMEOUT);
+ do {
+ ret = connect(fd, &addr.sa, sizeof(addr.svm));
+ timeout_check("connect");
+ } while (ret < 0 && errno == EINTR);
+ timeout_end();
+
+ if (ret != -1) {
+ fprintf(stderr, "expected connect(2) failure, got %d\n", ret);
+ exit(EXIT_FAILURE);
+ }
+ if (errno != ECONNRESET) {
+ fprintf(stderr, "unexpected connect(2) errno %d\n", errno);
+ exit(EXIT_FAILURE);
+ }
+
+ /* Notify the server that the client has finished */
+ control_writeln("DONE");
+
+ close(fd);
+}
+
+static void test_stream_bind_only_server(const struct test_opts *opts)
+{
+ union {
+ struct sockaddr sa;
+ struct sockaddr_vm svm;
+ } addr = {
+ .svm = {
+ .svm_family = AF_VSOCK,
+ .svm_port = 1234,
+ .svm_cid = VMADDR_CID_ANY,
+ },
+ };
+ int fd;
+
+ fd = socket(AF_VSOCK, SOCK_STREAM, 0);
+
+ if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) {
+ perror("bind");
+ exit(EXIT_FAILURE);
+ }
+
+ /* Notify the client that the server is ready */
+ control_writeln("BIND");
+
+ /* Wait for the client to finish */
+ control_expectln("DONE");
+
+ close(fd);
+}
+
static void test_stream_client_close_client(const struct test_opts *opts)
{
int fd;
@@ -213,6 +285,11 @@ static struct test_case test_cases[] = {
.run_client = test_stream_connection_reset,
},
{
+ .name = "SOCK_STREAM bind only",
+ .run_client = test_stream_bind_only_client,
+ .run_server = test_stream_bind_only_server,
+ },
+ {
.name = "SOCK_STREAM client close",
.run_client = test_stream_client_close_client,
.run_server = test_stream_client_close_server,