diff options
Diffstat (limited to 'tools')
285 files changed, 18342 insertions, 2587 deletions
diff --git a/tools/arch/alpha/include/uapi/asm/mman.h b/tools/arch/alpha/include/uapi/asm/mman.h index c317d3e6867a..ea6a255ae61f 100644 --- a/tools/arch/alpha/include/uapi/asm/mman.h +++ b/tools/arch/alpha/include/uapi/asm/mman.h @@ -27,8 +27,6 @@ #define MAP_NONBLOCK 0x40000 #define MAP_NORESERVE 0x10000 #define MAP_POPULATE 0x20000 -#define MAP_PRIVATE 0x02 -#define MAP_SHARED 0x01 #define MAP_STACK 0x80000 #define PROT_EXEC 0x4 #define PROT_GROWSDOWN 0x01000000 diff --git a/tools/arch/arm64/include/asm/barrier.h b/tools/arch/arm64/include/asm/barrier.h index 378c051fa177..3b9b41331c4f 100644 --- a/tools/arch/arm64/include/asm/barrier.h +++ b/tools/arch/arm64/include/asm/barrier.h @@ -14,6 +14,16 @@ #define wmb() asm volatile("dmb ishst" ::: "memory") #define rmb() asm volatile("dmb ishld" ::: "memory") +/* + * Kernel uses dmb variants on arm64 for smp_*() barriers. Pretty much the same + * implementation as above mb()/wmb()/rmb(), though for the latter kernel uses + * dsb. In any case, should above mb()/wmb()/rmb() change, make sure the below + * smp_*() don't. 
+ */ +#define smp_mb() asm volatile("dmb ish" ::: "memory") +#define smp_wmb() asm volatile("dmb ishst" ::: "memory") +#define smp_rmb() asm volatile("dmb ishld" ::: "memory") + #define smp_store_release(p, v) \ do { \ union { typeof(*p) __val; char __c[1]; } __u = \ diff --git a/tools/arch/arm64/include/uapi/asm/unistd.h b/tools/arch/arm64/include/uapi/asm/unistd.h index dae1584cf017..4703d218663a 100644 --- a/tools/arch/arm64/include/uapi/asm/unistd.h +++ b/tools/arch/arm64/include/uapi/asm/unistd.h @@ -17,5 +17,7 @@ #define __ARCH_WANT_RENAMEAT #define __ARCH_WANT_NEW_STAT +#define __ARCH_WANT_SET_GET_RLIMIT +#define __ARCH_WANT_TIME32_SYSCALLS #include <asm-generic/unistd.h> diff --git a/tools/arch/mips/include/uapi/asm/mman.h b/tools/arch/mips/include/uapi/asm/mman.h index de2206883abc..c8acaa138d46 100644 --- a/tools/arch/mips/include/uapi/asm/mman.h +++ b/tools/arch/mips/include/uapi/asm/mman.h @@ -28,8 +28,6 @@ #define MAP_NONBLOCK 0x20000 #define MAP_NORESERVE 0x0400 #define MAP_POPULATE 0x10000 -#define MAP_PRIVATE 0x002 -#define MAP_SHARED 0x001 #define MAP_STACK 0x40000 #define PROT_EXEC 0x04 #define PROT_GROWSDOWN 0x01000000 diff --git a/tools/arch/parisc/include/uapi/asm/mman.h b/tools/arch/parisc/include/uapi/asm/mman.h index 1bd78758bde9..f9fd1325f5bd 100644 --- a/tools/arch/parisc/include/uapi/asm/mman.h +++ b/tools/arch/parisc/include/uapi/asm/mman.h @@ -27,8 +27,6 @@ #define MAP_NONBLOCK 0x20000 #define MAP_NORESERVE 0x4000 #define MAP_POPULATE 0x10000 -#define MAP_PRIVATE 0x02 -#define MAP_SHARED 0x01 #define MAP_STACK 0x40000 #define PROT_EXEC 0x4 #define PROT_GROWSDOWN 0x01000000 diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h index 8c876c166ef2..26ca425f4c2c 100644 --- a/tools/arch/powerpc/include/uapi/asm/kvm.h +++ b/tools/arch/powerpc/include/uapi/asm/kvm.h @@ -463,10 +463,12 @@ struct kvm_ppc_cpu_char { #define KVM_PPC_CPU_CHAR_BR_HINT_HONOURED (1ULL << 58) #define 
KVM_PPC_CPU_CHAR_MTTRIG_THR_RECONF (1ULL << 57) #define KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS (1ULL << 56) +#define KVM_PPC_CPU_CHAR_BCCTR_FLUSH_ASSIST (1ull << 54) #define KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY (1ULL << 63) #define KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR (1ULL << 62) #define KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ULL << 61) +#define KVM_PPC_CPU_BEHAV_FLUSH_COUNT_CACHE (1ull << 58) /* Per-vcpu XICS interrupt controller state */ #define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c) diff --git a/tools/arch/x86/include/asm/barrier.h b/tools/arch/x86/include/asm/barrier.h index 58919868473c..0adf295dd5b6 100644 --- a/tools/arch/x86/include/asm/barrier.h +++ b/tools/arch/x86/include/asm/barrier.h @@ -21,9 +21,12 @@ #define rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory") #define wmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory") #elif defined(__x86_64__) -#define mb() asm volatile("mfence":::"memory") -#define rmb() asm volatile("lfence":::"memory") +#define mb() asm volatile("mfence" ::: "memory") +#define rmb() asm volatile("lfence" ::: "memory") #define wmb() asm volatile("sfence" ::: "memory") +#define smp_rmb() barrier() +#define smp_wmb() barrier() +#define smp_mb() asm volatile("lock; addl $0,-132(%%rsp)" ::: "memory", "cc") #endif #if defined(__x86_64__) diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 6d6122524711..981ff9479648 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -344,6 +344,7 @@ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" 
Speculation Control (IBRS + IBPB) */ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ diff --git a/tools/arch/xtensa/include/uapi/asm/mman.h b/tools/arch/xtensa/include/uapi/asm/mman.h index 34dde6f44dae..f2b08c990afc 100644 --- a/tools/arch/xtensa/include/uapi/asm/mman.h +++ b/tools/arch/xtensa/include/uapi/asm/mman.h @@ -27,8 +27,6 @@ #define MAP_NONBLOCK 0x20000 #define MAP_NORESERVE 0x0400 #define MAP_POPULATE 0x10000 -#define MAP_PRIVATE 0x002 -#define MAP_SHARED 0x001 #define MAP_STACK 0x40000 #define PROT_EXEC 0x4 #define PROT_GROWSDOWN 0x01000000 diff --git a/tools/bpf/bpftool/Documentation/bpftool-btf.rst b/tools/bpf/bpftool/Documentation/bpftool-btf.rst new file mode 100644 index 000000000000..2dbc1413fabd --- /dev/null +++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst @@ -0,0 +1,222 @@ +================ +bpftool-btf +================ +------------------------------------------------------------------------------- +tool for inspection of BTF data +------------------------------------------------------------------------------- + +:Manual section: 8 + +SYNOPSIS +======== + + **bpftool** [*OPTIONS*] **btf** *COMMAND* + + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] } + + *COMMANDS* := { **dump** | **help** } + +BTF COMMANDS +============= + +| **bpftool** **btf dump** *BTF_SRC* +| **bpftool** **btf help** +| +| *BTF_SRC* := { **id** *BTF_ID* | **prog** *PROG* | **map** *MAP* [{**key** | **value** | **kv** | **all**}] | **file** *FILE* } +| *MAP* := { **id** *MAP_ID* | **pinned** *FILE* } +| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } + +DESCRIPTION +=========== + **bpftool btf dump** *BTF_SRC* + Dump BTF entries from a given *BTF_SRC*. + + When **id** is specified, BTF object with that ID will be + loaded and all its BTF types emitted. + + When **map** is provided, it's expected that map has + associated BTF object with BTF types describing key and + value. 
It's possible to select whether to dump only BTF + type(s) associated with key (**key**), value (**value**), + both key and value (**kv**), or all BTF types present in + associated BTF object (**all**). If not specified, **kv** + is assumed. + + When **prog** is provided, it's expected that program has + associated BTF object with BTF types. + + When specifying *FILE*, an ELF file is expected, containing + .BTF section with well-defined BTF binary format data, + typically produced by clang or pahole. + + **bpftool btf help** + Print short help message. + +OPTIONS +======= + -h, --help + Print short generic help message (similar to **bpftool help**). + + -V, --version + Print version number (similar to **bpftool version**). + + -j, --json + Generate JSON output. For commands that cannot produce JSON, this + option has no effect. + + -p, --pretty + Generate human-readable JSON output. Implies **-j**. + +EXAMPLES +======== +**# bpftool btf dump id 1226** +:: + + [1] PTR '(anon)' type_id=2 + [2] STRUCT 'dummy_tracepoint_args' size=16 vlen=2 + 'pad' type_id=3 bits_offset=0 + 'sock' type_id=4 bits_offset=64 + [3] INT 'long long unsigned int' size=8 bits_offset=0 nr_bits=64 encoding=(none) + [4] PTR '(anon)' type_id=5 + [5] FWD 'sock' fwd_kind=union + +This gives an example of default output for all supported BTF kinds. 
+ +**$ cat prog.c** +:: + + struct fwd_struct; + + enum my_enum { + VAL1 = 3, + VAL2 = 7, + }; + + typedef struct my_struct my_struct_t; + + struct my_struct { + const unsigned int const_int_field; + int bitfield_field: 4; + char arr_field[16]; + const struct fwd_struct *restrict fwd_field; + enum my_enum enum_field; + volatile my_struct_t *typedef_ptr_field; + }; + + union my_union { + int a; + struct my_struct b; + }; + + struct my_struct struct_global_var __attribute__((section("data_sec"))) = { + .bitfield_field = 3, + .enum_field = VAL1, + }; + int global_var __attribute__((section("data_sec"))) = 7; + + __attribute__((noinline)) + int my_func(union my_union *arg1, int arg2) + { + static int static_var __attribute__((section("data_sec"))) = 123; + static_var++; + return static_var; + } + +**$ bpftool btf dump file prog.o** +:: + + [1] PTR '(anon)' type_id=2 + [2] UNION 'my_union' size=48 vlen=2 + 'a' type_id=3 bits_offset=0 + 'b' type_id=4 bits_offset=0 + [3] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED + [4] STRUCT 'my_struct' size=48 vlen=6 + 'const_int_field' type_id=5 bits_offset=0 + 'bitfield_field' type_id=3 bits_offset=32 bitfield_size=4 + 'arr_field' type_id=8 bits_offset=40 + 'fwd_field' type_id=10 bits_offset=192 + 'enum_field' type_id=14 bits_offset=256 + 'typedef_ptr_field' type_id=15 bits_offset=320 + [5] CONST '(anon)' type_id=6 + [6] INT 'unsigned int' size=4 bits_offset=0 nr_bits=32 encoding=(none) + [7] INT 'char' size=1 bits_offset=0 nr_bits=8 encoding=SIGNED + [8] ARRAY '(anon)' type_id=7 index_type_id=9 nr_elems=16 + [9] INT '__ARRAY_SIZE_TYPE__' size=4 bits_offset=0 nr_bits=32 encoding=(none) + [10] RESTRICT '(anon)' type_id=11 + [11] PTR '(anon)' type_id=12 + [12] CONST '(anon)' type_id=13 + [13] FWD 'fwd_struct' fwd_kind=union + [14] ENUM 'my_enum' size=4 vlen=2 + 'VAL1' val=3 + 'VAL2' val=7 + [15] PTR '(anon)' type_id=16 + [16] VOLATILE '(anon)' type_id=17 + [17] TYPEDEF 'my_struct_t' type_id=4 + [18] FUNC_PROTO '(anon)' 
ret_type_id=3 vlen=2 + 'arg1' type_id=1 + 'arg2' type_id=3 + [19] FUNC 'my_func' type_id=18 + [20] VAR 'struct_global_var' type_id=4, linkage=global-alloc + [21] VAR 'global_var' type_id=3, linkage=global-alloc + [22] VAR 'my_func.static_var' type_id=3, linkage=static + [23] DATASEC 'data_sec' size=0 vlen=3 + type_id=20 offset=0 size=48 + type_id=21 offset=0 size=4 + type_id=22 offset=52 size=4 + +The following commands print BTF types associated with specified map's key, +value, both key and value, and all BTF types, respectively. By default, both +key and value types will be printed. + +**# bpftool btf dump map id 123 key** + +:: + + [39] TYPEDEF 'u32' type_id=37 + +**# bpftool btf dump map id 123 value** + +:: + + [86] PTR '(anon)' type_id=87 + +**# bpftool btf dump map id 123 kv** + +:: + + [39] TYPEDEF 'u32' type_id=37 + [86] PTR '(anon)' type_id=87 + +**# bpftool btf dump map id 123 all** + +:: + + [1] PTR '(anon)' type_id=0 + . + . + . + [2866] ARRAY '(anon)' type_id=52 index_type_id=51 nr_elems=4 + +All the standard ways to specify map or program are supported: + +**# bpftool btf dump map id 123** + +**# bpftool btf dump map pinned /sys/fs/bpf/map_name** + +**# bpftool btf dump prog id 456** + +**# bpftool btf dump prog tag b88e0a09b1d9759d** + +**# bpftool btf dump prog pinned /sys/fs/bpf/prog_name** + +SEE ALSO +======== + **bpf**\ (2), + **bpf-helpers**\ (7), + **bpftool**\ (8), + **bpftool-map**\ (8), + **bpftool-prog**\ (8), + **bpftool-cgroup**\ (8), + **bpftool-feature**\ (8), + **bpftool-net**\ (8), + **bpftool-perf**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst index 9bb9ace54ba8..ac26876389c2 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst @@ -29,7 +29,7 @@ CGROUP COMMANDS | *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } | *ATTACH_TYPE* := { **ingress** | **egress** | 
**sock_create** | **sock_ops** | **device** | | **bind4** | **bind6** | **post_bind4** | **post_bind6** | **connect4** | **connect6** | -| **sendmsg4** | **sendmsg6** } +| **sendmsg4** | **sendmsg6** | **sysctl** } | *ATTACH_FLAGS* := { **multi** | **override** } DESCRIPTION @@ -85,7 +85,8 @@ DESCRIPTION **sendmsg4** call to sendto(2), sendmsg(2), sendmmsg(2) for an unconnected udp4 socket (since 4.18); **sendmsg6** call to sendto(2), sendmsg(2), sendmmsg(2) for an - unconnected udp6 socket (since 4.18). + unconnected udp6 socket (since 4.18); + **sysctl** sysctl access (since 5.2). **bpftool cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG* Detach *PROG* from the cgroup *CGROUP* and attach type @@ -99,7 +100,7 @@ OPTIONS -h, --help Print short generic help message (similar to **bpftool help**). - -v, --version + -V, --version Print version number (similar to **bpftool version**). -j, --json @@ -144,4 +145,5 @@ SEE ALSO **bpftool-map**\ (8), **bpftool-feature**\ (8), **bpftool-net**\ (8), - **bpftool-perf**\ (8) + **bpftool-perf**\ (8), + **bpftool-btf**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-feature.rst b/tools/bpf/bpftool/Documentation/bpftool-feature.rst index 82de03dd8f52..14180e887082 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-feature.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-feature.rst @@ -63,7 +63,7 @@ OPTIONS -h, --help Print short generic help message (similar to **bpftool help**). - -v, --version + -V, --version Print version number (similar to **bpftool version**). 
-j, --json @@ -82,4 +82,5 @@ SEE ALSO **bpftool-map**\ (8), **bpftool-cgroup**\ (8), **bpftool-net**\ (8), - **bpftool-perf**\ (8) + **bpftool-perf**\ (8), + **bpftool-btf**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index 5c984ffc9f01..13ef27b39f20 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -135,7 +135,7 @@ OPTIONS -h, --help Print short generic help message (similar to **bpftool help**). - -v, --version + -V, --version Print version number (similar to **bpftool version**). -j, --json @@ -258,4 +258,5 @@ SEE ALSO **bpftool-cgroup**\ (8), **bpftool-feature**\ (8), **bpftool-net**\ (8), - **bpftool-perf**\ (8) + **bpftool-perf**\ (8), + **bpftool-btf**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-net.rst b/tools/bpf/bpftool/Documentation/bpftool-net.rst index 779dab3650ee..934580850f42 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-net.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-net.rst @@ -55,7 +55,7 @@ OPTIONS -h, --help Print short generic help message (similar to **bpftool help**). - -v, --version + -V, --version Print version number (similar to **bpftool version**). -j, --json @@ -143,4 +143,5 @@ SEE ALSO **bpftool-map**\ (8), **bpftool-cgroup**\ (8), **bpftool-feature**\ (8), - **bpftool-perf**\ (8) + **bpftool-perf**\ (8), + **bpftool-btf**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-perf.rst b/tools/bpf/bpftool/Documentation/bpftool-perf.rst index bca5590a80d0..0c7576523a21 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-perf.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-perf.rst @@ -43,7 +43,7 @@ OPTIONS -h, --help Print short generic help message (similar to **bpftool help**). - -v, --version + -V, --version Print version number (similar to **bpftool version**). 
-j, --json @@ -85,4 +85,5 @@ SEE ALSO **bpftool-map**\ (8), **bpftool-cgroup**\ (8), **bpftool-feature**\ (8), - **bpftool-net**\ (8) + **bpftool-net**\ (8), + **bpftool-btf**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 9386bd6e0396..e8118544d118 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -25,7 +25,7 @@ PROG COMMANDS | **bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes** | **visual** | **linum**}] | **bpftool** **prog dump jited** *PROG* [{**file** *FILE* | **opcodes** | **linum**}] | **bpftool** **prog pin** *PROG* *FILE* -| **bpftool** **prog { load | loadall }** *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] +| **bpftool** **prog { load | loadall }** *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] [**pinmaps** *MAP_DIR*] | **bpftool** **prog attach** *PROG* *ATTACH_TYPE* [*MAP*] | **bpftool** **prog detach** *PROG* *ATTACH_TYPE* [*MAP*] | **bpftool** **prog tracelog** @@ -39,7 +39,8 @@ PROG COMMANDS | **cgroup/sock** | **cgroup/dev** | **lwt_in** | **lwt_out** | **lwt_xmit** | | **lwt_seg6local** | **sockops** | **sk_skb** | **sk_msg** | **lirc_mode2** | | **cgroup/bind4** | **cgroup/bind6** | **cgroup/post_bind4** | **cgroup/post_bind6** | -| **cgroup/connect4** | **cgroup/connect6** | **cgroup/sendmsg4** | **cgroup/sendmsg6** +| **cgroup/connect4** | **cgroup/connect6** | **cgroup/sendmsg4** | **cgroup/sendmsg6** | +| **cgroup/sysctl** | } | *ATTACH_TYPE* := { | **msg_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector** @@ -56,6 +57,14 @@ DESCRIPTION Output will start with program ID followed by program type and zero or more named attributes (depending on kernel version). 
+ Since Linux 5.1 the kernel can collect statistics on BPF + programs (such as the total time spent running the program, + and the number of times it was run). If available, bpftool + shows such statistics. However, the kernel does not collect + them by default, as it slightly impacts performance on each + program run. Activation or deactivation of the feature is + performed via the **kernel.bpf_stats_enabled** sysctl knob. + **bpftool prog dump xlated** *PROG* [{ **file** *FILE* | **opcodes** | **visual** | **linum** }] Dump eBPF instructions of the program from the kernel. By default, eBPF will be disassembled and printed to standard @@ -144,7 +153,7 @@ OPTIONS -h, --help Print short generic help message (similar to **bpftool help**). - -v, --version + -V, --version Print version number (similar to **bpftool version**). 
-j, --json @@ -76,4 +76,5 @@ SEE ALSO **bpftool-cgroup**\ (8), **bpftool-feature**\ (8), **bpftool-net**\ (8), - **bpftool-perf**\ (8) + **bpftool-perf**\ (8), + **bpftool-btf**\ (8) diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index b803827d01e8..50e402a5a9c8 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -217,6 +217,7 @@ _bpftool() done cur=${words[cword]} prev=${words[cword - 1]} + pprev=${words[cword - 2]} local object=${words[1]} command=${words[2]} @@ -272,17 +273,17 @@ _bpftool() "$cur" ) ) return 0 ;; - *) - _bpftool_once_attr 'file' - if _bpftool_search_list 'xlated'; then - COMPREPLY+=( $( compgen -W 'opcodes visual linum' -- \ - "$cur" ) ) - else - COMPREPLY+=( $( compgen -W 'opcodes linum' -- \ - "$cur" ) ) - fi - return 0 - ;; + *) + _bpftool_once_attr 'file' + if _bpftool_search_list 'xlated'; then + COMPREPLY+=( $( compgen -W 'opcodes visual linum' -- \ + "$cur" ) ) + else + COMPREPLY+=( $( compgen -W 'opcodes linum' -- \ + "$cur" ) ) + fi + return 0 + ;; esac ;; pin) @@ -370,7 +371,8 @@ _bpftool() lirc_mode2 cgroup/bind4 cgroup/bind6 \ cgroup/connect4 cgroup/connect6 \ cgroup/sendmsg4 cgroup/sendmsg6 \ - cgroup/post_bind4 cgroup/post_bind6" -- \ + cgroup/post_bind4 cgroup/post_bind6 \ + cgroup/sysctl" -- \ "$cur" ) ) return 0 ;; @@ -606,6 +608,51 @@ _bpftool() ;; esac ;; + btf) + local PROG_TYPE='id pinned tag' + local MAP_TYPE='id pinned' + case $command in + dump) + case $prev in + $command) + COMPREPLY+=( $( compgen -W "id map prog file" -- \ + "$cur" ) ) + return 0 + ;; + prog) + COMPREPLY=( $( compgen -W "$PROG_TYPE" -- "$cur" ) ) + return 0 + ;; + map) + COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) ) + return 0 + ;; + id) + case $pprev in + prog) + _bpftool_get_prog_ids + ;; + map) + _bpftool_get_map_ids + ;; + esac + return 0 + ;; + *) + if [[ $cword == 6 ]] && [[ ${words[3]} == "map" ]]; then + COMPREPLY+=( $( compgen -W 'key 
value kv all' -- \ + "$cur" ) ) + fi + return 0 + ;; + esac + ;; + *) + [[ $prev == $object ]] && \ + COMPREPLY=( $( compgen -W 'dump help' -- "$cur" ) ) + ;; + esac + ;; cgroup) case $command in show|list) @@ -619,7 +666,7 @@ _bpftool() attach|detach) local ATTACH_TYPES='ingress egress sock_create sock_ops \ device bind4 bind6 post_bind4 post_bind6 connect4 \ - connect6 sendmsg4 sendmsg6' + connect6 sendmsg4 sendmsg6 sysctl' local ATTACH_FLAGS='multi override' local PROG_TYPE='id pinned tag' case $prev in @@ -629,7 +676,7 @@ _bpftool() ;; ingress|egress|sock_create|sock_ops|device|bind4|bind6|\ post_bind4|post_bind6|connect4|connect6|sendmsg4|\ - sendmsg6) + sendmsg6|sysctl) COMPREPLY=( $( compgen -W "$PROG_TYPE" -- \ "$cur" ) ) return 0 diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c new file mode 100644 index 000000000000..58a2cd002a4b --- /dev/null +++ b/tools/bpf/bpftool/btf.c @@ -0,0 +1,586 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2019 Facebook */ + +#include <errno.h> +#include <fcntl.h> +#include <linux/err.h> +#include <stdbool.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <gelf.h> +#include <bpf.h> +#include <linux/btf.h> + +#include "btf.h" +#include "json_writer.h" +#include "main.h" + +static const char * const btf_kind_str[NR_BTF_KINDS] = { + [BTF_KIND_UNKN] = "UNKNOWN", + [BTF_KIND_INT] = "INT", + [BTF_KIND_PTR] = "PTR", + [BTF_KIND_ARRAY] = "ARRAY", + [BTF_KIND_STRUCT] = "STRUCT", + [BTF_KIND_UNION] = "UNION", + [BTF_KIND_ENUM] = "ENUM", + [BTF_KIND_FWD] = "FWD", + [BTF_KIND_TYPEDEF] = "TYPEDEF", + [BTF_KIND_VOLATILE] = "VOLATILE", + [BTF_KIND_CONST] = "CONST", + [BTF_KIND_RESTRICT] = "RESTRICT", + [BTF_KIND_FUNC] = "FUNC", + [BTF_KIND_FUNC_PROTO] = "FUNC_PROTO", + [BTF_KIND_VAR] = "VAR", + [BTF_KIND_DATASEC] = "DATASEC", +}; + +static const char *btf_int_enc_str(__u8 encoding) +{ + switch (encoding) { + case 0: + return "(none)"; + case BTF_INT_SIGNED: + return 
"SIGNED"; + case BTF_INT_CHAR: + return "CHAR"; + case BTF_INT_BOOL: + return "BOOL"; + default: + return "UNKN"; + } +} + +static const char *btf_var_linkage_str(__u32 linkage) +{ + switch (linkage) { + case BTF_VAR_STATIC: + return "static"; + case BTF_VAR_GLOBAL_ALLOCATED: + return "global-alloc"; + default: + return "(unknown)"; + } +} + +static const char *btf_str(const struct btf *btf, __u32 off) +{ + if (!off) + return "(anon)"; + return btf__name_by_offset(btf, off) ? : "(invalid)"; +} + +static int dump_btf_type(const struct btf *btf, __u32 id, + const struct btf_type *t) +{ + json_writer_t *w = json_wtr; + int kind, safe_kind; + + kind = BTF_INFO_KIND(t->info); + safe_kind = kind <= BTF_KIND_MAX ? kind : BTF_KIND_UNKN; + + if (json_output) { + jsonw_start_object(w); + jsonw_uint_field(w, "id", id); + jsonw_string_field(w, "kind", btf_kind_str[safe_kind]); + jsonw_string_field(w, "name", btf_str(btf, t->name_off)); + } else { + printf("[%u] %s '%s'", id, btf_kind_str[safe_kind], + btf_str(btf, t->name_off)); + } + + switch (BTF_INFO_KIND(t->info)) { + case BTF_KIND_INT: { + __u32 v = *(__u32 *)(t + 1); + const char *enc; + + enc = btf_int_enc_str(BTF_INT_ENCODING(v)); + + if (json_output) { + jsonw_uint_field(w, "size", t->size); + jsonw_uint_field(w, "bits_offset", BTF_INT_OFFSET(v)); + jsonw_uint_field(w, "nr_bits", BTF_INT_BITS(v)); + jsonw_string_field(w, "encoding", enc); + } else { + printf(" size=%u bits_offset=%u nr_bits=%u encoding=%s", + t->size, BTF_INT_OFFSET(v), BTF_INT_BITS(v), + enc); + } + break; + } + case BTF_KIND_PTR: + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_TYPEDEF: + if (json_output) + jsonw_uint_field(w, "type_id", t->type); + else + printf(" type_id=%u", t->type); + break; + case BTF_KIND_ARRAY: { + const struct btf_array *arr = (const void *)(t + 1); + + if (json_output) { + jsonw_uint_field(w, "type_id", arr->type); + jsonw_uint_field(w, "index_type_id", arr->index_type); + 
jsonw_uint_field(w, "nr_elems", arr->nelems); + } else { + printf(" type_id=%u index_type_id=%u nr_elems=%u", + arr->type, arr->index_type, arr->nelems); + } + break; + } + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: { + const struct btf_member *m = (const void *)(t + 1); + __u16 vlen = BTF_INFO_VLEN(t->info); + int i; + + if (json_output) { + jsonw_uint_field(w, "size", t->size); + jsonw_uint_field(w, "vlen", vlen); + jsonw_name(w, "members"); + jsonw_start_array(w); + } else { + printf(" size=%u vlen=%u", t->size, vlen); + } + for (i = 0; i < vlen; i++, m++) { + const char *name = btf_str(btf, m->name_off); + __u32 bit_off, bit_sz; + + if (BTF_INFO_KFLAG(t->info)) { + bit_off = BTF_MEMBER_BIT_OFFSET(m->offset); + bit_sz = BTF_MEMBER_BITFIELD_SIZE(m->offset); + } else { + bit_off = m->offset; + bit_sz = 0; + } + + if (json_output) { + jsonw_start_object(w); + jsonw_string_field(w, "name", name); + jsonw_uint_field(w, "type_id", m->type); + jsonw_uint_field(w, "bits_offset", bit_off); + if (bit_sz) { + jsonw_uint_field(w, "bitfield_size", + bit_sz); + } + jsonw_end_object(w); + } else { + printf("\n\t'%s' type_id=%u bits_offset=%u", + name, m->type, bit_off); + if (bit_sz) + printf(" bitfield_size=%u", bit_sz); + } + } + if (json_output) + jsonw_end_array(w); + break; + } + case BTF_KIND_ENUM: { + const struct btf_enum *v = (const void *)(t + 1); + __u16 vlen = BTF_INFO_VLEN(t->info); + int i; + + if (json_output) { + jsonw_uint_field(w, "size", t->size); + jsonw_uint_field(w, "vlen", vlen); + jsonw_name(w, "values"); + jsonw_start_array(w); + } else { + printf(" size=%u vlen=%u", t->size, vlen); + } + for (i = 0; i < vlen; i++, v++) { + const char *name = btf_str(btf, v->name_off); + + if (json_output) { + jsonw_start_object(w); + jsonw_string_field(w, "name", name); + jsonw_uint_field(w, "val", v->val); + jsonw_end_object(w); + } else { + printf("\n\t'%s' val=%u", name, v->val); + } + } + if (json_output) + jsonw_end_array(w); + break; + } + case BTF_KIND_FWD: { 
+ const char *fwd_kind = BTF_INFO_KIND(t->info) ? "union" + : "struct"; + + if (json_output) + jsonw_string_field(w, "fwd_kind", fwd_kind); + else + printf(" fwd_kind=%s", fwd_kind); + break; + } + case BTF_KIND_FUNC: + if (json_output) + jsonw_uint_field(w, "type_id", t->type); + else + printf(" type_id=%u", t->type); + break; + case BTF_KIND_FUNC_PROTO: { + const struct btf_param *p = (const void *)(t + 1); + __u16 vlen = BTF_INFO_VLEN(t->info); + int i; + + if (json_output) { + jsonw_uint_field(w, "ret_type_id", t->type); + jsonw_uint_field(w, "vlen", vlen); + jsonw_name(w, "params"); + jsonw_start_array(w); + } else { + printf(" ret_type_id=%u vlen=%u", t->type, vlen); + } + for (i = 0; i < vlen; i++, p++) { + const char *name = btf_str(btf, p->name_off); + + if (json_output) { + jsonw_start_object(w); + jsonw_string_field(w, "name", name); + jsonw_uint_field(w, "type_id", p->type); + jsonw_end_object(w); + } else { + printf("\n\t'%s' type_id=%u", name, p->type); + } + } + if (json_output) + jsonw_end_array(w); + break; + } + case BTF_KIND_VAR: { + const struct btf_var *v = (const void *)(t + 1); + const char *linkage; + + linkage = btf_var_linkage_str(v->linkage); + + if (json_output) { + jsonw_uint_field(w, "type_id", t->type); + jsonw_string_field(w, "linkage", linkage); + } else { + printf(" type_id=%u, linkage=%s", t->type, linkage); + } + break; + } + case BTF_KIND_DATASEC: { + const struct btf_var_secinfo *v = (const void *)(t+1); + __u16 vlen = BTF_INFO_VLEN(t->info); + int i; + + if (json_output) { + jsonw_uint_field(w, "size", t->size); + jsonw_uint_field(w, "vlen", vlen); + jsonw_name(w, "vars"); + jsonw_start_array(w); + } else { + printf(" size=%u vlen=%u", t->size, vlen); + } + for (i = 0; i < vlen; i++, v++) { + if (json_output) { + jsonw_start_object(w); + jsonw_uint_field(w, "type_id", v->type); + jsonw_uint_field(w, "offset", v->offset); + jsonw_uint_field(w, "size", v->size); + jsonw_end_object(w); + } else { + printf("\n\ttype_id=%u 
offset=%u size=%u", + v->type, v->offset, v->size); + } + } + if (json_output) + jsonw_end_array(w); + break; + } + default: + break; + } + + if (json_output) + jsonw_end_object(json_wtr); + else + printf("\n"); + + return 0; +} + +static int dump_btf_raw(const struct btf *btf, + __u32 *root_type_ids, int root_type_cnt) +{ + const struct btf_type *t; + int i; + + if (json_output) { + jsonw_start_object(json_wtr); + jsonw_name(json_wtr, "types"); + jsonw_start_array(json_wtr); + } + + if (root_type_cnt) { + for (i = 0; i < root_type_cnt; i++) { + t = btf__type_by_id(btf, root_type_ids[i]); + dump_btf_type(btf, root_type_ids[i], t); + } + } else { + int cnt = btf__get_nr_types(btf); + + for (i = 1; i <= cnt; i++) { + t = btf__type_by_id(btf, i); + dump_btf_type(btf, i, t); + } + } + + if (json_output) { + jsonw_end_array(json_wtr); + jsonw_end_object(json_wtr); + } + return 0; +} + +static bool check_btf_endianness(GElf_Ehdr *ehdr) +{ + static unsigned int const endian = 1; + + switch (ehdr->e_ident[EI_DATA]) { + case ELFDATA2LSB: + return *(unsigned char const *)&endian == 1; + case ELFDATA2MSB: + return *(unsigned char const *)&endian == 0; + default: + return 0; + } +} + +static int btf_load_from_elf(const char *path, struct btf **btf) +{ + int err = -1, fd = -1, idx = 0; + Elf_Data *btf_data = NULL; + Elf_Scn *scn = NULL; + Elf *elf = NULL; + GElf_Ehdr ehdr; + + if (elf_version(EV_CURRENT) == EV_NONE) { + p_err("failed to init libelf for %s", path); + return -1; + } + + fd = open(path, O_RDONLY); + if (fd < 0) { + p_err("failed to open %s: %s", path, strerror(errno)); + return -1; + } + + elf = elf_begin(fd, ELF_C_READ, NULL); + if (!elf) { + p_err("failed to open %s as ELF file", path); + goto done; + } + if (!gelf_getehdr(elf, &ehdr)) { + p_err("failed to get EHDR from %s", path); + goto done; + } + if (!check_btf_endianness(&ehdr)) { + p_err("non-native ELF endianness is not supported"); + goto done; + } + if (!elf_rawdata(elf_getscn(elf, ehdr.e_shstrndx), 
NULL)) { + p_err("failed to get e_shstrndx from %s\n", path); + goto done; + } + + while ((scn = elf_nextscn(elf, scn)) != NULL) { + GElf_Shdr sh; + char *name; + + idx++; + if (gelf_getshdr(scn, &sh) != &sh) { + p_err("failed to get section(%d) header from %s", + idx, path); + goto done; + } + name = elf_strptr(elf, ehdr.e_shstrndx, sh.sh_name); + if (!name) { + p_err("failed to get section(%d) name from %s", + idx, path); + goto done; + } + if (strcmp(name, BTF_ELF_SEC) == 0) { + btf_data = elf_getdata(scn, 0); + if (!btf_data) { + p_err("failed to get section(%d, %s) data from %s", + idx, name, path); + goto done; + } + break; + } + } + + if (!btf_data) { + p_err("%s ELF section not found in %s", BTF_ELF_SEC, path); + goto done; + } + + *btf = btf__new(btf_data->d_buf, btf_data->d_size); + if (IS_ERR(*btf)) { + err = PTR_ERR(*btf); + *btf = NULL; + p_err("failed to load BTF data from %s: %s", + path, strerror(err)); + goto done; + } + + err = 0; +done: + if (err) { + if (*btf) { + btf__free(*btf); + *btf = NULL; + } + } + if (elf) + elf_end(elf); + close(fd); + return err; +} + +static int do_dump(int argc, char **argv) +{ + struct btf *btf = NULL; + __u32 root_type_ids[2]; + int root_type_cnt = 0; + __u32 btf_id = -1; + const char *src; + int fd = -1; + int err; + + if (!REQ_ARGS(2)) { + usage(); + return -1; + } + src = GET_ARG(); + + if (is_prefix(src, "map")) { + struct bpf_map_info info = {}; + __u32 len = sizeof(info); + + if (!REQ_ARGS(2)) { + usage(); + return -1; + } + + fd = map_parse_fd_and_info(&argc, &argv, &info, &len); + if (fd < 0) + return -1; + + btf_id = info.btf_id; + if (argc && is_prefix(*argv, "key")) { + root_type_ids[root_type_cnt++] = info.btf_key_type_id; + NEXT_ARG(); + } else if (argc && is_prefix(*argv, "value")) { + root_type_ids[root_type_cnt++] = info.btf_value_type_id; + NEXT_ARG(); + } else if (argc && is_prefix(*argv, "all")) { + NEXT_ARG(); + } else if (argc && is_prefix(*argv, "kv")) { + root_type_ids[root_type_cnt++] = 
info.btf_key_type_id; + root_type_ids[root_type_cnt++] = info.btf_value_type_id; + NEXT_ARG(); + } else { + root_type_ids[root_type_cnt++] = info.btf_key_type_id; + root_type_ids[root_type_cnt++] = info.btf_value_type_id; + } + } else if (is_prefix(src, "prog")) { + struct bpf_prog_info info = {}; + __u32 len = sizeof(info); + + if (!REQ_ARGS(2)) { + usage(); + return -1; + } + + fd = prog_parse_fd(&argc, &argv); + if (fd < 0) + return -1; + + err = bpf_obj_get_info_by_fd(fd, &info, &len); + if (err) { + p_err("can't get prog info: %s", strerror(errno)); + goto done; + } + + btf_id = info.btf_id; + } else if (is_prefix(src, "id")) { + char *endptr; + + btf_id = strtoul(*argv, &endptr, 0); + if (*endptr) { + p_err("can't parse %s as ID", **argv); + return -1; + } + NEXT_ARG(); + } else if (is_prefix(src, "file")) { + err = btf_load_from_elf(*argv, &btf); + if (err) + goto done; + NEXT_ARG(); + } else { + err = -1; + p_err("unrecognized BTF source specifier: '%s'", src); + goto done; + } + + if (!btf) { + err = btf__get_from_id(btf_id, &btf); + if (err) { + p_err("get btf by id (%u): %s", btf_id, strerror(err)); + goto done; + } + if (!btf) { + err = ENOENT; + p_err("can't find btf with ID (%u)", btf_id); + goto done; + } + } + + dump_btf_raw(btf, root_type_ids, root_type_cnt); + +done: + close(fd); + btf__free(btf); + return err; +} + +static int do_help(int argc, char **argv) +{ + if (json_output) { + jsonw_null(json_wtr); + return 0; + } + + fprintf(stderr, + "Usage: %s btf dump BTF_SRC\n" + " %s btf help\n" + "\n" + " BTF_SRC := { id BTF_ID | prog PROG | map MAP [{key | value | kv | all}] | file FILE }\n" + " " HELP_SPEC_MAP "\n" + " " HELP_SPEC_PROGRAM "\n" + " " HELP_SPEC_OPTIONS "\n" + "", + bin_name, bin_name); + + return 0; +} + +static const struct cmd cmds[] = { + { "help", do_help }, + { "dump", do_dump }, + { 0 } +}; + +int do_btf(int argc, char **argv) +{ + return cmd_select(cmds, argc, argv, do_help); +} diff --git a/tools/bpf/bpftool/btf_dumper.c 
b/tools/bpf/bpftool/btf_dumper.c index e63bce0755eb..8cafb9b31467 100644 --- a/tools/bpf/bpftool/btf_dumper.c +++ b/tools/bpf/bpftool/btf_dumper.c @@ -309,6 +309,48 @@ static int btf_dumper_struct(const struct btf_dumper *d, __u32 type_id, return ret; } +static int btf_dumper_var(const struct btf_dumper *d, __u32 type_id, + __u8 bit_offset, const void *data) +{ + const struct btf_type *t = btf__type_by_id(d->btf, type_id); + int ret; + + jsonw_start_object(d->jw); + jsonw_name(d->jw, btf__name_by_offset(d->btf, t->name_off)); + ret = btf_dumper_do_type(d, t->type, bit_offset, data); + jsonw_end_object(d->jw); + + return ret; +} + +static int btf_dumper_datasec(const struct btf_dumper *d, __u32 type_id, + const void *data) +{ + struct btf_var_secinfo *vsi; + const struct btf_type *t; + int ret = 0, i, vlen; + + t = btf__type_by_id(d->btf, type_id); + if (!t) + return -EINVAL; + + vlen = BTF_INFO_VLEN(t->info); + vsi = (struct btf_var_secinfo *)(t + 1); + + jsonw_start_object(d->jw); + jsonw_name(d->jw, btf__name_by_offset(d->btf, t->name_off)); + jsonw_start_array(d->jw); + for (i = 0; i < vlen; i++) { + ret = btf_dumper_do_type(d, vsi[i].type, 0, data + vsi[i].offset); + if (ret) + break; + } + jsonw_end_array(d->jw); + jsonw_end_object(d->jw); + + return ret; +} + static int btf_dumper_do_type(const struct btf_dumper *d, __u32 type_id, __u8 bit_offset, const void *data) { @@ -341,6 +383,10 @@ static int btf_dumper_do_type(const struct btf_dumper *d, __u32 type_id, case BTF_KIND_CONST: case BTF_KIND_RESTRICT: return btf_dumper_modifier(d, type_id, bit_offset, data); + case BTF_KIND_VAR: + return btf_dumper_var(d, type_id, bit_offset, data); + case BTF_KIND_DATASEC: + return btf_dumper_datasec(d, type_id, data); default: jsonw_printf(d->jw, "(unsupported-kind"); return -EINVAL; @@ -377,6 +423,7 @@ static int __btf_dumper_type_only(const struct btf *btf, __u32 type_id, { const struct btf_type *proto_type; const struct btf_array *array; + const struct btf_var *var; 
const struct btf_type *t; if (!type_id) { @@ -440,6 +487,18 @@ static int __btf_dumper_type_only(const struct btf *btf, __u32 type_id, if (pos == -1) return -1; break; + case BTF_KIND_VAR: + var = (struct btf_var *)(t + 1); + if (var->linkage == BTF_VAR_STATIC) + BTF_PRINT_ARG("static "); + BTF_PRINT_TYPE(t->type); + BTF_PRINT_ARG(" %s", + btf__name_by_offset(btf, t->name_off)); + break; + case BTF_KIND_DATASEC: + BTF_PRINT_ARG("section (\"%s\") ", + btf__name_by_offset(btf, t->name_off)); + break; case BTF_KIND_UNKN: default: return -1; diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c index 4b5c8da2a7c0..7e22f115c8c1 100644 --- a/tools/bpf/bpftool/cgroup.c +++ b/tools/bpf/bpftool/cgroup.c @@ -25,7 +25,7 @@ " ATTACH_TYPE := { ingress | egress | sock_create |\n" \ " sock_ops | device | bind4 | bind6 |\n" \ " post_bind4 | post_bind6 | connect4 |\n" \ - " connect6 | sendmsg4 | sendmsg6 }" + " connect6 | sendmsg4 | sendmsg6 | sysctl }" static const char * const attach_type_strings[] = { [BPF_CGROUP_INET_INGRESS] = "ingress", @@ -41,6 +41,7 @@ static const char * const attach_type_strings[] = { [BPF_CGROUP_INET6_POST_BIND] = "post_bind6", [BPF_CGROUP_UDP4_SENDMSG] = "sendmsg4", [BPF_CGROUP_UDP6_SENDMSG] = "sendmsg6", + [BPF_CGROUP_SYSCTL] = "sysctl", [__MAX_BPF_ATTACH_TYPE] = NULL, }; @@ -248,6 +249,13 @@ static int do_show_tree_fn(const char *fpath, const struct stat *sb, for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) show_attached_bpf_progs(cgroup_fd, type, ftw->level); + if (errno == EINVAL) + /* Last attach type does not support query. + * Do not report an error for this, especially because batch + * mode would stop processing commands. 
+ */ + errno = 0; + if (json_output) { jsonw_end_array(json_wtr); jsonw_end_object(json_wtr); diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index a9d5e9e6a732..1ac1fc520e6a 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -56,7 +56,7 @@ static int do_help(int argc, char **argv) " %s batch file FILE\n" " %s version\n" "\n" - " OBJECT := { prog | map | cgroup | perf | net | feature }\n" + " OBJECT := { prog | map | cgroup | perf | net | feature | btf }\n" " " HELP_SPEC_OPTIONS "\n" "", bin_name, bin_name, bin_name); @@ -188,6 +188,7 @@ static const struct cmd cmds[] = { { "perf", do_perf }, { "net", do_net }, { "feature", do_feature }, + { "btf", do_btf }, { "version", do_version }, { 0 } }; diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index d7dd84d3c660..3d63feb7f852 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -73,6 +73,7 @@ static const char * const prog_type_name[] = { [BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2", [BPF_PROG_TYPE_SK_REUSEPORT] = "sk_reuseport", [BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector", + [BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl", }; extern const char * const map_type_name[]; @@ -149,6 +150,7 @@ int do_perf(int argc, char **arg); int do_net(int argc, char **arg); int do_tracelog(int argc, char **arg); int do_feature(int argc, char **argv); +int do_btf(int argc, char **argv); int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what); int prog_parse_fd(int *argc, char ***argv); diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index e0c650d91784..e951d45c0131 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -46,6 +46,7 @@ const char * const map_type_name[] = { [BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE] = "percpu_cgroup_storage", [BPF_MAP_TYPE_QUEUE] = "queue", [BPF_MAP_TYPE_STACK] = "stack", + [BPF_MAP_TYPE_SK_STORAGE] = "sk_storage", }; const size_t map_type_name_size = ARRAY_SIZE(map_type_name); @@ -153,11 
+154,13 @@ static int do_dump_btf(const struct btf_dumper *d, /* start of key-value pair */ jsonw_start_object(d->jw); - jsonw_name(d->jw, "key"); + if (map_info->btf_key_type_id) { + jsonw_name(d->jw, "key"); - ret = btf_dumper_type(d, map_info->btf_key_type_id, key); - if (ret) - goto err_end_obj; + ret = btf_dumper_type(d, map_info->btf_key_type_id, key); + if (ret) + goto err_end_obj; + } if (!map_is_per_cpu(map_info->type)) { jsonw_name(d->jw, "value"); @@ -259,20 +262,20 @@ static void print_entry_json(struct bpf_map_info *info, unsigned char *key, } static void print_entry_error(struct bpf_map_info *info, unsigned char *key, - const char *value) + const char *error_msg) { - int value_size = strlen(value); + int msg_size = strlen(error_msg); bool single_line, break_names; - break_names = info->key_size > 16 || value_size > 16; - single_line = info->key_size + value_size <= 24 && !break_names; + break_names = info->key_size > 16 || msg_size > 16; + single_line = info->key_size + msg_size <= 24 && !break_names; printf("key:%c", break_names ? '\n' : ' '); fprint_hex(stdout, key, info->key_size, " "); printf(single_line ? " " : "\n"); - printf("value:%c%s", break_names ? '\n' : ' ', value); + printf("value:%c%s", break_names ? '\n' : ' ', error_msg); printf("\n"); } @@ -296,11 +299,7 @@ static void print_entry_plain(struct bpf_map_info *info, unsigned char *key, if (info->value_size) { printf("value:%c", break_names ? '\n' : ' '); - if (value) - fprint_hex(stdout, value, info->value_size, - " "); - else - printf("<no entry>"); + fprint_hex(stdout, value, info->value_size, " "); } printf("\n"); @@ -319,11 +318,8 @@ static void print_entry_plain(struct bpf_map_info *info, unsigned char *key, for (i = 0; i < n; i++) { printf("value (CPU %02d):%c", i, info->value_size > 16 ? 
'\n' : ' '); - if (value) - fprint_hex(stdout, value + i * step, - info->value_size, " "); - else - printf("<no entry>"); + fprint_hex(stdout, value + i * step, + info->value_size, " "); printf("\n"); } } @@ -536,6 +532,9 @@ static int show_map_close_json(int fd, struct bpf_map_info *info) } close(fd); + if (info->btf_id) + jsonw_int_field(json_wtr, "btf_id", info->btf_id); + if (!hash_empty(map_table.table)) { struct pinned_obj *obj; @@ -602,15 +601,19 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info) } close(fd); - printf("\n"); if (!hash_empty(map_table.table)) { struct pinned_obj *obj; hash_for_each_possible(map_table.table, obj, hash, info->id) { if (obj->id == info->id) - printf("\tpinned %s\n", obj->path); + printf("\n\tpinned %s", obj->path); } } + + if (info->btf_id) + printf("\n\tbtf_id %d", info->btf_id); + + printf("\n"); return 0; } @@ -720,11 +723,16 @@ static int dump_map_elem(int fd, void *key, void *value, jsonw_string_field(json_wtr, "error", strerror(lookup_errno)); jsonw_end_object(json_wtr); } else { - if (errno == ENOENT) - print_entry_plain(map_info, key, NULL); - else - print_entry_error(map_info, key, - strerror(lookup_errno)); + const char *msg = NULL; + + if (lookup_errno == ENOENT) + msg = "<no entry>"; + else if (lookup_errno == ENOSPC && + map_info->type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) + msg = "<cannot read>"; + + print_entry_error(map_info, key, + msg ? 
: strerror(lookup_errno)); } return 0; @@ -778,6 +786,10 @@ static int do_dump(int argc, char **argv) } } + if (info.type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY && + info.value_size != 8) + p_info("Warning: cannot read values from %s map with value_size != 8", + map_type_name[info.type]); while (true) { err = bpf_map_get_next_key(fd, prev_key, key); if (err) { diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c index db0e7de49d49..67e99c56bc88 100644 --- a/tools/bpf/bpftool/net.c +++ b/tools/bpf/bpftool/net.c @@ -3,6 +3,7 @@ #define _GNU_SOURCE #include <errno.h> +#include <fcntl.h> #include <stdlib.h> #include <string.h> #include <unistd.h> @@ -12,6 +13,8 @@ #include <linux/rtnetlink.h> #include <linux/tc_act/tc_bpf.h> #include <sys/socket.h> +#include <sys/stat.h> +#include <sys/types.h> #include <bpf.h> #include <nlattr.h> @@ -48,6 +51,10 @@ struct bpf_filter_t { int ifindex; }; +struct bpf_attach_info { + __u32 flow_dissector_id; +}; + static int dump_link_nlmsg(void *cookie, void *msg, struct nlattr **tb) { struct bpf_netdev_t *netinfo = cookie; @@ -180,8 +187,45 @@ out: return 0; } +static int query_flow_dissector(struct bpf_attach_info *attach_info) +{ + __u32 attach_flags; + __u32 prog_ids[1]; + __u32 prog_cnt; + int err; + int fd; + + fd = open("/proc/self/ns/net", O_RDONLY); + if (fd < 0) { + p_err("can't open /proc/self/ns/net: %d", + strerror(errno)); + return -1; + } + prog_cnt = ARRAY_SIZE(prog_ids); + err = bpf_prog_query(fd, BPF_FLOW_DISSECTOR, 0, + &attach_flags, prog_ids, &prog_cnt); + close(fd); + if (err) { + if (errno == EINVAL) { + /* Older kernel's don't support querying + * flow dissector programs. 
+ */ + errno = 0; + return 0; + } + p_err("can't query prog: %s", strerror(errno)); + return -1; + } + + if (prog_cnt == 1) + attach_info->flow_dissector_id = prog_ids[0]; + + return 0; +} + static int do_show(int argc, char **argv) { + struct bpf_attach_info attach_info = {}; int i, sock, ret, filter_idx = -1; struct bpf_netdev_t dev_array; unsigned int nl_pid; @@ -199,6 +243,10 @@ static int do_show(int argc, char **argv) usage(); } + ret = query_flow_dissector(&attach_info); + if (ret) + return -1; + sock = libbpf_netlink_open(&nl_pid); if (sock < 0) { fprintf(stderr, "failed to open netlink sock\n"); @@ -227,6 +275,12 @@ static int do_show(int argc, char **argv) } NET_END_ARRAY("\n"); } + + NET_START_ARRAY("flow_dissector", "%s:\n"); + if (attach_info.flow_dissector_id > 0) + NET_DUMP_UINT("id", "id %u", attach_info.flow_dissector_id); + NET_END_ARRAY("\n"); + NET_END_OBJECT; if (json_output) jsonw_end_array(json_wtr); diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 8ef80d65a474..fc495b27f0fc 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -249,6 +249,9 @@ static void print_prog_json(struct bpf_prog_info *info, int fd) if (info->nr_map_ids) show_prog_maps(fd, info->nr_map_ids); + if (info->btf_id) + jsonw_int_field(json_wtr, "btf_id", info->btf_id); + if (!hash_empty(prog_table.table)) { struct pinned_obj *obj; @@ -319,6 +322,9 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd) } } + if (info->btf_id) + printf("\n\tbtf_id %d", info->btf_id); + printf("\n"); } @@ -401,41 +407,31 @@ static int do_show(int argc, char **argv) static int do_dump(int argc, char **argv) { - unsigned int finfo_rec_size, linfo_rec_size, jited_linfo_rec_size; - void *func_info = NULL, *linfo = NULL, *jited_linfo = NULL; - unsigned int nr_finfo, nr_linfo = 0, nr_jited_linfo = 0; + struct bpf_prog_info_linear *info_linear; struct bpf_prog_linfo *prog_linfo = NULL; - unsigned long *func_ksyms = NULL; - struct bpf_prog_info info 
= {}; - unsigned int *func_lens = NULL; + enum {DUMP_JITED, DUMP_XLATED} mode; const char *disasm_opt = NULL; - unsigned int nr_func_ksyms; - unsigned int nr_func_lens; + struct bpf_prog_info *info; struct dump_data dd = {}; - __u32 len = sizeof(info); + void *func_info = NULL; struct btf *btf = NULL; - unsigned int buf_size; char *filepath = NULL; bool opcodes = false; bool visual = false; char func_sig[1024]; unsigned char *buf; bool linum = false; - __u32 *member_len; - __u64 *member_ptr; + __u32 member_len; + __u64 arrays; ssize_t n; - int err; int fd; if (is_prefix(*argv, "jited")) { if (disasm_init()) return -1; - - member_len = &info.jited_prog_len; - member_ptr = &info.jited_prog_insns; + mode = DUMP_JITED; } else if (is_prefix(*argv, "xlated")) { - member_len = &info.xlated_prog_len; - member_ptr = &info.xlated_prog_insns; + mode = DUMP_XLATED; } else { p_err("expected 'xlated' or 'jited', got: %s", *argv); return -1; @@ -474,175 +470,50 @@ static int do_dump(int argc, char **argv) return -1; } - err = bpf_obj_get_info_by_fd(fd, &info, &len); - if (err) { - p_err("can't get prog info: %s", strerror(errno)); - return -1; - } - - if (!*member_len) { - p_info("no instructions returned"); - close(fd); - return 0; - } + if (mode == DUMP_JITED) + arrays = 1UL << BPF_PROG_INFO_JITED_INSNS; + else + arrays = 1UL << BPF_PROG_INFO_XLATED_INSNS; - buf_size = *member_len; + arrays |= 1UL << BPF_PROG_INFO_JITED_KSYMS; + arrays |= 1UL << BPF_PROG_INFO_JITED_FUNC_LENS; + arrays |= 1UL << BPF_PROG_INFO_FUNC_INFO; + arrays |= 1UL << BPF_PROG_INFO_LINE_INFO; + arrays |= 1UL << BPF_PROG_INFO_JITED_LINE_INFO; - buf = malloc(buf_size); - if (!buf) { - p_err("mem alloc failed"); - close(fd); + info_linear = bpf_program__get_prog_info_linear(fd, arrays); + close(fd); + if (IS_ERR_OR_NULL(info_linear)) { + p_err("can't get prog info: %s", strerror(errno)); return -1; } - nr_func_ksyms = info.nr_jited_ksyms; - if (nr_func_ksyms) { - func_ksyms = malloc(nr_func_ksyms * 
sizeof(__u64)); - if (!func_ksyms) { - p_err("mem alloc failed"); - close(fd); + info = &info_linear->info; + if (mode == DUMP_JITED) { + if (info->jited_prog_len == 0) { + p_info("no instructions returned"); goto err_free; } - } - - nr_func_lens = info.nr_jited_func_lens; - if (nr_func_lens) { - func_lens = malloc(nr_func_lens * sizeof(__u32)); - if (!func_lens) { - p_err("mem alloc failed"); - close(fd); - goto err_free; - } - } - - nr_finfo = info.nr_func_info; - finfo_rec_size = info.func_info_rec_size; - if (nr_finfo && finfo_rec_size) { - func_info = malloc(nr_finfo * finfo_rec_size); - if (!func_info) { - p_err("mem alloc failed"); - close(fd); + buf = (unsigned char *)(info->jited_prog_insns); + member_len = info->jited_prog_len; + } else { /* DUMP_XLATED */ + if (info->xlated_prog_len == 0) { + p_err("error retrieving insn dump: kernel.kptr_restrict set?"); goto err_free; } + buf = (unsigned char *)info->xlated_prog_insns; + member_len = info->xlated_prog_len; } - linfo_rec_size = info.line_info_rec_size; - if (info.nr_line_info && linfo_rec_size && info.btf_id) { - nr_linfo = info.nr_line_info; - linfo = malloc(nr_linfo * linfo_rec_size); - if (!linfo) { - p_err("mem alloc failed"); - close(fd); - goto err_free; - } - } - - jited_linfo_rec_size = info.jited_line_info_rec_size; - if (info.nr_jited_line_info && - jited_linfo_rec_size && - info.nr_jited_ksyms && - info.nr_jited_func_lens && - info.btf_id) { - nr_jited_linfo = info.nr_jited_line_info; - jited_linfo = malloc(nr_jited_linfo * jited_linfo_rec_size); - if (!jited_linfo) { - p_err("mem alloc failed"); - close(fd); - goto err_free; - } - } - - memset(&info, 0, sizeof(info)); - - *member_ptr = ptr_to_u64(buf); - *member_len = buf_size; - info.jited_ksyms = ptr_to_u64(func_ksyms); - info.nr_jited_ksyms = nr_func_ksyms; - info.jited_func_lens = ptr_to_u64(func_lens); - info.nr_jited_func_lens = nr_func_lens; - info.nr_func_info = nr_finfo; - info.func_info_rec_size = finfo_rec_size; - info.func_info = 
ptr_to_u64(func_info); - info.nr_line_info = nr_linfo; - info.line_info_rec_size = linfo_rec_size; - info.line_info = ptr_to_u64(linfo); - info.nr_jited_line_info = nr_jited_linfo; - info.jited_line_info_rec_size = jited_linfo_rec_size; - info.jited_line_info = ptr_to_u64(jited_linfo); - - err = bpf_obj_get_info_by_fd(fd, &info, &len); - close(fd); - if (err) { - p_err("can't get prog info: %s", strerror(errno)); - goto err_free; - } - - if (*member_len > buf_size) { - p_err("too many instructions returned"); - goto err_free; - } - - if (info.nr_jited_ksyms > nr_func_ksyms) { - p_err("too many addresses returned"); - goto err_free; - } - - if (info.nr_jited_func_lens > nr_func_lens) { - p_err("too many values returned"); - goto err_free; - } - - if (info.nr_func_info != nr_finfo) { - p_err("incorrect nr_func_info %d vs. expected %d", - info.nr_func_info, nr_finfo); - goto err_free; - } - - if (info.func_info_rec_size != finfo_rec_size) { - p_err("incorrect func_info_rec_size %d vs. expected %d", - info.func_info_rec_size, finfo_rec_size); - goto err_free; - } - - if (linfo && info.nr_line_info != nr_linfo) { - p_err("incorrect nr_line_info %u vs. expected %u", - info.nr_line_info, nr_linfo); - goto err_free; - } - - if (info.line_info_rec_size != linfo_rec_size) { - p_err("incorrect line_info_rec_size %u vs. expected %u", - info.line_info_rec_size, linfo_rec_size); - goto err_free; - } - - if (jited_linfo && info.nr_jited_line_info != nr_jited_linfo) { - p_err("incorrect nr_jited_line_info %u vs. expected %u", - info.nr_jited_line_info, nr_jited_linfo); - goto err_free; - } - - if (info.jited_line_info_rec_size != jited_linfo_rec_size) { - p_err("incorrect jited_line_info_rec_size %u vs. 
expected %u", - info.jited_line_info_rec_size, jited_linfo_rec_size); - goto err_free; - } - - if ((member_len == &info.jited_prog_len && - info.jited_prog_insns == 0) || - (member_len == &info.xlated_prog_len && - info.xlated_prog_insns == 0)) { - p_err("error retrieving insn dump: kernel.kptr_restrict set?"); - goto err_free; - } - - if (info.btf_id && btf__get_from_id(info.btf_id, &btf)) { + if (info->btf_id && btf__get_from_id(info->btf_id, &btf)) { p_err("failed to get btf"); goto err_free; } - if (nr_linfo) { - prog_linfo = bpf_prog_linfo__new(&info); + func_info = (void *)info->func_info; + + if (info->nr_line_info) { + prog_linfo = bpf_prog_linfo__new(info); if (!prog_linfo) p_info("error in processing bpf_line_info. continue without it."); } @@ -655,9 +526,9 @@ static int do_dump(int argc, char **argv) goto err_free; } - n = write(fd, buf, *member_len); + n = write(fd, buf, member_len); close(fd); - if (n != *member_len) { + if (n != member_len) { p_err("error writing output file: %s", n < 0 ? 
strerror(errno) : "short write"); goto err_free; @@ -665,19 +536,19 @@ static int do_dump(int argc, char **argv) if (json_output) jsonw_null(json_wtr); - } else if (member_len == &info.jited_prog_len) { + } else if (mode == DUMP_JITED) { const char *name = NULL; - if (info.ifindex) { - name = ifindex_to_bfd_params(info.ifindex, - info.netns_dev, - info.netns_ino, + if (info->ifindex) { + name = ifindex_to_bfd_params(info->ifindex, + info->netns_dev, + info->netns_ino, &disasm_opt); if (!name) goto err_free; } - if (info.nr_jited_func_lens && info.jited_func_lens) { + if (info->nr_jited_func_lens && info->jited_func_lens) { struct kernel_sym *sym = NULL; struct bpf_func_info *record; char sym_name[SYM_MAX_NAME]; @@ -685,17 +556,16 @@ static int do_dump(int argc, char **argv) __u64 *ksyms = NULL; __u32 *lens; __u32 i; - - if (info.nr_jited_ksyms) { + if (info->nr_jited_ksyms) { kernel_syms_load(&dd); - ksyms = (__u64 *) info.jited_ksyms; + ksyms = (__u64 *) info->jited_ksyms; } if (json_output) jsonw_start_array(json_wtr); - lens = (__u32 *) info.jited_func_lens; - for (i = 0; i < info.nr_jited_func_lens; i++) { + lens = (__u32 *) info->jited_func_lens; + for (i = 0; i < info->nr_jited_func_lens; i++) { if (ksyms) { sym = kernel_syms_search(&dd, ksyms[i]); if (sym) @@ -707,7 +577,7 @@ static int do_dump(int argc, char **argv) } if (func_info) { - record = func_info + i * finfo_rec_size; + record = func_info + i * info->func_info_rec_size; btf_dumper_type_only(btf, record->type_id, func_sig, sizeof(func_sig)); @@ -744,49 +614,37 @@ static int do_dump(int argc, char **argv) if (json_output) jsonw_end_array(json_wtr); } else { - disasm_print_insn(buf, *member_len, opcodes, name, + disasm_print_insn(buf, member_len, opcodes, name, disasm_opt, btf, NULL, 0, 0, false); } } else if (visual) { if (json_output) jsonw_null(json_wtr); else - dump_xlated_cfg(buf, *member_len); + dump_xlated_cfg(buf, member_len); } else { kernel_syms_load(&dd); - dd.nr_jited_ksyms = 
info.nr_jited_ksyms; - dd.jited_ksyms = (__u64 *) info.jited_ksyms; + dd.nr_jited_ksyms = info->nr_jited_ksyms; + dd.jited_ksyms = (__u64 *) info->jited_ksyms; dd.btf = btf; dd.func_info = func_info; - dd.finfo_rec_size = finfo_rec_size; + dd.finfo_rec_size = info->func_info_rec_size; dd.prog_linfo = prog_linfo; if (json_output) - dump_xlated_json(&dd, buf, *member_len, opcodes, + dump_xlated_json(&dd, buf, member_len, opcodes, linum); else - dump_xlated_plain(&dd, buf, *member_len, opcodes, + dump_xlated_plain(&dd, buf, member_len, opcodes, linum); kernel_syms_destroy(&dd); } - free(buf); - free(func_ksyms); - free(func_lens); - free(func_info); - free(linfo); - free(jited_linfo); - bpf_prog_linfo__free(prog_linfo); + free(info_linear); return 0; err_free: - free(buf); - free(func_ksyms); - free(func_lens); - free(func_info); - free(linfo); - free(jited_linfo); - bpf_prog_linfo__free(prog_linfo); + free(info_linear); return -1; } @@ -1202,7 +1060,7 @@ static int do_help(int argc, char **argv) " tracepoint | raw_tracepoint | xdp | perf_event | cgroup/skb |\n" " cgroup/sock | cgroup/dev | lwt_in | lwt_out | lwt_xmit |\n" " lwt_seg6local | sockops | sk_skb | sk_msg | lirc_mode2 |\n" - " sk_reuseport | flow_dissector |\n" + " sk_reuseport | flow_dissector | cgroup/sysctl |\n" " cgroup/bind4 | cgroup/bind6 | cgroup/post_bind4 |\n" " cgroup/post_bind6 | cgroup/connect4 | cgroup/connect6 |\n" " cgroup/sendmsg4 | cgroup/sendmsg6 }\n" diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c index 7073dbe1ff27..0bb17bf88b18 100644 --- a/tools/bpf/bpftool/xlated_dumper.c +++ b/tools/bpf/bpftool/xlated_dumper.c @@ -195,6 +195,9 @@ static const char *print_imm(void *private_data, if (insn->src_reg == BPF_PSEUDO_MAP_FD) snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), "map[id:%u]", insn->imm); + else if (insn->src_reg == BPF_PSEUDO_MAP_VALUE) + snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), + "map[id:%u][0]+%u", insn->imm, (insn + 1)->imm); 
else snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), "0x%llx", (unsigned long long)full_imm); diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 61e46d54a67c..8d3864b061f3 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -66,7 +66,8 @@ FEATURE_TESTS_BASIC := \ sched_getcpu \ sdt \ setns \ - libaio + libaio \ + disassembler-four-args # FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list # of all feature tests @@ -118,7 +119,8 @@ FEATURE_DISPLAY ?= \ lzma \ get_cpuid \ bpf \ - libaio + libaio \ + disassembler-four-args # Set FEATURE_CHECK_(C|LD)FLAGS-all for all FEATURE_TESTS features. # If in the future we need per-feature checks/flags for features not diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index e903b86b742f..7853e6d91090 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -178,6 +178,10 @@ # include "test-reallocarray.c" #undef main +#define main main_test_disassembler_four_args +# include "test-disassembler-four-args.c" +#undef main + int main(int argc, char *argv[]) { main_test_libpython(); @@ -219,6 +223,7 @@ int main(int argc, char *argv[]) main_test_setns(); main_test_libaio(); main_test_reallocarray(); + main_test_disassembler_four_args(); return 0; } diff --git a/tools/build/feature/test-libopencsd.c b/tools/build/feature/test-libopencsd.c index d68eb4fb40cc..2b0e02c38870 100644 --- a/tools/build/feature/test-libopencsd.c +++ b/tools/build/feature/test-libopencsd.c @@ -4,9 +4,9 @@ /* * Check OpenCSD library version is sufficient to provide required features */ -#define OCSD_MIN_VER ((0 << 16) | (10 << 8) | (0)) +#define OCSD_MIN_VER ((0 << 16) | (11 << 8) | (0)) #if !defined(OCSD_VER_NUM) || (OCSD_VER_NUM < OCSD_MIN_VER) -#error "OpenCSD >= 0.10.0 is required" +#error "OpenCSD >= 0.11.0 is required" #endif int main(void) diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h index 
cce0b02c0e28..ca28b6ab8db7 100644 --- a/tools/include/linux/filter.h +++ b/tools/include/linux/filter.h @@ -278,10 +278,29 @@ .off = 0, \ .imm = ((__u64) (IMM)) >> 32 }) +#define BPF_LD_IMM64_RAW_FULL(DST, SRC, OFF1, OFF2, IMM1, IMM2) \ + ((struct bpf_insn) { \ + .code = BPF_LD | BPF_DW | BPF_IMM, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF1, \ + .imm = IMM1 }), \ + ((struct bpf_insn) { \ + .code = 0, /* zero is reserved opcode */ \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = OFF2, \ + .imm = IMM2 }) + /* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */ #define BPF_LD_MAP_FD(DST, MAP_FD) \ - BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD) + BPF_LD_IMM64_RAW_FULL(DST, BPF_PSEUDO_MAP_FD, 0, 0, \ + MAP_FD, 0) + +#define BPF_LD_MAP_VALUE(DST, MAP_FD, VALUE_OFF) \ + BPF_LD_IMM64_RAW_FULL(DST, BPF_PSEUDO_MAP_VALUE, 0, 0, \ + MAP_FD, VALUE_OFF) /* Relative call */ diff --git a/tools/include/uapi/asm-generic/mman-common-tools.h b/tools/include/uapi/asm-generic/mman-common-tools.h new file mode 100644 index 000000000000..af7d0d3a3182 --- /dev/null +++ b/tools/include/uapi/asm-generic/mman-common-tools.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __ASM_GENERIC_MMAN_COMMON_TOOLS_ONLY_H +#define __ASM_GENERIC_MMAN_COMMON_TOOLS_ONLY_H + +#include <asm-generic/mman-common.h> + +/* We need this because we need to have tools/include/uapi/ included in the tools + * header search path to get access to stuff that is not yet in the system's + * copy of the files in that directory, but since this cset: + * + * 746c9398f5ac ("arch: move common mmap flags to linux/mman.h") + * + * We end up making sys/mman.h, that is in the system headers, to not find the + * MAP_SHARED and MAP_PRIVATE defines because they are not anymore in our copy + * of asm-generic/mman-common.h. So we define them here and include this header + * from each of the per arch mman.h headers. 
+ */ +#ifndef MAP_SHARED +#define MAP_SHARED 0x01 /* Share changes */ +#define MAP_PRIVATE 0x02 /* Changes are private */ +#define MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */ +#endif +#endif // __ASM_GENERIC_MMAN_COMMON_TOOLS_ONLY_H diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h index e7ee32861d51..abd238d0f7a4 100644 --- a/tools/include/uapi/asm-generic/mman-common.h +++ b/tools/include/uapi/asm-generic/mman-common.h @@ -15,9 +15,7 @@ #define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ #define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */ +/* 0x01 - 0x03 are defined in linux/mman.h */ #define MAP_TYPE 0x0f /* Mask for type of mapping */ #define MAP_FIXED 0x10 /* Interpret addr exactly */ #define MAP_ANONYMOUS 0x20 /* don't use a file */ diff --git a/tools/include/uapi/asm-generic/mman.h b/tools/include/uapi/asm-generic/mman.h index 653687d9771b..36c197fc44a0 100644 --- a/tools/include/uapi/asm-generic/mman.h +++ b/tools/include/uapi/asm-generic/mman.h @@ -2,7 +2,7 @@ #ifndef __ASM_GENERIC_MMAN_H #define __ASM_GENERIC_MMAN_H -#include <asm-generic/mman-common.h> +#include <asm-generic/mman-common-tools.h> #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index d90127298f12..dee7292e1df6 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -38,8 +38,10 @@ __SYSCALL(__NR_io_destroy, sys_io_destroy) __SC_COMP(__NR_io_submit, sys_io_submit, compat_sys_io_submit) #define __NR_io_cancel 3 __SYSCALL(__NR_io_cancel, sys_io_cancel) +#if 
defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_io_getevents 4 -__SC_COMP(__NR_io_getevents, sys_io_getevents, compat_sys_io_getevents) +__SC_3264(__NR_io_getevents, sys_io_getevents_time32, sys_io_getevents) +#endif /* fs/xattr.c */ #define __NR_setxattr 5 @@ -179,7 +181,7 @@ __SYSCALL(__NR_fchownat, sys_fchownat) #define __NR_fchown 55 __SYSCALL(__NR_fchown, sys_fchown) #define __NR_openat 56 -__SC_COMP(__NR_openat, sys_openat, compat_sys_openat) +__SYSCALL(__NR_openat, sys_openat) #define __NR_close 57 __SYSCALL(__NR_close, sys_close) #define __NR_vhangup 58 @@ -222,10 +224,12 @@ __SC_COMP(__NR_pwritev, sys_pwritev, compat_sys_pwritev) __SYSCALL(__NR3264_sendfile, sys_sendfile64) /* fs/select.c */ +#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_pselect6 72 -__SC_COMP(__NR_pselect6, sys_pselect6, compat_sys_pselect6) +__SC_COMP_3264(__NR_pselect6, sys_pselect6_time32, sys_pselect6, compat_sys_pselect6_time32) #define __NR_ppoll 73 -__SC_COMP(__NR_ppoll, sys_ppoll, compat_sys_ppoll) +__SC_COMP_3264(__NR_ppoll, sys_ppoll_time32, sys_ppoll, compat_sys_ppoll_time32) +#endif /* fs/signalfd.c */ #define __NR_signalfd4 74 @@ -269,16 +273,20 @@ __SC_COMP(__NR_sync_file_range, sys_sync_file_range, \ /* fs/timerfd.c */ #define __NR_timerfd_create 85 __SYSCALL(__NR_timerfd_create, sys_timerfd_create) +#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_timerfd_settime 86 -__SC_COMP(__NR_timerfd_settime, sys_timerfd_settime, \ - compat_sys_timerfd_settime) +__SC_3264(__NR_timerfd_settime, sys_timerfd_settime32, \ + sys_timerfd_settime) #define __NR_timerfd_gettime 87 -__SC_COMP(__NR_timerfd_gettime, sys_timerfd_gettime, \ - compat_sys_timerfd_gettime) +__SC_3264(__NR_timerfd_gettime, sys_timerfd_gettime32, \ + sys_timerfd_gettime) +#endif /* fs/utimes.c */ +#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_utimensat 88 -__SC_COMP(__NR_utimensat, sys_utimensat, 
compat_sys_utimensat) +__SC_3264(__NR_utimensat, sys_utimensat_time32, sys_utimensat) +#endif /* kernel/acct.c */ #define __NR_acct 89 @@ -309,8 +317,10 @@ __SYSCALL(__NR_set_tid_address, sys_set_tid_address) __SYSCALL(__NR_unshare, sys_unshare) /* kernel/futex.c */ +#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_futex 98 -__SC_COMP(__NR_futex, sys_futex, compat_sys_futex) +__SC_3264(__NR_futex, sys_futex_time32, sys_futex) +#endif #define __NR_set_robust_list 99 __SC_COMP(__NR_set_robust_list, sys_set_robust_list, \ compat_sys_set_robust_list) @@ -319,8 +329,10 @@ __SC_COMP(__NR_get_robust_list, sys_get_robust_list, \ compat_sys_get_robust_list) /* kernel/hrtimer.c */ +#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_nanosleep 101 -__SC_COMP(__NR_nanosleep, sys_nanosleep, compat_sys_nanosleep) +__SC_3264(__NR_nanosleep, sys_nanosleep_time32, sys_nanosleep) +#endif /* kernel/itimer.c */ #define __NR_getitimer 102 @@ -341,23 +353,29 @@ __SYSCALL(__NR_delete_module, sys_delete_module) /* kernel/posix-timers.c */ #define __NR_timer_create 107 __SC_COMP(__NR_timer_create, sys_timer_create, compat_sys_timer_create) +#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_timer_gettime 108 -__SC_COMP(__NR_timer_gettime, sys_timer_gettime, compat_sys_timer_gettime) +__SC_3264(__NR_timer_gettime, sys_timer_gettime32, sys_timer_gettime) +#endif #define __NR_timer_getoverrun 109 __SYSCALL(__NR_timer_getoverrun, sys_timer_getoverrun) +#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_timer_settime 110 -__SC_COMP(__NR_timer_settime, sys_timer_settime, compat_sys_timer_settime) +__SC_3264(__NR_timer_settime, sys_timer_settime32, sys_timer_settime) +#endif #define __NR_timer_delete 111 __SYSCALL(__NR_timer_delete, sys_timer_delete) +#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_clock_settime 112 -__SC_COMP(__NR_clock_settime, 
sys_clock_settime, compat_sys_clock_settime) +__SC_3264(__NR_clock_settime, sys_clock_settime32, sys_clock_settime) #define __NR_clock_gettime 113 -__SC_COMP(__NR_clock_gettime, sys_clock_gettime, compat_sys_clock_gettime) +__SC_3264(__NR_clock_gettime, sys_clock_gettime32, sys_clock_gettime) #define __NR_clock_getres 114 -__SC_COMP(__NR_clock_getres, sys_clock_getres, compat_sys_clock_getres) +__SC_3264(__NR_clock_getres, sys_clock_getres_time32, sys_clock_getres) #define __NR_clock_nanosleep 115 -__SC_COMP(__NR_clock_nanosleep, sys_clock_nanosleep, \ - compat_sys_clock_nanosleep) +__SC_3264(__NR_clock_nanosleep, sys_clock_nanosleep_time32, \ + sys_clock_nanosleep) +#endif /* kernel/printk.c */ #define __NR_syslog 116 @@ -388,9 +406,11 @@ __SYSCALL(__NR_sched_yield, sys_sched_yield) __SYSCALL(__NR_sched_get_priority_max, sys_sched_get_priority_max) #define __NR_sched_get_priority_min 126 __SYSCALL(__NR_sched_get_priority_min, sys_sched_get_priority_min) +#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_sched_rr_get_interval 127 -__SC_COMP(__NR_sched_rr_get_interval, sys_sched_rr_get_interval, \ - compat_sys_sched_rr_get_interval) +__SC_3264(__NR_sched_rr_get_interval, sys_sched_rr_get_interval_time32, \ + sys_sched_rr_get_interval) +#endif /* kernel/signal.c */ #define __NR_restart_syscall 128 @@ -411,9 +431,11 @@ __SC_COMP(__NR_rt_sigaction, sys_rt_sigaction, compat_sys_rt_sigaction) __SC_COMP(__NR_rt_sigprocmask, sys_rt_sigprocmask, compat_sys_rt_sigprocmask) #define __NR_rt_sigpending 136 __SC_COMP(__NR_rt_sigpending, sys_rt_sigpending, compat_sys_rt_sigpending) +#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_rt_sigtimedwait 137 -__SC_COMP(__NR_rt_sigtimedwait, sys_rt_sigtimedwait, \ - compat_sys_rt_sigtimedwait) +__SC_COMP_3264(__NR_rt_sigtimedwait, sys_rt_sigtimedwait_time32, \ + sys_rt_sigtimedwait, compat_sys_rt_sigtimedwait_time32) +#endif #define __NR_rt_sigqueueinfo 138 
__SC_COMP(__NR_rt_sigqueueinfo, sys_rt_sigqueueinfo, \ compat_sys_rt_sigqueueinfo) @@ -467,10 +489,15 @@ __SYSCALL(__NR_uname, sys_newuname) __SYSCALL(__NR_sethostname, sys_sethostname) #define __NR_setdomainname 162 __SYSCALL(__NR_setdomainname, sys_setdomainname) + +#ifdef __ARCH_WANT_SET_GET_RLIMIT +/* getrlimit and setrlimit are superseded with prlimit64 */ #define __NR_getrlimit 163 __SC_COMP(__NR_getrlimit, sys_getrlimit, compat_sys_getrlimit) #define __NR_setrlimit 164 __SC_COMP(__NR_setrlimit, sys_setrlimit, compat_sys_setrlimit) +#endif + #define __NR_getrusage 165 __SC_COMP(__NR_getrusage, sys_getrusage, compat_sys_getrusage) #define __NR_umask 166 @@ -481,12 +508,14 @@ __SYSCALL(__NR_prctl, sys_prctl) __SYSCALL(__NR_getcpu, sys_getcpu) /* kernel/time.c */ +#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_gettimeofday 169 __SC_COMP(__NR_gettimeofday, sys_gettimeofday, compat_sys_gettimeofday) #define __NR_settimeofday 170 __SC_COMP(__NR_settimeofday, sys_settimeofday, compat_sys_settimeofday) #define __NR_adjtimex 171 -__SC_COMP(__NR_adjtimex, sys_adjtimex, compat_sys_adjtimex) +__SC_3264(__NR_adjtimex, sys_adjtimex_time32, sys_adjtimex) +#endif /* kernel/timer.c */ #define __NR_getpid 172 @@ -511,11 +540,13 @@ __SC_COMP(__NR_sysinfo, sys_sysinfo, compat_sys_sysinfo) __SC_COMP(__NR_mq_open, sys_mq_open, compat_sys_mq_open) #define __NR_mq_unlink 181 __SYSCALL(__NR_mq_unlink, sys_mq_unlink) +#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_mq_timedsend 182 -__SC_COMP(__NR_mq_timedsend, sys_mq_timedsend, compat_sys_mq_timedsend) +__SC_3264(__NR_mq_timedsend, sys_mq_timedsend_time32, sys_mq_timedsend) #define __NR_mq_timedreceive 183 -__SC_COMP(__NR_mq_timedreceive, sys_mq_timedreceive, \ - compat_sys_mq_timedreceive) +__SC_3264(__NR_mq_timedreceive, sys_mq_timedreceive_time32, \ + sys_mq_timedreceive) +#endif #define __NR_mq_notify 184 __SC_COMP(__NR_mq_notify, sys_mq_notify, compat_sys_mq_notify) 
#define __NR_mq_getsetattr 185 @@ -536,8 +567,10 @@ __SC_COMP(__NR_msgsnd, sys_msgsnd, compat_sys_msgsnd) __SYSCALL(__NR_semget, sys_semget) #define __NR_semctl 191 __SC_COMP(__NR_semctl, sys_semctl, compat_sys_semctl) +#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_semtimedop 192 -__SC_COMP(__NR_semtimedop, sys_semtimedop, compat_sys_semtimedop) +__SC_COMP(__NR_semtimedop, sys_semtimedop, sys_semtimedop_time32) +#endif #define __NR_semop 193 __SYSCALL(__NR_semop, sys_semop) @@ -658,8 +691,10 @@ __SC_COMP(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo, \ __SYSCALL(__NR_perf_event_open, sys_perf_event_open) #define __NR_accept4 242 __SYSCALL(__NR_accept4, sys_accept4) +#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_recvmmsg 243 -__SC_COMP(__NR_recvmmsg, sys_recvmmsg, compat_sys_recvmmsg) +__SC_COMP_3264(__NR_recvmmsg, sys_recvmmsg_time32, sys_recvmmsg, compat_sys_recvmmsg_time32) +#endif /* * Architectures may provide up to 16 syscalls of their own @@ -667,8 +702,10 @@ __SC_COMP(__NR_recvmmsg, sys_recvmmsg, compat_sys_recvmmsg) */ #define __NR_arch_specific_syscall 244 +#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_wait4 260 __SC_COMP(__NR_wait4, sys_wait4, compat_sys_wait4) +#endif #define __NR_prlimit64 261 __SYSCALL(__NR_prlimit64, sys_prlimit64) #define __NR_fanotify_init 262 @@ -678,10 +715,11 @@ __SYSCALL(__NR_fanotify_mark, sys_fanotify_mark) #define __NR_name_to_handle_at 264 __SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at) #define __NR_open_by_handle_at 265 -__SC_COMP(__NR_open_by_handle_at, sys_open_by_handle_at, \ - compat_sys_open_by_handle_at) +__SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at) +#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_clock_adjtime 266 -__SC_COMP(__NR_clock_adjtime, sys_clock_adjtime, compat_sys_clock_adjtime) +__SC_3264(__NR_clock_adjtime, sys_clock_adjtime32, sys_clock_adjtime) +#endif 
#define __NR_syncfs 267 __SYSCALL(__NR_syncfs, sys_syncfs) #define __NR_setns 268 @@ -734,15 +772,69 @@ __SYSCALL(__NR_pkey_alloc, sys_pkey_alloc) __SYSCALL(__NR_pkey_free, sys_pkey_free) #define __NR_statx 291 __SYSCALL(__NR_statx, sys_statx) +#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_io_pgetevents 292 -__SC_COMP(__NR_io_pgetevents, sys_io_pgetevents, compat_sys_io_pgetevents) +__SC_COMP_3264(__NR_io_pgetevents, sys_io_pgetevents_time32, sys_io_pgetevents, compat_sys_io_pgetevents) +#endif #define __NR_rseq 293 __SYSCALL(__NR_rseq, sys_rseq) #define __NR_kexec_file_load 294 __SYSCALL(__NR_kexec_file_load, sys_kexec_file_load) +/* 295 through 402 are unassigned to sync up with generic numbers, don't use */ +#if __BITS_PER_LONG == 32 +#define __NR_clock_gettime64 403 +__SYSCALL(__NR_clock_gettime64, sys_clock_gettime) +#define __NR_clock_settime64 404 +__SYSCALL(__NR_clock_settime64, sys_clock_settime) +#define __NR_clock_adjtime64 405 +__SYSCALL(__NR_clock_adjtime64, sys_clock_adjtime) +#define __NR_clock_getres_time64 406 +__SYSCALL(__NR_clock_getres_time64, sys_clock_getres) +#define __NR_clock_nanosleep_time64 407 +__SYSCALL(__NR_clock_nanosleep_time64, sys_clock_nanosleep) +#define __NR_timer_gettime64 408 +__SYSCALL(__NR_timer_gettime64, sys_timer_gettime) +#define __NR_timer_settime64 409 +__SYSCALL(__NR_timer_settime64, sys_timer_settime) +#define __NR_timerfd_gettime64 410 +__SYSCALL(__NR_timerfd_gettime64, sys_timerfd_gettime) +#define __NR_timerfd_settime64 411 +__SYSCALL(__NR_timerfd_settime64, sys_timerfd_settime) +#define __NR_utimensat_time64 412 +__SYSCALL(__NR_utimensat_time64, sys_utimensat) +#define __NR_pselect6_time64 413 +__SC_COMP(__NR_pselect6_time64, sys_pselect6, compat_sys_pselect6_time64) +#define __NR_ppoll_time64 414 +__SC_COMP(__NR_ppoll_time64, sys_ppoll, compat_sys_ppoll_time64) +#define __NR_io_pgetevents_time64 416 +__SYSCALL(__NR_io_pgetevents_time64, sys_io_pgetevents) +#define 
__NR_recvmmsg_time64 417 +__SC_COMP(__NR_recvmmsg_time64, sys_recvmmsg, compat_sys_recvmmsg_time64) +#define __NR_mq_timedsend_time64 418 +__SYSCALL(__NR_mq_timedsend_time64, sys_mq_timedsend) +#define __NR_mq_timedreceive_time64 419 +__SYSCALL(__NR_mq_timedreceive_time64, sys_mq_timedreceive) +#define __NR_semtimedop_time64 420 +__SYSCALL(__NR_semtimedop_time64, sys_semtimedop) +#define __NR_rt_sigtimedwait_time64 421 +__SC_COMP(__NR_rt_sigtimedwait_time64, sys_rt_sigtimedwait, compat_sys_rt_sigtimedwait_time64) +#define __NR_futex_time64 422 +__SYSCALL(__NR_futex_time64, sys_futex) +#define __NR_sched_rr_get_interval_time64 423 +__SYSCALL(__NR_sched_rr_get_interval_time64, sys_sched_rr_get_interval) +#endif + +#define __NR_pidfd_send_signal 424 +__SYSCALL(__NR_pidfd_send_signal, sys_pidfd_send_signal) +#define __NR_io_uring_setup 425 +__SYSCALL(__NR_io_uring_setup, sys_io_uring_setup) +#define __NR_io_uring_enter 426 +__SYSCALL(__NR_io_uring_enter, sys_io_uring_enter) +#define __NR_io_uring_register 427 +__SYSCALL(__NR_io_uring_register, sys_io_uring_register) #undef __NR_syscalls -#define __NR_syscalls 295 +#define __NR_syscalls 428 /* * 32 bit systems traditionally used different diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index 298b2e197744..397810fa2d33 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -1486,9 +1486,73 @@ struct drm_i915_gem_context_param { #define I915_CONTEXT_MAX_USER_PRIORITY 1023 /* inclusive */ #define I915_CONTEXT_DEFAULT_PRIORITY 0 #define I915_CONTEXT_MIN_USER_PRIORITY -1023 /* inclusive */ + /* + * When using the following param, value should be a pointer to + * drm_i915_gem_context_param_sseu. 
+ */ +#define I915_CONTEXT_PARAM_SSEU 0x7 __u64 value; }; +/** + * Context SSEU programming + * + * It may be necessary for either functional or performance reasons to configure + * a context to run with a reduced number of SSEU (where SSEU stands for Slice/ + * Sub-slice/EU). + * + * This is done by configuring SSEU configuration using the below + * @struct drm_i915_gem_context_param_sseu for every supported engine which + * userspace intends to use. + * + * Not all GPUs or engines support this functionality in which case an error + * code -ENODEV will be returned. + * + * Also, flexibility of possible SSEU configuration permutations varies between + * GPU generations and software imposed limitations. Requesting an unsupported + * combination will return an error code of -EINVAL. + * + * NOTE: When perf/OA is active the context's SSEU configuration is ignored in + * favour of a single global setting. + */ +struct drm_i915_gem_context_param_sseu { + /* + * Engine class & instance to be configured or queried. + */ + __u16 engine_class; + __u16 engine_instance; + + /* + * Unused for now. Must be cleared to zero. + */ + __u32 flags; + + /* + * Mask of slices to enable for the context. Valid values are a subset + * of the bitmask value returned for I915_PARAM_SLICE_MASK. + */ + __u64 slice_mask; + + /* + * Mask of subslices to enable for the context. Valid values are a + * subset of the bitmask value returned by I915_PARAM_SUBSLICE_MASK. + */ + __u64 subslice_mask; + + /* + * Minimum/Maximum number of EUs to enable per subslice for the + * context. min_eus_per_subslice must be less than or equal to + * max_eus_per_subslice. + */ + __u16 min_eus_per_subslice; + __u16 max_eus_per_subslice; + + /* + * Unused for now. Must be cleared to zero.
+ */ + __u32 rsvd; +}; + enum drm_i915_oa_format { I915_OA_FORMAT_A13 = 1, /* HSW only */ I915_OA_FORMAT_A29, /* HSW only */ diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 3c38ac9a92a7..72336bac7573 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -105,6 +105,7 @@ enum bpf_cmd { BPF_BTF_GET_FD_BY_ID, BPF_TASK_FD_QUERY, BPF_MAP_LOOKUP_AND_DELETE_ELEM, + BPF_MAP_FREEZE, }; enum bpf_map_type { @@ -132,6 +133,7 @@ enum bpf_map_type { BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, BPF_MAP_TYPE_QUEUE, BPF_MAP_TYPE_STACK, + BPF_MAP_TYPE_SK_STORAGE, }; /* Note that tracing related programs such as @@ -166,6 +168,8 @@ enum bpf_prog_type { BPF_PROG_TYPE_LIRC_MODE2, BPF_PROG_TYPE_SK_REUSEPORT, BPF_PROG_TYPE_FLOW_DISSECTOR, + BPF_PROG_TYPE_CGROUP_SYSCTL, + BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, }; enum bpf_attach_type { @@ -187,6 +191,7 @@ enum bpf_attach_type { BPF_CGROUP_UDP6_SENDMSG, BPF_LIRC_MODE2, BPF_FLOW_DISSECTOR, + BPF_CGROUP_SYSCTL, __MAX_BPF_ATTACH_TYPE }; @@ -255,8 +260,19 @@ enum bpf_attach_type { */ #define BPF_F_ANY_ALIGNMENT (1U << 1) -/* when bpf_ldimm64->src_reg == BPF_PSEUDO_MAP_FD, bpf_ldimm64->imm == fd */ +/* When BPF ldimm64's insn[0].src_reg != 0 then this can have + * two extensions: + * + * insn[0].src_reg: BPF_PSEUDO_MAP_FD BPF_PSEUDO_MAP_VALUE + * insn[0].imm: map fd map fd + * insn[1].imm: 0 offset into value + * insn[0].off: 0 0 + * insn[1].off: 0 0 + * ldimm64 rewrite: address of map address of map[0]+offset + * verifier type: CONST_PTR_TO_MAP PTR_TO_MAP_VALUE + */ #define BPF_PSEUDO_MAP_FD 1 +#define BPF_PSEUDO_MAP_VALUE 2 /* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative * offset to another bpf function @@ -283,7 +299,7 @@ enum bpf_attach_type { #define BPF_OBJ_NAME_LEN 16U -/* Flags for accessing BPF object */ +/* Flags for accessing BPF object from syscall side. 
*/ #define BPF_F_RDONLY (1U << 3) #define BPF_F_WRONLY (1U << 4) @@ -293,6 +309,10 @@ enum bpf_attach_type { /* Zero-initialize hash function seed. This should only be used for testing. */ #define BPF_F_ZERO_SEED (1U << 6) +/* Flags for accessing BPF object from program side. */ +#define BPF_F_RDONLY_PROG (1U << 7) +#define BPF_F_WRONLY_PROG (1U << 8) + /* flags for BPF_PROG_QUERY */ #define BPF_F_QUERY_EFFECTIVE (1U << 0) @@ -396,6 +416,13 @@ union bpf_attr { __aligned_u64 data_out; __u32 repeat; __u32 duration; + __u32 ctx_size_in; /* input: len of ctx_in */ + __u32 ctx_size_out; /* input/output: len of ctx_out + * returns ENOSPC if ctx_out + * is too small. + */ + __aligned_u64 ctx_in; + __aligned_u64 ctx_out; } test; struct { /* anonymous struct used by BPF_*_GET_*_ID */ @@ -502,16 +529,6 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) - * Description - * Push an element *value* in *map*. *flags* is one of: - * - * **BPF_EXIST** - * If the queue/stack is full, the oldest element is removed to - * make room for this. - * Return - * 0 on success, or a negative error in case of failure. - * * int bpf_probe_read(void *dst, u32 size, const void *src) * Description * For tracing programs, safely attempt to read *size* bytes from @@ -1435,14 +1452,14 @@ union bpf_attr { * u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx) * Description * Equivalent to bpf_get_socket_cookie() helper that accepts - * *skb*, but gets socket from **struct bpf_sock_addr** contex. + * *skb*, but gets socket from **struct bpf_sock_addr** context. * Return * A 8-byte long non-decreasing number. * * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx) * Description * Equivalent to bpf_get_socket_cookie() helper that accepts - * *skb*, but gets socket from **struct bpf_sock_ops** contex. + * *skb*, but gets socket from **struct bpf_sock_ops** context. 
* Return * A 8-byte long non-decreasing number. * @@ -1488,13 +1505,31 @@ union bpf_attr { * Grow or shrink the room for data in the packet associated to * *skb* by *len_diff*, and according to the selected *mode*. * - * There is a single supported mode at this time: + * There are two supported modes at this time: + * + * * **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer + * (room space is added or removed below the layer 2 header). * * * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer * (room space is added or removed below the layer 3 header). * - * All values for *flags* are reserved for future usage, and must - * be left at zero. + * The following flags are supported at this time: + * + * * **BPF_F_ADJ_ROOM_FIXED_GSO**: Do not adjust gso_size. + * Adjusting mss in this way is not allowed for datagrams. + * + * * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV4**: + * * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV6**: + * Any new space is reserved to hold a tunnel header. + * Configure skb offsets and other fields accordingly. + * + * * **BPF_F_ADJ_ROOM_ENCAP_L4_GRE**: + * * **BPF_F_ADJ_ROOM_ENCAP_L4_UDP**: + * Use with ENCAP_L3 flags to further specify the tunnel type. + * + * * **BPF_F_ADJ_ROOM_ENCAP_L2(len)**: + * Use with ENCAP_L3/L4 flags to further specify the tunnel + * type; **len** is the length of the inner MAC header. * * A call to this helper is susceptible to change the underlaying * packet buffer. Therefore, at load time, all checks on pointers @@ -1704,12 +1739,19 @@ union bpf_attr { * error if an eBPF program tries to set a callback that is not * supported in the current kernel.
* - * The supported callback values that *argval* can combine are: + * *argval* is a flag array which can combine these flags: * * * **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out) * * **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission) * * **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change) * + * Therefore, this function can be used to clear a callback flag by + * setting the appropriate bit to zero. e.g. to disable the RTO + * callback: + * + * **bpf_sock_ops_cb_flags_set(bpf_sock,** + * **bpf_sock->bpf_sock_ops_cb_flags & ~BPF_SOCK_OPS_RTO_CB_FLAG)** + * * Here are some examples of where one could call such eBPF * program: * @@ -2098,52 +2140,52 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle) + * int bpf_rc_repeat(void *ctx) * Description * This helper is used in programs implementing IR decoding, to - * report a successfully decoded key press with *scancode*, - * *toggle* value in the given *protocol*. The scancode will be - * translated to a keycode using the rc keymap, and reported as - * an input key down event. After a period a key up event is - * generated. This period can be extended by calling either - * **bpf_rc_keydown**\ () again with the same values, or calling - * **bpf_rc_repeat**\ (). + * report a successfully decoded repeat key message. This delays + * the generation of a key up event for previously generated + * key down event. * - * Some protocols include a toggle bit, in case the button was - * released and pressed again between consecutive scancodes. + * Some IR protocols like NEC have a special IR message for + * repeating last button, for when a button is held down. * * The *ctx* should point to the lirc sample as passed into * the program. * - * The *protocol* is the decoded protocol number (see - * **enum rc_proto** for some predefined values). 
- * * This helper is only available is the kernel was compiled with * the **CONFIG_BPF_LIRC_MODE2** configuration option set to * "**y**". * Return * 0 * - * int bpf_rc_repeat(void *ctx) + * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle) * Description * This helper is used in programs implementing IR decoding, to - * report a successfully decoded repeat key message. This delays - * the generation of a key up event for previously generated - * key down event. + * report a successfully decoded key press with *scancode*, + * *toggle* value in the given *protocol*. The scancode will be + * translated to a keycode using the rc keymap, and reported as + * an input key down event. After a period a key up event is + * generated. This period can be extended by calling either + * **bpf_rc_keydown**\ () again with the same values, or calling + * **bpf_rc_repeat**\ (). * - * Some IR protocols like NEC have a special IR message for - * repeating last button, for when a button is held down. + * Some protocols include a toggle bit, in case the button was + * released and pressed again between consecutive scancodes. * * The *ctx* should point to the lirc sample as passed into * the program. * + * The *protocol* is the decoded protocol number (see + * **enum rc_proto** for some predefined values). + * * This helper is only available is the kernel was compiled with * the **CONFIG_BPF_LIRC_MODE2** configuration option set to * "**y**". * Return * 0 * - * uint64_t bpf_skb_cgroup_id(struct sk_buff *skb) + * u64 bpf_skb_cgroup_id(struct sk_buff *skb) * Description * Return the cgroup v2 id of the socket associated with the *skb*. * This is roughly similar to the **bpf_get_cgroup_classid**\ () @@ -2159,30 +2201,12 @@ union bpf_attr { * Return * The id is returned or 0 in case the id could not be retrieved. 
* - * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level) - * Description - * Return id of cgroup v2 that is ancestor of cgroup associated - * with the *skb* at the *ancestor_level*. The root cgroup is at - * *ancestor_level* zero and each step down the hierarchy - * increments the level. If *ancestor_level* == level of cgroup - * associated with *skb*, then return value will be same as that - * of **bpf_skb_cgroup_id**\ (). - * - * The helper is useful to implement policies based on cgroups - * that are upper in hierarchy than immediate cgroup associated - * with *skb*. - * - * The format of returned id and helper limitations are same as in - * **bpf_skb_cgroup_id**\ (). - * Return - * The id is returned or 0 in case the id could not be retrieved. - * * u64 bpf_get_current_cgroup_id(void) * Return * A 64-bit integer containing the current cgroup id based * on the cgroup within which the current task is running. * - * void* get_local_storage(void *map, u64 flags) + * void *bpf_get_local_storage(void *map, u64 flags) * Description * Get the pointer to the local storage area. * The type and the size of the local storage is defined @@ -2209,6 +2233,24 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * + * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level) + * Description + * Return id of cgroup v2 that is ancestor of cgroup associated + * with the *skb* at the *ancestor_level*. The root cgroup is at + * *ancestor_level* zero and each step down the hierarchy + * increments the level. If *ancestor_level* == level of cgroup + * associated with *skb*, then return value will be same as that + * of **bpf_skb_cgroup_id**\ (). + * + * The helper is useful to implement policies based on cgroups + * that are upper in hierarchy than immediate cgroup associated + * with *skb*. + * + * The format of returned id and helper limitations are same as in + * **bpf_skb_cgroup_id**\ (). 
+ * Return + * The id is returned or 0 in case the id could not be retrieved. + * * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) * Description * Look for TCP socket matching *tuple*, optionally in a child @@ -2289,6 +2331,16 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * + * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) + * Description + * Push an element *value* in *map*. *flags* is one of: + * + * **BPF_EXIST** + * If the queue/stack is full, the oldest element is + * removed to make room for this. + * Return + * 0 on success, or a negative error in case of failure. + * * int bpf_map_pop_elem(struct bpf_map *map, void *value) * Description * Pop an element from *map*. @@ -2343,29 +2395,278 @@ union bpf_attr { * Return * 0 * + * int bpf_spin_lock(struct bpf_spin_lock *lock) + * Description + * Acquire a spinlock represented by the pointer *lock*, which is + * stored as part of a value of a map. Taking the lock allows to + * safely update the rest of the fields in that value. The + * spinlock can (and must) later be released with a call to + * **bpf_spin_unlock**\ (\ *lock*\ ). + * + * Spinlocks in BPF programs come with a number of restrictions + * and constraints: + * + * * **bpf_spin_lock** objects are only allowed inside maps of + * types **BPF_MAP_TYPE_HASH** and **BPF_MAP_TYPE_ARRAY** (this + * list could be extended in the future). + * * BTF description of the map is mandatory. + * * The BPF program can take ONE lock at a time, since taking two + * or more could cause dead locks. + * * Only one **struct bpf_spin_lock** is allowed per map element. + * * When the lock is taken, calls (either BPF to BPF or helpers) + * are not allowed. + * * The **BPF_LD_ABS** and **BPF_LD_IND** instructions are not + * allowed inside a spinlock-ed region. 
+ * * The BPF program MUST call **bpf_spin_unlock**\ () to release + * the lock, on all execution paths, before it returns. + * * The BPF program can access **struct bpf_spin_lock** only via + * the **bpf_spin_lock**\ () and **bpf_spin_unlock**\ () + * helpers. Loading or storing data into the **struct + * bpf_spin_lock** *lock*\ **;** field of a map is not allowed. + * * To use the **bpf_spin_lock**\ () helper, the BTF description + * of the map value must be a struct and have **struct + * bpf_spin_lock** *anyname*\ **;** field at the top level. + * Nested lock inside another struct is not allowed. + * * The **struct bpf_spin_lock** *lock* field in a map value must + * be aligned on a multiple of 4 bytes in that value. + * * Syscall with command **BPF_MAP_LOOKUP_ELEM** does not copy + * the **bpf_spin_lock** field to user space. + * * Syscall with command **BPF_MAP_UPDATE_ELEM**, or update from + * a BPF program, do not update the **bpf_spin_lock** field. + * * **bpf_spin_lock** cannot be on the stack or inside a + * networking packet (it can only be inside of a map values). + * * **bpf_spin_lock** is available to root only. + * * Tracing programs and socket filter programs cannot use + * **bpf_spin_lock**\ () due to insufficient preemption checks + * (but this may change in the future). + * * **bpf_spin_lock** is not allowed in inner maps of map-in-map. + * Return + * 0 + * + * int bpf_spin_unlock(struct bpf_spin_lock *lock) + * Description + * Release the *lock* previously locked by a call to + * **bpf_spin_lock**\ (\ *lock*\ ). + * Return + * 0 + * * struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk) * Description * This helper gets a **struct bpf_sock** pointer such - * that all the fields in bpf_sock can be accessed. + * that all the fields in this **bpf_sock** can be accessed. * Return - * A **struct bpf_sock** pointer on success, or NULL in + * A **struct bpf_sock** pointer on success, or **NULL** in * case of failure. 
* * struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk) * Description * This helper gets a **struct bpf_tcp_sock** pointer from a * **struct bpf_sock** pointer. - * * Return - * A **struct bpf_tcp_sock** pointer on success, or NULL in + * A **struct bpf_tcp_sock** pointer on success, or **NULL** in * case of failure. * * int bpf_skb_ecn_set_ce(struct sk_buf *skb) - * Description - * Sets ECN of IP header to ce (congestion encountered) if - * current value is ect (ECN capable). Works with IPv6 and IPv4. - * Return - * 1 if set, 0 if not set. + * Description + * Set ECN (Explicit Congestion Notification) field of IP header + * to **CE** (Congestion Encountered) if current value is **ECT** + * (ECN Capable Transport). Otherwise, do nothing. Works with IPv6 + * and IPv4. + * Return + * 1 if the **CE** flag is set (either by the current helper call + * or because it was already present), 0 if it is not set. + * + * struct bpf_sock *bpf_get_listener_sock(struct bpf_sock *sk) + * Description + * Return a **struct bpf_sock** pointer in **TCP_LISTEN** state. + * **bpf_sk_release**\ () is unnecessary and not allowed. + * Return + * A **struct bpf_sock** pointer on success, or **NULL** in + * case of failure. + * + * struct bpf_sock *bpf_skc_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) + * Description + * Look for TCP socket matching *tuple*, optionally in a child + * network namespace *netns*. The return value must be checked, + * and if non-**NULL**, released via **bpf_sk_release**\ (). + * + * This function is identical to bpf_sk_lookup_tcp, except that it + * also returns timewait or request sockets. Use bpf_sk_fullsock + * or bpf_tcp_sock to access the full structure. + * + * This helper is available only if the kernel was compiled with + * **CONFIG_NET** configuration option. + * Return + * Pointer to **struct bpf_sock**, or **NULL** in case of failure.
+ * For sockets with reuseport option, the **struct bpf_sock** + * result is from **reuse->socks**\ [] using the hash of the tuple. + * + * int bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) + * Description + * Check whether iph and th contain a valid SYN cookie ACK for + * the listening socket in sk. + * + * iph points to the start of the IPv4 or IPv6 header, while + * iph_len contains sizeof(struct iphdr) or sizeof(struct ipv6hdr). + * + * th points to the start of the TCP header, while th_len contains + * sizeof(struct tcphdr). + * + * Return + * 0 if iph and th are a valid SYN cookie ACK, or a negative error + * otherwise. + * + * int bpf_sysctl_get_name(struct bpf_sysctl *ctx, char *buf, size_t buf_len, u64 flags) + * Description + * Get name of sysctl in /proc/sys/ and copy it into provided by + * program buffer *buf* of size *buf_len*. + * + * The buffer is always NUL terminated, unless it's zero-sized. + * + * If *flags* is zero, full name (e.g. "net/ipv4/tcp_mem") is + * copied. Use **BPF_F_SYSCTL_BASE_NAME** flag to copy base name + * only (e.g. "tcp_mem"). + * Return + * Number of characters copied (not including the trailing NUL). + * + * **-E2BIG** if the buffer wasn't big enough (*buf* will contain + * truncated name in this case). + * + * int bpf_sysctl_get_current_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len) + * Description + * Get current value of sysctl as it is presented in /proc/sys + * (incl. newline, etc), and copy it as a string into provided + * by program buffer *buf* of size *buf_len*. + * + * The whole value is copied, no matter what file position user + * space issued e.g. sys_read at. + * + * The buffer is always NUL terminated, unless it's zero-sized. + * Return + * Number of characters copied (not including the trailing NUL). + * + * **-E2BIG** if the buffer wasn't big enough (*buf* will contain + * truncated value in this case).
+ * + * **-EINVAL** if current value was unavailable, e.g. because + * sysctl is uninitialized and read returns -EIO for it. + * + * int bpf_sysctl_get_new_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len) + * Description + * Get new value being written by user space to sysctl (before + * the actual write happens) and copy it as a string into + * provided by program buffer *buf* of size *buf_len*. + * + * User space may write new value at file position > 0. + * + * The buffer is always NUL terminated, unless it's zero-sized. + * Return + * Number of character copied (not including the trailing NUL). + * + * **-E2BIG** if the buffer wasn't big enough (*buf* will contain + * truncated name in this case). + * + * **-EINVAL** if sysctl is being read. + * + * int bpf_sysctl_set_new_value(struct bpf_sysctl *ctx, const char *buf, size_t buf_len) + * Description + * Override new value being written by user space to sysctl with + * value provided by program in buffer *buf* of size *buf_len*. + * + * *buf* should contain a string in same form as provided by user + * space on sysctl write. + * + * User space may write new value at file position > 0. To override + * the whole sysctl value file position should be set to zero. + * Return + * 0 on success. + * + * **-E2BIG** if the *buf_len* is too big. + * + * **-EINVAL** if sysctl is being read. + * + * int bpf_strtol(const char *buf, size_t buf_len, u64 flags, long *res) + * Description + * Convert the initial part of the string from buffer *buf* of + * size *buf_len* to a long integer according to the given base + * and save the result in *res*. + * + * The string may begin with an arbitrary amount of white space + * (as determined by isspace(3)) followed by a single optional '-' + * sign. + * + * Five least significant bits of *flags* encode base, other bits + * are currently unused. + * + * Base must be either 8, 10, 16 or 0 to detect it automatically + * similar to user space strtol(3). 
+ * Return + * Number of characters consumed on success. Must be positive but + * no more than buf_len. + * + * **-EINVAL** if no valid digits were found or unsupported base + * was provided. + * + * **-ERANGE** if resulting value was out of range. + * + * int bpf_strtoul(const char *buf, size_t buf_len, u64 flags, unsigned long *res) + * Description + * Convert the initial part of the string from buffer *buf* of + * size *buf_len* to an unsigned long integer according to the + * given base and save the result in *res*. + * + * The string may begin with an arbitrary amount of white space + * (as determined by isspace(3)). + * + * Five least significant bits of *flags* encode base, other bits + * are currently unused. + * + * Base must be either 8, 10, 16 or 0 to detect it automatically + * similar to user space strtoul(3). + * Return + * Number of characters consumed on success. Must be positive but + * no more than buf_len. + * + * **-EINVAL** if no valid digits were found or unsupported base + * was provided. + * + * **-ERANGE** if resulting value was out of range. + * + * void *bpf_sk_storage_get(struct bpf_map *map, struct bpf_sock *sk, void *value, u64 flags) + * Description + * Get a bpf-local-storage from a sk. + * + * Logically, it could be thought of getting the value from + * a *map* with *sk* as the **key**. From this + * perspective, the usage is not much different from + * **bpf_map_lookup_elem(map, &sk)** except this + * helper enforces the key must be a **bpf_fullsock()** + * and the map must be a BPF_MAP_TYPE_SK_STORAGE also. + * + * Underneath, the value is stored locally at *sk* instead of + * the map. The *map* is used as the bpf-local-storage **type**. + * The bpf-local-storage **type** (i.e. the *map*) is searched + * against all bpf-local-storages residing at sk. + * + * An optional *flags* (BPF_SK_STORAGE_GET_F_CREATE) can be + * used such that a new bpf-local-storage will be + * created if one does not exist. 
*value* can be used + * together with BPF_SK_STORAGE_GET_F_CREATE to specify + * the initial value of a bpf-local-storage. If *value* is + * NULL, the new bpf-local-storage will be zero initialized. + * Return + * A bpf-local-storage pointer is returned on success. + * + * **NULL** if not found or there was an error in adding + * a new bpf-local-storage. + * + * int bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk) + * Description + * Delete a bpf-local-storage from a sk. + * Return + * 0 on success. + * + * **-ENOENT** if the bpf-local-storage cannot be found. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2465,7 +2766,18 @@ union bpf_attr { FN(spin_unlock), \ FN(sk_fullsock), \ FN(tcp_sock), \ - FN(skb_ecn_set_ce), + FN(skb_ecn_set_ce), \ + FN(get_listener_sock), \ + FN(skc_lookup_tcp), \ + FN(tcp_check_syncookie), \ + FN(sysctl_get_name), \ + FN(sysctl_get_current_value), \ + FN(sysctl_get_new_value), \ + FN(sysctl_set_new_value), \ + FN(strtol), \ + FN(strtoul), \ + FN(sk_storage_get), \ + FN(sk_storage_delete), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -2524,9 +2836,30 @@ enum bpf_func_id { /* Current network namespace */ #define BPF_F_CURRENT_NETNS (-1L) +/* BPF_FUNC_skb_adjust_room flags. */ +#define BPF_F_ADJ_ROOM_FIXED_GSO (1ULL << 0) + +#define BPF_ADJ_ROOM_ENCAP_L2_MASK 0xff +#define BPF_ADJ_ROOM_ENCAP_L2_SHIFT 56 + +#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 (1ULL << 1) +#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 (1ULL << 2) +#define BPF_F_ADJ_ROOM_ENCAP_L4_GRE (1ULL << 3) +#define BPF_F_ADJ_ROOM_ENCAP_L4_UDP (1ULL << 4) +#define BPF_F_ADJ_ROOM_ENCAP_L2(len) (((__u64)len & \ + BPF_ADJ_ROOM_ENCAP_L2_MASK) \ + << BPF_ADJ_ROOM_ENCAP_L2_SHIFT) + +/* BPF_FUNC_sysctl_get_name flags. */ +#define BPF_F_SYSCTL_BASE_NAME (1ULL << 0) + +/* BPF_FUNC_sk_storage_get flags */ +#define BPF_SK_STORAGE_GET_F_CREATE (1ULL << 0) + /* Mode for BPF_FUNC_skb_adjust_room helper. 
*/ enum bpf_adj_room_mode { BPF_ADJ_ROOM_NET, + BPF_ADJ_ROOM_MAC, }; /* Mode for BPF_FUNC_skb_load_bytes_relative helper. */ @@ -3152,4 +3485,14 @@ struct bpf_line_info { struct bpf_spin_lock { __u32 val; }; + +struct bpf_sysctl { + __u32 write; /* Sysctl is being read (= 0) or written (= 1). + * Allows 1,2,4-byte read, but no write. + */ + __u32 file_pos; /* Sysctl file position to read from, write to. + * Allows 1,2,4-byte read and 4-byte write. + */ +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h index 7b7475ef2f17..9310652ca4f9 100644 --- a/tools/include/uapi/linux/btf.h +++ b/tools/include/uapi/linux/btf.h @@ -39,11 +39,11 @@ struct btf_type { * struct, union and fwd */ __u32 info; - /* "size" is used by INT, ENUM, STRUCT and UNION. + /* "size" is used by INT, ENUM, STRUCT, UNION and DATASEC. * "size" tells the size of the type it is describing. * * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT, - * FUNC and FUNC_PROTO. + * FUNC, FUNC_PROTO and VAR. * "type" is a type_id referring to another type. */ union { @@ -70,8 +70,10 @@ struct btf_type { #define BTF_KIND_RESTRICT 11 /* Restrict */ #define BTF_KIND_FUNC 12 /* Function */ #define BTF_KIND_FUNC_PROTO 13 /* Function Proto */ -#define BTF_KIND_MAX 13 -#define NR_BTF_KINDS 14 +#define BTF_KIND_VAR 14 /* Variable */ +#define BTF_KIND_DATASEC 15 /* Section */ +#define BTF_KIND_MAX BTF_KIND_DATASEC +#define NR_BTF_KINDS (BTF_KIND_MAX + 1) /* For some specific BTF_KIND, "struct btf_type" is immediately * followed by extra data. @@ -138,4 +140,26 @@ struct btf_param { __u32 type; }; +enum { + BTF_VAR_STATIC = 0, + BTF_VAR_GLOBAL_ALLOCATED, +}; + +/* BTF_KIND_VAR is followed by a single "struct btf_var" to describe + * additional information related to the variable such as its linkage.
+ */ +struct btf_var { + __u32 linkage; +}; + +/* BTF_KIND_DATASEC is followed by multiple "struct btf_var_secinfo" + * to describe all BTF_KIND_VAR types it contains along with its + * in-section offset as well as size. + */ +struct btf_var_secinfo { + __u32 type; + __u32 offset; + __u32 size; +}; + #endif /* _UAPI__LINUX_BTF_H__ */ diff --git a/tools/include/uapi/linux/fcntl.h b/tools/include/uapi/linux/fcntl.h index 6448cdd9a350..a2f8658f1c55 100644 --- a/tools/include/uapi/linux/fcntl.h +++ b/tools/include/uapi/linux/fcntl.h @@ -41,6 +41,7 @@ #define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ #define F_SEAL_GROW 0x0004 /* prevent file from growing */ #define F_SEAL_WRITE 0x0008 /* prevent writes */ +#define F_SEAL_FUTURE_WRITE 0x0010 /* prevent future writes while mapped */ /* (1U << 31) is reserved for signed error codes */ /* diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h index a55cb8b10165..e7ad9d350a28 100644 --- a/tools/include/uapi/linux/in.h +++ b/tools/include/uapi/linux/in.h @@ -292,10 +292,11 @@ struct sockaddr_in { #define IN_LOOPBACK(a) ((((long int) (a)) & 0xff000000) == 0x7f000000) /* Defines for Multicast INADDR */ -#define INADDR_UNSPEC_GROUP 0xe0000000U /* 224.0.0.0 */ -#define INADDR_ALLHOSTS_GROUP 0xe0000001U /* 224.0.0.1 */ -#define INADDR_ALLRTRS_GROUP 0xe0000002U /* 224.0.0.2 */ -#define INADDR_MAX_LOCAL_GROUP 0xe00000ffU /* 224.0.0.255 */ +#define INADDR_UNSPEC_GROUP 0xe0000000U /* 224.0.0.0 */ +#define INADDR_ALLHOSTS_GROUP 0xe0000001U /* 224.0.0.1 */ +#define INADDR_ALLRTRS_GROUP 0xe0000002U /* 224.0.0.2 */ +#define INADDR_ALLSNOOPERS_GROUP 0xe000006aU /* 224.0.0.106 */ +#define INADDR_MAX_LOCAL_GROUP 0xe00000ffU /* 224.0.0.255 */ #endif /* <asm/byteorder.h> contains the htonl type stuff..
*/ diff --git a/tools/include/uapi/linux/mman.h b/tools/include/uapi/linux/mman.h index d0f515d53299..fc1a64c3447b 100644 --- a/tools/include/uapi/linux/mman.h +++ b/tools/include/uapi/linux/mman.h @@ -12,6 +12,10 @@ #define OVERCOMMIT_ALWAYS 1 #define OVERCOMMIT_NEVER 2 +#define MAP_SHARED 0x01 /* Share changes */ +#define MAP_PRIVATE 0x02 /* Changes are private */ +#define MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */ + /* * Huge page size encoding when MAP_HUGETLB is specified, and a huge page * size other than the default is desired. See hugetlb_encode.h. diff --git a/tools/include/uapi/sound/asound.h b/tools/include/uapi/sound/asound.h index 404d4b9ffe76..df1153cea0b7 100644 --- a/tools/include/uapi/sound/asound.h +++ b/tools/include/uapi/sound/asound.h @@ -32,6 +32,7 @@ #ifndef __KERNEL__ #include <stdlib.h> +#include <time.h> #endif /* diff --git a/tools/io_uring/io_uring-bench.c b/tools/io_uring/io_uring-bench.c index 512306a37531..0f257139b003 100644 --- a/tools/io_uring/io_uring-bench.c +++ b/tools/io_uring/io_uring-bench.c @@ -32,10 +32,6 @@ #include "liburing.h" #include "barrier.h" -#ifndef IOCQE_FLAG_CACHEHIT -#define IOCQE_FLAG_CACHEHIT (1U << 0) -#endif - #define min(a, b) ((a < b) ? 
(a) : (b)) struct io_sq_ring { @@ -85,7 +81,6 @@ struct submitter { unsigned long reaps; unsigned long done; unsigned long calls; - unsigned long cachehit, cachemiss; volatile int finish; __s32 *fds; @@ -270,10 +265,6 @@ static int reap_events(struct submitter *s) return -1; } } - if (cqe->flags & IOCQE_FLAG_CACHEHIT) - s->cachehit++; - else - s->cachemiss++; reaped++; head++; } while (1); @@ -489,7 +480,7 @@ static void file_depths(char *buf) int main(int argc, char *argv[]) { struct submitter *s = &submitters[0]; - unsigned long done, calls, reap, cache_hit, cache_miss; + unsigned long done, calls, reap; int err, i, flags, fd; char *fdepths; void *ret; @@ -569,44 +560,29 @@ int main(int argc, char *argv[]) pthread_create(&s->thread, NULL, submitter_fn, s); fdepths = malloc(8 * s->nr_files); - cache_hit = cache_miss = reap = calls = done = 0; + reap = calls = done = 0; do { unsigned long this_done = 0; unsigned long this_reap = 0; unsigned long this_call = 0; - unsigned long this_cache_hit = 0; - unsigned long this_cache_miss = 0; unsigned long rpc = 0, ipc = 0; - double hit = 0.0; sleep(1); this_done += s->done; this_call += s->calls; this_reap += s->reaps; - this_cache_hit += s->cachehit; - this_cache_miss += s->cachemiss; - if (this_cache_hit && this_cache_miss) { - unsigned long hits, total; - - hits = this_cache_hit - cache_hit; - total = hits + this_cache_miss - cache_miss; - hit = (double) hits / (double) total; - hit *= 100.0; - } if (this_call - calls) { rpc = (this_done - done) / (this_call - calls); ipc = (this_reap - reap) / (this_call - calls); } else rpc = ipc = -1; file_depths(fdepths); - printf("IOPS=%lu, IOS/call=%ld/%ld, inflight=%u (%s), Cachehit=%0.2f%%\n", + printf("IOPS=%lu, IOS/call=%ld/%ld, inflight=%u (%s)\n", this_done - done, rpc, ipc, s->inflight, - fdepths, hit); + fdepths); done = this_done; calls = this_call; reap = this_reap; - cache_hit = s->cachehit; - cache_miss = s->cachemiss; } while (!finish); pthread_join(s->thread, &ret); 
diff --git a/tools/lib/bpf/.gitignore b/tools/lib/bpf/.gitignore index 4db74758c674..7d9e182a1f51 100644 --- a/tools/lib/bpf/.gitignore +++ b/tools/lib/bpf/.gitignore @@ -1,3 +1,4 @@ libbpf_version.h +libbpf.pc FEATURE-DUMP.libbpf test_libbpf diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 61aaacf0cfa1..c6c06bc6683c 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -3,7 +3,7 @@ BPF_VERSION = 0 BPF_PATCHLEVEL = 0 -BPF_EXTRAVERSION = 1 +BPF_EXTRAVERSION = 3 MAKEFLAGS += --no-print-directory @@ -79,8 +79,6 @@ export prefix libdir src obj libdir_SQ = $(subst ','\'',$(libdir)) libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) -LIB_FILE = libbpf.a libbpf.so - VERSION = $(BPF_VERSION) PATCHLEVEL = $(BPF_PATCHLEVEL) EXTRAVERSION = $(BPF_EXTRAVERSION) @@ -88,7 +86,11 @@ EXTRAVERSION = $(BPF_EXTRAVERSION) OBJ = $@ N = -LIBBPF_VERSION = $(BPF_VERSION).$(BPF_PATCHLEVEL).$(BPF_EXTRAVERSION) +LIBBPF_VERSION = $(BPF_VERSION).$(BPF_PATCHLEVEL).$(BPF_EXTRAVERSION) + +LIB_TARGET = libbpf.a libbpf.so.$(LIBBPF_VERSION) +LIB_FILE = libbpf.a libbpf.so* +PC_FILE = libbpf.pc # Set compile option CFLAGS ifdef EXTRA_CFLAGS @@ -128,16 +130,19 @@ all: export srctree OUTPUT CC LD CFLAGS V include $(srctree)/tools/build/Makefile.include -BPF_IN := $(OUTPUT)libbpf-in.o -LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE)) -VERSION_SCRIPT := libbpf.map +BPF_IN := $(OUTPUT)libbpf-in.o +VERSION_SCRIPT := libbpf.map + +LIB_TARGET := $(addprefix $(OUTPUT),$(LIB_TARGET)) +LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE)) +PC_FILE := $(addprefix $(OUTPUT),$(PC_FILE)) GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN) | \ awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {s++} END{print s}') VERSIONED_SYM_COUNT = $(shell readelf -s --wide $(OUTPUT)libbpf.so | \ grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l) -CMD_TARGETS = $(LIB_FILE) +CMD_TARGETS = $(LIB_TARGET) $(PC_FILE) CXX_TEST_TARGET = $(OUTPUT)test_libbpf @@ -170,9 +175,13 @@ $(BPF_IN): force elfdep bpfdep echo 
"Warning: Kernel ABI header at 'tools/include/uapi/linux/if_xdp.h' differs from latest version at 'include/uapi/linux/if_xdp.h'" >&2 )) || true $(Q)$(MAKE) $(build)=libbpf -$(OUTPUT)libbpf.so: $(BPF_IN) - $(QUIET_LINK)$(CC) --shared -Wl,--version-script=$(VERSION_SCRIPT) \ - $^ -o $@ +$(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION) + +$(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN) + $(QUIET_LINK)$(CC) --shared -Wl,-soname,libbpf.so.$(VERSION) \ + -Wl,--version-script=$(VERSION_SCRIPT) $^ -lelf -o $@ + @ln -sf $(@F) $(OUTPUT)libbpf.so + @ln -sf $(@F) $(OUTPUT)libbpf.so.$(VERSION) $(OUTPUT)libbpf.a: $(BPF_IN) $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^ @@ -180,6 +189,12 @@ $(OUTPUT)libbpf.a: $(BPF_IN) $(OUTPUT)test_libbpf: test_libbpf.cpp $(OUTPUT)libbpf.a $(QUIET_LINK)$(CXX) $(INCLUDES) $^ -lelf -o $@ +$(OUTPUT)libbpf.pc: + $(QUIET_GEN)sed -e "s|@PREFIX@|$(prefix)|" \ + -e "s|@LIBDIR@|$(libdir_SQ)|" \ + -e "s|@VERSION@|$(LIBBPF_VERSION)|" \ + < libbpf.pc.template > $@ + check: check_abi check_abi: $(OUTPUT)libbpf.so @@ -192,6 +207,12 @@ check_abi: $(OUTPUT)libbpf.so exit 1; \ fi +define do_install_mkdir + if [ ! -d '$(DESTDIR_SQ)$1' ]; then \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \ + fi +endef + define do_install if [ ! 
-d '$(DESTDIR_SQ)$2' ]; then \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ @@ -200,16 +221,22 @@ define do_install endef install_lib: all_cmd - $(call QUIET_INSTALL, $(LIB_FILE)) \ - $(call do_install,$(LIB_FILE),$(libdir_SQ)) + $(call QUIET_INSTALL, $(LIB_TARGET)) \ + $(call do_install_mkdir,$(libdir_SQ)); \ + cp -fpR $(LIB_FILE) $(DESTDIR)$(libdir_SQ) install_headers: $(call QUIET_INSTALL, headers) \ $(call do_install,bpf.h,$(prefix)/include/bpf,644); \ - $(call do_install,libbpf.h,$(prefix)/include/bpf,644); - $(call do_install,btf.h,$(prefix)/include/bpf,644); + $(call do_install,libbpf.h,$(prefix)/include/bpf,644); \ + $(call do_install,btf.h,$(prefix)/include/bpf,644); \ + $(call do_install,xsk.h,$(prefix)/include/bpf,644); + +install_pkgconfig: $(PC_FILE) + $(call QUIET_INSTALL, $(PC_FILE)) \ + $(call do_install,$(PC_FILE),$(libdir_SQ)/pkgconfig,644) -install: install_lib +install: install_lib install_pkgconfig ### Cleaning rules @@ -219,7 +246,7 @@ config-clean: clean: $(call QUIET_CLEAN, libbpf) $(RM) $(TARGETS) $(CXX_TEST_TARGET) \ - *.o *~ *.a *.so .*.d .*.cmd LIBBPF-CFLAGS + *.o *~ *.a *.so *.so.$(VERSION) .*.d .*.cmd *.pc LIBBPF-CFLAGS $(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf diff --git a/tools/lib/bpf/README.rst b/tools/lib/bpf/README.rst index 5788479384ca..cef7b77eab69 100644 --- a/tools/lib/bpf/README.rst +++ b/tools/lib/bpf/README.rst @@ -111,6 +111,7 @@ starting from ``0.0.1``. Every time ABI is being changed, e.g. because a new symbol is added or semantic of existing symbol is changed, ABI version should be bumped. +This bump in ABI version is at most once per kernel development cycle. 
For example, if current state of ``libbpf.map`` is: diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 9cd015574e83..955191c64b64 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -79,7 +79,6 @@ static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size) int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr) { - __u32 name_len = create_attr->name ? strlen(create_attr->name) : 0; union bpf_attr attr; memset(&attr, '\0', sizeof(attr)); @@ -89,8 +88,9 @@ int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr) attr.value_size = create_attr->value_size; attr.max_entries = create_attr->max_entries; attr.map_flags = create_attr->map_flags; - memcpy(attr.map_name, create_attr->name, - min(name_len, BPF_OBJ_NAME_LEN - 1)); + if (create_attr->name) + memcpy(attr.map_name, create_attr->name, + min(strlen(create_attr->name), BPF_OBJ_NAME_LEN - 1)); attr.numa_node = create_attr->numa_node; attr.btf_fd = create_attr->btf_fd; attr.btf_key_type_id = create_attr->btf_key_type_id; @@ -155,7 +155,6 @@ int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, int key_size, int inner_map_fd, int max_entries, __u32 map_flags, int node) { - __u32 name_len = name ? 
strlen(name) : 0; union bpf_attr attr; memset(&attr, '\0', sizeof(attr)); @@ -166,7 +165,9 @@ int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, attr.inner_map_fd = inner_map_fd; attr.max_entries = max_entries; attr.map_flags = map_flags; - memcpy(attr.map_name, name, min(name_len, BPF_OBJ_NAME_LEN - 1)); + if (name) + memcpy(attr.map_name, name, + min(strlen(name), BPF_OBJ_NAME_LEN - 1)); if (node >= 0) { attr.map_flags |= BPF_F_NUMA_NODE; @@ -216,18 +217,15 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, void *finfo = NULL, *linfo = NULL; union bpf_attr attr; __u32 log_level; - __u32 name_len; int fd; if (!load_attr || !log_buf != !log_buf_sz) return -EINVAL; log_level = load_attr->log_level; - if (log_level > 2 || (log_level && !log_buf)) + if (log_level > (4 | 2 | 1) || (log_level && !log_buf)) return -EINVAL; - name_len = load_attr->name ? strlen(load_attr->name) : 0; - memset(&attr, 0, sizeof(attr)); attr.prog_type = load_attr->prog_type; attr.expected_attach_type = load_attr->expected_attach_type; @@ -253,8 +251,9 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, attr.line_info_rec_size = load_attr->line_info_rec_size; attr.line_info_cnt = load_attr->line_info_cnt; attr.line_info = ptr_to_u64(load_attr->line_info); - memcpy(attr.prog_name, load_attr->name, - min(name_len, BPF_OBJ_NAME_LEN - 1)); + if (load_attr->name) + memcpy(attr.prog_name, load_attr->name, + min(strlen(load_attr->name), BPF_OBJ_NAME_LEN - 1)); fd = sys_bpf_prog_load(&attr, sizeof(attr)); if (fd >= 0) @@ -429,6 +428,16 @@ int bpf_map_get_next_key(int fd, const void *key, void *next_key) return sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr)); } +int bpf_map_freeze(int fd) +{ + union bpf_attr attr; + + memset(&attr, 0, sizeof(attr)); + attr.map_fd = fd; + + return sys_bpf(BPF_MAP_FREEZE, &attr, sizeof(attr)); +} + int bpf_obj_pin(int fd, const char *pathname) { union bpf_attr attr; @@ -545,10 +554,15 @@ int 
bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr) attr.test.data_out = ptr_to_u64(test_attr->data_out); attr.test.data_size_in = test_attr->data_size_in; attr.test.data_size_out = test_attr->data_size_out; + attr.test.ctx_in = ptr_to_u64(test_attr->ctx_in); + attr.test.ctx_out = ptr_to_u64(test_attr->ctx_out); + attr.test.ctx_size_in = test_attr->ctx_size_in; + attr.test.ctx_size_out = test_attr->ctx_size_out; attr.test.repeat = test_attr->repeat; ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr)); test_attr->data_size_out = attr.test.data_size_out; + test_attr->ctx_size_out = attr.test.ctx_size_out; test_attr->retval = attr.test.retval; test_attr->duration = attr.test.duration; return ret; diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 6ffdd79bea89..9593fec75652 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -26,6 +26,7 @@ #include <linux/bpf.h> #include <stdbool.h> #include <stddef.h> +#include <stdint.h> #ifdef __cplusplus extern "C" { @@ -92,7 +93,7 @@ struct bpf_load_program_attr { #define MAPS_RELAX_COMPAT 0x01 /* Recommend log buffer size */ -#define BPF_LOG_BUF_SIZE (256 * 1024) +#define BPF_LOG_BUF_SIZE (UINT32_MAX >> 8) /* verifier maximum in kernels <= 5.1 */ LIBBPF_API int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, char *log_buf, size_t log_buf_sz); @@ -117,6 +118,7 @@ LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value); LIBBPF_API int bpf_map_delete_elem(int fd, const void *key); LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key); +LIBBPF_API int bpf_map_freeze(int fd); LIBBPF_API int bpf_obj_pin(int fd, const char *pathname); LIBBPF_API int bpf_obj_get(const char *pathname); LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd, @@ -135,6 +137,11 @@ struct bpf_prog_test_run_attr { * out: length of data_out */ __u32 retval; /* out: return code of the BPF program */ __u32 duration; /* out: average per repetition in 
ns */ + const void *ctx_in; /* optional */ + __u32 ctx_size_in; + void *ctx_out; /* optional */ + __u32 ctx_size_out; /* in: max length of ctx_out + * out: length of ctx_out */ }; LIBBPF_API int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr); diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 1b8d8cdd3575..75eaf10b9e1a 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -24,6 +24,8 @@ ((k) == BTF_KIND_CONST) || \ ((k) == BTF_KIND_RESTRICT)) +#define IS_VAR(k) ((k) == BTF_KIND_VAR) + static struct btf_type btf_void; struct btf { @@ -212,6 +214,10 @@ static int btf_type_size(struct btf_type *t) return base_size + vlen * sizeof(struct btf_member); case BTF_KIND_FUNC_PROTO: return base_size + vlen * sizeof(struct btf_param); + case BTF_KIND_VAR: + return base_size + sizeof(struct btf_var); + case BTF_KIND_DATASEC: + return base_size + vlen * sizeof(struct btf_var_secinfo); default: pr_debug("Unsupported BTF_KIND:%u\n", BTF_INFO_KIND(t->info)); return -EINVAL; @@ -283,6 +289,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id) case BTF_KIND_STRUCT: case BTF_KIND_UNION: case BTF_KIND_ENUM: + case BTF_KIND_DATASEC: size = t->size; goto done; case BTF_KIND_PTR: @@ -292,6 +299,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id) case BTF_KIND_VOLATILE: case BTF_KIND_CONST: case BTF_KIND_RESTRICT: + case BTF_KIND_VAR: type_id = t->type; break; case BTF_KIND_ARRAY: @@ -326,7 +334,8 @@ int btf__resolve_type(const struct btf *btf, __u32 type_id) t = btf__type_by_id(btf, type_id); while (depth < MAX_RESOLVE_DEPTH && !btf_type_is_void_or_null(t) && - IS_MODIFIER(BTF_INFO_KIND(t->info))) { + (IS_MODIFIER(BTF_INFO_KIND(t->info)) || + IS_VAR(BTF_INFO_KIND(t->info)))) { type_id = t->type; t = btf__type_by_id(btf, type_id); depth++; @@ -408,6 +417,92 @@ done: return btf; } +static int compare_vsi_off(const void *_a, const void *_b) +{ + const struct btf_var_secinfo *a = _a; + const struct btf_var_secinfo *b = _b; + +
return a->offset - b->offset; +} + +static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf, + struct btf_type *t) +{ + __u32 size = 0, off = 0, i, vars = BTF_INFO_VLEN(t->info); + const char *name = btf__name_by_offset(btf, t->name_off); + const struct btf_type *t_var; + struct btf_var_secinfo *vsi; + struct btf_var *var; + int ret; + + if (!name) { + pr_debug("No name found in string section for DATASEC kind.\n"); + return -ENOENT; + } + + ret = bpf_object__section_size(obj, name, &size); + if (ret || !size || (t->size && t->size != size)) { + pr_debug("Invalid size for section %s: %u bytes\n", name, size); + return -ENOENT; + } + + t->size = size; + + for (i = 0, vsi = (struct btf_var_secinfo *)(t + 1); + i < vars; i++, vsi++) { + t_var = btf__type_by_id(btf, vsi->type); + var = (struct btf_var *)(t_var + 1); + + if (BTF_INFO_KIND(t_var->info) != BTF_KIND_VAR) { + pr_debug("Non-VAR type seen in section %s\n", name); + return -EINVAL; + } + + if (var->linkage == BTF_VAR_STATIC) + continue; + + name = btf__name_by_offset(btf, t_var->name_off); + if (!name) { + pr_debug("No name found in string section for VAR kind\n"); + return -ENOENT; + } + + ret = bpf_object__variable_offset(obj, name, &off); + if (ret) { + pr_debug("No offset found in symbol table for VAR %s\n", name); + return -ENOENT; + } + + vsi->offset = off; + } + + qsort(t + 1, vars, sizeof(*vsi), compare_vsi_off); + return 0; +} + +int btf__finalize_data(struct bpf_object *obj, struct btf *btf) +{ + int err = 0; + __u32 i; + + for (i = 1; i <= btf->nr_types; i++) { + struct btf_type *t = btf->types[i]; + + /* Loader needs to fix up some of the things compiler + * couldn't get its hands on while emitting BTF. This + * is section size and global variable offset. We use + * the info from the ELF itself for this purpose. 
+ */ + if (BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC) { + err = btf_fixup_datasec(obj, btf, t); + if (err) + break; + } + } + + return err; +} + int btf__load(struct btf *btf) { __u32 log_buf_size = BPF_LOG_BUF_SIZE; @@ -1259,8 +1354,16 @@ static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext, } /* special BTF "void" type is made canonical immediately */ d->map[0] = 0; - for (i = 1; i <= btf->nr_types; i++) - d->map[i] = BTF_UNPROCESSED_ID; + for (i = 1; i <= btf->nr_types; i++) { + struct btf_type *t = d->btf->types[i]; + __u16 kind = BTF_INFO_KIND(t->info); + + /* VAR and DATASEC are never deduped and are self-canonical */ + if (kind == BTF_KIND_VAR || kind == BTF_KIND_DATASEC) + d->map[i] = i; + else + d->map[i] = BTF_UNPROCESSED_ID; + } d->hypot_map = malloc(sizeof(__u32) * (1 + btf->nr_types)); if (!d->hypot_map) { @@ -1602,16 +1705,12 @@ static bool btf_equal_int(struct btf_type *t1, struct btf_type *t2) /* Calculate type signature hash of ENUM. */ static __u32 btf_hash_enum(struct btf_type *t) { - struct btf_enum *member = (struct btf_enum *)(t + 1); - __u32 vlen = BTF_INFO_VLEN(t->info); - __u32 h = btf_hash_common(t); - int i; + __u32 h; - for (i = 0; i < vlen; i++) { - h = hash_combine(h, member->name_off); - h = hash_combine(h, member->val); - member++; - } + /* don't hash vlen and enum members to support enum fwd resolving */ + h = hash_combine(0, t->name_off); + h = hash_combine(h, t->info & ~0xffff); + h = hash_combine(h, t->size); return h; } @@ -1637,6 +1736,22 @@ static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2) return true; } +static inline bool btf_is_enum_fwd(struct btf_type *t) +{ + return BTF_INFO_KIND(t->info) == BTF_KIND_ENUM && + BTF_INFO_VLEN(t->info) == 0; +} + +static bool btf_compat_enum(struct btf_type *t1, struct btf_type *t2) +{ + if (!btf_is_enum_fwd(t1) && !btf_is_enum_fwd(t2)) + return btf_equal_enum(t1, t2); + /* ignore vlen when comparing */ + return t1->name_off == t2->name_off && + 
(t1->info & ~0xffff) == (t2->info & ~0xffff) && + t1->size == t2->size; +} + /* * Calculate type signature hash of STRUCT/UNION, ignoring referenced type IDs, * as referenced type IDs equivalence is established separately during type @@ -1839,6 +1954,8 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id) case BTF_KIND_UNION: case BTF_KIND_FUNC: case BTF_KIND_FUNC_PROTO: + case BTF_KIND_VAR: + case BTF_KIND_DATASEC: return 0; case BTF_KIND_INT: @@ -1860,6 +1977,17 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id) new_id = cand_node->type_id; break; } + if (d->opts.dont_resolve_fwds) + continue; + if (btf_compat_enum(t, cand)) { + if (btf_is_enum_fwd(t)) { + /* resolve fwd to full enum */ + new_id = cand_node->type_id; + break; + } + /* resolve canonical enum fwd to full enum */ + d->map[cand_node->type_id] = type_id; + } } break; @@ -2084,7 +2212,7 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, return fwd_kind == real_kind; } - if (cand_type->info != canon_type->info) + if (cand_kind != canon_kind) return 0; switch (cand_kind) { @@ -2092,7 +2220,10 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, return btf_equal_int(cand_type, canon_type); case BTF_KIND_ENUM: - return btf_equal_enum(cand_type, canon_type); + if (d->opts.dont_resolve_fwds) + return btf_equal_enum(cand_type, canon_type); + else + return btf_compat_enum(cand_type, canon_type); case BTF_KIND_FWD: return btf_equal_common(cand_type, canon_type); @@ -2103,6 +2234,8 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, case BTF_KIND_PTR: case BTF_KIND_TYPEDEF: case BTF_KIND_FUNC: + if (cand_type->info != canon_type->info) + return 0; return btf_dedup_is_equiv(d, cand_type->type, canon_type->type); case BTF_KIND_ARRAY: { @@ -2576,6 +2709,7 @@ static int btf_dedup_remap_type(struct btf_dedup *d, __u32 type_id) case BTF_KIND_PTR: case BTF_KIND_TYPEDEF: case BTF_KIND_FUNC: + case BTF_KIND_VAR: r = 
btf_dedup_remap_type_id(d, t->type); if (r < 0) return r; @@ -2630,6 +2764,20 @@ static int btf_dedup_remap_type(struct btf_dedup *d, __u32 type_id) break; } + case BTF_KIND_DATASEC: { + struct btf_var_secinfo *var = (struct btf_var_secinfo *)(t + 1); + __u16 vlen = BTF_INFO_VLEN(t->info); + + for (i = 0; i < vlen; i++) { + r = btf_dedup_remap_type_id(d, var->type); + if (r < 0) + return r; + var->type = r; + var++; + } + break; + } + default: return -EINVAL; } diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 28a1e1e59861..c7b399e81fce 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -21,6 +21,8 @@ struct btf; struct btf_ext; struct btf_type; +struct bpf_object; + /* * The .BTF.ext ELF section layout defined as * struct btf_ext_header @@ -57,6 +59,7 @@ struct btf_ext_header { LIBBPF_API void btf__free(struct btf *btf); LIBBPF_API struct btf *btf__new(__u8 *data, __u32 size); +LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf); LIBBPF_API int btf__load(struct btf *btf); LIBBPF_API __s32 btf__find_by_name(const struct btf *btf, const char *type_name); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index d5b830d60601..11a65db4b93f 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -7,6 +7,7 @@ * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com> * Copyright (C) 2015 Huawei Inc. * Copyright (C) 2017 Nicira, Inc. + * Copyright (C) 2019 Isovalent, Inc. */ #ifndef _GNU_SOURCE @@ -52,6 +53,11 @@ #define BPF_FS_MAGIC 0xcafe4a11 #endif +/* vsprintf() in __base_pr() uses nonliteral format string. It may break + * compilation if user enables corresponding warning. Disable it explicitly. + */ +#pragma GCC diagnostic ignored "-Wformat-nonliteral" + #define __printf(a, b) __attribute__((format(printf, a, b))) static int __base_pr(enum libbpf_print_level level, const char *format, @@ -112,9 +118,16 @@ void libbpf_print(enum libbpf_print_level level, const char *format, ...) 
# define LIBBPF_ELF_C_READ_MMAP ELF_C_READ #endif +static inline __u64 ptr_to_u64(const void *ptr) +{ + return (__u64) (unsigned long) ptr; +} + struct bpf_capabilities { /* v4.14: kernel support for program & map names. */ __u32 name:1; + /* v5.2: kernel support for global data sections. */ + __u32 global_data:1; }; /* @@ -139,6 +152,7 @@ struct bpf_program { enum { RELO_LD64, RELO_CALL, + RELO_DATA, } type; int insn_idx; union { @@ -147,6 +161,7 @@ struct bpf_program { }; } *reloc_desc; int nr_reloc; + int log_level; struct { int nr; @@ -171,6 +186,19 @@ struct bpf_program { __u32 line_info_cnt; }; +enum libbpf_map_type { + LIBBPF_MAP_UNSPEC, + LIBBPF_MAP_DATA, + LIBBPF_MAP_BSS, + LIBBPF_MAP_RODATA, +}; + +static const char * const libbpf_type_to_btf_name[] = { + [LIBBPF_MAP_DATA] = ".data", + [LIBBPF_MAP_BSS] = ".bss", + [LIBBPF_MAP_RODATA] = ".rodata", +}; + struct bpf_map { int fd; char *name; @@ -182,11 +210,18 @@ struct bpf_map { __u32 btf_value_type_id; void *priv; bpf_map_clear_priv_t clear_priv; + enum libbpf_map_type libbpf_type; +}; + +struct bpf_secdata { + void *rodata; + void *data; }; static LIST_HEAD(bpf_objects_list); struct bpf_object { + char name[BPF_OBJ_NAME_LEN]; char license[64]; __u32 kern_version; @@ -194,6 +229,7 @@ struct bpf_object { size_t nr_programs; struct bpf_map *maps; size_t nr_maps; + struct bpf_secdata sections; bool loaded; bool has_pseudo_calls; @@ -209,6 +245,9 @@ struct bpf_object { Elf *elf; GElf_Ehdr ehdr; Elf_Data *symbols; + Elf_Data *data; + Elf_Data *rodata; + Elf_Data *bss; size_t strtabidx; struct { GElf_Shdr shdr; @@ -217,6 +256,9 @@ struct bpf_object { int nr_reloc; int maps_shndx; int text_shndx; + int data_shndx; + int rodata_shndx; + int bss_shndx; } efile; /* * All loaded bpf_object is linked in a list, which is @@ -438,6 +480,7 @@ static struct bpf_object *bpf_object__new(const char *path, size_t obj_buf_sz) { struct bpf_object *obj; + char *end; obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1); 
if (!obj) { @@ -446,8 +489,14 @@ static struct bpf_object *bpf_object__new(const char *path, } strcpy(obj->path, path); - obj->efile.fd = -1; + /* Using basename() GNU version which doesn't modify arg. */ + strncpy(obj->name, basename((void *)path), + sizeof(obj->name) - 1); + end = strchr(obj->name, '.'); + if (end) + *end = 0; + obj->efile.fd = -1; /* * Caller of this function should also calls * bpf_object__elf_finish() after data collection to return @@ -457,6 +506,9 @@ static struct bpf_object *bpf_object__new(const char *path, obj->efile.obj_buf = obj_buf; obj->efile.obj_buf_sz = obj_buf_sz; obj->efile.maps_shndx = -1; + obj->efile.data_shndx = -1; + obj->efile.rodata_shndx = -1; + obj->efile.bss_shndx = -1; obj->loaded = false; @@ -475,6 +527,9 @@ static void bpf_object__elf_finish(struct bpf_object *obj) obj->efile.elf = NULL; } obj->efile.symbols = NULL; + obj->efile.data = NULL; + obj->efile.rodata = NULL; + obj->efile.bss = NULL; zfree(&obj->efile.reloc); obj->efile.nr_reloc = 0; @@ -616,27 +671,182 @@ static bool bpf_map_type__is_map_in_map(enum bpf_map_type type) return false; } +static int bpf_object_search_section_size(const struct bpf_object *obj, + const char *name, size_t *d_size) +{ + const GElf_Ehdr *ep = &obj->efile.ehdr; + Elf *elf = obj->efile.elf; + Elf_Scn *scn = NULL; + int idx = 0; + + while ((scn = elf_nextscn(elf, scn)) != NULL) { + const char *sec_name; + Elf_Data *data; + GElf_Shdr sh; + + idx++; + if (gelf_getshdr(scn, &sh) != &sh) { + pr_warning("failed to get section(%d) header from %s\n", + idx, obj->path); + return -EIO; + } + + sec_name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name); + if (!sec_name) { + pr_warning("failed to get section(%d) name from %s\n", + idx, obj->path); + return -EIO; + } + + if (strcmp(name, sec_name)) + continue; + + data = elf_getdata(scn, 0); + if (!data) { + pr_warning("failed to get section(%d) data from %s(%s)\n", + idx, name, obj->path); + return -EIO; + } + + *d_size = data->d_size; + return 0; + 
} + + return -ENOENT; +} + +int bpf_object__section_size(const struct bpf_object *obj, const char *name, + __u32 *size) +{ + int ret = -ENOENT; + size_t d_size; + + *size = 0; + if (!name) { + return -EINVAL; + } else if (!strcmp(name, ".data")) { + if (obj->efile.data) + *size = obj->efile.data->d_size; + } else if (!strcmp(name, ".bss")) { + if (obj->efile.bss) + *size = obj->efile.bss->d_size; + } else if (!strcmp(name, ".rodata")) { + if (obj->efile.rodata) + *size = obj->efile.rodata->d_size; + } else { + ret = bpf_object_search_section_size(obj, name, &d_size); + if (!ret) + *size = d_size; + } + + return *size ? 0 : ret; +} + +int bpf_object__variable_offset(const struct bpf_object *obj, const char *name, + __u32 *off) +{ + Elf_Data *symbols = obj->efile.symbols; + const char *sname; + size_t si; + + if (!name || !off) + return -EINVAL; + + for (si = 0; si < symbols->d_size / sizeof(GElf_Sym); si++) { + GElf_Sym sym; + + if (!gelf_getsym(symbols, si, &sym)) + continue; + if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL || + GELF_ST_TYPE(sym.st_info) != STT_OBJECT) + continue; + + sname = elf_strptr(obj->efile.elf, obj->efile.strtabidx, + sym.st_name); + if (!sname) { + pr_warning("failed to get sym name string for var %s\n", + name); + return -EIO; + } + if (strcmp(name, sname) == 0) { + *off = sym.st_value; + return 0; + } + } + + return -ENOENT; +} + +static bool bpf_object__has_maps(const struct bpf_object *obj) +{ + return obj->efile.maps_shndx >= 0 || + obj->efile.data_shndx >= 0 || + obj->efile.rodata_shndx >= 0 || + obj->efile.bss_shndx >= 0; +} + +static int +bpf_object__init_internal_map(struct bpf_object *obj, struct bpf_map *map, + enum libbpf_map_type type, Elf_Data *data, + void **data_buff) +{ + struct bpf_map_def *def = &map->def; + char map_name[BPF_OBJ_NAME_LEN]; + + map->libbpf_type = type; + map->offset = ~(typeof(map->offset))0; + snprintf(map_name, sizeof(map_name), "%.8s%.7s", obj->name, + libbpf_type_to_btf_name[type]); + map->name = 
strdup(map_name); + if (!map->name) { + pr_warning("failed to alloc map name\n"); + return -ENOMEM; + } + + def->type = BPF_MAP_TYPE_ARRAY; + def->key_size = sizeof(int); + def->value_size = data->d_size; + def->max_entries = 1; + def->map_flags = type == LIBBPF_MAP_RODATA ? + BPF_F_RDONLY_PROG : 0; + if (data_buff) { + *data_buff = malloc(data->d_size); + if (!*data_buff) { + zfree(&map->name); + pr_warning("failed to alloc map content buffer\n"); + return -ENOMEM; + } + memcpy(*data_buff, data->d_buf, data->d_size); + } + + pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name); + return 0; +} + static int bpf_object__init_maps(struct bpf_object *obj, int flags) { + int i, map_idx, map_def_sz = 0, nr_syms, nr_maps = 0, nr_maps_glob = 0; bool strict = !(flags & MAPS_RELAX_COMPAT); - int i, map_idx, map_def_sz, nr_maps = 0; - Elf_Scn *scn; - Elf_Data *data; Elf_Data *symbols = obj->efile.symbols; + Elf_Data *data = NULL; + int ret = 0; - if (obj->efile.maps_shndx < 0) - return -EINVAL; if (!symbols) return -EINVAL; + nr_syms = symbols->d_size / sizeof(GElf_Sym); - scn = elf_getscn(obj->efile.elf, obj->efile.maps_shndx); - if (scn) - data = elf_getdata(scn, NULL); - if (!scn || !data) { - pr_warning("failed to get Elf_Data from map section %d\n", - obj->efile.maps_shndx); - return -EINVAL; + if (obj->efile.maps_shndx >= 0) { + Elf_Scn *scn = elf_getscn(obj->efile.elf, + obj->efile.maps_shndx); + + if (scn) + data = elf_getdata(scn, NULL); + if (!scn || !data) { + pr_warning("failed to get Elf_Data from map section %d\n", + obj->efile.maps_shndx); + return -EINVAL; + } } /* @@ -646,7 +856,16 @@ bpf_object__init_maps(struct bpf_object *obj, int flags) * * TODO: Detect array of map and report error. 
*/ - for (i = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) { + if (obj->caps.global_data) { + if (obj->efile.data_shndx >= 0) + nr_maps_glob++; + if (obj->efile.rodata_shndx >= 0) + nr_maps_glob++; + if (obj->efile.bss_shndx >= 0) + nr_maps_glob++; + } + + for (i = 0; data && i < nr_syms; i++) { GElf_Sym sym; if (!gelf_getsym(symbols, i, &sym)) @@ -656,22 +875,24 @@ bpf_object__init_maps(struct bpf_object *obj, int flags) nr_maps++; } - /* Alloc obj->maps and fill nr_maps. */ - pr_debug("maps in %s: %d maps in %zd bytes\n", obj->path, - nr_maps, data->d_size); - - if (!nr_maps) + if (!nr_maps && !nr_maps_glob) return 0; /* Assume equally sized map definitions */ - map_def_sz = data->d_size / nr_maps; - if (!data->d_size || (data->d_size % nr_maps) != 0) { - pr_warning("unable to determine map definition size " - "section %s, %d maps in %zd bytes\n", - obj->path, nr_maps, data->d_size); - return -EINVAL; + if (data) { + pr_debug("maps in %s: %d maps in %zd bytes\n", obj->path, + nr_maps, data->d_size); + + map_def_sz = data->d_size / nr_maps; + if (!data->d_size || (data->d_size % nr_maps) != 0) { + pr_warning("unable to determine map definition size " + "section %s, %d maps in %zd bytes\n", + obj->path, nr_maps, data->d_size); + return -EINVAL; + } } + nr_maps += nr_maps_glob; obj->maps = calloc(nr_maps, sizeof(obj->maps[0])); if (!obj->maps) { pr_warning("alloc maps for object failed\n"); @@ -692,7 +913,7 @@ bpf_object__init_maps(struct bpf_object *obj, int flags) /* * Fill obj->maps using data in "maps" section. 
*/ - for (i = 0, map_idx = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) { + for (i = 0, map_idx = 0; data && i < nr_syms; i++) { GElf_Sym sym; const char *map_name; struct bpf_map_def *def; @@ -705,6 +926,8 @@ bpf_object__init_maps(struct bpf_object *obj, int flags) map_name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, sym.st_name); + + obj->maps[map_idx].libbpf_type = LIBBPF_MAP_UNSPEC; obj->maps[map_idx].offset = sym.st_value; if (sym.st_value + map_def_sz > data->d_size) { pr_warning("corrupted maps section in %s: last map \"%s\" too small\n", @@ -753,8 +976,31 @@ bpf_object__init_maps(struct bpf_object *obj, int flags) map_idx++; } - qsort(obj->maps, obj->nr_maps, sizeof(obj->maps[0]), compare_bpf_map); - return 0; + if (!obj->caps.global_data) + goto finalize; + + /* + * Populate rest of obj->maps with libbpf internal maps. + */ + if (obj->efile.data_shndx >= 0) + ret = bpf_object__init_internal_map(obj, &obj->maps[map_idx++], + LIBBPF_MAP_DATA, + obj->efile.data, + &obj->sections.data); + if (!ret && obj->efile.rodata_shndx >= 0) + ret = bpf_object__init_internal_map(obj, &obj->maps[map_idx++], + LIBBPF_MAP_RODATA, + obj->efile.rodata, + &obj->sections.rodata); + if (!ret && obj->efile.bss_shndx >= 0) + ret = bpf_object__init_internal_map(obj, &obj->maps[map_idx++], + LIBBPF_MAP_BSS, + obj->efile.bss, NULL); +finalize: + if (!ret) + qsort(obj->maps, obj->nr_maps, sizeof(obj->maps[0]), + compare_bpf_map); + return ret; } static bool section_have_execinstr(struct bpf_object *obj, int idx) @@ -780,6 +1026,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) Elf *elf = obj->efile.elf; GElf_Ehdr *ep = &obj->efile.ehdr; Elf_Data *btf_ext_data = NULL; + Elf_Data *btf_data = NULL; Elf_Scn *scn = NULL; int idx = 0, err = 0; @@ -823,25 +1070,18 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) (int)sh.sh_link, (unsigned long)sh.sh_flags, (int)sh.sh_type); - if (strcmp(name, "license") == 0) + if (strcmp(name, 
"license") == 0) { err = bpf_object__init_license(obj, data->d_buf, data->d_size); - else if (strcmp(name, "version") == 0) + } else if (strcmp(name, "version") == 0) { err = bpf_object__init_kversion(obj, data->d_buf, data->d_size); - else if (strcmp(name, "maps") == 0) + } else if (strcmp(name, "maps") == 0) { obj->efile.maps_shndx = idx; - else if (strcmp(name, BTF_ELF_SEC) == 0) { - obj->btf = btf__new(data->d_buf, data->d_size); - if (IS_ERR(obj->btf) || btf__load(obj->btf)) { - pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n", - BTF_ELF_SEC, PTR_ERR(obj->btf)); - if (!IS_ERR(obj->btf)) - btf__free(obj->btf); - obj->btf = NULL; - } + } else if (strcmp(name, BTF_ELF_SEC) == 0) { + btf_data = data; } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) { btf_ext_data = data; } else if (sh.sh_type == SHT_SYMTAB) { @@ -853,20 +1093,28 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) obj->efile.symbols = data; obj->efile.strtabidx = sh.sh_link; } - } else if ((sh.sh_type == SHT_PROGBITS) && - (sh.sh_flags & SHF_EXECINSTR) && - (data->d_size > 0)) { - if (strcmp(name, ".text") == 0) - obj->efile.text_shndx = idx; - err = bpf_object__add_program(obj, data->d_buf, - data->d_size, name, idx); - if (err) { - char errmsg[STRERR_BUFSIZE]; - char *cp = libbpf_strerror_r(-err, errmsg, - sizeof(errmsg)); - - pr_warning("failed to alloc program %s (%s): %s", - name, obj->path, cp); + } else if (sh.sh_type == SHT_PROGBITS && data->d_size > 0) { + if (sh.sh_flags & SHF_EXECINSTR) { + if (strcmp(name, ".text") == 0) + obj->efile.text_shndx = idx; + err = bpf_object__add_program(obj, data->d_buf, + data->d_size, name, idx); + if (err) { + char errmsg[STRERR_BUFSIZE]; + char *cp = libbpf_strerror_r(-err, errmsg, + sizeof(errmsg)); + + pr_warning("failed to alloc program %s (%s): %s", + name, obj->path, cp); + } + } else if (strcmp(name, ".data") == 0) { + obj->efile.data = data; + obj->efile.data_shndx = idx; + } else if (strcmp(name, 
".rodata") == 0) { + obj->efile.rodata = data; + obj->efile.rodata_shndx = idx; + } else { + pr_debug("skip section(%d) %s\n", idx, name); } } else if (sh.sh_type == SHT_REL) { void *reloc = obj->efile.reloc; @@ -894,6 +1142,9 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) obj->efile.reloc[n].shdr = sh; obj->efile.reloc[n].data = data; } + } else if (sh.sh_type == SHT_NOBITS && strcmp(name, ".bss") == 0) { + obj->efile.bss = data; + obj->efile.bss_shndx = idx; } else { pr_debug("skip section(%d) %s\n", idx, name); } @@ -905,6 +1156,25 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) pr_warning("Corrupted ELF file: index of strtab invalid\n"); return LIBBPF_ERRNO__FORMAT; } + if (btf_data) { + obj->btf = btf__new(btf_data->d_buf, btf_data->d_size); + if (IS_ERR(obj->btf)) { + pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n", + BTF_ELF_SEC, PTR_ERR(obj->btf)); + obj->btf = NULL; + } else { + err = btf__finalize_data(obj, obj->btf); + if (!err) + err = btf__load(obj->btf); + if (err) { + pr_warning("Error finalizing and loading %s into kernel: %d. 
Ignored and continue.\n", + BTF_ELF_SEC, err); + btf__free(obj->btf); + obj->btf = NULL; + err = 0; + } + } + } if (btf_ext_data) { if (!obj->btf) { pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n", @@ -920,7 +1190,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) } } } - if (obj->efile.maps_shndx >= 0) { + if (bpf_object__has_maps(obj)) { err = bpf_object__init_maps(obj, flags); if (err) goto out; @@ -956,13 +1226,46 @@ bpf_object__find_program_by_title(struct bpf_object *obj, const char *title) return NULL; } +static bool bpf_object__shndx_is_data(const struct bpf_object *obj, + int shndx) +{ + return shndx == obj->efile.data_shndx || + shndx == obj->efile.bss_shndx || + shndx == obj->efile.rodata_shndx; +} + +static bool bpf_object__shndx_is_maps(const struct bpf_object *obj, + int shndx) +{ + return shndx == obj->efile.maps_shndx; +} + +static bool bpf_object__relo_in_known_section(const struct bpf_object *obj, + int shndx) +{ + return shndx == obj->efile.text_shndx || + bpf_object__shndx_is_maps(obj, shndx) || + bpf_object__shndx_is_data(obj, shndx); +} + +static enum libbpf_map_type +bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx) +{ + if (shndx == obj->efile.data_shndx) + return LIBBPF_MAP_DATA; + else if (shndx == obj->efile.bss_shndx) + return LIBBPF_MAP_BSS; + else if (shndx == obj->efile.rodata_shndx) + return LIBBPF_MAP_RODATA; + else + return LIBBPF_MAP_UNSPEC; +} + static int bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, Elf_Data *data, struct bpf_object *obj) { Elf_Data *symbols = obj->efile.symbols; - int text_shndx = obj->efile.text_shndx; - int maps_shndx = obj->efile.maps_shndx; struct bpf_map *maps = obj->maps; size_t nr_maps = obj->nr_maps; int i, nrels; @@ -982,7 +1285,10 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, GElf_Sym sym; GElf_Rel rel; unsigned int insn_idx; + unsigned int shdr_idx; struct 
bpf_insn *insns = prog->insns; + enum libbpf_map_type type; + const char *name; size_t map_idx; if (!gelf_getrel(data, i, &rel)) { @@ -997,13 +1303,18 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, GELF_R_SYM(rel.r_info)); return -LIBBPF_ERRNO__FORMAT; } - pr_debug("relo for %lld value %lld name %d\n", + + name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, + sym.st_name) ? : "<?>"; + + pr_debug("relo for %lld value %lld name %d (\'%s\')\n", (long long) (rel.r_info >> 32), - (long long) sym.st_value, sym.st_name); + (long long) sym.st_value, sym.st_name, name); - if (sym.st_shndx != maps_shndx && sym.st_shndx != text_shndx) { - pr_warning("Program '%s' contains non-map related relo data pointing to section %u\n", - prog->section_name, sym.st_shndx); + shdr_idx = sym.st_shndx; + if (!bpf_object__relo_in_known_section(obj, shdr_idx)) { + pr_warning("Program '%s' contains unrecognized relo data pointing to section %u\n", + prog->section_name, shdr_idx); return -LIBBPF_ERRNO__RELOC; } @@ -1028,24 +1339,45 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, return -LIBBPF_ERRNO__RELOC; } - /* TODO: 'maps' is sorted. We can use bsearch to make it faster. 
*/ - for (map_idx = 0; map_idx < nr_maps; map_idx++) { - if (maps[map_idx].offset == sym.st_value) { - pr_debug("relocation: find map %zd (%s) for insn %u\n", - map_idx, maps[map_idx].name, insn_idx); - break; + if (bpf_object__shndx_is_maps(obj, shdr_idx) || + bpf_object__shndx_is_data(obj, shdr_idx)) { + type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx); + if (type != LIBBPF_MAP_UNSPEC) { + if (GELF_ST_BIND(sym.st_info) == STB_GLOBAL) { + pr_warning("bpf: relocation: not yet supported relo for non-static global \'%s\' variable found in insns[%d].code 0x%x\n", + name, insn_idx, insns[insn_idx].code); + return -LIBBPF_ERRNO__RELOC; + } + if (!obj->caps.global_data) { + pr_warning("bpf: relocation: kernel does not support global \'%s\' variable access in insns[%d]\n", + name, insn_idx); + return -LIBBPF_ERRNO__RELOC; + } } - } - if (map_idx >= nr_maps) { - pr_warning("bpf relocation: map_idx %d large than %d\n", - (int)map_idx, (int)nr_maps - 1); - return -LIBBPF_ERRNO__RELOC; - } + for (map_idx = 0; map_idx < nr_maps; map_idx++) { + if (maps[map_idx].libbpf_type != type) + continue; + if (type != LIBBPF_MAP_UNSPEC || + (type == LIBBPF_MAP_UNSPEC && + maps[map_idx].offset == sym.st_value)) { + pr_debug("relocation: find map %zd (%s) for insn %u\n", + map_idx, maps[map_idx].name, insn_idx); + break; + } + } - prog->reloc_desc[i].type = RELO_LD64; - prog->reloc_desc[i].insn_idx = insn_idx; - prog->reloc_desc[i].map_idx = map_idx; + if (map_idx >= nr_maps) { + pr_warning("bpf relocation: map_idx %d large than %d\n", + (int)map_idx, (int)nr_maps - 1); + return -LIBBPF_ERRNO__RELOC; + } + + prog->reloc_desc[i].type = type != LIBBPF_MAP_UNSPEC ? 
+ RELO_DATA : RELO_LD64; + prog->reloc_desc[i].insn_idx = insn_idx; + prog->reloc_desc[i].map_idx = map_idx; + } } return 0; } @@ -1053,18 +1385,27 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf) { struct bpf_map_def *def = &map->def; - __u32 key_type_id, value_type_id; + __u32 key_type_id = 0, value_type_id = 0; int ret; - ret = btf__get_map_kv_tids(btf, map->name, def->key_size, - def->value_size, &key_type_id, - &value_type_id); - if (ret) + if (!bpf_map__is_internal(map)) { + ret = btf__get_map_kv_tids(btf, map->name, def->key_size, + def->value_size, &key_type_id, + &value_type_id); + } else { + /* + * LLVM annotates global data differently in BTF, that is, + * only as '.data', '.bss' or '.rodata'. + */ + ret = btf__find_by_name(btf, + libbpf_type_to_btf_name[map->libbpf_type]); + } + if (ret < 0) return ret; map->btf_key_type_id = key_type_id; - map->btf_value_type_id = value_type_id; - + map->btf_value_type_id = bpf_map__is_internal(map) ? + ret : value_type_id; return 0; } @@ -1170,9 +1511,95 @@ bpf_object__probe_name(struct bpf_object *obj) } static int +bpf_object__probe_global_data(struct bpf_object *obj) +{ + struct bpf_load_program_attr prg_attr; + struct bpf_create_map_attr map_attr; + char *cp, errmsg[STRERR_BUFSIZE]; + struct bpf_insn insns[] = { + BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int ret, map; + + memset(&map_attr, 0, sizeof(map_attr)); + map_attr.map_type = BPF_MAP_TYPE_ARRAY; + map_attr.key_size = sizeof(int); + map_attr.value_size = 32; + map_attr.max_entries = 1; + + map = bpf_create_map_xattr(&map_attr); + if (map < 0) { + cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); + pr_warning("Error in %s():%s(%d). 
Couldn't create simple array map.\n", + __func__, cp, errno); + return -errno; + } + + insns[0].imm = map; + + memset(&prg_attr, 0, sizeof(prg_attr)); + prg_attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + prg_attr.insns = insns; + prg_attr.insns_cnt = ARRAY_SIZE(insns); + prg_attr.license = "GPL"; + + ret = bpf_load_program_xattr(&prg_attr, NULL, 0); + if (ret >= 0) { + obj->caps.global_data = 1; + close(ret); + } + + close(map); + return 0; +} + +static int bpf_object__probe_caps(struct bpf_object *obj) { - return bpf_object__probe_name(obj); + int (*probe_fn[])(struct bpf_object *obj) = { + bpf_object__probe_name, + bpf_object__probe_global_data, + }; + int i, ret; + + for (i = 0; i < ARRAY_SIZE(probe_fn); i++) { + ret = probe_fn[i](obj); + if (ret < 0) + return ret; + } + + return 0; +} + +static int +bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) +{ + char *cp, errmsg[STRERR_BUFSIZE]; + int err, zero = 0; + __u8 *data; + + /* Nothing to do here since kernel already zero-initializes .bss map. */ + if (map->libbpf_type == LIBBPF_MAP_BSS) + return 0; + + data = map->libbpf_type == LIBBPF_MAP_DATA ? + obj->sections.data : obj->sections.rodata; + + err = bpf_map_update_elem(map->fd, &zero, data, 0); + /* Freeze .rodata map as read-only from syscall side. 
*/ + if (!err && map->libbpf_type == LIBBPF_MAP_RODATA) { + err = bpf_map_freeze(map->fd); + if (err) { + cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); + pr_warning("Error freezing map(%s) as read-only: %s\n", + map->name, cp); + err = 0; + } + } + return err; } static int @@ -1232,6 +1659,7 @@ bpf_object__create_maps(struct bpf_object *obj) size_t j; err = *pfd; +err_out: cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); pr_warning("failed to create map (name: '%s'): %s\n", map->name, cp); @@ -1239,6 +1667,15 @@ bpf_object__create_maps(struct bpf_object *obj) zclose(obj->maps[j].fd); return err; } + + if (bpf_map__is_internal(map)) { + err = bpf_object__populate_internal_map(obj, map); + if (err < 0) { + zclose(*pfd); + goto err_out; + } + } + pr_debug("create map %s: fd=%d\n", map->name, *pfd); } @@ -1393,21 +1830,29 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj) return 0; for (i = 0; i < prog->nr_reloc; i++) { - if (prog->reloc_desc[i].type == RELO_LD64) { + if (prog->reloc_desc[i].type == RELO_LD64 || + prog->reloc_desc[i].type == RELO_DATA) { + bool relo_data = prog->reloc_desc[i].type == RELO_DATA; struct bpf_insn *insns = prog->insns; int insn_idx, map_idx; insn_idx = prog->reloc_desc[i].insn_idx; map_idx = prog->reloc_desc[i].map_idx; - if (insn_idx >= (int)prog->insns_cnt) { + if (insn_idx + 1 >= (int)prog->insns_cnt) { pr_warning("relocation out of range: '%s'\n", prog->section_name); return -LIBBPF_ERRNO__RELOC; } - insns[insn_idx].src_reg = BPF_PSEUDO_MAP_FD; + + if (!relo_data) { + insns[insn_idx].src_reg = BPF_PSEUDO_MAP_FD; + } else { + insns[insn_idx].src_reg = BPF_PSEUDO_MAP_VALUE; + insns[insn_idx + 1].imm = insns[insn_idx].imm; + } insns[insn_idx].imm = obj->maps[map_idx].fd; - } else { + } else if (prog->reloc_desc[i].type == RELO_CALL) { err = bpf_program__reloc_text(prog, obj, &prog->reloc_desc[i]); if (err) @@ -1482,6 +1927,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int 
insns_cnt, { struct bpf_load_program_attr load_attr; char *cp, errmsg[STRERR_BUFSIZE]; + int log_buf_size = BPF_LOG_BUF_SIZE; char *log_buf; int ret; @@ -1502,21 +1948,30 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, load_attr.line_info = prog->line_info; load_attr.line_info_rec_size = prog->line_info_rec_size; load_attr.line_info_cnt = prog->line_info_cnt; + load_attr.log_level = prog->log_level; if (!load_attr.insns || !load_attr.insns_cnt) return -EINVAL; - log_buf = malloc(BPF_LOG_BUF_SIZE); +retry_load: + log_buf = malloc(log_buf_size); if (!log_buf) pr_warning("Alloc log buffer for bpf loader error, continue without log\n"); - ret = bpf_load_program_xattr(&load_attr, log_buf, BPF_LOG_BUF_SIZE); + ret = bpf_load_program_xattr(&load_attr, log_buf, log_buf_size); if (ret >= 0) { + if (load_attr.log_level) + pr_debug("verifier log:\n%s", log_buf); *pfd = ret; ret = 0; goto out; } + if (errno == ENOSPC) { + log_buf_size <<= 1; + free(log_buf); + goto retry_load; + } ret = -LIBBPF_ERRNO__LOAD; cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); pr_warning("load bpf program failed: %s\n", cp); @@ -1681,7 +2136,9 @@ static bool bpf_prog_type__needs_kver(enum bpf_prog_type type) case BPF_PROG_TYPE_UNSPEC: case BPF_PROG_TYPE_TRACEPOINT: case BPF_PROG_TYPE_RAW_TRACEPOINT: + case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: case BPF_PROG_TYPE_PERF_EVENT: + case BPF_PROG_TYPE_CGROUP_SYSCTL: return false; case BPF_PROG_TYPE_KPROBE: default: @@ -1717,6 +2174,7 @@ __bpf_object__open(const char *path, void *obj_buf, size_t obj_buf_sz, CHECK_ERR(bpf_object__elf_init(obj), err, out); CHECK_ERR(bpf_object__check_endianness(obj), err, out); + CHECK_ERR(bpf_object__probe_caps(obj), err, out); CHECK_ERR(bpf_object__elf_collect(obj, flags), err, out); CHECK_ERR(bpf_object__collect_reloc(obj), err, out); CHECK_ERR(bpf_object__validate(obj, needs_kver), err, out); @@ -1810,7 +2268,6 @@ int bpf_object__load(struct bpf_object *obj) obj->loaded = true; - 
CHECK_ERR(bpf_object__probe_caps(obj), err, out); CHECK_ERR(bpf_object__create_maps(obj), err, out); CHECK_ERR(bpf_object__relocate(obj), err, out); CHECK_ERR(bpf_object__load_progs(obj), err, out); @@ -2291,6 +2748,9 @@ void bpf_object__close(struct bpf_object *obj) obj->maps[i].priv = NULL; obj->maps[i].clear_priv = NULL; } + + zfree(&obj->sections.rodata); + zfree(&obj->sections.data); zfree(&obj->maps); obj->nr_maps = 0; @@ -2619,6 +3079,8 @@ static const struct { BPF_CGROUP_UDP4_SENDMSG), BPF_EAPROG_SEC("cgroup/sendmsg6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG), + BPF_EAPROG_SEC("cgroup/sysctl", BPF_PROG_TYPE_CGROUP_SYSCTL, + BPF_CGROUP_SYSCTL), }; #undef BPF_PROG_SEC_IMPL @@ -2768,6 +3230,11 @@ bool bpf_map__is_offload_neutral(struct bpf_map *map) return map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY; } +bool bpf_map__is_internal(struct bpf_map *map) +{ + return map->libbpf_type != LIBBPF_MAP_UNSPEC; +} + void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex) { map->map_ifindex = ifindex; @@ -2926,6 +3393,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, bpf_program__set_expected_attach_type(prog, expected_attach_type); + prog->log_level = attr->log_level; if (!first_prog) first_prog = prog; } @@ -2999,3 +3467,249 @@ bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, ring_buffer_write_tail(header, data_tail); return ret; } + +struct bpf_prog_info_array_desc { + int array_offset; /* e.g. offset of jited_prog_insns */ + int count_offset; /* e.g. 
offset of jited_prog_len */ + int size_offset; /* > 0: offset of rec size, + * < 0: fix size of -size_offset + */ +}; + +static struct bpf_prog_info_array_desc bpf_prog_info_array_desc[] = { + [BPF_PROG_INFO_JITED_INSNS] = { + offsetof(struct bpf_prog_info, jited_prog_insns), + offsetof(struct bpf_prog_info, jited_prog_len), + -1, + }, + [BPF_PROG_INFO_XLATED_INSNS] = { + offsetof(struct bpf_prog_info, xlated_prog_insns), + offsetof(struct bpf_prog_info, xlated_prog_len), + -1, + }, + [BPF_PROG_INFO_MAP_IDS] = { + offsetof(struct bpf_prog_info, map_ids), + offsetof(struct bpf_prog_info, nr_map_ids), + -(int)sizeof(__u32), + }, + [BPF_PROG_INFO_JITED_KSYMS] = { + offsetof(struct bpf_prog_info, jited_ksyms), + offsetof(struct bpf_prog_info, nr_jited_ksyms), + -(int)sizeof(__u64), + }, + [BPF_PROG_INFO_JITED_FUNC_LENS] = { + offsetof(struct bpf_prog_info, jited_func_lens), + offsetof(struct bpf_prog_info, nr_jited_func_lens), + -(int)sizeof(__u32), + }, + [BPF_PROG_INFO_FUNC_INFO] = { + offsetof(struct bpf_prog_info, func_info), + offsetof(struct bpf_prog_info, nr_func_info), + offsetof(struct bpf_prog_info, func_info_rec_size), + }, + [BPF_PROG_INFO_LINE_INFO] = { + offsetof(struct bpf_prog_info, line_info), + offsetof(struct bpf_prog_info, nr_line_info), + offsetof(struct bpf_prog_info, line_info_rec_size), + }, + [BPF_PROG_INFO_JITED_LINE_INFO] = { + offsetof(struct bpf_prog_info, jited_line_info), + offsetof(struct bpf_prog_info, nr_jited_line_info), + offsetof(struct bpf_prog_info, jited_line_info_rec_size), + }, + [BPF_PROG_INFO_PROG_TAGS] = { + offsetof(struct bpf_prog_info, prog_tags), + offsetof(struct bpf_prog_info, nr_prog_tags), + -(int)sizeof(__u8) * BPF_TAG_SIZE, + }, + +}; + +static __u32 bpf_prog_info_read_offset_u32(struct bpf_prog_info *info, int offset) +{ + __u32 *array = (__u32 *)info; + + if (offset >= 0) + return array[offset / sizeof(__u32)]; + return -(int)offset; +} + +static __u64 bpf_prog_info_read_offset_u64(struct bpf_prog_info *info, int 
offset) +{ + __u64 *array = (__u64 *)info; + + if (offset >= 0) + return array[offset / sizeof(__u64)]; + return -(int)offset; +} + +static void bpf_prog_info_set_offset_u32(struct bpf_prog_info *info, int offset, + __u32 val) +{ + __u32 *array = (__u32 *)info; + + if (offset >= 0) + array[offset / sizeof(__u32)] = val; +} + +static void bpf_prog_info_set_offset_u64(struct bpf_prog_info *info, int offset, + __u64 val) +{ + __u64 *array = (__u64 *)info; + + if (offset >= 0) + array[offset / sizeof(__u64)] = val; +} + +struct bpf_prog_info_linear * +bpf_program__get_prog_info_linear(int fd, __u64 arrays) +{ + struct bpf_prog_info_linear *info_linear; + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); + __u32 data_len = 0; + int i, err; + void *ptr; + + if (arrays >> BPF_PROG_INFO_LAST_ARRAY) + return ERR_PTR(-EINVAL); + + /* step 1: get array dimensions */ + err = bpf_obj_get_info_by_fd(fd, &info, &info_len); + if (err) { + pr_debug("can't get prog info: %s", strerror(errno)); + return ERR_PTR(-EFAULT); + } + + /* step 2: calculate total size of all arrays */ + for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) { + bool include_array = (arrays & (1UL << i)) > 0; + struct bpf_prog_info_array_desc *desc; + __u32 count, size; + + desc = bpf_prog_info_array_desc + i; + + /* kernel is too old to support this field */ + if (info_len < desc->array_offset + sizeof(__u32) || + info_len < desc->count_offset + sizeof(__u32) || + (desc->size_offset > 0 && info_len < desc->size_offset)) + include_array = false; + + if (!include_array) { + arrays &= ~(1UL << i); /* clear the bit */ + continue; + } + + count = bpf_prog_info_read_offset_u32(&info, desc->count_offset); + size = bpf_prog_info_read_offset_u32(&info, desc->size_offset); + + data_len += count * size; + } + + /* step 3: allocate continuous memory */ + data_len = roundup(data_len, sizeof(__u64)); + info_linear = malloc(sizeof(struct bpf_prog_info_linear) + data_len); + if (!info_linear) 
+ return ERR_PTR(-ENOMEM); + + /* step 4: fill data to info_linear->info */ + info_linear->arrays = arrays; + memset(&info_linear->info, 0, sizeof(info)); + ptr = info_linear->data; + + for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) { + struct bpf_prog_info_array_desc *desc; + __u32 count, size; + + if ((arrays & (1UL << i)) == 0) + continue; + + desc = bpf_prog_info_array_desc + i; + count = bpf_prog_info_read_offset_u32(&info, desc->count_offset); + size = bpf_prog_info_read_offset_u32(&info, desc->size_offset); + bpf_prog_info_set_offset_u32(&info_linear->info, + desc->count_offset, count); + bpf_prog_info_set_offset_u32(&info_linear->info, + desc->size_offset, size); + bpf_prog_info_set_offset_u64(&info_linear->info, + desc->array_offset, + ptr_to_u64(ptr)); + ptr += count * size; + } + + /* step 5: call syscall again to get required arrays */ + err = bpf_obj_get_info_by_fd(fd, &info_linear->info, &info_len); + if (err) { + pr_debug("can't get prog info: %s", strerror(errno)); + free(info_linear); + return ERR_PTR(-EFAULT); + } + + /* step 6: verify the data */ + for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) { + struct bpf_prog_info_array_desc *desc; + __u32 v1, v2; + + if ((arrays & (1UL << i)) == 0) + continue; + + desc = bpf_prog_info_array_desc + i; + v1 = bpf_prog_info_read_offset_u32(&info, desc->count_offset); + v2 = bpf_prog_info_read_offset_u32(&info_linear->info, + desc->count_offset); + if (v1 != v2) + pr_warning("%s: mismatch in element count\n", __func__); + + v1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset); + v2 = bpf_prog_info_read_offset_u32(&info_linear->info, + desc->size_offset); + if (v1 != v2) + pr_warning("%s: mismatch in rec size\n", __func__); + } + + /* step 7: update info_len and data_len */ + info_linear->info_len = sizeof(struct bpf_prog_info); + info_linear->data_len = data_len; + + return info_linear; +} + +void bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear 
*info_linear) +{ + int i; + + for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) { + struct bpf_prog_info_array_desc *desc; + __u64 addr, offs; + + if ((info_linear->arrays & (1UL << i)) == 0) + continue; + + desc = bpf_prog_info_array_desc + i; + addr = bpf_prog_info_read_offset_u64(&info_linear->info, + desc->array_offset); + offs = addr - ptr_to_u64(info_linear->data); + bpf_prog_info_set_offset_u64(&info_linear->info, + desc->array_offset, offs); + } +} + +void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear) +{ + int i; + + for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) { + struct bpf_prog_info_array_desc *desc; + __u64 addr, offs; + + if ((info_linear->arrays & (1UL << i)) == 0) + continue; + + desc = bpf_prog_info_array_desc + i; + offs = bpf_prog_info_read_offset_u64(&info_linear->info, + desc->array_offset); + addr = offs + ptr_to_u64(info_linear->data); + bpf_prog_info_set_offset_u64(&info_linear->info, + desc->array_offset, addr); + } +} diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index b4652aa1a58a..c5ff00515ce7 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -10,6 +10,7 @@ #ifndef __LIBBPF_LIBBPF_H #define __LIBBPF_LIBBPF_H +#include <stdarg.h> #include <stdio.h> #include <stdint.h> #include <stdbool.h> @@ -74,6 +75,10 @@ struct bpf_object *__bpf_object__open_xattr(struct bpf_object_open_attr *attr, LIBBPF_API struct bpf_object *bpf_object__open_buffer(void *obj_buf, size_t obj_buf_sz, const char *name); +int bpf_object__section_size(const struct bpf_object *obj, const char *name, + __u32 *size); +int bpf_object__variable_offset(const struct bpf_object *obj, const char *name, + __u32 *off); LIBBPF_API int bpf_object__pin_maps(struct bpf_object *obj, const char *path); LIBBPF_API int bpf_object__unpin_maps(struct bpf_object *obj, const char *path); @@ -300,6 +305,7 @@ LIBBPF_API void *bpf_map__priv(struct bpf_map *map); LIBBPF_API int 
bpf_map__reuse_fd(struct bpf_map *map, int fd); LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries); LIBBPF_API bool bpf_map__is_offload_neutral(struct bpf_map *map); +LIBBPF_API bool bpf_map__is_internal(struct bpf_map *map); LIBBPF_API void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex); LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path); LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path); @@ -313,6 +319,7 @@ struct bpf_prog_load_attr { enum bpf_prog_type prog_type; enum bpf_attach_type expected_attach_type; int ifindex; + int log_level; }; LIBBPF_API int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, @@ -377,6 +384,69 @@ LIBBPF_API bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex); LIBBPF_API bool bpf_probe_helper(enum bpf_func_id id, enum bpf_prog_type prog_type, __u32 ifindex); +/* + * Get bpf_prog_info in continuous memory + * + * struct bpf_prog_info has multiple arrays. The user has option to choose + * arrays to fetch from kernel. The following APIs provide an uniform way to + * fetch these data. All arrays in bpf_prog_info are stored in a single + * continuous memory region. This makes it easy to store the info in a + * file. + * + * Before writing bpf_prog_info_linear to files, it is necessary to + * translate pointers in bpf_prog_info to offsets. Helper functions + * bpf_program__bpil_addr_to_offs() and bpf_program__bpil_offs_to_addr() + * are introduced to switch between pointers and offsets. 
+ * + * Examples: + * # To fetch map_ids and prog_tags: + * __u64 arrays = (1UL << BPF_PROG_INFO_MAP_IDS) | + * (1UL << BPF_PROG_INFO_PROG_TAGS); + * struct bpf_prog_info_linear *info_linear = + * bpf_program__get_prog_info_linear(fd, arrays); + * + * # To save data in file + * bpf_program__bpil_addr_to_offs(info_linear); + * write(f, info_linear, sizeof(*info_linear) + info_linear->data_len); + * + * # To read data from file + * read(f, info_linear, <proper_size>); + * bpf_program__bpil_offs_to_addr(info_linear); + */ +enum bpf_prog_info_array { + BPF_PROG_INFO_FIRST_ARRAY = 0, + BPF_PROG_INFO_JITED_INSNS = 0, + BPF_PROG_INFO_XLATED_INSNS, + BPF_PROG_INFO_MAP_IDS, + BPF_PROG_INFO_JITED_KSYMS, + BPF_PROG_INFO_JITED_FUNC_LENS, + BPF_PROG_INFO_FUNC_INFO, + BPF_PROG_INFO_LINE_INFO, + BPF_PROG_INFO_JITED_LINE_INFO, + BPF_PROG_INFO_PROG_TAGS, + BPF_PROG_INFO_LAST_ARRAY, +}; + +struct bpf_prog_info_linear { + /* size of struct bpf_prog_info, when the tool is compiled */ + __u32 info_len; + /* total bytes allocated for data, round up to 8 bytes */ + __u32 data_len; + /* which arrays are included in data */ + __u64 arrays; + struct bpf_prog_info info; + __u8 data[]; +}; + +LIBBPF_API struct bpf_prog_info_linear * +bpf_program__get_prog_info_linear(int fd, __u64 arrays); + +LIBBPF_API void +bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear); + +LIBBPF_API void +bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 778a26702a70..673001787cba 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -153,4 +153,14 @@ LIBBPF_0.0.2 { xsk_socket__delete; xsk_umem__fd; xsk_socket__fd; + bpf_program__get_prog_info_linear; + bpf_program__bpil_addr_to_offs; + bpf_program__bpil_offs_to_addr; } LIBBPF_0.0.1; + +LIBBPF_0.0.3 { + global: + bpf_map__is_internal; + bpf_map_freeze; + btf__finalize_data; +} 
LIBBPF_0.0.2; diff --git a/tools/lib/bpf/libbpf.pc.template b/tools/lib/bpf/libbpf.pc.template new file mode 100644 index 000000000000..ac17fcef2108 --- /dev/null +++ b/tools/lib/bpf/libbpf.pc.template @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +prefix=@PREFIX@ +libdir=@LIBDIR@ +includedir=${prefix}/include + +Name: libbpf +Description: BPF library +Version: @VERSION@ +Libs: -L${libdir} -lbpf +Requires.private: libelf +Cflags: -I${includedir} diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index 8c3a1c04dcb2..a2c64a9ce1a6 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -9,6 +9,7 @@ #include <net/if.h> #include <sys/utsname.h> +#include <linux/btf.h> #include <linux/filter.h> #include <linux/kernel.h> @@ -93,10 +94,12 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, case BPF_PROG_TYPE_CGROUP_DEVICE: case BPF_PROG_TYPE_SK_MSG: case BPF_PROG_TYPE_RAW_TRACEPOINT: + case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: case BPF_PROG_TYPE_LWT_SEG6LOCAL: case BPF_PROG_TYPE_LIRC_MODE2: case BPF_PROG_TYPE_SK_REUSEPORT: case BPF_PROG_TYPE_FLOW_DISSECTOR: + case BPF_PROG_TYPE_CGROUP_SYSCTL: default: break; } @@ -129,11 +132,65 @@ bool bpf_probe_prog_type(enum bpf_prog_type prog_type, __u32 ifindex) return errno != EINVAL && errno != EOPNOTSUPP; } +static int load_btf(void) +{ +#define BTF_INFO_ENC(kind, kind_flag, vlen) \ + ((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN)) +#define BTF_TYPE_ENC(name, info, size_or_type) \ + (name), (info), (size_or_type) +#define BTF_INT_ENC(encoding, bits_offset, nr_bits) \ + ((encoding) << 24 | (bits_offset) << 16 | (nr_bits)) +#define BTF_TYPE_INT_ENC(name, encoding, bits_offset, bits, sz) \ + BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz), \ + BTF_INT_ENC(encoding, bits_offset, bits) +#define BTF_MEMBER_ENC(name, type, bits_offset) \ + (name), (type), (bits_offset) + + const char btf_str_sec[] = 
"\0bpf_spin_lock\0val\0cnt\0l"; + /* struct bpf_spin_lock { + * int val; + * }; + * struct val { + * int cnt; + * struct bpf_spin_lock l; + * }; + */ + __u32 btf_raw_types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* struct bpf_spin_lock */ /* [2] */ + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4), + BTF_MEMBER_ENC(15, 1, 0), /* int val; */ + /* struct val */ /* [3] */ + BTF_TYPE_ENC(15, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8), + BTF_MEMBER_ENC(19, 1, 0), /* int cnt; */ + BTF_MEMBER_ENC(23, 2, 32),/* struct bpf_spin_lock l; */ + }; + struct btf_header btf_hdr = { + .magic = BTF_MAGIC, + .version = BTF_VERSION, + .hdr_len = sizeof(struct btf_header), + .type_len = sizeof(btf_raw_types), + .str_off = sizeof(btf_raw_types), + .str_len = sizeof(btf_str_sec), + }; + __u8 raw_btf[sizeof(struct btf_header) + sizeof(btf_raw_types) + + sizeof(btf_str_sec)]; + + memcpy(raw_btf, &btf_hdr, sizeof(btf_hdr)); + memcpy(raw_btf + sizeof(btf_hdr), btf_raw_types, sizeof(btf_raw_types)); + memcpy(raw_btf + sizeof(btf_hdr) + sizeof(btf_raw_types), + btf_str_sec, sizeof(btf_str_sec)); + + return bpf_load_btf(raw_btf, sizeof(raw_btf), 0, 0, 0); +} + bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex) { int key_size, value_size, max_entries, map_flags; + __u32 btf_key_type_id = 0, btf_value_type_id = 0; struct bpf_create_map_attr attr = {}; - int fd = -1, fd_inner; + int fd = -1, btf_fd = -1, fd_inner; key_size = sizeof(__u32); value_size = sizeof(__u32); @@ -159,6 +216,16 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex) case BPF_MAP_TYPE_STACK: key_size = 0; break; + case BPF_MAP_TYPE_SK_STORAGE: + btf_key_type_id = 1; + btf_value_type_id = 3; + value_size = 8; + max_entries = 0; + map_flags = BPF_F_NO_PREALLOC; + btf_fd = load_btf(); + if (btf_fd < 0) + return false; + break; case BPF_MAP_TYPE_UNSPEC: case BPF_MAP_TYPE_HASH: case BPF_MAP_TYPE_ARRAY: @@ -204,11 +271,18 @@ bool bpf_probe_map_type(enum 
bpf_map_type map_type, __u32 ifindex) attr.max_entries = max_entries; attr.map_flags = map_flags; attr.map_ifindex = ifindex; + if (btf_fd >= 0) { + attr.btf_fd = btf_fd; + attr.btf_key_type_id = btf_key_type_id; + attr.btf_value_type_id = btf_value_type_id; + } fd = bpf_create_map_xattr(&attr); } if (fd >= 0) close(fd); + if (btf_fd >= 0) + close(btf_fd); return fd >= 0; } diff --git a/tools/lib/bpf/libbpf_util.h b/tools/lib/bpf/libbpf_util.h index 81ecda0cb9c9..da94c4cb2e4d 100644 --- a/tools/lib/bpf/libbpf_util.h +++ b/tools/lib/bpf/libbpf_util.h @@ -23,6 +23,36 @@ do { \ #define pr_info(fmt, ...) __pr(LIBBPF_INFO, fmt, ##__VA_ARGS__) #define pr_debug(fmt, ...) __pr(LIBBPF_DEBUG, fmt, ##__VA_ARGS__) +/* Use these barrier functions instead of smp_[rw]mb() when they are + * used in a libbpf header file. That way they can be built into the + * application that uses libbpf. + */ +#if defined(__i386__) || defined(__x86_64__) +# define libbpf_smp_rmb() asm volatile("" : : : "memory") +# define libbpf_smp_wmb() asm volatile("" : : : "memory") +# define libbpf_smp_mb() \ + asm volatile("lock; addl $0,-4(%%rsp)" : : : "memory", "cc") +/* Hinders stores to be observed before older loads. */ +# define libbpf_smp_rwmb() asm volatile("" : : : "memory") +#elif defined(__aarch64__) +# define libbpf_smp_rmb() asm volatile("dmb ishld" : : : "memory") +# define libbpf_smp_wmb() asm volatile("dmb ishst" : : : "memory") +# define libbpf_smp_mb() asm volatile("dmb ish" : : : "memory") +# define libbpf_smp_rwmb() libbpf_smp_mb() +#elif defined(__arm__) +/* These are only valid for armv7 and above */ +# define libbpf_smp_rmb() asm volatile("dmb ish" : : : "memory") +# define libbpf_smp_wmb() asm volatile("dmb ishst" : : : "memory") +# define libbpf_smp_mb() asm volatile("dmb ish" : : : "memory") +# define libbpf_smp_rwmb() libbpf_smp_mb() +#else +/* Architecture missing native barrier functions. 
*/ +# define libbpf_smp_rmb() __sync_synchronize() +# define libbpf_smp_wmb() __sync_synchronize() +# define libbpf_smp_mb() __sync_synchronize() +# define libbpf_smp_rwmb() __sync_synchronize() +#endif + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index f98ac82c9aea..557ef8d1250d 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -126,8 +126,8 @@ static void xsk_set_umem_config(struct xsk_umem_config *cfg, cfg->frame_headroom = usr_cfg->frame_headroom; } -static void xsk_set_xdp_socket_config(struct xsk_socket_config *cfg, - const struct xsk_socket_config *usr_cfg) +static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg, + const struct xsk_socket_config *usr_cfg) { if (!usr_cfg) { cfg->rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS; @@ -135,14 +135,19 @@ static void xsk_set_xdp_socket_config(struct xsk_socket_config *cfg, cfg->libbpf_flags = 0; cfg->xdp_flags = 0; cfg->bind_flags = 0; - return; + return 0; } + if (usr_cfg->libbpf_flags & ~XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD) + return -EINVAL; + cfg->rx_size = usr_cfg->rx_size; cfg->tx_size = usr_cfg->tx_size; cfg->libbpf_flags = usr_cfg->libbpf_flags; cfg->xdp_flags = usr_cfg->xdp_flags; cfg->bind_flags = usr_cfg->bind_flags; + + return 0; } int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, __u64 size, @@ -254,7 +259,8 @@ out_umem_alloc: static int xsk_load_xdp_prog(struct xsk_socket *xsk) { - char bpf_log_buf[BPF_LOG_BUF_SIZE]; + static const int log_buf_size = 16 * 1024; + char log_buf[log_buf_size]; int err, prog_fd; /* This is the C-program: @@ -303,10 +309,10 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk) size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); prog_fd = bpf_load_program(BPF_PROG_TYPE_XDP, prog, insns_cnt, - "LGPL-2.1 or BSD-2-Clause", 0, bpf_log_buf, - BPF_LOG_BUF_SIZE); + "LGPL-2.1 or BSD-2-Clause", 0, log_buf, + log_buf_size); if (prog_fd < 0) { - pr_warning("BPF log buffer:\n%s", 
bpf_log_buf); + pr_warning("BPF log buffer:\n%s", log_buf); return prog_fd; } @@ -557,7 +563,9 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, } strncpy(xsk->ifname, ifname, IFNAMSIZ); - xsk_set_xdp_socket_config(&xsk->config, usr_config); + err = xsk_set_xdp_socket_config(&xsk->config, usr_config); + if (err) + goto out_socket; if (rx) { err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING, diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h index a497f00e2962..82ea71a0f3ec 100644 --- a/tools/lib/bpf/xsk.h +++ b/tools/lib/bpf/xsk.h @@ -16,6 +16,7 @@ #include <linux/if_xdp.h> #include "libbpf.h" +#include "libbpf_util.h" #ifdef __cplusplus extern "C" { @@ -36,6 +37,10 @@ struct name { \ DEFINE_XSK_RING(xsk_ring_prod); DEFINE_XSK_RING(xsk_ring_cons); +/* For a detailed explanation on the memory barriers associated with the + * ring, please take a look at net/xdp/xsk_queue.h. + */ + struct xsk_umem; struct xsk_socket; @@ -105,7 +110,7 @@ static inline __u32 xsk_cons_nb_avail(struct xsk_ring_cons *r, __u32 nb) static inline size_t xsk_ring_prod__reserve(struct xsk_ring_prod *prod, size_t nb, __u32 *idx) { - if (unlikely(xsk_prod_nb_free(prod, nb) < nb)) + if (xsk_prod_nb_free(prod, nb) < nb) return 0; *idx = prod->cached_prod; @@ -116,10 +121,10 @@ static inline size_t xsk_ring_prod__reserve(struct xsk_ring_prod *prod, static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, size_t nb) { - /* Make sure everything has been written to the ring before signalling - * this to the kernel. + /* Make sure everything has been written to the ring before indicating + * this to the kernel by writing the producer pointer. 
*/ - smp_wmb(); + libbpf_smp_wmb(); *prod->producer += nb; } @@ -129,11 +134,11 @@ static inline size_t xsk_ring_cons__peek(struct xsk_ring_cons *cons, { size_t entries = xsk_cons_nb_avail(cons, nb); - if (likely(entries > 0)) { + if (entries > 0) { /* Make sure we do not speculatively read the data before * we have received the packet buffers from the ring. */ - smp_rmb(); + libbpf_smp_rmb(); *idx = cons->cached_cons; cons->cached_cons += entries; @@ -144,6 +149,11 @@ static inline size_t xsk_ring_cons__peek(struct xsk_ring_cons *cons, static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, size_t nb) { + /* Make sure data has been read before indicating we are done + * with the entries by updating the consumer pointer. + */ + libbpf_smp_rwmb(); + *cons->consumer += nb; } diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 87494c7c619d..981c6ce2da2c 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -2233,7 +2233,7 @@ eval_type_str(unsigned long long val, const char *type, int pointer) return val & 0xffffffff; if (strcmp(type, "u64") == 0 || - strcmp(type, "s64")) + strcmp(type, "s64") == 0) return val; if (strcmp(type, "s8") == 0) diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile index c9d038f91af6..53f8be0f4a1f 100644 --- a/tools/objtool/Makefile +++ b/tools/objtool/Makefile @@ -25,14 +25,17 @@ LIBSUBCMD = $(LIBSUBCMD_OUTPUT)libsubcmd.a OBJTOOL := $(OUTPUT)objtool OBJTOOL_IN := $(OBJTOOL)-in.o +LIBELF_FLAGS := $(shell pkg-config libelf --cflags 2>/dev/null) +LIBELF_LIBS := $(shell pkg-config libelf --libs 2>/dev/null || echo -lelf) + all: $(OBJTOOL) INCLUDES := -I$(srctree)/tools/include \ -I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi \ -I$(srctree)/tools/objtool/arch/$(ARCH)/include WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -Wno-packed -CFLAGS += -Werror $(WARNINGS) $(KBUILD_HOSTCFLAGS) -g $(INCLUDES) -LDFLAGS += -lelf 
$(LIBSUBCMD) $(KBUILD_HOSTLDFLAGS) +CFLAGS += -Werror $(WARNINGS) $(KBUILD_HOSTCFLAGS) -g $(INCLUDES) $(LIBELF_FLAGS) +LDFLAGS += $(LIBELF_LIBS) $(LIBSUBCMD) $(KBUILD_HOSTLDFLAGS) # Allow old libelf to be used: elfshdr := $(shell echo '$(pound)include <libelf.h>' | $(CC) $(CFLAGS) -x c -E - | grep elf_getshdr) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 0414a0d52262..479196aeb409 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -165,6 +165,7 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func, "fortify_panic", "usercopy_abort", "machine_real_restart", + "rewind_stack_do_exit", }; if (func->bind == STB_WEAK) @@ -2184,9 +2185,10 @@ static void cleanup(struct objtool_file *file) elf_close(file->elf); } +static struct objtool_file file; + int check(const char *_objname, bool orc) { - struct objtool_file file; int ret, warnings = 0; objname = _objname; diff --git a/tools/perf/Documentation/Build.txt b/tools/perf/Documentation/Build.txt index f6fc6507ba55..3766886c4bca 100644 --- a/tools/perf/Documentation/Build.txt +++ b/tools/perf/Documentation/Build.txt @@ -47,3 +47,27 @@ Those objects are then used in final linking: NOTE this description is omitting other libraries involved, only focusing on build framework outcomes + +3) Build with ASan or UBSan +========================== + $ cd tools/perf + $ make DESTDIR=/usr + $ make DESTDIR=/usr install + +AddressSanitizer (or ASan) is a GCC feature that detects memory corruption bugs +such as buffer overflows and memory leaks. + + $ cd tools/perf + $ make DEBUG=1 EXTRA_CFLAGS='-fno-omit-frame-pointer -fsanitize=address' + $ ASAN_OPTIONS=log_path=asan.log ./perf record -a + +ASan outputs all detected issues into a log file named 'asan.log.<pid>'. + +UndefinedBehaviorSanitizer (or UBSan) is a fast undefined behavior detector +supported by GCC. UBSan detects undefined behaviors of programs at runtime. 
+ + $ cd tools/perf + $ make DEBUG=1 EXTRA_CFLAGS='-fno-omit-frame-pointer -fsanitize=undefined' + $ UBSAN_OPTIONS=print_stacktrace=1 ./perf record -a + +If UBSan detects any problem at runtime, it outputs a “runtime error:” message. diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 86f3dcc15f83..462b3cde0675 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -114,7 +114,7 @@ Given a $HOME/.perfconfig like this: [report] # Defaults - sort-order = comm,dso,symbol + sort_order = comm,dso,symbol percent-limit = 0 queue-size = 0 children = true @@ -584,6 +584,20 @@ llvm.*:: llvm.opts:: Options passed to llc. +samples.*:: + + samples.context:: + Define how many ns worth of time to show + around samples in perf report sample context browser. + +scripts.*:: + + Any option defines a script that is added to the scripts menu + in the interactive perf browser and whose output is displayed. + The name of the option is the name, the value is a script command line. + The script gets the same options passed as a full perf script, + in particular -i perfdata file, --cpu, --tid + SEE ALSO -------- linkperf:perf[1] diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 8f0c2be34848..8fe4dffcadd0 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -495,6 +495,10 @@ overhead. You can still switch them on with: --switch-output --no-no-buildid --no-no-buildid-cache +--switch-max-files=N:: + +When rotating perf.data with --switch-output, only keep N files. + --dry-run:: Parse options then exit. --dry-run can be used to detect errors in cmdline options. 
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 1a27bfe05039..f441baa794ce 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -105,6 +105,8 @@ OPTIONS guest machine - sample: Number of sample - period: Raw number of event count of sample + - time: Separate the samples by time stamp with the resolution specified by + --time-quantum (default 100ms). Specify with overhead and before it. By default, comm, dso and symbol keys are used. (i.e. --sort comm,dso,symbol) @@ -459,6 +461,10 @@ include::itrace.txt[] --socket-filter:: Only report the samples on the processor socket that match with this filter +--samples=N:: + Save N individual samples for each histogram entry to show context in perf + report tui browser. + --raw-trace:: When displaying traceevent output, do not use print fmt or plugins. @@ -477,6 +483,9 @@ include::itrace.txt[] Please note that not all mmaps are stored, options affecting which ones are include 'perf record --data', for instance. +--ns:: + Show time stamps in nanoseconds. + --stats:: Display overall events statistics without any further processing. (like the one at the end of the perf report -D command) @@ -494,6 +503,10 @@ include::itrace.txt[] The period/hits keywords set the base the percentage is computed on - the samples period or the number of samples (hits). +--time-quantum:: + Configure time quantum for time sort key. Default 100ms. + Accepts s, us, ms, ns units. + include::callchain-overhead-calculation.txt[] SEE ALSO diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 2e19fd7ffe35..9b0d04dd2a61 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -380,6 +380,9 @@ include::itrace.txt[] Set the maximum number of program blocks to print with brstackasm for each sample. +--reltime:: + Print time stamps relative to trace start. 
+ --per-event-dump:: Create per event files with a "perf.data.EVENT.dump" name instead of printing to stdout, useful, for instance, for generating flamegraphs. diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 4bc2085e5197..39c05f89104e 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -72,9 +72,8 @@ report:: --all-cpus:: system-wide collection from all CPUs (default if no target is specified) --c:: ---scale:: - scale/normalize counter values +--no-scale:: + Don't scale/normalize counter values -d:: --detailed:: diff --git a/tools/perf/Documentation/tips.txt b/tools/perf/Documentation/tips.txt index 849599f39c5e..869965d629ce 100644 --- a/tools/perf/Documentation/tips.txt +++ b/tools/perf/Documentation/tips.txt @@ -15,6 +15,7 @@ To see callchains in a more compact form: perf report -g folded Show individual samples with: perf script Limit to show entries above 5% only: perf report --percent-limit 5 Profiling branch (mis)predictions with: perf record -b / perf report +To show assembler sample contexts use perf record -b / perf script -F +brstackinsn --xed Treat branches as callchains: perf report --branch-history To count events in every 1000 msec: perf stat -I 1000 Print event counts in CSV format with: perf stat -x, @@ -34,3 +35,9 @@ Show current config key-value pairs: perf config --list Show user configuration overrides: perf config --user --list To add Node.js USDT(User-Level Statically Defined Tracing): perf buildid-cache --add `which node` To report cacheline events from previous recording: perf c2c report +To browse sample contexts use perf report --sample 10 and select in context menu +To separate samples by time use perf report --sort time,overhead,sym +To set sample time separation other than 100ms with --sort time use --time-quantum +Add -I to perf report to sample register values visible in perf report context. 
+To show IPC for sampling periods use perf record -e '{cycles,instructions}:S' and then browse context +To show context switches in perf report sample context add --switch-events to perf record. diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 0f11d5891301..fe3f97e342fa 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -227,6 +227,8 @@ FEATURE_CHECK_LDFLAGS-libpython-version := $(PYTHON_EMBED_LDOPTS) FEATURE_CHECK_LDFLAGS-libaio = -lrt +FEATURE_CHECK_LDFLAGS-disassembler-four-args = -lbfd -lopcodes + CFLAGS += -fno-omit-frame-pointer CFLAGS += -ggdb3 CFLAGS += -funwind-tables @@ -713,7 +715,7 @@ else endif ifeq ($(feature-libbfd), 1) - EXTLIBS += -lbfd + EXTLIBS += -lbfd -lopcodes else # we are on a system that requires -liberty and (maybe) -lz # to link against -lbfd; test each case individually here @@ -724,12 +726,15 @@ else $(call feature_check,libbfd-liberty-z) ifeq ($(feature-libbfd-liberty), 1) - EXTLIBS += -lbfd -liberty + EXTLIBS += -lbfd -lopcodes -liberty + FEATURE_CHECK_LDFLAGS-disassembler-four-args += -liberty -ldl else ifeq ($(feature-libbfd-liberty-z), 1) - EXTLIBS += -lbfd -liberty -lz + EXTLIBS += -lbfd -lopcodes -liberty -lz + FEATURE_CHECK_LDFLAGS-disassembler-four-args += -liberty -lz -ldl endif endif + $(call feature_check,disassembler-four-args) endif ifdef NO_DEMANGLE @@ -808,6 +813,10 @@ ifdef HAVE_KVM_STAT_SUPPORT CFLAGS += -DHAVE_KVM_STAT_SUPPORT endif +ifeq ($(feature-disassembler-four-args), 1) + CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE +endif + ifeq (${IS_64_BIT}, 1) ifndef NO_PERF_READ_VDSO32 $(call feature_check,compile-32) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 01f7555fd933..e8c9f77e9010 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -481,8 +481,8 @@ $(madvise_behavior_array): $(madvise_hdr_dir)/mman-common.h $(madvise_behavior_t mmap_flags_array := $(beauty_outdir)/mmap_flags_array.c mmap_flags_tbl := 
$(srctree)/tools/perf/trace/beauty/mmap_flags.sh -$(mmap_flags_array): $(asm_generic_uapi_dir)/mman.h $(asm_generic_uapi_dir)/mman-common.h $(mmap_flags_tbl) - $(Q)$(SHELL) '$(mmap_flags_tbl)' $(asm_generic_uapi_dir) $(arch_asm_uapi_dir) > $@ +$(mmap_flags_array): $(linux_uapi_dir)/mman.h $(asm_generic_uapi_dir)/mman.h $(asm_generic_uapi_dir)/mman-common.h $(mmap_flags_tbl) + $(Q)$(SHELL) '$(mmap_flags_tbl)' $(linux_uapi_dir) $(asm_generic_uapi_dir) $(arch_asm_uapi_dir) > $@ mount_flags_array := $(beauty_outdir)/mount_flags_array.c mount_flags_tbl := $(srctree)/tools/perf/trace/beauty/mount_flags.sh diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index f0b1709a5ffb..92ee0b4378d4 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -343,6 +343,12 @@ 332 common statx __x64_sys_statx 333 common io_pgetevents __x64_sys_io_pgetevents 334 common rseq __x64_sys_rseq +# don't use numbers 387 through 423, add new calls after the last +# 'common' entry +424 common pidfd_send_signal __x64_sys_pidfd_send_signal +425 common io_uring_setup __x64_sys_io_uring_setup +426 common io_uring_enter __x64_sys_io_uring_enter +427 common io_uring_register __x64_sys_io_uring_register # # x32-specific system call numbers start at 512 to avoid cache impact @@ -361,7 +367,7 @@ 520 x32 execve __x32_compat_sys_execve/ptregs 521 x32 ptrace __x32_compat_sys_ptrace 522 x32 rt_sigpending __x32_compat_sys_rt_sigpending -523 x32 rt_sigtimedwait __x32_compat_sys_rt_sigtimedwait +523 x32 rt_sigtimedwait __x32_compat_sys_rt_sigtimedwait_time64 524 x32 rt_sigqueueinfo __x32_compat_sys_rt_sigqueueinfo 525 x32 sigaltstack __x32_compat_sys_sigaltstack 526 x32 timer_create __x32_compat_sys_timer_create @@ -375,7 +381,7 @@ 534 x32 preadv __x32_compat_sys_preadv64 535 x32 pwritev __x32_compat_sys_pwritev64 536 x32 rt_tgsigqueueinfo __x32_compat_sys_rt_tgsigqueueinfo -537 x32 
recvmmsg __x32_compat_sys_recvmmsg +537 x32 recvmmsg __x32_compat_sys_recvmmsg_time64 538 x32 sendmmsg __x32_compat_sys_sendmmsg 539 x32 process_vm_readv __x32_compat_sys_process_vm_readv 540 x32 process_vm_writev __x32_compat_sys_process_vm_writev diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build index 7aab0be5fc5f..47f9c56e744f 100644 --- a/tools/perf/arch/x86/util/Build +++ b/tools/perf/arch/x86/util/Build @@ -14,5 +14,6 @@ perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o perf-$(CONFIG_AUXTRACE) += auxtrace.o +perf-$(CONFIG_AUXTRACE) += archinsn.o perf-$(CONFIG_AUXTRACE) += intel-pt.o perf-$(CONFIG_AUXTRACE) += intel-bts.o diff --git a/tools/perf/arch/x86/util/archinsn.c b/tools/perf/arch/x86/util/archinsn.c new file mode 100644 index 000000000000..4237bb2e7fa2 --- /dev/null +++ b/tools/perf/arch/x86/util/archinsn.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "perf.h" +#include "archinsn.h" +#include "util/intel-pt-decoder/insn.h" +#include "machine.h" +#include "thread.h" +#include "symbol.h" + +void arch_fetch_insn(struct perf_sample *sample, + struct thread *thread, + struct machine *machine) +{ + struct insn insn; + int len; + bool is64bit = false; + + if (!sample->ip) + return; + len = thread__memcpy(thread, machine, sample->insn, sample->ip, sizeof(sample->insn), &is64bit); + if (len <= 0) + return; + insn_init(&insn, sample->insn, len, is64bit); + insn_get_length(&insn); + if (insn_complete(&insn) && insn.length <= len) + sample->insn_len = insn.length; +} diff --git a/tools/perf/bench/epoll-ctl.c b/tools/perf/bench/epoll-ctl.c index 0c0a6e824934..2af067859966 100644 --- a/tools/perf/bench/epoll-ctl.c +++ b/tools/perf/bench/epoll-ctl.c @@ -224,7 +224,7 @@ static int do_threads(struct worker *worker, struct cpu_map *cpu) pthread_attr_t thread_attr, *attrp = NULL; cpu_set_t cpuset; unsigned int i, j; - int ret; + int ret = 0; if (!noaffinity) 
pthread_attr_init(&thread_attr); diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c index 5a11534e96a0..fe85448abd45 100644 --- a/tools/perf/bench/epoll-wait.c +++ b/tools/perf/bench/epoll-wait.c @@ -293,7 +293,7 @@ static int do_threads(struct worker *worker, struct cpu_map *cpu) pthread_attr_t thread_attr, *attrp = NULL; cpu_set_t cpuset; unsigned int i, j; - int ret, events = EPOLLIN; + int ret = 0, events = EPOLLIN; if (oneshot) events |= EPOLLONESHOT; diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index c9f98d00c0e9..a8394b4f1167 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -119,7 +119,7 @@ int cmd_list(int argc, const char **argv) details_flag); print_tracepoint_events(NULL, s, raw_dump); print_sdt_events(NULL, s, raw_dump); - metricgroup__print(true, true, NULL, raw_dump, details_flag); + metricgroup__print(true, true, s, raw_dump, details_flag); free(s); } } diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index f3f7f3100336..4e2d953d4bc5 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -62,6 +62,9 @@ struct switch_output { unsigned long time; const char *str; bool set; + char **filenames; + int num_files; + int cur_file; }; struct record { @@ -392,7 +395,7 @@ static int record__process_auxtrace(struct perf_tool *tool, size_t padding; u8 pad[8] = {0}; - if (!perf_data__is_pipe(data)) { + if (!perf_data__is_pipe(data) && !perf_data__is_dir(data)) { off_t file_offset; int fd = perf_data__fd(data); int err; @@ -837,6 +840,8 @@ static void record__init_features(struct record *rec) if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns)) perf_header__clear_feat(&session->header, HEADER_CLOCKID); + perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT); + perf_header__clear_feat(&session->header, HEADER_STAT); } @@ -890,6 +895,7 @@ record__switch_output(struct record *rec, bool at_exit) { struct perf_data *data = &rec->data; int fd, 
err; + char *new_filename; /* Same Size: "2015122520103046"*/ char timestamp[] = "InvalidTimestamp"; @@ -910,7 +916,7 @@ record__switch_output(struct record *rec, bool at_exit) fd = perf_data__switch(data, timestamp, rec->session->header.data_offset, - at_exit); + at_exit, &new_filename); if (fd >= 0 && !at_exit) { rec->bytes_written = 0; rec->session->header.data_size = 0; @@ -920,6 +926,21 @@ record__switch_output(struct record *rec, bool at_exit) fprintf(stderr, "[ perf record: Dump %s.%s ]\n", data->path, timestamp); + if (rec->switch_output.num_files) { + int n = rec->switch_output.cur_file + 1; + + if (n >= rec->switch_output.num_files) + n = 0; + rec->switch_output.cur_file = n; + if (rec->switch_output.filenames[n]) { + remove(rec->switch_output.filenames[n]); + free(rec->switch_output.filenames[n]); + } + rec->switch_output.filenames[n] = new_filename; + } else { + free(new_filename); + } + /* Output tracking events */ if (!at_exit) { record__synthesize(rec, false); @@ -1093,7 +1114,7 @@ static int record__synthesize(struct record *rec, bool tail) return err; } - err = perf_event__synthesize_bpf_events(tool, process_synthesized_event, + err = perf_event__synthesize_bpf_events(session, process_synthesized_event, machine, opts); if (err < 0) pr_warning("Couldn't synthesize bpf events.\n"); @@ -1116,6 +1137,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) struct perf_data *data = &rec->data; struct perf_session *session; bool disabled = false, draining = false; + struct perf_evlist *sb_evlist = NULL; int fd; atexit(record__sig_exit); @@ -1216,6 +1238,14 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) goto out_child; } + if (!opts->no_bpf_event) + bpf_event__add_sb_event(&sb_evlist, &session->header.env); + + if (perf_evlist__start_sb_thread(sb_evlist, &rec->opts.target)) { + pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n"); + 
opts->no_bpf_event = true; + } + err = record__synthesize(rec, false); if (err < 0) goto out_child; @@ -1466,6 +1496,9 @@ out_child: out_delete_session: perf_session__delete(session); + + if (!opts->no_bpf_event) + perf_evlist__stop_sb_thread(sb_evlist); return status; } @@ -1870,7 +1903,7 @@ static struct option __record_options[] = { OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize, "synthesize non-sample events at the end of output"), OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"), - OPT_BOOLEAN(0, "bpf-event", &record.opts.bpf_event, "record bpf events"), + OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "record bpf events"), OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq, "Fail if the specified frequency can't be used"), OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'", @@ -1968,9 +2001,11 @@ static struct option __record_options[] = { OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary, "Record timestamp boundary (time of first/last samples)"), OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str, - &record.switch_output.set, "signal,size,time", - "Switch output when receive SIGUSR2 or cross size,time threshold", + &record.switch_output.set, "signal or size[BKMG] or time[smhd]", + "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold", "signal"), + OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files, + "Limit number of switch output generated files"), OPT_BOOLEAN(0, "dry-run", &dry_run, "Parse options then exit"), #ifdef HAVE_AIO_SUPPORT @@ -2057,6 +2092,13 @@ int cmd_record(int argc, const char **argv) alarm(rec->switch_output.time); } + if (rec->switch_output.num_files) { + rec->switch_output.filenames = calloc(sizeof(char *), + rec->switch_output.num_files); + if (!rec->switch_output.filenames) + return -EINVAL; + } + /* * Allow aliases to facilitate the lookup of symbols for address * filters. 
Refer to auxtrace_parse_filters(). diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index ee93c18a6685..4054eb1f98ac 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -47,9 +47,11 @@ #include <errno.h> #include <inttypes.h> #include <regex.h> +#include "sane_ctype.h" #include <signal.h> #include <linux/bitmap.h> #include <linux/stringify.h> +#include <linux/time64.h> #include <sys/types.h> #include <sys/stat.h> #include <unistd.h> @@ -926,6 +928,43 @@ report_parse_callchain_opt(const struct option *opt, const char *arg, int unset) return parse_callchain_report_opt(arg); } +static int +parse_time_quantum(const struct option *opt, const char *arg, + int unset __maybe_unused) +{ + unsigned long *time_q = opt->value; + char *end; + + *time_q = strtoul(arg, &end, 0); + if (end == arg) + goto parse_err; + if (*time_q == 0) { + pr_err("time quantum cannot be 0"); + return -1; + } + while (isspace(*end)) + end++; + if (*end == 0) + return 0; + if (!strcmp(end, "s")) { + *time_q *= NSEC_PER_SEC; + return 0; + } + if (!strcmp(end, "ms")) { + *time_q *= NSEC_PER_MSEC; + return 0; + } + if (!strcmp(end, "us")) { + *time_q *= NSEC_PER_USEC; + return 0; + } + if (!strcmp(end, "ns")) + return 0; +parse_err: + pr_err("Cannot parse time quantum `%s'\n", arg); + return -1; +} + int report_parse_ignore_callees_opt(const struct option *opt __maybe_unused, const char *arg, int unset __maybe_unused) @@ -1044,10 +1083,9 @@ int cmd_report(int argc, const char **argv) OPT_BOOLEAN(0, "header-only", &report.header_only, "Show only data header."), OPT_STRING('s', "sort", &sort_order, "key[,key2...]", - "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline, ..." 
- " Please refer the man page for the complete list."), + sort_help("sort by key(s):")), OPT_STRING('F', "fields", &field_order, "key[,keys...]", - "output field(s): overhead, period, sample plus all of sort keys"), + sort_help("output field(s): overhead period sample ")), OPT_BOOLEAN(0, "show-cpu-utilization", &symbol_conf.show_cpu_utilization, "Show sample percentage for different cpu modes"), OPT_BOOLEAN_FLAG(0, "showcpuutilization", &symbol_conf.show_cpu_utilization, @@ -1120,6 +1158,8 @@ int cmd_report(int argc, const char **argv) OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel, "Enable kernel symbol demangling"), OPT_BOOLEAN(0, "mem-mode", &report.mem_mode, "mem access profile"), + OPT_INTEGER(0, "samples", &symbol_conf.res_sample, + "Number of samples to save per histogram entry for individual browsing"), OPT_CALLBACK(0, "percent-limit", &report, "percent", "Don't show entries under that percent", parse_percent_limit), OPT_CALLBACK(0, "percentage", NULL, "relative|absolute", @@ -1147,6 +1187,10 @@ int cmd_report(int argc, const char **argv) OPT_CALLBACK(0, "percent-type", &report.annotation_opts, "local-period", "Set percent type local/global-period/hits", annotate_parse_percent_type), + OPT_BOOLEAN(0, "ns", &symbol_conf.nanosecs, "Show times in nanosecs"), + OPT_CALLBACK(0, "time-quantum", &symbol_conf.time_quantum, "time (ms|us|ns|s)", + "Set time quantum for time sort key (default 100ms)", + parse_time_quantum), OPT_END() }; struct perf_data data = { diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 53f78cf3113f..61cfd8f70989 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -29,10 +29,12 @@ #include "util/time-utils.h" #include "util/path.h" #include "print_binary.h" +#include "archinsn.h" #include <linux/bitmap.h> #include <linux/kernel.h> #include <linux/stringify.h> #include <linux/time64.h> +#include <sys/utsname.h> #include "asm/bug.h" #include "util/mem-events.h" #include 
"util/dump-insn.h" @@ -51,6 +53,8 @@ static char const *script_name; static char const *generate_script_lang; +static bool reltime; +static u64 initial_time; static bool debug_mode; static u64 last_timestamp; static u64 nr_unordered; @@ -58,11 +62,11 @@ static bool no_callchain; static bool latency_format; static bool system_wide; static bool print_flags; -static bool nanosecs; static const char *cpu_list; static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); static struct perf_stat_config stat_config; static int max_blocks; +static bool native_arch; unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH; @@ -684,15 +688,21 @@ static int perf_sample__fprintf_start(struct perf_sample *sample, } if (PRINT_FIELD(TIME)) { - nsecs = sample->time; + u64 t = sample->time; + if (reltime) { + if (!initial_time) + initial_time = sample->time; + t = sample->time - initial_time; + } + nsecs = t; secs = nsecs / NSEC_PER_SEC; nsecs -= secs * NSEC_PER_SEC; - if (nanosecs) + if (symbol_conf.nanosecs) printed += fprintf(fp, "%5lu.%09llu: ", secs, nsecs); else { char sample_time[32]; - timestamp__scnprintf_usec(sample->time, sample_time, sizeof(sample_time)); + timestamp__scnprintf_usec(t, sample_time, sizeof(sample_time)); printed += fprintf(fp, "%12s: ", sample_time); } } @@ -1227,6 +1237,12 @@ static int perf_sample__fprintf_callindent(struct perf_sample *sample, return len + dlen; } +__weak void arch_fetch_insn(struct perf_sample *sample __maybe_unused, + struct thread *thread __maybe_unused, + struct machine *machine __maybe_unused) +{ +} + static int perf_sample__fprintf_insn(struct perf_sample *sample, struct perf_event_attr *attr, struct thread *thread, @@ -1234,9 +1250,12 @@ static int perf_sample__fprintf_insn(struct perf_sample *sample, { int printed = 0; + if (sample->insn_len == 0 && native_arch) + arch_fetch_insn(sample, thread, machine); + if (PRINT_FIELD(INSNLEN)) printed += fprintf(fp, " ilen: %d", sample->insn_len); - if (PRINT_FIELD(INSN)) { + if (PRINT_FIELD(INSN) && 
sample->insn_len) { int i; printed += fprintf(fp, " insn:"); @@ -1922,6 +1941,13 @@ static int cleanup_scripting(void) return scripting_ops ? scripting_ops->stop_script() : 0; } +static bool filter_cpu(struct perf_sample *sample) +{ + if (cpu_list) + return !test_bit(sample->cpu, cpu_bitmap); + return false; +} + static int process_sample_event(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -1956,7 +1982,7 @@ static int process_sample_event(struct perf_tool *tool, if (al.filtered) goto out_put; - if (cpu_list && !test_bit(sample->cpu, cpu_bitmap)) + if (filter_cpu(sample)) goto out_put; if (scripting_ops) @@ -2041,9 +2067,11 @@ static int process_comm_event(struct perf_tool *tool, sample->tid = event->comm.tid; sample->pid = event->comm.pid; } - perf_sample__fprintf_start(sample, thread, evsel, + if (!filter_cpu(sample)) { + perf_sample__fprintf_start(sample, thread, evsel, PERF_RECORD_COMM, stdout); - perf_event__fprintf(event, stdout); + perf_event__fprintf(event, stdout); + } ret = 0; out: thread__put(thread); @@ -2077,9 +2105,11 @@ static int process_namespaces_event(struct perf_tool *tool, sample->tid = event->namespaces.tid; sample->pid = event->namespaces.pid; } - perf_sample__fprintf_start(sample, thread, evsel, - PERF_RECORD_NAMESPACES, stdout); - perf_event__fprintf(event, stdout); + if (!filter_cpu(sample)) { + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_NAMESPACES, stdout); + perf_event__fprintf(event, stdout); + } ret = 0; out: thread__put(thread); @@ -2111,9 +2141,11 @@ static int process_fork_event(struct perf_tool *tool, sample->tid = event->fork.tid; sample->pid = event->fork.pid; } - perf_sample__fprintf_start(sample, thread, evsel, - PERF_RECORD_FORK, stdout); - perf_event__fprintf(event, stdout); + if (!filter_cpu(sample)) { + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_FORK, stdout); + perf_event__fprintf(event, stdout); + } thread__put(thread); return 0; @@ -2141,9 
+2173,11 @@ static int process_exit_event(struct perf_tool *tool, sample->tid = event->fork.tid; sample->pid = event->fork.pid; } - perf_sample__fprintf_start(sample, thread, evsel, - PERF_RECORD_EXIT, stdout); - perf_event__fprintf(event, stdout); + if (!filter_cpu(sample)) { + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_EXIT, stdout); + perf_event__fprintf(event, stdout); + } if (perf_event__process_exit(tool, event, sample, machine) < 0) err = -1; @@ -2177,9 +2211,11 @@ static int process_mmap_event(struct perf_tool *tool, sample->tid = event->mmap.tid; sample->pid = event->mmap.pid; } - perf_sample__fprintf_start(sample, thread, evsel, - PERF_RECORD_MMAP, stdout); - perf_event__fprintf(event, stdout); + if (!filter_cpu(sample)) { + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_MMAP, stdout); + perf_event__fprintf(event, stdout); + } thread__put(thread); return 0; } @@ -2209,9 +2245,11 @@ static int process_mmap2_event(struct perf_tool *tool, sample->tid = event->mmap2.tid; sample->pid = event->mmap2.pid; } - perf_sample__fprintf_start(sample, thread, evsel, - PERF_RECORD_MMAP2, stdout); - perf_event__fprintf(event, stdout); + if (!filter_cpu(sample)) { + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_MMAP2, stdout); + perf_event__fprintf(event, stdout); + } thread__put(thread); return 0; } @@ -2236,9 +2274,11 @@ static int process_switch_event(struct perf_tool *tool, return -1; } - perf_sample__fprintf_start(sample, thread, evsel, - PERF_RECORD_SWITCH, stdout); - perf_event__fprintf(event, stdout); + if (!filter_cpu(sample)) { + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_SWITCH, stdout); + perf_event__fprintf(event, stdout); + } thread__put(thread); return 0; } @@ -2259,9 +2299,11 @@ process_lost_event(struct perf_tool *tool, if (thread == NULL) return -1; - perf_sample__fprintf_start(sample, thread, evsel, - PERF_RECORD_LOST, stdout); - perf_event__fprintf(event, stdout); + if 
(!filter_cpu(sample)) { + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_LOST, stdout); + perf_event__fprintf(event, stdout); + } thread__put(thread); return 0; } @@ -2948,7 +2990,8 @@ static int check_ev_match(char *dir_name, char *scriptname, * will list all statically runnable scripts, select one, execute it and * show the output in a perf browser. */ -int find_scripts(char **scripts_array, char **scripts_path_array) +int find_scripts(char **scripts_array, char **scripts_path_array, int num, + int pathlen) { struct dirent *script_dirent, *lang_dirent; char scripts_path[MAXPATHLEN], lang_path[MAXPATHLEN]; @@ -2993,7 +3036,10 @@ int find_scripts(char **scripts_array, char **scripts_path_array) /* Skip those real time scripts: xxxtop.p[yl] */ if (strstr(script_dirent->d_name, "top.")) continue; - sprintf(scripts_path_array[i], "%s/%s", lang_path, + if (i >= num) + break; + snprintf(scripts_path_array[i], pathlen, "%s/%s", + lang_path, script_dirent->d_name); temp = strchr(script_dirent->d_name, '.'); snprintf(scripts_array[i], @@ -3232,7 +3278,7 @@ static int parse_insn_trace(const struct option *opt __maybe_unused, { parse_output_fields(NULL, "+insn,-event,-period", 0); itrace_parse_synth_opts(opt, "i0ns", 0); - nanosecs = true; + symbol_conf.nanosecs = true; return 0; } @@ -3250,7 +3296,7 @@ static int parse_call_trace(const struct option *opt __maybe_unused, { parse_output_fields(NULL, "-ip,-addr,-event,-period,+callindent", 0); itrace_parse_synth_opts(opt, "cewp", 0); - nanosecs = true; + symbol_conf.nanosecs = true; return 0; } @@ -3260,7 +3306,7 @@ static int parse_callret_trace(const struct option *opt __maybe_unused, { parse_output_fields(NULL, "-ip,-addr,-event,-period,+callindent,+flags", 0); itrace_parse_synth_opts(opt, "crewp", 0); - nanosecs = true; + symbol_conf.nanosecs = true; return 0; } @@ -3277,6 +3323,7 @@ int cmd_script(int argc, const char **argv) .set = false, .default_no_sample = true, }; + struct utsname uts; char 
*script_path = NULL; const char **__argv; int i, j, err = 0; @@ -3374,6 +3421,7 @@ int cmd_script(int argc, const char **argv) "Set the maximum stack depth when parsing the callchain, " "anything beyond the specified depth will be ignored. " "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)), + OPT_BOOLEAN(0, "reltime", &reltime, "Show time stamps relative to start"), OPT_BOOLEAN('I', "show-info", &show_full_info, "display extended information from perf.data file"), OPT_BOOLEAN('\0', "show-kernel-path", &symbol_conf.show_kernel_path, @@ -3395,7 +3443,7 @@ int cmd_script(int argc, const char **argv) OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"), OPT_INTEGER(0, "max-blocks", &max_blocks, "Maximum number of code blocks to dump with brstackinsn"), - OPT_BOOLEAN(0, "ns", &nanosecs, + OPT_BOOLEAN(0, "ns", &symbol_conf.nanosecs, "Use 9 decimal places when displaying time"), OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts", "Instruction Tracing options\n" ITRACE_HELP, @@ -3448,6 +3496,11 @@ int cmd_script(int argc, const char **argv) } } + if (script.time_str && reltime) { + fprintf(stderr, "Don't combine --reltime with --time\n"); + return -1; + } + if (itrace_synth_opts.callchain && itrace_synth_opts.callchain_sz > scripting_max_stack) scripting_max_stack = itrace_synth_opts.callchain_sz; @@ -3615,6 +3668,12 @@ int cmd_script(int argc, const char **argv) if (symbol__init(&session->header.env) < 0) goto out_delete; + uname(&uts); + if (!strcmp(uts.machine, session->header.env.arch) || + (!strcmp(uts.machine, "x86_64") && + !strcmp(session->header.env.arch, "i386"))) + native_arch = true; + script.session = session; script__setup_sample_type(&script); diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 7b8f09b0b8bf..c3625ec374e0 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -718,7 +718,8 @@ static struct option stat_options[] = { "system-wide collection 
from all CPUs"), OPT_BOOLEAN('g', "group", &group, "put the counters into a counter group"), - OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"), + OPT_BOOLEAN(0, "scale", &stat_config.scale, + "Use --no-scale to disable counter scaling for multiplexing"), OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), OPT_INTEGER('r', "repeat", &stat_config.run_count, @@ -1307,6 +1308,7 @@ static void init_features(struct perf_session *session) for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++) perf_header__set_feat(&session->header, feat); + perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT); perf_header__clear_feat(&session->header, HEADER_BUILD_ID); perf_header__clear_feat(&session->header, HEADER_TRACING_DATA); perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 231a90daa958..fbbb0da43abb 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1189,30 +1189,26 @@ static int __cmd_top(struct perf_top *top) pthread_t thread, thread_process; int ret; - top->session = perf_session__new(NULL, false, NULL); - if (top->session == NULL) - return -1; - if (!top->annotation_opts.objdump_path) { ret = perf_env__lookup_objdump(&top->session->header.env, &top->annotation_opts.objdump_path); if (ret) - goto out_delete; + return ret; } ret = callchain_param__setup_sample_type(&callchain_param); if (ret) - goto out_delete; + return ret; if (perf_session__register_idle_thread(top->session) < 0) - goto out_delete; + return ret; if (top->nr_threads_synthesize > 1) perf_set_multithreaded(); init_process_thread(top); - ret = perf_event__synthesize_bpf_events(&top->tool, perf_event__process, + ret = perf_event__synthesize_bpf_events(top->session, perf_event__process, &top->session->machines.host, &top->record_opts); if (ret < 0) @@ -1227,13 +1223,18 @@ static int __cmd_top(struct perf_top *top) if 
(perf_hpp_list.socket) { ret = perf_env__read_cpu_topology_map(&perf_env); - if (ret < 0) - goto out_err_cpu_topo; + if (ret < 0) { + char errbuf[BUFSIZ]; + const char *err = str_error_r(-ret, errbuf, sizeof(errbuf)); + + ui__error("Could not read the CPU topology map: %s\n", err); + return ret; + } } ret = perf_top__start_counters(top); if (ret) - goto out_delete; + return ret; top->session->evlist = top->evlist; perf_session__set_id_hdr_size(top->session); @@ -1252,7 +1253,7 @@ static int __cmd_top(struct perf_top *top) ret = -1; if (pthread_create(&thread_process, NULL, process_thread, top)) { ui__error("Could not create process thread.\n"); - goto out_delete; + return ret; } if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui : @@ -1296,19 +1297,7 @@ out_join: out_join_thread: pthread_cond_signal(&top->qe.cond); pthread_join(thread_process, NULL); -out_delete: - perf_session__delete(top->session); - top->session = NULL; - return ret; - -out_err_cpu_topo: { - char errbuf[BUFSIZ]; - const char *err = str_error_r(-ret, errbuf, sizeof(errbuf)); - - ui__error("Could not read the CPU topology map: %s\n", err); - goto out_delete; -} } static int @@ -1388,6 +1377,7 @@ int cmd_top(int argc, const char **argv) * */ .overwrite = 0, .sample_time = true, + .sample_time_set = true, }, .max_stack = sysctl__max_stack(), .annotation_opts = annotation__default_options, @@ -1480,6 +1470,7 @@ int cmd_top(int argc, const char **argv) "Display raw encoding of assembly instructions (default)"), OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel, "Enable kernel symbol demangling"), + OPT_BOOLEAN(0, "no-bpf-event", &top.record_opts.no_bpf_event, "do not record bpf events"), OPT_STRING(0, "objdump", &top.annotation_opts.objdump_path, "path", "objdump binary to use for disassembly and annotations"), OPT_STRING('M', "disassembler-style", &top.annotation_opts.disassembler_style, "disassembler style", @@ -1511,6 +1502,7 @@ int cmd_top(int argc, const char 
**argv) "number of thread to run event synthesize"), OPT_END() }; + struct perf_evlist *sb_evlist = NULL; const char * const top_usage[] = { "perf top [<options>]", NULL @@ -1628,8 +1620,9 @@ int cmd_top(int argc, const char **argv) annotation_config__init(); symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL); - if (symbol__init(NULL) < 0) - return -1; + status = symbol__init(NULL); + if (status < 0) + goto out_delete_evlist; sort__setup_elide(stdout); @@ -1639,10 +1632,28 @@ int cmd_top(int argc, const char **argv) signal(SIGWINCH, winch_sig); } + top.session = perf_session__new(NULL, false, NULL); + if (top.session == NULL) { + status = -1; + goto out_delete_evlist; + } + + if (!top.record_opts.no_bpf_event) + bpf_event__add_sb_event(&sb_evlist, &perf_env); + + if (perf_evlist__start_sb_thread(sb_evlist, target)) { + pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n"); + opts->no_bpf_event = true; + } + status = __cmd_top(&top); + if (!opts->no_bpf_event) + perf_evlist__stop_sb_thread(sb_evlist); + out_delete_evlist: perf_evlist__delete(top.evlist); + perf_session__delete(top.session); return status; } diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h index 05745f3ce912..999fe9170122 100644 --- a/tools/perf/builtin.h +++ b/tools/perf/builtin.h @@ -40,5 +40,6 @@ int cmd_mem(int argc, const char **argv); int cmd_data(int argc, const char **argv); int cmd_ftrace(int argc, const char **argv); -int find_scripts(char **scripts_array, char **scripts_path_array); +int find_scripts(char **scripts_array, char **scripts_path_array, int num, + int pathlen); #endif diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 7b55613924de..c68ee06cae63 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -103,7 +103,7 @@ done # diff with extra ignore lines check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>"' check 
arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>"' -check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common.h>"' +check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common\(-tools\)*.h>"' check include/uapi/linux/mman.h '-I "^#include <\(uapi/\)*asm/mman.h>"' # diff non-symmetric files diff --git a/tools/perf/perf.c b/tools/perf/perf.c index a11cb006f968..72df4b6fa36f 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -298,6 +298,7 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv) use_pager = 1; commit_pager_choice(); + perf_env__init(&perf_env); perf_env__set_cmdline(&perf_env, argc, argv); status = p->fn(argc, argv); perf_config__exit(); diff --git a/tools/perf/perf.h b/tools/perf/perf.h index b120e547ddc7..c59743def8d3 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -66,7 +66,7 @@ struct record_opts { bool ignore_missing_thread; bool strict_freq; bool sample_id; - bool bpf_event; + bool no_bpf_event; unsigned int freq; unsigned int mmap_pages; unsigned int auxtrace_mmap_pages; diff --git a/tools/perf/pmu-events/arch/powerpc/power8/other.json b/tools/perf/pmu-events/arch/powerpc/power8/other.json index 704302c3e67d..9dc2f6b70354 100644 --- a/tools/perf/pmu-events/arch/powerpc/power8/other.json +++ b/tools/perf/pmu-events/arch/powerpc/power8/other.json @@ -348,18 +348,6 @@ "PublicDescription": "" }, {, - "EventCode": "0x517082", - "EventName": "PM_CO_DISP_FAIL", - "BriefDescription": "CO dispatch failed due to all CO machines being busy", - "PublicDescription": "" - }, - {, - "EventCode": "0x527084", - "EventName": "PM_CO_TM_SC_FOOTPRINT", - "BriefDescription": "L2 did a cleanifdirty CO to the L3 (ie created an SC line in the L3)", - "PublicDescription": "" - }, - {, "EventCode": "0x3608a", "EventName": "PM_CO_USAGE", "BriefDescription": "Continuous 16 cycle(2to1) window where this signals rotates thru sampling each L2 CO machine 
busy. PMU uses this wave to then do 16 cyc count to sample total number of machs running", @@ -1578,36 +1566,12 @@ "PublicDescription": "" }, {, - "EventCode": "0x617082", - "EventName": "PM_ISIDE_DISP", - "BriefDescription": "All i-side dispatch attempts", - "PublicDescription": "" - }, - {, - "EventCode": "0x627084", - "EventName": "PM_ISIDE_DISP_FAIL", - "BriefDescription": "All i-side dispatch attempts that failed due to a addr collision with another machine", - "PublicDescription": "" - }, - {, - "EventCode": "0x627086", - "EventName": "PM_ISIDE_DISP_FAIL_OTHER", - "BriefDescription": "All i-side dispatch attempts that failed due to a reason other than addrs collision", - "PublicDescription": "" - }, - {, "EventCode": "0x4608e", "EventName": "PM_ISIDE_L2MEMACC", "BriefDescription": "valid when first beat of data comes in for an i-side fetch where data came from mem(or L4)", "PublicDescription": "" }, {, - "EventCode": "0x44608e", - "EventName": "PM_ISIDE_MRU_TOUCH", - "BriefDescription": "Iside L2 MRU touch", - "PublicDescription": "" - }, - {, "EventCode": "0x30ac", "EventName": "PM_ISU_REF_FX0", "BriefDescription": "FX0 ISU reject", @@ -1734,222 +1698,36 @@ "PublicDescription": "" }, {, - "EventCode": "0x417080", - "EventName": "PM_L2_CASTOUT_MOD", - "BriefDescription": "L2 Castouts - Modified (M, Mu, Me)", - "PublicDescription": "" - }, - {, - "EventCode": "0x417082", - "EventName": "PM_L2_CASTOUT_SHR", - "BriefDescription": "L2 Castouts - Shared (T, Te, Si, S)", - "PublicDescription": "" - }, - {, "EventCode": "0x27084", "EventName": "PM_L2_CHIP_PUMP", "BriefDescription": "RC requests that were local on chip pump attempts", "PublicDescription": "" }, {, - "EventCode": "0x427086", - "EventName": "PM_L2_DC_INV", - "BriefDescription": "Dcache invalidates from L2", - "PublicDescription": "" - }, - {, - "EventCode": "0x44608c", - "EventName": "PM_L2_DISP_ALL_L2MISS", - "BriefDescription": "All successful Ld/St dispatches for this thread that were an L2miss", - 
"PublicDescription": "" - }, - {, "EventCode": "0x27086", "EventName": "PM_L2_GROUP_PUMP", "BriefDescription": "RC requests that were on Node Pump attempts", "PublicDescription": "" }, {, - "EventCode": "0x626084", - "EventName": "PM_L2_GRP_GUESS_CORRECT", - "BriefDescription": "L2 guess grp and guess was correct (data intra-6chip AND ^on-chip)", - "PublicDescription": "" - }, - {, - "EventCode": "0x626086", - "EventName": "PM_L2_GRP_GUESS_WRONG", - "BriefDescription": "L2 guess grp and guess was not correct (ie data on-chip OR beyond-6chip)", - "PublicDescription": "" - }, - {, - "EventCode": "0x427084", - "EventName": "PM_L2_IC_INV", - "BriefDescription": "Icache Invalidates from L2", - "PublicDescription": "" - }, - {, - "EventCode": "0x436088", - "EventName": "PM_L2_INST", - "BriefDescription": "All successful I-side dispatches for this thread (excludes i_l2mru_tch reqs)", - "PublicDescription": "" - }, - {, - "EventCode": "0x43608a", - "EventName": "PM_L2_INST_MISS", - "BriefDescription": "All successful i-side dispatches that were an L2miss for this thread (excludes i_l2mru_tch reqs)", - "PublicDescription": "" - }, - {, - "EventCode": "0x416080", - "EventName": "PM_L2_LD", - "BriefDescription": "All successful D-side Load dispatches for this thread", - "PublicDescription": "" - }, - {, - "EventCode": "0x437088", - "EventName": "PM_L2_LD_DISP", - "BriefDescription": "All successful load dispatches", - "PublicDescription": "" - }, - {, - "EventCode": "0x43708a", - "EventName": "PM_L2_LD_HIT", - "BriefDescription": "All successful load dispatches that were L2 hits", - "PublicDescription": "" - }, - {, - "EventCode": "0x426084", - "EventName": "PM_L2_LD_MISS", - "BriefDescription": "All successful D-Side Load dispatches that were an L2miss for this thread", - "PublicDescription": "" - }, - {, - "EventCode": "0x616080", - "EventName": "PM_L2_LOC_GUESS_CORRECT", - "BriefDescription": "L2 guess loc and guess was correct (ie data local)", - "PublicDescription": "" - 
}, - {, - "EventCode": "0x616082", - "EventName": "PM_L2_LOC_GUESS_WRONG", - "BriefDescription": "L2 guess loc and guess was not correct (ie data not on chip)", - "PublicDescription": "" - }, - {, - "EventCode": "0x516080", - "EventName": "PM_L2_RCLD_DISP", - "BriefDescription": "L2 RC load dispatch attempt", - "PublicDescription": "" - }, - {, - "EventCode": "0x516082", - "EventName": "PM_L2_RCLD_DISP_FAIL_ADDR", - "BriefDescription": "L2 RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ", - "PublicDescription": "" - }, - {, - "EventCode": "0x526084", - "EventName": "PM_L2_RCLD_DISP_FAIL_OTHER", - "BriefDescription": "L2 RC load dispatch attempt failed due to other reasons", - "PublicDescription": "" - }, - {, - "EventCode": "0x536088", - "EventName": "PM_L2_RCST_DISP", - "BriefDescription": "L2 RC store dispatch attempt", - "PublicDescription": "" - }, - {, - "EventCode": "0x53608a", - "EventName": "PM_L2_RCST_DISP_FAIL_ADDR", - "BriefDescription": "L2 RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ", - "PublicDescription": "" - }, - {, - "EventCode": "0x54608c", - "EventName": "PM_L2_RCST_DISP_FAIL_OTHER", - "BriefDescription": "L2 RC store dispatch attempt failed due to other reasons", - "PublicDescription": "" - }, - {, - "EventCode": "0x537088", - "EventName": "PM_L2_RC_ST_DONE", - "BriefDescription": "RC did st to line that was Tx or Sx", - "PublicDescription": "" - }, - {, - "EventCode": "0x63708a", - "EventName": "PM_L2_RTY_LD", - "BriefDescription": "RC retries on PB for any load from core", - "PublicDescription": "" - }, - {, "EventCode": "0x3708a", "EventName": "PM_L2_RTY_ST", "BriefDescription": "RC retries on PB for any store from core", "PublicDescription": "" }, {, - "EventCode": "0x54708c", - "EventName": "PM_L2_SN_M_RD_DONE", - "BriefDescription": "SNP dispatched for a read and was M", - "PublicDescription": "" - }, - {, - "EventCode": "0x54708e", - "EventName": "PM_L2_SN_M_WR_DONE", - 
"BriefDescription": "SNP dispatched for a write and was M", - "PublicDescription": "" - }, - {, - "EventCode": "0x53708a", - "EventName": "PM_L2_SN_SX_I_DONE", - "BriefDescription": "SNP dispatched and went from Sx or Tx to Ix", - "PublicDescription": "" - }, - {, "EventCode": "0x17080", "EventName": "PM_L2_ST", "BriefDescription": "All successful D-side store dispatches for this thread", "PublicDescription": "" }, {, - "EventCode": "0x44708c", - "EventName": "PM_L2_ST_DISP", - "BriefDescription": "All successful store dispatches", - "PublicDescription": "" - }, - {, - "EventCode": "0x44708e", - "EventName": "PM_L2_ST_HIT", - "BriefDescription": "All successful store dispatches that were L2Hits", - "PublicDescription": "" - }, - {, "EventCode": "0x17082", "EventName": "PM_L2_ST_MISS", "BriefDescription": "All successful D-side store dispatches for this thread that were L2 Miss", "PublicDescription": "" }, {, - "EventCode": "0x636088", - "EventName": "PM_L2_SYS_GUESS_CORRECT", - "BriefDescription": "L2 guess sys and guess was correct (ie data beyond-6chip)", - "PublicDescription": "" - }, - {, - "EventCode": "0x63608a", - "EventName": "PM_L2_SYS_GUESS_WRONG", - "BriefDescription": "L2 guess sys and guess was not correct (ie data ^beyond-6chip)", - "PublicDescription": "" - }, - {, - "EventCode": "0x617080", - "EventName": "PM_L2_SYS_PUMP", - "BriefDescription": "RC requests that were system pump attempts", - "PublicDescription": "" - }, - {, "EventCode": "0x1e05e", "EventName": "PM_L2_TM_REQ_ABORT", "BriefDescription": "TM abort", @@ -1962,36 +1740,12 @@ "PublicDescription": "" }, {, - "EventCode": "0x23808a", - "EventName": "PM_L3_CINJ", - "BriefDescription": "l3 ci of cache inject", - "PublicDescription": "" - }, - {, - "EventCode": "0x128084", - "EventName": "PM_L3_CI_HIT", - "BriefDescription": "L3 Castins Hit (total count", - "PublicDescription": "" - }, - {, - "EventCode": "0x128086", - "EventName": "PM_L3_CI_MISS", - "BriefDescription": "L3 castins miss 
(total count", - "PublicDescription": "" - }, - {, "EventCode": "0x819082", "EventName": "PM_L3_CI_USAGE", "BriefDescription": "rotating sample of 16 CI or CO actives", "PublicDescription": "" }, {, - "EventCode": "0x438088", - "EventName": "PM_L3_CO", - "BriefDescription": "l3 castout occurring ( does not include casthrough or log writes (cinj/dmaw)", - "PublicDescription": "" - }, - {, "EventCode": "0x83908b", "EventName": "PM_L3_CO0_ALLOC", "BriefDescription": "lifetime, sample of CO machine 0 valid", @@ -2010,120 +1764,18 @@ "PublicDescription": "" }, {, - "EventCode": "0x238088", - "EventName": "PM_L3_CO_LCO", - "BriefDescription": "Total L3 castouts occurred on LCO", - "PublicDescription": "" - }, - {, "EventCode": "0x28084", "EventName": "PM_L3_CO_MEM", "BriefDescription": "L3 CO to memory OR of port 0 and 1 ( lossy)", "PublicDescription": "" }, {, - "EventCode": "0xb19082", - "EventName": "PM_L3_GRP_GUESS_CORRECT", - "BriefDescription": "Initial scope=group and data from same group (near) (pred successful)", - "PublicDescription": "" - }, - {, - "EventCode": "0xb3908a", - "EventName": "PM_L3_GRP_GUESS_WRONG_HIGH", - "BriefDescription": "Initial scope=group but data from local node. Predition too high", - "PublicDescription": "" - }, - {, - "EventCode": "0xb39088", - "EventName": "PM_L3_GRP_GUESS_WRONG_LOW", - "BriefDescription": "Initial scope=group but data from outside group (far or rem). 
Prediction too Low", - "PublicDescription": "" - }, - {, - "EventCode": "0x218080", - "EventName": "PM_L3_HIT", - "BriefDescription": "L3 Hits", - "PublicDescription": "" - }, - {, - "EventCode": "0x138088", - "EventName": "PM_L3_L2_CO_HIT", - "BriefDescription": "L2 castout hits", - "PublicDescription": "" - }, - {, - "EventCode": "0x13808a", - "EventName": "PM_L3_L2_CO_MISS", - "BriefDescription": "L2 castout miss", - "PublicDescription": "" - }, - {, - "EventCode": "0x14808c", - "EventName": "PM_L3_LAT_CI_HIT", - "BriefDescription": "L3 Lateral Castins Hit", - "PublicDescription": "" - }, - {, - "EventCode": "0x14808e", - "EventName": "PM_L3_LAT_CI_MISS", - "BriefDescription": "L3 Lateral Castins Miss", - "PublicDescription": "" - }, - {, - "EventCode": "0x228084", - "EventName": "PM_L3_LD_HIT", - "BriefDescription": "L3 demand LD Hits", - "PublicDescription": "" - }, - {, - "EventCode": "0x228086", - "EventName": "PM_L3_LD_MISS", - "BriefDescription": "L3 demand LD Miss", - "PublicDescription": "" - }, - {, "EventCode": "0x1e052", "EventName": "PM_L3_LD_PREF", "BriefDescription": "L3 Load Prefetches", "PublicDescription": "" }, {, - "EventCode": "0xb19080", - "EventName": "PM_L3_LOC_GUESS_CORRECT", - "BriefDescription": "initial scope=node/chip and data from local node (local) (pred successful)", - "PublicDescription": "" - }, - {, - "EventCode": "0xb29086", - "EventName": "PM_L3_LOC_GUESS_WRONG", - "BriefDescription": "Initial scope=node but data from out side local node (near or far or rem). 
Prediction too Low", - "PublicDescription": "" - }, - {, - "EventCode": "0x218082", - "EventName": "PM_L3_MISS", - "BriefDescription": "L3 Misses", - "PublicDescription": "" - }, - {, - "EventCode": "0x54808c", - "EventName": "PM_L3_P0_CO_L31", - "BriefDescription": "l3 CO to L3.1 (lco) port 0", - "PublicDescription": "" - }, - {, - "EventCode": "0x538088", - "EventName": "PM_L3_P0_CO_MEM", - "BriefDescription": "l3 CO to memory port 0", - "PublicDescription": "" - }, - {, - "EventCode": "0x929084", - "EventName": "PM_L3_P0_CO_RTY", - "BriefDescription": "L3 CO received retry port 0", - "PublicDescription": "" - }, - {, "EventCode": "0xa29084", "EventName": "PM_L3_P0_GRP_PUMP", "BriefDescription": "L3 pf sent with grp scope port 0", @@ -2148,120 +1800,6 @@ "PublicDescription": "" }, {, - "EventCode": "0xa19080", - "EventName": "PM_L3_P0_NODE_PUMP", - "BriefDescription": "L3 pf sent with nodal scope port 0", - "PublicDescription": "" - }, - {, - "EventCode": "0x919080", - "EventName": "PM_L3_P0_PF_RTY", - "BriefDescription": "L3 PF received retry port 0", - "PublicDescription": "" - }, - {, - "EventCode": "0x939088", - "EventName": "PM_L3_P0_SN_HIT", - "BriefDescription": "L3 snoop hit port 0", - "PublicDescription": "" - }, - {, - "EventCode": "0x118080", - "EventName": "PM_L3_P0_SN_INV", - "BriefDescription": "Port0 snooper detects someone doing a store to a line thats Sx", - "PublicDescription": "" - }, - {, - "EventCode": "0x94908c", - "EventName": "PM_L3_P0_SN_MISS", - "BriefDescription": "L3 snoop miss port 0", - "PublicDescription": "" - }, - {, - "EventCode": "0xa39088", - "EventName": "PM_L3_P0_SYS_PUMP", - "BriefDescription": "L3 pf sent with sys scope port 0", - "PublicDescription": "" - }, - {, - "EventCode": "0x54808e", - "EventName": "PM_L3_P1_CO_L31", - "BriefDescription": "l3 CO to L3.1 (lco) port 1", - "PublicDescription": "" - }, - {, - "EventCode": "0x53808a", - "EventName": "PM_L3_P1_CO_MEM", - "BriefDescription": "l3 CO to memory port 1", - 
"PublicDescription": "" - }, - {, - "EventCode": "0x929086", - "EventName": "PM_L3_P1_CO_RTY", - "BriefDescription": "L3 CO received retry port 1", - "PublicDescription": "" - }, - {, - "EventCode": "0xa29086", - "EventName": "PM_L3_P1_GRP_PUMP", - "BriefDescription": "L3 pf sent with grp scope port 1", - "PublicDescription": "" - }, - {, - "EventCode": "0x528086", - "EventName": "PM_L3_P1_LCO_DATA", - "BriefDescription": "lco sent with data port 1", - "PublicDescription": "" - }, - {, - "EventCode": "0x518082", - "EventName": "PM_L3_P1_LCO_NO_DATA", - "BriefDescription": "dataless l3 lco sent port 1", - "PublicDescription": "" - }, - {, - "EventCode": "0xa4908e", - "EventName": "PM_L3_P1_LCO_RTY", - "BriefDescription": "L3 LCO received retry port 1", - "PublicDescription": "" - }, - {, - "EventCode": "0xa19082", - "EventName": "PM_L3_P1_NODE_PUMP", - "BriefDescription": "L3 pf sent with nodal scope port 1", - "PublicDescription": "" - }, - {, - "EventCode": "0x919082", - "EventName": "PM_L3_P1_PF_RTY", - "BriefDescription": "L3 PF received retry port 1", - "PublicDescription": "" - }, - {, - "EventCode": "0x93908a", - "EventName": "PM_L3_P1_SN_HIT", - "BriefDescription": "L3 snoop hit port 1", - "PublicDescription": "" - }, - {, - "EventCode": "0x118082", - "EventName": "PM_L3_P1_SN_INV", - "BriefDescription": "Port1 snooper detects someone doing a store to a line thats Sx", - "PublicDescription": "" - }, - {, - "EventCode": "0x94908e", - "EventName": "PM_L3_P1_SN_MISS", - "BriefDescription": "L3 snoop miss port 1", - "PublicDescription": "" - }, - {, - "EventCode": "0xa3908a", - "EventName": "PM_L3_P1_SYS_PUMP", - "BriefDescription": "L3 pf sent with sys scope port 1", - "PublicDescription": "" - }, - {, "EventCode": "0x84908d", "EventName": "PM_L3_PF0_ALLOC", "BriefDescription": "lifetime, sample of PF machine 0 valid", @@ -2274,12 +1812,6 @@ "PublicDescription": "" }, {, - "EventCode": "0x428084", - "EventName": "PM_L3_PF_HIT_L3", - "BriefDescription": "l3 pf 
hit in l3", - "PublicDescription": "" - }, - {, "EventCode": "0x18080", "EventName": "PM_L3_PF_MISS_L3", "BriefDescription": "L3 Prefetch missed in L3", @@ -2370,42 +1902,12 @@ "PublicDescription": "" }, {, - "EventCode": "0xb29084", - "EventName": "PM_L3_SYS_GUESS_CORRECT", - "BriefDescription": "Initial scope=system and data from outside group (far or rem)(pred successful)", - "PublicDescription": "" - }, - {, - "EventCode": "0xb4908c", - "EventName": "PM_L3_SYS_GUESS_WRONG", - "BriefDescription": "Initial scope=system but data from local or near. Predction too high", - "PublicDescription": "" - }, - {, - "EventCode": "0x24808e", - "EventName": "PM_L3_TRANS_PF", - "BriefDescription": "L3 Transient prefetch", - "PublicDescription": "" - }, - {, "EventCode": "0x18081", "EventName": "PM_L3_WI0_ALLOC", "BriefDescription": "lifetime, sample of Write Inject machine 0 valid", "PublicDescription": "0.0" }, {, - "EventCode": "0x418080", - "EventName": "PM_L3_WI0_BUSY", - "BriefDescription": "lifetime, sample of Write Inject machine 0 valid", - "PublicDescription": "" - }, - {, - "EventCode": "0x418082", - "EventName": "PM_L3_WI_USAGE", - "BriefDescription": "rotating sample of 8 WI actives", - "PublicDescription": "" - }, - {, "EventCode": "0xc080", "EventName": "PM_LD_REF_L1_LSU0", "BriefDescription": "LS0 L1 D cache load references counted at finish, gated by reject", @@ -3312,12 +2814,6 @@ "PublicDescription": "" }, {, - "EventCode": "0x328084", - "EventName": "PM_NON_TM_RST_SC", - "BriefDescription": "non tm snp rst tm sc", - "PublicDescription": "" - }, - {, "EventCode": "0x2001a", "EventName": "PM_NTCG_ALL_FIN", "BriefDescription": "Cycles after all instructions have finished to group completed", @@ -3420,24 +2916,6 @@ "PublicDescription": "" }, {, - "EventCode": "0x34808e", - "EventName": "PM_RD_CLEARING_SC", - "BriefDescription": "rd clearing sc", - "PublicDescription": "" - }, - {, - "EventCode": "0x34808c", - "EventName": "PM_RD_FORMING_SC", - 
"BriefDescription": "rd forming sc", - "PublicDescription": "" - }, - {, - "EventCode": "0x428086", - "EventName": "PM_RD_HIT_PF", - "BriefDescription": "rd machine hit l3 pf machine", - "PublicDescription": "" - }, - {, "EventCode": "0x20004", "EventName": "PM_REAL_SRQ_FULL", "BriefDescription": "Out of real srq entries", @@ -3504,18 +2982,6 @@ "PublicDescription": "TLBIE snoopSnoop TLBIE" }, {, - "EventCode": "0x338088", - "EventName": "PM_SNP_TM_HIT_M", - "BriefDescription": "snp tm st hit m mu", - "PublicDescription": "" - }, - {, - "EventCode": "0x33808a", - "EventName": "PM_SNP_TM_HIT_T", - "BriefDescription": "snp tm_st_hit t tn te", - "PublicDescription": "" - }, - {, "EventCode": "0x4608c", "EventName": "PM_SN_USAGE", "BriefDescription": "Continuous 16 cycle(2to1) window where this signals rotates thru sampling each L2 SN machine busy. PMU uses this wave to then do 16 cyc count to sample total number of machs running", @@ -3534,12 +3000,6 @@ "PublicDescription": "STCX executed reported at sent to nest42" }, {, - "EventCode": "0x717080", - "EventName": "PM_ST_CAUSED_FAIL", - "BriefDescription": "Non TM St caused any thread to fail", - "PublicDescription": "" - }, - {, "EventCode": "0x3090", "EventName": "PM_SWAP_CANCEL", "BriefDescription": "SWAP cancel , rtag not available", @@ -3624,18 +3084,6 @@ "PublicDescription": "" }, {, - "EventCode": "0x318082", - "EventName": "PM_TM_CAM_OVERFLOW", - "BriefDescription": "l3 tm cam overflow during L2 co of SC", - "PublicDescription": "" - }, - {, - "EventCode": "0x74708c", - "EventName": "PM_TM_CAP_OVERFLOW", - "BriefDescription": "TM Footprint Capactiy Overflow", - "PublicDescription": "" - }, - {, "EventCode": "0x20ba", "EventName": "PM_TM_END_ALL", "BriefDescription": "Tm any tend", @@ -3690,48 +3138,6 @@ "PublicDescription": "Transactional conflict from LSU, whatever gets reported to texas 42" }, {, - "EventCode": "0x727086", - "EventName": "PM_TM_FAV_CAUSED_FAIL", - "BriefDescription": "TM Load (fav) caused 
another thread to fail", - "PublicDescription": "" - }, - {, - "EventCode": "0x717082", - "EventName": "PM_TM_LD_CAUSED_FAIL", - "BriefDescription": "Non TM Ld caused any thread to fail", - "PublicDescription": "" - }, - {, - "EventCode": "0x727084", - "EventName": "PM_TM_LD_CONF", - "BriefDescription": "TM Load (fav or non-fav) ran into conflict (failed)", - "PublicDescription": "" - }, - {, - "EventCode": "0x328086", - "EventName": "PM_TM_RST_SC", - "BriefDescription": "tm snp rst tm sc", - "PublicDescription": "" - }, - {, - "EventCode": "0x318080", - "EventName": "PM_TM_SC_CO", - "BriefDescription": "l3 castout tm Sc line", - "PublicDescription": "" - }, - {, - "EventCode": "0x73708a", - "EventName": "PM_TM_ST_CAUSED_FAIL", - "BriefDescription": "TM Store (fav or non-fav) caused another thread to fail", - "PublicDescription": "" - }, - {, - "EventCode": "0x737088", - "EventName": "PM_TM_ST_CONF", - "BriefDescription": "TM Store (fav or non-fav) ran into conflict (failed)", - "PublicDescription": "" - }, - {, "EventCode": "0x20bc", "EventName": "PM_TM_TBEGIN", "BriefDescription": "Tm nested tbegin", diff --git a/tools/perf/pmu-events/arch/x86/amdfam17h/branch.json b/tools/perf/pmu-events/arch/x86/amdfam17h/branch.json new file mode 100644 index 000000000000..93ddfd8053ca --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdfam17h/branch.json @@ -0,0 +1,12 @@ +[ + { + "EventName": "bp_l1_btb_correct", + "EventCode": "0x8a", + "BriefDescription": "L1 BTB Correction." + }, + { + "EventName": "bp_l2_btb_correct", + "EventCode": "0x8b", + "BriefDescription": "L2 BTB Correction." 
+ } +] diff --git a/tools/perf/pmu-events/arch/x86/amdfam17h/cache.json b/tools/perf/pmu-events/arch/x86/amdfam17h/cache.json new file mode 100644 index 000000000000..fad4af9142cb --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdfam17h/cache.json @@ -0,0 +1,287 @@ +[ + { + "EventName": "ic_fw32", + "EventCode": "0x80", + "BriefDescription": "The number of 32B fetch windows transferred from IC pipe to DE instruction decoder (includes non-cacheable and cacheable fill responses)." + }, + { + "EventName": "ic_fw32_miss", + "EventCode": "0x81", + "BriefDescription": "The number of 32B fetch windows tried to read the L1 IC and missed in the full tag." + }, + { + "EventName": "ic_cache_fill_l2", + "EventCode": "0x82", + "BriefDescription": "The number of 64 byte instruction cache line was fulfilled from the L2 cache." + }, + { + "EventName": "ic_cache_fill_sys", + "EventCode": "0x83", + "BriefDescription": "The number of 64 byte instruction cache line fulfilled from system memory or another cache." + }, + { + "EventName": "bp_l1_tlb_miss_l2_hit", + "EventCode": "0x84", + "BriefDescription": "The number of instruction fetches that miss in the L1 ITLB but hit in the L2 ITLB." + }, + { + "EventName": "bp_l1_tlb_miss_l2_miss", + "EventCode": "0x85", + "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs." + }, + { + "EventName": "bp_snp_re_sync", + "EventCode": "0x86", + "BriefDescription": "The number of pipeline restarts caused by invalidating probes that hit on the instruction stream currently being executed. This would happen if the active instruction stream was being modified by another processor in an MP system - typically a highly unlikely event." + }, + { + "EventName": "ic_fetch_stall.ic_stall_any", + "EventCode": "0x87", + "BriefDescription": "IC pipe was stalled during this clock cycle for any reason (nothing valid in pipe ICM1).", + "PublicDescription": "Instruction Pipe Stall. 
IC pipe was stalled during this clock cycle for any reason (nothing valid in pipe ICM1).", + "UMask": "0x4" + }, + { + "EventName": "ic_fetch_stall.ic_stall_dq_empty", + "EventCode": "0x87", + "BriefDescription": "IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.", + "PublicDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.", + "UMask": "0x2" + }, + { + "EventName": "ic_fetch_stall.ic_stall_back_pressure", + "EventCode": "0x87", + "BriefDescription": "IC pipe was stalled during this clock cycle (including IC to OC fetches) due to back-pressure.", + "PublicDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to back-pressure.", + "UMask": "0x1" + }, + { + "EventName": "ic_cache_inval.l2_invalidating_probe", + "EventCode": "0x8c", + "BriefDescription": "IC line invalidated due to L2 invalidating probe (external or LS).", + "PublicDescription": "The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core. IC line invalidated due to L2 invalidating probe (external or LS).", + "UMask": "0x2" + }, + { + "EventName": "ic_cache_inval.fill_invalidated", + "EventCode": "0x8c", + "BriefDescription": "IC line invalidated due to overwriting fill response.", + "PublicDescription": "The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core. IC line invalidated due to overwriting fill response.", + "UMask": "0x1" + }, + { + "EventName": "bp_tlb_rel", + "EventCode": "0x99", + "BriefDescription": "The number of ITLB reload requests." 
+ }, + { + "EventName": "l2_request_g1.rd_blk_l", + "EventCode": "0x60", + "BriefDescription": "Requests to L2 Group1.", + "PublicDescription": "Requests to L2 Group1.", + "UMask": "0x80" + }, + { + "EventName": "l2_request_g1.rd_blk_x", + "EventCode": "0x60", + "BriefDescription": "Requests to L2 Group1.", + "PublicDescription": "Requests to L2 Group1.", + "UMask": "0x40" + }, + { + "EventName": "l2_request_g1.ls_rd_blk_c_s", + "EventCode": "0x60", + "BriefDescription": "Requests to L2 Group1.", + "PublicDescription": "Requests to L2 Group1.", + "UMask": "0x20" + }, + { + "EventName": "l2_request_g1.cacheable_ic_read", + "EventCode": "0x60", + "BriefDescription": "Requests to L2 Group1.", + "PublicDescription": "Requests to L2 Group1.", + "UMask": "0x10" + }, + { + "EventName": "l2_request_g1.change_to_x", + "EventCode": "0x60", + "BriefDescription": "Requests to L2 Group1.", + "PublicDescription": "Requests to L2 Group1.", + "UMask": "0x8" + }, + { + "EventName": "l2_request_g1.prefetch_l2", + "EventCode": "0x60", + "BriefDescription": "Requests to L2 Group1.", + "PublicDescription": "Requests to L2 Group1.", + "UMask": "0x4" + }, + { + "EventName": "l2_request_g1.l2_hw_pf", + "EventCode": "0x60", + "BriefDescription": "Requests to L2 Group1.", + "PublicDescription": "Requests to L2 Group1.", + "UMask": "0x2" + }, + { + "EventName": "l2_request_g1.other_requests", + "EventCode": "0x60", + "BriefDescription": "Events covered by l2_request_g2.", + "PublicDescription": "Requests to L2 Group1. Events covered by l2_request_g2.", + "UMask": "0x1" + }, + { + "EventName": "l2_request_g2.group1", + "EventCode": "0x61", + "BriefDescription": "All Group 1 commands not in unit0.", + "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous. 
All Group 1 commands not in unit0.", + "UMask": "0x80" + }, + { + "EventName": "l2_request_g2.ls_rd_sized", + "EventCode": "0x61", + "BriefDescription": "RdSized, RdSized32, RdSized64.", + "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous. RdSized, RdSized32, RdSized64.", + "UMask": "0x40" + }, + { + "EventName": "l2_request_g2.ls_rd_sized_nc", + "EventCode": "0x61", + "BriefDescription": "RdSizedNC, RdSized32NC, RdSized64NC.", + "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous. RdSizedNC, RdSized32NC, RdSized64NC.", + "UMask": "0x20" + }, + { + "EventName": "l2_request_g2.ic_rd_sized", + "EventCode": "0x61", + "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "UMask": "0x10" + }, + { + "EventName": "l2_request_g2.ic_rd_sized_nc", + "EventCode": "0x61", + "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "UMask": "0x8" + }, + { + "EventName": "l2_request_g2.smc_inval", + "EventCode": "0x61", + "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "UMask": "0x4" + }, + { + "EventName": "l2_request_g2.bus_locks_originator", + "EventCode": "0x61", + "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "UMask": "0x2" + }, + { + "EventName": "l2_request_g2.bus_locks_responses", + "EventCode": "0x61", + "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "PublicDescription": "Multi-events in that LS 
and IF requests can be received simultaneous.", + "UMask": "0x1" + }, + { + "EventName": "l2_latency.l2_cycles_waiting_on_fills", + "EventCode": "0x62", + "BriefDescription": "Total cycles spent waiting for L2 fills to complete from L3 or memory, divided by four. Event counts are for both threads. To calculate average latency, the number of fills from both threads must be used.", + "PublicDescription": "Total cycles spent waiting for L2 fills to complete from L3 or memory, divided by four. Event counts are for both threads. To calculate average latency, the number of fills from both threads must be used.", + "UMask": "0x1" + }, + { + "EventName": "l2_wcb_req.wcb_write", + "EventCode": "0x63", + "PublicDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) write requests.", + "BriefDescription": "LS to L2 WCB write requests.", + "UMask": "0x40" + }, + { + "EventName": "l2_wcb_req.wcb_close", + "EventCode": "0x63", + "BriefDescription": "LS to L2 WCB close requests.", + "PublicDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) close requests.", + "UMask": "0x20" + }, + { + "EventName": "l2_wcb_req.zero_byte_store", + "EventCode": "0x63", + "BriefDescription": "LS to L2 WCB zero byte store requests.", + "PublicDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) zero byte store requests.", + "UMask": "0x4" + }, + { + "EventName": "l2_wcb_req.cl_zero", + "EventCode": "0x63", + "BriefDescription": "LS to L2 WCB cache line zeroing requests.", + "PublicDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) cache line zeroing requests.", + "UMask": "0x1" + }, + { + "EventName": "l2_cache_req_stat.ls_rd_blk_cs", + "EventCode": "0x64", + "BriefDescription": "LS ReadBlock C/S Hit.", + "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. 
LS ReadBlock C/S Hit.", + "UMask": "0x80" + }, + { + "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_x", + "EventCode": "0x64", + "BriefDescription": "LS Read Block L Hit X.", + "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LS Read Block L Hit X.", + "UMask": "0x40" + }, + { + "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_s", + "EventCode": "0x64", + "BriefDescription": "LsRdBlkL Hit Shared.", + "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LsRdBlkL Hit Shared.", + "UMask": "0x20" + }, + { + "EventName": "l2_cache_req_stat.ls_rd_blk_x", + "EventCode": "0x64", + "BriefDescription": "LsRdBlkX/ChgToX Hit X. Count RdBlkX finding Shared as a Miss.", + "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LsRdBlkX/ChgToX Hit X. Count RdBlkX finding Shared as a Miss.", + "UMask": "0x10" + }, + { + "EventName": "l2_cache_req_stat.ls_rd_blk_c", + "EventCode": "0x64", + "BriefDescription": "LS Read Block C S L X Change to X Miss.", + "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LS Read Block C S L X Change to X Miss.", + "UMask": "0x8" + }, + { + "EventName": "l2_cache_req_stat.ic_fill_hit_x", + "EventCode": "0x64", + "BriefDescription": "IC Fill Hit Exclusive Stale.", + "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. 
IC Fill Hit Exclusive Stale.", + "UMask": "0x4" + }, + { + "EventName": "l2_cache_req_stat.ic_fill_hit_s", + "EventCode": "0x64", + "BriefDescription": "IC Fill Hit Shared.", + "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. IC Fill Hit Shared.", + "UMask": "0x2" + }, + { + "EventName": "l2_cache_req_stat.ic_fill_miss", + "EventCode": "0x64", + "BriefDescription": "IC Fill Miss.", + "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. IC Fill Miss.", + "UMask": "0x1" + }, + { + "EventName": "l2_fill_pending.l2_fill_busy", + "EventCode": "0x6d", + "BriefDescription": "Total cycles spent with one or more fill requests in flight from L2.", + "PublicDescription": "Total cycles spent with one or more fill requests in flight from L2.", + "UMask": "0x1" + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdfam17h/core.json b/tools/perf/pmu-events/arch/x86/amdfam17h/core.json new file mode 100644 index 000000000000..7b285b0a7f35 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdfam17h/core.json @@ -0,0 +1,134 @@ +[ + { + "EventName": "ex_ret_instr", + "EventCode": "0xc0", + "BriefDescription": "Retired Instructions." + }, + { + "EventName": "ex_ret_cops", + "EventCode": "0xc1", + "BriefDescription": "Retired Uops.", + "PublicDescription": "The number of uOps retired. This includes all processor activity (instructions, exceptions, interrupts, microcode assists, etc.). The number of events logged per cycle can vary from 0 to 4." + }, + { + "EventName": "ex_ret_brn", + "EventCode": "0xc2", + "BriefDescription": "Retired Branch Instructions.", + "PublicDescription": "The number of branch instructions retired. This includes all types of architectural control flow changes, including exceptions and interrupts." 
+ }, + { + "EventName": "ex_ret_brn_misp", + "EventCode": "0xc3", + "BriefDescription": "Retired Branch Instructions Mispredicted.", + "PublicDescription": "The number of branch instructions retired, of any type, that were not correctly predicted. This includes those for which prediction is not attempted (far control transfers, exceptions and interrupts)." + }, + { + "EventName": "ex_ret_brn_tkn", + "EventCode": "0xc4", + "BriefDescription": "Retired Taken Branch Instructions.", + "PublicDescription": "The number of taken branches that were retired. This includes all types of architectural control flow changes, including exceptions and interrupts." + }, + { + "EventName": "ex_ret_brn_tkn_misp", + "EventCode": "0xc5", + "BriefDescription": "Retired Taken Branch Instructions Mispredicted.", + "PublicDescription": "The number of retired taken branch instructions that were mispredicted." + }, + { + "EventName": "ex_ret_brn_far", + "EventCode": "0xc6", + "BriefDescription": "Retired Far Control Transfers.", + "PublicDescription": "The number of far control transfers retired including far call/jump/return, IRET, SYSCALL and SYSRET, plus exceptions and interrupts. Far control transfers are not subject to branch prediction." + }, + { + "EventName": "ex_ret_brn_resync", + "EventCode": "0xc7", + "BriefDescription": "Retired Branch Resyncs.", + "PublicDescription": "The number of resync branches. These reflect pipeline restarts due to certain microcode assists and events such as writes to the active instruction stream, among other things. Each occurrence reflects a restart penalty similar to a branch mispredict. This is relatively rare." + }, + { + "EventName": "ex_ret_near_ret", + "EventCode": "0xc8", + "BriefDescription": "Retired Near Returns.", + "PublicDescription": "The number of near return instructions (RET or RET Iw) retired." 
+ }, + { + "EventName": "ex_ret_near_ret_mispred", + "EventCode": "0xc9", + "BriefDescription": "Retired Near Returns Mispredicted.", + "PublicDescription": "The number of near returns retired that were not correctly predicted by the return address predictor. Each such mispredict incurs the same penalty as a mispredicted conditional branch instruction." + }, + { + "EventName": "ex_ret_brn_ind_misp", + "EventCode": "0xca", + "BriefDescription": "Retired Indirect Branch Instructions Mispredicted.", + "PublicDescription": "Retired Indirect Branch Instructions Mispredicted." + }, + { + "EventName": "ex_ret_mmx_fp_instr.sse_instr", + "EventCode": "0xcb", + "BriefDescription": "SSE instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX).", + "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. SSE instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX).", + "UMask": "0x4" + }, + { + "EventName": "ex_ret_mmx_fp_instr.mmx_instr", + "EventCode": "0xcb", + "BriefDescription": "MMX instructions.", + "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. MMX instructions.", + "UMask": "0x2" + }, + { + "EventName": "ex_ret_mmx_fp_instr.x87_instr", + "EventCode": "0xcb", + "BriefDescription": "x87 instructions.", + "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. 
Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. x87 instructions.", + "UMask": "0x1" + }, + { + "EventName": "ex_ret_cond", + "EventCode": "0xd1", + "BriefDescription": "Retired Conditional Branch Instructions." + }, + { + "EventName": "ex_ret_cond_misp", + "EventCode": "0xd2", + "BriefDescription": "Retired Conditional Branch Instructions Mispredicted." + }, + { + "EventName": "ex_div_busy", + "EventCode": "0xd3", + "BriefDescription": "Div Cycles Busy count." + }, + { + "EventName": "ex_div_count", + "EventCode": "0xd4", + "BriefDescription": "Div Op Count." + }, + { + "EventName": "ex_tagged_ibs_ops.ibs_count_rollover", + "EventCode": "0x1cf", + "BriefDescription": "Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired.", + "PublicDescription": "Tagged IBS Ops. Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired.", + "UMask": "0x4" + }, + { + "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops_ret", + "EventCode": "0x1cf", + "BriefDescription": "Number of Ops tagged by IBS that retired.", + "PublicDescription": "Tagged IBS Ops. Number of Ops tagged by IBS that retired.", + "UMask": "0x2" + }, + { + "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops", + "EventCode": "0x1cf", + "BriefDescription": "Number of Ops tagged by IBS.", + "PublicDescription": "Tagged IBS Ops. Number of Ops tagged by IBS.", + "UMask": "0x1" + }, + { + "EventName": "ex_ret_fus_brnch_inst", + "EventCode": "0x1d0", + "BriefDescription": "The number of fused retired branch instructions retired per cycle. The number of events logged per cycle can vary from 0 to 3." 
+ } +] diff --git a/tools/perf/pmu-events/arch/x86/amdfam17h/floating-point.json b/tools/perf/pmu-events/arch/x86/amdfam17h/floating-point.json new file mode 100644 index 000000000000..ea4711983d1d --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdfam17h/floating-point.json @@ -0,0 +1,168 @@ +[ + { + "EventName": "fpu_pipe_assignment.dual", + "EventCode": "0x00", + "BriefDescription": "Total number multi-pipe uOps.", + "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one-cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number multi-pipe uOps assigned to Pipe 3.", + "UMask": "0xf0" + }, + { + "EventName": "fpu_pipe_assignment.total", + "EventCode": "0x00", + "BriefDescription": "Total number uOps.", + "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one-cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to Pipe 3.", + "UMask": "0xf" + }, + { + "EventName": "fp_sched_empty", + "EventCode": "0x01", + "BriefDescription": "This is a speculative event. The number of cycles in which the FPU scheduler is empty. Note that some Ops like FP loads bypass the scheduler." 
+ }, + { + "EventName": "fp_retx87_fp_ops.all", + "EventCode": "0x02", + "BriefDescription": "All Ops.", + "PublicDescription": "The number of x87 floating-point Ops that have retired. The number of events logged per cycle can vary from 0 to 8.", + "UMask": "0x7" + }, + { + "EventName": "fp_retx87_fp_ops.div_sqr_r_ops", + "EventCode": "0x02", + "BriefDescription": "Divide and square root Ops.", + "PublicDescription": "The number of x87 floating-point Ops that have retired. The number of events logged per cycle can vary from 0 to 8. Divide and square root Ops.", + "UMask": "0x4" + }, + { + "EventName": "fp_retx87_fp_ops.mul_ops", + "EventCode": "0x02", + "BriefDescription": "Multiply Ops.", + "PublicDescription": "The number of x87 floating-point Ops that have retired. The number of events logged per cycle can vary from 0 to 8. Multiply Ops.", + "UMask": "0x2" + }, + { + "EventName": "fp_retx87_fp_ops.add_sub_ops", + "EventCode": "0x02", + "BriefDescription": "Add/subtract Ops.", + "PublicDescription": "The number of x87 floating-point Ops that have retired. The number of events logged per cycle can vary from 0 to 8. Add/subtract Ops.", + "UMask": "0x1" + }, + { + "EventName": "fp_ret_sse_avx_ops.all", + "EventCode": "0x03", + "BriefDescription": "All FLOPS.", + "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.", + "UMask": "0xff" + }, + { + "EventName": "fp_ret_sse_avx_ops.dp_mult_add_flops", + "EventCode": "0x03", + "BriefDescription": "Double precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS.", + "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Double precision multiply-add FLOPS. 
Multiply-add counts as 2 FLOPS.", + "UMask": "0x80" + }, + { + "EventName": "fp_ret_sse_avx_ops.dp_div_flops", + "EventCode": "0x03", + "BriefDescription": "Double precision divide/square root FLOPS.", + "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Double precision divide/square root FLOPS.", + "UMask": "0x40" + }, + { + "EventName": "fp_ret_sse_avx_ops.dp_mult_flops", + "EventCode": "0x03", + "BriefDescription": "Double precision multiply FLOPS.", + "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Double precision multiply FLOPS.", + "UMask": "0x20" + }, + { + "EventName": "fp_ret_sse_avx_ops.dp_add_sub_flops", + "EventCode": "0x03", + "BriefDescription": "Double precision add/subtract FLOPS.", + "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Double precision add/subtract FLOPS.", + "UMask": "0x10" + }, + { + "EventName": "fp_ret_sse_avx_ops.sp_mult_add_flops", + "EventCode": "0x03", + "BriefDescription": "Single precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS.", + "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Single precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS.", + "UMask": "0x8" + }, + { + "EventName": "fp_ret_sse_avx_ops.sp_div_flops", + "EventCode": "0x03", + "BriefDescription": "Single-precision divide/square root FLOPS.", + "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. 
Single-precision divide/square root FLOPS.", + "UMask": "0x4" + }, + { + "EventName": "fp_ret_sse_avx_ops.sp_mult_flops", + "EventCode": "0x03", + "BriefDescription": "Single-precision multiply FLOPS.", + "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Single-precision multiply FLOPS.", + "UMask": "0x2" + }, + { + "EventName": "fp_ret_sse_avx_ops.sp_add_sub_flops", + "EventCode": "0x03", + "BriefDescription": "Single-precision add/subtract FLOPS.", + "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Single-precision add/subtract FLOPS.", + "UMask": "0x1" + }, + { + "EventName": "fp_num_mov_elim_scal_op.optimized", + "EventCode": "0x04", + "BriefDescription": "Number of Scalar Ops optimized.", + "PublicDescription": "This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes. Number of Scalar Ops optimized.", + "UMask": "0x8" + }, + { + "EventName": "fp_num_mov_elim_scal_op.opt_potential", + "EventCode": "0x04", + "BriefDescription": "Number of Ops that are candidates for optimization (have Z-bit either set or pass).", + "PublicDescription": "This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes. Number of Ops that are candidates for optimization (have Z-bit either set or pass).", + "UMask": "0x4" + }, + { + "EventName": "fp_num_mov_elim_scal_op.sse_mov_ops_elim", + "EventCode": "0x04", + "BriefDescription": "Number of SSE Move Ops eliminated.", + "PublicDescription": "This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes. 
Number of SSE Move Ops eliminated.", + "UMask": "0x2" + }, + { + "EventName": "fp_num_mov_elim_scal_op.sse_mov_ops", + "EventCode": "0x04", + "BriefDescription": "Number of SSE Move Ops.", + "PublicDescription": "This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes. Number of SSE Move Ops.", + "UMask": "0x1" + }, + { + "EventName": "fp_retired_ser_ops.x87_ctrl_ret", + "EventCode": "0x05", + "BriefDescription": "x87 control word mispredict traps due to mispredictions in RC or PC, or changes in mask bits.", + "PublicDescription": "The number of serializing Ops retired. x87 control word mispredict traps due to mispredictions in RC or PC, or changes in mask bits.", + "UMask": "0x8" + }, + { + "EventName": "fp_retired_ser_ops.x87_bot_ret", + "EventCode": "0x05", + "BriefDescription": "x87 bottom-executing uOps retired.", + "PublicDescription": "The number of serializing Ops retired. x87 bottom-executing uOps retired.", + "UMask": "0x4" + }, + { + "EventName": "fp_retired_ser_ops.sse_ctrl_ret", + "EventCode": "0x05", + "BriefDescription": "SSE control word mispredict traps due to mispredictions in RC, FTZ or DAZ, or changes in mask bits.", + "PublicDescription": "The number of serializing Ops retired. SSE control word mispredict traps due to mispredictions in RC, FTZ or DAZ, or changes in mask bits.", + "UMask": "0x2" + }, + { + "EventName": "fp_retired_ser_ops.sse_bot_ret", + "EventCode": "0x05", + "BriefDescription": "SSE bottom-executing uOps retired.", + "PublicDescription": "The number of serializing Ops retired. 
SSE bottom-executing uOps retired.", + "UMask": "0x1" + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdfam17h/memory.json b/tools/perf/pmu-events/arch/x86/amdfam17h/memory.json new file mode 100644 index 000000000000..fa2d60d4def0 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdfam17h/memory.json @@ -0,0 +1,162 @@ +[ + { + "EventName": "ls_locks.bus_lock", + "EventCode": "0x25", + "BriefDescription": "Bus lock when a locked operations crosses a cache boundary or is done on an uncacheable memory type.", + "PublicDescription": "Bus lock when a locked operations crosses a cache boundary or is done on an uncacheable memory type.", + "UMask": "0x1" + }, + { + "EventName": "ls_dispatch.ld_st_dispatch", + "EventCode": "0x29", + "BriefDescription": "Load-op-Stores.", + "PublicDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed. Load-op-Stores.", + "UMask": "0x4" + }, + { + "EventName": "ls_dispatch.store_dispatch", + "EventCode": "0x29", + "BriefDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", + "PublicDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", + "UMask": "0x2" + }, + { + "EventName": "ls_dispatch.ld_dispatch", + "EventCode": "0x29", + "BriefDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", + "PublicDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", + "UMask": "0x1" + }, + { + "EventName": "ls_stlf", + "EventCode": "0x35", + "BriefDescription": "Number of STLF hits." + }, + { + "EventName": "ls_dc_accesses", + "EventCode": "0x40", + "BriefDescription": "The number of accesses to the data cache for load and store references. This may include certain microcode scratchpad accesses, although these are generally rare. Each increment represents an eight-byte access, although the instruction may only be accessing a portion of that. 
This event is a speculative event." + }, + { + "EventName": "ls_l1_d_tlb_miss.all", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss or Reload off all sizes.", + "PublicDescription": "L1 DTLB Miss or Reload off all sizes.", + "UMask": "0xff" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_miss", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss of a page of 1G size.", + "PublicDescription": "L1 DTLB Miss of a page of 1G size.", + "UMask": "0x80" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_miss", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss of a page of 2M size.", + "PublicDescription": "L1 DTLB Miss of a page of 2M size.", + "UMask": "0x40" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_32k_l2_miss", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss of a page of 32K size.", + "PublicDescription": "L1 DTLB Miss of a page of 32K size.", + "UMask": "0x20" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_miss", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss of a page of 4K size.", + "PublicDescription": "L1 DTLB Miss of a page of 4K size.", + "UMask": "0x10" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_hit", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Reload of a page of 1G size.", + "PublicDescription": "L1 DTLB Reload of a page of 1G size.", + "UMask": "0x8" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_hit", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Reload of a page of 2M size.", + "PublicDescription": "L1 DTLB Reload of a page of 2M size.", + "UMask": "0x4" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_32k_l2_hit", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Reload of a page of 32K size.", + "PublicDescription": "L1 DTLB Reload of a page of 32K size.", + "UMask": "0x2" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_hit", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Reload of a page of 
4K size.", + "PublicDescription": "L1 DTLB Reload of a page of 4K size.", + "UMask": "0x1" + }, + { + "EventName": "ls_tablewalker.perf_mon_tablewalk_alloc_iside", + "EventCode": "0x46", + "BriefDescription": "Tablewalker allocation.", + "PublicDescription": "Tablewalker allocation.", + "UMask": "0xc" + }, + { + "EventName": "ls_tablewalker.perf_mon_tablewalk_alloc_dside", + "EventCode": "0x46", + "BriefDescription": "Tablewalker allocation.", + "PublicDescription": "Tablewalker allocation.", + "UMask": "0x3" + }, + { + "EventName": "ls_misal_accesses", + "EventCode": "0x47", + "BriefDescription": "Misaligned loads." + }, + { + "EventName": "ls_pref_instr_disp.prefetch_nta", + "EventCode": "0x4b", + "BriefDescription": "Software Prefetch Instructions (PREFETCHNTA instruction) Dispatched.", + "PublicDescription": "Software Prefetch Instructions (PREFETCHNTA instruction) Dispatched.", + "UMask": "0x4" + }, + { + "EventName": "ls_pref_instr_disp.store_prefetch_w", + "EventCode": "0x4b", + "BriefDescription": "Software Prefetch Instructions (3DNow PREFETCHW instruction) Dispatched.", + "PublicDescription": "Software Prefetch Instructions (3DNow PREFETCHW instruction) Dispatched.", + "UMask": "0x2" + }, + { + "EventName": "ls_pref_instr_disp.load_prefetch_w", + "EventCode": "0x4b", + "BriefDescription": "Prefetch, Prefetch_T0_T1_T2.", + "PublicDescription": "Software Prefetch Instructions Dispatched. 
Prefetch, Prefetch_T0_T1_T2.", + "UMask": "0x1" + }, + { + "EventName": "ls_inef_sw_pref.mab_mch_cnt", + "EventCode": "0x52", + "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core.", + "PublicDescription": "The number of software prefetches that did not fetch data outside of the processor core.", + "UMask": "0x2" + }, + { + "EventName": "ls_inef_sw_pref.data_pipe_sw_pf_dc_hit", + "EventCode": "0x52", + "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core.", + "PublicDescription": "The number of software prefetches that did not fetch data outside of the processor core.", + "UMask": "0x1" + }, + { + "EventName": "ls_not_halted_cyc", + "EventCode": "0x76", + "BriefDescription": "Cycles not in Halt." + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdfam17h/other.json b/tools/perf/pmu-events/arch/x86/amdfam17h/other.json new file mode 100644 index 000000000000..b26a00d05a2e --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdfam17h/other.json @@ -0,0 +1,65 @@ +[ + { + "EventName": "ic_oc_mode_switch.oc_ic_mode_switch", + "EventCode": "0x28a", + "BriefDescription": "OC to IC mode switch.", + "PublicDescription": "OC Mode Switch. OC to IC mode switch.", + "UMask": "0x2" + }, + { + "EventName": "ic_oc_mode_switch.ic_oc_mode_switch", + "EventCode": "0x28a", + "BriefDescription": "IC to OC mode switch.", + "PublicDescription": "OC Mode Switch. IC to OC mode switch.", + "UMask": "0x1" + }, + { + "EventName": "de_dis_dispatch_token_stalls0.retire_token_stall", + "EventCode": "0xaf", + "BriefDescription": "RETIRE Tokens unavailable.", + "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. 
RETIRE Tokens unavailable.", + "UMask": "0x40" + }, + { + "EventName": "de_dis_dispatch_token_stalls0.agsq_token_stall", + "EventCode": "0xaf", + "BriefDescription": "AGSQ Tokens unavailable.", + "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. AGSQ Tokens unavailable.", + "UMask": "0x20" + }, + { + "EventName": "de_dis_dispatch_token_stalls0.alu_token_stall", + "EventCode": "0xaf", + "BriefDescription": "ALU tokens total unavailable.", + "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALU tokens total unavailable.", + "UMask": "0x10" + }, + { + "EventName": "de_dis_dispatch_token_stalls0.alsq3_0_token_stall", + "EventCode": "0xaf", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall.", + "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall.", + "UMask": "0x8" + }, + { + "EventName": "de_dis_dispatch_token_stalls0.alsq3_token_stall", + "EventCode": "0xaf", + "BriefDescription": "ALSQ 3 Tokens unavailable.", + "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 3 Tokens unavailable.", + "UMask": "0x4" + }, + { + "EventName": "de_dis_dispatch_token_stalls0.alsq2_token_stall", + "EventCode": "0xaf", + "BriefDescription": "ALSQ 2 Tokens unavailable.", + "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 2 Tokens unavailable.", + "UMask": "0x2" + }, + { + "EventName": "de_dis_dispatch_token_stalls0.alsq1_token_stall", + "EventCode": "0xaf", + "BriefDescription": "ALSQ 1 Tokens unavailable.", + "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. 
ALSQ 1 Tokens unavailable.", + "UMask": "0x1" + } +] diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index e05c2c8458fc..d6984a3017e0 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -33,3 +33,4 @@ GenuineIntel-6-25,v2,westmereep-sp,core GenuineIntel-6-2F,v2,westmereex,core GenuineIntel-6-55-[01234],v1,skylakex,core GenuineIntel-6-55-[56789ABCDEF],v1,cascadelakex,core +AuthenticAMD-23-[[:xdigit:]]+,v1,amdfam17h,core diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py index 390a351d15ea..c3eae1d77d36 100644 --- a/tools/perf/scripts/python/export-to-postgresql.py +++ b/tools/perf/scripts/python/export-to-postgresql.py @@ -10,6 +10,8 @@ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for # more details. +from __future__ import print_function + import os import sys import struct @@ -199,6 +201,18 @@ import datetime from PySide.QtSql import * +if sys.version_info < (3, 0): + def toserverstr(str): + return str + def toclientstr(str): + return str +else: + # Assume UTF-8 server_encoding and client_encoding + def toserverstr(str): + return bytes(str, "UTF_8") + def toclientstr(str): + return bytes(str, "UTF_8") + # Need to access PostgreSQL C library directly to use COPY FROM STDIN from ctypes import * libpq = CDLL("libpq.so.5") @@ -234,12 +248,17 @@ perf_db_export_mode = True perf_db_export_calls = False perf_db_export_callchains = False +def printerr(*args, **kw_args): + print(*args, file=sys.stderr, **kw_args) + +def printdate(*args, **kw_args): + print(datetime.datetime.today(), *args, sep=' ', **kw_args) def usage(): - print >> sys.stderr, "Usage is: export-to-postgresql.py <database name> [<columns>] [<calls>] [<callchains>]" - print >> sys.stderr, "where: columns 'all' or 'branches'" - print >> sys.stderr, " calls 'calls' => create calls and call_paths table" - print >> 
sys.stderr, " callchains 'callchains' => create call_paths table" + printerr("Usage is: export-to-postgresql.py <database name> [<columns>] [<calls>] [<callchains>]") + printerr("where: columns 'all' or 'branches'") + printerr(" calls 'calls' => create calls and call_paths table") + printerr(" callchains 'callchains' => create call_paths table") raise Exception("Too few arguments") if (len(sys.argv) < 2): @@ -273,7 +292,7 @@ def do_query(q, s): return raise Exception("Query failed: " + q.lastError().text()) -print datetime.datetime.today(), "Creating database..." +printdate("Creating database...") db = QSqlDatabase.addDatabase('QPSQL') query = QSqlQuery(db) @@ -506,12 +525,12 @@ do_query(query, 'CREATE VIEW samples_view AS ' ' FROM samples') -file_header = struct.pack("!11sii", "PGCOPY\n\377\r\n\0", 0, 0) -file_trailer = "\377\377" +file_header = struct.pack("!11sii", b"PGCOPY\n\377\r\n\0", 0, 0) +file_trailer = b"\377\377" def open_output_file(file_name): path_name = output_dir_name + "/" + file_name - file = open(path_name, "w+") + file = open(path_name, "wb+") file.write(file_header) return file @@ -526,13 +545,13 @@ def copy_output_file_direct(file, table_name): # Use COPY FROM STDIN because security may prevent postgres from accessing the files directly def copy_output_file(file, table_name): - conn = PQconnectdb("dbname = " + dbname) + conn = PQconnectdb(toclientstr("dbname = " + dbname)) if (PQstatus(conn)): raise Exception("COPY FROM STDIN PQconnectdb failed") file.write(file_trailer) file.seek(0) sql = "COPY " + table_name + " FROM STDIN (FORMAT 'binary')" - res = PQexec(conn, sql) + res = PQexec(conn, toclientstr(sql)) if (PQresultStatus(res) != 4): raise Exception("COPY FROM STDIN PQexec failed") data = file.read(65536) @@ -566,7 +585,7 @@ if perf_db_export_calls: call_file = open_output_file("call_table.bin") def trace_begin(): - print datetime.datetime.today(), "Writing to intermediate files..." 
+ printdate("Writing to intermediate files...") # id == 0 means unknown. It is easier to create records for them than replace the zeroes with NULLs evsel_table(0, "unknown") machine_table(0, 0, "unknown") @@ -582,7 +601,7 @@ def trace_begin(): unhandled_count = 0 def trace_end(): - print datetime.datetime.today(), "Copying to database..." + printdate("Copying to database...") copy_output_file(evsel_file, "selected_events") copy_output_file(machine_file, "machines") copy_output_file(thread_file, "threads") @@ -597,7 +616,7 @@ def trace_end(): if perf_db_export_calls: copy_output_file(call_file, "calls") - print datetime.datetime.today(), "Removing intermediate files..." + printdate("Removing intermediate files...") remove_output_file(evsel_file) remove_output_file(machine_file) remove_output_file(thread_file) @@ -612,7 +631,7 @@ def trace_end(): if perf_db_export_calls: remove_output_file(call_file) os.rmdir(output_dir_name) - print datetime.datetime.today(), "Adding primary keys" + printdate("Adding primary keys") do_query(query, 'ALTER TABLE selected_events ADD PRIMARY KEY (id)') do_query(query, 'ALTER TABLE machines ADD PRIMARY KEY (id)') do_query(query, 'ALTER TABLE threads ADD PRIMARY KEY (id)') @@ -627,7 +646,7 @@ def trace_end(): if perf_db_export_calls: do_query(query, 'ALTER TABLE calls ADD PRIMARY KEY (id)') - print datetime.datetime.today(), "Adding foreign keys" + printdate("Adding foreign keys") do_query(query, 'ALTER TABLE threads ' 'ADD CONSTRAINT machinefk FOREIGN KEY (machine_id) REFERENCES machines (id),' 'ADD CONSTRAINT processfk FOREIGN KEY (process_id) REFERENCES threads (id)') @@ -663,8 +682,8 @@ def trace_end(): do_query(query, 'CREATE INDEX pid_idx ON calls (parent_id)') if (unhandled_count): - print datetime.datetime.today(), "Warning: ", unhandled_count, " unhandled events" - print datetime.datetime.today(), "Done" + printdate("Warning: ", unhandled_count, " unhandled events") + printdate("Done") def trace_unhandled(event_name, context, 
event_fields_dict): global unhandled_count @@ -674,12 +693,14 @@ def sched__sched_switch(*x): pass def evsel_table(evsel_id, evsel_name, *x): + evsel_name = toserverstr(evsel_name) n = len(evsel_name) fmt = "!hiqi" + str(n) + "s" value = struct.pack(fmt, 2, 8, evsel_id, n, evsel_name) evsel_file.write(value) def machine_table(machine_id, pid, root_dir, *x): + root_dir = toserverstr(root_dir) n = len(root_dir) fmt = "!hiqiii" + str(n) + "s" value = struct.pack(fmt, 3, 8, machine_id, 4, pid, n, root_dir) @@ -690,6 +711,7 @@ def thread_table(thread_id, machine_id, process_id, pid, tid, *x): thread_file.write(value) def comm_table(comm_id, comm_str, *x): + comm_str = toserverstr(comm_str) n = len(comm_str) fmt = "!hiqi" + str(n) + "s" value = struct.pack(fmt, 2, 8, comm_id, n, comm_str) @@ -701,6 +723,9 @@ def comm_thread_table(comm_thread_id, comm_id, thread_id, *x): comm_thread_file.write(value) def dso_table(dso_id, machine_id, short_name, long_name, build_id, *x): + short_name = toserverstr(short_name) + long_name = toserverstr(long_name) + build_id = toserverstr(build_id) n1 = len(short_name) n2 = len(long_name) n3 = len(build_id) @@ -709,12 +734,14 @@ def dso_table(dso_id, machine_id, short_name, long_name, build_id, *x): dso_file.write(value) def symbol_table(symbol_id, dso_id, sym_start, sym_end, binding, symbol_name, *x): + symbol_name = toserverstr(symbol_name) n = len(symbol_name) fmt = "!hiqiqiqiqiii" + str(n) + "s" value = struct.pack(fmt, 6, 8, symbol_id, 8, dso_id, 8, sym_start, 8, sym_end, 4, binding, n, symbol_name) symbol_file.write(value) def branch_type_table(branch_type, name, *x): + name = toserverstr(name) n = len(name) fmt = "!hiii" + str(n) + "s" value = struct.pack(fmt, 2, 4, branch_type, n, name) diff --git a/tools/perf/scripts/python/export-to-sqlite.py b/tools/perf/scripts/python/export-to-sqlite.py index eb63e6c7107f..bf271fbc3a88 100644 --- a/tools/perf/scripts/python/export-to-sqlite.py +++ b/tools/perf/scripts/python/export-to-sqlite.py 
@@ -10,6 +10,8 @@ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for # more details. +from __future__ import print_function + import os import sys import struct @@ -60,11 +62,17 @@ perf_db_export_mode = True perf_db_export_calls = False perf_db_export_callchains = False +def printerr(*args, **keyword_args): + print(*args, file=sys.stderr, **keyword_args) + +def printdate(*args, **kw_args): + print(datetime.datetime.today(), *args, sep=' ', **kw_args) + def usage(): - print >> sys.stderr, "Usage is: export-to-sqlite.py <database name> [<columns>] [<calls>] [<callchains>]" - print >> sys.stderr, "where: columns 'all' or 'branches'" - print >> sys.stderr, " calls 'calls' => create calls and call_paths table" - print >> sys.stderr, " callchains 'callchains' => create call_paths table" + printerr("Usage is: export-to-sqlite.py <database name> [<columns>] [<calls>] [<callchains>]"); + printerr("where: columns 'all' or 'branches'"); + printerr(" calls 'calls' => create calls and call_paths table"); + printerr(" callchains 'callchains' => create call_paths table"); raise Exception("Too few arguments") if (len(sys.argv) < 2): @@ -100,7 +108,7 @@ def do_query_(q): return raise Exception("Query failed: " + q.lastError().text()) -print datetime.datetime.today(), "Creating database..." +printdate("Creating database ...") db_exists = False try: @@ -323,7 +331,7 @@ if perf_db_export_calls: 'return_id,' 'CASE WHEN flags=0 THEN \'\' WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' WHEN flags=6 THEN \'jump\' ELSE flags END AS flags,' 'parent_call_path_id,' - 'parent_id' + 'calls.parent_id' ' FROM calls INNER JOIN call_paths ON call_paths.id = call_path_id') do_query(query, 'CREATE VIEW samples_view AS ' @@ -378,7 +386,7 @@ if perf_db_export_calls: call_query.prepare("INSERT INTO calls VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)") def trace_begin(): - print datetime.datetime.today(), "Writing records..." 
+ printdate("Writing records...") do_query(query, 'BEGIN TRANSACTION') # id == 0 means unknown. It is easier to create records for them than replace the zeroes with NULLs evsel_table(0, "unknown") @@ -397,14 +405,14 @@ unhandled_count = 0 def trace_end(): do_query(query, 'END TRANSACTION') - print datetime.datetime.today(), "Adding indexes" + printdate("Adding indexes") if perf_db_export_calls: do_query(query, 'CREATE INDEX pcpid_idx ON calls (parent_call_path_id)') do_query(query, 'CREATE INDEX pid_idx ON calls (parent_id)') if (unhandled_count): - print datetime.datetime.today(), "Warning: ", unhandled_count, " unhandled events" - print datetime.datetime.today(), "Done" + printdate("Warning: ", unhandled_count, " unhandled events") + printdate("Done") def trace_unhandled(event_name, context, event_fields_dict): global unhandled_count diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index afec9479ca7f..74ef92f1d19a 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -88,20 +88,39 @@ # 7fab593ea956 48 89 15 3b 13 22 00 movq %rdx, 0x22133b(%rip) # 8107675243232 2 ls 22011 22011 hardware interrupt No 7fab593ea956 _dl_start+0x26 (ld-2.19.so) -> ffffffff86a012e0 page_fault ([kernel]) +from __future__ import print_function + import sys import weakref import threading import string -import cPickle +try: + # Python2 + import cPickle as pickle + # size of pickled integer big enough for record size + glb_nsz = 8 +except ImportError: + import pickle + glb_nsz = 16 import re import os from PySide.QtCore import * from PySide.QtGui import * from PySide.QtSql import * +pyside_version_1 = True from decimal import * from ctypes import * from multiprocessing import Process, Array, Value, Event +# xrange is range in Python3 +try: + xrange +except NameError: + xrange = range + +def printerr(*args, **keyword_args): + print(*args, file=sys.stderr, 
**keyword_args) + # Data formatting helpers def tohex(ip): @@ -1004,10 +1023,6 @@ class ChildDataItemFinder(): glb_chunk_sz = 10000 -# size of pickled integer big enough for record size - -glb_nsz = 8 - # Background process for SQL data fetcher class SQLFetcherProcess(): @@ -1066,7 +1081,7 @@ class SQLFetcherProcess(): return True if space >= glb_nsz: # Use 0 (or space < glb_nsz) to mean there is no more at the top of the buffer - nd = cPickle.dumps(0, cPickle.HIGHEST_PROTOCOL) + nd = pickle.dumps(0, pickle.HIGHEST_PROTOCOL) self.buffer[self.local_head : self.local_head + len(nd)] = nd self.local_head = 0 if self.local_tail - self.local_head > sz: @@ -1084,9 +1099,9 @@ class SQLFetcherProcess(): self.wait_event.wait() def AddToBuffer(self, obj): - d = cPickle.dumps(obj, cPickle.HIGHEST_PROTOCOL) + d = pickle.dumps(obj, pickle.HIGHEST_PROTOCOL) n = len(d) - nd = cPickle.dumps(n, cPickle.HIGHEST_PROTOCOL) + nd = pickle.dumps(n, pickle.HIGHEST_PROTOCOL) sz = n + glb_nsz self.WaitForSpace(sz) pos = self.local_head @@ -1198,12 +1213,12 @@ class SQLFetcher(QObject): pos = self.local_tail if len(self.buffer) - pos < glb_nsz: pos = 0 - n = cPickle.loads(self.buffer[pos : pos + glb_nsz]) + n = pickle.loads(self.buffer[pos : pos + glb_nsz]) if n == 0: pos = 0 - n = cPickle.loads(self.buffer[0 : glb_nsz]) + n = pickle.loads(self.buffer[0 : glb_nsz]) pos += glb_nsz - obj = cPickle.loads(self.buffer[pos : pos + n]) + obj = pickle.loads(self.buffer[pos : pos + n]) self.local_tail = pos + n return obj @@ -1512,6 +1527,19 @@ def BranchDataPrep(query): " (" + dsoname(query.value(15)) + ")") return data +def BranchDataPrepWA(query): + data = [] + data.append(query.value(0)) + # Workaround pyside failing to handle large integers (i.e. 
time) in python3 by converting to a string + data.append("{:>19}".format(query.value(1))) + for i in xrange(2, 8): + data.append(query.value(i)) + data.append(tohex(query.value(8)).rjust(16) + " " + query.value(9) + offstr(query.value(10)) + + " (" + dsoname(query.value(11)) + ")" + " -> " + + tohex(query.value(12)) + " " + query.value(13) + offstr(query.value(14)) + + " (" + dsoname(query.value(15)) + ")") + return data + # Branch data model class BranchModel(TreeModel): @@ -1539,7 +1567,11 @@ class BranchModel(TreeModel): " AND evsel_id = " + str(self.event_id) + " ORDER BY samples.id" " LIMIT " + str(glb_chunk_sz)) - self.fetcher = SQLFetcher(glb, sql, BranchDataPrep, self.AddSample) + if pyside_version_1 and sys.version_info[0] == 3: + prep = BranchDataPrepWA + else: + prep = BranchDataPrep + self.fetcher = SQLFetcher(glb, sql, prep, self.AddSample) self.fetcher.done.connect(self.Update) self.fetcher.Fetch(glb_chunk_sz) @@ -2065,14 +2097,6 @@ def IsSelectable(db, table, sql = ""): return False return True -# SQL data preparation - -def SQLTableDataPrep(query, count): - data = [] - for i in xrange(count): - data.append(query.value(i)) - return data - # SQL table data model item class SQLTableItem(): @@ -2096,7 +2120,7 @@ class SQLTableModel(TableModel): self.more = True self.populated = 0 self.column_headers = column_headers - self.fetcher = SQLFetcher(glb, sql, lambda x, y=len(column_headers): SQLTableDataPrep(x, y), self.AddSample) + self.fetcher = SQLFetcher(glb, sql, lambda x, y=len(column_headers): self.SQLTableDataPrep(x, y), self.AddSample) self.fetcher.done.connect(self.Update) self.fetcher.Fetch(glb_chunk_sz) @@ -2140,6 +2164,12 @@ class SQLTableModel(TableModel): def columnHeader(self, column): return self.column_headers[column] + def SQLTableDataPrep(self, query, count): + data = [] + for i in xrange(count): + data.append(query.value(i)) + return data + # SQL automatic table data model class SQLAutoTableModel(SQLTableModel): @@ -2168,8 +2198,32 @@ 
class SQLAutoTableModel(SQLTableModel): QueryExec(query, "SELECT column_name FROM information_schema.columns WHERE table_schema = '" + schema + "' and table_name = '" + select_table_name + "'") while query.next(): column_headers.append(query.value(0)) + if pyside_version_1 and sys.version_info[0] == 3: + if table_name == "samples_view": + self.SQLTableDataPrep = self.samples_view_DataPrep + if table_name == "samples": + self.SQLTableDataPrep = self.samples_DataPrep super(SQLAutoTableModel, self).__init__(glb, sql, column_headers, parent) + def samples_view_DataPrep(self, query, count): + data = [] + data.append(query.value(0)) + # Workaround pyside failing to handle large integers (i.e. time) in python3 by converting to a string + data.append("{:>19}".format(query.value(1))) + for i in xrange(2, count): + data.append(query.value(i)) + return data + + def samples_DataPrep(self, query, count): + data = [] + for i in xrange(9): + data.append(query.value(i)) + # Workaround pyside failing to handle large integers (i.e. 
time) in python3 by converting to a string + data.append("{:>19}".format(query.value(9))) + for i in xrange(10, count): + data.append(query.value(i)) + return data + # Base class for custom ResizeColumnsToContents class ResizeColumnsToContentsBase(QObject): @@ -2854,9 +2908,13 @@ class LibXED(): ok = self.xed_format_context(2, inst.xedp, inst.bufferp, sizeof(inst.buffer), ip, 0, 0) if not ok: return 0, "" + if sys.version_info[0] == 2: + result = inst.buffer.value + else: + result = inst.buffer.value.decode() # Return instruction length and the disassembled instruction text # For now, assume the length is in byte 166 - return inst.xedd[166], inst.buffer.value + return inst.xedd[166], result def TryOpen(file_name): try: @@ -2872,9 +2930,14 @@ def Is64Bit(f): header = f.read(7) f.seek(pos) magic = header[0:4] - eclass = ord(header[4]) - encoding = ord(header[5]) - version = ord(header[6]) + if sys.version_info[0] == 2: + eclass = ord(header[4]) + encoding = ord(header[5]) + version = ord(header[6]) + else: + eclass = header[4] + encoding = header[5] + version = header[6] if magic == chr(127) + "ELF" and eclass > 0 and eclass < 3 and encoding > 0 and encoding < 3 and version == 1: result = True if eclass == 2 else False return result @@ -2973,7 +3036,7 @@ class DBRef(): def Main(): if (len(sys.argv) < 2): - print >> sys.stderr, "Usage is: exported-sql-viewer.py {<database name> | --help-only}" + printerr("Usage is: exported-sql-viewer.py {<database name> | --help-only}"); raise Exception("Too few arguments") dbname = sys.argv[1] @@ -2986,8 +3049,8 @@ def Main(): is_sqlite3 = False try: - f = open(dbname) - if f.read(15) == "SQLite format 3": + f = open(dbname, "rb") + if f.read(15) == b'SQLite format 3': is_sqlite3 = True f.close() except: diff --git a/tools/perf/tests/attr/test-record-C0 b/tools/perf/tests/attr/test-record-C0 index cb0a3138fa54..93818054ae20 100644 --- a/tools/perf/tests/attr/test-record-C0 +++ b/tools/perf/tests/attr/test-record-C0 @@ -1,6 +1,6 @@ 
[config] command = record -args = -C 0 kill >/dev/null 2>&1 +args = --no-bpf-event -C 0 kill >/dev/null 2>&1 ret = 1 [event:base-record] diff --git a/tools/perf/tests/attr/test-record-basic b/tools/perf/tests/attr/test-record-basic index 85a23cf35ba1..b0ca42a5ecc9 100644 --- a/tools/perf/tests/attr/test-record-basic +++ b/tools/perf/tests/attr/test-record-basic @@ -1,6 +1,6 @@ [config] command = record -args = kill >/dev/null 2>&1 +args = --no-bpf-event kill >/dev/null 2>&1 ret = 1 [event:base-record] diff --git a/tools/perf/tests/attr/test-record-branch-any b/tools/perf/tests/attr/test-record-branch-any index 81f839e2fad0..1a99b3ce6b89 100644 --- a/tools/perf/tests/attr/test-record-branch-any +++ b/tools/perf/tests/attr/test-record-branch-any @@ -1,6 +1,6 @@ [config] command = record -args = -b kill >/dev/null 2>&1 +args = --no-bpf-event -b kill >/dev/null 2>&1 ret = 1 [event:base-record] diff --git a/tools/perf/tests/attr/test-record-branch-filter-any b/tools/perf/tests/attr/test-record-branch-filter-any index 357421f4dfce..709768b508c6 100644 --- a/tools/perf/tests/attr/test-record-branch-filter-any +++ b/tools/perf/tests/attr/test-record-branch-filter-any @@ -1,6 +1,6 @@ [config] command = record -args = -j any kill >/dev/null 2>&1 +args = --no-bpf-event -j any kill >/dev/null 2>&1 ret = 1 [event:base-record] diff --git a/tools/perf/tests/attr/test-record-branch-filter-any_call b/tools/perf/tests/attr/test-record-branch-filter-any_call index dbc55f2ab845..f943221f7825 100644 --- a/tools/perf/tests/attr/test-record-branch-filter-any_call +++ b/tools/perf/tests/attr/test-record-branch-filter-any_call @@ -1,6 +1,6 @@ [config] command = record -args = -j any_call kill >/dev/null 2>&1 +args = --no-bpf-event -j any_call kill >/dev/null 2>&1 ret = 1 [event:base-record] diff --git a/tools/perf/tests/attr/test-record-branch-filter-any_ret b/tools/perf/tests/attr/test-record-branch-filter-any_ret index a0824ff8e131..fd4f5b4154a9 100644 --- 
a/tools/perf/tests/attr/test-record-branch-filter-any_ret +++ b/tools/perf/tests/attr/test-record-branch-filter-any_ret @@ -1,6 +1,6 @@ [config] command = record -args = -j any_ret kill >/dev/null 2>&1 +args = --no-bpf-event -j any_ret kill >/dev/null 2>&1 ret = 1 [event:base-record] diff --git a/tools/perf/tests/attr/test-record-branch-filter-hv b/tools/perf/tests/attr/test-record-branch-filter-hv index f34d6f120181..4e52d685ebe1 100644 --- a/tools/perf/tests/attr/test-record-branch-filter-hv +++ b/tools/perf/tests/attr/test-record-branch-filter-hv @@ -1,6 +1,6 @@ [config] command = record -args = -j hv kill >/dev/null 2>&1 +args = --no-bpf-event -j hv kill >/dev/null 2>&1 ret = 1 [event:base-record] diff --git a/tools/perf/tests/attr/test-record-branch-filter-ind_call b/tools/perf/tests/attr/test-record-branch-filter-ind_call index b86a35232248..e08c6ab3796e 100644 --- a/tools/perf/tests/attr/test-record-branch-filter-ind_call +++ b/tools/perf/tests/attr/test-record-branch-filter-ind_call @@ -1,6 +1,6 @@ [config] command = record -args = -j ind_call kill >/dev/null 2>&1 +args = --no-bpf-event -j ind_call kill >/dev/null 2>&1 ret = 1 [event:base-record] diff --git a/tools/perf/tests/attr/test-record-branch-filter-k b/tools/perf/tests/attr/test-record-branch-filter-k index d3fbc5e1858a..b4b98f84fc2f 100644 --- a/tools/perf/tests/attr/test-record-branch-filter-k +++ b/tools/perf/tests/attr/test-record-branch-filter-k @@ -1,6 +1,6 @@ [config] command = record -args = -j k kill >/dev/null 2>&1 +args = --no-bpf-event -j k kill >/dev/null 2>&1 ret = 1 [event:base-record] diff --git a/tools/perf/tests/attr/test-record-branch-filter-u b/tools/perf/tests/attr/test-record-branch-filter-u index a318f0dda173..fb9610edbb0d 100644 --- a/tools/perf/tests/attr/test-record-branch-filter-u +++ b/tools/perf/tests/attr/test-record-branch-filter-u @@ -1,6 +1,6 @@ [config] command = record -args = -j u kill >/dev/null 2>&1 +args = --no-bpf-event -j u kill >/dev/null 2>&1 ret = 1 
[event:base-record] diff --git a/tools/perf/tests/attr/test-record-count b/tools/perf/tests/attr/test-record-count index 34f6cc577263..5e9b9019d786 100644 --- a/tools/perf/tests/attr/test-record-count +++ b/tools/perf/tests/attr/test-record-count @@ -1,6 +1,6 @@ [config] command = record -args = -c 123 kill >/dev/null 2>&1 +args = --no-bpf-event -c 123 kill >/dev/null 2>&1 ret = 1 [event:base-record] diff --git a/tools/perf/tests/attr/test-record-data b/tools/perf/tests/attr/test-record-data index a9cf2233b0ce..a99bb13149c2 100644 --- a/tools/perf/tests/attr/test-record-data +++ b/tools/perf/tests/attr/test-record-data @@ -1,6 +1,6 @@ [config] command = record -args = -d kill >/dev/null 2>&1 +args = --no-bpf-event -d kill >/dev/null 2>&1 ret = 1 [event:base-record] diff --git a/tools/perf/tests/attr/test-record-freq b/tools/perf/tests/attr/test-record-freq index bf4cb459f0d5..89e29f6b2ae0 100644 --- a/tools/perf/tests/attr/test-record-freq +++ b/tools/perf/tests/attr/test-record-freq @@ -1,6 +1,6 @@ [config] command = record -args = -F 100 kill >/dev/null 2>&1 +args = --no-bpf-event -F 100 kill >/dev/null 2>&1 ret = 1 [event:base-record] diff --git a/tools/perf/tests/attr/test-record-graph-default b/tools/perf/tests/attr/test-record-graph-default index 0b216e69760c..5d8234d50845 100644 --- a/tools/perf/tests/attr/test-record-graph-default +++ b/tools/perf/tests/attr/test-record-graph-default @@ -1,6 +1,6 @@ [config] command = record -args = -g kill >/dev/null 2>&1 +args = --no-bpf-event -g kill >/dev/null 2>&1 ret = 1 [event:base-record] diff --git a/tools/perf/tests/attr/test-record-graph-dwarf b/tools/perf/tests/attr/test-record-graph-dwarf index da2fa73bd0a2..ae92061d611d 100644 --- a/tools/perf/tests/attr/test-record-graph-dwarf +++ b/tools/perf/tests/attr/test-record-graph-dwarf @@ -1,6 +1,6 @@ [config] command = record -args = --call-graph dwarf -- kill >/dev/null 2>&1 +args = --no-bpf-event --call-graph dwarf -- kill >/dev/null 2>&1 ret = 1 
[event:base-record] diff --git a/tools/perf/tests/attr/test-record-graph-fp b/tools/perf/tests/attr/test-record-graph-fp index 625d190bb798..5630521c0b0f 100644 --- a/tools/perf/tests/attr/test-record-graph-fp +++ b/tools/perf/tests/attr/test-record-graph-fp @@ -1,6 +1,6 @@ [config] command = record -args = --call-graph fp kill >/dev/null 2>&1 +args = --no-bpf-event --call-graph fp kill >/dev/null 2>&1 ret = 1 [event:base-record] diff --git a/tools/perf/tests/attr/test-record-group b/tools/perf/tests/attr/test-record-group index 618ba1c17474..14ee60fd3f41 100644 --- a/tools/perf/tests/attr/test-record-group +++ b/tools/perf/tests/attr/test-record-group @@ -1,6 +1,6 @@ [config] command = record -args = --group -e cycles,instructions kill >/dev/null 2>&1 +args = --no-bpf-event --group -e cycles,instructions kill >/dev/null 2>&1 ret = 1 [event-1:base-record] diff --git a/tools/perf/tests/attr/test-record-group-sampling b/tools/perf/tests/attr/test-record-group-sampling index f0729c454f16..300b9f7e6d69 100644 --- a/tools/perf/tests/attr/test-record-group-sampling +++ b/tools/perf/tests/attr/test-record-group-sampling @@ -1,6 +1,6 @@ [config] command = record -args = -e '{cycles,cache-misses}:S' kill >/dev/null 2>&1 +args = --no-bpf-event -e '{cycles,cache-misses}:S' kill >/dev/null 2>&1 ret = 1 [event-1:base-record] diff --git a/tools/perf/tests/attr/test-record-group1 b/tools/perf/tests/attr/test-record-group1 index 48e8bd12fe46..3ffe246e0228 100644 --- a/tools/perf/tests/attr/test-record-group1 +++ b/tools/perf/tests/attr/test-record-group1 @@ -1,6 +1,6 @@ [config] command = record -args = -e '{cycles,instructions}' kill >/dev/null 2>&1 +args = --no-bpf-event -e '{cycles,instructions}' kill >/dev/null 2>&1 ret = 1 [event-1:base-record] diff --git a/tools/perf/tests/attr/test-record-no-buffering b/tools/perf/tests/attr/test-record-no-buffering index aa3956d8fe20..583dcbb078ba 100644 --- a/tools/perf/tests/attr/test-record-no-buffering +++ 
b/tools/perf/tests/attr/test-record-no-buffering @@ -1,6 +1,6 @@ [config] command = record -args = --no-buffering kill >/dev/null 2>&1 +args = --no-bpf-event --no-buffering kill >/dev/null 2>&1 ret = 1 [event:base-record] diff --git a/tools/perf/tests/attr/test-record-no-inherit b/tools/perf/tests/attr/test-record-no-inherit index 560943decb87..15d1dc162e1c 100644 --- a/tools/perf/tests/attr/test-record-no-inherit +++ b/tools/perf/tests/attr/test-record-no-inherit @@ -1,6 +1,6 @@ [config] command = record -args = -i kill >/dev/null 2>&1 +args = --no-bpf-event -i kill >/dev/null 2>&1 ret = 1 [event:base-record] diff --git a/tools/perf/tests/attr/test-record-no-samples b/tools/perf/tests/attr/test-record-no-samples index 8eb73ab639e0..596fbd6d5a2c 100644 --- a/tools/perf/tests/attr/test-record-no-samples +++ b/tools/perf/tests/attr/test-record-no-samples @@ -1,6 +1,6 @@ [config] command = record -args = -n kill >/dev/null 2>&1 +args = --no-bpf-event -n kill >/dev/null 2>&1 ret = 1 [event:base-record] diff --git a/tools/perf/tests/attr/test-record-period b/tools/perf/tests/attr/test-record-period index 69bc748f0f27..119101154c5e 100644 --- a/tools/perf/tests/attr/test-record-period +++ b/tools/perf/tests/attr/test-record-period @@ -1,6 +1,6 @@ [config] command = record -args = -c 100 -P kill >/dev/null 2>&1 +args = --no-bpf-event -c 100 -P kill >/dev/null 2>&1 ret = 1 [event:base-record] diff --git a/tools/perf/tests/attr/test-record-raw b/tools/perf/tests/attr/test-record-raw index a188a614a44c..13a5f7860c78 100644 --- a/tools/perf/tests/attr/test-record-raw +++ b/tools/perf/tests/attr/test-record-raw @@ -1,6 +1,6 @@ [config] command = record -args = -R kill >/dev/null 2>&1 +args = --no-bpf-event -R kill >/dev/null 2>&1 ret = 1 [event:base-record] diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c index 6d598cc071ae..1a9c3becf5ff 100644 --- a/tools/perf/tests/backward-ring-buffer.c +++ 
b/tools/perf/tests/backward-ring-buffer.c @@ -18,7 +18,7 @@ static void testcase(void) int i; for (i = 0; i < NR_ITERS; i++) { - char proc_name[10]; + char proc_name[15]; snprintf(proc_name, sizeof(proc_name), "p:%d\n", i); prctl(PR_SET_NAME, proc_name); diff --git a/tools/perf/tests/evsel-tp-sched.c b/tools/perf/tests/evsel-tp-sched.c index ea7acf403727..71f60c0f9faa 100644 --- a/tools/perf/tests/evsel-tp-sched.c +++ b/tools/perf/tests/evsel-tp-sched.c @@ -85,5 +85,6 @@ int test__perf_evsel__tp_sched_test(struct test *test __maybe_unused, int subtes if (perf_evsel__test_field(evsel, "target_cpu", 4, true)) ret = -1; + perf_evsel__delete(evsel); return ret; } diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c index 01f0706995a9..9acc1e80b936 100644 --- a/tools/perf/tests/expr.c +++ b/tools/perf/tests/expr.c @@ -19,7 +19,7 @@ int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused) const char *p; const char **other; double val; - int ret; + int i, ret; struct parse_ctx ctx; int num_other; @@ -56,6 +56,9 @@ int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused) TEST_ASSERT_VAL("find other", !strcmp(other[1], "BAZ")); TEST_ASSERT_VAL("find other", !strcmp(other[2], "BOZO")); TEST_ASSERT_VAL("find other", other[3] == NULL); + + for (i = 0; i < num_other; i++) + free((void *)other[i]); free((void *)other); return 0; diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c index c531e6deb104..493ecb611540 100644 --- a/tools/perf/tests/openat-syscall-all-cpus.c +++ b/tools/perf/tests/openat-syscall-all-cpus.c @@ -45,7 +45,7 @@ int test__openat_syscall_event_on_all_cpus(struct test *test __maybe_unused, int if (IS_ERR(evsel)) { tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "syscalls", "sys_enter_openat"); pr_debug("%s\n", errbuf); - goto out_thread_map_delete; + goto out_cpu_map_delete; } if (perf_evsel__open(evsel, cpus, threads) < 0) { @@ -119,6 +119,8 @@ 
out_close_fd: perf_evsel__close_fd(evsel); out_evsel_delete: perf_evsel__delete(evsel); +out_cpu_map_delete: + cpu_map__put(cpus); out_thread_map_delete: thread_map__put(threads); return err; diff --git a/tools/perf/trace/beauty/mmap_flags.sh b/tools/perf/trace/beauty/mmap_flags.sh index 32bac9c0d694..5f5eefcb3c74 100755 --- a/tools/perf/trace/beauty/mmap_flags.sh +++ b/tools/perf/trace/beauty/mmap_flags.sh @@ -1,15 +1,18 @@ #!/bin/sh # SPDX-License-Identifier: LGPL-2.1 -if [ $# -ne 2 ] ; then +if [ $# -ne 3 ] ; then [ $# -eq 1 ] && hostarch=$1 || hostarch=`uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/` + linux_header_dir=tools/include/uapi/linux header_dir=tools/include/uapi/asm-generic arch_header_dir=tools/arch/${hostarch}/include/uapi/asm else - header_dir=$1 - arch_header_dir=$2 + linux_header_dir=$1 + header_dir=$2 + arch_header_dir=$3 fi +linux_mman=${linux_header_dir}/mman.h arch_mman=${arch_header_dir}/mman.h # those in egrep -vw are flags, we want just the bits @@ -20,6 +23,11 @@ egrep -q $regex ${arch_mman} && \ (egrep $regex ${arch_mman} | \ sed -r "s/$regex/\2 \1/g" | \ xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n") +egrep -q $regex ${linux_mman} && \ +(egrep $regex ${linux_mman} | \ + egrep -vw 'MAP_(UNINITIALIZED|TYPE|SHARED_VALIDATE)' | \ + sed -r "s/$regex/\2 \1/g" | \ + xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n") ([ ! 
-f ${arch_mman} ] || egrep -q '#[[:space:]]*include[[:space:]]+<uapi/asm-generic/mman.*' ${arch_mman}) && (egrep $regex ${header_dir}/mman-common.h | \ egrep -vw 'MAP_(UNINITIALIZED|TYPE|SHARED_VALIDATE)' | \ diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c index 4f75561424ed..4ad37d8c7d6a 100644 --- a/tools/perf/ui/browser.c +++ b/tools/perf/ui/browser.c @@ -611,14 +611,16 @@ void ui_browser__argv_seek(struct ui_browser *browser, off_t offset, int whence) browser->top = browser->entries; break; case SEEK_CUR: - browser->top = browser->top + browser->top_idx + offset; + browser->top = (char **)browser->top + offset; break; case SEEK_END: - browser->top = browser->top + browser->nr_entries - 1 + offset; + browser->top = (char **)browser->entries + browser->nr_entries - 1 + offset; break; default: return; } + assert((char **)browser->top < (char **)browser->entries + browser->nr_entries); + assert((char **)browser->top >= (char **)browser->entries); } unsigned int ui_browser__argv_refresh(struct ui_browser *browser) @@ -630,7 +632,9 @@ unsigned int ui_browser__argv_refresh(struct ui_browser *browser) browser->top = browser->entries; pos = (char **)browser->top; - while (idx < browser->nr_entries) { + while (idx < browser->nr_entries && + row < (unsigned)SLtt_Screen_Rows - 1) { + assert(pos < (char **)browser->entries + browser->nr_entries); if (!browser->filter || !browser->filter(browser, *pos)) { ui_browser__gotorc(browser, row, 0); browser->write(browser, pos, row); diff --git a/tools/perf/ui/browsers/Build b/tools/perf/ui/browsers/Build index 8fee56b46502..fdf86f7981ca 100644 --- a/tools/perf/ui/browsers/Build +++ b/tools/perf/ui/browsers/Build @@ -3,6 +3,7 @@ perf-y += hists.o perf-y += map.o perf-y += scripts.o perf-y += header.o +perf-y += res_sample.o CFLAGS_annotate.o += -DENABLE_SLFUTURE_CONST CFLAGS_hists.o += -DENABLE_SLFUTURE_CONST diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 
35bdfd8b1e71..98d934a36d86 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -750,7 +750,7 @@ static int annotate_browser__run(struct annotate_browser *browser, continue; case 'r': { - script_browse(NULL); + script_browse(NULL, NULL); continue; } case 'k': diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index aef800d97ea1..3421ecbdd3f0 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -7,6 +7,7 @@ #include <string.h> #include <linux/rbtree.h> #include <sys/ttydefaults.h> +#include <linux/time64.h> #include "../../util/callchain.h" #include "../../util/evsel.h" @@ -30,6 +31,7 @@ #include "srcline.h" #include "string2.h" #include "units.h" +#include "time-utils.h" #include "sane_ctype.h" @@ -1224,6 +1226,8 @@ void hist_browser__init_hpp(void) hist_browser__hpp_color_overhead_guest_us; perf_hpp__format[PERF_HPP__OVERHEAD_ACC].color = hist_browser__hpp_color_overhead_acc; + + res_sample_init(); } static int hist_browser__show_entry(struct hist_browser *browser, @@ -2338,9 +2342,12 @@ close_file_and_continue: } struct popup_action { + unsigned long time; struct thread *thread; struct map_symbol ms; int socket; + struct perf_evsel *evsel; + enum rstype rstype; int (*fn)(struct hist_browser *browser, struct popup_action *act); }; @@ -2527,46 +2534,137 @@ static int do_run_script(struct hist_browser *browser __maybe_unused, struct popup_action *act) { - char script_opt[64]; - memset(script_opt, 0, sizeof(script_opt)); + char *script_opt; + int len; + int n = 0; + len = 100; + if (act->thread) + len += strlen(thread__comm_str(act->thread)); + else if (act->ms.sym) + len += strlen(act->ms.sym->name); + script_opt = malloc(len); + if (!script_opt) + return -1; + + script_opt[0] = 0; if (act->thread) { - scnprintf(script_opt, sizeof(script_opt), " -c %s ", + n = scnprintf(script_opt, len, " -c %s ", thread__comm_str(act->thread)); } else if (act->ms.sym) { - 
scnprintf(script_opt, sizeof(script_opt), " -S %s ", + n = scnprintf(script_opt, len, " -S %s ", act->ms.sym->name); } - script_browse(script_opt); + if (act->time) { + char start[32], end[32]; + unsigned long starttime = act->time; + unsigned long endtime = act->time + symbol_conf.time_quantum; + + if (starttime == endtime) { /* Display 1ms as fallback */ + starttime -= 1*NSEC_PER_MSEC; + endtime += 1*NSEC_PER_MSEC; + } + timestamp__scnprintf_usec(starttime, start, sizeof start); + timestamp__scnprintf_usec(endtime, end, sizeof end); + n += snprintf(script_opt + n, len - n, " --time %s,%s", start, end); + } + + script_browse(script_opt, act->evsel); + free(script_opt); return 0; } static int -add_script_opt(struct hist_browser *browser __maybe_unused, +do_res_sample_script(struct hist_browser *browser __maybe_unused, + struct popup_action *act) +{ + struct hist_entry *he; + + he = hist_browser__selected_entry(browser); + res_sample_browse(he->res_samples, he->num_res, act->evsel, act->rstype); + return 0; +} + +static int +add_script_opt_2(struct hist_browser *browser __maybe_unused, struct popup_action *act, char **optstr, - struct thread *thread, struct symbol *sym) + struct thread *thread, struct symbol *sym, + struct perf_evsel *evsel, const char *tstr) { + if (thread) { - if (asprintf(optstr, "Run scripts for samples of thread [%s]", - thread__comm_str(thread)) < 0) + if (asprintf(optstr, "Run scripts for samples of thread [%s]%s", + thread__comm_str(thread), tstr) < 0) return 0; } else if (sym) { - if (asprintf(optstr, "Run scripts for samples of symbol [%s]", - sym->name) < 0) + if (asprintf(optstr, "Run scripts for samples of symbol [%s]%s", + sym->name, tstr) < 0) return 0; } else { - if (asprintf(optstr, "Run scripts for all samples") < 0) + if (asprintf(optstr, "Run scripts for all samples%s", tstr) < 0) return 0; } act->thread = thread; act->ms.sym = sym; + act->evsel = evsel; act->fn = do_run_script; return 1; } static int +add_script_opt(struct 
hist_browser *browser, + struct popup_action *act, char **optstr, + struct thread *thread, struct symbol *sym, + struct perf_evsel *evsel) +{ + int n, j; + struct hist_entry *he; + + n = add_script_opt_2(browser, act, optstr, thread, sym, evsel, ""); + + he = hist_browser__selected_entry(browser); + if (sort_order && strstr(sort_order, "time")) { + char tstr[128]; + + optstr++; + act++; + j = sprintf(tstr, " in "); + j += timestamp__scnprintf_usec(he->time, tstr + j, + sizeof tstr - j); + j += sprintf(tstr + j, "-"); + timestamp__scnprintf_usec(he->time + symbol_conf.time_quantum, + tstr + j, sizeof tstr - j); + n += add_script_opt_2(browser, act, optstr, thread, sym, + evsel, tstr); + act->time = he->time; + } + return n; +} + +static int +add_res_sample_opt(struct hist_browser *browser __maybe_unused, + struct popup_action *act, char **optstr, + struct res_sample *res_sample, + struct perf_evsel *evsel, + enum rstype type) +{ + if (!res_sample) + return 0; + + if (asprintf(optstr, "Show context for individual samples %s", + type == A_ASM ? "with assembler" : + type == A_SOURCE ? 
"with source" : "") < 0) + return 0; + + act->fn = do_res_sample_script; + act->evsel = evsel; + act->rstype = type; + return 1; +} + +static int do_switch_data(struct hist_browser *browser __maybe_unused, struct popup_action *act __maybe_unused) { @@ -3031,7 +3129,7 @@ skip_annotation: nr_options += add_script_opt(browser, &actions[nr_options], &options[nr_options], - thread, NULL); + thread, NULL, evsel); } /* * Note that browser->selection != NULL @@ -3046,11 +3144,24 @@ skip_annotation: nr_options += add_script_opt(browser, &actions[nr_options], &options[nr_options], - NULL, browser->selection->sym); + NULL, browser->selection->sym, + evsel); } } nr_options += add_script_opt(browser, &actions[nr_options], - &options[nr_options], NULL, NULL); + &options[nr_options], NULL, NULL, evsel); + nr_options += add_res_sample_opt(browser, &actions[nr_options], + &options[nr_options], + hist_browser__selected_entry(browser)->res_samples, + evsel, A_NORMAL); + nr_options += add_res_sample_opt(browser, &actions[nr_options], + &options[nr_options], + hist_browser__selected_entry(browser)->res_samples, + evsel, A_ASM); + nr_options += add_res_sample_opt(browser, &actions[nr_options], + &options[nr_options], + hist_browser__selected_entry(browser)->res_samples, + evsel, A_SOURCE); nr_options += add_switch_opt(browser, &actions[nr_options], &options[nr_options]); skip_scripting: diff --git a/tools/perf/ui/browsers/res_sample.c b/tools/perf/ui/browsers/res_sample.c new file mode 100644 index 000000000000..c0dd73176d42 --- /dev/null +++ b/tools/perf/ui/browsers/res_sample.c @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Display a menu with individual samples to browse with perf script */ +#include "util.h" +#include "hist.h" +#include "evsel.h" +#include "hists.h" +#include "sort.h" +#include "config.h" +#include "time-utils.h" +#include <linux/time64.h> + +static u64 context_len = 10 * NSEC_PER_MSEC; + +static int res_sample_config(const char *var, const char *value, 
void *data __maybe_unused) +{ + if (!strcmp(var, "samples.context")) + return perf_config_u64(&context_len, var, value); + return 0; +} + +void res_sample_init(void) +{ + perf_config(res_sample_config, NULL); +} + +int res_sample_browse(struct res_sample *res_samples, int num_res, + struct perf_evsel *evsel, enum rstype rstype) +{ + char **names; + int i, n; + int choice; + char *cmd; + char pbuf[256], tidbuf[32], cpubuf[32]; + const char *perf = perf_exe(pbuf, sizeof pbuf); + char trange[128], tsample[64]; + struct res_sample *r; + char extra_format[256]; + + names = calloc(num_res, sizeof(char *)); + if (!names) + return -1; + for (i = 0; i < num_res; i++) { + char tbuf[64]; + + timestamp__scnprintf_nsec(res_samples[i].time, tbuf, sizeof tbuf); + if (asprintf(&names[i], "%s: CPU %d tid %d", tbuf, + res_samples[i].cpu, res_samples[i].tid) < 0) { + while (--i >= 0) + free(names[i]); + free(names); + return -1; + } + } + choice = ui__popup_menu(num_res, names); + for (i = 0; i < num_res; i++) + free(names[i]); + free(names); + + if (choice < 0 || choice >= num_res) + return -1; + r = &res_samples[choice]; + + n = timestamp__scnprintf_nsec(r->time - context_len, trange, sizeof trange); + trange[n++] = ','; + timestamp__scnprintf_nsec(r->time + context_len, trange + n, sizeof trange - n); + + timestamp__scnprintf_nsec(r->time, tsample, sizeof tsample); + + attr_to_script(extra_format, &evsel->attr); + + if (asprintf(&cmd, "%s script %s%s --time %s %s%s %s%s --ns %s %s %s %s %s | less +/%s", + perf, + input_name ? "-i " : "", + input_name ? input_name : "", + trange, + r->cpu >= 0 ? "--cpu " : "", + r->cpu >= 0 ? (sprintf(cpubuf, "%d", r->cpu), cpubuf) : "", + r->tid ? "--tid " : "", + r->tid ? (sprintf(tidbuf, "%d", r->tid), tidbuf) : "", + extra_format, + rstype == A_ASM ? "-F +insn --xed" : + rstype == A_SOURCE ? "-F +srcline,+srccode" : "", + symbol_conf.inline_name ? "--inline" : "", + "--show-lost-events ", + r->tid ? 
"--show-switch-events --show-task-events " : "", + tsample) < 0) + return -1; + run_script(cmd); + free(cmd); + return 0; +} diff --git a/tools/perf/ui/browsers/scripts.c b/tools/perf/ui/browsers/scripts.c index 90a32ac69e76..27cf3ab88d13 100644 --- a/tools/perf/ui/browsers/scripts.c +++ b/tools/perf/ui/browsers/scripts.c @@ -1,34 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 -#include <elf.h> -#include <inttypes.h> -#include <sys/ttydefaults.h> -#include <string.h> #include "../../util/sort.h" #include "../../util/util.h" #include "../../util/hist.h" #include "../../util/debug.h" #include "../../util/symbol.h" #include "../browser.h" -#include "../helpline.h" #include "../libslang.h" - -/* 2048 lines should be enough for a script output */ -#define MAX_LINES 2048 - -/* 160 bytes for one output line */ -#define AVERAGE_LINE_LEN 160 - -struct script_line { - struct list_head node; - char line[AVERAGE_LINE_LEN]; -}; - -struct perf_script_browser { - struct ui_browser b; - struct list_head entries; - const char *script_name; - int nr_lines; -}; +#include "config.h" #define SCRIPT_NAMELEN 128 #define SCRIPT_MAX_NO 64 @@ -40,149 +18,169 @@ struct perf_script_browser { */ #define SCRIPT_FULLPATH_LEN 256 +struct script_config { + const char **names; + char **paths; + int index; + const char *perf; + char extra_format[256]; +}; + +void attr_to_script(char *extra_format, struct perf_event_attr *attr) +{ + extra_format[0] = 0; + if (attr->read_format & PERF_FORMAT_GROUP) + strcat(extra_format, " -F +metric"); + if (attr->sample_type & PERF_SAMPLE_BRANCH_STACK) + strcat(extra_format, " -F +brstackinsn --xed"); + if (attr->sample_type & PERF_SAMPLE_REGS_INTR) + strcat(extra_format, " -F +iregs"); + if (attr->sample_type & PERF_SAMPLE_REGS_USER) + strcat(extra_format, " -F +uregs"); + if (attr->sample_type & PERF_SAMPLE_PHYS_ADDR) + strcat(extra_format, " -F +phys_addr"); +} + +static int add_script_option(const char *name, const char *opt, + struct script_config *c) +{ + 
c->names[c->index] = name; + if (asprintf(&c->paths[c->index], + "%s script %s -F +metric %s %s", + c->perf, opt, symbol_conf.inline_name ? " --inline" : "", + c->extra_format) < 0) + return -1; + c->index++; + return 0; +} + +static int scripts_config(const char *var, const char *value, void *data) +{ + struct script_config *c = data; + + if (!strstarts(var, "scripts.")) + return -1; + if (c->index >= SCRIPT_MAX_NO) + return -1; + c->names[c->index] = strdup(var + 7); + if (!c->names[c->index]) + return -1; + if (asprintf(&c->paths[c->index], "%s %s", value, + c->extra_format) < 0) + return -1; + c->index++; + return 0; +} + /* * When success, will copy the full path of the selected script * into the buffer pointed by script_name, and return 0. * Return -1 on failure. */ -static int list_scripts(char *script_name) +static int list_scripts(char *script_name, bool *custom, + struct perf_evsel *evsel) { - char *buf, *names[SCRIPT_MAX_NO], *paths[SCRIPT_MAX_NO]; - int i, num, choice, ret = -1; + char *buf, *paths[SCRIPT_MAX_NO], *names[SCRIPT_MAX_NO]; + int i, num, choice; + int ret = 0; + int max_std, custom_perf; + char pbuf[256]; + const char *perf = perf_exe(pbuf, sizeof pbuf); + struct script_config scriptc = { + .names = (const char **)names, + .paths = paths, + .perf = perf + }; + + script_name[0] = 0; /* Preset the script name to SCRIPT_NAMELEN */ buf = malloc(SCRIPT_MAX_NO * (SCRIPT_NAMELEN + SCRIPT_FULLPATH_LEN)); if (!buf) - return ret; + return -1; - for (i = 0; i < SCRIPT_MAX_NO; i++) { - names[i] = buf + i * (SCRIPT_NAMELEN + SCRIPT_FULLPATH_LEN); + if (evsel) + attr_to_script(scriptc.extra_format, &evsel->attr); + add_script_option("Show individual samples", "", &scriptc); + add_script_option("Show individual samples with assembler", "-F +insn --xed", + &scriptc); + add_script_option("Show individual samples with source", "-F +srcline,+srccode", + &scriptc); + perf_config(scripts_config, &scriptc); + custom_perf = scriptc.index; + 
add_script_option("Show samples with custom perf script arguments", "", &scriptc); + i = scriptc.index; + max_std = i; + + for (; i < SCRIPT_MAX_NO; i++) { + names[i] = buf + (i - max_std) * (SCRIPT_NAMELEN + SCRIPT_FULLPATH_LEN); paths[i] = names[i] + SCRIPT_NAMELEN; } - num = find_scripts(names, paths); - if (num > 0) { - choice = ui__popup_menu(num, names); - if (choice < num && choice >= 0) { - strcpy(script_name, paths[choice]); - ret = 0; - } + num = find_scripts(names + max_std, paths + max_std, SCRIPT_MAX_NO - max_std, + SCRIPT_FULLPATH_LEN); + if (num < 0) + num = 0; + choice = ui__popup_menu(num + max_std, (char * const *)names); + if (choice < 0) { + ret = -1; + goto out; } + if (choice == custom_perf) { + char script_args[50]; + int key = ui_browser__input_window("perf script command", + "Enter perf script command line (without perf script prefix)", + script_args, "", 0); + if (key != K_ENTER) + return -1; + sprintf(script_name, "%s script %s", perf, script_args); + } else if (choice < num + max_std) { + strcpy(script_name, paths[choice]); + } + *custom = choice >= max_std; +out: free(buf); + for (i = 0; i < max_std; i++) + free(paths[i]); return ret; } -static void script_browser__write(struct ui_browser *browser, - void *entry, int row) +void run_script(char *cmd) { - struct script_line *sline = list_entry(entry, struct script_line, node); - bool current_entry = ui_browser__is_current_entry(browser, row); - - ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED : - HE_COLORSET_NORMAL); - - ui_browser__write_nstring(browser, sline->line, browser->width); + pr_debug("Running %s\n", cmd); + SLang_reset_tty(); + if (system(cmd) < 0) + pr_warning("Cannot run %s\n", cmd); + /* + * SLang doesn't seem to reset the whole terminal, so be more + * forceful to get back to the original state. 
+ */ + printf("\033[c\033[H\033[J"); + fflush(stdout); + SLang_init_tty(0, 0, 0); + SLsmg_refresh(); } -static int script_browser__run(struct perf_script_browser *browser) +int script_browse(const char *script_opt, struct perf_evsel *evsel) { - int key; + char *cmd, script_name[SCRIPT_FULLPATH_LEN]; + bool custom = false; - if (ui_browser__show(&browser->b, browser->script_name, - "Press ESC to exit") < 0) + memset(script_name, 0, SCRIPT_FULLPATH_LEN); + if (list_scripts(script_name, &custom, evsel)) return -1; - while (1) { - key = ui_browser__run(&browser->b, 0); - - /* We can add some special key handling here if needed */ - break; - } - - ui_browser__hide(&browser->b); - return key; -} - - -int script_browse(const char *script_opt) -{ - char cmd[SCRIPT_FULLPATH_LEN*2], script_name[SCRIPT_FULLPATH_LEN]; - char *line = NULL; - size_t len = 0; - ssize_t retlen; - int ret = -1, nr_entries = 0; - FILE *fp; - void *buf; - struct script_line *sline; - - struct perf_script_browser script = { - .b = { - .refresh = ui_browser__list_head_refresh, - .seek = ui_browser__list_head_seek, - .write = script_browser__write, - }, - .script_name = script_name, - }; - - INIT_LIST_HEAD(&script.entries); - - /* Save each line of the output in one struct script_line object. */ - buf = zalloc((sizeof(*sline)) * MAX_LINES); - if (!buf) + if (asprintf(&cmd, "%s%s %s %s%s 2>&1 | less", + custom ? "perf script -s " : "", + script_name, + script_opt ? script_opt : "", + input_name ? "-i " : "", + input_name ? 
input_name : "") < 0) return -1; - sline = buf; - - memset(script_name, 0, SCRIPT_FULLPATH_LEN); - if (list_scripts(script_name)) - goto exit; - - sprintf(cmd, "perf script -s %s ", script_name); - if (script_opt) - strcat(cmd, script_opt); + run_script(cmd); + free(cmd); - if (input_name) { - strcat(cmd, " -i "); - strcat(cmd, input_name); - } - - strcat(cmd, " 2>&1"); - - fp = popen(cmd, "r"); - if (!fp) - goto exit; - - while ((retlen = getline(&line, &len, fp)) != -1) { - strncpy(sline->line, line, AVERAGE_LINE_LEN); - - /* If one output line is very large, just cut it short */ - if (retlen >= AVERAGE_LINE_LEN) { - sline->line[AVERAGE_LINE_LEN - 1] = '\0'; - sline->line[AVERAGE_LINE_LEN - 2] = '\n'; - } - list_add_tail(&sline->node, &script.entries); - - if (script.b.width < retlen) - script.b.width = retlen; - - if (nr_entries++ >= MAX_LINES - 1) - break; - sline++; - } - - if (script.b.width > AVERAGE_LINE_LEN) - script.b.width = AVERAGE_LINE_LEN; - - free(line); - pclose(fp); - - script.nr_lines = nr_entries; - script.b.nr_entries = nr_entries; - script.b.entries = &script.entries; - - ret = script_browser__run(&script); -exit: - free(buf); - return ret; + return 0; } diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 5f6dbbf5d749..c8b01176c9e1 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -10,6 +10,10 @@ #include <errno.h> #include <inttypes.h> #include <libgen.h> +#include <bpf/bpf.h> +#include <bpf/btf.h> +#include <bpf/libbpf.h> +#include <linux/btf.h> #include "util.h" #include "ui/ui.h" #include "sort.h" @@ -24,6 +28,7 @@ #include "annotate.h" #include "evsel.h" #include "evlist.h" +#include "bpf-event.h" #include "block-range.h" #include "string2.h" #include "arch/common.h" @@ -31,6 +36,7 @@ #include <pthread.h> #include <linux/bitops.h> #include <linux/kernel.h> +#include <bpf/libbpf.h> /* FIXME: For the HE_COLORSET */ #include "ui/browser.h" @@ -1615,6 +1621,9 @@ int 
symbol__strerror_disassemble(struct symbol *sym __maybe_unused, struct map * " --vmlinux vmlinux\n", build_id_msg ?: ""); } break; + case SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF: + scnprintf(buf, buflen, "Please link with binutils's libopcode to enable BPF annotation"); + break; default: scnprintf(buf, buflen, "Internal error: Invalid %d error code\n", errnum); break; @@ -1674,6 +1683,156 @@ fallback: return 0; } +#if defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT) +#define PACKAGE "perf" +#include <bfd.h> +#include <dis-asm.h> + +static int symbol__disassemble_bpf(struct symbol *sym, + struct annotate_args *args) +{ + struct annotation *notes = symbol__annotation(sym); + struct annotation_options *opts = args->options; + struct bpf_prog_info_linear *info_linear; + struct bpf_prog_linfo *prog_linfo = NULL; + struct bpf_prog_info_node *info_node; + int len = sym->end - sym->start; + disassembler_ftype disassemble; + struct map *map = args->ms.map; + struct disassemble_info info; + struct dso *dso = map->dso; + int pc = 0, count, sub_id; + struct btf *btf = NULL; + char tpath[PATH_MAX]; + size_t buf_size; + int nr_skip = 0; + int ret = -1; + char *buf; + bfd *bfdf; + FILE *s; + + if (dso->binary_type != DSO_BINARY_TYPE__BPF_PROG_INFO) + return -1; + + pr_debug("%s: handling sym %s addr %lx len %lx\n", __func__, + sym->name, sym->start, sym->end - sym->start); + + memset(tpath, 0, sizeof(tpath)); + perf_exe(tpath, sizeof(tpath)); + + bfdf = bfd_openr(tpath, NULL); + assert(bfdf); + assert(bfd_check_format(bfdf, bfd_object)); + + s = open_memstream(&buf, &buf_size); + if (!s) + goto out; + init_disassemble_info(&info, s, + (fprintf_ftype) fprintf); + + info.arch = bfd_get_arch(bfdf); + info.mach = bfd_get_mach(bfdf); + + info_node = perf_env__find_bpf_prog_info(dso->bpf_prog.env, + dso->bpf_prog.id); + if (!info_node) + goto out; + info_linear = info_node->info_linear; + sub_id = dso->bpf_prog.sub_id; + + info.buffer = (void 
*)(info_linear->info.jited_prog_insns); + info.buffer_length = info_linear->info.jited_prog_len; + + if (info_linear->info.nr_line_info) + prog_linfo = bpf_prog_linfo__new(&info_linear->info); + + if (info_linear->info.btf_id) { + struct btf_node *node; + + node = perf_env__find_btf(dso->bpf_prog.env, + info_linear->info.btf_id); + if (node) + btf = btf__new((__u8 *)(node->data), + node->data_size); + } + + disassemble_init_for_target(&info); + +#ifdef DISASM_FOUR_ARGS_SIGNATURE + disassemble = disassembler(info.arch, + bfd_big_endian(bfdf), + info.mach, + bfdf); +#else + disassemble = disassembler(bfdf); +#endif + assert(disassemble); + + fflush(s); + do { + const struct bpf_line_info *linfo = NULL; + struct disasm_line *dl; + size_t prev_buf_size; + const char *srcline; + u64 addr; + + addr = pc + ((u64 *)(info_linear->info.jited_ksyms))[sub_id]; + count = disassemble(pc, &info); + + if (prog_linfo) + linfo = bpf_prog_linfo__lfind_addr_func(prog_linfo, + addr, sub_id, + nr_skip); + + if (linfo && btf) { + srcline = btf__name_by_offset(btf, linfo->line_off); + nr_skip++; + } else + srcline = NULL; + + fprintf(s, "\n"); + prev_buf_size = buf_size; + fflush(s); + + if (!opts->hide_src_code && srcline) { + args->offset = -1; + args->line = strdup(srcline); + args->line_nr = 0; + args->ms.sym = sym; + dl = disasm_line__new(args); + if (dl) { + annotation_line__add(&dl->al, + ¬es->src->source); + } + } + + args->offset = pc; + args->line = buf + prev_buf_size; + args->line_nr = 0; + args->ms.sym = sym; + dl = disasm_line__new(args); + if (dl) + annotation_line__add(&dl->al, ¬es->src->source); + + pc += count; + } while (count > 0 && pc < len); + + ret = 0; +out: + free(prog_linfo); + free(btf); + fclose(s); + bfd_close(bfdf); + return ret; +} +#else // defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT) +static int symbol__disassemble_bpf(struct symbol *sym __maybe_unused, + struct annotate_args *args __maybe_unused) +{ + return 
SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF; +} +#endif // defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT) + static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) { struct annotation_options *opts = args->options; @@ -1701,7 +1860,9 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) pr_debug("annotating [%p] %30s : [%p] %30s\n", dso, dso->long_name, sym, sym->name); - if (dso__is_kcore(dso)) { + if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO) { + return symbol__disassemble_bpf(sym, args); + } else if (dso__is_kcore(dso)) { kce.kcore_filename = symfs_filename; kce.addr = map__rip_2objdump(map, sym->start); kce.offs = sym->start; diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index df34fe483164..5bc0cf655d37 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -369,6 +369,7 @@ enum symbol_disassemble_errno { __SYMBOL_ANNOTATE_ERRNO__START = -10000, SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX = __SYMBOL_ANNOTATE_ERRNO__START, + SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF, __SYMBOL_ANNOTATE_ERRNO__END, }; diff --git a/tools/perf/util/archinsn.h b/tools/perf/util/archinsn.h new file mode 100644 index 000000000000..448cbb6b8d7e --- /dev/null +++ b/tools/perf/util/archinsn.h @@ -0,0 +1,12 @@ +#ifndef INSN_H +#define INSN_H 1 + +struct perf_sample; +struct machine; +struct thread; + +void arch_fetch_insn(struct perf_sample *sample, + struct thread *thread, + struct machine *machine); + +#endif diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index 028c8ec1f62a..2a4a0da35632 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -3,11 +3,17 @@ #include <stdlib.h> #include <bpf/bpf.h> #include <bpf/btf.h> +#include <bpf/libbpf.h> #include <linux/btf.h> +#include <linux/err.h> #include "bpf-event.h" #include "debug.h" #include "symbol.h" #include "machine.h" +#include "env.h" +#include "session.h" +#include "map.h" 
+#include "evlist.h" #define ptr_to_u64(ptr) ((__u64)(unsigned long)(ptr)) @@ -21,15 +27,122 @@ static int snprintf_hex(char *buf, size_t size, unsigned char *data, size_t len) return ret; } +static int machine__process_bpf_event_load(struct machine *machine, + union perf_event *event, + struct perf_sample *sample __maybe_unused) +{ + struct bpf_prog_info_linear *info_linear; + struct bpf_prog_info_node *info_node; + struct perf_env *env = machine->env; + int id = event->bpf_event.id; + unsigned int i; + + /* perf-record, no need to handle bpf-event */ + if (env == NULL) + return 0; + + info_node = perf_env__find_bpf_prog_info(env, id); + if (!info_node) + return 0; + info_linear = info_node->info_linear; + + for (i = 0; i < info_linear->info.nr_jited_ksyms; i++) { + u64 *addrs = (u64 *)(uintptr_t)(info_linear->info.jited_ksyms); + u64 addr = addrs[i]; + struct map *map; + + map = map_groups__find(&machine->kmaps, addr); + + if (map) { + map->dso->binary_type = DSO_BINARY_TYPE__BPF_PROG_INFO; + map->dso->bpf_prog.id = id; + map->dso->bpf_prog.sub_id = i; + map->dso->bpf_prog.env = env; + } + } + return 0; +} + int machine__process_bpf_event(struct machine *machine __maybe_unused, union perf_event *event, struct perf_sample *sample __maybe_unused) { if (dump_trace) perf_event__fprintf_bpf_event(event, stdout); + + switch (event->bpf_event.type) { + case PERF_BPF_EVENT_PROG_LOAD: + return machine__process_bpf_event_load(machine, event, sample); + + case PERF_BPF_EVENT_PROG_UNLOAD: + /* + * Do not free bpf_prog_info and btf of the program here, + * as annotation still need them. They will be freed at + * the end of the session. 
+ */ + break; + default: + pr_debug("unexpected bpf_event type of %d\n", + event->bpf_event.type); + break; + } return 0; } +static int perf_env__fetch_btf(struct perf_env *env, + u32 btf_id, + struct btf *btf) +{ + struct btf_node *node; + u32 data_size; + const void *data; + + data = btf__get_raw_data(btf, &data_size); + + node = malloc(data_size + sizeof(struct btf_node)); + if (!node) + return -1; + + node->id = btf_id; + node->data_size = data_size; + memcpy(node->data, data, data_size); + + perf_env__insert_btf(env, node); + return 0; +} + +static int synthesize_bpf_prog_name(char *buf, int size, + struct bpf_prog_info *info, + struct btf *btf, + u32 sub_id) +{ + u8 (*prog_tags)[BPF_TAG_SIZE] = (void *)(uintptr_t)(info->prog_tags); + void *func_infos = (void *)(uintptr_t)(info->func_info); + u32 sub_prog_cnt = info->nr_jited_ksyms; + const struct bpf_func_info *finfo; + const char *short_name = NULL; + const struct btf_type *t; + int name_len; + + name_len = snprintf(buf, size, "bpf_prog_"); + name_len += snprintf_hex(buf + name_len, size - name_len, + prog_tags[sub_id], BPF_TAG_SIZE); + if (btf) { + finfo = func_infos + sub_id * info->func_info_rec_size; + t = btf__type_by_id(btf, finfo->type_id); + short_name = btf__name_by_offset(btf, t->name_off); + } else if (sub_id == 0 && sub_prog_cnt == 1) { + /* no subprog */ + if (info->name[0]) + short_name = info->name; + } else + short_name = "F"; + if (short_name) + name_len += snprintf(buf + name_len, size - name_len, + "_%s", short_name); + return name_len; +} + /* * Synthesize PERF_RECORD_KSYMBOL and PERF_RECORD_BPF_EVENT for one bpf * program. One PERF_RECORD_BPF_EVENT is generated for the program. And @@ -40,7 +153,7 @@ int machine__process_bpf_event(struct machine *machine __maybe_unused, * -1 for failures; * -2 for lack of kernel support. 
*/ -static int perf_event__synthesize_one_bpf_prog(struct perf_tool *tool, +static int perf_event__synthesize_one_bpf_prog(struct perf_session *session, perf_event__handler_t process, struct machine *machine, int fd, @@ -49,102 +162,71 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_tool *tool, { struct ksymbol_event *ksymbol_event = &event->ksymbol_event; struct bpf_event *bpf_event = &event->bpf_event; - u32 sub_prog_cnt, i, func_info_rec_size = 0; - u8 (*prog_tags)[BPF_TAG_SIZE] = NULL; - struct bpf_prog_info info = { .type = 0, }; - u32 info_len = sizeof(info); - void *func_infos = NULL; - u64 *prog_addrs = NULL; + struct bpf_prog_info_linear *info_linear; + struct perf_tool *tool = session->tool; + struct bpf_prog_info_node *info_node; + struct bpf_prog_info *info; struct btf *btf = NULL; - u32 *prog_lens = NULL; - bool has_btf = false; - char errbuf[512]; + struct perf_env *env; + u32 sub_prog_cnt, i; int err = 0; + u64 arrays; + + /* + * for perf-record and perf-report use header.env; + * otherwise, use global perf_env. + */ + env = session->data ? &session->header.env : &perf_env; - /* Call bpf_obj_get_info_by_fd() to get sizes of arrays */ - err = bpf_obj_get_info_by_fd(fd, &info, &info_len); + arrays = 1UL << BPF_PROG_INFO_JITED_KSYMS; + arrays |= 1UL << BPF_PROG_INFO_JITED_FUNC_LENS; + arrays |= 1UL << BPF_PROG_INFO_FUNC_INFO; + arrays |= 1UL << BPF_PROG_INFO_PROG_TAGS; + arrays |= 1UL << BPF_PROG_INFO_JITED_INSNS; + arrays |= 1UL << BPF_PROG_INFO_LINE_INFO; + arrays |= 1UL << BPF_PROG_INFO_JITED_LINE_INFO; - if (err) { - pr_debug("%s: failed to get BPF program info: %s, aborting\n", - __func__, str_error_r(errno, errbuf, sizeof(errbuf))); + info_linear = bpf_program__get_prog_info_linear(fd, arrays); + if (IS_ERR_OR_NULL(info_linear)) { + info_linear = NULL; + pr_debug("%s: failed to get BPF program info. 
aborting\n", __func__); return -1; } - if (info_len < offsetof(struct bpf_prog_info, prog_tags)) { + + if (info_linear->info_len < offsetof(struct bpf_prog_info, prog_tags)) { pr_debug("%s: the kernel is too old, aborting\n", __func__); return -2; } + info = &info_linear->info; + /* number of ksyms, func_lengths, and tags should match */ - sub_prog_cnt = info.nr_jited_ksyms; - if (sub_prog_cnt != info.nr_prog_tags || - sub_prog_cnt != info.nr_jited_func_lens) + sub_prog_cnt = info->nr_jited_ksyms; + if (sub_prog_cnt != info->nr_prog_tags || + sub_prog_cnt != info->nr_jited_func_lens) return -1; /* check BTF func info support */ - if (info.btf_id && info.nr_func_info && info.func_info_rec_size) { + if (info->btf_id && info->nr_func_info && info->func_info_rec_size) { /* btf func info number should be same as sub_prog_cnt */ - if (sub_prog_cnt != info.nr_func_info) { + if (sub_prog_cnt != info->nr_func_info) { pr_debug("%s: mismatch in BPF sub program count and BTF function info count, aborting\n", __func__); - return -1; - } - if (btf__get_from_id(info.btf_id, &btf)) { - pr_debug("%s: failed to get BTF of id %u, aborting\n", __func__, info.btf_id); - return -1; + err = -1; + goto out; } - func_info_rec_size = info.func_info_rec_size; - func_infos = calloc(sub_prog_cnt, func_info_rec_size); - if (!func_infos) { - pr_debug("%s: failed to allocate memory for func_infos, aborting\n", __func__); - return -1; + if (btf__get_from_id(info->btf_id, &btf)) { + pr_debug("%s: failed to get BTF of id %u, aborting\n", __func__, info->btf_id); + err = -1; + btf = NULL; + goto out; } - has_btf = true; - } - - /* - * We need address, length, and tag for each sub program. 
- * Allocate memory and call bpf_obj_get_info_by_fd() again - */ - prog_addrs = calloc(sub_prog_cnt, sizeof(u64)); - if (!prog_addrs) { - pr_debug("%s: failed to allocate memory for prog_addrs, aborting\n", __func__); - goto out; - } - prog_lens = calloc(sub_prog_cnt, sizeof(u32)); - if (!prog_lens) { - pr_debug("%s: failed to allocate memory for prog_lens, aborting\n", __func__); - goto out; - } - prog_tags = calloc(sub_prog_cnt, BPF_TAG_SIZE); - if (!prog_tags) { - pr_debug("%s: failed to allocate memory for prog_tags, aborting\n", __func__); - goto out; - } - - memset(&info, 0, sizeof(info)); - info.nr_jited_ksyms = sub_prog_cnt; - info.nr_jited_func_lens = sub_prog_cnt; - info.nr_prog_tags = sub_prog_cnt; - info.jited_ksyms = ptr_to_u64(prog_addrs); - info.jited_func_lens = ptr_to_u64(prog_lens); - info.prog_tags = ptr_to_u64(prog_tags); - info_len = sizeof(info); - if (has_btf) { - info.nr_func_info = sub_prog_cnt; - info.func_info_rec_size = func_info_rec_size; - info.func_info = ptr_to_u64(func_infos); - } - - err = bpf_obj_get_info_by_fd(fd, &info, &info_len); - if (err) { - pr_debug("%s: failed to get BPF program info, aborting\n", __func__); - goto out; + perf_env__fetch_btf(env, info->btf_id, btf); } /* Synthesize PERF_RECORD_KSYMBOL */ for (i = 0; i < sub_prog_cnt; i++) { - const struct bpf_func_info *finfo; - const char *short_name = NULL; - const struct btf_type *t; + __u32 *prog_lens = (__u32 *)(uintptr_t)(info->jited_func_lens); + __u64 *prog_addrs = (__u64 *)(uintptr_t)(info->jited_ksyms); int name_len; *ksymbol_event = (struct ksymbol_event){ @@ -157,26 +239,9 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_tool *tool, .ksym_type = PERF_RECORD_KSYMBOL_TYPE_BPF, .flags = 0, }; - name_len = snprintf(ksymbol_event->name, KSYM_NAME_LEN, - "bpf_prog_"); - name_len += snprintf_hex(ksymbol_event->name + name_len, - KSYM_NAME_LEN - name_len, - prog_tags[i], BPF_TAG_SIZE); - if (has_btf) { - finfo = func_infos + i * info.func_info_rec_size; - 
t = btf__type_by_id(btf, finfo->type_id); - short_name = btf__name_by_offset(btf, t->name_off); - } else if (i == 0 && sub_prog_cnt == 1) { - /* no subprog */ - if (info.name[0]) - short_name = info.name; - } else - short_name = "F"; - if (short_name) - name_len += snprintf(ksymbol_event->name + name_len, - KSYM_NAME_LEN - name_len, - "_%s", short_name); + name_len = synthesize_bpf_prog_name(ksymbol_event->name, + KSYM_NAME_LEN, info, btf, i); ksymbol_event->header.size += PERF_ALIGN(name_len + 1, sizeof(u64)); @@ -186,8 +251,8 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_tool *tool, machine, process); } - /* Synthesize PERF_RECORD_BPF_EVENT */ - if (opts->bpf_event) { + if (!opts->no_bpf_event) { + /* Synthesize PERF_RECORD_BPF_EVENT */ *bpf_event = (struct bpf_event){ .header = { .type = PERF_RECORD_BPF_EVENT, @@ -195,25 +260,38 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_tool *tool, }, .type = PERF_BPF_EVENT_PROG_LOAD, .flags = 0, - .id = info.id, + .id = info->id, }; - memcpy(bpf_event->tag, prog_tags[i], BPF_TAG_SIZE); + memcpy(bpf_event->tag, info->tag, BPF_TAG_SIZE); memset((void *)event + event->header.size, 0, machine->id_hdr_size); event->header.size += machine->id_hdr_size; + + /* save bpf_prog_info to env */ + info_node = malloc(sizeof(struct bpf_prog_info_node)); + if (!info_node) { + err = -1; + goto out; + } + + info_node->info_linear = info_linear; + perf_env__insert_bpf_prog_info(env, info_node); + info_linear = NULL; + + /* + * process after saving bpf_prog_info to env, so that + * required information is ready for look up + */ err = perf_tool__process_synth_event(tool, event, machine, process); } out: - free(prog_tags); - free(prog_lens); - free(prog_addrs); - free(func_infos); + free(info_linear); free(btf); return err ? 
-1 : 0; } -int perf_event__synthesize_bpf_events(struct perf_tool *tool, +int perf_event__synthesize_bpf_events(struct perf_session *session, perf_event__handler_t process, struct machine *machine, struct record_opts *opts) @@ -247,7 +325,7 @@ int perf_event__synthesize_bpf_events(struct perf_tool *tool, continue; } - err = perf_event__synthesize_one_bpf_prog(tool, process, + err = perf_event__synthesize_one_bpf_prog(session, process, machine, fd, event, opts); close(fd); @@ -261,3 +339,142 @@ int perf_event__synthesize_bpf_events(struct perf_tool *tool, free(event); return err; } + +static void perf_env__add_bpf_info(struct perf_env *env, u32 id) +{ + struct bpf_prog_info_linear *info_linear; + struct bpf_prog_info_node *info_node; + struct btf *btf = NULL; + u64 arrays; + u32 btf_id; + int fd; + + fd = bpf_prog_get_fd_by_id(id); + if (fd < 0) + return; + + arrays = 1UL << BPF_PROG_INFO_JITED_KSYMS; + arrays |= 1UL << BPF_PROG_INFO_JITED_FUNC_LENS; + arrays |= 1UL << BPF_PROG_INFO_FUNC_INFO; + arrays |= 1UL << BPF_PROG_INFO_PROG_TAGS; + arrays |= 1UL << BPF_PROG_INFO_JITED_INSNS; + arrays |= 1UL << BPF_PROG_INFO_LINE_INFO; + arrays |= 1UL << BPF_PROG_INFO_JITED_LINE_INFO; + + info_linear = bpf_program__get_prog_info_linear(fd, arrays); + if (IS_ERR_OR_NULL(info_linear)) { + pr_debug("%s: failed to get BPF program info. 
aborting\n", __func__); + goto out; + } + + btf_id = info_linear->info.btf_id; + + info_node = malloc(sizeof(struct bpf_prog_info_node)); + if (info_node) { + info_node->info_linear = info_linear; + perf_env__insert_bpf_prog_info(env, info_node); + } else + free(info_linear); + + if (btf_id == 0) + goto out; + + if (btf__get_from_id(btf_id, &btf)) { + pr_debug("%s: failed to get BTF of id %u, aborting\n", + __func__, btf_id); + goto out; + } + perf_env__fetch_btf(env, btf_id, btf); + +out: + free(btf); + close(fd); +} + +static int bpf_event__sb_cb(union perf_event *event, void *data) +{ + struct perf_env *env = data; + + if (event->header.type != PERF_RECORD_BPF_EVENT) + return -1; + + switch (event->bpf_event.type) { + case PERF_BPF_EVENT_PROG_LOAD: + perf_env__add_bpf_info(env, event->bpf_event.id); + + case PERF_BPF_EVENT_PROG_UNLOAD: + /* + * Do not free bpf_prog_info and btf of the program here, + * as annotation still need them. They will be freed at + * the end of the session. + */ + break; + default: + pr_debug("unexpected bpf_event type of %d\n", + event->bpf_event.type); + break; + } + + return 0; +} + +int bpf_event__add_sb_event(struct perf_evlist **evlist, + struct perf_env *env) +{ + struct perf_event_attr attr = { + .type = PERF_TYPE_SOFTWARE, + .config = PERF_COUNT_SW_DUMMY, + .sample_id_all = 1, + .watermark = 1, + .bpf_event = 1, + .size = sizeof(attr), /* to capture ABI version */ + }; + + /* + * Older gcc versions don't support designated initializers, like above, + * for unnamed union members, such as the following: + */ + attr.wakeup_watermark = 1; + + return perf_evlist__add_sb_event(evlist, &attr, bpf_event__sb_cb, env); +} + +void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, + struct perf_env *env, + FILE *fp) +{ + __u32 *prog_lens = (__u32 *)(uintptr_t)(info->jited_func_lens); + __u64 *prog_addrs = (__u64 *)(uintptr_t)(info->jited_ksyms); + char name[KSYM_NAME_LEN]; + struct btf *btf = NULL; + u32 sub_prog_cnt, i; + + 
sub_prog_cnt = info->nr_jited_ksyms; + if (sub_prog_cnt != info->nr_prog_tags || + sub_prog_cnt != info->nr_jited_func_lens) + return; + + if (info->btf_id) { + struct btf_node *node; + + node = perf_env__find_btf(env, info->btf_id); + if (node) + btf = btf__new((__u8 *)(node->data), + node->data_size); + } + + if (sub_prog_cnt == 1) { + synthesize_bpf_prog_name(name, KSYM_NAME_LEN, info, btf, 0); + fprintf(fp, "# bpf_prog_info %u: %s addr 0x%llx size %u\n", + info->id, name, prog_addrs[0], prog_lens[0]); + return; + } + + fprintf(fp, "# bpf_prog_info %u:\n", info->id); + for (i = 0; i < sub_prog_cnt; i++) { + synthesize_bpf_prog_name(name, KSYM_NAME_LEN, info, btf, i); + + fprintf(fp, "# \tsub_prog %u: %s addr 0x%llx size %u\n", + i, name, prog_addrs[i], prog_lens[i]); + } +} diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h index 7890067e1a37..04c33b3bfe28 100644 --- a/tools/perf/util/bpf-event.h +++ b/tools/perf/util/bpf-event.h @@ -3,22 +3,45 @@ #define __PERF_BPF_EVENT_H #include <linux/compiler.h> +#include <linux/rbtree.h> +#include <pthread.h> +#include <api/fd/array.h> #include "event.h" +#include <stdio.h> struct machine; union perf_event; +struct perf_env; struct perf_sample; -struct perf_tool; struct record_opts; +struct evlist; +struct target; + +struct bpf_prog_info_node { + struct bpf_prog_info_linear *info_linear; + struct rb_node rb_node; +}; + +struct btf_node { + struct rb_node rb_node; + u32 id; + u32 data_size; + char data[]; +}; #ifdef HAVE_LIBBPF_SUPPORT int machine__process_bpf_event(struct machine *machine, union perf_event *event, struct perf_sample *sample); -int perf_event__synthesize_bpf_events(struct perf_tool *tool, +int perf_event__synthesize_bpf_events(struct perf_session *session, perf_event__handler_t process, struct machine *machine, struct record_opts *opts); +int bpf_event__add_sb_event(struct perf_evlist **evlist, + struct perf_env *env); +void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, + 
struct perf_env *env, + FILE *fp); #else static inline int machine__process_bpf_event(struct machine *machine __maybe_unused, union perf_event *event __maybe_unused, @@ -27,12 +50,25 @@ static inline int machine__process_bpf_event(struct machine *machine __maybe_unu return 0; } -static inline int perf_event__synthesize_bpf_events(struct perf_tool *tool __maybe_unused, +static inline int perf_event__synthesize_bpf_events(struct perf_session *session __maybe_unused, perf_event__handler_t process __maybe_unused, struct machine *machine __maybe_unused, struct record_opts *opts __maybe_unused) { return 0; } + +static inline int bpf_event__add_sb_event(struct perf_evlist **evlist __maybe_unused, + struct perf_env *env __maybe_unused) +{ + return 0; +} + +static inline void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info __maybe_unused, + struct perf_env *env __maybe_unused, + FILE *fp __maybe_unused) +{ + +} #endif // HAVE_LIBBPF_SUPPORT #endif diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index bff0d17920ed..0c5517a8d0b7 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -185,6 +185,7 @@ char *build_id_cache__linkname(const char *sbuild_id, char *bf, size_t size) return bf; } +/* The caller is responsible to free the returned buffer. 
*/ char *build_id_cache__origname(const char *sbuild_id) { char *linkname; diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index fa092511c52b..7e3c1b60120c 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -633,11 +633,10 @@ static int collect_config(const char *var, const char *value, } ret = set_value(item, value); - return ret; out_free: free(key); - return -1; + return ret; } int perf_config_set__collect(struct perf_config_set *set, const char *file_name, diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index ba4c623cd8de..39fe21e1cf93 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -387,6 +387,7 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder, break; case OCSD_INSTR_ISB: case OCSD_INSTR_DSB_DMB: + case OCSD_INSTR_WFI_WFE: case OCSD_INSTR_OTHER: default: packet->last_instr_taken_branch = false; diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index e098e189f93e..6a64f713710d 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -14,6 +14,7 @@ #include "data.h" #include "util.h" #include "debug.h" +#include "header.h" static void close_dir(struct perf_data_file *files, int nr) { @@ -34,12 +35,16 @@ int perf_data__create_dir(struct perf_data *data, int nr) struct perf_data_file *files = NULL; int i, ret = -1; + if (WARN_ON(!data->is_dir)) + return -EINVAL; + files = zalloc(nr * sizeof(*files)); if (!files) return -ENOMEM; - data->dir.files = files; - data->dir.nr = nr; + data->dir.version = PERF_DIR_VERSION; + data->dir.files = files; + data->dir.nr = nr; for (i = 0; i < nr; i++) { struct perf_data_file *file = &files[i]; @@ -69,6 +74,13 @@ int perf_data__open_dir(struct perf_data *data) DIR *dir; int nr = 0; + if (WARN_ON(!data->is_dir)) + return -EINVAL; + + /* The version is provided by DIR_FORMAT feature. 
*/ + if (WARN_ON(data->dir.version != PERF_DIR_VERSION)) + return -1; + dir = opendir(data->path); if (!dir) return -EINVAL; @@ -118,6 +130,26 @@ out_err: return ret; } +int perf_data__update_dir(struct perf_data *data) +{ + int i; + + if (WARN_ON(!data->is_dir)) + return -EINVAL; + + for (i = 0; i < data->dir.nr; i++) { + struct perf_data_file *file = &data->dir.files[i]; + struct stat st; + + if (fstat(file->fd, &st)) + return -1; + + file->size = st.st_size; + } + + return 0; +} + static bool check_pipe(struct perf_data *data) { struct stat st; @@ -173,6 +205,16 @@ static int check_backup(struct perf_data *data) return 0; } +static bool is_dir(struct perf_data *data) +{ + struct stat st; + + if (stat(data->path, &st)) + return false; + + return (st.st_mode & S_IFMT) == S_IFDIR; +} + static int open_file_read(struct perf_data *data) { struct stat st; @@ -254,6 +296,30 @@ static int open_file_dup(struct perf_data *data) return open_file(data); } +static int open_dir(struct perf_data *data) +{ + int ret; + + /* + * So far we open only the header, so we can read the data version and + * layout. + */ + if (asprintf(&data->file.path, "%s/header", data->path) < 0) + return -1; + + if (perf_data__is_write(data) && + mkdir(data->path, S_IRWXU) < 0) + return -1; + + ret = open_file(data); + + /* Cleanup whatever we managed to create so far. */ + if (ret && perf_data__is_write(data)) + rm_rf_perf_data(data->path); + + return ret; +} + int perf_data__open(struct perf_data *data) { if (check_pipe(data)) @@ -265,11 +331,18 @@ int perf_data__open(struct perf_data *data) if (check_backup(data)) return -1; - return open_file_dup(data); + if (perf_data__is_read(data)) + data->is_dir = is_dir(data); + + return perf_data__is_dir(data) ? 
+ open_dir(data) : open_file_dup(data); } void perf_data__close(struct perf_data *data) { + if (perf_data__is_dir(data)) + perf_data__close_dir(data); + zfree(&data->file.path); close(data->file.fd); } @@ -288,9 +361,9 @@ ssize_t perf_data__write(struct perf_data *data, int perf_data__switch(struct perf_data *data, const char *postfix, - size_t pos, bool at_exit) + size_t pos, bool at_exit, + char **new_filepath) { - char *new_filepath; int ret; if (check_pipe(data)) @@ -298,15 +371,15 @@ int perf_data__switch(struct perf_data *data, if (perf_data__is_read(data)) return -EINVAL; - if (asprintf(&new_filepath, "%s.%s", data->path, postfix) < 0) + if (asprintf(new_filepath, "%s.%s", data->path, postfix) < 0) return -ENOMEM; /* * Only fire a warning, don't return error, continue fill * original file. */ - if (rename(data->path, new_filepath)) - pr_warning("Failed to rename %s to %s\n", data->path, new_filepath); + if (rename(data->path, *new_filepath)) + pr_warning("Failed to rename %s to %s\n", data->path, *new_filepath); if (!at_exit) { close(data->file.fd); @@ -323,6 +396,22 @@ int perf_data__switch(struct perf_data *data, } ret = data->file.fd; out: - free(new_filepath); return ret; } + +unsigned long perf_data__size(struct perf_data *data) +{ + u64 size = data->file.size; + int i; + + if (!data->is_dir) + return size; + + for (i = 0; i < data->dir.nr; i++) { + struct perf_data_file *file = &data->dir.files[i]; + + size += file->size; + } + + return size; +} diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h index 14b47be2bd69..259868a39019 100644 --- a/tools/perf/util/data.h +++ b/tools/perf/util/data.h @@ -19,10 +19,12 @@ struct perf_data { const char *path; struct perf_data_file file; bool is_pipe; + bool is_dir; bool force; enum perf_data_mode mode; struct { + u64 version; struct perf_data_file *files; int nr; } dir; @@ -43,14 +45,14 @@ static inline int perf_data__is_pipe(struct perf_data *data) return data->is_pipe; } -static inline int 
perf_data__fd(struct perf_data *data) +static inline bool perf_data__is_dir(struct perf_data *data) { - return data->file.fd; + return data->is_dir; } -static inline unsigned long perf_data__size(struct perf_data *data) +static inline int perf_data__fd(struct perf_data *data) { - return data->file.size; + return data->file.fd; } int perf_data__open(struct perf_data *data); @@ -68,9 +70,11 @@ ssize_t perf_data_file__write(struct perf_data_file *file, */ int perf_data__switch(struct perf_data *data, const char *postfix, - size_t pos, bool at_exit); + size_t pos, bool at_exit, char **new_filepath); int perf_data__create_dir(struct perf_data *data, int nr); int perf_data__open_dir(struct perf_data *data); void perf_data__close_dir(struct perf_data *data); +int perf_data__update_dir(struct perf_data *data); +unsigned long perf_data__size(struct perf_data *data); #endif /* __PERF_DATA_H */ diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index ba58ba603b69..e059976d9d93 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -184,6 +184,7 @@ int dso__read_binary_type_filename(const struct dso *dso, case DSO_BINARY_TYPE__KALLSYMS: case DSO_BINARY_TYPE__GUEST_KALLSYMS: case DSO_BINARY_TYPE__JAVA_JIT: + case DSO_BINARY_TYPE__BPF_PROG_INFO: case DSO_BINARY_TYPE__NOT_FOUND: ret = -1; break; @@ -1141,28 +1142,34 @@ void dso__set_short_name(struct dso *dso, const char *name, bool name_allocated) static void dso__set_basename(struct dso *dso) { - /* - * basename() may modify path buffer, so we must pass - * a copy. - */ - char *base, *lname = strdup(dso->long_name); + char *base, *lname; + int tid; - if (!lname) - return; - - /* - * basename() may return a pointer to internal - * storage which is reused in subsequent calls - * so copy the result. 
- */ - base = strdup(basename(lname)); + if (sscanf(dso->long_name, "/tmp/perf-%d.map", &tid) == 1) { + if (asprintf(&base, "[JIT] tid %d", tid) < 0) + return; + } else { + /* + * basename() may modify path buffer, so we must pass + * a copy. + */ + lname = strdup(dso->long_name); + if (!lname) + return; - free(lname); + /* + * basename() may return a pointer to internal + * storage which is reused in subsequent calls + * so copy the result. + */ + base = strdup(basename(lname)); - if (!base) - return; + free(lname); - dso__set_short_name(dso, base, true); + if (!base) + return; + } + dso__set_short_name(dso, base, true); } int dso__name_len(const struct dso *dso) diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index bb417c54c25a..6e3f63781e51 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -14,6 +14,7 @@ struct machine; struct map; +struct perf_env; enum dso_binary_type { DSO_BINARY_TYPE__KALLSYMS = 0, @@ -35,6 +36,7 @@ enum dso_binary_type { DSO_BINARY_TYPE__KCORE, DSO_BINARY_TYPE__GUEST_KCORE, DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO, + DSO_BINARY_TYPE__BPF_PROG_INFO, DSO_BINARY_TYPE__NOT_FOUND, }; @@ -189,6 +191,12 @@ struct dso { u64 debug_frame_offset; u64 eh_frame_hdr_offset; } data; + /* bpf prog information */ + struct { + u32 id; + u32 sub_id; + struct perf_env *env; + } bpf_prog; union { /* Tool specific area */ void *priv; diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 4c23779e271a..9494f9dc61ec 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -3,15 +3,167 @@ #include "env.h" #include "sane_ctype.h" #include "util.h" +#include "bpf-event.h" #include <errno.h> #include <sys/utsname.h> +#include <bpf/libbpf.h> struct perf_env perf_env; +void perf_env__insert_bpf_prog_info(struct perf_env *env, + struct bpf_prog_info_node *info_node) +{ + __u32 prog_id = info_node->info_linear->info.id; + struct bpf_prog_info_node *node; + struct rb_node *parent = NULL; + struct rb_node **p; + + 
down_write(&env->bpf_progs.lock); + p = &env->bpf_progs.infos.rb_node; + + while (*p != NULL) { + parent = *p; + node = rb_entry(parent, struct bpf_prog_info_node, rb_node); + if (prog_id < node->info_linear->info.id) { + p = &(*p)->rb_left; + } else if (prog_id > node->info_linear->info.id) { + p = &(*p)->rb_right; + } else { + pr_debug("duplicated bpf prog info %u\n", prog_id); + goto out; + } + } + + rb_link_node(&info_node->rb_node, parent, p); + rb_insert_color(&info_node->rb_node, &env->bpf_progs.infos); + env->bpf_progs.infos_cnt++; +out: + up_write(&env->bpf_progs.lock); +} + +struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env, + __u32 prog_id) +{ + struct bpf_prog_info_node *node = NULL; + struct rb_node *n; + + down_read(&env->bpf_progs.lock); + n = env->bpf_progs.infos.rb_node; + + while (n) { + node = rb_entry(n, struct bpf_prog_info_node, rb_node); + if (prog_id < node->info_linear->info.id) + n = n->rb_left; + else if (prog_id > node->info_linear->info.id) + n = n->rb_right; + else + goto out; + } + node = NULL; + +out: + up_read(&env->bpf_progs.lock); + return node; +} + +void perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node) +{ + struct rb_node *parent = NULL; + __u32 btf_id = btf_node->id; + struct btf_node *node; + struct rb_node **p; + + down_write(&env->bpf_progs.lock); + p = &env->bpf_progs.btfs.rb_node; + + while (*p != NULL) { + parent = *p; + node = rb_entry(parent, struct btf_node, rb_node); + if (btf_id < node->id) { + p = &(*p)->rb_left; + } else if (btf_id > node->id) { + p = &(*p)->rb_right; + } else { + pr_debug("duplicated btf %u\n", btf_id); + goto out; + } + } + + rb_link_node(&btf_node->rb_node, parent, p); + rb_insert_color(&btf_node->rb_node, &env->bpf_progs.btfs); + env->bpf_progs.btfs_cnt++; +out: + up_write(&env->bpf_progs.lock); +} + +struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id) +{ + struct btf_node *node = NULL; + struct rb_node *n; + + 
down_read(&env->bpf_progs.lock); + n = env->bpf_progs.btfs.rb_node; + + while (n) { + node = rb_entry(n, struct btf_node, rb_node); + if (btf_id < node->id) + n = n->rb_left; + else if (btf_id > node->id) + n = n->rb_right; + else + goto out; + } + node = NULL; + + up_read(&env->bpf_progs.lock); +out: + return node; +} + +/* purge data in bpf_progs.infos tree */ +static void perf_env__purge_bpf(struct perf_env *env) +{ + struct rb_root *root; + struct rb_node *next; + + down_write(&env->bpf_progs.lock); + + root = &env->bpf_progs.infos; + next = rb_first(root); + + while (next) { + struct bpf_prog_info_node *node; + + node = rb_entry(next, struct bpf_prog_info_node, rb_node); + next = rb_next(&node->rb_node); + rb_erase(&node->rb_node, root); + free(node); + } + + env->bpf_progs.infos_cnt = 0; + + root = &env->bpf_progs.btfs; + next = rb_first(root); + + while (next) { + struct btf_node *node; + + node = rb_entry(next, struct btf_node, rb_node); + next = rb_next(&node->rb_node); + rb_erase(&node->rb_node, root); + free(node); + } + + env->bpf_progs.btfs_cnt = 0; + + up_write(&env->bpf_progs.lock); +} + void perf_env__exit(struct perf_env *env) { int i; + perf_env__purge_bpf(env); zfree(&env->hostname); zfree(&env->os_release); zfree(&env->version); @@ -38,6 +190,13 @@ void perf_env__exit(struct perf_env *env) zfree(&env->memory_nodes); } +void perf_env__init(struct perf_env *env) +{ + env->bpf_progs.infos = RB_ROOT; + env->bpf_progs.btfs = RB_ROOT; + init_rwsem(&env->bpf_progs.lock); +} + int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]) { int i; diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index d01b8355f4ca..4f8e2b485c01 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -3,7 +3,9 @@ #define __PERF_ENV_H #include <linux/types.h> +#include <linux/rbtree.h> #include "cpumap.h" +#include "rwsem.h" struct cpu_topology_map { int socket_id; @@ -64,8 +66,23 @@ struct perf_env { struct memory_node *memory_nodes; 
unsigned long long memory_bsize; u64 clockid_res_ns; + + /* + * bpf_info_lock protects bpf rbtrees. This is needed because the + * trees are accessed by different threads in perf-top + */ + struct { + struct rw_semaphore lock; + struct rb_root infos; + u32 infos_cnt; + struct rb_root btfs; + u32 btfs_cnt; + } bpf_progs; }; +struct bpf_prog_info_node; +struct btf_node; + extern struct perf_env perf_env; void perf_env__exit(struct perf_env *env); @@ -80,4 +97,11 @@ const char *perf_env__arch(struct perf_env *env); const char *perf_env__raw_arch(struct perf_env *env); int perf_env__nr_cpus_avail(struct perf_env *env); +void perf_env__init(struct perf_env *env); +void perf_env__insert_bpf_prog_info(struct perf_env *env, + struct bpf_prog_info_node *info_node); +struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env, + __u32 prog_id); +void perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node); +struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id); #endif /* __PERF_ENV_H */ diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index ed20f4379956..51ead577533f 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -19,6 +19,7 @@ #include "debug.h" #include "units.h" #include "asm/bug.h" +#include "bpf-event.h" #include <signal.h> #include <unistd.h> @@ -230,35 +231,6 @@ void perf_evlist__set_leader(struct perf_evlist *evlist) } } -void perf_event_attr__set_max_precise_ip(struct perf_event_attr *pattr) -{ - struct perf_event_attr attr = { - .type = PERF_TYPE_HARDWARE, - .config = PERF_COUNT_HW_CPU_CYCLES, - .exclude_kernel = 1, - .precise_ip = 3, - }; - - event_attr_init(&attr); - - /* - * Unnamed union member, not supported as struct member named - * initializer in older compilers such as gcc 4.4.7 - */ - attr.sample_period = 1; - - while (attr.precise_ip != 0) { - int fd = sys_perf_event_open(&attr, 0, -1, -1, 0); - if (fd != -1) { - close(fd); - break; - } - --attr.precise_ip; - } - - 
pattr->precise_ip = attr.precise_ip; -} - int __perf_evlist__add_default(struct perf_evlist *evlist, bool precise) { struct perf_evsel *evsel = perf_evsel__new_cycles(precise); @@ -1856,3 +1828,125 @@ struct perf_evsel *perf_evlist__reset_weak_group(struct perf_evlist *evsel_list, } return leader; } + +int perf_evlist__add_sb_event(struct perf_evlist **evlist, + struct perf_event_attr *attr, + perf_evsel__sb_cb_t cb, + void *data) +{ + struct perf_evsel *evsel; + bool new_evlist = (*evlist) == NULL; + + if (*evlist == NULL) + *evlist = perf_evlist__new(); + if (*evlist == NULL) + return -1; + + if (!attr->sample_id_all) { + pr_warning("enabling sample_id_all for all side band events\n"); + attr->sample_id_all = 1; + } + + evsel = perf_evsel__new_idx(attr, (*evlist)->nr_entries); + if (!evsel) + goto out_err; + + evsel->side_band.cb = cb; + evsel->side_band.data = data; + perf_evlist__add(*evlist, evsel); + return 0; + +out_err: + if (new_evlist) { + perf_evlist__delete(*evlist); + *evlist = NULL; + } + return -1; +} + +static void *perf_evlist__poll_thread(void *arg) +{ + struct perf_evlist *evlist = arg; + bool draining = false; + int i, done = 0; + + while (!done) { + bool got_data = false; + + if (evlist->thread.done) + draining = true; + + if (!draining) + perf_evlist__poll(evlist, 1000); + + for (i = 0; i < evlist->nr_mmaps; i++) { + struct perf_mmap *map = &evlist->mmap[i]; + union perf_event *event; + + if (perf_mmap__read_init(map)) + continue; + while ((event = perf_mmap__read_event(map)) != NULL) { + struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event); + + if (evsel && evsel->side_band.cb) + evsel->side_band.cb(event, evsel->side_band.data); + else + pr_warning("cannot locate proper evsel for the side band event\n"); + + perf_mmap__consume(map); + got_data = true; + } + perf_mmap__read_done(map); + } + + if (draining && !got_data) + break; + } + return NULL; +} + +int perf_evlist__start_sb_thread(struct perf_evlist *evlist, + struct target 
*target) +{ + struct perf_evsel *counter; + + if (!evlist) + return 0; + + if (perf_evlist__create_maps(evlist, target)) + goto out_delete_evlist; + + evlist__for_each_entry(evlist, counter) { + if (perf_evsel__open(counter, evlist->cpus, + evlist->threads) < 0) + goto out_delete_evlist; + } + + if (perf_evlist__mmap(evlist, UINT_MAX)) + goto out_delete_evlist; + + evlist__for_each_entry(evlist, counter) { + if (perf_evsel__enable(counter)) + goto out_delete_evlist; + } + + evlist->thread.done = 0; + if (pthread_create(&evlist->thread.th, NULL, perf_evlist__poll_thread, evlist)) + goto out_delete_evlist; + + return 0; + +out_delete_evlist: + perf_evlist__delete(evlist); + evlist = NULL; + return -1; +} + +void perf_evlist__stop_sb_thread(struct perf_evlist *evlist) +{ + if (!evlist) + return; + evlist->thread.done = 1; + pthread_join(evlist->thread.th, NULL); + perf_evlist__delete(evlist); +} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 744906dd4887..6a94785b9100 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -54,6 +54,10 @@ struct perf_evlist { struct perf_sample *sample); u64 first_sample_time; u64 last_sample_time; + struct { + pthread_t th; + volatile int done; + } thread; }; struct perf_evsel_str_handler { @@ -87,6 +91,14 @@ int __perf_evlist__add_default_attrs(struct perf_evlist *evlist, int perf_evlist__add_dummy(struct perf_evlist *evlist); +int perf_evlist__add_sb_event(struct perf_evlist **evlist, + struct perf_event_attr *attr, + perf_evsel__sb_cb_t cb, + void *data); +int perf_evlist__start_sb_thread(struct perf_evlist *evlist, + struct target *target); +void perf_evlist__stop_sb_thread(struct perf_evlist *evlist); + int perf_evlist__add_newtp(struct perf_evlist *evlist, const char *sys, const char *name, void *handler); @@ -303,8 +315,6 @@ void perf_evlist__to_front(struct perf_evlist *evlist, void perf_evlist__set_tracking_event(struct perf_evlist *evlist, struct perf_evsel *tracking_evsel); -void 
perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr); - struct perf_evsel * perf_evlist__find_evsel_by_str(struct perf_evlist *evlist, const char *str); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 3bbf73e979c0..966360844fff 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -295,7 +295,6 @@ struct perf_evsel *perf_evsel__new_cycles(bool precise) if (!precise) goto new_event; - perf_event_attr__set_max_precise_ip(&attr); /* * Now let the usual logic to set up the perf_event_attr defaults * to kick in when we return and before perf_evsel__open() is called. @@ -305,6 +304,8 @@ new_event: if (evsel == NULL) goto out; + evsel->precise_max = true; + /* use asprintf() because free(evsel) assumes name is allocated */ if (asprintf(&evsel->name, "cycles%s%s%.*s", (attr.precise_ip || attr.exclude_kernel) ? ":" : "", @@ -1036,7 +1037,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, attr->mmap2 = track && !perf_missing_features.mmap2; attr->comm = track; attr->ksymbol = track && !perf_missing_features.ksymbol; - attr->bpf_event = track && opts->bpf_event && + attr->bpf_event = track && !opts->no_bpf_event && !perf_missing_features.bpf_event; if (opts->record_namespaces) @@ -1083,7 +1084,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, } if (evsel->precise_max) - perf_event_attr__set_max_precise_ip(attr); + attr->precise_ip = 3; if (opts->all_user) { attr->exclude_kernel = 1; @@ -1292,6 +1293,7 @@ void perf_evsel__exit(struct perf_evsel *evsel) { assert(list_empty(&evsel->node)); assert(evsel->evlist == NULL); + perf_evsel__free_counts(evsel); perf_evsel__free_fd(evsel); perf_evsel__free_id(evsel); perf_evsel__free_config_terms(evsel); @@ -1342,10 +1344,9 @@ void perf_counts_values__scale(struct perf_counts_values *count, count->val = 0; } else if (count->run < count->ena) { scaled = 1; - count->val = (u64)((double) count->val * count->ena / count->run + 0.5); 
+ count->val = (u64)((double) count->val * count->ena / count->run); } - } else - count->ena = count->run = 0; + } if (pscaled) *pscaled = scaled; @@ -1749,6 +1750,59 @@ static bool ignore_missing_thread(struct perf_evsel *evsel, return true; } +static void display_attr(struct perf_event_attr *attr) +{ + if (verbose >= 2) { + fprintf(stderr, "%.60s\n", graph_dotted_line); + fprintf(stderr, "perf_event_attr:\n"); + perf_event_attr__fprintf(stderr, attr, __open_attr__fprintf, NULL); + fprintf(stderr, "%.60s\n", graph_dotted_line); + } +} + +static int perf_event_open(struct perf_evsel *evsel, + pid_t pid, int cpu, int group_fd, + unsigned long flags) +{ + int precise_ip = evsel->attr.precise_ip; + int fd; + + while (1) { + pr_debug2("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx", + pid, cpu, group_fd, flags); + + fd = sys_perf_event_open(&evsel->attr, pid, cpu, group_fd, flags); + if (fd >= 0) + break; + + /* + * Do quick precise_ip fallback if: + * - there is precise_ip set in perf_event_attr + * - maximum precise is requested + * - sys_perf_event_open failed with ENOTSUP error, + * which is associated with wrong precise_ip + */ + if (!precise_ip || !evsel->precise_max || (errno != ENOTSUP)) + break; + + /* + * We tried all the precise_ip values, and it's + * still failing, so leave it to standard fallback. 
+ */ + if (!evsel->attr.precise_ip) { + evsel->attr.precise_ip = precise_ip; + break; + } + + pr_debug2("\nsys_perf_event_open failed, error %d\n", -ENOTSUP); + evsel->attr.precise_ip--; + pr_debug2("decreasing precise_ip by one (%d)\n", evsel->attr.precise_ip); + display_attr(&evsel->attr); + } + + return fd; +} + int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, struct thread_map *threads) { @@ -1824,12 +1878,7 @@ retry_sample_id: if (perf_missing_features.sample_id_all) evsel->attr.sample_id_all = 0; - if (verbose >= 2) { - fprintf(stderr, "%.60s\n", graph_dotted_line); - fprintf(stderr, "perf_event_attr:\n"); - perf_event_attr__fprintf(stderr, &evsel->attr, __open_attr__fprintf, NULL); - fprintf(stderr, "%.60s\n", graph_dotted_line); - } + display_attr(&evsel->attr); for (cpu = 0; cpu < cpus->nr; cpu++) { @@ -1841,13 +1890,10 @@ retry_sample_id: group_fd = get_group_fd(evsel, cpu, thread); retry_open: - pr_debug2("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx", - pid, cpus->map[cpu], group_fd, flags); - test_attr__ready(); - fd = sys_perf_event_open(&evsel->attr, pid, cpus->map[cpu], - group_fd, flags); + fd = perf_event_open(evsel, pid, cpus->map[cpu], + group_fd, flags); FD(evsel, cpu, thread) = fd; @@ -2322,7 +2368,7 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, if (data->user_regs.abi) { u64 mask = evsel->attr.sample_regs_user; - sz = hweight_long(mask) * sizeof(u64); + sz = hweight64(mask) * sizeof(u64); OVERFLOW_CHECK(array, sz, max_size); data->user_regs.mask = mask; data->user_regs.regs = (u64 *)array; @@ -2378,7 +2424,7 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, if (data->intr_regs.abi != PERF_SAMPLE_REGS_ABI_NONE) { u64 mask = evsel->attr.sample_regs_intr; - sz = hweight_long(mask) * sizeof(u64); + sz = hweight64(mask) * sizeof(u64); OVERFLOW_CHECK(array, sz, max_size); data->intr_regs.mask = mask; data->intr_regs.regs = (u64 *)array; @@ -2506,7 
+2552,7 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, if (type & PERF_SAMPLE_REGS_USER) { if (sample->user_regs.abi) { result += sizeof(u64); - sz = hweight_long(sample->user_regs.mask) * sizeof(u64); + sz = hweight64(sample->user_regs.mask) * sizeof(u64); result += sz; } else { result += sizeof(u64); @@ -2534,7 +2580,7 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, if (type & PERF_SAMPLE_REGS_INTR) { if (sample->intr_regs.abi) { result += sizeof(u64); - sz = hweight_long(sample->intr_regs.mask) * sizeof(u64); + sz = hweight64(sample->intr_regs.mask) * sizeof(u64); result += sz; } else { result += sizeof(u64); @@ -2664,7 +2710,7 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, if (type & PERF_SAMPLE_REGS_USER) { if (sample->user_regs.abi) { *array++ = sample->user_regs.abi; - sz = hweight_long(sample->user_regs.mask) * sizeof(u64); + sz = hweight64(sample->user_regs.mask) * sizeof(u64); memcpy(array, sample->user_regs.regs, sz); array = (void *)array + sz; } else { @@ -2700,7 +2746,7 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, if (type & PERF_SAMPLE_REGS_INTR) { if (sample->intr_regs.abi) { *array++ = sample->intr_regs.abi; - sz = hweight_long(sample->intr_regs.mask) * sizeof(u64); + sz = hweight64(sample->intr_regs.mask) * sizeof(u64); memcpy(array, sample->intr_regs.regs, sz); array = (void *)array + sz; } else { diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index cc578e02e08f..0f2c6c93d721 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -73,6 +73,8 @@ struct perf_evsel_config_term { struct perf_stat_evsel; +typedef int (perf_evsel__sb_cb_t)(union perf_event *event, void *data); + /** struct perf_evsel - event selector * * @evlist - evlist this evsel is in, if it is in one. 
@@ -151,6 +153,10 @@ struct perf_evsel { bool collect_stat; bool weak_group; const char *pmu_name; + struct { + perf_evsel__sb_cb_t *cb; + void *data; + } side_band; }; union u64_swap { diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 01b324c275b9..2d2af2ac2b1e 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -18,6 +18,7 @@ #include <sys/utsname.h> #include <linux/time64.h> #include <dirent.h> +#include <bpf/libbpf.h> #include "evlist.h" #include "evsel.h" @@ -40,6 +41,7 @@ #include "time-utils.h" #include "units.h" #include "cputopo.h" +#include "bpf-event.h" #include "sane_ctype.h" @@ -861,6 +863,104 @@ static int write_clockid(struct feat_fd *ff, sizeof(ff->ph->env.clockid_res_ns)); } +static int write_dir_format(struct feat_fd *ff, + struct perf_evlist *evlist __maybe_unused) +{ + struct perf_session *session; + struct perf_data *data; + + session = container_of(ff->ph, struct perf_session, header); + data = session->data; + + if (WARN_ON(!perf_data__is_dir(data))) + return -1; + + return do_write(ff, &data->dir.version, sizeof(data->dir.version)); +} + +#ifdef HAVE_LIBBPF_SUPPORT +static int write_bpf_prog_info(struct feat_fd *ff, + struct perf_evlist *evlist __maybe_unused) +{ + struct perf_env *env = &ff->ph->env; + struct rb_root *root; + struct rb_node *next; + int ret; + + down_read(&env->bpf_progs.lock); + + ret = do_write(ff, &env->bpf_progs.infos_cnt, + sizeof(env->bpf_progs.infos_cnt)); + if (ret < 0) + goto out; + + root = &env->bpf_progs.infos; + next = rb_first(root); + while (next) { + struct bpf_prog_info_node *node; + size_t len; + + node = rb_entry(next, struct bpf_prog_info_node, rb_node); + next = rb_next(&node->rb_node); + len = sizeof(struct bpf_prog_info_linear) + + node->info_linear->data_len; + + /* before writing to file, translate address to offset */ + bpf_program__bpil_addr_to_offs(node->info_linear); + ret = do_write(ff, node->info_linear, len); + /* + * translate back to address even 
when do_write() fails, + * so that this function never changes the data. + */ + bpf_program__bpil_offs_to_addr(node->info_linear); + if (ret < 0) + goto out; + } +out: + up_read(&env->bpf_progs.lock); + return ret; +} +#else // HAVE_LIBBPF_SUPPORT +static int write_bpf_prog_info(struct feat_fd *ff __maybe_unused, + struct perf_evlist *evlist __maybe_unused) +{ + return 0; +} +#endif // HAVE_LIBBPF_SUPPORT + +static int write_bpf_btf(struct feat_fd *ff, + struct perf_evlist *evlist __maybe_unused) +{ + struct perf_env *env = &ff->ph->env; + struct rb_root *root; + struct rb_node *next; + int ret; + + down_read(&env->bpf_progs.lock); + + ret = do_write(ff, &env->bpf_progs.btfs_cnt, + sizeof(env->bpf_progs.btfs_cnt)); + + if (ret < 0) + goto out; + + root = &env->bpf_progs.btfs; + next = rb_first(root); + while (next) { + struct btf_node *node; + + node = rb_entry(next, struct btf_node, rb_node); + next = rb_next(&node->rb_node); + ret = do_write(ff, &node->id, + sizeof(u32) * 2 + node->data_size); + if (ret < 0) + goto out; + } +out: + up_read(&env->bpf_progs.lock); + return ret; +} + static int cpu_cache_level__sort(const void *a, const void *b) { struct cpu_cache_level *cache_a = (struct cpu_cache_level *)a; @@ -1341,6 +1441,63 @@ static void print_clockid(struct feat_fd *ff, FILE *fp) ff->ph->env.clockid_res_ns * 1000); } +static void print_dir_format(struct feat_fd *ff, FILE *fp) +{ + struct perf_session *session; + struct perf_data *data; + + session = container_of(ff->ph, struct perf_session, header); + data = session->data; + + fprintf(fp, "# directory data version : %"PRIu64"\n", data->dir.version); +} + +static void print_bpf_prog_info(struct feat_fd *ff, FILE *fp) +{ + struct perf_env *env = &ff->ph->env; + struct rb_root *root; + struct rb_node *next; + + down_read(&env->bpf_progs.lock); + + root = &env->bpf_progs.infos; + next = rb_first(root); + + while (next) { + struct bpf_prog_info_node *node; + + node = rb_entry(next, struct bpf_prog_info_node, 
rb_node); + next = rb_next(&node->rb_node); + + bpf_event__print_bpf_prog_info(&node->info_linear->info, + env, fp); + } + + up_read(&env->bpf_progs.lock); +} + +static void print_bpf_btf(struct feat_fd *ff, FILE *fp) +{ + struct perf_env *env = &ff->ph->env; + struct rb_root *root; + struct rb_node *next; + + down_read(&env->bpf_progs.lock); + + root = &env->bpf_progs.btfs; + next = rb_first(root); + + while (next) { + struct btf_node *node; + + node = rb_entry(next, struct btf_node, rb_node); + next = rb_next(&node->rb_node); + fprintf(fp, "# btf info of id %u\n", node->id); + } + + up_read(&env->bpf_progs.lock); +} + static void free_event_desc(struct perf_evsel *events) { struct perf_evsel *evsel; @@ -2373,6 +2530,143 @@ static int process_clockid(struct feat_fd *ff, return 0; } +static int process_dir_format(struct feat_fd *ff, + void *_data __maybe_unused) +{ + struct perf_session *session; + struct perf_data *data; + + session = container_of(ff->ph, struct perf_session, header); + data = session->data; + + if (WARN_ON(!perf_data__is_dir(data))) + return -1; + + return do_read_u64(ff, &data->dir.version); +} + +#ifdef HAVE_LIBBPF_SUPPORT +static int process_bpf_prog_info(struct feat_fd *ff, void *data __maybe_unused) +{ + struct bpf_prog_info_linear *info_linear; + struct bpf_prog_info_node *info_node; + struct perf_env *env = &ff->ph->env; + u32 count, i; + int err = -1; + + if (ff->ph->needs_swap) { + pr_warning("interpreting bpf_prog_info from systems with endianity is not yet supported\n"); + return 0; + } + + if (do_read_u32(ff, &count)) + return -1; + + down_write(&env->bpf_progs.lock); + + for (i = 0; i < count; ++i) { + u32 info_len, data_len; + + info_linear = NULL; + info_node = NULL; + if (do_read_u32(ff, &info_len)) + goto out; + if (do_read_u32(ff, &data_len)) + goto out; + + if (info_len > sizeof(struct bpf_prog_info)) { + pr_warning("detected invalid bpf_prog_info\n"); + goto out; + } + + info_linear = malloc(sizeof(struct bpf_prog_info_linear) 
+ + data_len); + if (!info_linear) + goto out; + info_linear->info_len = sizeof(struct bpf_prog_info); + info_linear->data_len = data_len; + if (do_read_u64(ff, (u64 *)(&info_linear->arrays))) + goto out; + if (__do_read(ff, &info_linear->info, info_len)) + goto out; + if (info_len < sizeof(struct bpf_prog_info)) + memset(((void *)(&info_linear->info)) + info_len, 0, + sizeof(struct bpf_prog_info) - info_len); + + if (__do_read(ff, info_linear->data, data_len)) + goto out; + + info_node = malloc(sizeof(struct bpf_prog_info_node)); + if (!info_node) + goto out; + + /* after reading from file, translate offset to address */ + bpf_program__bpil_offs_to_addr(info_linear); + info_node->info_linear = info_linear; + perf_env__insert_bpf_prog_info(env, info_node); + } + + up_write(&env->bpf_progs.lock); + return 0; +out: + free(info_linear); + free(info_node); + up_write(&env->bpf_progs.lock); + return err; +} +#else // HAVE_LIBBPF_SUPPORT +static int process_bpf_prog_info(struct feat_fd *ff __maybe_unused, void *data __maybe_unused) +{ + return 0; +} +#endif // HAVE_LIBBPF_SUPPORT + +static int process_bpf_btf(struct feat_fd *ff, void *data __maybe_unused) +{ + struct perf_env *env = &ff->ph->env; + struct btf_node *node = NULL; + u32 count, i; + int err = -1; + + if (ff->ph->needs_swap) { + pr_warning("interpreting btf from systems with endianity is not yet supported\n"); + return 0; + } + + if (do_read_u32(ff, &count)) + return -1; + + down_write(&env->bpf_progs.lock); + + for (i = 0; i < count; ++i) { + u32 id, data_size; + + if (do_read_u32(ff, &id)) + goto out; + if (do_read_u32(ff, &data_size)) + goto out; + + node = malloc(sizeof(struct btf_node) + data_size); + if (!node) + goto out; + + node->id = id; + node->data_size = data_size; + + if (__do_read(ff, node->data, data_size)) + goto out; + + perf_env__insert_btf(env, node); + node = NULL; + } + + err = 0; +out: + up_write(&env->bpf_progs.lock); + free(node); + return err; +} + struct feature_ops { int 
(*write)(struct feat_fd *ff, struct perf_evlist *evlist); void (*print)(struct feat_fd *ff, FILE *fp); @@ -2432,7 +2726,10 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { FEAT_OPN(CACHE, cache, true), FEAT_OPR(SAMPLE_TIME, sample_time, false), FEAT_OPR(MEM_TOPOLOGY, mem_topology, true), - FEAT_OPR(CLOCKID, clockid, false) + FEAT_OPR(CLOCKID, clockid, false), + FEAT_OPN(DIR_FORMAT, dir_format, false), + FEAT_OPR(BPF_PROG_INFO, bpf_prog_info, false), + FEAT_OPR(BPF_BTF, bpf_btf, false), }; struct header_print_data { diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 0d553ddca0a3..386da49e1bfa 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -39,6 +39,9 @@ enum { HEADER_SAMPLE_TIME, HEADER_MEM_TOPOLOGY, HEADER_CLOCKID, + HEADER_DIR_FORMAT, + HEADER_BPF_PROG_INFO, + HEADER_BPF_BTF, HEADER_LAST_FEATURE, HEADER_FEAT_BITS = 256, }; @@ -48,6 +51,10 @@ enum perf_header_version { PERF_HEADER_VERSION_2, }; +enum perf_dir_version { + PERF_DIR_VERSION = 1, +}; + struct perf_file_section { u64 offset; u64 size; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index f9eb95bf3938..7ace7a10054d 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -19,6 +19,7 @@ #include <math.h> #include <inttypes.h> #include <sys/param.h> +#include <linux/time64.h> static bool hists__filter_entry_by_dso(struct hists *hists, struct hist_entry *he); @@ -192,6 +193,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) hists__new_col_len(hists, HISTC_MEM_LVL, 21 + 3); hists__new_col_len(hists, HISTC_LOCAL_WEIGHT, 12); hists__new_col_len(hists, HISTC_GLOBAL_WEIGHT, 12); + hists__new_col_len(hists, HISTC_TIME, 12); if (h->srcline) { len = MAX(strlen(h->srcline), strlen(sort_srcline.se_header)); @@ -246,6 +248,14 @@ static void he_stat__add_cpumode_period(struct he_stat *he_stat, } } +static long hist_time(unsigned long htime) +{ + unsigned long time_quantum = symbol_conf.time_quantum; + if 
(time_quantum) + return (htime / time_quantum) * time_quantum; + return htime; +} + static void he_stat__add_period(struct he_stat *he_stat, u64 period, u64 weight) { @@ -426,6 +436,13 @@ static int hist_entry__init(struct hist_entry *he, goto err_rawdata; } + if (symbol_conf.res_sample) { + he->res_samples = calloc(sizeof(struct res_sample), + symbol_conf.res_sample); + if (!he->res_samples) + goto err_srcline; + } + INIT_LIST_HEAD(&he->pairs.node); thread__get(he->thread); he->hroot_in = RB_ROOT_CACHED; @@ -436,6 +453,9 @@ static int hist_entry__init(struct hist_entry *he, return 0; +err_srcline: + free(he->srcline); + err_rawdata: free(he->raw_data); @@ -593,6 +613,32 @@ out: return he; } +static unsigned random_max(unsigned high) +{ + unsigned thresh = -high % high; + for (;;) { + unsigned r = random(); + if (r >= thresh) + return r % high; + } +} + +static void hists__res_sample(struct hist_entry *he, struct perf_sample *sample) +{ + struct res_sample *r; + int j; + + if (he->num_res < symbol_conf.res_sample) { + j = he->num_res++; + } else { + j = random_max(symbol_conf.res_sample); + } + r = &he->res_samples[j]; + r->time = sample->time; + r->cpu = sample->cpu; + r->tid = sample->tid; +} + static struct hist_entry* __hists__add_entry(struct hists *hists, struct addr_location *al, @@ -635,10 +681,13 @@ __hists__add_entry(struct hists *hists, .raw_data = sample->raw_data, .raw_size = sample->raw_size, .ops = ops, + .time = hist_time(sample->time), }, *he = hists__findnew_entry(hists, &entry, al, sample_self); if (!hists->has_callchains && he && he->callchain_size != 0) hists->has_callchains = true; + if (he && symbol_conf.res_sample) + hists__res_sample(he, sample); return he; } @@ -1062,8 +1111,10 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, err = sample__resolve_callchain(iter->sample, &callchain_cursor, &iter->parent, iter->evsel, al, max_stack_depth); - if (err) + if (err) { + map__put(alm); return err; + } err = 
iter->ops->prepare_entry(iter, al); if (err) @@ -1162,6 +1213,7 @@ void hist_entry__delete(struct hist_entry *he) mem_info__zput(he->mem_info); } + zfree(&he->res_samples); zfree(&he->stat_acc); free_srcline(he->srcline); if (he->srcfile && he->srcfile[0]) diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 4af27fbab24f..76ff6c6d03b8 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -31,6 +31,7 @@ enum hist_filter { enum hist_column { HISTC_SYMBOL, + HISTC_TIME, HISTC_DSO, HISTC_THREAD, HISTC_COMM, @@ -432,9 +433,18 @@ struct hist_browser_timer { }; struct annotation_options; +struct res_sample; + +enum rstype { + A_NORMAL, + A_ASM, + A_SOURCE +}; #ifdef HAVE_SLANG_SUPPORT #include "../ui/keysyms.h" +void attr_to_script(char *buf, struct perf_event_attr *attr); + int map_symbol__tui_annotate(struct map_symbol *ms, struct perf_evsel *evsel, struct hist_browser_timer *hbt, struct annotation_options *annotation_opts); @@ -449,7 +459,13 @@ int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help, struct perf_env *env, bool warn_lost_event, struct annotation_options *annotation_options); -int script_browse(const char *script_opt); + +int script_browse(const char *script_opt, struct perf_evsel *evsel); + +void run_script(char *cmd); +int res_sample_browse(struct res_sample *res_samples, int num_res, + struct perf_evsel *evsel, enum rstype rstype); +void res_sample_init(void); #else static inline int perf_evlist__tui_browse_hists(struct perf_evlist *evlist __maybe_unused, @@ -478,11 +494,22 @@ static inline int hist_entry__tui_annotate(struct hist_entry *he __maybe_unused, return 0; } -static inline int script_browse(const char *script_opt __maybe_unused) +static inline int script_browse(const char *script_opt __maybe_unused, + struct perf_evsel *evsel __maybe_unused) { return 0; } +static inline int res_sample_browse(struct res_sample *res_samples __maybe_unused, + int num_res __maybe_unused, + struct perf_evsel 
*evsel __maybe_unused, + enum rstype rstype __maybe_unused) +{ + return 0; +} + +static inline void res_sample_init(void) {} + #define K_LEFT -1000 #define K_RIGHT -2000 #define K_SWITCH_INPUT_DATA -3000 diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 6e03db142091..872fab163585 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -251,19 +251,15 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params) if (!(decoder->tsc_ctc_ratio_n % decoder->tsc_ctc_ratio_d)) decoder->tsc_ctc_mult = decoder->tsc_ctc_ratio_n / decoder->tsc_ctc_ratio_d; - - /* - * Allow for timestamps appearing to backwards because a TSC - * packet has slipped past a MTC packet, so allow 2 MTC ticks - * or ... - */ - decoder->tsc_slip = multdiv(2 << decoder->mtc_shift, - decoder->tsc_ctc_ratio_n, - decoder->tsc_ctc_ratio_d); } - /* ... or 0x100 paranoia */ - if (decoder->tsc_slip < 0x100) - decoder->tsc_slip = 0x100; + + /* + * A TSC packet can slip past MTC packets so that the timestamp appears + * to go backwards. One estimate is that can be up to about 40 CPU + * cycles, which is certainly less than 0x1000 TSC ticks, but accept + * slippage an order of magnitude more to be on the safe side. 
+ */ + decoder->tsc_slip = 0x10000; intel_pt_log("timestamp: mtc_shift %u\n", decoder->mtc_shift); intel_pt_log("timestamp: tsc_ctc_ratio_n %u\n", decoder->tsc_ctc_ratio_n); diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 61959aba7e27..3c520baa198c 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1421,6 +1421,20 @@ static void machine__set_kernel_mmap(struct machine *machine, machine->vmlinux_map->end = ~0ULL; } +static void machine__update_kernel_mmap(struct machine *machine, + u64 start, u64 end) +{ + struct map *map = machine__kernel_map(machine); + + map__get(map); + map_groups__remove(&machine->kmaps, map); + + machine__set_kernel_mmap(machine, start, end); + + map_groups__insert(&machine->kmaps, map); + map__put(map); +} + int machine__create_kernel_maps(struct machine *machine) { struct dso *kernel = machine__get_kernel(machine); @@ -1453,17 +1467,11 @@ int machine__create_kernel_maps(struct machine *machine) goto out_put; } - /* we have a real start address now, so re-order the kmaps */ - map = machine__kernel_map(machine); - - map__get(map); - map_groups__remove(&machine->kmaps, map); - - /* assume it's the last in the kmaps */ - machine__set_kernel_mmap(machine, addr, ~0ULL); - - map_groups__insert(&machine->kmaps, map); - map__put(map); + /* + * we have a real start address now, so re-order the kmaps + * assume it's the last in the kmaps + */ + machine__update_kernel_mmap(machine, addr, ~0ULL); } if (machine__create_extra_kernel_maps(machine, kernel)) @@ -1599,7 +1607,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine, if (strstr(kernel->long_name, "vmlinux")) dso__set_short_name(kernel, "[kernel.vmlinux]", false); - machine__set_kernel_mmap(machine, event->mmap.start, + machine__update_kernel_mmap(machine, event->mmap.start, event->mmap.start + event->mmap.len); /* diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index fbeb0c6efaa6..ee71efb9db62 100644 --- 
a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -261,6 +261,22 @@ bool __map__is_extra_kernel_map(const struct map *map) return kmap && kmap->name[0]; } +bool __map__is_bpf_prog(const struct map *map) +{ + const char *name; + + if (map->dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO) + return true; + + /* + * If PERF_RECORD_BPF_EVENT is not included, the dso will not have + * type of DSO_BINARY_TYPE__BPF_PROG_INFO. In such cases, we can + * guess the type based on name. + */ + name = map->dso->short_name; + return name && (strstr(name, "bpf_prog_") == name); +} + bool map__has_symbols(const struct map *map) { return dso__has_symbols(map->dso); @@ -577,10 +593,25 @@ static void __maps__purge(struct maps *maps) } } +static void __maps__purge_names(struct maps *maps) +{ + struct rb_root *root = &maps->names; + struct rb_node *next = rb_first(root); + + while (next) { + struct map *pos = rb_entry(next, struct map, rb_node_name); + + next = rb_next(&pos->rb_node_name); + rb_erase_init(&pos->rb_node_name, root); + map__put(pos); + } +} + static void maps__exit(struct maps *maps) { down_write(&maps->lock); __maps__purge(maps); + __maps__purge_names(maps); up_write(&maps->lock); } @@ -895,10 +926,8 @@ static void __maps__insert_name(struct maps *maps, struct map *map) rc = strcmp(m->dso->short_name, map->dso->short_name); if (rc < 0) p = &(*p)->rb_left; - else if (rc > 0) - p = &(*p)->rb_right; else - return; + p = &(*p)->rb_right; } rb_link_node(&map->rb_node_name, parent, p); rb_insert_color(&map->rb_node_name, &maps->names); @@ -917,6 +946,9 @@ static void __maps__remove(struct maps *maps, struct map *map) { rb_erase_init(&map->rb_node, &maps->entries); map__put(map); + + rb_erase_init(&map->rb_node_name, &maps->names); + map__put(map); } void maps__remove(struct maps *maps, struct map *map) diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 0e20749f2c55..dc93787c74f0 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -159,10 
+159,12 @@ int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name, bool __map__is_kernel(const struct map *map); bool __map__is_extra_kernel_map(const struct map *map); +bool __map__is_bpf_prog(const struct map *map); static inline bool __map__is_kmodule(const struct map *map) { - return !__map__is_kernel(map) && !__map__is_extra_kernel_map(map); + return !__map__is_kernel(map) && !__map__is_extra_kernel_map(map) && + !__map__is_bpf_prog(map); } bool map__has_symbols(const struct map *map); diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c index ea523d3b248f..989fed6f43b5 100644 --- a/tools/perf/util/ordered-events.c +++ b/tools/perf/util/ordered-events.c @@ -270,6 +270,8 @@ static int __ordered_events__flush(struct ordered_events *oe, enum oe_flush how, "FINAL", "ROUND", "HALF ", + "TOP ", + "TIME ", }; int err; bool show_progress = false; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 4dcc01b2532c..5ef4939408f2 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -2271,6 +2271,7 @@ static bool is_event_supported(u8 type, unsigned config) perf_evsel__delete(evsel); } + thread_map__put(tmap); return ret; } @@ -2341,6 +2342,7 @@ void print_sdt_events(const char *subsys_glob, const char *event_glob, printf(" %-50s [%s]\n", buf, "SDT event"); free(buf); } + free(path); } else printf(" %-50s [%s]\n", nd->s, "SDT event"); if (nd2) { diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 6199a3174ab9..e0429f4ef335 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -732,10 +732,20 @@ static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu) if (!is_arm_pmu_core(name)) { pname = pe->pmu ? 
pe->pmu : "cpu"; + + /* + * uncore alias may be from different PMU + * with common prefix + */ + if (pmu_is_uncore(name) && + !strncmp(pname, name, strlen(pname))) + goto new_alias; + if (strcmp(pname, name)) continue; } +new_alias: /* need type casts to override 'const' */ __perf_pmu__new_alias(head, NULL, (char *)pe->name, (char *)pe->desc, (char *)pe->event, diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index a1b8d9649ca7..198e09ff611e 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -160,8 +160,10 @@ static struct map *kernel_get_module_map(const char *module) if (module && strchr(module, '/')) return dso__new_map(module); - if (!module) - module = "kernel"; + if (!module) { + pos = machine__kernel_map(host_machine); + return map__get(pos); + } for (pos = maps__first(maps); pos; pos = map__next(pos)) { /* short_name is "[module]" */ diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index db643f3c2b95..b17f1c9bc965 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -132,6 +132,7 @@ struct perf_session *perf_session__new(struct perf_data *data, ordered_events__init(&session->ordered_events, ordered_events__deliver_event, NULL); + perf_env__init(&session->header.env); if (data) { if (perf_data__open(data)) goto out_delete; @@ -152,6 +153,10 @@ struct perf_session *perf_session__new(struct perf_data *data, } perf_evlist__init_trace_event_sample_raw(session->evlist); + + /* Open the directory data. 
*/ + if (data->is_dir && perf_data__open_dir(data)) + goto out_delete; } } else { session->machines.host.env = &perf_env; @@ -1843,10 +1848,17 @@ fetch_mmaped_event(struct perf_session *session, #define NUM_MMAPS 128 #endif +struct reader; + +typedef s64 (*reader_cb_t)(struct perf_session *session, + union perf_event *event, + u64 file_offset); + struct reader { - int fd; - u64 data_size; - u64 data_offset; + int fd; + u64 data_size; + u64 data_offset; + reader_cb_t process; }; static int @@ -1917,7 +1929,7 @@ more: size = event->header.size; if (size < sizeof(struct perf_event_header) || - (skip = perf_session__process_event(session, event, file_pos)) < 0) { + (skip = rd->process(session, event, file_pos)) < 0) { pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n", file_offset + head, event->header.size, event->header.type); @@ -1943,12 +1955,20 @@ out: return err; } +static s64 process_simple(struct perf_session *session, + union perf_event *event, + u64 file_offset) +{ + return perf_session__process_event(session, event, file_offset); +} + static int __perf_session__process_events(struct perf_session *session) { struct reader rd = { .fd = perf_data__fd(session->data), .data_size = session->header.data_size, .data_offset = session->header.data_offset, + .process = process_simple, }; struct ordered_events *oe = &session->ordered_events; struct perf_tool *tool = session->tool; diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index d2299e912e59..5d2518e89fc4 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -3,6 +3,7 @@ #include <inttypes.h> #include <regex.h> #include <linux/mman.h> +#include <linux/time64.h> #include "sort.h" #include "hist.h" #include "comm.h" @@ -12,9 +13,11 @@ #include "evsel.h" #include "evlist.h" #include "strlist.h" +#include "strbuf.h" #include <traceevent/event-parse.h> #include "mem-events.h" #include "annotate.h" +#include "time-utils.h" #include <linux/kernel.h> regex_t parent_regex; @@ -654,6 +657,42 
@@ struct sort_entry sort_socket = { .se_width_idx = HISTC_SOCKET, }; +/* --sort time */ + +static int64_t +sort__time_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return right->time - left->time; +} + +static int hist_entry__time_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + unsigned long secs; + unsigned long long nsecs; + char he_time[32]; + + nsecs = he->time; + secs = nsecs / NSEC_PER_SEC; + nsecs -= secs * NSEC_PER_SEC; + + if (symbol_conf.nanosecs) + snprintf(he_time, sizeof he_time, "%5lu.%09llu: ", + secs, nsecs); + else + timestamp__scnprintf_usec(he->time, he_time, + sizeof(he_time)); + + return repsep_snprintf(bf, size, "%-.*s", width, he_time); +} + +struct sort_entry sort_time = { + .se_header = "Time", + .se_cmp = sort__time_cmp, + .se_snprintf = hist_entry__time_snprintf, + .se_width_idx = HISTC_TIME, +}; + /* --sort trace */ static char *get_trace_output(struct hist_entry *he) @@ -1634,6 +1673,7 @@ static struct sort_dimension common_sort_dimensions[] = { DIM(SORT_DSO_SIZE, "dso_size", sort_dso_size), DIM(SORT_CGROUP_ID, "cgroup_id", sort_cgroup_id), DIM(SORT_SYM_IPC_NULL, "ipc_null", sort_sym_ipc_null), + DIM(SORT_TIME, "time", sort_time), }; #undef DIM @@ -3068,3 +3108,54 @@ void reset_output_field(void) reset_dimensions(); perf_hpp__reset_output_field(&perf_hpp_list); } + +#define INDENT (3*8 + 1) + +static void add_key(struct strbuf *sb, const char *str, int *llen) +{ + if (*llen >= 75) { + strbuf_addstr(sb, "\n\t\t\t "); + *llen = INDENT; + } + strbuf_addf(sb, " %s", str); + *llen += strlen(str) + 1; +} + +static void add_sort_string(struct strbuf *sb, struct sort_dimension *s, int n, + int *llen) +{ + int i; + + for (i = 0; i < n; i++) + add_key(sb, s[i].name, llen); +} + +static void add_hpp_sort_string(struct strbuf *sb, struct hpp_dimension *s, int n, + int *llen) +{ + int i; + + for (i = 0; i < n; i++) + add_key(sb, s[i].name, llen); +} + +const char *sort_help(const char *prefix) +{ + 
struct strbuf sb; + char *s; + int len = strlen(prefix) + INDENT; + + strbuf_init(&sb, 300); + strbuf_addstr(&sb, prefix); + add_hpp_sort_string(&sb, hpp_sort_dimensions, + ARRAY_SIZE(hpp_sort_dimensions), &len); + add_sort_string(&sb, common_sort_dimensions, + ARRAY_SIZE(common_sort_dimensions), &len); + add_sort_string(&sb, bstack_sort_dimensions, + ARRAY_SIZE(bstack_sort_dimensions), &len); + add_sort_string(&sb, memory_sort_dimensions, + ARRAY_SIZE(memory_sort_dimensions), &len); + s = strbuf_detach(&sb, NULL); + strbuf_release(&sb); + return s; +} diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 2fbee0b1011c..ce376a73f964 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -47,6 +47,12 @@ extern struct sort_entry sort_srcline; extern enum sort_type sort__first_dimension; extern const char default_mem_sort_order[]; +struct res_sample { + u64 time; + int cpu; + int tid; +}; + struct he_stat { u64 period; u64 period_sys; @@ -135,10 +141,13 @@ struct hist_entry { char *srcfile; struct symbol *parent; struct branch_info *branch_info; + long time; struct hists *hists; struct mem_info *mem_info; void *raw_data; u32 raw_size; + int num_res; + struct res_sample *res_samples; void *trace_output; struct perf_hpp_list *hpp_list; struct hist_entry *parent_he; @@ -231,6 +240,7 @@ enum sort_type { SORT_DSO_SIZE, SORT_CGROUP_ID, SORT_SYM_IPC_NULL, + SORT_TIME, /* branch stack specific sort keys */ __SORT_BRANCH_STACK, @@ -286,6 +296,8 @@ void reset_output_field(void); void sort__setup_elide(FILE *fp); void perf_hpp__set_elide(int idx, bool elide); +const char *sort_help(const char *prefix); + int report_parse_ignore_callees_opt(const struct option *opt, const char *arg, int unset); bool is_strict_order(const char *order); diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 4d40515307b8..2856cc9d5a31 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -291,10 +291,8 @@ process_counter_values(struct 
perf_stat_config *config, struct perf_evsel *evsel break; case AGGR_GLOBAL: aggr->val += count->val; - if (config->scale) { - aggr->ena += count->ena; - aggr->run += count->run; - } + aggr->ena += count->ena; + aggr->run += count->run; case AGGR_UNSET: default: break; @@ -442,10 +440,8 @@ int create_perf_stat_counter(struct perf_evsel *evsel, struct perf_event_attr *attr = &evsel->attr; struct perf_evsel *leader = evsel->leader; - if (config->scale) { - attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | - PERF_FORMAT_TOTAL_TIME_RUNNING; - } + attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | + PERF_FORMAT_TOTAL_TIME_RUNNING; /* * The event is part of non trivial group, let's enable diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 758bf5f74e6e..5cbad55cd99d 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -6,6 +6,7 @@ #include <string.h> #include <linux/kernel.h> #include <linux/mman.h> +#include <linux/time64.h> #include <sys/types.h> #include <sys/stat.h> #include <sys/param.h> @@ -39,15 +40,18 @@ int vmlinux_path__nr_entries; char **vmlinux_path; struct symbol_conf symbol_conf = { + .nanosecs = false, .use_modules = true, .try_vmlinux_path = true, .demangle = true, .demangle_kernel = false, .cumulate_callchain = true, + .time_quantum = 100 * NSEC_PER_MSEC, /* 100ms */ .show_hist_headers = true, .symfs = "", .event_group = true, .inline_name = true, + .res_sample = 0, }; static enum dso_binary_type binary_type_symtab[] = { @@ -1451,6 +1455,7 @@ static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod, case DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO: return true; + case DSO_BINARY_TYPE__BPF_PROG_INFO: case DSO_BINARY_TYPE__NOT_FOUND: default: return false; diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h index fffea68c1203..6c55fa6fccec 100644 --- a/tools/perf/util/symbol_conf.h +++ b/tools/perf/util/symbol_conf.h @@ -8,6 +8,7 @@ struct strlist; struct intlist; struct 
symbol_conf { + bool nanosecs; unsigned short priv_size; bool try_vmlinux_path, init_annotation, @@ -55,6 +56,7 @@ struct symbol_conf { *sym_list_str, *col_width_list_str, *bt_stop_list_str; + unsigned long time_quantum; struct strlist *dso_list, *comm_list, *sym_list, @@ -66,6 +68,7 @@ struct symbol_conf { struct intlist *pid_list, *tid_list; const char *symfs; + int res_sample; }; extern struct symbol_conf symbol_conf; diff --git a/tools/perf/util/time-utils.c b/tools/perf/util/time-utils.c index 0f53baec660e..20663a460df3 100644 --- a/tools/perf/util/time-utils.c +++ b/tools/perf/util/time-utils.c @@ -453,6 +453,14 @@ int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz) return scnprintf(buf, sz, "%"PRIu64".%06"PRIu64, sec, usec); } +int timestamp__scnprintf_nsec(u64 timestamp, char *buf, size_t sz) +{ + u64 sec = timestamp / NSEC_PER_SEC, + nsec = timestamp % NSEC_PER_SEC; + + return scnprintf(buf, sz, "%" PRIu64 ".%09" PRIu64, sec, nsec); +} + int fetch_current_timestamp(char *buf, size_t sz) { struct timeval tv; diff --git a/tools/perf/util/time-utils.h b/tools/perf/util/time-utils.h index b923de44e36f..72a42ea1d513 100644 --- a/tools/perf/util/time-utils.h +++ b/tools/perf/util/time-utils.h @@ -30,6 +30,7 @@ int perf_time__parse_for_ranges(const char *str, struct perf_session *session, int *range_size, int *range_num); int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz); +int timestamp__scnprintf_nsec(u64 timestamp, char *buf, size_t sz); int fetch_current_timestamp(char *buf, size_t sz); diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 9327c0ddc3a5..c7727be9719f 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -44,6 +44,7 @@ #include <cpuid.h> #include <linux/capability.h> #include <errno.h> +#include <math.h> char *proc_stat = "/proc/stat"; FILE *outf; @@ -63,7 +64,6 @@ unsigned int dump_only; unsigned int do_snb_cstates; unsigned 
int do_knl_cstates; unsigned int do_slm_cstates; -unsigned int do_cnl_cstates; unsigned int use_c1_residency_msr; unsigned int has_aperf; unsigned int has_epb; @@ -141,9 +141,21 @@ unsigned int first_counter_read = 1; #define RAPL_CORES_ENERGY_STATUS (1 << 9) /* 0x639 MSR_PP0_ENERGY_STATUS */ +#define RAPL_PER_CORE_ENERGY (1 << 10) + /* Indicates cores energy collection is per-core, + * not per-package. */ +#define RAPL_AMD_F17H (1 << 11) + /* 0xc0010299 MSR_RAPL_PWR_UNIT */ + /* 0xc001029a MSR_CORE_ENERGY_STAT */ + /* 0xc001029b MSR_PKG_ENERGY_STAT */ #define RAPL_CORES (RAPL_CORES_ENERGY_STATUS | RAPL_CORES_POWER_LIMIT) #define TJMAX_DEFAULT 100 +/* MSRs that are not yet in the kernel-provided header. */ +#define MSR_RAPL_PWR_UNIT 0xc0010299 +#define MSR_CORE_ENERGY_STAT 0xc001029a +#define MSR_PKG_ENERGY_STAT 0xc001029b + #define MAX(a, b) ((a) > (b) ? (a) : (b)) /* @@ -187,6 +199,7 @@ struct core_data { unsigned long long c7; unsigned long long mc6_us; /* duplicate as per-core for now, even though per module */ unsigned int core_temp_c; + unsigned int core_energy; /* MSR_CORE_ENERGY_STAT */ unsigned int core_id; unsigned long long counter[MAX_ADDED_COUNTERS]; } *core_even, *core_odd; @@ -273,6 +286,7 @@ struct system_summary { struct cpu_topology { int physical_package_id; + int die_id; int logical_cpu_id; int physical_node_id; int logical_node_id; /* 0-based count within the package */ @@ -283,6 +297,7 @@ struct cpu_topology { struct topo_params { int num_packages; + int num_die; int num_cpus; int num_cores; int max_cpu_num; @@ -314,9 +329,8 @@ int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg int retval, pkg_no, core_no, thread_no, node_no; for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { - for (core_no = 0; core_no < topo.cores_per_node; ++core_no) { - for (node_no = 0; node_no < topo.nodes_per_pkg; - node_no++) { + for (node_no = 0; node_no < topo.nodes_per_pkg; node_no++) { + for (core_no = 0; core_no < 
topo.cores_per_node; ++core_no) { for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) { struct thread_data *t; @@ -442,6 +456,7 @@ struct msr_counter bic[] = { { 0x0, "CPU" }, { 0x0, "APIC" }, { 0x0, "X2APIC" }, + { 0x0, "Die" }, }; #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) @@ -495,6 +510,7 @@ struct msr_counter bic[] = { #define BIC_CPU (1ULL << 47) #define BIC_APIC (1ULL << 48) #define BIC_X2APIC (1ULL << 49) +#define BIC_Die (1ULL << 50) #define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC) @@ -621,6 +637,8 @@ void print_header(char *delim) outp += sprintf(outp, "%sTime_Of_Day_Seconds", (printed++ ? delim : "")); if (DO_BIC(BIC_Package)) outp += sprintf(outp, "%sPackage", (printed++ ? delim : "")); + if (DO_BIC(BIC_Die)) + outp += sprintf(outp, "%sDie", (printed++ ? delim : "")); if (DO_BIC(BIC_Node)) outp += sprintf(outp, "%sNode", (printed++ ? delim : "")); if (DO_BIC(BIC_Core)) @@ -667,7 +685,7 @@ void print_header(char *delim) if (DO_BIC(BIC_CPU_c1)) outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : "")); - if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates) + if (DO_BIC(BIC_CPU_c3)) outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : "")); if (DO_BIC(BIC_CPU_c6)) outp += sprintf(outp, "%sCPU%%c6", (printed++ ? delim : "")); @@ -680,6 +698,14 @@ void print_header(char *delim) if (DO_BIC(BIC_CoreTmp)) outp += sprintf(outp, "%sCoreTmp", (printed++ ? delim : "")); + if (do_rapl && !rapl_joules) { + if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY)) + outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : "")); + } else if (do_rapl && rapl_joules) { + if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY)) + outp += sprintf(outp, "%sCor_J", (printed++ ? 
delim : "")); + } + for (mp = sys.cp; mp; mp = mp->next) { if (mp->format == FORMAT_RAW) { if (mp->width == 64) @@ -734,7 +760,7 @@ void print_header(char *delim) if (do_rapl && !rapl_joules) { if (DO_BIC(BIC_PkgWatt)) outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : "")); - if (DO_BIC(BIC_CorWatt)) + if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY)) outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : "")); if (DO_BIC(BIC_GFXWatt)) outp += sprintf(outp, "%sGFXWatt", (printed++ ? delim : "")); @@ -747,7 +773,7 @@ void print_header(char *delim) } else if (do_rapl && rapl_joules) { if (DO_BIC(BIC_Pkg_J)) outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : "")); - if (DO_BIC(BIC_Cor_J)) + if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY)) outp += sprintf(outp, "%sCor_J", (printed++ ? delim : "")); if (DO_BIC(BIC_GFX_J)) outp += sprintf(outp, "%sGFX_J", (printed++ ? delim : "")); @@ -808,6 +834,7 @@ int dump_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "c6: %016llX\n", c->c6); outp += sprintf(outp, "c7: %016llX\n", c->c7); outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c); + outp += sprintf(outp, "Joules: %0X\n", c->core_energy); for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n", @@ -904,6 +931,8 @@ int format_counters(struct thread_data *t, struct core_data *c, if (t == &average.threads) { if (DO_BIC(BIC_Package)) outp += sprintf(outp, "%s-", (printed++ ? delim : "")); + if (DO_BIC(BIC_Die)) + outp += sprintf(outp, "%s-", (printed++ ? delim : "")); if (DO_BIC(BIC_Node)) outp += sprintf(outp, "%s-", (printed++ ? delim : "")); if (DO_BIC(BIC_Core)) @@ -921,6 +950,12 @@ int format_counters(struct thread_data *t, struct core_data *c, else outp += sprintf(outp, "%s-", (printed++ ? delim : "")); } + if (DO_BIC(BIC_Die)) { + if (c) + outp += sprintf(outp, "%s%d", (printed++ ? 
delim : ""), cpus[t->cpu_id].die_id); + else + outp += sprintf(outp, "%s-", (printed++ ? delim : "")); + } if (DO_BIC(BIC_Node)) { if (t) outp += sprintf(outp, "%s%d", @@ -1003,7 +1038,7 @@ int format_counters(struct thread_data *t, struct core_data *c, if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) goto done; - if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates) + if (DO_BIC(BIC_CPU_c3)) outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3/tsc); if (DO_BIC(BIC_CPU_c6)) outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6/tsc); @@ -1033,6 +1068,20 @@ int format_counters(struct thread_data *t, struct core_data *c, } } + /* + * If measurement interval exceeds minimum RAPL Joule Counter range, + * indicate that results are suspect by printing "**" in fraction place. + */ + if (interval_float < rapl_joule_counter_range) + fmt8 = "%s%.2f"; + else + fmt8 = "%6.0f**"; + + if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY)) + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units / interval_float); + if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY)) + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units); + /* print per-package data only for 1st core in package */ if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) goto done; @@ -1085,18 +1134,9 @@ int format_counters(struct thread_data *t, struct core_data *c, if (DO_BIC(BIC_SYS_LPI)) outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->sys_lpi / 1000000.0 / interval_float); - /* - * If measurement interval exceeds minimum RAPL Joule Counter range, - * indicate that results are suspect by printing "**" in fraction place. - */ - if (interval_float < rapl_joule_counter_range) - fmt8 = "%s%.2f"; - else - fmt8 = "%6.0f**"; - if (DO_BIC(BIC_PkgWatt)) outp += sprintf(outp, fmt8, (printed++ ? 
delim : ""), p->energy_pkg * rapl_energy_units / interval_float); - if (DO_BIC(BIC_CorWatt)) + if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY)) outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float); if (DO_BIC(BIC_GFXWatt)) outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units / interval_float); @@ -1104,7 +1144,7 @@ int format_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units / interval_float); if (DO_BIC(BIC_Pkg_J)) outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units); - if (DO_BIC(BIC_Cor_J)) + if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY)) outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units); if (DO_BIC(BIC_GFX_J)) outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units); @@ -1249,6 +1289,8 @@ delta_core(struct core_data *new, struct core_data *old) old->core_temp_c = new->core_temp_c; old->mc6_us = new->mc6_us - old->mc6_us; + DELTA_WRAP32(new->core_energy, old->core_energy); + for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) old->counter[i] = new->counter[i]; @@ -1391,6 +1433,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data c->c7 = 0; c->mc6_us = 0; c->core_temp_c = 0; + c->core_energy = 0; p->pkg_wtd_core_c0 = 0; p->pkg_any_core_c0 = 0; @@ -1473,6 +1516,8 @@ int sum_counters(struct thread_data *t, struct core_data *c, average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c); + average.cores.core_energy += c->core_energy; + for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) continue; @@ -1818,7 +1863,7 @@ retry: if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) goto done; - if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && 
!do_cnl_cstates) { + if (DO_BIC(BIC_CPU_c3)) { if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3)) return -6; } @@ -1845,6 +1890,12 @@ retry: c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F); } + if (do_rapl & RAPL_AMD_F17H) { + if (get_msr(cpu, MSR_CORE_ENERGY_STAT, &msr)) + return -14; + c->core_energy = msr & 0xFFFFFFFF; + } + for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { if (get_mp(cpu, mp, &c->counter[i])) return -10; @@ -1934,6 +1985,11 @@ retry: return -16; p->rapl_dram_perf_status = msr & 0xFFFFFFFF; } + if (do_rapl & RAPL_AMD_F17H) { + if (get_msr(cpu, MSR_PKG_ENERGY_STAT, &msr)) + return -13; + p->energy_pkg = msr & 0xFFFFFFFF; + } if (DO_BIC(BIC_PkgTmp)) { if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) return -17; @@ -2456,6 +2512,8 @@ void free_all_buffers(void) /* * Parse a file containing a single int. + * Return 0 if file can not be opened + * Exit if file can be opened, but can not be parsed */ int parse_int_file(const char *fmt, ...) { @@ -2467,7 +2525,9 @@ int parse_int_file(const char *fmt, ...) 
va_start(args, fmt); vsnprintf(path, sizeof(path), fmt, args); va_end(args); - filep = fopen_or_die(path, "r"); + filep = fopen(path, "r"); + if (!filep) + return 0; if (fscanf(filep, "%d", &value) != 1) err(1, "%s: failed to parse number from file", path); fclose(filep); @@ -2488,6 +2548,11 @@ int get_physical_package_id(int cpu) return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu); } +int get_die_id(int cpu) +{ + return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/die_id", cpu); +} + int get_core_id(int cpu) { return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu); @@ -2578,7 +2643,8 @@ int get_thread_siblings(struct cpu_topology *thiscpu) filep = fopen_or_die(path, "r"); do { offset -= BITMASK_SIZE; - fscanf(filep, "%lx%c", &map, &character); + if (fscanf(filep, "%lx%c", &map, &character) != 2) + err(1, "%s: failed to parse file", path); for (shift = 0; shift < BITMASK_SIZE; shift++) { if ((map >> shift) & 0x1) { so = shift + offset; @@ -2855,8 +2921,11 @@ int snapshot_cpu_lpi_us(void) fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", "r"); retval = fscanf(fp, "%lld", &cpuidle_cur_cpu_lpi_us); - if (retval != 1) - err(1, "CPU LPI"); + if (retval != 1) { + fprintf(stderr, "Disabling Low Power Idle CPU output\n"); + BIC_NOT_PRESENT(BIC_CPU_LPI); + return -1; + } fclose(fp); @@ -2878,9 +2947,11 @@ int snapshot_sys_lpi_us(void) fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us", "r"); retval = fscanf(fp, "%lld", &cpuidle_cur_sys_lpi_us); - if (retval != 1) - err(1, "SYS LPI"); - + if (retval != 1) { + fprintf(stderr, "Disabling Low Power Idle System output\n"); + BIC_NOT_PRESENT(BIC_SYS_LPI); + return -1; + } fclose(fp); return 0; @@ -3410,14 +3481,14 @@ dump_sysfs_cstate_config(void) input = fopen(path, "r"); if (input == NULL) continue; - fgets(name_buf, sizeof(name_buf), input); + if (!fgets(name_buf, sizeof(name_buf), input)) 
+ err(1, "%s: failed to read file", path); /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ sp = strchr(name_buf, '-'); if (!sp) sp = strchrnul(name_buf, '\n'); *sp = '\0'; - fclose(input); sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc", @@ -3425,7 +3496,8 @@ dump_sysfs_cstate_config(void) input = fopen(path, "r"); if (input == NULL) continue; - fgets(desc, sizeof(desc), input); + if (!fgets(desc, sizeof(desc), input)) + err(1, "%s: failed to read file", path); fprintf(outf, "cpu%d: %s: %s", base_cpu, name_buf, desc); fclose(input); @@ -3444,20 +3516,22 @@ dump_sysfs_pstate_config(void) base_cpu); input = fopen(path, "r"); if (input == NULL) { - fprintf(stderr, "NSFOD %s\n", path); + fprintf(outf, "NSFOD %s\n", path); return; } - fgets(driver_buf, sizeof(driver_buf), input); + if (!fgets(driver_buf, sizeof(driver_buf), input)) + err(1, "%s: failed to read file", path); fclose(input); sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor", base_cpu); input = fopen(path, "r"); if (input == NULL) { - fprintf(stderr, "NSFOD %s\n", path); + fprintf(outf, "NSFOD %s\n", path); return; } - fgets(governor_buf, sizeof(governor_buf), input); + if (!fgets(governor_buf, sizeof(governor_buf), input)) + err(1, "%s: failed to read file", path); fclose(input); fprintf(outf, "cpu%d: cpufreq driver: %s", base_cpu, driver_buf); @@ -3466,7 +3540,8 @@ dump_sysfs_pstate_config(void) sprintf(path, "/sys/devices/system/cpu/cpufreq/boost"); input = fopen(path, "r"); if (input != NULL) { - fscanf(input, "%d", &turbo); + if (fscanf(input, "%d", &turbo) != 1) + err(1, "%s: failed to parse number from file", path); fprintf(outf, "cpufreq boost: %d\n", turbo); fclose(input); } @@ -3474,7 +3549,8 @@ dump_sysfs_pstate_config(void) sprintf(path, "/sys/devices/system/cpu/intel_pstate/no_turbo"); input = fopen(path, "r"); if (input != NULL) { - fscanf(input, "%d", &turbo); + if (fscanf(input, "%d", &turbo) != 1) + err(1, "%s: failed to parse number 
from file", path); fprintf(outf, "cpufreq intel_pstate no_turbo: %d\n", turbo); fclose(input); } @@ -3718,7 +3794,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data #define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */ #define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */ -double get_tdp(unsigned int model) +double get_tdp_intel(unsigned int model) { unsigned long long msr; @@ -3735,6 +3811,16 @@ double get_tdp(unsigned int model) } } +double get_tdp_amd(unsigned int family) +{ + switch (family) { + case 0x17: + default: + /* This is the max stock TDP of HEDT/Server Fam17h chips */ + return 250.0; + } +} + /* * rapl_dram_energy_units_probe() * Energy units are either hard-coded, or come from RAPL Energy Unit MSR. @@ -3754,21 +3840,12 @@ rapl_dram_energy_units_probe(int model, double rapl_energy_units) } } - -/* - * rapl_probe() - * - * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units - */ -void rapl_probe(unsigned int family, unsigned int model) +void rapl_probe_intel(unsigned int family, unsigned int model) { unsigned long long msr; unsigned int time_unit; double tdp; - if (!genuine_intel) - return; - if (family != 6) return; @@ -3892,13 +3969,69 @@ void rapl_probe(unsigned int family, unsigned int model) rapl_time_units = 1.0 / (1 << (time_unit)); - tdp = get_tdp(model); + tdp = get_tdp_intel(model); rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; if (!quiet) fprintf(outf, "RAPL: %.0f sec. 
Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp); +} - return; +void rapl_probe_amd(unsigned int family, unsigned int model) +{ + unsigned long long msr; + unsigned int eax, ebx, ecx, edx; + unsigned int has_rapl = 0; + double tdp; + + if (max_extended_level >= 0x80000007) { + __cpuid(0x80000007, eax, ebx, ecx, edx); + /* RAPL (Fam 17h) */ + has_rapl = edx & (1 << 14); + } + + if (!has_rapl) + return; + + switch (family) { + case 0x17: /* Zen, Zen+ */ + do_rapl = RAPL_AMD_F17H | RAPL_PER_CORE_ENERGY; + if (rapl_joules) { + BIC_PRESENT(BIC_Pkg_J); + BIC_PRESENT(BIC_Cor_J); + } else { + BIC_PRESENT(BIC_PkgWatt); + BIC_PRESENT(BIC_CorWatt); + } + break; + default: + return; + } + + if (get_msr(base_cpu, MSR_RAPL_PWR_UNIT, &msr)) + return; + + rapl_time_units = ldexp(1.0, -(msr >> 16 & 0xf)); + rapl_energy_units = ldexp(1.0, -(msr >> 8 & 0x1f)); + rapl_power_units = ldexp(1.0, -(msr & 0xf)); + + tdp = get_tdp_amd(model); + + rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; + if (!quiet) + fprintf(outf, "RAPL: %.0f sec. 
Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp); +} + +/* + * rapl_probe() + * + * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units + */ +void rapl_probe(unsigned int family, unsigned int model) +{ + if (genuine_intel) + rapl_probe_intel(family, model); + if (authentic_amd) + rapl_probe_amd(family, model); } void perf_limit_reasons_probe(unsigned int family, unsigned int model) @@ -4003,6 +4136,7 @@ void print_power_limit_msr(int cpu, unsigned long long msr, char *label) int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) { unsigned long long msr; + const char *msr_name; int cpu; if (!do_rapl) @@ -4018,10 +4152,17 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) return -1; } - if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr)) - return -1; + if (do_rapl & RAPL_AMD_F17H) { + msr_name = "MSR_RAPL_PWR_UNIT"; + if (get_msr(cpu, MSR_RAPL_PWR_UNIT, &msr)) + return -1; + } else { + msr_name = "MSR_RAPL_POWER_UNIT"; + if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr)) + return -1; + } - fprintf(outf, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr, + fprintf(outf, "cpu%d: %s: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr_name, msr, rapl_power_units, rapl_energy_units, rapl_time_units); if (do_rapl & RAPL_PKG_POWER_INFO) { @@ -4451,6 +4592,9 @@ unsigned int intel_model_duplicates(unsigned int model) case INTEL_FAM6_KABYLAKE_MOBILE: case INTEL_FAM6_KABYLAKE_DESKTOP: return INTEL_FAM6_SKYLAKE_MOBILE; + + case INTEL_FAM6_ICELAKE_MOBILE: + return INTEL_FAM6_CANNONLAKE_MOBILE; } return model; } @@ -4702,7 +4846,9 @@ void process_cpuid() } do_slm_cstates = is_slm(family, model); do_knl_cstates = is_knl(family, model); - do_cnl_cstates = is_cnl(family, model); + + if (do_slm_cstates || do_knl_cstates || is_cnl(family, model)) + BIC_NOT_PRESENT(BIC_CPU_c3); if (!quiet) decode_misc_pwr_mgmt_msr(); @@ -4769,6 +4915,7 @@ void topology_probe() int i; int 
max_core_id = 0; int max_package_id = 0; + int max_die_id = 0; int max_siblings = 0; /* Initialize num_cpus, max_cpu_num */ @@ -4835,6 +4982,11 @@ void topology_probe() if (cpus[i].physical_package_id > max_package_id) max_package_id = cpus[i].physical_package_id; + /* get die information */ + cpus[i].die_id = get_die_id(i); + if (cpus[i].die_id > max_die_id) + max_die_id = cpus[i].die_id; + /* get numa node information */ cpus[i].physical_node_id = get_physical_node_id(&cpus[i]); if (cpus[i].physical_node_id > topo.max_node_num) @@ -4860,6 +5012,13 @@ void topology_probe() if (!summary_only && topo.cores_per_node > 1) BIC_PRESENT(BIC_Core); + topo.num_die = max_die_id + 1; + if (debug > 1) + fprintf(outf, "max_die_id %d, sizing for %d die\n", + max_die_id, topo.num_die); + if (!summary_only && topo.num_die > 1) + BIC_PRESENT(BIC_Die); + topo.num_packages = max_package_id + 1; if (debug > 1) fprintf(outf, "max_package_id %d, sizing for %d packages\n", @@ -4884,8 +5043,8 @@ void topology_probe() if (cpu_is_not_present(i)) continue; fprintf(outf, - "cpu %d pkg %d node %d lnode %d core %d thread %d\n", - i, cpus[i].physical_package_id, + "cpu %d pkg %d die %d node %d lnode %d core %d thread %d\n", + i, cpus[i].physical_package_id, cpus[i].die_id, cpus[i].physical_node_id, cpus[i].logical_node_id, cpus[i].physical_core_id, @@ -5077,6 +5236,9 @@ int fork_it(char **argv) signal(SIGQUIT, SIG_IGN); if (waitpid(child_pid, &status, 0) == -1) err(status, "waitpid"); + + if (WIFEXITED(status)) + status = WEXITSTATUS(status); } /* * n.b. 
fork_it() does not check for errors from for_all_cpus() @@ -5119,7 +5281,7 @@ int get_and_dump_counters(void) } void print_version() { - fprintf(outf, "turbostat version 18.07.27" + fprintf(outf, "turbostat version 19.03.20" " - Len Brown <lenb@kernel.org>\n"); } @@ -5316,7 +5478,8 @@ void probe_sysfs(void) input = fopen(path, "r"); if (input == NULL) continue; - fgets(name_buf, sizeof(name_buf), input); + if (!fgets(name_buf, sizeof(name_buf), input)) + err(1, "%s: failed to read file", path); /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ sp = strchr(name_buf, '-'); @@ -5343,7 +5506,8 @@ void probe_sysfs(void) input = fopen(path, "r"); if (input == NULL) continue; - fgets(name_buf, sizeof(name_buf), input); + if (!fgets(name_buf, sizeof(name_buf), input)) + err(1, "%s: failed to read file", path); /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ sp = strchr(name_buf, '-'); if (!sp) diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index b579f962451d..85ffdcfa596b 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -146,6 +146,7 @@ static int dimm_fail_cmd_code[ARRAY_SIZE(handle)]; struct nfit_test_sec { u8 state; u8 ext_state; + u8 old_state; u8 passphrase[32]; u8 master_passphrase[32]; u64 overwrite_end_time; @@ -225,6 +226,8 @@ static struct workqueue_struct *nfit_wq; static struct gen_pool *nfit_pool; +static const char zero_key[NVDIMM_PASSPHRASE_LEN]; + static struct nfit_test *to_nfit_test(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); @@ -1059,8 +1062,7 @@ static int nd_intel_test_cmd_secure_erase(struct nfit_test *t, struct device *dev = &t->pdev.dev; struct nfit_test_sec *sec = &dimm_sec_info[dimm]; - if (!(sec->state & ND_INTEL_SEC_STATE_ENABLED) || - (sec->state & ND_INTEL_SEC_STATE_FROZEN)) { + if (sec->state & ND_INTEL_SEC_STATE_FROZEN) { nd_cmd->status = ND_INTEL_STATUS_INVALID_STATE; dev_dbg(dev, "secure erase: wrong security 
state\n"); } else if (memcmp(nd_cmd->passphrase, sec->passphrase, @@ -1068,6 +1070,12 @@ static int nd_intel_test_cmd_secure_erase(struct nfit_test *t, nd_cmd->status = ND_INTEL_STATUS_INVALID_PASS; dev_dbg(dev, "secure erase: wrong passphrase\n"); } else { + if (!(sec->state & ND_INTEL_SEC_STATE_ENABLED) + && (memcmp(nd_cmd->passphrase, zero_key, + ND_INTEL_PASSPHRASE_SIZE) != 0)) { + dev_dbg(dev, "invalid zero key\n"); + return 0; + } memset(sec->passphrase, 0, ND_INTEL_PASSPHRASE_SIZE); memset(sec->master_passphrase, 0, ND_INTEL_PASSPHRASE_SIZE); sec->state = 0; @@ -1093,7 +1101,7 @@ static int nd_intel_test_cmd_overwrite(struct nfit_test *t, return 0; } - memset(sec->passphrase, 0, ND_INTEL_PASSPHRASE_SIZE); + sec->old_state = sec->state; sec->state = ND_INTEL_SEC_STATE_OVERWRITE; dev_dbg(dev, "overwrite progressing.\n"); sec->overwrite_end_time = get_jiffies_64() + 5 * HZ; @@ -1115,7 +1123,8 @@ static int nd_intel_test_cmd_query_overwrite(struct nfit_test *t, if (time_is_before_jiffies64(sec->overwrite_end_time)) { sec->overwrite_end_time = 0; - sec->state = 0; + sec->state = sec->old_state; + sec->old_state = 0; sec->ext_state = ND_INTEL_SEC_ESTATE_ENABLED; dev_dbg(dev, "overwrite is complete\n"); } else diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 3b74d23fffab..41e8a689aa77 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -30,4 +30,5 @@ test_netcnt test_section_names test_tcpnotify_user test_libbpf +test_tcp_check_syncookie_user alu32 diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 2aed37ea61a4..66f2dca1dee1 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -23,7 +23,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \ test_sock test_btf test_sockmap 
test_lirc_mode2_user get_cgroup_id_user \ test_socket_cookie test_cgroup_storage test_select_reuseport test_section_names \ - test_netcnt test_tcpnotify_user test_sock_fields + test_netcnt test_tcpnotify_user test_sock_fields test_sysctl BPF_OBJ_FILES = $(patsubst %.c,%.o, $(notdir $(wildcard progs/*.c))) TEST_GEN_FILES = $(BPF_OBJ_FILES) @@ -51,7 +51,10 @@ TEST_PROGS := test_kmod.sh \ test_skb_cgroup_id.sh \ test_flow_dissector.sh \ test_xdp_vlan.sh \ - test_lwt_ip_encap.sh + test_lwt_ip_encap.sh \ + test_tcp_check_syncookie.sh \ + test_tc_tunnel.sh \ + test_tc_edt.sh TEST_PROGS_EXTENDED := with_addr.sh \ with_tunnels.sh \ @@ -60,7 +63,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \ # Compile but not part of 'make run_tests' TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr test_skb_cgroup_id_user \ - flow_dissector_load test_flow_dissector + flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user include ../lib.mk @@ -69,7 +72,9 @@ TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read all: $(TEST_CUSTOM_PROGS) $(OUTPUT)/urandom_read: $(OUTPUT)/%: %.c - $(CC) -o $@ -static $< -Wl,--build-id + $(CC) -o $@ $< -Wl,--build-id + +$(OUTPUT)/test_maps: map_tests/*.c BPFOBJ := $(OUTPUT)/libbpf.a @@ -90,6 +95,7 @@ $(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c $(OUTPUT)/test_cgroup_storage: cgroup_helpers.c $(OUTPUT)/test_netcnt: cgroup_helpers.c $(OUTPUT)/test_sock_fields: cgroup_helpers.c +$(OUTPUT)/test_sysctl: cgroup_helpers.c .PHONY: force @@ -206,7 +212,7 @@ ifeq ($(DWARF2BTF),y) endif PROG_TESTS_H := $(OUTPUT)/prog_tests/tests.h -$(OUTPUT)/test_progs: $(PROG_TESTS_H) +test_progs.c: $(PROG_TESTS_H) $(OUTPUT)/test_progs: CFLAGS += $(TEST_PROGS_CFLAGS) $(OUTPUT)/test_progs: prog_tests/*.c @@ -228,8 +234,29 @@ $(PROG_TESTS_H): $(PROG_TESTS_DIR) $(PROG_TESTS_FILES) echo '#endif' \ ) > $(PROG_TESTS_H)) +TEST_MAPS_CFLAGS := -I. 
-I$(OUTPUT) +MAP_TESTS_DIR = $(OUTPUT)/map_tests +$(MAP_TESTS_DIR): + mkdir -p $@ +MAP_TESTS_H := $(MAP_TESTS_DIR)/tests.h +test_maps.c: $(MAP_TESTS_H) +$(OUTPUT)/test_maps: CFLAGS += $(TEST_MAPS_CFLAGS) +MAP_TESTS_FILES := $(wildcard map_tests/*.c) +$(MAP_TESTS_H): $(MAP_TESTS_DIR) $(MAP_TESTS_FILES) + $(shell ( cd map_tests/; \ + echo '/* Generated header, do not edit */'; \ + echo '#ifdef DECLARE'; \ + ls *.c 2> /dev/null | \ + sed -e 's@\([^\.]*\)\.c@extern void test_\1(void);@'; \ + echo '#endif'; \ + echo '#ifdef CALL'; \ + ls *.c 2> /dev/null | \ + sed -e 's@\([^\.]*\)\.c@test_\1();@'; \ + echo '#endif' \ + ) > $(MAP_TESTS_H)) + VERIFIER_TESTS_H := $(OUTPUT)/verifier/tests.h -$(OUTPUT)/test_verifier: $(VERIFIER_TESTS_H) +test_verifier.c: $(VERIFIER_TESTS_H) $(OUTPUT)/test_verifier: CFLAGS += $(TEST_VERIFIER_CFLAGS) VERIFIER_TESTS_DIR = $(OUTPUT)/verifier @@ -247,4 +274,4 @@ $(OUTPUT)/verifier/tests.h: $(VERIFIER_TESTS_DIR) $(VERIFIER_TEST_FILES) ) > $(VERIFIER_TESTS_H)) EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(ALU32_BUILD_DIR) \ - $(VERIFIER_TESTS_H) $(PROG_TESTS_H) + $(VERIFIER_TESTS_H) $(PROG_TESTS_H) $(MAP_TESTS_H) diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index c9433a496d54..6e80b66d7fb1 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -9,14 +9,14 @@ #define SEC(NAME) __attribute__((section(NAME), used)) /* helper functions called from eBPF programs written in C */ -static void *(*bpf_map_lookup_elem)(void *map, void *key) = +static void *(*bpf_map_lookup_elem)(void *map, const void *key) = (void *) BPF_FUNC_map_lookup_elem; -static int (*bpf_map_update_elem)(void *map, void *key, void *value, +static int (*bpf_map_update_elem)(void *map, const void *key, const void *value, unsigned long long flags) = (void *) BPF_FUNC_map_update_elem; -static int (*bpf_map_delete_elem)(void *map, void *key) = +static int (*bpf_map_delete_elem)(void *map, const 
void *key) = (void *) BPF_FUNC_map_delete_elem; -static int (*bpf_map_push_elem)(void *map, void *value, +static int (*bpf_map_push_elem)(void *map, const void *value, unsigned long long flags) = (void *) BPF_FUNC_map_push_elem; static int (*bpf_map_pop_elem)(void *map, void *value) = @@ -159,6 +159,11 @@ static struct bpf_sock *(*bpf_sk_lookup_tcp)(void *ctx, int size, unsigned long long netns_id, unsigned long long flags) = (void *) BPF_FUNC_sk_lookup_tcp; +static struct bpf_sock *(*bpf_skc_lookup_tcp)(void *ctx, + struct bpf_sock_tuple *tuple, + int size, unsigned long long netns_id, + unsigned long long flags) = + (void *) BPF_FUNC_skc_lookup_tcp; static struct bpf_sock *(*bpf_sk_lookup_udp)(void *ctx, struct bpf_sock_tuple *tuple, int size, unsigned long long netns_id, @@ -180,8 +185,37 @@ static struct bpf_sock *(*bpf_sk_fullsock)(struct bpf_sock *sk) = (void *) BPF_FUNC_sk_fullsock; static struct bpf_tcp_sock *(*bpf_tcp_sock)(struct bpf_sock *sk) = (void *) BPF_FUNC_tcp_sock; +static struct bpf_sock *(*bpf_get_listener_sock)(struct bpf_sock *sk) = + (void *) BPF_FUNC_get_listener_sock; static int (*bpf_skb_ecn_set_ce)(void *ctx) = (void *) BPF_FUNC_skb_ecn_set_ce; +static int (*bpf_tcp_check_syncookie)(struct bpf_sock *sk, + void *ip, int ip_len, void *tcp, int tcp_len) = + (void *) BPF_FUNC_tcp_check_syncookie; +static int (*bpf_sysctl_get_name)(void *ctx, char *buf, + unsigned long long buf_len, + unsigned long long flags) = + (void *) BPF_FUNC_sysctl_get_name; +static int (*bpf_sysctl_get_current_value)(void *ctx, char *buf, + unsigned long long buf_len) = + (void *) BPF_FUNC_sysctl_get_current_value; +static int (*bpf_sysctl_get_new_value)(void *ctx, char *buf, + unsigned long long buf_len) = + (void *) BPF_FUNC_sysctl_get_new_value; +static int (*bpf_sysctl_set_new_value)(void *ctx, const char *buf, + unsigned long long buf_len) = + (void *) BPF_FUNC_sysctl_set_new_value; +static int (*bpf_strtol)(const char *buf, unsigned long long buf_len, + unsigned 
long long flags, long *res) = + (void *) BPF_FUNC_strtol; +static int (*bpf_strtoul)(const char *buf, unsigned long long buf_len, + unsigned long long flags, unsigned long *res) = + (void *) BPF_FUNC_strtoul; +static void *(*bpf_sk_storage_get)(void *map, struct bpf_sock *sk, + void *value, __u64 flags) = + (void *) BPF_FUNC_sk_storage_get; +static int (*bpf_sk_storage_delete)(void *map, struct bpf_sock *sk) = + (void *)BPF_FUNC_sk_storage_delete; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions @@ -272,6 +306,9 @@ static int (*bpf_skb_adjust_room)(void *ctx, __s32 len_diff, __u32 mode, #elif defined(__TARGET_ARCH_s930x) #define bpf_target_s930x #define bpf_target_defined +#elif defined(__TARGET_ARCH_arm) + #define bpf_target_arm + #define bpf_target_defined #elif defined(__TARGET_ARCH_arm64) #define bpf_target_arm64 #define bpf_target_defined @@ -294,6 +331,8 @@ static int (*bpf_skb_adjust_room)(void *ctx, __s32 len_diff, __u32 mode, #define bpf_target_x86 #elif defined(__s390x__) #define bpf_target_s930x +#elif defined(__arm__) + #define bpf_target_arm #elif defined(__aarch64__) #define bpf_target_arm64 #elif defined(__mips__) @@ -331,6 +370,19 @@ static int (*bpf_skb_adjust_room)(void *ctx, __s32 len_diff, __u32 mode, #define PT_REGS_SP(x) ((x)->gprs[15]) #define PT_REGS_IP(x) ((x)->psw.addr) +#elif defined(bpf_target_arm) + +#define PT_REGS_PARM1(x) ((x)->uregs[0]) +#define PT_REGS_PARM2(x) ((x)->uregs[1]) +#define PT_REGS_PARM3(x) ((x)->uregs[2]) +#define PT_REGS_PARM4(x) ((x)->uregs[3]) +#define PT_REGS_PARM5(x) ((x)->uregs[4]) +#define PT_REGS_RET(x) ((x)->uregs[14]) +#define PT_REGS_FP(x) ((x)->uregs[11]) /* Works only with CONFIG_FRAME_POINTER */ +#define PT_REGS_RC(x) ((x)->uregs[0]) +#define PT_REGS_SP(x) ((x)->uregs[13]) +#define PT_REGS_IP(x) ((x)->uregs[12]) + #elif defined(bpf_target_arm64) #define PT_REGS_PARM1(x) ((x)->regs[0]) diff --git a/tools/testing/selftests/bpf/config 
b/tools/testing/selftests/bpf/config index 37f947ec44ed..f7a0744db31e 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -23,3 +23,14 @@ CONFIG_LWTUNNEL=y CONFIG_BPF_STREAM_PARSER=y CONFIG_XDP_SOCKETS=y CONFIG_FTRACE_SYSCALLS=y +CONFIG_IPV6_TUNNEL=y +CONFIG_IPV6_GRE=y +CONFIG_NET_FOU=m +CONFIG_NET_FOU_IP_TUNNELS=y +CONFIG_IPV6_FOU=m +CONFIG_IPV6_FOU_TUNNEL=m +CONFIG_MPLS=y +CONFIG_NET_MPLS_GSO=m +CONFIG_MPLS_ROUTING=m +CONFIG_MPLS_IPTUNNEL=m +CONFIG_IPV6_SIT=m diff --git a/tools/testing/selftests/bpf/flow_dissector_load.c b/tools/testing/selftests/bpf/flow_dissector_load.c index 77cafa66d048..3fd83b9dc1bf 100644 --- a/tools/testing/selftests/bpf/flow_dissector_load.c +++ b/tools/testing/selftests/bpf/flow_dissector_load.c @@ -26,7 +26,7 @@ static void load_and_attach_program(void) struct bpf_object *obj; ret = bpf_flow_load(&obj, cfg_path_name, cfg_section_name, - cfg_map_name, &prog_fd); + cfg_map_name, NULL, &prog_fd, NULL); if (ret) error(1, 0, "bpf_flow_load %s", cfg_path_name); @@ -52,7 +52,7 @@ static void detach_program(void) sprintf(command, "rm -r %s", cfg_pin_path); ret = system(command); if (ret) - error(1, errno, command); + error(1, errno, "%s", command); } static void parse_opts(int argc, char **argv) diff --git a/tools/testing/selftests/bpf/flow_dissector_load.h b/tools/testing/selftests/bpf/flow_dissector_load.h index 41dd6959feb0..daeaeb518894 100644 --- a/tools/testing/selftests/bpf/flow_dissector_load.h +++ b/tools/testing/selftests/bpf/flow_dissector_load.h @@ -9,10 +9,12 @@ static inline int bpf_flow_load(struct bpf_object **obj, const char *path, const char *section_name, const char *map_name, - int *prog_fd) + const char *keys_map_name, + int *prog_fd, + int *keys_fd) { struct bpf_program *prog, *main_prog; - struct bpf_map *prog_array; + struct bpf_map *prog_array, *keys; int prog_array_fd; int ret, fd, i; @@ -23,19 +25,29 @@ static inline int bpf_flow_load(struct bpf_object **obj, main_prog = 
bpf_object__find_program_by_title(*obj, section_name); if (!main_prog) - return ret; + return -1; *prog_fd = bpf_program__fd(main_prog); if (*prog_fd < 0) - return ret; + return -1; prog_array = bpf_object__find_map_by_name(*obj, map_name); if (!prog_array) - return ret; + return -1; prog_array_fd = bpf_map__fd(prog_array); if (prog_array_fd < 0) - return ret; + return -1; + + if (keys_map_name && keys_fd) { + keys = bpf_object__find_map_by_name(*obj, keys_map_name); + if (!keys) + return -1; + + *keys_fd = bpf_map__fd(keys); + if (*keys_fd < 0) + return -1; + } i = 0; bpf_object__for_each_program(prog, *obj) { diff --git a/tools/testing/selftests/bpf/map_tests/sk_storage_map.c b/tools/testing/selftests/bpf/map_tests/sk_storage_map.c new file mode 100644 index 000000000000..e569edc679d8 --- /dev/null +++ b/tools/testing/selftests/bpf/map_tests/sk_storage_map.c @@ -0,0 +1,629 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ +#include <linux/compiler.h> +#include <linux/err.h> + +#include <sys/resource.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <linux/btf.h> +#include <unistd.h> +#include <signal.h> +#include <errno.h> +#include <string.h> +#include <pthread.h> + +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + +#include <test_btf.h> +#include <test_maps.h> + +static struct bpf_create_map_attr xattr = { + .name = "sk_storage_map", + .map_type = BPF_MAP_TYPE_SK_STORAGE, + .map_flags = BPF_F_NO_PREALLOC, + .max_entries = 0, + .key_size = 4, + .value_size = 8, + .btf_key_type_id = 1, + .btf_value_type_id = 3, + .btf_fd = -1, +}; + +static unsigned int nr_sk_threads_done; +static unsigned int nr_sk_threads_err; +static unsigned int nr_sk_per_thread = 4096; +static unsigned int nr_sk_threads = 4; +static int sk_storage_map = -1; +static unsigned int stop; +static int runtime_s = 5; + +static bool is_stopped(void) +{ + return READ_ONCE(stop); +} + +static unsigned int threads_err(void) +{ + return READ_ONCE(nr_sk_threads_err); 
+} + +static void notify_thread_err(void) +{ + __sync_add_and_fetch(&nr_sk_threads_err, 1); +} + +static bool wait_for_threads_err(void) +{ + while (!is_stopped() && !threads_err()) + usleep(500); + + return !is_stopped(); +} + +static unsigned int threads_done(void) +{ + return READ_ONCE(nr_sk_threads_done); +} + +static void notify_thread_done(void) +{ + __sync_add_and_fetch(&nr_sk_threads_done, 1); +} + +static void notify_thread_redo(void) +{ + __sync_sub_and_fetch(&nr_sk_threads_done, 1); +} + +static bool wait_for_threads_done(void) +{ + while (threads_done() != nr_sk_threads && !is_stopped() && + !threads_err()) + usleep(50); + + return !is_stopped() && !threads_err(); +} + +static bool wait_for_threads_redo(void) +{ + while (threads_done() && !is_stopped() && !threads_err()) + usleep(50); + + return !is_stopped() && !threads_err(); +} + +static bool wait_for_map(void) +{ + while (READ_ONCE(sk_storage_map) == -1 && !is_stopped()) + usleep(50); + + return !is_stopped(); +} + +static bool wait_for_map_close(void) +{ + while (READ_ONCE(sk_storage_map) != -1 && !is_stopped()) + ; + + return !is_stopped(); +} + +static int load_btf(void) +{ + const char btf_str_sec[] = "\0bpf_spin_lock\0val\0cnt\0l"; + __u32 btf_raw_types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* struct bpf_spin_lock */ /* [2] */ + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4), + BTF_MEMBER_ENC(15, 1, 0), /* int val; */ + /* struct val */ /* [3] */ + BTF_TYPE_ENC(15, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8), + BTF_MEMBER_ENC(19, 1, 0), /* int cnt; */ + BTF_MEMBER_ENC(23, 2, 32),/* struct bpf_spin_lock l; */ + }; + struct btf_header btf_hdr = { + .magic = BTF_MAGIC, + .version = BTF_VERSION, + .hdr_len = sizeof(struct btf_header), + .type_len = sizeof(btf_raw_types), + .str_off = sizeof(btf_raw_types), + .str_len = sizeof(btf_str_sec), + }; + __u8 raw_btf[sizeof(struct btf_header) + sizeof(btf_raw_types) + + sizeof(btf_str_sec)]; + + memcpy(raw_btf, 
&btf_hdr, sizeof(btf_hdr)); + memcpy(raw_btf + sizeof(btf_hdr), btf_raw_types, sizeof(btf_raw_types)); + memcpy(raw_btf + sizeof(btf_hdr) + sizeof(btf_raw_types), + btf_str_sec, sizeof(btf_str_sec)); + + return bpf_load_btf(raw_btf, sizeof(raw_btf), 0, 0, 0); +} + +static int create_sk_storage_map(void) +{ + int btf_fd, map_fd; + + btf_fd = load_btf(); + CHECK(btf_fd == -1, "bpf_load_btf", "btf_fd:%d errno:%d\n", + btf_fd, errno); + xattr.btf_fd = btf_fd; + + map_fd = bpf_create_map_xattr(&xattr); + xattr.btf_fd = -1; + close(btf_fd); + CHECK(map_fd == -1, + "bpf_create_map_xattr()", "errno:%d\n", errno); + + return map_fd; +} + +static void *insert_close_thread(void *arg) +{ + struct { + int cnt; + int lock; + } value = { .cnt = 0xeB9F, .lock = 0, }; + int i, map_fd, err, *sk_fds; + + sk_fds = malloc(sizeof(*sk_fds) * nr_sk_per_thread); + if (!sk_fds) { + notify_thread_err(); + return ERR_PTR(-ENOMEM); + } + + for (i = 0; i < nr_sk_per_thread; i++) + sk_fds[i] = -1; + + while (!is_stopped()) { + if (!wait_for_map()) + goto close_all; + + map_fd = READ_ONCE(sk_storage_map); + for (i = 0; i < nr_sk_per_thread && !is_stopped(); i++) { + sk_fds[i] = socket(AF_INET6, SOCK_STREAM, 0); + if (sk_fds[i] == -1) { + err = -errno; + fprintf(stderr, "socket(): errno:%d\n", errno); + goto errout; + } + err = bpf_map_update_elem(map_fd, &sk_fds[i], &value, + BPF_NOEXIST); + if (err) { + err = -errno; + fprintf(stderr, + "bpf_map_update_elem(): errno:%d\n", + errno); + goto errout; + } + } + + notify_thread_done(); + wait_for_map_close(); + +close_all: + for (i = 0; i < nr_sk_per_thread; i++) { + close(sk_fds[i]); + sk_fds[i] = -1; + } + + notify_thread_redo(); + } + + free(sk_fds); + return NULL; + +errout: + for (i = 0; i < nr_sk_per_thread && sk_fds[i] != -1; i++) + close(sk_fds[i]); + free(sk_fds); + notify_thread_err(); + return ERR_PTR(err); +} + +static int do_sk_storage_map_stress_free(void) +{ + int i, map_fd = -1, err = 0, nr_threads_created = 0; + pthread_t 
*sk_thread_ids; + void *thread_ret; + + sk_thread_ids = malloc(sizeof(pthread_t) * nr_sk_threads); + if (!sk_thread_ids) { + fprintf(stderr, "malloc(sk_threads): NULL\n"); + return -ENOMEM; + } + + for (i = 0; i < nr_sk_threads; i++) { + err = pthread_create(&sk_thread_ids[i], NULL, + insert_close_thread, NULL); + if (err) { + err = -errno; + goto done; + } + nr_threads_created++; + } + + while (!is_stopped()) { + map_fd = create_sk_storage_map(); + WRITE_ONCE(sk_storage_map, map_fd); + + if (!wait_for_threads_done()) + break; + + WRITE_ONCE(sk_storage_map, -1); + close(map_fd); + map_fd = -1; + + if (!wait_for_threads_redo()) + break; + } + +done: + WRITE_ONCE(stop, 1); + for (i = 0; i < nr_threads_created; i++) { + pthread_join(sk_thread_ids[i], &thread_ret); + if (IS_ERR(thread_ret) && !err) { + err = PTR_ERR(thread_ret); + fprintf(stderr, "threads#%u: err:%d\n", i, err); + } + } + free(sk_thread_ids); + + if (map_fd != -1) + close(map_fd); + + return err; +} + +static void *update_thread(void *arg) +{ + struct { + int cnt; + int lock; + } value = { .cnt = 0xeB9F, .lock = 0, }; + int map_fd = READ_ONCE(sk_storage_map); + int sk_fd = *(int *)arg; + int err = 0; /* Suppress compiler false alarm */ + + while (!is_stopped()) { + err = bpf_map_update_elem(map_fd, &sk_fd, &value, 0); + if (err && errno != EAGAIN) { + err = -errno; + fprintf(stderr, "bpf_map_update_elem: %d %d\n", + err, errno); + break; + } + } + + if (!is_stopped()) { + notify_thread_err(); + return ERR_PTR(err); + } + + return NULL; +} + +static void *delete_thread(void *arg) +{ + int map_fd = READ_ONCE(sk_storage_map); + int sk_fd = *(int *)arg; + int err = 0; /* Suppress compiler false alarm */ + + while (!is_stopped()) { + err = bpf_map_delete_elem(map_fd, &sk_fd); + if (err && errno != ENOENT) { + err = -errno; + fprintf(stderr, "bpf_map_delete_elem: %d %d\n", + err, errno); + break; + } + } + + if (!is_stopped()) { + notify_thread_err(); + return ERR_PTR(err); + } + + return NULL; +} + +static 
int do_sk_storage_map_stress_change(void) +{ + int i, sk_fd, map_fd = -1, err = 0, nr_threads_created = 0; + pthread_t *sk_thread_ids; + void *thread_ret; + + sk_thread_ids = malloc(sizeof(pthread_t) * nr_sk_threads); + if (!sk_thread_ids) { + fprintf(stderr, "malloc(sk_threads): NULL\n"); + return -ENOMEM; + } + + sk_fd = socket(AF_INET6, SOCK_STREAM, 0); + if (sk_fd == -1) { + err = -errno; + goto done; + } + + map_fd = create_sk_storage_map(); + WRITE_ONCE(sk_storage_map, map_fd); + + for (i = 0; i < nr_sk_threads; i++) { + if (i & 0x1) + err = pthread_create(&sk_thread_ids[i], NULL, + update_thread, &sk_fd); + else + err = pthread_create(&sk_thread_ids[i], NULL, + delete_thread, &sk_fd); + if (err) { + err = -errno; + goto done; + } + nr_threads_created++; + } + + wait_for_threads_err(); + +done: + WRITE_ONCE(stop, 1); + for (i = 0; i < nr_threads_created; i++) { + pthread_join(sk_thread_ids[i], &thread_ret); + if (IS_ERR(thread_ret) && !err) { + err = PTR_ERR(thread_ret); + fprintf(stderr, "threads#%u: err:%d\n", i, err); + } + } + free(sk_thread_ids); + + if (sk_fd != -1) + close(sk_fd); + close(map_fd); + + return err; +} + +static void stop_handler(int signum) +{ + if (signum != SIGALRM) + printf("stopping...\n"); + WRITE_ONCE(stop, 1); +} + +#define BPF_SK_STORAGE_MAP_TEST_NR_THREADS "BPF_SK_STORAGE_MAP_TEST_NR_THREADS" +#define BPF_SK_STORAGE_MAP_TEST_SK_PER_THREAD "BPF_SK_STORAGE_MAP_TEST_SK_PER_THREAD" +#define BPF_SK_STORAGE_MAP_TEST_RUNTIME_S "BPF_SK_STORAGE_MAP_TEST_RUNTIME_S" +#define BPF_SK_STORAGE_MAP_TEST_NAME "BPF_SK_STORAGE_MAP_TEST_NAME" + +static void test_sk_storage_map_stress_free(void) +{ + struct rlimit rlim_old, rlim_new = {}; + int err; + + getrlimit(RLIMIT_NOFILE, &rlim_old); + + signal(SIGTERM, stop_handler); + signal(SIGINT, stop_handler); + if (runtime_s > 0) { + signal(SIGALRM, stop_handler); + alarm(runtime_s); + } + + if (rlim_old.rlim_cur < nr_sk_threads * nr_sk_per_thread) { + rlim_new.rlim_cur = nr_sk_threads * 
nr_sk_per_thread + 128; + rlim_new.rlim_max = rlim_new.rlim_cur + 128; + err = setrlimit(RLIMIT_NOFILE, &rlim_new); + CHECK(err, "setrlimit(RLIMIT_NOFILE)", "rlim_new:%lu errno:%d", + rlim_new.rlim_cur, errno); + } + + err = do_sk_storage_map_stress_free(); + + signal(SIGTERM, SIG_DFL); + signal(SIGINT, SIG_DFL); + if (runtime_s > 0) { + signal(SIGALRM, SIG_DFL); + alarm(0); + } + + if (rlim_new.rlim_cur) + setrlimit(RLIMIT_NOFILE, &rlim_old); + + CHECK(err, "test_sk_storage_map_stress_free", "err:%d\n", err); +} + +static void test_sk_storage_map_stress_change(void) +{ + int err; + + signal(SIGTERM, stop_handler); + signal(SIGINT, stop_handler); + if (runtime_s > 0) { + signal(SIGALRM, stop_handler); + alarm(runtime_s); + } + + err = do_sk_storage_map_stress_change(); + + signal(SIGTERM, SIG_DFL); + signal(SIGINT, SIG_DFL); + if (runtime_s > 0) { + signal(SIGALRM, SIG_DFL); + alarm(0); + } + + CHECK(err, "test_sk_storage_map_stress_change", "err:%d\n", err); +} + +static void test_sk_storage_map_basic(void) +{ + struct { + int cnt; + int lock; + } value = { .cnt = 0xeB9f, .lock = 0, }, lookup_value; + struct bpf_create_map_attr bad_xattr; + int btf_fd, map_fd, sk_fd, err; + + btf_fd = load_btf(); + CHECK(btf_fd == -1, "bpf_load_btf", "btf_fd:%d errno:%d\n", + btf_fd, errno); + xattr.btf_fd = btf_fd; + + sk_fd = socket(AF_INET6, SOCK_STREAM, 0); + CHECK(sk_fd == -1, "socket()", "sk_fd:%d errno:%d\n", + sk_fd, errno); + + map_fd = bpf_create_map_xattr(&xattr); + CHECK(map_fd == -1, "bpf_create_map_xattr(good_xattr)", + "map_fd:%d errno:%d\n", map_fd, errno); + + /* Add new elem */ + memcpy(&lookup_value, &value, sizeof(value)); + err = bpf_map_update_elem(map_fd, &sk_fd, &value, + BPF_NOEXIST | BPF_F_LOCK); + CHECK(err, "bpf_map_update_elem(BPF_NOEXIST|BPF_F_LOCK)", + "err:%d errno:%d\n", err, errno); + err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value, + BPF_F_LOCK); + CHECK(err || lookup_value.cnt != value.cnt, + 
"bpf_map_lookup_elem_flags(BPF_F_LOCK)", + "err:%d errno:%d cnt:%x(%x)\n", + err, errno, lookup_value.cnt, value.cnt); + + /* Bump the cnt and update with BPF_EXIST | BPF_F_LOCK */ + value.cnt += 1; + err = bpf_map_update_elem(map_fd, &sk_fd, &value, + BPF_EXIST | BPF_F_LOCK); + CHECK(err, "bpf_map_update_elem(BPF_EXIST|BPF_F_LOCK)", + "err:%d errno:%d\n", err, errno); + err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value, + BPF_F_LOCK); + CHECK(err || lookup_value.cnt != value.cnt, + "bpf_map_lookup_elem_flags(BPF_F_LOCK)", + "err:%d errno:%d cnt:%x(%x)\n", + err, errno, lookup_value.cnt, value.cnt); + + /* Bump the cnt and update with BPF_EXIST */ + value.cnt += 1; + err = bpf_map_update_elem(map_fd, &sk_fd, &value, BPF_EXIST); + CHECK(err, "bpf_map_update_elem(BPF_EXIST)", + "err:%d errno:%d\n", err, errno); + err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value, + BPF_F_LOCK); + CHECK(err || lookup_value.cnt != value.cnt, + "bpf_map_lookup_elem_flags(BPF_F_LOCK)", + "err:%d errno:%d cnt:%x(%x)\n", + err, errno, lookup_value.cnt, value.cnt); + + /* Update with BPF_NOEXIST */ + value.cnt += 1; + err = bpf_map_update_elem(map_fd, &sk_fd, &value, + BPF_NOEXIST | BPF_F_LOCK); + CHECK(!err || errno != EEXIST, + "bpf_map_update_elem(BPF_NOEXIST|BPF_F_LOCK)", + "err:%d errno:%d\n", err, errno); + err = bpf_map_update_elem(map_fd, &sk_fd, &value, BPF_NOEXIST); + CHECK(!err || errno != EEXIST, "bpf_map_update_elem(BPF_NOEXIST)", + "err:%d errno:%d\n", err, errno); + value.cnt -= 1; + err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value, + BPF_F_LOCK); + CHECK(err || lookup_value.cnt != value.cnt, + "bpf_map_lookup_elem_flags(BPF_F_LOCK)", + "err:%d errno:%d cnt:%x(%x)\n", + err, errno, lookup_value.cnt, value.cnt); + + /* Bump the cnt again and update with map_flags == 0 */ + value.cnt += 1; + err = bpf_map_update_elem(map_fd, &sk_fd, &value, 0); + CHECK(err, "bpf_map_update_elem()", "err:%d errno:%d\n", + err, errno); + err = 
bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value, + BPF_F_LOCK); + CHECK(err || lookup_value.cnt != value.cnt, + "bpf_map_lookup_elem_flags(BPF_F_LOCK)", + "err:%d errno:%d cnt:%x(%x)\n", + err, errno, lookup_value.cnt, value.cnt); + + /* Test delete elem */ + err = bpf_map_delete_elem(map_fd, &sk_fd); + CHECK(err, "bpf_map_delete_elem()", "err:%d errno:%d\n", + err, errno); + err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value, + BPF_F_LOCK); + CHECK(!err || errno != ENOENT, + "bpf_map_lookup_elem_flags(BPF_F_LOCK)", + "err:%d errno:%d\n", err, errno); + err = bpf_map_delete_elem(map_fd, &sk_fd); + CHECK(!err || errno != ENOENT, "bpf_map_delete_elem()", + "err:%d errno:%d\n", err, errno); + + memcpy(&bad_xattr, &xattr, sizeof(xattr)); + bad_xattr.btf_key_type_id = 0; + err = bpf_create_map_xattr(&bad_xattr); + CHECK(!err || errno != EINVAL, "bap_create_map_xattr(bad_xattr)", + "err:%d errno:%d\n", err, errno); + + memcpy(&bad_xattr, &xattr, sizeof(xattr)); + bad_xattr.btf_key_type_id = 3; + err = bpf_create_map_xattr(&bad_xattr); + CHECK(!err || errno != EINVAL, "bap_create_map_xattr(bad_xattr)", + "err:%d errno:%d\n", err, errno); + + memcpy(&bad_xattr, &xattr, sizeof(xattr)); + bad_xattr.max_entries = 1; + err = bpf_create_map_xattr(&bad_xattr); + CHECK(!err || errno != EINVAL, "bap_create_map_xattr(bad_xattr)", + "err:%d errno:%d\n", err, errno); + + memcpy(&bad_xattr, &xattr, sizeof(xattr)); + bad_xattr.map_flags = 0; + err = bpf_create_map_xattr(&bad_xattr); + CHECK(!err || errno != EINVAL, "bap_create_map_xattr(bad_xattr)", + "err:%d errno:%d\n", err, errno); + + xattr.btf_fd = -1; + close(btf_fd); + close(map_fd); + close(sk_fd); +} + +void test_sk_storage_map(void) +{ + const char *test_name, *env_opt; + bool test_ran = false; + + test_name = getenv(BPF_SK_STORAGE_MAP_TEST_NAME); + + env_opt = getenv(BPF_SK_STORAGE_MAP_TEST_NR_THREADS); + if (env_opt) + nr_sk_threads = atoi(env_opt); + + env_opt = 
getenv(BPF_SK_STORAGE_MAP_TEST_SK_PER_THREAD); + if (env_opt) + nr_sk_per_thread = atoi(env_opt); + + env_opt = getenv(BPF_SK_STORAGE_MAP_TEST_RUNTIME_S); + if (env_opt) + runtime_s = atoi(env_opt); + + if (!test_name || !strcmp(test_name, "basic")) { + test_sk_storage_map_basic(); + test_ran = true; + } + if (!test_name || !strcmp(test_name, "stress_free")) { + test_sk_storage_map_stress_free(); + test_ran = true; + } + if (!test_name || !strcmp(test_name, "stress_change")) { + test_sk_storage_map_stress_change(); + test_ran = true; + } + + if (test_ran) + printf("%s:PASS\n", __func__); + else + CHECK(1, "Invalid test_name", "%s\n", test_name); +} diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c index a64f7a02139c..cb827383db4d 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c @@ -73,7 +73,7 @@ void test_bpf_obj_id(void) info_len != sizeof(struct bpf_map_info) || strcmp((char *)map_infos[i].name, expected_map_name), "get-map-info(fd)", - "err %d errno %d type %d(%d) info_len %u(%Zu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n", + "err %d errno %d type %d(%d) info_len %u(%zu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n", err, errno, map_infos[i].type, BPF_MAP_TYPE_ARRAY, info_len, sizeof(struct bpf_map_info), @@ -117,7 +117,7 @@ void test_bpf_obj_id(void) *(int *)(long)prog_infos[i].map_ids != map_infos[i].id || strcmp((char *)prog_infos[i].name, expected_prog_name), "get-prog-info(fd)", - "err %d errno %d i %d type %d(%d) info_len %u(%Zu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name %s(%s)\n", + "err %d errno %d i %d type %d(%d) info_len %u(%zu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) 
map_id %u(%u) name %s(%s)\n", err, errno, i, prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER, info_len, sizeof(struct bpf_prog_info), @@ -185,7 +185,7 @@ void test_bpf_obj_id(void) memcmp(&prog_info, &prog_infos[i], info_len) || *(int *)(long)prog_info.map_ids != saved_map_id, "get-prog-info(next_id->fd)", - "err %d errno %d info_len %u(%Zu) memcmp %d map_id %u(%u)\n", + "err %d errno %d info_len %u(%zu) memcmp %d map_id %u(%u)\n", err, errno, info_len, sizeof(struct bpf_prog_info), memcmp(&prog_info, &prog_infos[i], info_len), *(int *)(long)prog_info.map_ids, saved_map_id); @@ -231,7 +231,7 @@ void test_bpf_obj_id(void) memcmp(&map_info, &map_infos[i], info_len) || array_value != array_magic_value, "check get-map-info(next_id->fd)", - "err %d errno %d info_len %u(%Zu) memcmp %d array_value %llu(%llu)\n", + "err %d errno %d info_len %u(%zu) memcmp %d array_value %llu(%llu)\n", err, errno, info_len, sizeof(struct bpf_map_info), memcmp(&map_info, &map_infos[i], info_len), array_value, array_magic_value); diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c new file mode 100644 index 000000000000..23b159d95c3f --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook +#include <test_progs.h> +static int libbpf_debug_print(enum libbpf_print_level level, + const char *format, va_list args) +{ + if (level != LIBBPF_DEBUG) + return 0; + + if (!strstr(format, "verifier log")) + return 0; + return vfprintf(stderr, "%s", args); +} + +static int check_load(const char *file) +{ + struct bpf_prog_load_attr attr; + struct bpf_object *obj; + int err, prog_fd; + + memset(&attr, 0, sizeof(struct bpf_prog_load_attr)); + attr.file = file; + attr.prog_type = BPF_PROG_TYPE_SCHED_CLS; + attr.log_level = 4; + err = bpf_prog_load_xattr(&attr, &obj, &prog_fd); + bpf_object__close(obj); + if (err) + error_cnt++; 
+ return err; +} + +void test_bpf_verif_scale(void) +{ + const char *file1 = "./test_verif_scale1.o"; + const char *file2 = "./test_verif_scale2.o"; + const char *file3 = "./test_verif_scale3.o"; + int err; + + if (verifier_stats) + libbpf_set_print(libbpf_debug_print); + + err = check_load(file1); + err |= check_load(file2); + err |= check_load(file3); + if (!err) + printf("test_verif_scale:OK\n"); + else + printf("test_verif_scale:FAIL\n"); +} diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c index bcbd928c96ab..8b54adfd6264 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c @@ -1,8 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> +#include <error.h> +#include <linux/if.h> +#include <linux/if_tun.h> #define CHECK_FLOW_KEYS(desc, got, expected) \ - CHECK(memcmp(&got, &expected, sizeof(got)) != 0, \ + CHECK_ATTR(memcmp(&got, &expected, sizeof(got)) != 0, \ desc, \ "nhoff=%u/%u " \ "thoff=%u/%u " \ @@ -10,6 +13,7 @@ "is_frag=%u/%u " \ "is_first_frag=%u/%u " \ "is_encap=%u/%u " \ + "ip_proto=0x%x/0x%x " \ "n_proto=0x%x/0x%x " \ "sport=%u/%u " \ "dport=%u/%u\n", \ @@ -19,54 +23,246 @@ got.is_frag, expected.is_frag, \ got.is_first_frag, expected.is_first_frag, \ got.is_encap, expected.is_encap, \ + got.ip_proto, expected.ip_proto, \ got.n_proto, expected.n_proto, \ got.sport, expected.sport, \ got.dport, expected.dport) -static struct bpf_flow_keys pkt_v4_flow_keys = { - .nhoff = 0, - .thoff = sizeof(struct iphdr), - .addr_proto = ETH_P_IP, - .ip_proto = IPPROTO_TCP, - .n_proto = __bpf_constant_htons(ETH_P_IP), +struct ipv4_pkt { + struct ethhdr eth; + struct iphdr iph; + struct tcphdr tcp; +} __packed; + +struct svlan_ipv4_pkt { + struct ethhdr eth; + __u16 vlan_tci; + __u16 vlan_proto; + struct iphdr iph; + struct tcphdr tcp; +} __packed; + +struct ipv6_pkt { + struct ethhdr eth; + struct 
ipv6hdr iph; + struct tcphdr tcp; +} __packed; + +struct dvlan_ipv6_pkt { + struct ethhdr eth; + __u16 vlan_tci; + __u16 vlan_proto; + __u16 vlan_tci2; + __u16 vlan_proto2; + struct ipv6hdr iph; + struct tcphdr tcp; +} __packed; + +struct test { + const char *name; + union { + struct ipv4_pkt ipv4; + struct svlan_ipv4_pkt svlan_ipv4; + struct ipv6_pkt ipv6; + struct dvlan_ipv6_pkt dvlan_ipv6; + } pkt; + struct bpf_flow_keys keys; }; -static struct bpf_flow_keys pkt_v6_flow_keys = { - .nhoff = 0, - .thoff = sizeof(struct ipv6hdr), - .addr_proto = ETH_P_IPV6, - .ip_proto = IPPROTO_TCP, - .n_proto = __bpf_constant_htons(ETH_P_IPV6), +#define VLAN_HLEN 4 + +struct test tests[] = { + { + .name = "ipv4", + .pkt.ipv4 = { + .eth.h_proto = __bpf_constant_htons(ETH_P_IP), + .iph.ihl = 5, + .iph.protocol = IPPROTO_TCP, + .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES), + .tcp.doff = 5, + }, + .keys = { + .nhoff = ETH_HLEN, + .thoff = ETH_HLEN + sizeof(struct iphdr), + .addr_proto = ETH_P_IP, + .ip_proto = IPPROTO_TCP, + .n_proto = __bpf_constant_htons(ETH_P_IP), + }, + }, + { + .name = "ipv6", + .pkt.ipv6 = { + .eth.h_proto = __bpf_constant_htons(ETH_P_IPV6), + .iph.nexthdr = IPPROTO_TCP, + .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES), + .tcp.doff = 5, + }, + .keys = { + .nhoff = ETH_HLEN, + .thoff = ETH_HLEN + sizeof(struct ipv6hdr), + .addr_proto = ETH_P_IPV6, + .ip_proto = IPPROTO_TCP, + .n_proto = __bpf_constant_htons(ETH_P_IPV6), + }, + }, + { + .name = "802.1q-ipv4", + .pkt.svlan_ipv4 = { + .eth.h_proto = __bpf_constant_htons(ETH_P_8021Q), + .vlan_proto = __bpf_constant_htons(ETH_P_IP), + .iph.ihl = 5, + .iph.protocol = IPPROTO_TCP, + .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES), + .tcp.doff = 5, + }, + .keys = { + .nhoff = ETH_HLEN + VLAN_HLEN, + .thoff = ETH_HLEN + VLAN_HLEN + sizeof(struct iphdr), + .addr_proto = ETH_P_IP, + .ip_proto = IPPROTO_TCP, + .n_proto = __bpf_constant_htons(ETH_P_IP), + }, + }, + { + .name = "802.1ad-ipv6", + .pkt.dvlan_ipv6 = { 
+ .eth.h_proto = __bpf_constant_htons(ETH_P_8021AD), + .vlan_proto = __bpf_constant_htons(ETH_P_8021Q), + .vlan_proto2 = __bpf_constant_htons(ETH_P_IPV6), + .iph.nexthdr = IPPROTO_TCP, + .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES), + .tcp.doff = 5, + }, + .keys = { + .nhoff = ETH_HLEN + VLAN_HLEN * 2, + .thoff = ETH_HLEN + VLAN_HLEN * 2 + + sizeof(struct ipv6hdr), + .addr_proto = ETH_P_IPV6, + .ip_proto = IPPROTO_TCP, + .n_proto = __bpf_constant_htons(ETH_P_IPV6), + }, + }, }; +static int create_tap(const char *ifname) +{ + struct ifreq ifr = { + .ifr_flags = IFF_TAP | IFF_NO_PI | IFF_NAPI | IFF_NAPI_FRAGS, + }; + int fd, ret; + + strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)); + + fd = open("/dev/net/tun", O_RDWR); + if (fd < 0) + return -1; + + ret = ioctl(fd, TUNSETIFF, &ifr); + if (ret) + return -1; + + return fd; +} + +static int tx_tap(int fd, void *pkt, size_t len) +{ + struct iovec iov[] = { + { + .iov_len = len, + .iov_base = pkt, + }, + }; + return writev(fd, iov, ARRAY_SIZE(iov)); +} + +static int ifup(const char *ifname) +{ + struct ifreq ifr = {}; + int sk, ret; + + strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)); + + sk = socket(PF_INET, SOCK_DGRAM, 0); + if (sk < 0) + return -1; + + ret = ioctl(sk, SIOCGIFFLAGS, &ifr); + if (ret) { + close(sk); + return -1; + } + + ifr.ifr_flags |= IFF_UP; + ret = ioctl(sk, SIOCSIFFLAGS, &ifr); + if (ret) { + close(sk); + return -1; + } + + close(sk); + return 0; +} + void test_flow_dissector(void) { - struct bpf_flow_keys flow_keys; + int i, err, prog_fd, keys_fd = -1, tap_fd; struct bpf_object *obj; - __u32 duration, retval; - int err, prog_fd; - __u32 size; + __u32 duration = 0; err = bpf_flow_load(&obj, "./bpf_flow.o", "flow_dissector", - "jmp_table", &prog_fd); + "jmp_table", "last_dissection", &prog_fd, &keys_fd); if (err) { error_cnt++; return; } - err = bpf_prog_test_run(prog_fd, 10, &pkt_v4, sizeof(pkt_v4), - &flow_keys, &size, &retval, &duration); - CHECK(size != sizeof(flow_keys) || err 
|| retval != 1, "ipv4", - "err %d errno %d retval %d duration %d size %u/%lu\n", - err, errno, retval, duration, size, sizeof(flow_keys)); - CHECK_FLOW_KEYS("ipv4_flow_keys", flow_keys, pkt_v4_flow_keys); - - err = bpf_prog_test_run(prog_fd, 10, &pkt_v6, sizeof(pkt_v6), - &flow_keys, &size, &retval, &duration); - CHECK(size != sizeof(flow_keys) || err || retval != 1, "ipv6", - "err %d errno %d retval %d duration %d size %u/%lu\n", - err, errno, retval, duration, size, sizeof(flow_keys)); - CHECK_FLOW_KEYS("ipv6_flow_keys", flow_keys, pkt_v6_flow_keys); + for (i = 0; i < ARRAY_SIZE(tests); i++) { + struct bpf_flow_keys flow_keys; + struct bpf_prog_test_run_attr tattr = { + .prog_fd = prog_fd, + .data_in = &tests[i].pkt, + .data_size_in = sizeof(tests[i].pkt), + .data_out = &flow_keys, + }; + + err = bpf_prog_test_run_xattr(&tattr); + CHECK_ATTR(tattr.data_size_out != sizeof(flow_keys) || + err || tattr.retval != 1, + tests[i].name, + "err %d errno %d retval %d duration %d size %u/%lu\n", + err, errno, tattr.retval, tattr.duration, + tattr.data_size_out, sizeof(flow_keys)); + CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys); + } + + /* Do the same tests but for skb-less flow dissector. + * We use a known path in the net/tun driver that calls + * eth_get_headlen and we manually export bpf_flow_keys + * via BPF map in this case. 
+ */ + + err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0); + CHECK(err, "bpf_prog_attach", "err %d errno %d", err, errno); + + tap_fd = create_tap("tap0"); + CHECK(tap_fd < 0, "create_tap", "tap_fd %d errno %d", tap_fd, errno); + err = ifup("tap0"); + CHECK(err, "ifup", "err %d errno %d", err, errno); + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + struct bpf_flow_keys flow_keys = {}; + struct bpf_prog_test_run_attr tattr = {}; + __u32 key = 0; + + err = tx_tap(tap_fd, &tests[i].pkt, sizeof(tests[i].pkt)); + CHECK(err < 0, "tx_tap", "err %d errno %d", err, errno); + + err = bpf_map_lookup_elem(keys_fd, &key, &flow_keys); + CHECK_ATTR(err, tests[i].name, "bpf_map_lookup_elem %d\n", err); + + CHECK_ATTR(err, tests[i].name, "skb-less err %d\n", err); + CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys); + } bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c new file mode 100644 index 000000000000..dc5ef155ec28 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> + +void test_flow_dissector_load_bytes(void) +{ + struct bpf_flow_keys flow_keys; + __u32 duration = 0, retval, size; + struct bpf_insn prog[] = { + // BPF_REG_1 - 1st argument: context + // BPF_REG_2 - 2nd argument: offset, start at first byte + BPF_MOV64_IMM(BPF_REG_2, 0), + // BPF_REG_3 - 3rd argument: destination, reserve byte on stack + BPF_ALU64_REG(BPF_MOV, BPF_REG_3, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -1), + // BPF_REG_4 - 4th argument: copy one byte + BPF_MOV64_IMM(BPF_REG_4, 1), + // bpf_skb_load_bytes(ctx, sizeof(pkt_v4), ptr, 1) + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_skb_load_bytes), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + // if (ret == 0) return BPF_DROP (2) + BPF_MOV64_IMM(BPF_REG_0, BPF_DROP), + BPF_EXIT_INSN(), + // 
if (ret != 0) return BPF_OK (0) + BPF_MOV64_IMM(BPF_REG_0, BPF_OK), + BPF_EXIT_INSN(), + }; + int fd, err; + + /* make sure bpf_skb_load_bytes is not allowed from skb-less context + */ + fd = bpf_load_program(BPF_PROG_TYPE_FLOW_DISSECTOR, prog, + ARRAY_SIZE(prog), "GPL", 0, NULL, 0); + CHECK(fd < 0, + "flow_dissector-bpf_skb_load_bytes-load", + "fd %d errno %d\n", + fd, errno); + + err = bpf_prog_test_run(fd, 1, &pkt_v4, sizeof(pkt_v4), + &flow_keys, &size, &retval, &duration); + CHECK(size != sizeof(flow_keys) || err || retval != 1, + "flow_dissector-bpf_skb_load_bytes", + "err %d errno %d retval %d duration %d size %u/%zu\n", + err, errno, retval, duration, size, sizeof(flow_keys)); + + if (fd >= -1) + close(fd); +} diff --git a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c index d7bb5beb1c57..c2a0a9d5591b 100644 --- a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c +++ b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c @@ -39,7 +39,7 @@ static int get_stack_print_output(void *data, int size) } else { for (i = 0; i < num_stack; i++) { ks = ksym_search(raw_data[i]); - if (strcmp(ks->name, nonjit_func) == 0) { + if (ks && (strcmp(ks->name, nonjit_func) == 0)) { found = true; break; } @@ -56,7 +56,7 @@ static int get_stack_print_output(void *data, int size) } else { for (i = 0; i < num_stack; i++) { ks = ksym_search(e->kern_stack[i]); - if (strcmp(ks->name, nonjit_func) == 0) { + if (ks && (strcmp(ks->name, nonjit_func) == 0)) { good_kern_stack = true; break; } diff --git a/tools/testing/selftests/bpf/prog_tests/global_data.c b/tools/testing/selftests/bpf/prog_tests/global_data.c new file mode 100644 index 000000000000..d011079fb0bf --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/global_data.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> + +static void test_global_data_number(struct bpf_object *obj, __u32 duration) +{ + int i, err, 
map_fd; + uint64_t num; + + map_fd = bpf_find_map(__func__, obj, "result_number"); + if (map_fd < 0) { + error_cnt++; + return; + } + + struct { + char *name; + uint32_t key; + uint64_t num; + } tests[] = { + { "relocate .bss reference", 0, 0 }, + { "relocate .data reference", 1, 42 }, + { "relocate .rodata reference", 2, 24 }, + { "relocate .bss reference", 3, 0 }, + { "relocate .data reference", 4, 0xffeeff }, + { "relocate .rodata reference", 5, 0xabab }, + { "relocate .bss reference", 6, 1234 }, + { "relocate .bss reference", 7, 0 }, + { "relocate .rodata reference", 8, 0xab }, + { "relocate .rodata reference", 9, 0x1111111111111111 }, + { "relocate .rodata reference", 10, ~0 }, + }; + + for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) { + err = bpf_map_lookup_elem(map_fd, &tests[i].key, &num); + CHECK(err || num != tests[i].num, tests[i].name, + "err %d result %lx expected %lx\n", + err, num, tests[i].num); + } +} + +static void test_global_data_string(struct bpf_object *obj, __u32 duration) +{ + int i, err, map_fd; + char str[32]; + + map_fd = bpf_find_map(__func__, obj, "result_string"); + if (map_fd < 0) { + error_cnt++; + return; + } + + struct { + char *name; + uint32_t key; + char str[32]; + } tests[] = { + { "relocate .rodata reference", 0, "abcdefghijklmnopqrstuvwxyz" }, + { "relocate .data reference", 1, "abcdefghijklmnopqrstuvwxyz" }, + { "relocate .bss reference", 2, "" }, + { "relocate .data reference", 3, "abcdexghijklmnopqrstuvwxyz" }, + { "relocate .bss reference", 4, "\0\0hello" }, + }; + + for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) { + err = bpf_map_lookup_elem(map_fd, &tests[i].key, str); + CHECK(err || memcmp(str, tests[i].str, sizeof(str)), + tests[i].name, "err %d result \'%s\' expected \'%s\'\n", + err, str, tests[i].str); + } +} + +struct foo { + __u8 a; + __u32 b; + __u64 c; +}; + +static void test_global_data_struct(struct bpf_object *obj, __u32 duration) +{ + int i, err, map_fd; + struct foo val; + + map_fd = 
bpf_find_map(__func__, obj, "result_struct"); + if (map_fd < 0) { + error_cnt++; + return; + } + + struct { + char *name; + uint32_t key; + struct foo val; + } tests[] = { + { "relocate .rodata reference", 0, { 42, 0xfefeefef, 0x1111111111111111ULL, } }, + { "relocate .bss reference", 1, { } }, + { "relocate .rodata reference", 2, { } }, + { "relocate .data reference", 3, { 41, 0xeeeeefef, 0x2111111111111111ULL, } }, + }; + + for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) { + err = bpf_map_lookup_elem(map_fd, &tests[i].key, &val); + CHECK(err || memcmp(&val, &tests[i].val, sizeof(val)), + tests[i].name, "err %d result { %u, %u, %llu } expected { %u, %u, %llu }\n", + err, val.a, val.b, val.c, tests[i].val.a, tests[i].val.b, tests[i].val.c); + } +} + +static void test_global_data_rdonly(struct bpf_object *obj, __u32 duration) +{ + int err = -ENOMEM, map_fd, zero = 0; + struct bpf_map *map; + __u8 *buff; + + map = bpf_object__find_map_by_name(obj, "test_glo.rodata"); + if (!map || !bpf_map__is_internal(map)) { + error_cnt++; + return; + } + + map_fd = bpf_map__fd(map); + if (map_fd < 0) { + error_cnt++; + return; + } + + buff = malloc(bpf_map__def(map)->value_size); + if (buff) + err = bpf_map_update_elem(map_fd, &zero, buff, 0); + free(buff); + CHECK(!err || errno != EPERM, "test .rodata read-only map", + "err %d errno %d\n", err, errno); +} + +void test_global_data(void) +{ + const char *file = "./test_global_data.o"; + __u32 duration = 0, retval; + struct bpf_object *obj; + int err, prog_fd; + + err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); + if (CHECK(err, "load program", "error %d loading %s\n", err, file)) + return; + + err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), + NULL, NULL, &retval, &duration); + CHECK(err || retval, "pass global data run", + "err %d errno %d retval %d duration %d\n", + err, errno, retval, duration); + + test_global_data_number(obj, duration); + test_global_data_string(obj, duration); + 
test_global_data_struct(obj, duration); + test_global_data_rdonly(obj, duration); + + bpf_object__close(obj); +} diff --git a/tools/testing/selftests/bpf/prog_tests/map_lock.c b/tools/testing/selftests/bpf/prog_tests/map_lock.c index 90f8a206340a..ee99368c595c 100644 --- a/tools/testing/selftests/bpf/prog_tests/map_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/map_lock.c @@ -37,7 +37,7 @@ void test_map_lock(void) const char *file = "./test_map_lock.o"; int prog_fd, map_fd[2], vars[17] = {}; pthread_t thread_id[6]; - struct bpf_object *obj; + struct bpf_object *obj = NULL; int err = 0, key = 0, i; void *ret; diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_reject_nbd_invalid.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_reject_nbd_invalid.c new file mode 100644 index 000000000000..9807336a3016 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_reject_nbd_invalid.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <test_progs.h> +#include <linux/nbd.h> + +void test_raw_tp_writable_reject_nbd_invalid(void) +{ + __u32 duration = 0; + char error[4096]; + int bpf_fd = -1, tp_fd = -1; + + const struct bpf_insn program[] = { + /* r6 is our tp buffer */ + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + /* one byte beyond the end of the nbd_request struct */ + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_6, + sizeof(struct nbd_request)), + BPF_EXIT_INSN(), + }; + + struct bpf_load_program_attr load_attr = { + .prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, + .license = "GPL v2", + .insns = program, + .insns_cnt = sizeof(program) / sizeof(struct bpf_insn), + .log_level = 2, + }; + + bpf_fd = bpf_load_program_xattr(&load_attr, error, sizeof(error)); + if (CHECK(bpf_fd < 0, "bpf_raw_tracepoint_writable load", + "failed: %d errno %d\n", bpf_fd, errno)) + return; + + tp_fd = bpf_raw_tracepoint_open("nbd_send_request", bpf_fd); + if (CHECK(tp_fd >= 0, "bpf_raw_tracepoint_writable open", + "erroneously 
succeeded\n")) + goto out_bpffd; + + close(tp_fd); +out_bpffd: + close(bpf_fd); +} diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c new file mode 100644 index 000000000000..5c45424cac5f --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <test_progs.h> +#include <linux/nbd.h> + +void test_raw_tp_writable_test_run(void) +{ + __u32 duration = 0; + char error[4096]; + + const struct bpf_insn trace_program[] = { + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, 0), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + + struct bpf_load_program_attr load_attr = { + .prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, + .license = "GPL v2", + .insns = trace_program, + .insns_cnt = sizeof(trace_program) / sizeof(struct bpf_insn), + .log_level = 2, + }; + + int bpf_fd = bpf_load_program_xattr(&load_attr, error, sizeof(error)); + if (CHECK(bpf_fd < 0, "bpf_raw_tracepoint_writable loaded", + "failed: %d errno %d\n", bpf_fd, errno)) + return; + + const struct bpf_insn skb_program[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + + struct bpf_load_program_attr skb_load_attr = { + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, + .license = "GPL v2", + .insns = skb_program, + .insns_cnt = sizeof(skb_program) / sizeof(struct bpf_insn), + }; + + int filter_fd = + bpf_load_program_xattr(&skb_load_attr, error, sizeof(error)); + if (CHECK(filter_fd < 0, "test_program_loaded", "failed: %d errno %d\n", + filter_fd, errno)) + goto out_bpffd; + + int tp_fd = bpf_raw_tracepoint_open("bpf_test_finish", bpf_fd); + if (CHECK(tp_fd < 0, "bpf_raw_tracepoint_writable opened", + "failed: %d errno %d\n", tp_fd, errno)) + goto out_filterfd; + + char test_skb[128] = { + 0, + }; + + __u32 prog_ret; + int err 
= bpf_prog_test_run(filter_fd, 1, test_skb, sizeof(test_skb), 0, + 0, &prog_ret, 0); + CHECK(err != 42, "test_run", + "tracepoint did not modify return value\n"); + CHECK(prog_ret != 0, "test_run_ret", + "socket_filter did not return 0\n"); + + close(tp_fd); + + err = bpf_prog_test_run(filter_fd, 1, test_skb, sizeof(test_skb), 0, 0, + &prog_ret, 0); + CHECK(err != 0, "test_run_notrace", + "test_run failed with %d errno %d\n", err, errno); + CHECK(prog_ret != 0, "test_run_ret_notrace", + "socket_filter did not return 0\n"); + +out_filterfd: + close(filter_fd); +out_bpffd: + close(bpf_fd); +} diff --git a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c new file mode 100644 index 000000000000..e95baa32e277 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> + +void test_skb_ctx(void) +{ + struct __sk_buff skb = { + .cb[0] = 1, + .cb[1] = 2, + .cb[2] = 3, + .cb[3] = 4, + .cb[4] = 5, + .priority = 6, + }; + struct bpf_prog_test_run_attr tattr = { + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .ctx_in = &skb, + .ctx_size_in = sizeof(skb), + .ctx_out = &skb, + .ctx_size_out = sizeof(skb), + }; + struct bpf_object *obj; + int err; + int i; + + err = bpf_prog_load("./test_skb_ctx.o", BPF_PROG_TYPE_SCHED_CLS, &obj, + &tattr.prog_fd); + if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno)) + return; + + /* ctx_in != NULL, ctx_size_in == 0 */ + + tattr.ctx_size_in = 0; + err = bpf_prog_test_run_xattr(&tattr); + CHECK_ATTR(err == 0, "ctx_size_in", "err %d errno %d\n", err, errno); + tattr.ctx_size_in = sizeof(skb); + + /* ctx_out != NULL, ctx_size_out == 0 */ + + tattr.ctx_size_out = 0; + err = bpf_prog_test_run_xattr(&tattr); + CHECK_ATTR(err == 0, "ctx_size_out", "err %d errno %d\n", err, errno); + tattr.ctx_size_out = sizeof(skb); + + /* non-zero [len, tc_index] fields should be rejected*/ + + skb.len = 1; + 
err = bpf_prog_test_run_xattr(&tattr); + CHECK_ATTR(err == 0, "len", "err %d errno %d\n", err, errno); + skb.len = 0; + + skb.tc_index = 1; + err = bpf_prog_test_run_xattr(&tattr); + CHECK_ATTR(err == 0, "tc_index", "err %d errno %d\n", err, errno); + skb.tc_index = 0; + + /* non-zero [hash, sk] fields should be rejected */ + + skb.hash = 1; + err = bpf_prog_test_run_xattr(&tattr); + CHECK_ATTR(err == 0, "hash", "err %d errno %d\n", err, errno); + skb.hash = 0; + + skb.sk = (struct bpf_sock *)1; + err = bpf_prog_test_run_xattr(&tattr); + CHECK_ATTR(err == 0, "sk", "err %d errno %d\n", err, errno); + skb.sk = 0; + + err = bpf_prog_test_run_xattr(&tattr); + CHECK_ATTR(err != 0 || tattr.retval, + "run", + "err %d errno %d retval %d\n", + err, errno, tattr.retval); + + CHECK_ATTR(tattr.ctx_size_out != sizeof(skb), + "ctx_size_out", + "incorrect output size, want %lu have %u\n", + sizeof(skb), tattr.ctx_size_out); + + for (i = 0; i < 5; i++) + CHECK_ATTR(skb.cb[i] != i + 2, + "ctx_out_cb", + "skb->cb[i] == %d, expected %d\n", + skb.cb[i], i + 2); + CHECK_ATTR(skb.priority != 7, + "ctx_out_priority", + "skb->priority == %d, expected %d\n", + skb.priority, 7); +} diff --git a/tools/testing/selftests/bpf/prog_tests/spinlock.c b/tools/testing/selftests/bpf/prog_tests/spinlock.c index 9a573a9675d7..114ebe6a438e 100644 --- a/tools/testing/selftests/bpf/prog_tests/spinlock.c +++ b/tools/testing/selftests/bpf/prog_tests/spinlock.c @@ -5,7 +5,7 @@ void test_spinlock(void) { const char *file = "./test_spin_lock.o"; pthread_t thread_id[4]; - struct bpf_object *obj; + struct bpf_object *obj = NULL; int prog_fd; int err = 0, i; void *ret; diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c index 8a114bb1c379..1c1a2f75f3d8 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c @@ -1,13 +1,25 @@ // 
SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> +static __u64 read_perf_max_sample_freq(void) +{ + __u64 sample_freq = 5000; /* fallback to 5000 on error */ + FILE *f; + + f = fopen("/proc/sys/kernel/perf_event_max_sample_rate", "r"); + if (f == NULL) + return sample_freq; + fscanf(f, "%llu", &sample_freq); + fclose(f); + return sample_freq; +} + void test_stacktrace_build_id_nmi(void) { int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd; const char *file = "./test_stacktrace_build_id.o"; int err, pmu_fd, prog_fd; struct perf_event_attr attr = { - .sample_freq = 5000, .freq = 1, .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES, @@ -20,6 +32,8 @@ void test_stacktrace_build_id_nmi(void) int build_id_matches = 0; int retry = 1; + attr.sample_freq = read_perf_max_sample_freq(); + retry: err = bpf_prog_load(file, BPF_PROG_TYPE_PERF_EVENT, &obj, &prog_fd); if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno)) diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c index 284660f5aa95..81ad9a0b29d0 100644 --- a/tools/testing/selftests/bpf/progs/bpf_flow.c +++ b/tools/testing/selftests/bpf/progs/bpf_flow.c @@ -64,6 +64,25 @@ struct bpf_map_def SEC("maps") jmp_table = { .max_entries = 8 }; +struct bpf_map_def SEC("maps") last_dissection = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct bpf_flow_keys), + .max_entries = 1, +}; + +static __always_inline int export_flow_keys(struct bpf_flow_keys *keys, + int ret) +{ + struct bpf_flow_keys *val; + __u32 key = 0; + + val = bpf_map_lookup_elem(&last_dissection, &key); + if (val) + memcpy(val, keys, sizeof(*val)); + return ret; +} + static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb, __u16 hdr_size, void *buffer) @@ -92,7 +111,6 @@ static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto) { struct bpf_flow_keys *keys = skb->flow_keys; - keys->n_proto 
= proto; switch (proto) { case bpf_htons(ETH_P_IP): bpf_tail_call(skb, &jmp_table, IP); @@ -110,19 +128,18 @@ static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto) break; default: /* Protocol not supported */ - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); } - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); } SEC("flow_dissector") int _dissect(struct __sk_buff *skb) { - if (!skb->vlan_present) - return parse_eth_proto(skb, skb->protocol); - else - return parse_eth_proto(skb, skb->vlan_proto); + struct bpf_flow_keys *keys = skb->flow_keys; + + return parse_eth_proto(skb, keys->n_proto); } /* Parses on IPPROTO_* */ @@ -141,8 +158,8 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto) case IPPROTO_ICMP: icmp = bpf_flow_dissect_get_header(skb, sizeof(*icmp), &_icmp); if (!icmp) - return BPF_DROP; - return BPF_OK; + return export_flow_keys(keys, BPF_DROP); + return export_flow_keys(keys, BPF_OK); case IPPROTO_IPIP: keys->is_encap = true; return parse_eth_proto(skb, bpf_htons(ETH_P_IP)); @@ -152,11 +169,11 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto) case IPPROTO_GRE: gre = bpf_flow_dissect_get_header(skb, sizeof(*gre), &_gre); if (!gre) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); if (bpf_htons(gre->flags & GRE_VERSION)) /* Only inspect standard GRE packets with version 0 */ - return BPF_OK; + return export_flow_keys(keys, BPF_OK); keys->thoff += sizeof(*gre); /* Step over GRE Flags and Proto */ if (GRE_IS_CSUM(gre->flags)) @@ -172,7 +189,7 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto) eth = bpf_flow_dissect_get_header(skb, sizeof(*eth), &_eth); if (!eth) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); keys->thoff += sizeof(*eth); @@ -183,31 +200,31 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto) case IPPROTO_TCP: tcp = bpf_flow_dissect_get_header(skb, 
sizeof(*tcp), &_tcp); if (!tcp) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); if (tcp->doff < 5) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); if ((__u8 *)tcp + (tcp->doff << 2) > data_end) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); keys->sport = tcp->source; keys->dport = tcp->dest; - return BPF_OK; + return export_flow_keys(keys, BPF_OK); case IPPROTO_UDP: case IPPROTO_UDPLITE: udp = bpf_flow_dissect_get_header(skb, sizeof(*udp), &_udp); if (!udp) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); keys->sport = udp->source; keys->dport = udp->dest; - return BPF_OK; + return export_flow_keys(keys, BPF_OK); default: - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); } - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); } static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr) @@ -227,7 +244,7 @@ static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr) return parse_ip_proto(skb, nexthdr); } - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); } PROG(IP)(struct __sk_buff *skb) @@ -240,11 +257,11 @@ PROG(IP)(struct __sk_buff *skb) iph = bpf_flow_dissect_get_header(skb, sizeof(*iph), &_iph); if (!iph) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); /* IP header cannot be smaller than 20 bytes */ if (iph->ihl < 5) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); keys->addr_proto = ETH_P_IP; keys->ipv4_src = iph->saddr; @@ -252,7 +269,7 @@ PROG(IP)(struct __sk_buff *skb) keys->thoff += iph->ihl << 2; if (data + keys->thoff > data_end) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET)) { keys->is_frag = true; @@ -266,7 +283,7 @@ PROG(IP)(struct __sk_buff *skb) } if (done) - return BPF_OK; + return export_flow_keys(keys, BPF_OK); return parse_ip_proto(skb, iph->protocol); } @@ -278,7 +295,7 @@ PROG(IPV6)(struct __sk_buff *skb) ip6h = 
bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h); if (!ip6h) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); keys->addr_proto = ETH_P_IPV6; memcpy(&keys->ipv6_src, &ip6h->saddr, 2*sizeof(ip6h->saddr)); @@ -290,11 +307,12 @@ PROG(IPV6)(struct __sk_buff *skb) PROG(IPV6OP)(struct __sk_buff *skb) { + struct bpf_flow_keys *keys = skb->flow_keys; struct ipv6_opt_hdr *ip6h, _ip6h; ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h); if (!ip6h) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); /* hlen is in 8-octets and does not include the first 8 bytes * of the header @@ -311,7 +329,7 @@ PROG(IPV6FR)(struct __sk_buff *skb) fragh = bpf_flow_dissect_get_header(skb, sizeof(*fragh), &_fragh); if (!fragh) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); keys->thoff += sizeof(*fragh); keys->is_frag = true; @@ -323,48 +341,46 @@ PROG(IPV6FR)(struct __sk_buff *skb) PROG(MPLS)(struct __sk_buff *skb) { + struct bpf_flow_keys *keys = skb->flow_keys; struct mpls_label *mpls, _mpls; mpls = bpf_flow_dissect_get_header(skb, sizeof(*mpls), &_mpls); if (!mpls) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); - return BPF_OK; + return export_flow_keys(keys, BPF_OK); } PROG(VLAN)(struct __sk_buff *skb) { struct bpf_flow_keys *keys = skb->flow_keys; struct vlan_hdr *vlan, _vlan; - __be16 proto; - - /* Peek back to see if single or double-tagging */ - if (bpf_skb_load_bytes(skb, keys->thoff - sizeof(proto), &proto, - sizeof(proto))) - return BPF_DROP; /* Account for double-tagging */ - if (proto == bpf_htons(ETH_P_8021AD)) { + if (keys->n_proto == bpf_htons(ETH_P_8021AD)) { vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan); if (!vlan) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); if (vlan->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q)) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); + keys->nhoff += sizeof(*vlan); keys->thoff += sizeof(*vlan); } vlan = 
bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan); if (!vlan) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); + keys->nhoff += sizeof(*vlan); keys->thoff += sizeof(*vlan); /* Only allow 8021AD + 8021Q double tagging and no triple tagging.*/ if (vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) || vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q)) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); + keys->n_proto = vlan->h_vlan_encapsulated_proto; return parse_eth_proto(skb, vlan->h_vlan_encapsulated_proto); } diff --git a/tools/testing/selftests/bpf/progs/test_global_data.c b/tools/testing/selftests/bpf/progs/test_global_data.c new file mode 100644 index 000000000000..5ab14e941980 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_global_data.c @@ -0,0 +1,106 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Isovalent, Inc. + +#include <linux/bpf.h> +#include <linux/pkt_cls.h> +#include <string.h> + +#include "bpf_helpers.h" + +struct bpf_map_def SEC("maps") result_number = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u64), + .max_entries = 11, +}; + +struct bpf_map_def SEC("maps") result_string = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = 32, + .max_entries = 5, +}; + +struct foo { + __u8 a; + __u32 b; + __u64 c; +}; + +struct bpf_map_def SEC("maps") result_struct = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct foo), + .max_entries = 5, +}; + +/* Relocation tests for __u64s. */ +static __u64 num0; +static __u64 num1 = 42; +static const __u64 num2 = 24; +static __u64 num3 = 0; +static __u64 num4 = 0xffeeff; +static const __u64 num5 = 0xabab; +static const __u64 num6 = 0xab; + +/* Relocation tests for strings. 
*/ +static const char str0[32] = "abcdefghijklmnopqrstuvwxyz"; +static char str1[32] = "abcdefghijklmnopqrstuvwxyz"; +static char str2[32]; + +/* Relocation tests for structs. */ +static const struct foo struct0 = { + .a = 42, + .b = 0xfefeefef, + .c = 0x1111111111111111ULL, +}; +static struct foo struct1; +static const struct foo struct2; +static struct foo struct3 = { + .a = 41, + .b = 0xeeeeefef, + .c = 0x2111111111111111ULL, +}; + +#define test_reloc(map, num, var) \ + do { \ + __u32 key = num; \ + bpf_map_update_elem(&result_##map, &key, var, 0); \ + } while (0) + +SEC("static_data_load") +int load_static_data(struct __sk_buff *skb) +{ + static const __u64 bar = ~0; + + test_reloc(number, 0, &num0); + test_reloc(number, 1, &num1); + test_reloc(number, 2, &num2); + test_reloc(number, 3, &num3); + test_reloc(number, 4, &num4); + test_reloc(number, 5, &num5); + num4 = 1234; + test_reloc(number, 6, &num4); + test_reloc(number, 7, &num0); + test_reloc(number, 8, &num6); + + test_reloc(string, 0, str0); + test_reloc(string, 1, str1); + test_reloc(string, 2, str2); + str1[5] = 'x'; + test_reloc(string, 3, str1); + __builtin_memcpy(&str2[2], "hello", sizeof("hello")); + test_reloc(string, 4, str2); + + test_reloc(struct, 0, &struct0); + test_reloc(struct, 1, &struct1); + test_reloc(struct, 2, &struct2); + test_reloc(struct, 3, &struct3); + + test_reloc(number, 9, &struct0.c); + test_reloc(number, 10, &bar); + + return TC_ACT_OK; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_jhash.h b/tools/testing/selftests/bpf/progs/test_jhash.h new file mode 100644 index 000000000000..3d12c11a8d47 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_jhash.h @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook + +typedef unsigned int u32; + +static __attribute__((always_inline)) u32 rol32(u32 word, unsigned int shift) +{ + return (word << shift) | (word >> ((-shift) & 31)); +} + +#define 
__jhash_mix(a, b, c) \ +{ \ + a -= c; a ^= rol32(c, 4); c += b; \ + b -= a; b ^= rol32(a, 6); a += c; \ + c -= b; c ^= rol32(b, 8); b += a; \ + a -= c; a ^= rol32(c, 16); c += b; \ + b -= a; b ^= rol32(a, 19); a += c; \ + c -= b; c ^= rol32(b, 4); b += a; \ +} + +#define __jhash_final(a, b, c) \ +{ \ + c ^= b; c -= rol32(b, 14); \ + a ^= c; a -= rol32(c, 11); \ + b ^= a; b -= rol32(a, 25); \ + c ^= b; c -= rol32(b, 16); \ + a ^= c; a -= rol32(c, 4); \ + b ^= a; b -= rol32(a, 14); \ + c ^= b; c -= rol32(b, 24); \ +} + +#define JHASH_INITVAL 0xdeadbeef + +static ATTR +u32 jhash(const void *key, u32 length, u32 initval) +{ + u32 a, b, c; + const unsigned char *k = key; + + a = b = c = JHASH_INITVAL + length + initval; + + while (length > 12) { + a += *(volatile u32 *)(k); + b += *(volatile u32 *)(k + 4); + c += *(volatile u32 *)(k + 8); + __jhash_mix(a, b, c); + length -= 12; + k += 12; + } + switch (length) { + case 12: c += (u32)k[11]<<24; + case 11: c += (u32)k[10]<<16; + case 10: c += (u32)k[9]<<8; + case 9: c += k[8]; + case 8: b += (u32)k[7]<<24; + case 7: b += (u32)k[6]<<16; + case 6: b += (u32)k[5]<<8; + case 5: b += k[4]; + case 4: a += (u32)k[3]<<24; + case 3: a += (u32)k[2]<<16; + case 2: a += (u32)k[1]<<8; + case 1: a += k[0]; + c ^= a; + __jhash_final(a, b, c); + case 0: /* Nothing left to add */ + break; + } + + return c; +} diff --git a/tools/testing/selftests/bpf/progs/test_skb_ctx.c b/tools/testing/selftests/bpf/progs/test_skb_ctx.c new file mode 100644 index 000000000000..7a80960d7df1 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_skb_ctx.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include "bpf_helpers.h" + +int _version SEC("version") = 1; +char _license[] SEC("license") = "GPL"; + +SEC("skb_ctx") +int process(struct __sk_buff *skb) +{ + #pragma clang loop unroll(full) + for (int i = 0; i < 5; i++) { + if (skb->cb[i] != i + 1) + return 1; + skb->cb[i]++; + } + skb->priority++; + + return 0; +} diff 
--git a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c b/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c index de1a43e8f610..1c39e4ccb7f1 100644 --- a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c +++ b/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c @@ -8,40 +8,78 @@ #include "bpf_helpers.h" #include "bpf_endian.h" -enum bpf_array_idx { - SRV_IDX, - CLI_IDX, - __NR_BPF_ARRAY_IDX, +enum bpf_addr_array_idx { + ADDR_SRV_IDX, + ADDR_CLI_IDX, + __NR_BPF_ADDR_ARRAY_IDX, +}; + +enum bpf_result_array_idx { + EGRESS_SRV_IDX, + EGRESS_CLI_IDX, + INGRESS_LISTEN_IDX, + __NR_BPF_RESULT_ARRAY_IDX, +}; + +enum bpf_linum_array_idx { + EGRESS_LINUM_IDX, + INGRESS_LINUM_IDX, + __NR_BPF_LINUM_ARRAY_IDX, }; struct bpf_map_def SEC("maps") addr_map = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(__u32), .value_size = sizeof(struct sockaddr_in6), - .max_entries = __NR_BPF_ARRAY_IDX, + .max_entries = __NR_BPF_ADDR_ARRAY_IDX, }; struct bpf_map_def SEC("maps") sock_result_map = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(__u32), .value_size = sizeof(struct bpf_sock), - .max_entries = __NR_BPF_ARRAY_IDX, + .max_entries = __NR_BPF_RESULT_ARRAY_IDX, }; struct bpf_map_def SEC("maps") tcp_sock_result_map = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(__u32), .value_size = sizeof(struct bpf_tcp_sock), - .max_entries = __NR_BPF_ARRAY_IDX, + .max_entries = __NR_BPF_RESULT_ARRAY_IDX, }; struct bpf_map_def SEC("maps") linum_map = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(__u32), .value_size = sizeof(__u32), - .max_entries = 1, + .max_entries = __NR_BPF_LINUM_ARRAY_IDX, +}; + +struct bpf_spinlock_cnt { + struct bpf_spin_lock lock; + __u32 cnt; }; +struct bpf_map_def SEC("maps") sk_pkt_out_cnt = { + .type = BPF_MAP_TYPE_SK_STORAGE, + .key_size = sizeof(int), + .value_size = sizeof(struct bpf_spinlock_cnt), + .max_entries = 0, + .map_flags = BPF_F_NO_PREALLOC, +}; + +BPF_ANNOTATE_KV_PAIR(sk_pkt_out_cnt, int, struct bpf_spinlock_cnt); + 
+struct bpf_map_def SEC("maps") sk_pkt_out_cnt10 = { + .type = BPF_MAP_TYPE_SK_STORAGE, + .key_size = sizeof(int), + .value_size = sizeof(struct bpf_spinlock_cnt), + .max_entries = 0, + .map_flags = BPF_F_NO_PREALLOC, +}; + +BPF_ANNOTATE_KV_PAIR(sk_pkt_out_cnt10, int, struct bpf_spinlock_cnt); + static bool is_loopback6(__u32 *a6) { return !a6[0] && !a6[1] && !a6[2] && a6[3] == bpf_htonl(1); @@ -100,18 +138,22 @@ static void tpcpy(struct bpf_tcp_sock *dst, #define RETURN { \ linum = __LINE__; \ - bpf_map_update_elem(&linum_map, &idx0, &linum, 0); \ + bpf_map_update_elem(&linum_map, &linum_idx, &linum, 0); \ return 1; \ } SEC("cgroup_skb/egress") -int read_sock_fields(struct __sk_buff *skb) +int egress_read_sock_fields(struct __sk_buff *skb) { - __u32 srv_idx = SRV_IDX, cli_idx = CLI_IDX, idx; + struct bpf_spinlock_cnt cli_cnt_init = { .lock = 0, .cnt = 0xeB9F }; + __u32 srv_idx = ADDR_SRV_IDX, cli_idx = ADDR_CLI_IDX, result_idx; + struct bpf_spinlock_cnt *pkt_out_cnt, *pkt_out_cnt10; struct sockaddr_in6 *srv_sa6, *cli_sa6; struct bpf_tcp_sock *tp, *tp_ret; struct bpf_sock *sk, *sk_ret; - __u32 linum, idx0 = 0; + __u32 linum, linum_idx; + + linum_idx = EGRESS_LINUM_IDX; sk = skb->sk; if (!sk || sk->state == 10) @@ -132,14 +174,81 @@ int read_sock_fields(struct __sk_buff *skb) RETURN; if (sk->src_port == bpf_ntohs(srv_sa6->sin6_port)) - idx = srv_idx; + result_idx = EGRESS_SRV_IDX; else if (sk->src_port == bpf_ntohs(cli_sa6->sin6_port)) - idx = cli_idx; + result_idx = EGRESS_CLI_IDX; else RETURN; - sk_ret = bpf_map_lookup_elem(&sock_result_map, &idx); - tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &idx); + sk_ret = bpf_map_lookup_elem(&sock_result_map, &result_idx); + tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &result_idx); + if (!sk_ret || !tp_ret) + RETURN; + + skcpy(sk_ret, sk); + tpcpy(tp_ret, tp); + + if (result_idx == EGRESS_SRV_IDX) { + /* The userspace has created it for srv sk */ + pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, sk, 0, 0); + 
pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10, sk, + 0, 0); + } else { + pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, sk, + &cli_cnt_init, + BPF_SK_STORAGE_GET_F_CREATE); + pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10, + sk, &cli_cnt_init, + BPF_SK_STORAGE_GET_F_CREATE); + } + + if (!pkt_out_cnt || !pkt_out_cnt10) + RETURN; + + /* Even both cnt and cnt10 have lock defined in their BTF, + * intentionally one cnt takes lock while one does not + * as a test for the spinlock support in BPF_MAP_TYPE_SK_STORAGE. + */ + pkt_out_cnt->cnt += 1; + bpf_spin_lock(&pkt_out_cnt10->lock); + pkt_out_cnt10->cnt += 10; + bpf_spin_unlock(&pkt_out_cnt10->lock); + + RETURN; +} + +SEC("cgroup_skb/ingress") +int ingress_read_sock_fields(struct __sk_buff *skb) +{ + __u32 srv_idx = ADDR_SRV_IDX, result_idx = INGRESS_LISTEN_IDX; + struct bpf_tcp_sock *tp, *tp_ret; + struct bpf_sock *sk, *sk_ret; + struct sockaddr_in6 *srv_sa6; + __u32 linum, linum_idx; + + linum_idx = INGRESS_LINUM_IDX; + + sk = skb->sk; + if (!sk || sk->family != AF_INET6 || !is_loopback6(sk->src_ip6)) + RETURN; + + srv_sa6 = bpf_map_lookup_elem(&addr_map, &srv_idx); + if (!srv_sa6 || sk->src_port != bpf_ntohs(srv_sa6->sin6_port)) + RETURN; + + if (sk->state != 10 && sk->state != 12) + RETURN; + + sk = bpf_get_listener_sock(sk); + if (!sk) + RETURN; + + tp = bpf_tcp_sock(sk); + if (!tp) + RETURN; + + sk_ret = bpf_map_lookup_elem(&sock_result_map, &result_idx); + tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &result_idx); if (!sk_ret || !tp_ret) RETURN; diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c new file mode 100644 index 000000000000..a295cad805d7 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook + +#include <stdint.h> +#include <string.h> + +#include <linux/stddef.h> +#include <linux/bpf.h> + +#include 
"bpf_helpers.h" +#include "bpf_util.h" + +/* Max supported length of a string with unsigned long in base 10 (pow2 - 1). */ +#define MAX_ULONG_STR_LEN 0xF + +/* Max supported length of sysctl value string (pow2). */ +#define MAX_VALUE_STR_LEN 0x40 + +static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx) +{ + char tcp_mem_name[] = "net/ipv4/tcp_mem"; + unsigned char i; + char name[64]; + int ret; + + memset(name, 0, sizeof(name)); + ret = bpf_sysctl_get_name(ctx, name, sizeof(name), 0); + if (ret < 0 || ret != sizeof(tcp_mem_name) - 1) + return 0; + +#pragma clang loop unroll(full) + for (i = 0; i < sizeof(tcp_mem_name); ++i) + if (name[i] != tcp_mem_name[i]) + return 0; + + return 1; +} + +SEC("cgroup/sysctl") +int sysctl_tcp_mem(struct bpf_sysctl *ctx) +{ + unsigned long tcp_mem[3] = {0, 0, 0}; + char value[MAX_VALUE_STR_LEN]; + unsigned char i, off = 0; + int ret; + + if (ctx->write) + return 0; + + if (!is_tcp_mem(ctx)) + return 0; + + ret = bpf_sysctl_get_current_value(ctx, value, MAX_VALUE_STR_LEN); + if (ret < 0 || ret >= MAX_VALUE_STR_LEN) + return 0; + +#pragma clang loop unroll(full) + for (i = 0; i < ARRAY_SIZE(tcp_mem); ++i) { + ret = bpf_strtoul(value + off, MAX_ULONG_STR_LEN, 0, + tcp_mem + i); + if (ret <= 0 || ret > MAX_ULONG_STR_LEN) + return 0; + off += ret & MAX_ULONG_STR_LEN; + } + + + return tcp_mem[0] < tcp_mem[1] && tcp_mem[1] < tcp_mem[2]; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_tc_edt.c b/tools/testing/selftests/bpf/progs/test_tc_edt.c new file mode 100644 index 000000000000..3af64c470d64 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_tc_edt.c @@ -0,0 +1,109 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdint.h> +#include <linux/bpf.h> +#include <linux/if_ether.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/pkt_cls.h> +#include <linux/tcp.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" + +/* the maximum delay we are willing to add 
(drop packets beyond that) */ +#define TIME_HORIZON_NS (2000 * 1000 * 1000) +#define NS_PER_SEC 1000000000 +#define ECN_HORIZON_NS 5000000 +#define THROTTLE_RATE_BPS (5 * 1000 * 1000) + +/* flow_key => last_tstamp timestamp used */ +struct bpf_map_def SEC("maps") flow_map = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(uint32_t), + .value_size = sizeof(uint64_t), + .max_entries = 1, +}; + +static inline int throttle_flow(struct __sk_buff *skb) +{ + int key = 0; + uint64_t *last_tstamp = bpf_map_lookup_elem(&flow_map, &key); + uint64_t delay_ns = ((uint64_t)skb->len) * NS_PER_SEC / + THROTTLE_RATE_BPS; + uint64_t now = bpf_ktime_get_ns(); + uint64_t tstamp, next_tstamp = 0; + + if (last_tstamp) + next_tstamp = *last_tstamp + delay_ns; + + tstamp = skb->tstamp; + if (tstamp < now) + tstamp = now; + + /* should we throttle? */ + if (next_tstamp <= tstamp) { + if (bpf_map_update_elem(&flow_map, &key, &tstamp, BPF_ANY)) + return TC_ACT_SHOT; + return TC_ACT_OK; + } + + /* do not queue past the time horizon */ + if (next_tstamp - now >= TIME_HORIZON_NS) + return TC_ACT_SHOT; + + /* set ecn bit, if needed */ + if (next_tstamp - now >= ECN_HORIZON_NS) + bpf_skb_ecn_set_ce(skb); + + if (bpf_map_update_elem(&flow_map, &key, &next_tstamp, BPF_EXIST)) + return TC_ACT_SHOT; + skb->tstamp = next_tstamp; + + return TC_ACT_OK; +} + +static inline int handle_tcp(struct __sk_buff *skb, struct tcphdr *tcp) +{ + void *data_end = (void *)(long)skb->data_end; + + /* drop malformed packets */ + if ((void *)(tcp + 1) > data_end) + return TC_ACT_SHOT; + + if (tcp->dest == bpf_htons(9000)) + return throttle_flow(skb); + + return TC_ACT_OK; +} + +static inline int handle_ipv4(struct __sk_buff *skb) +{ + void *data_end = (void *)(long)skb->data_end; + void *data = (void *)(long)skb->data; + struct iphdr *iph; + uint32_t ihl; + + /* drop malformed packets */ + if (data + sizeof(struct ethhdr) > data_end) + return TC_ACT_SHOT; + iph = (struct iphdr *)(data + sizeof(struct ethhdr)); + if 
((void *)(iph + 1) > data_end) + return TC_ACT_SHOT; + ihl = iph->ihl * 4; + if (((void *)iph) + ihl > data_end) + return TC_ACT_SHOT; + + if (iph->protocol == IPPROTO_TCP) + return handle_tcp(skb, (struct tcphdr *)(((void *)iph) + ihl)); + + return TC_ACT_OK; +} + +SEC("cls_test") int tc_prog(struct __sk_buff *skb) +{ + if (skb->protocol == bpf_htons(ETH_P_IP)) + return handle_ipv4(skb); + + return TC_ACT_OK; +} + +char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c new file mode 100644 index 000000000000..74370e7e286d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c @@ -0,0 +1,536 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* In-place tunneling */ + +#include <stdbool.h> +#include <string.h> + +#include <linux/stddef.h> +#include <linux/bpf.h> +#include <linux/if_ether.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/mpls.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include <linux/pkt_cls.h> +#include <linux/types.h> + +#include "bpf_endian.h" +#include "bpf_helpers.h" + +static const int cfg_port = 8000; + +static const int cfg_udp_src = 20000; + +#define UDP_PORT 5555 +#define MPLS_OVER_UDP_PORT 6635 +#define ETH_OVER_UDP_PORT 7777 + +/* MPLS label 1000 with S bit (last label) set and ttl of 255. 
*/ +static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 | + MPLS_LS_S_MASK | 0xff); + +struct gre_hdr { + __be16 flags; + __be16 protocol; +} __attribute__((packed)); + +union l4hdr { + struct udphdr udp; + struct gre_hdr gre; +}; + +struct v4hdr { + struct iphdr ip; + union l4hdr l4hdr; + __u8 pad[16]; /* enough space for L2 header */ +} __attribute__((packed)); + +struct v6hdr { + struct ipv6hdr ip; + union l4hdr l4hdr; + __u8 pad[16]; /* enough space for L2 header */ +} __attribute__((packed)); + +static __always_inline void set_ipv4_csum(struct iphdr *iph) +{ + __u16 *iph16 = (__u16 *)iph; + __u32 csum; + int i; + + iph->check = 0; + +#pragma clang loop unroll(full) + for (i = 0, csum = 0; i < sizeof(*iph) >> 1; i++) + csum += *iph16++; + + iph->check = ~((csum & 0xffff) + (csum >> 16)); +} + +static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto, + __u16 l2_proto) +{ + __u16 udp_dst = UDP_PORT; + struct iphdr iph_inner; + struct v4hdr h_outer; + struct tcphdr tcph; + int olen, l2_len; + int tcp_off; + __u64 flags; + + /* Most tests encapsulate a packet into a tunnel with the same + * network protocol, and derive the outer header fields from + * the inner header. + * + * The 6in4 case tests different inner and outer protocols. As + * the inner is ipv6, but the outer expects an ipv4 header as + * input, manually build a struct iphdr based on the ipv6hdr. 
+ */ + if (encap_proto == IPPROTO_IPV6) { + const __u32 saddr = (192 << 24) | (168 << 16) | (1 << 8) | 1; + const __u32 daddr = (192 << 24) | (168 << 16) | (1 << 8) | 2; + struct ipv6hdr iph6_inner; + + /* Read the IPv6 header */ + if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph6_inner, + sizeof(iph6_inner)) < 0) + return TC_ACT_OK; + + /* Derive the IPv4 header fields from the IPv6 header */ + memset(&iph_inner, 0, sizeof(iph_inner)); + iph_inner.version = 4; + iph_inner.ihl = 5; + iph_inner.tot_len = bpf_htons(sizeof(iph6_inner) + + bpf_ntohs(iph6_inner.payload_len)); + iph_inner.ttl = iph6_inner.hop_limit - 1; + iph_inner.protocol = iph6_inner.nexthdr; + iph_inner.saddr = __bpf_constant_htonl(saddr); + iph_inner.daddr = __bpf_constant_htonl(daddr); + + tcp_off = sizeof(iph6_inner); + } else { + if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner, + sizeof(iph_inner)) < 0) + return TC_ACT_OK; + + tcp_off = sizeof(iph_inner); + } + + /* filter only packets we want */ + if (iph_inner.ihl != 5 || iph_inner.protocol != IPPROTO_TCP) + return TC_ACT_OK; + + if (bpf_skb_load_bytes(skb, ETH_HLEN + tcp_off, + &tcph, sizeof(tcph)) < 0) + return TC_ACT_OK; + + if (tcph.dest != __bpf_constant_htons(cfg_port)) + return TC_ACT_OK; + + olen = sizeof(h_outer.ip); + l2_len = 0; + + flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV4; + + switch (l2_proto) { + case ETH_P_MPLS_UC: + l2_len = sizeof(mpls_label); + udp_dst = MPLS_OVER_UDP_PORT; + break; + case ETH_P_TEB: + l2_len = ETH_HLEN; + udp_dst = ETH_OVER_UDP_PORT; + break; + } + flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len); + + switch (encap_proto) { + case IPPROTO_GRE: + flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE; + olen += sizeof(h_outer.l4hdr.gre); + h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto); + h_outer.l4hdr.gre.flags = 0; + break; + case IPPROTO_UDP: + flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP; + olen += sizeof(h_outer.l4hdr.udp); + h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src); + h_outer.l4hdr.udp.dest = 
bpf_htons(udp_dst); + h_outer.l4hdr.udp.check = 0; + h_outer.l4hdr.udp.len = bpf_htons(bpf_ntohs(iph_inner.tot_len) + + sizeof(h_outer.l4hdr.udp) + + l2_len); + break; + case IPPROTO_IPIP: + case IPPROTO_IPV6: + break; + default: + return TC_ACT_OK; + } + + /* add L2 encap (if specified) */ + switch (l2_proto) { + case ETH_P_MPLS_UC: + *((__u32 *)((__u8 *)&h_outer + olen)) = mpls_label; + break; + case ETH_P_TEB: + if (bpf_skb_load_bytes(skb, 0, (__u8 *)&h_outer + olen, + ETH_HLEN)) + return TC_ACT_SHOT; + break; + } + olen += l2_len; + + /* add room between mac and network header */ + if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags)) + return TC_ACT_SHOT; + + /* prepare new outer network header */ + h_outer.ip = iph_inner; + h_outer.ip.tot_len = bpf_htons(olen + + bpf_ntohs(h_outer.ip.tot_len)); + h_outer.ip.protocol = encap_proto; + + set_ipv4_csum((void *)&h_outer.ip); + + /* store new outer network header */ + if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen, + BPF_F_INVALIDATE_HASH) < 0) + return TC_ACT_SHOT; + + /* if changing outer proto type, update eth->h_proto */ + if (encap_proto == IPPROTO_IPV6) { + struct ethhdr eth; + + if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)) < 0) + return TC_ACT_SHOT; + eth.h_proto = bpf_htons(ETH_P_IP); + if (bpf_skb_store_bytes(skb, 0, &eth, sizeof(eth), 0) < 0) + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto, + __u16 l2_proto) +{ + __u16 udp_dst = UDP_PORT; + struct ipv6hdr iph_inner; + struct v6hdr h_outer; + struct tcphdr tcph; + int olen, l2_len; + __u16 tot_len; + __u64 flags; + + if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner, + sizeof(iph_inner)) < 0) + return TC_ACT_OK; + + /* filter only packets we want */ + if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(iph_inner), + &tcph, sizeof(tcph)) < 0) + return TC_ACT_OK; + + if (tcph.dest != __bpf_constant_htons(cfg_port)) + return TC_ACT_OK; + + olen = sizeof(h_outer.ip); + 
l2_len = 0; + + flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6; + + switch (l2_proto) { + case ETH_P_MPLS_UC: + l2_len = sizeof(mpls_label); + udp_dst = MPLS_OVER_UDP_PORT; + break; + case ETH_P_TEB: + l2_len = ETH_HLEN; + udp_dst = ETH_OVER_UDP_PORT; + break; + } + flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len); + + switch (encap_proto) { + case IPPROTO_GRE: + flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE; + olen += sizeof(h_outer.l4hdr.gre); + h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto); + h_outer.l4hdr.gre.flags = 0; + break; + case IPPROTO_UDP: + flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP; + olen += sizeof(h_outer.l4hdr.udp); + h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src); + h_outer.l4hdr.udp.dest = bpf_htons(udp_dst); + tot_len = bpf_ntohs(iph_inner.payload_len) + sizeof(iph_inner) + + sizeof(h_outer.l4hdr.udp); + h_outer.l4hdr.udp.check = 0; + h_outer.l4hdr.udp.len = bpf_htons(tot_len); + break; + case IPPROTO_IPV6: + break; + default: + return TC_ACT_OK; + } + + /* add L2 encap (if specified) */ + switch (l2_proto) { + case ETH_P_MPLS_UC: + *((__u32 *)((__u8 *)&h_outer + olen)) = mpls_label; + break; + case ETH_P_TEB: + if (bpf_skb_load_bytes(skb, 0, (__u8 *)&h_outer + olen, + ETH_HLEN)) + return TC_ACT_SHOT; + break; + } + olen += l2_len; + + /* add room between mac and network header */ + if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags)) + return TC_ACT_SHOT; + + /* prepare new outer network header */ + h_outer.ip = iph_inner; + h_outer.ip.payload_len = bpf_htons(olen + + bpf_ntohs(h_outer.ip.payload_len)); + + h_outer.ip.nexthdr = encap_proto; + + /* store new outer network header */ + if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen, + BPF_F_INVALIDATE_HASH) < 0) + return TC_ACT_SHOT; + + return TC_ACT_OK; +} + +SEC("encap_ipip_none") +int __encap_ipip_none(struct __sk_buff *skb) +{ + if (skb->protocol == __bpf_constant_htons(ETH_P_IP)) + return encap_ipv4(skb, IPPROTO_IPIP, ETH_P_IP); + else + return TC_ACT_OK; +} + 
+SEC("encap_gre_none") +int __encap_gre_none(struct __sk_buff *skb) +{ + if (skb->protocol == __bpf_constant_htons(ETH_P_IP)) + return encap_ipv4(skb, IPPROTO_GRE, ETH_P_IP); + else + return TC_ACT_OK; +} + +SEC("encap_gre_mpls") +int __encap_gre_mpls(struct __sk_buff *skb) +{ + if (skb->protocol == __bpf_constant_htons(ETH_P_IP)) + return encap_ipv4(skb, IPPROTO_GRE, ETH_P_MPLS_UC); + else + return TC_ACT_OK; +} + +SEC("encap_gre_eth") +int __encap_gre_eth(struct __sk_buff *skb) +{ + if (skb->protocol == __bpf_constant_htons(ETH_P_IP)) + return encap_ipv4(skb, IPPROTO_GRE, ETH_P_TEB); + else + return TC_ACT_OK; +} + +SEC("encap_udp_none") +int __encap_udp_none(struct __sk_buff *skb) +{ + if (skb->protocol == __bpf_constant_htons(ETH_P_IP)) + return encap_ipv4(skb, IPPROTO_UDP, ETH_P_IP); + else + return TC_ACT_OK; +} + +SEC("encap_udp_mpls") +int __encap_udp_mpls(struct __sk_buff *skb) +{ + if (skb->protocol == __bpf_constant_htons(ETH_P_IP)) + return encap_ipv4(skb, IPPROTO_UDP, ETH_P_MPLS_UC); + else + return TC_ACT_OK; +} + +SEC("encap_udp_eth") +int __encap_udp_eth(struct __sk_buff *skb) +{ + if (skb->protocol == __bpf_constant_htons(ETH_P_IP)) + return encap_ipv4(skb, IPPROTO_UDP, ETH_P_TEB); + else + return TC_ACT_OK; +} + +SEC("encap_sit_none") +int __encap_sit_none(struct __sk_buff *skb) +{ + if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6)) + return encap_ipv4(skb, IPPROTO_IPV6, ETH_P_IP); + else + return TC_ACT_OK; +} + +SEC("encap_ip6tnl_none") +int __encap_ip6tnl_none(struct __sk_buff *skb) +{ + if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6)) + return encap_ipv6(skb, IPPROTO_IPV6, ETH_P_IPV6); + else + return TC_ACT_OK; +} + +SEC("encap_ip6gre_none") +int __encap_ip6gre_none(struct __sk_buff *skb) +{ + if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6)) + return encap_ipv6(skb, IPPROTO_GRE, ETH_P_IPV6); + else + return TC_ACT_OK; +} + +SEC("encap_ip6gre_mpls") +int __encap_ip6gre_mpls(struct __sk_buff *skb) +{ + if (skb->protocol == 
__bpf_constant_htons(ETH_P_IPV6)) + return encap_ipv6(skb, IPPROTO_GRE, ETH_P_MPLS_UC); + else + return TC_ACT_OK; +} + +SEC("encap_ip6gre_eth") +int __encap_ip6gre_eth(struct __sk_buff *skb) +{ + if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6)) + return encap_ipv6(skb, IPPROTO_GRE, ETH_P_TEB); + else + return TC_ACT_OK; +} + +SEC("encap_ip6udp_none") +int __encap_ip6udp_none(struct __sk_buff *skb) +{ + if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6)) + return encap_ipv6(skb, IPPROTO_UDP, ETH_P_IPV6); + else + return TC_ACT_OK; +} + +SEC("encap_ip6udp_mpls") +int __encap_ip6udp_mpls(struct __sk_buff *skb) +{ + if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6)) + return encap_ipv6(skb, IPPROTO_UDP, ETH_P_MPLS_UC); + else + return TC_ACT_OK; +} + +SEC("encap_ip6udp_eth") +int __encap_ip6udp_eth(struct __sk_buff *skb) +{ + if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6)) + return encap_ipv6(skb, IPPROTO_UDP, ETH_P_TEB); + else + return TC_ACT_OK; +} + +static int decap_internal(struct __sk_buff *skb, int off, int len, char proto) +{ + char buf[sizeof(struct v6hdr)]; + struct gre_hdr greh; + struct udphdr udph; + int olen = len; + + switch (proto) { + case IPPROTO_IPIP: + case IPPROTO_IPV6: + break; + case IPPROTO_GRE: + olen += sizeof(struct gre_hdr); + if (bpf_skb_load_bytes(skb, off + len, &greh, sizeof(greh)) < 0) + return TC_ACT_OK; + switch (bpf_ntohs(greh.protocol)) { + case ETH_P_MPLS_UC: + olen += sizeof(mpls_label); + break; + case ETH_P_TEB: + olen += ETH_HLEN; + break; + } + break; + case IPPROTO_UDP: + olen += sizeof(struct udphdr); + if (bpf_skb_load_bytes(skb, off + len, &udph, sizeof(udph)) < 0) + return TC_ACT_OK; + switch (bpf_ntohs(udph.dest)) { + case MPLS_OVER_UDP_PORT: + olen += sizeof(mpls_label); + break; + case ETH_OVER_UDP_PORT: + olen += ETH_HLEN; + break; + } + break; + default: + return TC_ACT_OK; + } + + if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_MAC, + BPF_F_ADJ_ROOM_FIXED_GSO)) + return TC_ACT_SHOT; + + return 
TC_ACT_OK; +} + +static int decap_ipv4(struct __sk_buff *skb) +{ + struct iphdr iph_outer; + + if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer, + sizeof(iph_outer)) < 0) + return TC_ACT_OK; + + if (iph_outer.ihl != 5) + return TC_ACT_OK; + + return decap_internal(skb, ETH_HLEN, sizeof(iph_outer), + iph_outer.protocol); +} + +static int decap_ipv6(struct __sk_buff *skb) +{ + struct ipv6hdr iph_outer; + + if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer, + sizeof(iph_outer)) < 0) + return TC_ACT_OK; + + return decap_internal(skb, ETH_HLEN, sizeof(iph_outer), + iph_outer.nexthdr); +} + +SEC("decap") +int decap_f(struct __sk_buff *skb) +{ + switch (skb->protocol) { + case __bpf_constant_htons(ETH_P_IP): + return decap_ipv4(skb); + case __bpf_constant_htons(ETH_P_IPV6): + return decap_ipv6(skb); + default: + /* does not match, ignore */ + return TC_ACT_OK; + } +} + +char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c b/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c new file mode 100644 index 000000000000..1ab095bcacd8 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c @@ -0,0 +1,129 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Facebook +// Copyright (c) 2019 Cloudflare + +#include <string.h> + +#include <linux/bpf.h> +#include <linux/pkt_cls.h> +#include <linux/if_ether.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <sys/socket.h> +#include <linux/tcp.h> + +#include "bpf_helpers.h" +#include "bpf_endian.h" + +struct bpf_map_def SEC("maps") results = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u64), + .max_entries = 1, +}; + +static __always_inline void check_syncookie(void *ctx, void *data, + void *data_end) +{ + struct bpf_sock_tuple tup; + struct bpf_sock *sk; + struct ethhdr *ethh; + struct iphdr *ipv4h; + struct ipv6hdr *ipv6h; + struct tcphdr *tcph; 
+ int ret; + __u32 key = 0; + __u64 value = 1; + + ethh = data; + if (ethh + 1 > data_end) + return; + + switch (bpf_ntohs(ethh->h_proto)) { + case ETH_P_IP: + ipv4h = data + sizeof(struct ethhdr); + if (ipv4h + 1 > data_end) + return; + + if (ipv4h->ihl != 5) + return; + + tcph = data + sizeof(struct ethhdr) + sizeof(struct iphdr); + if (tcph + 1 > data_end) + return; + + tup.ipv4.saddr = ipv4h->saddr; + tup.ipv4.daddr = ipv4h->daddr; + tup.ipv4.sport = tcph->source; + tup.ipv4.dport = tcph->dest; + + sk = bpf_skc_lookup_tcp(ctx, &tup, sizeof(tup.ipv4), + BPF_F_CURRENT_NETNS, 0); + if (!sk) + return; + + if (sk->state != BPF_TCP_LISTEN) + goto release; + + ret = bpf_tcp_check_syncookie(sk, ipv4h, sizeof(*ipv4h), + tcph, sizeof(*tcph)); + break; + + case ETH_P_IPV6: + ipv6h = data + sizeof(struct ethhdr); + if (ipv6h + 1 > data_end) + return; + + if (ipv6h->nexthdr != IPPROTO_TCP) + return; + + tcph = data + sizeof(struct ethhdr) + sizeof(struct ipv6hdr); + if (tcph + 1 > data_end) + return; + + memcpy(tup.ipv6.saddr, &ipv6h->saddr, sizeof(tup.ipv6.saddr)); + memcpy(tup.ipv6.daddr, &ipv6h->daddr, sizeof(tup.ipv6.daddr)); + tup.ipv6.sport = tcph->source; + tup.ipv6.dport = tcph->dest; + + sk = bpf_skc_lookup_tcp(ctx, &tup, sizeof(tup.ipv6), + BPF_F_CURRENT_NETNS, 0); + if (!sk) + return; + + if (sk->state != BPF_TCP_LISTEN) + goto release; + + ret = bpf_tcp_check_syncookie(sk, ipv6h, sizeof(*ipv6h), + tcph, sizeof(*tcph)); + break; + + default: + return; + } + + if (ret == 0) + bpf_map_update_elem(&results, &key, &value, 0); + +release: + bpf_sk_release(sk); +} + +SEC("clsact/check_syncookie") +int check_syncookie_clsact(struct __sk_buff *skb) +{ + check_syncookie(skb, (void *)(long)skb->data, + (void *)(long)skb->data_end); + return TC_ACT_OK; +} + +SEC("xdp/check_syncookie") +int check_syncookie_xdp(struct xdp_md *ctx) +{ + check_syncookie(ctx, (void *)(long)ctx->data, + (void *)(long)ctx->data_end); + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; 
diff --git a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c index 74f73b33a7b0..c7c3240e0dd4 100644 --- a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <stddef.h> #include <string.h> +#include <netinet/in.h> #include <linux/bpf.h> #include <linux/if_ether.h> #include <linux/if_packet.h> @@ -9,7 +10,6 @@ #include <linux/types.h> #include <linux/socket.h> #include <linux/tcp.h> -#include <netinet/in.h> #include "bpf_helpers.h" #include "bpf_endian.h" #include "test_tcpbpf.h" diff --git a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c index edbca203ce2d..ec6db6e64c41 100644 --- a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <stddef.h> #include <string.h> +#include <netinet/in.h> #include <linux/bpf.h> #include <linux/if_ether.h> #include <linux/if_packet.h> @@ -9,7 +10,6 @@ #include <linux/types.h> #include <linux/socket.h> #include <linux/tcp.h> -#include <netinet/in.h> #include "bpf_helpers.h" #include "bpf_endian.h" #include "test_tcpnotify.h" diff --git a/tools/testing/selftests/bpf/progs/test_verif_scale1.c b/tools/testing/selftests/bpf/progs/test_verif_scale1.c new file mode 100644 index 000000000000..f3236ce35f31 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_verif_scale1.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook +#include <linux/bpf.h> +#include "bpf_helpers.h" +#define ATTR __attribute__((noinline)) +#include "test_jhash.h" + +SEC("scale90_noinline") +int balancer_ingress(struct __sk_buff *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + void *ptr; + int ret = 0, nh_off, i 
= 0; + + nh_off = 14; + + /* pragma unroll doesn't work on large loops */ + +#define C do { \ + ptr = data + i; \ + if (ptr + nh_off > data_end) \ + break; \ + ctx->tc_index = jhash(ptr, nh_off, ctx->cb[0] + i++); \ + } while (0); +#define C30 C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C; + C30;C30;C30; /* 90 calls */ + return 0; +} +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_verif_scale2.c b/tools/testing/selftests/bpf/progs/test_verif_scale2.c new file mode 100644 index 000000000000..77830693eccb --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_verif_scale2.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook +#include <linux/bpf.h> +#include "bpf_helpers.h" +#define ATTR __attribute__((always_inline)) +#include "test_jhash.h" + +SEC("scale90_inline") +int balancer_ingress(struct __sk_buff *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + void *ptr; + int ret = 0, nh_off, i = 0; + + nh_off = 14; + + /* pragma unroll doesn't work on large loops */ + +#define C do { \ + ptr = data + i; \ + if (ptr + nh_off > data_end) \ + break; \ + ctx->tc_index = jhash(ptr, nh_off, ctx->cb[0] + i++); \ + } while (0); +#define C30 C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C; + C30;C30;C30; /* 90 calls */ + return 0; +} +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_verif_scale3.c b/tools/testing/selftests/bpf/progs/test_verif_scale3.c new file mode 100644 index 000000000000..1848da04ea41 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_verif_scale3.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook +#include <linux/bpf.h> +#include "bpf_helpers.h" +#define ATTR __attribute__((noinline)) +#include "test_jhash.h" + +SEC("scale90_noinline32") +int balancer_ingress(struct __sk_buff *ctx) +{ + void *data_end = (void 
*)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + void *ptr; + int ret = 0, nh_off, i = 0; + + nh_off = 32; + + /* pragma unroll doesn't work on large loops */ + +#define C do { \ + ptr = data + i; \ + if (ptr + nh_off > data_end) \ + break; \ + ctx->tc_index = jhash(ptr, nh_off, ctx->cb[0] + i++); \ + } while (0); +#define C30 C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C; + C30;C30;C30; /* 90 calls */ + return 0; +} +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index 38797aa627a7..42c1ce988945 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -24,6 +24,7 @@ #include "bpf_rlimit.h" #include "bpf_util.h" +#include "test_btf.h" #define MAX_INSNS 512 #define MAX_SUBPROGS 16 @@ -58,63 +59,6 @@ static int __base_pr(enum libbpf_print_level level __attribute__((unused)), return vfprintf(stderr, format, args); } -#define BTF_INFO_ENC(kind, kind_flag, vlen) \ - ((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN)) - -#define BTF_TYPE_ENC(name, info, size_or_type) \ - (name), (info), (size_or_type) - -#define BTF_INT_ENC(encoding, bits_offset, nr_bits) \ - ((encoding) << 24 | (bits_offset) << 16 | (nr_bits)) -#define BTF_TYPE_INT_ENC(name, encoding, bits_offset, bits, sz) \ - BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz), \ - BTF_INT_ENC(encoding, bits_offset, bits) - -#define BTF_FWD_ENC(name, kind_flag) \ - BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FWD, kind_flag, 0), 0) - -#define BTF_ARRAY_ENC(type, index_type, nr_elems) \ - (type), (index_type), (nr_elems) -#define BTF_TYPE_ARRAY_ENC(type, index_type, nr_elems) \ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ARRAY, 0, 0), 0), \ - BTF_ARRAY_ENC(type, index_type, nr_elems) - -#define BTF_STRUCT_ENC(name, nr_elems, sz) \ - BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, nr_elems), sz) - -#define BTF_UNION_ENC(name, nr_elems, sz) \ - 
BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_UNION, 0, nr_elems), sz) - -#define BTF_MEMBER_ENC(name, type, bits_offset) \ - (name), (type), (bits_offset) -#define BTF_ENUM_ENC(name, val) (name), (val) -#define BTF_MEMBER_OFFSET(bitfield_size, bits_offset) \ - ((bitfield_size) << 24 | (bits_offset)) - -#define BTF_TYPEDEF_ENC(name, type) \ - BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), type) - -#define BTF_PTR_ENC(type) \ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), type) - -#define BTF_CONST_ENC(type) \ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), type) - -#define BTF_VOLATILE_ENC(type) \ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_VOLATILE, 0, 0), type) - -#define BTF_RESTRICT_ENC(type) \ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_RESTRICT, 0, 0), type) - -#define BTF_FUNC_PROTO_ENC(ret_type, nargs) \ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, nargs), ret_type) - -#define BTF_FUNC_PROTO_ARG_ENC(name, type) \ - (name), (type) - -#define BTF_FUNC_ENC(name, func_proto) \ - BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), func_proto) - #define BTF_END_RAW 0xdeadbeef #define NAME_TBD 0xdeadb33f @@ -291,7 +235,6 @@ static struct btf_raw_test raw_tests[] = { .value_type_id = 3, .max_entries = 4, }, - { .descr = "struct test #3 Invalid member offset", .raw_types = { @@ -319,7 +262,664 @@ static struct btf_raw_test raw_tests[] = { .btf_load_err = true, .err_str = "Invalid member bits_offset", }, - +/* + * struct A { + * unsigned long long m; + * int n; + * char o; + * [3 bytes hole] + * int p[8]; + * }; + */ +{ + .descr = "global data test #1", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* unsigned long long */ + BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */ + /* char */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */ + /* int[8] */ + BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */ + /* struct A { */ /* [5] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48), + 
BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/ + BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */ + BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */ + BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */ + /* } */ + BTF_END_RAW, + }, + .str_sec = "\0A\0m\0n\0o\0p", + .str_sec_size = sizeof("\0A\0m\0n\0o\0p"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_test1_map", + .key_size = sizeof(int), + .value_size = 48, + .key_type_id = 1, + .value_type_id = 5, + .max_entries = 4, +}, +/* + * struct A { + * unsigned long long m; + * int n; + * char o; + * [3 bytes hole] + * int p[8]; + * }; + * static struct A t; <- in .bss + */ +{ + .descr = "global data test #2", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* unsigned long long */ + BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */ + /* char */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */ + /* int[8] */ + BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */ + /* struct A { */ /* [5] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48), + BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/ + BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */ + BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */ + BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */ + /* } */ + /* static struct A t */ + BTF_VAR_ENC(NAME_TBD, 5, 0), /* [6] */ + /* .bss section */ /* [7] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 48), + BTF_VAR_SECINFO_ENC(6, 0, 48), + BTF_END_RAW, + }, + .str_sec = "\0A\0m\0n\0o\0p\0t\0.bss", + .str_sec_size = sizeof("\0A\0m\0n\0o\0p\0t\0.bss"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = ".bss", + .key_size = sizeof(int), + .value_size = 48, + .key_type_id = 0, + .value_type_id = 7, + .max_entries = 1, +}, +{ + .descr = "global data test #3", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* static int t */ + BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */ + /* .bss section */ /* [3] */ + 
BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(2, 0, 4), + BTF_END_RAW, + }, + .str_sec = "\0t\0.bss", + .str_sec_size = sizeof("\0t\0.bss"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = ".bss", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 0, + .value_type_id = 3, + .max_entries = 1, +}, +{ + .descr = "global data test #4, unsupported linkage", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* static int t */ + BTF_VAR_ENC(NAME_TBD, 1, 2), /* [2] */ + /* .bss section */ /* [3] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(2, 0, 4), + BTF_END_RAW, + }, + .str_sec = "\0t\0.bss", + .str_sec_size = sizeof("\0t\0.bss"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = ".bss", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 0, + .value_type_id = 3, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Linkage not supported", +}, +{ + .descr = "global data test #5, invalid var type", + .raw_types = { + /* static void t */ + BTF_VAR_ENC(NAME_TBD, 0, 0), /* [1] */ + /* .bss section */ /* [2] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(1, 0, 4), + BTF_END_RAW, + }, + .str_sec = "\0t\0.bss", + .str_sec_size = sizeof("\0t\0.bss"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = ".bss", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 0, + .value_type_id = 2, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Invalid type_id", +}, +{ + .descr = "global data test #6, invalid var type (fwd type)", + .raw_types = { + /* union A */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_FWD, 1, 0), 0), /* [1] */ + /* static union A t */ + BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */ + /* .bss section */ /* [3] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(2, 0, 4), + BTF_END_RAW, + }, + .str_sec = "\0A\0t\0.bss", + 
.str_sec_size = sizeof("\0A\0t\0.bss"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = ".bss", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 0, + .value_type_id = 2, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Invalid type", +}, +{ + .descr = "global data test #7, invalid var type (fwd type)", + .raw_types = { + /* union A */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_FWD, 1, 0), 0), /* [1] */ + /* static union A t */ + BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */ + /* .bss section */ /* [3] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(1, 0, 4), + BTF_END_RAW, + }, + .str_sec = "\0A\0t\0.bss", + .str_sec_size = sizeof("\0A\0t\0.bss"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = ".bss", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 0, + .value_type_id = 2, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Invalid type", +}, +{ + .descr = "global data test #8, invalid var size", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* unsigned long long */ + BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */ + /* char */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */ + /* int[8] */ + BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */ + /* struct A { */ /* [5] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48), + BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/ + BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */ + BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */ + BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */ + /* } */ + /* static struct A t */ + BTF_VAR_ENC(NAME_TBD, 5, 0), /* [6] */ + /* .bss section */ /* [7] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 48), + BTF_VAR_SECINFO_ENC(6, 0, 47), + BTF_END_RAW, + }, + .str_sec = "\0A\0m\0n\0o\0p\0t\0.bss", + .str_sec_size = sizeof("\0A\0m\0n\0o\0p\0t\0.bss"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = ".bss", + .key_size = sizeof(int), + 
.value_size = 48, + .key_type_id = 0, + .value_type_id = 7, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Invalid size", +}, +{ + .descr = "global data test #9, invalid var size", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* unsigned long long */ + BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */ + /* char */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */ + /* int[8] */ + BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */ + /* struct A { */ /* [5] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48), + BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/ + BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */ + BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */ + BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */ + /* } */ + /* static struct A t */ + BTF_VAR_ENC(NAME_TBD, 5, 0), /* [6] */ + /* .bss section */ /* [7] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 46), + BTF_VAR_SECINFO_ENC(6, 0, 48), + BTF_END_RAW, + }, + .str_sec = "\0A\0m\0n\0o\0p\0t\0.bss", + .str_sec_size = sizeof("\0A\0m\0n\0o\0p\0t\0.bss"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = ".bss", + .key_size = sizeof(int), + .value_size = 48, + .key_type_id = 0, + .value_type_id = 7, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Invalid size", +}, +{ + .descr = "global data test #10, invalid var size", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* unsigned long long */ + BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */ + /* char */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */ + /* int[8] */ + BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */ + /* struct A { */ /* [5] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48), + BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/ + BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */ + BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */ + BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */ + /* } */ + /* static 
struct A t */ + BTF_VAR_ENC(NAME_TBD, 5, 0), /* [6] */ + /* .bss section */ /* [7] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 46), + BTF_VAR_SECINFO_ENC(6, 0, 46), + BTF_END_RAW, + }, + .str_sec = "\0A\0m\0n\0o\0p\0t\0.bss", + .str_sec_size = sizeof("\0A\0m\0n\0o\0p\0t\0.bss"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = ".bss", + .key_size = sizeof(int), + .value_size = 48, + .key_type_id = 0, + .value_type_id = 7, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Invalid size", +}, +{ + .descr = "global data test #11, multiple section members", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* unsigned long long */ + BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */ + /* char */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */ + /* int[8] */ + BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */ + /* struct A { */ /* [5] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48), + BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/ + BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */ + BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */ + BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */ + /* } */ + /* static struct A t */ + BTF_VAR_ENC(NAME_TBD, 5, 0), /* [6] */ + /* static int u */ + BTF_VAR_ENC(NAME_TBD, 1, 0), /* [7] */ + /* .bss section */ /* [8] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 2), 62), + BTF_VAR_SECINFO_ENC(6, 10, 48), + BTF_VAR_SECINFO_ENC(7, 58, 4), + BTF_END_RAW, + }, + .str_sec = "\0A\0m\0n\0o\0p\0t\0u\0.bss", + .str_sec_size = sizeof("\0A\0m\0n\0o\0p\0t\0u\0.bss"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = ".bss", + .key_size = sizeof(int), + .value_size = 62, + .key_type_id = 0, + .value_type_id = 8, + .max_entries = 1, +}, +{ + .descr = "global data test #12, invalid offset", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* unsigned long long */ + BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */ + /* char 
*/ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */ + /* int[8] */ + BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */ + /* struct A { */ /* [5] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48), + BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/ + BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */ + BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */ + BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */ + /* } */ + /* static struct A t */ + BTF_VAR_ENC(NAME_TBD, 5, 0), /* [6] */ + /* static int u */ + BTF_VAR_ENC(NAME_TBD, 1, 0), /* [7] */ + /* .bss section */ /* [8] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 2), 62), + BTF_VAR_SECINFO_ENC(6, 10, 48), + BTF_VAR_SECINFO_ENC(7, 60, 4), + BTF_END_RAW, + }, + .str_sec = "\0A\0m\0n\0o\0p\0t\0u\0.bss", + .str_sec_size = sizeof("\0A\0m\0n\0o\0p\0t\0u\0.bss"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = ".bss", + .key_size = sizeof(int), + .value_size = 62, + .key_type_id = 0, + .value_type_id = 8, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Invalid offset+size", +}, +{ + .descr = "global data test #13, invalid offset", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* unsigned long long */ + BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */ + /* char */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */ + /* int[8] */ + BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */ + /* struct A { */ /* [5] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48), + BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/ + BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */ + BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */ + BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */ + /* } */ + /* static struct A t */ + BTF_VAR_ENC(NAME_TBD, 5, 0), /* [6] */ + /* static int u */ + BTF_VAR_ENC(NAME_TBD, 1, 0), /* [7] */ + /* .bss section */ /* [8] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 2), 62), + BTF_VAR_SECINFO_ENC(6, 10, 48), + 
BTF_VAR_SECINFO_ENC(7, 12, 4), + BTF_END_RAW, + }, + .str_sec = "\0A\0m\0n\0o\0p\0t\0u\0.bss", + .str_sec_size = sizeof("\0A\0m\0n\0o\0p\0t\0u\0.bss"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = ".bss", + .key_size = sizeof(int), + .value_size = 62, + .key_type_id = 0, + .value_type_id = 8, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Invalid offset", +}, +{ + .descr = "global data test #14, invalid offset", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* unsigned long long */ + BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */ + /* char */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */ + /* int[8] */ + BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */ + /* struct A { */ /* [5] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48), + BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/ + BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */ + BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */ + BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */ + /* } */ + /* static struct A t */ + BTF_VAR_ENC(NAME_TBD, 5, 0), /* [6] */ + /* static int u */ + BTF_VAR_ENC(NAME_TBD, 1, 0), /* [7] */ + /* .bss section */ /* [8] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 2), 62), + BTF_VAR_SECINFO_ENC(7, 58, 4), + BTF_VAR_SECINFO_ENC(6, 10, 48), + BTF_END_RAW, + }, + .str_sec = "\0A\0m\0n\0o\0p\0t\0u\0.bss", + .str_sec_size = sizeof("\0A\0m\0n\0o\0p\0t\0u\0.bss"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = ".bss", + .key_size = sizeof(int), + .value_size = 62, + .key_type_id = 0, + .value_type_id = 8, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Invalid offset", +}, +{ + .descr = "global data test #15, not var kind", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */ + /* .bss section */ /* [3] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(1, 0, 4), + BTF_END_RAW, + 
}, + .str_sec = "\0A\0t\0.bss", + .str_sec_size = sizeof("\0A\0t\0.bss"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = ".bss", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 0, + .value_type_id = 3, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Not a VAR kind member", +}, +{ + .descr = "global data test #16, invalid var referencing sec", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_VAR_ENC(NAME_TBD, 5, 0), /* [2] */ + BTF_VAR_ENC(NAME_TBD, 2, 0), /* [3] */ + /* a section */ /* [4] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(3, 0, 4), + /* a section */ /* [5] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(6, 0, 4), + BTF_VAR_ENC(NAME_TBD, 1, 0), /* [6] */ + BTF_END_RAW, + }, + .str_sec = "\0A\0t\0s\0a\0a", + .str_sec_size = sizeof("\0A\0t\0s\0a\0a"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = ".bss", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 0, + .value_type_id = 4, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Invalid type_id", +}, +{ + .descr = "global data test #17, invalid var referencing var", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */ + BTF_VAR_ENC(NAME_TBD, 2, 0), /* [3] */ + /* a section */ /* [4] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(3, 0, 4), + BTF_END_RAW, + }, + .str_sec = "\0A\0t\0s\0a\0a", + .str_sec_size = sizeof("\0A\0t\0s\0a\0a"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = ".bss", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 0, + .value_type_id = 4, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Invalid type_id", +}, +{ + .descr = "global data test #18, invalid var loop", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + 
BTF_VAR_ENC(NAME_TBD, 2, 0), /* [2] */ + /* .bss section */ /* [3] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(2, 0, 4), + BTF_END_RAW, + }, + .str_sec = "\0A\0t\0aaa", + .str_sec_size = sizeof("\0A\0t\0aaa"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = ".bss", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 0, + .value_type_id = 4, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Invalid type_id", +}, +{ + .descr = "global data test #19, invalid var referencing var", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_VAR_ENC(NAME_TBD, 3, 0), /* [2] */ + BTF_VAR_ENC(NAME_TBD, 1, 0), /* [3] */ + BTF_END_RAW, + }, + .str_sec = "\0A\0t\0s\0a\0a", + .str_sec_size = sizeof("\0A\0t\0s\0a\0a"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = ".bss", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 0, + .value_type_id = 4, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Invalid type_id", +}, +{ + .descr = "global data test #20, invalid ptr referencing var", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* PTR type_id=3 */ /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3), + BTF_VAR_ENC(NAME_TBD, 1, 0), /* [3] */ + BTF_END_RAW, + }, + .str_sec = "\0A\0t\0s\0a\0a", + .str_sec_size = sizeof("\0A\0t\0s\0a\0a"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = ".bss", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 0, + .value_type_id = 4, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Invalid type_id", +}, +{ + .descr = "global data test #21, var included in struct", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* struct A { */ /* [2] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) * 2), + BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */ + BTF_MEMBER_ENC(NAME_TBD, 3, 32),/* VAR 
type_id=3; */ + /* } */ + BTF_VAR_ENC(NAME_TBD, 1, 0), /* [3] */ + BTF_END_RAW, + }, + .str_sec = "\0A\0t\0s\0a\0a", + .str_sec_size = sizeof("\0A\0t\0s\0a\0a"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = ".bss", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 0, + .value_type_id = 4, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Invalid member", +}, +{ + .descr = "global data test #22, array of var", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ARRAY_ENC(3, 1, 4), /* [2] */ + BTF_VAR_ENC(NAME_TBD, 1, 0), /* [3] */ + BTF_END_RAW, + }, + .str_sec = "\0A\0t\0s\0a\0a", + .str_sec_size = sizeof("\0A\0t\0s\0a\0a"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = ".bss", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 0, + .value_type_id = 4, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Invalid elem", +}, /* Test member exceeds the size of struct. * * struct A { @@ -3677,6 +4277,7 @@ struct pprint_mapv { } aenum; uint32_t ui32b; uint32_t bits2c:2; + uint8_t si8_4[2][2]; }; #ifdef __SIZEOF_INT128__ @@ -3729,7 +4330,7 @@ static struct btf_raw_test pprint_test_template[] = { BTF_ENUM_ENC(NAME_TBD, 2), BTF_ENUM_ENC(NAME_TBD, 3), /* struct pprint_mapv */ /* [16] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 10), 40), + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 11), 40), BTF_MEMBER_ENC(NAME_TBD, 11, 0), /* uint32_t ui32 */ BTF_MEMBER_ENC(NAME_TBD, 10, 32), /* uint16_t ui16 */ BTF_MEMBER_ENC(NAME_TBD, 12, 64), /* int32_t si32 */ @@ -3740,9 +4341,12 @@ static struct btf_raw_test pprint_test_template[] = { BTF_MEMBER_ENC(NAME_TBD, 15, 192), /* aenum */ BTF_MEMBER_ENC(NAME_TBD, 11, 224), /* uint32_t ui32b */ BTF_MEMBER_ENC(NAME_TBD, 6, 256), /* bits2c */ + BTF_MEMBER_ENC(NAME_TBD, 17, 264), /* si8_4 */ + BTF_TYPE_ARRAY_ENC(18, 1, 2), /* [17] */ + BTF_TYPE_ARRAY_ENC(1, 1, 2), /* [18] */ BTF_END_RAW, }, - BTF_STR_SEC("\0unsigned 
char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum\0ui32b\0bits2c"), + BTF_STR_SEC("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum\0ui32b\0bits2c\0si8_4"), .key_size = sizeof(unsigned int), .value_size = sizeof(struct pprint_mapv), .key_type_id = 3, /* unsigned int */ @@ -3791,7 +4395,7 @@ static struct btf_raw_test pprint_test_template[] = { BTF_ENUM_ENC(NAME_TBD, 2), BTF_ENUM_ENC(NAME_TBD, 3), /* struct pprint_mapv */ /* [16] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 10), 40), + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 11), 40), BTF_MEMBER_ENC(NAME_TBD, 11, BTF_MEMBER_OFFSET(0, 0)), /* uint32_t ui32 */ BTF_MEMBER_ENC(NAME_TBD, 10, BTF_MEMBER_OFFSET(0, 32)), /* uint16_t ui16 */ BTF_MEMBER_ENC(NAME_TBD, 12, BTF_MEMBER_OFFSET(0, 64)), /* int32_t si32 */ @@ -3802,9 +4406,12 @@ static struct btf_raw_test pprint_test_template[] = { BTF_MEMBER_ENC(NAME_TBD, 15, BTF_MEMBER_OFFSET(0, 192)), /* aenum */ BTF_MEMBER_ENC(NAME_TBD, 11, BTF_MEMBER_OFFSET(0, 224)), /* uint32_t ui32b */ BTF_MEMBER_ENC(NAME_TBD, 6, BTF_MEMBER_OFFSET(2, 256)), /* bits2c */ + BTF_MEMBER_ENC(NAME_TBD, 17, 264), /* si8_4 */ + BTF_TYPE_ARRAY_ENC(18, 1, 2), /* [17] */ + BTF_TYPE_ARRAY_ENC(1, 1, 2), /* [18] */ BTF_END_RAW, }, - BTF_STR_SEC("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum\0ui32b\0bits2c"), + BTF_STR_SEC("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long 
long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum\0ui32b\0bits2c\0si8_4"), .key_size = sizeof(unsigned int), .value_size = sizeof(struct pprint_mapv), .key_type_id = 3, /* unsigned int */ @@ -3855,7 +4462,7 @@ static struct btf_raw_test pprint_test_template[] = { BTF_ENUM_ENC(NAME_TBD, 2), BTF_ENUM_ENC(NAME_TBD, 3), /* struct pprint_mapv */ /* [16] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 10), 40), + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 11), 40), BTF_MEMBER_ENC(NAME_TBD, 11, BTF_MEMBER_OFFSET(0, 0)), /* uint32_t ui32 */ BTF_MEMBER_ENC(NAME_TBD, 10, BTF_MEMBER_OFFSET(0, 32)), /* uint16_t ui16 */ BTF_MEMBER_ENC(NAME_TBD, 12, BTF_MEMBER_OFFSET(0, 64)), /* int32_t si32 */ @@ -3866,13 +4473,16 @@ static struct btf_raw_test pprint_test_template[] = { BTF_MEMBER_ENC(NAME_TBD, 15, BTF_MEMBER_OFFSET(0, 192)), /* aenum */ BTF_MEMBER_ENC(NAME_TBD, 11, BTF_MEMBER_OFFSET(0, 224)), /* uint32_t ui32b */ BTF_MEMBER_ENC(NAME_TBD, 17, BTF_MEMBER_OFFSET(2, 256)), /* bits2c */ + BTF_MEMBER_ENC(NAME_TBD, 20, BTF_MEMBER_OFFSET(0, 264)), /* si8_4 */ /* typedef unsigned int ___int */ /* [17] */ BTF_TYPEDEF_ENC(NAME_TBD, 18), BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_VOLATILE, 0, 0), 6), /* [18] */ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 15), /* [19] */ + BTF_TYPE_ARRAY_ENC(21, 1, 2), /* [20] */ + BTF_TYPE_ARRAY_ENC(1, 1, 2), /* [21] */ BTF_END_RAW, }, - BTF_STR_SEC("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum\0ui32b\0bits2c\0___int"), + BTF_STR_SEC("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long 
long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum\0ui32b\0bits2c\0___int\0si8_4"), .key_size = sizeof(unsigned int), .value_size = sizeof(struct pprint_mapv), .key_type_id = 3, /* unsigned int */ @@ -4007,6 +4617,10 @@ static void set_pprint_mapv(enum pprint_mapv_kind_t mapv_kind, v->aenum = i & 0x03; v->ui32b = 4; v->bits2c = 1; + v->si8_4[0][0] = (cpu + i) & 0xff; + v->si8_4[0][1] = (cpu + i + 1) & 0xff; + v->si8_4[1][0] = (cpu + i + 2) & 0xff; + v->si8_4[1][1] = (cpu + i + 3) & 0xff; v = (void *)v + rounded_value_size; } } @@ -4040,7 +4654,7 @@ ssize_t get_pprint_expected_line(enum pprint_mapv_kind_t mapv_kind, nexpected_line = snprintf(expected_line, line_size, "%s%u: {%u,0,%d,0x%x,0x%x,0x%x," "{%lu|[%u,%u,%u,%u,%u,%u,%u,%u]},%s," - "%u,0x%x}\n", + "%u,0x%x,[[%d,%d],[%d,%d]]}\n", percpu_map ? "\tcpu" : "", percpu_map ? cpu : next_key, v->ui32, v->si32, @@ -4054,7 +4668,9 @@ ssize_t get_pprint_expected_line(enum pprint_mapv_kind_t mapv_kind, v->ui8a[6], v->ui8a[7], pprint_enum_str[v->aenum], v->ui32b, - v->bits2c); + v->bits2c, + v->si8_4[0][0], v->si8_4[0][1], + v->si8_4[1][0], v->si8_4[1][1]); } #ifdef __SIZEOF_INT128__ @@ -5777,6 +6393,53 @@ const struct btf_dedup_test dedup_tests[] = { }, }, { + .descr = "dedup: void equiv check", + /* + * // CU 1: + * struct s { + * struct {} *x; + * }; + * // CU 2: + * struct s { + * int *x; + * }; + */ + .input = { + .raw_types = { + /* CU 1 */ + BTF_STRUCT_ENC(0, 0, 1), /* [1] struct {} */ + BTF_PTR_ENC(1), /* [2] ptr -> [1] */ + BTF_STRUCT_ENC(NAME_NTH(1), 1, 8), /* [3] struct s */ + BTF_MEMBER_ENC(NAME_NTH(2), 2, 0), + /* CU 2 */ + BTF_PTR_ENC(0), /* [4] ptr -> void */ + BTF_STRUCT_ENC(NAME_NTH(1), 1, 8), /* [5] struct s */ + BTF_MEMBER_ENC(NAME_NTH(2), 4, 0), + BTF_END_RAW, + }, + BTF_STR_SEC("\0s\0x"), + }, + .expect = { + .raw_types = { + /* CU 1 */ + BTF_STRUCT_ENC(0, 0, 1), /* [1] 
struct {} */ + BTF_PTR_ENC(1), /* [2] ptr -> [1] */ + BTF_STRUCT_ENC(NAME_NTH(1), 1, 8), /* [3] struct s */ + BTF_MEMBER_ENC(NAME_NTH(2), 2, 0), + /* CU 2 */ + BTF_PTR_ENC(0), /* [4] ptr -> void */ + BTF_STRUCT_ENC(NAME_NTH(1), 1, 8), /* [5] struct s */ + BTF_MEMBER_ENC(NAME_NTH(2), 4, 0), + BTF_END_RAW, + }, + BTF_STR_SEC("\0s\0x"), + }, + .opts = { + .dont_resolve_fwds = false, + .dedup_table_size = 1, /* force hash collisions */ + }, +}, +{ .descr = "dedup: all possible kinds (no duplicates)", .input = { .raw_types = { @@ -5874,6 +6537,95 @@ const struct btf_dedup_test dedup_tests[] = { .dont_resolve_fwds = false, }, }, +{ + .descr = "dedup: enum fwd resolution", + .input = { + .raw_types = { + /* [1] fwd enum 'e1' before full enum */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 4), + /* [2] full enum 'e1' after fwd */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), + BTF_ENUM_ENC(NAME_NTH(2), 123), + /* [3] full enum 'e2' before fwd */ + BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), + BTF_ENUM_ENC(NAME_NTH(4), 456), + /* [4] fwd enum 'e2' after full enum */ + BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 4), + /* [5] incompatible fwd enum with different size */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 1), + /* [6] incompatible full enum with different value */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), + BTF_ENUM_ENC(NAME_NTH(2), 321), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val\0e2\0e2_val"), + }, + .expect = { + .raw_types = { + /* [1] full enum 'e1' */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), + BTF_ENUM_ENC(NAME_NTH(2), 123), + /* [2] full enum 'e2' */ + BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), + BTF_ENUM_ENC(NAME_NTH(4), 456), + /* [3] incompatible fwd enum with different size */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 1), + /* [4] incompatible full enum with 
different value */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), + BTF_ENUM_ENC(NAME_NTH(2), 321), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val\0e2\0e2_val"), + }, + .opts = { + .dont_resolve_fwds = false, + }, +}, +{ + .descr = "dedup: datasec and vars pass-through", + .input = { + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* static int t */ + BTF_VAR_ENC(NAME_NTH(2), 1, 0), /* [2] */ + /* .bss section */ /* [3] */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(2, 0, 4), + /* int, referenced from [5] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [4] */ + /* another static int t */ + BTF_VAR_ENC(NAME_NTH(2), 4, 0), /* [5] */ + /* another .bss section */ /* [6] */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(5, 0, 4), + BTF_END_RAW, + }, + BTF_STR_SEC("\0.bss\0t"), + }, + .expect = { + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* static int t */ + BTF_VAR_ENC(NAME_NTH(2), 1, 0), /* [2] */ + /* .bss section */ /* [3] */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(2, 0, 4), + /* another static int t */ + BTF_VAR_ENC(NAME_NTH(2), 1, 0), /* [4] */ + /* another .bss section */ /* [5] */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(4, 0, 4), + BTF_END_RAW, + }, + BTF_STR_SEC("\0.bss\0t"), + }, + .opts = { + .dont_resolve_fwds = false, + .dedup_table_size = 1 + }, +}, }; @@ -5903,6 +6655,10 @@ static int btf_type_size(const struct btf_type *t) return base_size + vlen * sizeof(struct btf_member); case BTF_KIND_FUNC_PROTO: return base_size + vlen * sizeof(struct btf_param); + case BTF_KIND_VAR: + return base_size + sizeof(struct btf_var); + case BTF_KIND_DATASEC: + return base_size + vlen * sizeof(struct btf_var_secinfo); default: fprintf(stderr, "Unsupported 
BTF_KIND:%u\n", kind); return -EINVAL; diff --git a/tools/testing/selftests/bpf/test_btf.h b/tools/testing/selftests/bpf/test_btf.h new file mode 100644 index 000000000000..2023725f1962 --- /dev/null +++ b/tools/testing/selftests/bpf/test_btf.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019 Facebook */ + +#ifndef _TEST_BTF_H +#define _TEST_BTF_H + +#define BTF_INFO_ENC(kind, kind_flag, vlen) \ + ((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN)) + +#define BTF_TYPE_ENC(name, info, size_or_type) \ + (name), (info), (size_or_type) + +#define BTF_INT_ENC(encoding, bits_offset, nr_bits) \ + ((encoding) << 24 | (bits_offset) << 16 | (nr_bits)) +#define BTF_TYPE_INT_ENC(name, encoding, bits_offset, bits, sz) \ + BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz), \ + BTF_INT_ENC(encoding, bits_offset, bits) + +#define BTF_FWD_ENC(name, kind_flag) \ + BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FWD, kind_flag, 0), 0) + +#define BTF_ARRAY_ENC(type, index_type, nr_elems) \ + (type), (index_type), (nr_elems) +#define BTF_TYPE_ARRAY_ENC(type, index_type, nr_elems) \ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ARRAY, 0, 0), 0), \ + BTF_ARRAY_ENC(type, index_type, nr_elems) + +#define BTF_STRUCT_ENC(name, nr_elems, sz) \ + BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, nr_elems), sz) + +#define BTF_UNION_ENC(name, nr_elems, sz) \ + BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_UNION, 0, nr_elems), sz) + +#define BTF_VAR_ENC(name, type, linkage) \ + BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), type), (linkage) +#define BTF_VAR_SECINFO_ENC(type, offset, size) \ + (type), (offset), (size) + +#define BTF_MEMBER_ENC(name, type, bits_offset) \ + (name), (type), (bits_offset) +#define BTF_ENUM_ENC(name, val) (name), (val) +#define BTF_MEMBER_OFFSET(bitfield_size, bits_offset) \ + ((bitfield_size) << 24 | (bits_offset)) + +#define BTF_TYPEDEF_ENC(name, type) \ + BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), type) + 
+#define BTF_PTR_ENC(type) \ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), type) + +#define BTF_CONST_ENC(type) \ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), type) + +#define BTF_VOLATILE_ENC(type) \ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_VOLATILE, 0, 0), type) + +#define BTF_RESTRICT_ENC(type) \ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_RESTRICT, 0, 0), type) + +#define BTF_FUNC_PROTO_ENC(ret_type, nargs) \ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, nargs), ret_type) + +#define BTF_FUNC_PROTO_ARG_ENC(name, type) \ + (name), (type) + +#define BTF_FUNC_ENC(name, func_proto) \ + BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), func_proto) + +#endif /* _TEST_BTF_H */ diff --git a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh index d4d3391cc13a..acf7a74f97cd 100755 --- a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh +++ b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh @@ -129,6 +129,24 @@ setup() ip link set veth7 netns ${NS2} ip link set veth8 netns ${NS3} + if [ ! 
-z "${VRF}" ] ; then + ip -netns ${NS1} link add red type vrf table 1001 + ip -netns ${NS1} link set red up + ip -netns ${NS1} route add table 1001 unreachable default metric 8192 + ip -netns ${NS1} -6 route add table 1001 unreachable default metric 8192 + ip -netns ${NS1} link set veth1 vrf red + ip -netns ${NS1} link set veth5 vrf red + + ip -netns ${NS2} link add red type vrf table 1001 + ip -netns ${NS2} link set red up + ip -netns ${NS2} route add table 1001 unreachable default metric 8192 + ip -netns ${NS2} -6 route add table 1001 unreachable default metric 8192 + ip -netns ${NS2} link set veth2 vrf red + ip -netns ${NS2} link set veth3 vrf red + ip -netns ${NS2} link set veth6 vrf red + ip -netns ${NS2} link set veth7 vrf red + fi + # configure addesses: the top route (1-2-3-4) ip -netns ${NS1} addr add ${IPv4_1}/24 dev veth1 ip -netns ${NS2} addr add ${IPv4_2}/24 dev veth2 @@ -163,29 +181,29 @@ setup() # NS1 # top route - ip -netns ${NS1} route add ${IPv4_2}/32 dev veth1 - ip -netns ${NS1} route add default dev veth1 via ${IPv4_2} # go top by default - ip -netns ${NS1} -6 route add ${IPv6_2}/128 dev veth1 - ip -netns ${NS1} -6 route add default dev veth1 via ${IPv6_2} # go top by default + ip -netns ${NS1} route add ${IPv4_2}/32 dev veth1 ${VRF} + ip -netns ${NS1} route add default dev veth1 via ${IPv4_2} ${VRF} # go top by default + ip -netns ${NS1} -6 route add ${IPv6_2}/128 dev veth1 ${VRF} + ip -netns ${NS1} -6 route add default dev veth1 via ${IPv6_2} ${VRF} # go top by default # bottom route - ip -netns ${NS1} route add ${IPv4_6}/32 dev veth5 - ip -netns ${NS1} route add ${IPv4_7}/32 dev veth5 via ${IPv4_6} - ip -netns ${NS1} route add ${IPv4_8}/32 dev veth5 via ${IPv4_6} - ip -netns ${NS1} -6 route add ${IPv6_6}/128 dev veth5 - ip -netns ${NS1} -6 route add ${IPv6_7}/128 dev veth5 via ${IPv6_6} - ip -netns ${NS1} -6 route add ${IPv6_8}/128 dev veth5 via ${IPv6_6} + ip -netns ${NS1} route add ${IPv4_6}/32 dev veth5 ${VRF} + ip -netns ${NS1} route add 
${IPv4_7}/32 dev veth5 via ${IPv4_6} ${VRF} + ip -netns ${NS1} route add ${IPv4_8}/32 dev veth5 via ${IPv4_6} ${VRF} + ip -netns ${NS1} -6 route add ${IPv6_6}/128 dev veth5 ${VRF} + ip -netns ${NS1} -6 route add ${IPv6_7}/128 dev veth5 via ${IPv6_6} ${VRF} + ip -netns ${NS1} -6 route add ${IPv6_8}/128 dev veth5 via ${IPv6_6} ${VRF} # NS2 # top route - ip -netns ${NS2} route add ${IPv4_1}/32 dev veth2 - ip -netns ${NS2} route add ${IPv4_4}/32 dev veth3 - ip -netns ${NS2} -6 route add ${IPv6_1}/128 dev veth2 - ip -netns ${NS2} -6 route add ${IPv6_4}/128 dev veth3 + ip -netns ${NS2} route add ${IPv4_1}/32 dev veth2 ${VRF} + ip -netns ${NS2} route add ${IPv4_4}/32 dev veth3 ${VRF} + ip -netns ${NS2} -6 route add ${IPv6_1}/128 dev veth2 ${VRF} + ip -netns ${NS2} -6 route add ${IPv6_4}/128 dev veth3 ${VRF} # bottom route - ip -netns ${NS2} route add ${IPv4_5}/32 dev veth6 - ip -netns ${NS2} route add ${IPv4_8}/32 dev veth7 - ip -netns ${NS2} -6 route add ${IPv6_5}/128 dev veth6 - ip -netns ${NS2} -6 route add ${IPv6_8}/128 dev veth7 + ip -netns ${NS2} route add ${IPv4_5}/32 dev veth6 ${VRF} + ip -netns ${NS2} route add ${IPv4_8}/32 dev veth7 ${VRF} + ip -netns ${NS2} -6 route add ${IPv6_5}/128 dev veth6 ${VRF} + ip -netns ${NS2} -6 route add ${IPv6_8}/128 dev veth7 ${VRF} # NS3 # top route @@ -207,16 +225,16 @@ setup() ip -netns ${NS3} tunnel add gre_dev mode gre remote ${IPv4_1} local ${IPv4_GRE} ttl 255 ip -netns ${NS3} link set gre_dev up ip -netns ${NS3} addr add ${IPv4_GRE} dev gre_dev - ip -netns ${NS1} route add ${IPv4_GRE}/32 dev veth5 via ${IPv4_6} - ip -netns ${NS2} route add ${IPv4_GRE}/32 dev veth7 via ${IPv4_8} + ip -netns ${NS1} route add ${IPv4_GRE}/32 dev veth5 via ${IPv4_6} ${VRF} + ip -netns ${NS2} route add ${IPv4_GRE}/32 dev veth7 via ${IPv4_8} ${VRF} # configure IPv6 GRE device in NS3, and a route to it via the "bottom" route ip -netns ${NS3} -6 tunnel add name gre6_dev mode ip6gre remote ${IPv6_1} local ${IPv6_GRE} ttl 255 ip -netns ${NS3} link set 
gre6_dev up ip -netns ${NS3} -6 addr add ${IPv6_GRE} nodad dev gre6_dev - ip -netns ${NS1} -6 route add ${IPv6_GRE}/128 dev veth5 via ${IPv6_6} - ip -netns ${NS2} -6 route add ${IPv6_GRE}/128 dev veth7 via ${IPv6_8} + ip -netns ${NS1} -6 route add ${IPv6_GRE}/128 dev veth5 via ${IPv6_6} ${VRF} + ip -netns ${NS2} -6 route add ${IPv6_GRE}/128 dev veth7 via ${IPv6_8} ${VRF} # rp_filter gets confused by what these tests are doing, so disable it ip netns exec ${NS1} sysctl -wq net.ipv4.conf.all.rp_filter=0 @@ -244,18 +262,18 @@ trap cleanup EXIT remove_routes_to_gredev() { - ip -netns ${NS1} route del ${IPv4_GRE} dev veth5 - ip -netns ${NS2} route del ${IPv4_GRE} dev veth7 - ip -netns ${NS1} -6 route del ${IPv6_GRE}/128 dev veth5 - ip -netns ${NS2} -6 route del ${IPv6_GRE}/128 dev veth7 + ip -netns ${NS1} route del ${IPv4_GRE} dev veth5 ${VRF} + ip -netns ${NS2} route del ${IPv4_GRE} dev veth7 ${VRF} + ip -netns ${NS1} -6 route del ${IPv6_GRE}/128 dev veth5 ${VRF} + ip -netns ${NS2} -6 route del ${IPv6_GRE}/128 dev veth7 ${VRF} } add_unreachable_routes_to_gredev() { - ip -netns ${NS1} route add unreachable ${IPv4_GRE}/32 - ip -netns ${NS2} route add unreachable ${IPv4_GRE}/32 - ip -netns ${NS1} -6 route add unreachable ${IPv6_GRE}/128 - ip -netns ${NS2} -6 route add unreachable ${IPv6_GRE}/128 + ip -netns ${NS1} route add unreachable ${IPv4_GRE}/32 ${VRF} + ip -netns ${NS2} route add unreachable ${IPv4_GRE}/32 ${VRF} + ip -netns ${NS1} -6 route add unreachable ${IPv6_GRE}/128 ${VRF} + ip -netns ${NS2} -6 route add unreachable ${IPv6_GRE}/128 ${VRF} } test_ping() @@ -265,10 +283,10 @@ test_ping() local RET=0 if [ "${PROTO}" == "IPv4" ] ; then - ip netns exec ${NS1} ping -c 1 -W 1 -I ${IPv4_SRC} ${IPv4_DST} 2>&1 > /dev/null + ip netns exec ${NS1} ping -c 1 -W 1 -I veth1 ${IPv4_DST} 2>&1 > /dev/null RET=$? 
elif [ "${PROTO}" == "IPv6" ] ; then - ip netns exec ${NS1} ping6 -c 1 -W 6 -I ${IPv6_SRC} ${IPv6_DST} 2>&1 > /dev/null + ip netns exec ${NS1} ping6 -c 1 -W 6 -I veth1 ${IPv6_DST} 2>&1 > /dev/null RET=$? else echo " test_ping: unknown PROTO: ${PROTO}" @@ -328,7 +346,7 @@ test_gso() test_egress() { local readonly ENCAP=$1 - echo "starting egress ${ENCAP} encap test" + echo "starting egress ${ENCAP} encap test ${VRF}" setup # by default, pings work @@ -336,26 +354,35 @@ test_egress() test_ping IPv6 0 # remove NS2->DST routes, ping fails - ip -netns ${NS2} route del ${IPv4_DST}/32 dev veth3 - ip -netns ${NS2} -6 route del ${IPv6_DST}/128 dev veth3 + ip -netns ${NS2} route del ${IPv4_DST}/32 dev veth3 ${VRF} + ip -netns ${NS2} -6 route del ${IPv6_DST}/128 dev veth3 ${VRF} test_ping IPv4 1 test_ping IPv6 1 # install replacement routes (LWT/eBPF), pings succeed if [ "${ENCAP}" == "IPv4" ] ; then - ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj test_lwt_ip_encap.o sec encap_gre dev veth1 - ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj test_lwt_ip_encap.o sec encap_gre dev veth1 + ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj \ + test_lwt_ip_encap.o sec encap_gre dev veth1 ${VRF} + ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj \ + test_lwt_ip_encap.o sec encap_gre dev veth1 ${VRF} elif [ "${ENCAP}" == "IPv6" ] ; then - ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj test_lwt_ip_encap.o sec encap_gre6 dev veth1 - ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj test_lwt_ip_encap.o sec encap_gre6 dev veth1 + ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj \ + test_lwt_ip_encap.o sec encap_gre6 dev veth1 ${VRF} + ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj \ + test_lwt_ip_encap.o sec encap_gre6 dev veth1 ${VRF} else echo " unknown encap ${ENCAP}" TEST_STATUS=1 fi test_ping IPv4 0 test_ping IPv6 0 - test_gso IPv4 - test_gso IPv6 + + # skip GSO tests with VRF: VRF routing needs 
properly assigned + # source IP/device, which is easy to do with ping and hard with dd/nc. + if [ -z "${VRF}" ] ; then + test_gso IPv4 + test_gso IPv6 + fi # a negative test: remove routes to GRE devices: ping fails remove_routes_to_gredev @@ -374,7 +401,7 @@ test_egress() test_ingress() { local readonly ENCAP=$1 - echo "starting ingress ${ENCAP} encap test" + echo "starting ingress ${ENCAP} encap test ${VRF}" setup # need to wait a bit for IPv6 to autoconf, otherwise @@ -385,18 +412,22 @@ test_ingress() test_ping IPv6 0 # remove NS2->DST routes, pings fail - ip -netns ${NS2} route del ${IPv4_DST}/32 dev veth3 - ip -netns ${NS2} -6 route del ${IPv6_DST}/128 dev veth3 + ip -netns ${NS2} route del ${IPv4_DST}/32 dev veth3 ${VRF} + ip -netns ${NS2} -6 route del ${IPv6_DST}/128 dev veth3 ${VRF} test_ping IPv4 1 test_ping IPv6 1 # install replacement routes (LWT/eBPF), pings succeed if [ "${ENCAP}" == "IPv4" ] ; then - ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj test_lwt_ip_encap.o sec encap_gre dev veth2 - ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj test_lwt_ip_encap.o sec encap_gre dev veth2 + ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj \ + test_lwt_ip_encap.o sec encap_gre dev veth2 ${VRF} + ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj \ + test_lwt_ip_encap.o sec encap_gre dev veth2 ${VRF} elif [ "${ENCAP}" == "IPv6" ] ; then - ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj test_lwt_ip_encap.o sec encap_gre6 dev veth2 - ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj test_lwt_ip_encap.o sec encap_gre6 dev veth2 + ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj \ + test_lwt_ip_encap.o sec encap_gre6 dev veth2 ${VRF} + ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj \ + test_lwt_ip_encap.o sec encap_gre6 dev veth2 ${VRF} else echo "FAIL: unknown encap ${ENCAP}" TEST_STATUS=1 @@ -418,6 +449,13 @@ test_ingress() process_test_results } +VRF="" +test_egress IPv4 +test_egress IPv6 
+test_ingress IPv4 +test_ingress IPv6 + +VRF="vrf red" test_egress IPv4 test_egress IPv6 test_ingress IPv4 diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 3c627771f965..246f745cb006 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -27,6 +27,7 @@ #include "bpf_util.h" #include "bpf_rlimit.h" +#include "test_maps.h" #ifndef ENOTSUPP #define ENOTSUPP 524 @@ -36,15 +37,6 @@ static int skips; static int map_flags; -#define CHECK(condition, tag, format...) ({ \ - int __ret = !!(condition); \ - if (__ret) { \ - printf("%s(%d):FAIL:%s ", __func__, __LINE__, tag); \ - printf(format); \ - exit(-1); \ - } \ -}) - static void test_hashmap(unsigned int task, void *data) { long long key, next_key, first_key, value; @@ -1703,6 +1695,10 @@ static void run_all_tests(void) test_map_in_map(); } +#define DECLARE +#include <map_tests/tests.h> +#undef DECLARE + int main(void) { srand(time(NULL)); @@ -1713,6 +1709,10 @@ int main(void) map_flags = BPF_F_NO_PREALLOC; run_all_tests(); +#define CALL +#include <map_tests/tests.h> +#undef CALL + printf("test_maps: OK, %d SKIPPED\n", skips); return 0; } diff --git a/tools/testing/selftests/bpf/test_maps.h b/tools/testing/selftests/bpf/test_maps.h new file mode 100644 index 000000000000..77d8587ac4ed --- /dev/null +++ b/tools/testing/selftests/bpf/test_maps.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TEST_MAPS_H +#define _TEST_MAPS_H + +#include <stdio.h> +#include <stdlib.h> + +#define CHECK(condition, tag, format...) 
({ \ + int __ret = !!(condition); \ + if (__ret) { \ + printf("%s(%d):FAIL:%s ", __func__, __LINE__, tag); \ + printf(format); \ + exit(-1); \ + } \ +}) + +#endif diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index 84bea3985d64..425f9ed27c3b 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -1,6 +1,7 @@ #!/usr/bin/python3 # Copyright (C) 2017 Netronome Systems, Inc. +# Copyright (c) 2019 Mellanox Technologies. All rights reserved # # This software is licensed under the GNU General License Version 2, # June 1991 as shown in the file COPYING in the top-level directory of this @@ -15,10 +16,12 @@ from datetime import datetime import argparse +import errno import json import os import pprint import random +import re import string import struct import subprocess @@ -306,6 +309,8 @@ class DebugfsDir: _, out = cmd('ls ' + path) for f in out.split(): + if f == "ports": + continue p = os.path.join(path, f) if os.path.isfile(p): _, out = cmd('cat %s/%s' % (path, f)) @@ -321,42 +326,112 @@ class DebugfsDir: return dfs -class NetdevSim: +class NetdevSimDev: """ - Class for netdevsim netdevice and its attributes. + Class for netdevsim bus device and its attributes. """ - def __init__(self, link=None): - self.link = link + def __init__(self, port_count=1): + addr = 0 + while True: + try: + with open("/sys/bus/netdevsim/new_device", "w") as f: + f.write("%u %u" % (addr, port_count)) + except OSError as e: + if e.errno == errno.ENOSPC: + addr += 1 + continue + raise e + break + self.addr = addr + + # As probe of netdevsim device might happen from a workqueue, + # so wait here until all netdevs appear. 
+ self.wait_for_netdevs(port_count) + + ret, out = cmd("udevadm settle", fail=False) + if ret: + raise Exception("udevadm settle failed") + ifnames = self.get_ifnames() - self.dev = self._netdevsim_create() devs.append(self) + self.dfs_dir = "/sys/kernel/debug/netdevsim/netdevsim%u/" % addr + + self.nsims = [] + for port_index in range(port_count): + self.nsims.append(NetdevSim(self, port_index, ifnames[port_index])) + + def get_ifnames(self): + ifnames = [] + listdir = os.listdir("/sys/bus/netdevsim/devices/netdevsim%u/net/" % self.addr) + for ifname in listdir: + ifnames.append(ifname) + ifnames.sort() + return ifnames + + def wait_for_netdevs(self, port_count): + timeout = 5 + timeout_start = time.time() + + while True: + try: + ifnames = self.get_ifnames() + except FileNotFoundError as e: + ifnames = [] + if len(ifnames) == port_count: + break + if time.time() < timeout_start + timeout: + continue + raise Exception("netdevices did not appear within timeout") - self.ns = "" + def dfs_num_bound_progs(self): + path = os.path.join(self.dfs_dir, "bpf_bound_progs") + _, progs = cmd('ls %s' % (path)) + return len(progs.split()) - self.dfs_dir = '/sys/kernel/debug/netdevsim/%s' % (self.dev['ifname']) - self.sdev_dir = self.dfs_dir + '/sdev/' - self.dfs_refresh() + def dfs_get_bound_progs(self, expected): + progs = DebugfsDir(os.path.join(self.dfs_dir, "bpf_bound_progs")) + if expected is not None: + if len(progs) != expected: + fail(True, "%d BPF programs bound, expected %d" % + (len(progs), expected)) + return progs - def __getitem__(self, key): - return self.dev[key] + def remove(self): + with open("/sys/bus/netdevsim/del_device", "w") as f: + f.write("%u" % self.addr) + devs.remove(self) - def _netdevsim_create(self): - link = "" if self.link is None else "link " + self.link.dev['ifname'] - _, old = ip("link show") - ip("link add sim%d {link} type netdevsim".format(link=link)) - _, new = ip("link show") + def remove_nsim(self, nsim): + self.nsims.remove(nsim) + with 
open("/sys/bus/netdevsim/devices/netdevsim%u/del_port" % self.addr ,"w") as f: + f.write("%u" % nsim.port_index) - for dev in new: - f = filter(lambda x: x["ifname"] == dev["ifname"], old) - if len(list(f)) == 0: - return dev +class NetdevSim: + """ + Class for netdevsim netdevice and its attributes. + """ + + def __init__(self, nsimdev, port_index, ifname): + # In case udev renamed the netdev to according to new schema, + # check if the name matches the port_index. + nsimnamere = re.compile("eni\d+np(\d+)") + match = nsimnamere.match(ifname) + if match and int(match.groups()[0]) != port_index + 1: + raise Exception("netdevice name mismatches the expected one") + + self.nsimdev = nsimdev + self.port_index = port_index + self.ns = "" + self.dfs_dir = "%s/ports/%u/" % (nsimdev.dfs_dir, port_index) + self.dfs_refresh() + _, [self.dev] = ip("link show dev %s" % ifname) - raise Exception("failed to create netdevsim device") + def __getitem__(self, key): + return self.dev[key] def remove(self): - devs.remove(self) - ip("link del dev %s" % (self.dev["ifname"]), ns=self.ns) + self.nsimdev.remove_nsim(self) def dfs_refresh(self): self.dfs = DebugfsDir(self.dfs_dir) @@ -367,22 +442,9 @@ class NetdevSim: _, data = cmd('cat %s' % (path)) return data.strip() - def dfs_num_bound_progs(self): - path = os.path.join(self.sdev_dir, "bpf_bound_progs") - _, progs = cmd('ls %s' % (path)) - return len(progs.split()) - - def dfs_get_bound_progs(self, expected): - progs = DebugfsDir(os.path.join(self.sdev_dir, "bpf_bound_progs")) - if expected is not None: - if len(progs) != expected: - fail(True, "%d BPF programs bound, expected %d" % - (len(progs), expected)) - return progs - def wait_for_flush(self, bound=0, total=0, n_retry=20): for i in range(n_retry): - nbound = self.dfs_num_bound_progs() + nbound = self.nsimdev.dfs_num_bound_progs() nprogs = len(bpftool_prog_list()) if nbound == bound and nprogs == total: return @@ -612,7 +674,7 @@ def test_spurios_extack(sim, obj, skip_hw, 
needle): include_stderr=True) check_no_extack(res, needle) -def test_multi_prog(sim, obj, modename, modeid): +def test_multi_prog(simdev, sim, obj, modename, modeid): start_test("Test multi-attachment XDP - %s + offload..." % (modename or "default", )) sim.set_xdp(obj, "offload") @@ -668,11 +730,12 @@ def test_multi_prog(sim, obj, modename, modeid): check_multi_basic(two_xdps) start_test("Test multi-attachment XDP - device remove...") - sim.remove() + simdev.remove() - sim = NetdevSim() + simdev = NetdevSimDev() + sim, = simdev.nsims sim.set_ethtool_tc_offloads(True) - return sim + return [simdev, sim] # Parse command line parser = argparse.ArgumentParser() @@ -729,12 +792,14 @@ try: bytecode = bpf_bytecode("1,6 0 0 4294967295,") start_test("Test destruction of generic XDP...") - sim = NetdevSim() + simdev = NetdevSimDev() + sim, = simdev.nsims sim.set_xdp(obj, "generic") - sim.remove() + simdev.remove() bpftool_prog_list_wait(expected=0) - sim = NetdevSim() + simdev = NetdevSimDev() + sim, = simdev.nsims sim.tc_add_ingress() start_test("Test TC non-offloaded...") @@ -744,7 +809,7 @@ try: start_test("Test TC non-offloaded isn't getting bound...") ret, _ = sim.cls_bpf_add_filter(obj, fail=False) fail(ret != 0, "Software TC filter did not load") - sim.dfs_get_bound_progs(expected=0) + simdev.dfs_get_bound_progs(expected=0) sim.tc_flush_filters() @@ -761,7 +826,7 @@ try: start_test("Test TC offload by default...") ret, _ = sim.cls_bpf_add_filter(obj, fail=False) fail(ret != 0, "Software TC filter did not load") - sim.dfs_get_bound_progs(expected=0) + simdev.dfs_get_bound_progs(expected=0) ingress = sim.tc_show_ingress(expected=1) fltr = ingress[0] fail(not fltr["in_hw"], "Filter not offloaded by default") @@ -771,7 +836,7 @@ try: start_test("Test TC cBPF bytcode tries offload by default...") ret, _ = sim.cls_bpf_add_filter(bytecode, fail=False) fail(ret != 0, "Software TC filter did not load") - sim.dfs_get_bound_progs(expected=0) + 
simdev.dfs_get_bound_progs(expected=0) ingress = sim.tc_show_ingress(expected=1) fltr = ingress[0] fail(not fltr["in_hw"], "Bytecode not offloaded by default") @@ -839,7 +904,7 @@ try: check_verifier_log(err, "[netdevsim] Hello from netdevsim!") start_test("Test TC offload basics...") - dfs = sim.dfs_get_bound_progs(expected=1) + dfs = simdev.dfs_get_bound_progs(expected=1) progs = bpftool_prog_list(expected=1) ingress = sim.tc_show_ingress(expected=1) @@ -874,18 +939,20 @@ try: start_test("Test destroying device gets rid of TC filters...") sim.cls_bpf_add_filter(obj, skip_sw=True) - sim.remove() + simdev.remove() bpftool_prog_list_wait(expected=0) - sim = NetdevSim() + simdev = NetdevSimDev() + sim, = simdev.nsims sim.set_ethtool_tc_offloads(True) start_test("Test destroying device gets rid of XDP...") sim.set_xdp(obj, "offload") - sim.remove() + simdev.remove() bpftool_prog_list_wait(expected=0) - sim = NetdevSim() + simdev = NetdevSimDev() + sim, = simdev.nsims sim.set_ethtool_tc_offloads(True) start_test("Test XDP prog reporting...") @@ -971,7 +1038,7 @@ try: check_verifier_log(err, "[netdevsim] Hello from netdevsim!") start_test("Test XDP offload is device bound...") - dfs = sim.dfs_get_bound_progs(expected=1) + dfs = simdev.dfs_get_bound_progs(expected=1) dprog = dfs[0] fail(prog["id"] != link_xdp["id"], "Program IDs don't match") @@ -990,7 +1057,8 @@ try: bpftool_prog_list_wait(expected=0) start_test("Test attempt to use a program for a wrong device...") - sim2 = NetdevSim() + simdev2 = NetdevSimDev() + sim2, = simdev2.nsims sim2.set_xdp(obj, "offload") pin_file, pinned = pin_prog("/sys/fs/bpf/tmp") @@ -998,7 +1066,7 @@ try: fail=False, include_stderr=True) fail(ret == 0, "Pinned program loaded for a different device accepted") check_extack_nsim(err, "program bound to different dev.", args) - sim2.remove() + simdev2.remove() ret, _, err = sim.set_xdp(pinned, "offload", fail=False, include_stderr=True) fail(ret == 0, "Pinned program loaded for a removed 
device accepted") @@ -1006,9 +1074,9 @@ try: rm(pin_file) bpftool_prog_list_wait(expected=0) - sim = test_multi_prog(sim, obj, "", 1) - sim = test_multi_prog(sim, obj, "drv", 1) - sim = test_multi_prog(sim, obj, "generic", 2) + simdev, sim = test_multi_prog(simdev, sim, obj, "", 1) + simdev, sim = test_multi_prog(simdev, sim, obj, "drv", 1) + simdev, sim = test_multi_prog(simdev, sim, obj, "generic", 2) start_test("Test mixing of TC and XDP...") sim.tc_add_ingress() @@ -1055,15 +1123,15 @@ try: start_test("Test if netdev removal waits for translation...") delay_msec = 500 - sim.dfs["bpf_bind_verifier_delay"] = delay_msec + sim.dfs["dev/bpf_bind_verifier_delay"] = delay_msec start = time.time() cmd_line = "tc filter add dev %s ingress bpf %s da skip_sw" % \ (sim['ifname'], obj) tc_proc = cmd(cmd_line, background=True, fail=False) # Wait for the verifier to start - while sim.dfs_num_bound_progs() <= 2: + while simdev.dfs_num_bound_progs() <= 2: pass - sim.remove() + simdev.remove() end = time.time() ret, _ = cmd_result(tc_proc, fail=False) time_diff = end - start @@ -1078,7 +1146,8 @@ try: clean_up() bpftool_prog_list_wait(expected=0) - sim = NetdevSim() + simdev = NetdevSimDev() + sim, = simdev.nsims map_obj = bpf_obj("sample_map_ret0.o") start_test("Test loading program with maps...") sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON @@ -1100,7 +1169,7 @@ try: prog_file, _ = pin_prog("/sys/fs/bpf/tmp_prog") map_file, _ = pin_map("/sys/fs/bpf/tmp_map", idx=1, expected=2) - sim.remove() + simdev.remove() start_test("Test bpftool bound info reporting (removed dev)...") check_dev_info_removed(prog_file=prog_file, map_file=map_file) @@ -1109,7 +1178,8 @@ try: clean_up() bpftool_prog_list_wait(expected=0) - sim = NetdevSim() + simdev = NetdevSimDev() + sim, = simdev.nsims start_test("Test map update (no flags)...") sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON @@ -1190,27 +1260,29 @@ try: start_test("Test map remove...") 
sim.unset_xdp("offload") bpftool_map_list_wait(expected=0) - sim.remove() + simdev.remove() - sim = NetdevSim() + simdev = NetdevSimDev() + sim, = simdev.nsims sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON - sim.remove() + simdev.remove() bpftool_map_list_wait(expected=0) start_test("Test map creation fail path...") - sim = NetdevSim() + simdev = NetdevSimDev() + sim, = simdev.nsims sim.dfs["bpf_map_accept"] = "N" ret, _ = sim.set_xdp(map_obj, "offload", JSON=False, fail=False) fail(ret == 0, "netdevsim didn't refuse to create a map with offload disabled") - sim.remove() + simdev.remove() start_test("Test multi-dev ASIC program reuse...") - simA = NetdevSim() - simB1 = NetdevSim() - simB2 = NetdevSim(link=simB1) - simB3 = NetdevSim(link=simB1) + simdevA = NetdevSimDev() + simA, = simdevA.nsims + simdevB = NetdevSimDev(3) + simB1, simB2, simB3 = simdevB.nsims sims = (simA, simB1, simB2, simB3) simB = (simB1, simB2, simB3) @@ -1222,13 +1294,13 @@ try: progB = bpf_pinned("/sys/fs/bpf/nsimB") simA.set_xdp(progA, "offload", JSON=False) - for d in simB: + for d in simdevB.nsims: d.set_xdp(progB, "offload", JSON=False) start_test("Test multi-dev ASIC cross-dev replace...") ret, _ = simA.set_xdp(progB, "offload", force=True, JSON=False, fail=False) fail(ret == 0, "cross-ASIC program allowed") - for d in simB: + for d in simdevB.nsims: ret, _ = d.set_xdp(progA, "offload", force=True, JSON=False, fail=False) fail(ret == 0, "cross-ASIC program allowed") @@ -1240,7 +1312,7 @@ try: fail=False, include_stderr=True) fail(ret == 0, "cross-ASIC program allowed") check_extack_nsim(err, "program bound to different dev.", args) - for d in simB: + for d in simdevB.nsims: ret, _, err = d.set_xdp(progA, "offload", force=True, JSON=False, fail=False, include_stderr=True) fail(ret == 0, "cross-ASIC program allowed") @@ -1277,7 +1349,7 @@ try: start_test("Test multi-dev ASIC cross-dev destruction...") bpftool_prog_list_wait(expected=2) - simA.remove() + 
simdevA.remove() bpftool_prog_list_wait(expected=1) ifnameB = bpftool("prog show %s" % (progB))[1]["dev"]["ifname"] @@ -1295,6 +1367,7 @@ try: fail(ifnameB != simB3['ifname'], "program not bound to remaining device") simB3.remove() + simdevB.remove() bpftool_prog_list_wait(expected=0) start_test("Test multi-dev ASIC cross-dev destruction - orphaned...") diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 5d10aee9e277..bf5c90998916 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -9,6 +9,7 @@ int error_cnt, pass_cnt; bool jit_enabled; +bool verifier_stats = false; struct ipv4_packet pkt_v4 = { .eth.h_proto = __bpf_constant_htons(ETH_P_IP), @@ -162,12 +163,15 @@ void *spin_lock_thread(void *arg) #include <prog_tests/tests.h> #undef DECLARE -int main(void) +int main(int ac, char **av) { srand(time(NULL)); jit_enabled = is_jit_enabled(); + if (ac == 2 && strcmp(av[1], "-s") == 0) + verifier_stats = true; + #define CALL #include <prog_tests/tests.h> #undef CALL diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 51a07367cd43..f095e1d4c657 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -40,6 +40,7 @@ typedef __u16 __sum16; extern int error_cnt, pass_cnt; extern bool jit_enabled; +extern bool verifier_stats; #define MAGIC_BYTES 123 diff --git a/tools/testing/selftests/bpf/test_section_names.c b/tools/testing/selftests/bpf/test_section_names.c index 7c4f41572b1c..bebd4fbca1f4 100644 --- a/tools/testing/selftests/bpf/test_section_names.c +++ b/tools/testing/selftests/bpf/test_section_names.c @@ -119,6 +119,11 @@ static struct sec_name_test tests[] = { {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG}, {0, BPF_CGROUP_UDP6_SENDMSG}, }, + { + "cgroup/sysctl", + {0, BPF_PROG_TYPE_CGROUP_SYSCTL, BPF_CGROUP_SYSCTL}, + {0, BPF_CGROUP_SYSCTL}, + }, }; static 
int test_prog_type_by_name(const struct sec_name_test *test) diff --git a/tools/testing/selftests/bpf/test_sock_fields.c b/tools/testing/selftests/bpf/test_sock_fields.c index bc8943938bf5..e089477fa0a3 100644 --- a/tools/testing/selftests/bpf/test_sock_fields.c +++ b/tools/testing/selftests/bpf/test_sock_fields.c @@ -16,10 +16,28 @@ #include "cgroup_helpers.h" #include "bpf_rlimit.h" -enum bpf_array_idx { - SRV_IDX, - CLI_IDX, - __NR_BPF_ARRAY_IDX, +enum bpf_addr_array_idx { + ADDR_SRV_IDX, + ADDR_CLI_IDX, + __NR_BPF_ADDR_ARRAY_IDX, +}; + +enum bpf_result_array_idx { + EGRESS_SRV_IDX, + EGRESS_CLI_IDX, + INGRESS_LISTEN_IDX, + __NR_BPF_RESULT_ARRAY_IDX, +}; + +enum bpf_linum_array_idx { + EGRESS_LINUM_IDX, + INGRESS_LINUM_IDX, + __NR_BPF_LINUM_ARRAY_IDX, +}; + +struct bpf_spinlock_cnt { + struct bpf_spin_lock lock; + __u32 cnt; }; #define CHECK(condition, tag, format...) ({ \ @@ -37,12 +55,22 @@ enum bpf_array_idx { #define DATA_LEN sizeof(DATA) static struct sockaddr_in6 srv_sa6, cli_sa6; +static int sk_pkt_out_cnt10_fd; +static int sk_pkt_out_cnt_fd; static int linum_map_fd; static int addr_map_fd; static int tp_map_fd; static int sk_map_fd; -static __u32 srv_idx = SRV_IDX; -static __u32 cli_idx = CLI_IDX; + +static __u32 addr_srv_idx = ADDR_SRV_IDX; +static __u32 addr_cli_idx = ADDR_CLI_IDX; + +static __u32 egress_srv_idx = EGRESS_SRV_IDX; +static __u32 egress_cli_idx = EGRESS_CLI_IDX; +static __u32 ingress_listen_idx = INGRESS_LISTEN_IDX; + +static __u32 egress_linum_idx = EGRESS_LINUM_IDX; +static __u32 ingress_linum_idx = INGRESS_LINUM_IDX; static void init_loopback6(struct sockaddr_in6 *sa6) { @@ -93,29 +121,46 @@ static void print_tp(const struct bpf_tcp_sock *tp) static void check_result(void) { - struct bpf_tcp_sock srv_tp, cli_tp; - struct bpf_sock srv_sk, cli_sk; - __u32 linum, idx0 = 0; + struct bpf_tcp_sock srv_tp, cli_tp, listen_tp; + struct bpf_sock srv_sk, cli_sk, listen_sk; + __u32 ingress_linum, egress_linum; int err; - err = 
bpf_map_lookup_elem(linum_map_fd, &idx0, &linum); + err = bpf_map_lookup_elem(linum_map_fd, &egress_linum_idx, + &egress_linum); + CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)", + "err:%d errno:%d", err, errno); + + err = bpf_map_lookup_elem(linum_map_fd, &ingress_linum_idx, + &ingress_linum); CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)", "err:%d errno:%d", err, errno); - err = bpf_map_lookup_elem(sk_map_fd, &srv_idx, &srv_sk); - CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &srv_idx)", + err = bpf_map_lookup_elem(sk_map_fd, &egress_srv_idx, &srv_sk); + CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &egress_srv_idx)", "err:%d errno:%d", err, errno); - err = bpf_map_lookup_elem(tp_map_fd, &srv_idx, &srv_tp); - CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &srv_idx)", + err = bpf_map_lookup_elem(tp_map_fd, &egress_srv_idx, &srv_tp); + CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &egress_srv_idx)", "err:%d errno:%d", err, errno); - err = bpf_map_lookup_elem(sk_map_fd, &cli_idx, &cli_sk); - CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &cli_idx)", + err = bpf_map_lookup_elem(sk_map_fd, &egress_cli_idx, &cli_sk); + CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &egress_cli_idx)", "err:%d errno:%d", err, errno); - err = bpf_map_lookup_elem(tp_map_fd, &cli_idx, &cli_tp); - CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &cli_idx)", + err = bpf_map_lookup_elem(tp_map_fd, &egress_cli_idx, &cli_tp); + CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &egress_cli_idx)", "err:%d errno:%d", err, errno); + err = bpf_map_lookup_elem(sk_map_fd, &ingress_listen_idx, &listen_sk); + CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &ingress_listen_idx)", + "err:%d errno:%d", err, errno); + err = bpf_map_lookup_elem(tp_map_fd, &ingress_listen_idx, &listen_tp); + CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &ingress_listen_idx)", + "err:%d errno:%d", err, errno); + + printf("listen_sk: "); + print_sk(&listen_sk); + printf("\n"); + printf("srv_sk: "); 
print_sk(&srv_sk); printf("\n"); @@ -124,6 +169,10 @@ static void check_result(void) print_sk(&cli_sk); printf("\n"); + printf("listen_tp: "); + print_tp(&listen_tp); + printf("\n"); + printf("srv_tp: "); print_tp(&srv_tp); printf("\n"); @@ -132,6 +181,19 @@ static void check_result(void) print_tp(&cli_tp); printf("\n"); + CHECK(listen_sk.state != 10 || + listen_sk.family != AF_INET6 || + listen_sk.protocol != IPPROTO_TCP || + memcmp(listen_sk.src_ip6, &in6addr_loopback, + sizeof(listen_sk.src_ip6)) || + listen_sk.dst_ip6[0] || listen_sk.dst_ip6[1] || + listen_sk.dst_ip6[2] || listen_sk.dst_ip6[3] || + listen_sk.src_port != ntohs(srv_sa6.sin6_port) || + listen_sk.dst_port, + "Unexpected listen_sk", + "Check listen_sk output. ingress_linum:%u", + ingress_linum); + CHECK(srv_sk.state == 10 || !srv_sk.state || srv_sk.family != AF_INET6 || @@ -142,7 +204,8 @@ static void check_result(void) sizeof(srv_sk.dst_ip6)) || srv_sk.src_port != ntohs(srv_sa6.sin6_port) || srv_sk.dst_port != cli_sa6.sin6_port, - "Unexpected srv_sk", "Check srv_sk output. linum:%u", linum); + "Unexpected srv_sk", "Check srv_sk output. egress_linum:%u", + egress_linum); CHECK(cli_sk.state == 10 || !cli_sk.state || @@ -154,21 +217,92 @@ static void check_result(void) sizeof(cli_sk.dst_ip6)) || cli_sk.src_port != ntohs(cli_sa6.sin6_port) || cli_sk.dst_port != srv_sa6.sin6_port, - "Unexpected cli_sk", "Check cli_sk output. linum:%u", linum); + "Unexpected cli_sk", "Check cli_sk output. egress_linum:%u", + egress_linum); + + CHECK(listen_tp.data_segs_out || + listen_tp.data_segs_in || + listen_tp.total_retrans || + listen_tp.bytes_acked, + "Unexpected listen_tp", "Check listen_tp output. ingress_linum:%u", + ingress_linum); - CHECK(srv_tp.data_segs_out != 1 || + CHECK(srv_tp.data_segs_out != 2 || srv_tp.data_segs_in || srv_tp.snd_cwnd != 10 || srv_tp.total_retrans || - srv_tp.bytes_acked != DATA_LEN, - "Unexpected srv_tp", "Check srv_tp output. 
linum:%u", linum); + srv_tp.bytes_acked != 2 * DATA_LEN, + "Unexpected srv_tp", "Check srv_tp output. egress_linum:%u", + egress_linum); CHECK(cli_tp.data_segs_out || - cli_tp.data_segs_in != 1 || + cli_tp.data_segs_in != 2 || cli_tp.snd_cwnd != 10 || cli_tp.total_retrans || - cli_tp.bytes_received != DATA_LEN, - "Unexpected cli_tp", "Check cli_tp output. linum:%u", linum); + cli_tp.bytes_received != 2 * DATA_LEN, + "Unexpected cli_tp", "Check cli_tp output. egress_linum:%u", + egress_linum); +} + +static void check_sk_pkt_out_cnt(int accept_fd, int cli_fd) +{ + struct bpf_spinlock_cnt pkt_out_cnt = {}, pkt_out_cnt10 = {}; + int err; + + pkt_out_cnt.cnt = ~0; + pkt_out_cnt10.cnt = ~0; + err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &accept_fd, &pkt_out_cnt); + if (!err) + err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &accept_fd, + &pkt_out_cnt10); + + /* The bpf prog only counts for fullsock and + * passive conneciton did not become fullsock until 3WHS + * had been finished. + * The bpf prog only counted two data packet out but we + * specially init accept_fd's pkt_out_cnt by 2 in + * init_sk_storage(). Hence, 4 here. + */ + CHECK(err || pkt_out_cnt.cnt != 4 || pkt_out_cnt10.cnt != 40, + "bpf_map_lookup_elem(sk_pkt_out_cnt, &accept_fd)", + "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u", + err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt); + + pkt_out_cnt.cnt = ~0; + pkt_out_cnt10.cnt = ~0; + err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &cli_fd, &pkt_out_cnt); + if (!err) + err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &cli_fd, + &pkt_out_cnt10); + /* Active connection is fullsock from the beginning. + * 1 SYN and 1 ACK during 3WHS + * 2 Acks on data packet. + * + * The bpf_prog initialized it to 0xeB9F. 
+ */ + CHECK(err || pkt_out_cnt.cnt != 0xeB9F + 4 || + pkt_out_cnt10.cnt != 0xeB9F + 40, + "bpf_map_lookup_elem(sk_pkt_out_cnt, &cli_fd)", + "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u", + err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt); +} + +static void init_sk_storage(int sk_fd, __u32 pkt_out_cnt) +{ + struct bpf_spinlock_cnt scnt = {}; + int err; + + scnt.cnt = pkt_out_cnt; + err = bpf_map_update_elem(sk_pkt_out_cnt_fd, &sk_fd, &scnt, + BPF_NOEXIST); + CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt_fd)", + "err:%d errno:%d", err, errno); + + scnt.cnt *= 10; + err = bpf_map_update_elem(sk_pkt_out_cnt10_fd, &sk_fd, &scnt, + BPF_NOEXIST); + CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt10_fd)", + "err:%d errno:%d", err, errno); } static void test(void) @@ -176,6 +310,7 @@ static void test(void) int listen_fd, cli_fd, accept_fd, epfd, err; struct epoll_event ev; socklen_t addrlen; + int i; addrlen = sizeof(struct sockaddr_in6); ev.events = EPOLLIN; @@ -211,10 +346,10 @@ static void test(void) err, errno); /* Update addr_map with srv_sa6 and cli_sa6 */ - err = bpf_map_update_elem(addr_map_fd, &srv_idx, &srv_sa6, 0); + err = bpf_map_update_elem(addr_map_fd, &addr_srv_idx, &srv_sa6, 0); CHECK(err, "map_update", "err:%d errno:%d", err, errno); - err = bpf_map_update_elem(addr_map_fd, &cli_idx, &cli_sa6, 0); + err = bpf_map_update_elem(addr_map_fd, &addr_cli_idx, &cli_sa6, 0); CHECK(err, "map_update", "err:%d errno:%d", err, errno); /* Connect from cli_sa6 to srv_sa6 */ @@ -242,24 +377,30 @@ static void test(void) accept_fd, errno); close(listen_fd); - /* Send some data from accept_fd to cli_fd */ - err = send(accept_fd, DATA, DATA_LEN, 0); - CHECK(err != DATA_LEN, "send(accept_fd)", "err:%d errno:%d", - err, errno); - - /* Have some timeout in recv(cli_fd). Just in case. 
*/ ev.data.fd = cli_fd; err = epoll_ctl(epfd, EPOLL_CTL_ADD, cli_fd, &ev); CHECK(err, "epoll_ctl(EPOLL_CTL_ADD, cli_fd)", "err:%d errno:%d", err, errno); - err = epoll_wait(epfd, &ev, 1, 1000); - CHECK(err != 1 || ev.data.fd != cli_fd, - "epoll_wait(cli_fd)", "err:%d errno:%d ev.data.fd:%d cli_fd:%d", - err, errno, ev.data.fd, cli_fd); + init_sk_storage(accept_fd, 2); + + for (i = 0; i < 2; i++) { + /* Send some data from accept_fd to cli_fd */ + err = send(accept_fd, DATA, DATA_LEN, 0); + CHECK(err != DATA_LEN, "send(accept_fd)", "err:%d errno:%d", + err, errno); + + /* Have some timeout in recv(cli_fd). Just in case. */ + err = epoll_wait(epfd, &ev, 1, 1000); + CHECK(err != 1 || ev.data.fd != cli_fd, + "epoll_wait(cli_fd)", "err:%d errno:%d ev.data.fd:%d cli_fd:%d", + err, errno, ev.data.fd, cli_fd); + + err = recv(cli_fd, NULL, 0, MSG_TRUNC); + CHECK(err, "recv(cli_fd)", "err:%d errno:%d", err, errno); + } - err = recv(cli_fd, NULL, 0, MSG_TRUNC); - CHECK(err, "recv(cli_fd)", "err:%d errno:%d", err, errno); + check_sk_pkt_out_cnt(accept_fd, cli_fd); close(epfd); close(accept_fd); @@ -273,9 +414,9 @@ int main(int argc, char **argv) struct bpf_prog_load_attr attr = { .file = "test_sock_fields_kern.o", .prog_type = BPF_PROG_TYPE_CGROUP_SKB, - .expected_attach_type = BPF_CGROUP_INET_EGRESS, }; - int cgroup_fd, prog_fd, err; + int cgroup_fd, egress_fd, ingress_fd, err; + struct bpf_program *ingress_prog; struct bpf_object *obj; struct bpf_map *map; @@ -293,12 +434,24 @@ int main(int argc, char **argv) err = join_cgroup(TEST_CGROUP); CHECK(err, "join_cgroup", "err:%d errno:%d", err, errno); - err = bpf_prog_load_xattr(&attr, &obj, &prog_fd); + err = bpf_prog_load_xattr(&attr, &obj, &egress_fd); CHECK(err, "bpf_prog_load_xattr()", "err:%d", err); - err = bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0); + ingress_prog = bpf_object__find_program_by_title(obj, + "cgroup_skb/ingress"); + CHECK(!ingress_prog, + 
"bpf_object__find_program_by_title(cgroup_skb/ingress)", + "not found"); + ingress_fd = bpf_program__fd(ingress_prog); + + err = bpf_prog_attach(egress_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0); CHECK(err == -1, "bpf_prog_attach(CPF_CGROUP_INET_EGRESS)", "err:%d errno%d", err, errno); + + err = bpf_prog_attach(ingress_fd, cgroup_fd, + BPF_CGROUP_INET_INGRESS, 0); + CHECK(err == -1, "bpf_prog_attach(CPF_CGROUP_INET_INGRESS)", + "err:%d errno%d", err, errno); close(cgroup_fd); map = bpf_object__find_map_by_name(obj, "addr_map"); @@ -317,6 +470,14 @@ int main(int argc, char **argv) CHECK(!map, "cannot find linum_map", "(null)"); linum_map_fd = bpf_map__fd(map); + map = bpf_object__find_map_by_name(obj, "sk_pkt_out_cnt"); + CHECK(!map, "cannot find sk_pkt_out_cnt", "(null)"); + sk_pkt_out_cnt_fd = bpf_map__fd(map); + + map = bpf_object__find_map_by_name(obj, "sk_pkt_out_cnt10"); + CHECK(!map, "cannot find sk_pkt_out_cnt10", "(null)"); + sk_pkt_out_cnt10_fd = bpf_map__fd(map); + test(); bpf_object__close(obj); diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c new file mode 100644 index 000000000000..a3bebd7c68dd --- /dev/null +++ b/tools/testing/selftests/bpf/test_sysctl.c @@ -0,0 +1,1567 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook + +#include <fcntl.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include <linux/filter.h> + +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + +#include "bpf_rlimit.h" +#include "bpf_util.h" +#include "cgroup_helpers.h" + +#define CG_PATH "/foo" +#define MAX_INSNS 512 +#define FIXUP_SYSCTL_VALUE 0 + +char bpf_log_buf[BPF_LOG_BUF_SIZE]; + +struct sysctl_test { + const char *descr; + size_t fixup_value_insn; + struct bpf_insn insns[MAX_INSNS]; + const char *prog_file; + enum bpf_attach_type attach_type; + const char *sysctl; + int open_flags; + const char *newval; + const char *oldval; + enum { + 
LOAD_REJECT, + ATTACH_REJECT, + OP_EPERM, + SUCCESS, + } result; +}; + +static struct sysctl_test tests[] = { + { + .descr = "sysctl wrong attach_type", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = 0, + .sysctl = "kernel/ostype", + .open_flags = O_RDONLY, + .result = ATTACH_REJECT, + }, + { + .descr = "sysctl:read allow all", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "kernel/ostype", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + .descr = "sysctl:read deny all", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "kernel/ostype", + .open_flags = O_RDONLY, + .result = OP_EPERM, + }, + { + .descr = "ctx:write sysctl:read read ok", + .insns = { + /* If (write) */ + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct bpf_sysctl, write)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 1, 2), + + /* return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_A(1), + + /* else return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "kernel/ostype", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + .descr = "ctx:write sysctl:write read ok", + .insns = { + /* If (write) */ + BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_1, + offsetof(struct bpf_sysctl, write)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 1, 2), + + /* return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_A(1), + + /* else return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "kernel/domainname", + .open_flags = O_WRONLY, + .newval = "(none)", /* same as default, should fail anyway */ + .result = OP_EPERM, + }, + { + .descr = "ctx:write sysctl:read write reject", + .insns = { + /* write = X */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct bpf_sysctl, write)), + 
BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "kernel/ostype", + .open_flags = O_RDONLY, + .result = LOAD_REJECT, + }, + { + .descr = "ctx:file_pos sysctl:read read ok", + .insns = { + /* If (file_pos == X) */ + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct bpf_sysctl, file_pos)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "kernel/ostype", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + .descr = "ctx:file_pos sysctl:read read ok narrow", + .insns = { + /* If (file_pos == X) */ + BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_1, + offsetof(struct bpf_sysctl, file_pos)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "kernel/ostype", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + .descr = "ctx:file_pos sysctl:read write ok", + .insns = { + /* file_pos = X */ + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct bpf_sysctl, file_pos)), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "kernel/ostype", + .open_flags = O_RDONLY, + .oldval = "nux\n", + .result = SUCCESS, + }, + { + .descr = "sysctl_get_name sysctl_value:base ok", + .insns = { + /* sysctl_get_name arg2 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + + /* sysctl_get_name arg3 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_3, 8), + + /* sysctl_get_name arg4 (flags) */ + BPF_MOV64_IMM(BPF_REG_4, 
BPF_F_SYSCTL_BASE_NAME), + + /* sysctl_get_name(ctx, buf, buf_len, flags) */ + BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, sizeof("tcp_mem") - 1, 6), + /* buf == "tcp_mem\0") */ + BPF_LD_IMM64(BPF_REG_8, 0x006d656d5f706374ULL), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/tcp_mem", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + .descr = "sysctl_get_name sysctl_value:base E2BIG truncated", + .insns = { + /* sysctl_get_name arg2 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + + /* sysctl_get_name arg3 (buf_len) too small */ + BPF_MOV64_IMM(BPF_REG_3, 7), + + /* sysctl_get_name arg4 (flags) */ + BPF_MOV64_IMM(BPF_REG_4, BPF_F_SYSCTL_BASE_NAME), + + /* sysctl_get_name(ctx, buf, buf_len, flags) */ + BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 6), + + /* buf[0:7] == "tcp_me\0") */ + BPF_LD_IMM64(BPF_REG_8, 0x00656d5f706374ULL), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/tcp_mem", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + .descr = "sysctl_get_name sysctl:full ok", + .insns = { + /* sysctl_get_name arg2 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -24), + BPF_MOV64_IMM(BPF_REG_0, 0), + 
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 8), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 16), + + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + + /* sysctl_get_name arg3 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_3, 17), + + /* sysctl_get_name arg4 (flags) */ + BPF_MOV64_IMM(BPF_REG_4, 0), + + /* sysctl_get_name(ctx, buf, buf_len, flags) */ + BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 16, 14), + + /* buf[0:8] == "net/ipv4" && */ + BPF_LD_IMM64(BPF_REG_8, 0x347670692f74656eULL), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 10), + + /* buf[8:16] == "/tcp_mem" && */ + BPF_LD_IMM64(BPF_REG_8, 0x6d656d5f7063742fULL), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 8), + BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 6), + + /* buf[16:24] == "\0") */ + BPF_LD_IMM64(BPF_REG_8, 0x0ULL), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 16), + BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/tcp_mem", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + .descr = "sysctl_get_name sysctl:full E2BIG truncated", + .insns = { + /* sysctl_get_name arg2 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -16), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 8), + + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + + /* sysctl_get_name arg3 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_3, 16), + + /* sysctl_get_name arg4 (flags) */ + BPF_MOV64_IMM(BPF_REG_4, 0), + + /* sysctl_get_name(ctx, buf, buf_len, flags) */ + BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 10), + + /* buf[0:8] == 
"net/ipv4" && */ + BPF_LD_IMM64(BPF_REG_8, 0x347670692f74656eULL), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 6), + + /* buf[8:16] == "/tcp_me\0") */ + BPF_LD_IMM64(BPF_REG_8, 0x00656d5f7063742fULL), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 8), + BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/tcp_mem", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + .descr = "sysctl_get_name sysctl:full E2BIG truncated small", + .insns = { + /* sysctl_get_name arg2 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + + /* sysctl_get_name arg3 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_3, 7), + + /* sysctl_get_name arg4 (flags) */ + BPF_MOV64_IMM(BPF_REG_4, 0), + + /* sysctl_get_name(ctx, buf, buf_len, flags) */ + BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 6), + + /* buf[0:8] == "net/ip\0") */ + BPF_LD_IMM64(BPF_REG_8, 0x000070692f74656eULL), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/tcp_mem", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + .descr = "sysctl_get_current_value sysctl:read ok, gt", + .insns = { + /* sysctl_get_current_value arg2 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + + /* sysctl_get_current_value arg3 (buf_len) */ + 
BPF_MOV64_IMM(BPF_REG_3, 8), + + /* sysctl_get_current_value(ctx, buf, buf_len) */ + BPF_EMIT_CALL(BPF_FUNC_sysctl_get_current_value), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 6, 6), + + /* buf[0:6] == "Linux\n\0") */ + BPF_LD_IMM64(BPF_REG_8, 0x000a78756e694cULL), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "kernel/ostype", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + .descr = "sysctl_get_current_value sysctl:read ok, eq", + .insns = { + /* sysctl_get_current_value arg2 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_B, BPF_REG_7, BPF_REG_0, 7), + + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + + /* sysctl_get_current_value arg3 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_3, 7), + + /* sysctl_get_current_value(ctx, buf, buf_len) */ + BPF_EMIT_CALL(BPF_FUNC_sysctl_get_current_value), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 6, 6), + + /* buf[0:6] == "Linux\n\0") */ + BPF_LD_IMM64(BPF_REG_8, 0x000a78756e694cULL), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "kernel/ostype", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + .descr = "sysctl_get_current_value sysctl:read E2BIG truncated", + .insns = { + /* sysctl_get_current_value arg2 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_H, BPF_REG_7, BPF_REG_0, 6), + + 
BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + + /* sysctl_get_current_value arg3 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_3, 6), + + /* sysctl_get_current_value(ctx, buf, buf_len) */ + BPF_EMIT_CALL(BPF_FUNC_sysctl_get_current_value), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 6), + + /* buf[0:6] == "Linux\0") */ + BPF_LD_IMM64(BPF_REG_8, 0x000078756e694cULL), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "kernel/ostype", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + .descr = "sysctl_get_current_value sysctl:read EINVAL", + .insns = { + /* sysctl_get_current_value arg2 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + + /* sysctl_get_current_value arg3 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_3, 8), + + /* sysctl_get_current_value(ctx, buf, buf_len) */ + BPF_EMIT_CALL(BPF_FUNC_sysctl_get_current_value), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -EINVAL, 4), + + /* buf[0:8] is NUL-filled) */ + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0, 2), + + /* return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_A(1), + + /* else return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv6/conf/lo/stable_secret", /* -EIO */ + .open_flags = O_RDONLY, + .result = OP_EPERM, + }, + { + .descr = "sysctl_get_current_value sysctl:write ok", + .fixup_value_insn = 6, + .insns = { + /* sysctl_get_current_value arg2 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + + /* sysctl_get_current_value arg3 (buf_len) */ + 
BPF_MOV64_IMM(BPF_REG_3, 8), + + /* sysctl_get_current_value(ctx, buf, buf_len) */ + BPF_EMIT_CALL(BPF_FUNC_sysctl_get_current_value), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 4, 6), + + /* buf[0:4] == expected) */ + BPF_LD_IMM64(BPF_REG_8, FIXUP_SYSCTL_VALUE), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), + + /* return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_A(1), + + /* else return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/route/mtu_expires", + .open_flags = O_WRONLY, + .newval = "600", /* same as default, should fail anyway */ + .result = OP_EPERM, + }, + { + .descr = "sysctl_get_new_value sysctl:read EINVAL", + .insns = { + /* sysctl_get_new_value arg2 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + + /* sysctl_get_new_value arg3 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_3, 8), + + /* sysctl_get_new_value(ctx, buf, buf_len) */ + BPF_EMIT_CALL(BPF_FUNC_sysctl_get_new_value), + + /* if (ret == expected) */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -EINVAL, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/tcp_mem", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + .descr = "sysctl_get_new_value sysctl:write ok", + .insns = { + /* sysctl_get_new_value arg2 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + + /* sysctl_get_new_value arg3 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_3, 4), + + /* sysctl_get_new_value(ctx, buf, buf_len) */ + BPF_EMIT_CALL(BPF_FUNC_sysctl_get_new_value), + + /* if (ret == 
expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 3, 4), + + /* buf[0:4] == "606\0") */ + BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0x00363036, 2), + + /* return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_A(1), + + /* else return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/route/mtu_expires", + .open_flags = O_WRONLY, + .newval = "606", + .result = OP_EPERM, + }, + { + .descr = "sysctl_get_new_value sysctl:write ok long", + .insns = { + /* sysctl_get_new_value arg2 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -24), + + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + + /* sysctl_get_new_value arg3 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_3, 24), + + /* sysctl_get_new_value(ctx, buf, buf_len) */ + BPF_EMIT_CALL(BPF_FUNC_sysctl_get_new_value), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 23, 14), + + /* buf[0:8] == "3000000 " && */ + BPF_LD_IMM64(BPF_REG_8, 0x2030303030303033ULL), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 10), + + /* buf[8:16] == "4000000 " && */ + BPF_LD_IMM64(BPF_REG_8, 0x2030303030303034ULL), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 8), + BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 6), + + /* buf[16:24] == "6000000\0") */ + BPF_LD_IMM64(BPF_REG_8, 0x0030303030303036ULL), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 16), + BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), + + /* return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_A(1), + + /* else return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/tcp_mem", + .open_flags = O_WRONLY, + .newval = "3000000 4000000 6000000", + .result = OP_EPERM, + }, + { + .descr = "sysctl_get_new_value sysctl:write E2BIG", + .insns = { + /* sysctl_get_new_value arg2 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + 
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_B, BPF_REG_7, BPF_REG_0, 3), + + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + + /* sysctl_get_new_value arg3 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_3, 3), + + /* sysctl_get_new_value(ctx, buf, buf_len) */ + BPF_EMIT_CALL(BPF_FUNC_sysctl_get_new_value), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 4), + + /* buf[0:3] == "60\0") */ + BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0x003036, 2), + + /* return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_A(1), + + /* else return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/route/mtu_expires", + .open_flags = O_WRONLY, + .newval = "606", + .result = OP_EPERM, + }, + { + .descr = "sysctl_set_new_value sysctl:read EINVAL", + .insns = { + /* sysctl_set_new_value arg2 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_IMM(BPF_REG_0, 0x00303036), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + + /* sysctl_set_new_value arg3 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_3, 3), + + /* sysctl_set_new_value(ctx, buf, buf_len) */ + BPF_EMIT_CALL(BPF_FUNC_sysctl_set_new_value), + + /* if (ret == expected) */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -EINVAL, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/route/mtu_expires", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + .descr = "sysctl_set_new_value sysctl:write ok", + .fixup_value_insn = 2, + .insns = { + /* sysctl_set_new_value arg2 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_IMM(BPF_REG_0, FIXUP_SYSCTL_VALUE), + BPF_STX_MEM(BPF_DW, BPF_REG_7, 
BPF_REG_0, 0), + + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + + /* sysctl_set_new_value arg3 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_3, 3), + + /* sysctl_set_new_value(ctx, buf, buf_len) */ + BPF_EMIT_CALL(BPF_FUNC_sysctl_set_new_value), + + /* if (ret == expected) */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/route/mtu_expires", + .open_flags = O_WRONLY, + .newval = "606", + .result = SUCCESS, + }, + { + "bpf_strtoul one number string", + .insns = { + /* arg1 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_IMM(BPF_REG_0, 0x00303036), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + + /* arg2 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_2, 4), + + /* arg3 (flags) */ + BPF_MOV64_IMM(BPF_REG_3, 0), + + /* arg4 (res) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), + + BPF_EMIT_CALL(BPF_FUNC_strtoul), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 3, 4), + /* res == expected) */ + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 600, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/route/mtu_expires", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + "bpf_strtoul multi number string", + .insns = { + /* arg1 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + /* "600 602\0" */ + BPF_LD_IMM64(BPF_REG_0, 0x0032303620303036ULL), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + + /* arg2 (buf_len) */ + 
BPF_MOV64_IMM(BPF_REG_2, 8), + + /* arg3 (flags) */ + BPF_MOV64_IMM(BPF_REG_3, 0), + + /* arg4 (res) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), + + BPF_EMIT_CALL(BPF_FUNC_strtoul), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 3, 18), + /* res == expected) */ + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 600, 16), + + /* arg1 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + + /* arg2 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_2, 8), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_0), + + /* arg3 (flags) */ + BPF_MOV64_IMM(BPF_REG_3, 0), + + /* arg4 (res) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -16), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), + + BPF_EMIT_CALL(BPF_FUNC_strtoul), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 4, 4), + /* res == expected) */ + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 602, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/tcp_mem", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + "bpf_strtoul buf_len = 0, reject", + .insns = { + /* arg1 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_IMM(BPF_REG_0, 0x00303036), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + + /* arg2 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_2, 0), + + /* arg3 (flags) */ + BPF_MOV64_IMM(BPF_REG_3, 0), + + /* arg4 (res) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), + + 
BPF_EMIT_CALL(BPF_FUNC_strtoul), + + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/route/mtu_expires", + .open_flags = O_RDONLY, + .result = LOAD_REJECT, + }, + { + "bpf_strtoul supported base, ok", + .insns = { + /* arg1 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_IMM(BPF_REG_0, 0x00373730), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + + /* arg2 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_2, 4), + + /* arg3 (flags) */ + BPF_MOV64_IMM(BPF_REG_3, 8), + + /* arg4 (res) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), + + BPF_EMIT_CALL(BPF_FUNC_strtoul), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 3, 4), + /* res == expected) */ + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 63, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/route/mtu_expires", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + "bpf_strtoul unsupported base, EINVAL", + .insns = { + /* arg1 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_IMM(BPF_REG_0, 0x00303036), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + + /* arg2 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_2, 4), + + /* arg3 (flags) */ + BPF_MOV64_IMM(BPF_REG_3, 3), + + /* arg4 (res) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), + + BPF_EMIT_CALL(BPF_FUNC_strtoul), + + /* if (ret == expected) */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -EINVAL, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + 
BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/route/mtu_expires", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + "bpf_strtoul buf with spaces only, EINVAL", + .insns = { + /* arg1 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_IMM(BPF_REG_0, 0x090a0c0d), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + + /* arg2 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_2, 4), + + /* arg3 (flags) */ + BPF_MOV64_IMM(BPF_REG_3, 0), + + /* arg4 (res) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), + + BPF_EMIT_CALL(BPF_FUNC_strtoul), + + /* if (ret == expected) */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -EINVAL, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/route/mtu_expires", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + "bpf_strtoul negative number, EINVAL", + .insns = { + /* arg1 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_IMM(BPF_REG_0, 0x00362d0a), /* " -6\0" */ + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + + /* arg2 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_2, 4), + + /* arg3 (flags) */ + BPF_MOV64_IMM(BPF_REG_3, 0), + + /* arg4 (res) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), + + BPF_EMIT_CALL(BPF_FUNC_strtoul), + + /* if (ret == expected) */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -EINVAL, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + 
BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/route/mtu_expires", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + "bpf_strtol negative number, ok", + .insns = { + /* arg1 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_IMM(BPF_REG_0, 0x00362d0a), /* " -6\0" */ + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + + /* arg2 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_2, 4), + + /* arg3 (flags) */ + BPF_MOV64_IMM(BPF_REG_3, 10), + + /* arg4 (res) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), + + BPF_EMIT_CALL(BPF_FUNC_strtol), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 3, 4), + /* res == expected) */ + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_9, -6, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/route/mtu_expires", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + "bpf_strtol hex number, ok", + .insns = { + /* arg1 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_IMM(BPF_REG_0, 0x65667830), /* "0xfe" */ + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + + /* arg2 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_2, 4), + + /* arg3 (flags) */ + BPF_MOV64_IMM(BPF_REG_3, 0), + + /* arg4 (res) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), + + BPF_EMIT_CALL(BPF_FUNC_strtol), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 4, 4), + /* res == expected) */ + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 254, 2), + + /* 
return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/route/mtu_expires", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + "bpf_strtol max long", + .insns = { + /* arg1 (buf) 9223372036854775807 */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -24), + BPF_LD_IMM64(BPF_REG_0, 0x3032373333323239ULL), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_LD_IMM64(BPF_REG_0, 0x3537373435383633ULL), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 8), + BPF_LD_IMM64(BPF_REG_0, 0x0000000000373038ULL), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 16), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + + /* arg2 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_2, 19), + + /* arg3 (flags) */ + BPF_MOV64_IMM(BPF_REG_3, 0), + + /* arg4 (res) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), + + BPF_EMIT_CALL(BPF_FUNC_strtol), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 19, 6), + /* res == expected) */ + BPF_LD_IMM64(BPF_REG_8, 0x7fffffffffffffffULL), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/route/mtu_expires", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + "bpf_strtol overflow, ERANGE", + .insns = { + /* arg1 (buf) 9223372036854775808 */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -24), + BPF_LD_IMM64(BPF_REG_0, 0x3032373333323239ULL), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_LD_IMM64(BPF_REG_0, 0x3537373435383633ULL), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 8), + BPF_LD_IMM64(BPF_REG_0, 
0x0000000000383038ULL), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 16), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + + /* arg2 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_2, 19), + + /* arg3 (flags) */ + BPF_MOV64_IMM(BPF_REG_3, 0), + + /* arg4 (res) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), + + BPF_EMIT_CALL(BPF_FUNC_strtol), + + /* if (ret == expected) */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -ERANGE, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/route/mtu_expires", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + "C prog: deny all writes", + .prog_file = "./test_sysctl_prog.o", + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/tcp_mem", + .open_flags = O_WRONLY, + .newval = "123 456 789", + .result = OP_EPERM, + }, + { + "C prog: deny access by name", + .prog_file = "./test_sysctl_prog.o", + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/route/mtu_expires", + .open_flags = O_RDONLY, + .result = OP_EPERM, + }, + { + "C prog: read tcp_mem", + .prog_file = "./test_sysctl_prog.o", + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/tcp_mem", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, +}; + +static size_t probe_prog_length(const struct bpf_insn *fp) +{ + size_t len; + + for (len = MAX_INSNS - 1; len > 0; --len) + if (fp[len].code != 0 || fp[len].imm != 0) + break; + return len + 1; +} + +static int fixup_sysctl_value(const char *buf, size_t buf_len, + struct bpf_insn *prog, size_t insn_num) +{ + uint32_t value_num = 0; + uint8_t c, i; + + if (buf_len > sizeof(value_num)) { + log_err("Value is too big (%zd) to use in fixup", buf_len); + return -1; + } + + for (i = 0; i < buf_len; ++i) { + c = buf[i]; + value_num |= (c << i * 8); + } + + prog[insn_num].imm = value_num; + + return 0; +} 
+ +static int load_sysctl_prog_insns(struct sysctl_test *test, + const char *sysctl_path) +{ + struct bpf_insn *prog = test->insns; + struct bpf_load_program_attr attr; + int ret; + + memset(&attr, 0, sizeof(struct bpf_load_program_attr)); + attr.prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL; + attr.insns = prog; + attr.insns_cnt = probe_prog_length(attr.insns); + attr.license = "GPL"; + + if (test->fixup_value_insn) { + char buf[128]; + ssize_t len; + int fd; + + fd = open(sysctl_path, O_RDONLY | O_CLOEXEC); + if (fd < 0) { + log_err("open(%s) failed", sysctl_path); + return -1; + } + len = read(fd, buf, sizeof(buf)); + if (len == -1) { + log_err("read(%s) failed", sysctl_path); + close(fd); + return -1; + } + close(fd); + if (fixup_sysctl_value(buf, len, prog, test->fixup_value_insn)) + return -1; + } + + ret = bpf_load_program_xattr(&attr, bpf_log_buf, BPF_LOG_BUF_SIZE); + if (ret < 0 && test->result != LOAD_REJECT) { + log_err(">>> Loading program error.\n" + ">>> Verifier output:\n%s\n-------\n", bpf_log_buf); + } + + return ret; +} + +static int load_sysctl_prog_file(struct sysctl_test *test) +{ + struct bpf_prog_load_attr attr; + struct bpf_object *obj; + int prog_fd; + + memset(&attr, 0, sizeof(struct bpf_prog_load_attr)); + attr.file = test->prog_file; + attr.prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL; + + if (bpf_prog_load_xattr(&attr, &obj, &prog_fd)) { + if (test->result != LOAD_REJECT) + log_err(">>> Loading program (%s) error.\n", + test->prog_file); + return -1; + } + + return prog_fd; +} + +static int load_sysctl_prog(struct sysctl_test *test, const char *sysctl_path) +{ + return test->prog_file + ? 
load_sysctl_prog_file(test) + : load_sysctl_prog_insns(test, sysctl_path); +} + +static int access_sysctl(const char *sysctl_path, + const struct sysctl_test *test) +{ + int err = 0; + int fd; + + fd = open(sysctl_path, test->open_flags | O_CLOEXEC); + if (fd < 0) + return fd; + + if (test->open_flags == O_RDONLY) { + char buf[128]; + + if (read(fd, buf, sizeof(buf)) == -1) + goto err; + if (test->oldval && + strncmp(buf, test->oldval, strlen(test->oldval))) { + log_err("Read value %s != %s", buf, test->oldval); + goto err; + } + } else if (test->open_flags == O_WRONLY) { + if (!test->newval) { + log_err("New value for sysctl is not set"); + goto err; + } + if (write(fd, test->newval, strlen(test->newval)) == -1) + goto err; + } else { + log_err("Unexpected sysctl access: neither read nor write"); + goto err; + } + + goto out; +err: + err = -1; +out: + close(fd); + return err; +} + +static int run_test_case(int cgfd, struct sysctl_test *test) +{ + enum bpf_attach_type atype = test->attach_type; + char sysctl_path[128]; + int progfd = -1; + int err = 0; + + printf("Test case: %s .. ", test->descr); + + snprintf(sysctl_path, sizeof(sysctl_path), "/proc/sys/%s", + test->sysctl); + + progfd = load_sysctl_prog(test, sysctl_path); + if (progfd < 0) { + if (test->result == LOAD_REJECT) + goto out; + else + goto err; + } + + if (bpf_prog_attach(progfd, cgfd, atype, BPF_F_ALLOW_OVERRIDE) == -1) { + if (test->result == ATTACH_REJECT) + goto out; + else + goto err; + } + + if (access_sysctl(sysctl_path, test) == -1) { + if (test->result == OP_EPERM && errno == EPERM) + goto out; + else + goto err; + } + + if (test->result != SUCCESS) { + log_err("Unexpected failure"); + goto err; + } + + goto out; +err: + err = -1; +out: + /* Detaching w/o checking return code: best effort attempt. */ + if (progfd != -1) + bpf_prog_detach(cgfd, atype); + close(progfd); + printf("[%s]\n", err ? 
"FAIL" : "PASS"); + return err; +} + +static int run_tests(int cgfd) +{ + int passes = 0; + int fails = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(tests); ++i) { + if (run_test_case(cgfd, &tests[i])) + ++fails; + else + ++passes; + } + printf("Summary: %d PASSED, %d FAILED\n", passes, fails); + return fails ? -1 : 0; +} + +int main(int argc, char **argv) +{ + int cgfd = -1; + int err = 0; + + if (setup_cgroup_environment()) + goto err; + + cgfd = create_and_get_cgroup(CG_PATH); + if (cgfd < 0) + goto err; + + if (join_cgroup(CG_PATH)) + goto err; + + if (run_tests(cgfd)) + goto err; + + goto out; +err: + err = -1; +out: + close(cgfd); + cleanup_cgroup_environment(); + return err; +} diff --git a/tools/testing/selftests/bpf/test_tc_edt.sh b/tools/testing/selftests/bpf/test_tc_edt.sh new file mode 100755 index 000000000000..f38567ef694b --- /dev/null +++ b/tools/testing/selftests/bpf/test_tc_edt.sh @@ -0,0 +1,99 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test installs a TC bpf program that throttles a TCP flow +# with dst port = 9000 down to 5MBps. Then it measures actual +# throughput of the flow. 
+ +if [[ $EUID -ne 0 ]]; then + echo "This script must be run as root" + echo "FAIL" + exit 1 +fi + +# check that nc, dd, and timeout are present +command -v nc >/dev/null 2>&1 || \ + { echo >&2 "nc is not available"; exit 1; } +command -v dd >/dev/null 2>&1 || \ + { echo >&2 "nc is not available"; exit 1; } +command -v timeout >/dev/null 2>&1 || \ + { echo >&2 "timeout is not available"; exit 1; } + +readonly NS_SRC="ns-src-$(mktemp -u XXXXXX)" +readonly NS_DST="ns-dst-$(mktemp -u XXXXXX)" + +readonly IP_SRC="172.16.1.100" +readonly IP_DST="172.16.2.100" + +cleanup() +{ + ip netns del ${NS_SRC} + ip netns del ${NS_DST} +} + +trap cleanup EXIT + +set -e # exit on error + +ip netns add "${NS_SRC}" +ip netns add "${NS_DST}" +ip link add veth_src type veth peer name veth_dst +ip link set veth_src netns ${NS_SRC} +ip link set veth_dst netns ${NS_DST} + +ip -netns ${NS_SRC} addr add ${IP_SRC}/24 dev veth_src +ip -netns ${NS_DST} addr add ${IP_DST}/24 dev veth_dst + +ip -netns ${NS_SRC} link set dev veth_src up +ip -netns ${NS_DST} link set dev veth_dst up + +ip -netns ${NS_SRC} route add ${IP_DST}/32 dev veth_src +ip -netns ${NS_DST} route add ${IP_SRC}/32 dev veth_dst + +# set up TC on TX +ip netns exec ${NS_SRC} tc qdisc add dev veth_src root fq +ip netns exec ${NS_SRC} tc qdisc add dev veth_src clsact +ip netns exec ${NS_SRC} tc filter add dev veth_src egress \ + bpf da obj test_tc_edt.o sec cls_test + + +# start the listener +ip netns exec ${NS_DST} bash -c \ + "nc -4 -l -s ${IP_DST} -p 9000 >/dev/null &" +declare -i NC_PID=$! 
+sleep 1 + +declare -ir TIMEOUT=20 +declare -ir EXPECTED_BPS=5000000 + +# run the load, capture RX bytes on DST +declare -ir RX_BYTES_START=$( ip netns exec ${NS_DST} \ + cat /sys/class/net/veth_dst/statistics/rx_bytes ) + +set +e +ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero \ + bs=1000 count=1000000 > /dev/tcp/${IP_DST}/9000 2>/dev/null" +set -e + +declare -ir RX_BYTES_END=$( ip netns exec ${NS_DST} \ + cat /sys/class/net/veth_dst/statistics/rx_bytes ) + +declare -ir ACTUAL_BPS=$(( ($RX_BYTES_END - $RX_BYTES_START) / $TIMEOUT )) + +echo $TIMEOUT $ACTUAL_BPS $EXPECTED_BPS | \ + awk '{printf "elapsed: %d sec; bps difference: %.2f%%\n", + $1, ($2-$3)*100.0/$3}' + +# Pass the test if the actual bps is within 1% of the expected bps. +# The difference is usually about 0.1% on a 20-sec test, and ==> zero +# the longer the test runs. +declare -ir RES=$( echo $ACTUAL_BPS $EXPECTED_BPS | \ + awk 'function abs(x){return ((x < 0.0) ? -x : x)} + {if (abs(($1-$2)*100.0/$2) > 1.0) { print "1" } + else { print "0"} }' ) +if [ "${RES}" == "0" ] ; then + echo "PASS" +else + echo "FAIL" + exit 1 +fi diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh new file mode 100755 index 000000000000..ff0d31d38061 --- /dev/null +++ b/tools/testing/selftests/bpf/test_tc_tunnel.sh @@ -0,0 +1,290 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# In-place tunneling + +# must match the port that the bpf program filters on +readonly port=8000 + +readonly ns_prefix="ns-$$-" +readonly ns1="${ns_prefix}1" +readonly ns2="${ns_prefix}2" + +readonly ns1_v4=192.168.1.1 +readonly ns2_v4=192.168.1.2 +readonly ns1_v6=fd::1 +readonly ns2_v6=fd::2 + +# Must match port used by bpf program +readonly udpport=5555 +# MPLSoverUDP +readonly mplsudpport=6635 +readonly mplsproto=137 + +readonly infile="$(mktemp)" +readonly outfile="$(mktemp)" + +setup() { + ip netns add "${ns1}" + ip netns add "${ns2}" + + ip link add dev veth1 mtu 
1500 netns "${ns1}" type veth \ + peer name veth2 mtu 1500 netns "${ns2}" + + ip netns exec "${ns1}" ethtool -K veth1 tso off + + ip -netns "${ns1}" link set veth1 up + ip -netns "${ns2}" link set veth2 up + + ip -netns "${ns1}" -4 addr add "${ns1_v4}/24" dev veth1 + ip -netns "${ns2}" -4 addr add "${ns2_v4}/24" dev veth2 + ip -netns "${ns1}" -6 addr add "${ns1_v6}/64" dev veth1 nodad + ip -netns "${ns2}" -6 addr add "${ns2_v6}/64" dev veth2 nodad + + # clamp route to reserve room for tunnel headers + ip -netns "${ns1}" -4 route flush table main + ip -netns "${ns1}" -6 route flush table main + ip -netns "${ns1}" -4 route add "${ns2_v4}" mtu 1458 dev veth1 + ip -netns "${ns1}" -6 route add "${ns2_v6}" mtu 1438 dev veth1 + + sleep 1 + + dd if=/dev/urandom of="${infile}" bs="${datalen}" count=1 status=none +} + +cleanup() { + ip netns del "${ns2}" + ip netns del "${ns1}" + + if [[ -f "${outfile}" ]]; then + rm "${outfile}" + fi + if [[ -f "${infile}" ]]; then + rm "${infile}" + fi +} + +server_listen() { + ip netns exec "${ns2}" nc "${netcat_opt}" -l -p "${port}" > "${outfile}" & + server_pid=$! + sleep 0.2 +} + +client_connect() { + ip netns exec "${ns1}" timeout 2 nc "${netcat_opt}" -w 1 "${addr2}" "${port}" < "${infile}" + echo $? 
+} + +verify_data() { + wait "${server_pid}" + # sha1sum returns two fields [sha1] [filepath] + # convert to bash array and access first elem + insum=($(sha1sum ${infile})) + outsum=($(sha1sum ${outfile})) + if [[ "${insum[0]}" != "${outsum[0]}" ]]; then + echo "data mismatch" + exit 1 + fi +} + +set -e + +# no arguments: automated test, run all +if [[ "$#" -eq "0" ]]; then + echo "ipip" + $0 ipv4 ipip none 100 + + echo "ip6ip6" + $0 ipv6 ip6tnl none 100 + + echo "sit" + $0 ipv6 sit none 100 + + for mac in none mpls eth ; do + echo "ip gre $mac" + $0 ipv4 gre $mac 100 + + echo "ip6 gre $mac" + $0 ipv6 ip6gre $mac 100 + + echo "ip gre $mac gso" + $0 ipv4 gre $mac 2000 + + echo "ip6 gre $mac gso" + $0 ipv6 ip6gre $mac 2000 + + echo "ip udp $mac" + $0 ipv4 udp $mac 100 + + echo "ip6 udp $mac" + $0 ipv6 ip6udp $mac 100 + + echo "ip udp $mac gso" + $0 ipv4 udp $mac 2000 + + echo "ip6 udp $mac gso" + $0 ipv6 ip6udp $mac 2000 + done + + echo "OK. All tests passed" + exit 0 +fi + +if [[ "$#" -ne "4" ]]; then + echo "Usage: $0" + echo " or: $0 <ipv4|ipv6> <tuntype> <none|mpls|eth> <data_len>" + exit 1 +fi + +case "$1" in +"ipv4") + readonly addr1="${ns1_v4}" + readonly addr2="${ns2_v4}" + readonly ipproto=4 + readonly netcat_opt=-${ipproto} + readonly foumod=fou + readonly foutype=ipip + readonly fouproto=4 + readonly fouproto_mpls=${mplsproto} + readonly gretaptype=gretap + ;; +"ipv6") + readonly addr1="${ns1_v6}" + readonly addr2="${ns2_v6}" + readonly ipproto=6 + readonly netcat_opt=-${ipproto} + readonly foumod=fou6 + readonly foutype=ip6tnl + readonly fouproto="41 -6" + readonly fouproto_mpls="${mplsproto} -6" + readonly gretaptype=ip6gretap + ;; +*) + echo "unknown arg: $1" + exit 1 + ;; +esac + +readonly tuntype=$2 +readonly mac=$3 +readonly datalen=$4 + +echo "encap ${addr1} to ${addr2}, type ${tuntype}, mac ${mac} len ${datalen}" + +trap cleanup EXIT + +setup + +# basic communication works +echo "test basic connectivity" +server_listen +client_connect +verify_data 
+ +# clientside, insert bpf program to encap all TCP to port ${port} +# client can no longer connect +ip netns exec "${ns1}" tc qdisc add dev veth1 clsact +ip netns exec "${ns1}" tc filter add dev veth1 egress \ + bpf direct-action object-file ./test_tc_tunnel.o \ + section "encap_${tuntype}_${mac}" +echo "test bpf encap without decap (expect failure)" +server_listen +! client_connect + +if [[ "$tuntype" =~ "udp" ]]; then + # Set up fou tunnel. + ttype="${foutype}" + targs="encap fou encap-sport auto encap-dport $udpport" + # fou may be a module; allow this to fail. + modprobe "${foumod}" ||true + if [[ "$mac" == "mpls" ]]; then + dport=${mplsudpport} + dproto=${fouproto_mpls} + tmode="mode any ttl 255" + else + dport=${udpport} + dproto=${fouproto} + fi + ip netns exec "${ns2}" ip fou add port $dport ipproto ${dproto} + targs="encap fou encap-sport auto encap-dport $dport" +elif [[ "$tuntype" =~ "gre" && "$mac" == "eth" ]]; then + ttype=$gretaptype +else + ttype=$tuntype + targs="" +fi + +# tunnel address family differs from inner for SIT +if [[ "${tuntype}" == "sit" ]]; then + link_addr1="${ns1_v4}" + link_addr2="${ns2_v4}" +else + link_addr1="${addr1}" + link_addr2="${addr2}" +fi + +# serverside, insert decap module +# server is still running +# client can connect again +ip netns exec "${ns2}" ip link add name testtun0 type "${ttype}" \ + ${tmode} remote "${link_addr1}" local "${link_addr2}" $targs + +expect_tun_fail=0 + +if [[ "$tuntype" == "ip6udp" && "$mac" == "mpls" ]]; then + # No support for MPLS IPv6 fou tunnel; expect failure. + expect_tun_fail=1 +elif [[ "$tuntype" =~ "udp" && "$mac" == "eth" ]]; then + # No support for TEB fou tunnel; expect failure. + expect_tun_fail=1 +elif [[ "$tuntype" =~ "gre" && "$mac" == "eth" ]]; then + # Share ethernet address between tunnel/veth2 so L2 decap works. 
+ ethaddr=$(ip netns exec "${ns2}" ip link show veth2 | \ + awk '/ether/ { print $2 }') + ip netns exec "${ns2}" ip link set testtun0 address $ethaddr +elif [[ "$mac" == "mpls" ]]; then + modprobe mpls_iptunnel ||true + modprobe mpls_gso ||true + ip netns exec "${ns2}" sysctl -qw net.mpls.platform_labels=65536 + ip netns exec "${ns2}" ip -f mpls route add 1000 dev lo + ip netns exec "${ns2}" ip link set lo up + ip netns exec "${ns2}" sysctl -qw net.mpls.conf.testtun0.input=1 + ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.lo.rp_filter=0 +fi + +# Because packets are decapped by the tunnel they arrive on testtun0 from +# the IP stack perspective. Ensure reverse path filtering is disabled +# otherwise we drop the TCP SYN as arriving on testtun0 instead of the +# expected veth2 (veth2 is where 192.168.1.2 is configured). +ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.rp_filter=0 +# rp needs to be disabled for both all and testtun0 as the rp value is +# selected as the max of the "all" and device-specific values. +ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.testtun0.rp_filter=0 +ip netns exec "${ns2}" ip link set dev testtun0 up +if [[ "$expect_tun_fail" == 1 ]]; then + # This tunnel mode is not supported, so we expect failure. + echo "test bpf encap with tunnel device decap (expect failure)" + ! client_connect +else + echo "test bpf encap with tunnel device decap" + client_connect + verify_data + server_listen +fi + +# bpf_skb_net_shrink does not take tunnel flags yet, cannot update L3. 
+if [[ "${tuntype}" == "sit" ]]; then + echo OK + exit 0 +fi + +# serverside, use BPF for decap +ip netns exec "${ns2}" ip link del dev testtun0 +ip netns exec "${ns2}" tc qdisc add dev veth2 clsact +ip netns exec "${ns2}" tc filter add dev veth2 ingress \ + bpf direct-action object-file ./test_tc_tunnel.o section decap +echo "test bpf encap with bpf decap" +client_connect +verify_data + +echo OK diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh b/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh new file mode 100755 index 000000000000..d48e51716d19 --- /dev/null +++ b/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh @@ -0,0 +1,81 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# Copyright (c) 2018 Facebook +# Copyright (c) 2019 Cloudflare + +set -eu + +wait_for_ip() +{ + local _i + printf "Wait for IP %s to become available " "$1" + for _i in $(seq ${MAX_PING_TRIES}); do + printf "." + if ns1_exec ping -c 1 -W 1 "$1" >/dev/null 2>&1; then + echo " OK" + return + fi + sleep 1 + done + echo 1>&2 "ERROR: Timeout waiting for test IP to become available." + exit 1 +} + +get_prog_id() +{ + awk '/ id / {sub(/.* id /, "", $0); print($1)}' +} + +ns1_exec() +{ + ip netns exec ns1 "$@" +} + +setup() +{ + ip netns add ns1 + ns1_exec ip link set lo up + + ns1_exec sysctl -w net.ipv4.tcp_syncookies=2 + + wait_for_ip 127.0.0.1 + wait_for_ip ::1 +} + +cleanup() +{ + ip netns del ns1 2>/dev/null || : +} + +main() +{ + trap cleanup EXIT 2 3 6 15 + setup + + printf "Testing clsact..." + ns1_exec tc qdisc add dev "${TEST_IF}" clsact + ns1_exec tc filter add dev "${TEST_IF}" ingress \ + bpf obj "${BPF_PROG_OBJ}" sec "${CLSACT_SECTION}" da + + BPF_PROG_ID=$(ns1_exec tc filter show dev "${TEST_IF}" ingress | \ + get_prog_id) + ns1_exec "${PROG}" "${BPF_PROG_ID}" + ns1_exec tc qdisc del dev "${TEST_IF}" clsact + + printf "Testing XDP..." 
+ ns1_exec ip link set "${TEST_IF}" xdp \ + object "${BPF_PROG_OBJ}" section "${XDP_SECTION}" + BPF_PROG_ID=$(ns1_exec ip link show "${TEST_IF}" | get_prog_id) + ns1_exec "${PROG}" "${BPF_PROG_ID}" +} + +DIR=$(dirname $0) +TEST_IF=lo +MAX_PING_TRIES=5 +BPF_PROG_OBJ="${DIR}/test_tcp_check_syncookie_kern.o" +CLSACT_SECTION="clsact/check_syncookie" +XDP_SECTION="xdp/check_syncookie" +BPF_PROG_ID=0 +PROG="${DIR}/test_tcp_check_syncookie_user" + +main diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c new file mode 100644 index 000000000000..87829c86c746 --- /dev/null +++ b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c @@ -0,0 +1,212 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Facebook +// Copyright (c) 2019 Cloudflare + +#include <string.h> +#include <stdlib.h> +#include <unistd.h> + +#include <arpa/inet.h> +#include <netinet/in.h> +#include <sys/types.h> +#include <sys/socket.h> + +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + +#include "bpf_rlimit.h" +#include "cgroup_helpers.h" + +static int start_server(const struct sockaddr *addr, socklen_t len) +{ + int fd; + + fd = socket(addr->sa_family, SOCK_STREAM, 0); + if (fd == -1) { + log_err("Failed to create server socket"); + goto out; + } + + if (bind(fd, addr, len) == -1) { + log_err("Failed to bind server socket"); + goto close_out; + } + + if (listen(fd, 128) == -1) { + log_err("Failed to listen on server socket"); + goto close_out; + } + + goto out; + +close_out: + close(fd); + fd = -1; +out: + return fd; +} + +static int connect_to_server(int server_fd) +{ + struct sockaddr_storage addr; + socklen_t len = sizeof(addr); + int fd = -1; + + if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) { + log_err("Failed to get server addr"); + goto out; + } + + fd = socket(addr.ss_family, SOCK_STREAM, 0); + if (fd == -1) { + log_err("Failed to create client socket"); + goto out; + } + + if 
(connect(fd, (const struct sockaddr *)&addr, len) == -1) { + log_err("Fail to connect to server"); + goto close_out; + } + + goto out; + +close_out: + close(fd); + fd = -1; +out: + return fd; +} + +static int get_map_fd_by_prog_id(int prog_id) +{ + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); + __u32 map_ids[1]; + int prog_fd = -1; + int map_fd = -1; + + prog_fd = bpf_prog_get_fd_by_id(prog_id); + if (prog_fd < 0) { + log_err("Failed to get fd by prog id %d", prog_id); + goto err; + } + + info.nr_map_ids = 1; + info.map_ids = (__u64)(unsigned long)map_ids; + + if (bpf_obj_get_info_by_fd(prog_fd, &info, &info_len)) { + log_err("Failed to get info by prog fd %d", prog_fd); + goto err; + } + + if (!info.nr_map_ids) { + log_err("No maps found for prog fd %d", prog_fd); + goto err; + } + + map_fd = bpf_map_get_fd_by_id(map_ids[0]); + if (map_fd < 0) + log_err("Failed to get fd by map id %d", map_ids[0]); +err: + if (prog_fd >= 0) + close(prog_fd); + return map_fd; +} + +static int run_test(int server_fd, int results_fd) +{ + int client = -1, srv_client = -1; + int ret = 0; + __u32 key = 0; + __u64 value = 0; + + if (bpf_map_update_elem(results_fd, &key, &value, 0) < 0) { + log_err("Can't clear results"); + goto err; + } + + client = connect_to_server(server_fd); + if (client == -1) + goto err; + + srv_client = accept(server_fd, NULL, 0); + if (srv_client == -1) { + log_err("Can't accept connection"); + goto err; + } + + if (bpf_map_lookup_elem(results_fd, &key, &value) < 0) { + log_err("Can't lookup result"); + goto err; + } + + if (value != 1) { + log_err("Didn't match syncookie: %llu", value); + goto err; + } + + goto out; + +err: + ret = 1; +out: + close(client); + close(srv_client); + return ret; +} + +int main(int argc, char **argv) +{ + struct sockaddr_in addr4; + struct sockaddr_in6 addr6; + int server = -1; + int server_v6 = -1; + int results = -1; + int err = 0; + + if (argc < 2) { + fprintf(stderr, "Usage: %s prog_id\n", argv[0]); + 
exit(1); + } + + results = get_map_fd_by_prog_id(atoi(argv[1])); + if (results < 0) { + log_err("Can't get map"); + goto err; + } + + memset(&addr4, 0, sizeof(addr4)); + addr4.sin_family = AF_INET; + addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + addr4.sin_port = 0; + + memset(&addr6, 0, sizeof(addr6)); + addr6.sin6_family = AF_INET6; + addr6.sin6_addr = in6addr_loopback; + addr6.sin6_port = 0; + + server = start_server((const struct sockaddr *)&addr4, sizeof(addr4)); + if (server == -1) + goto err; + + server_v6 = start_server((const struct sockaddr *)&addr6, + sizeof(addr6)); + if (server_v6 == -1) + goto err; + + if (run_test(server, results)) + goto err; + + if (run_test(server_v6, results)) + goto err; + + printf("ok\n"); + goto out; +err: + err = 1; +out: + close(server); + close(server_v6); + close(results); + return err; +} diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 477a9dcf9fff..ccd896b98cac 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -47,11 +47,13 @@ #include "bpf_rlimit.h" #include "bpf_rand.h" #include "bpf_util.h" +#include "test_btf.h" #include "../../../include/linux/filter.h" #define MAX_INSNS BPF_MAXINSNS +#define MAX_TEST_INSNS 1000000 #define MAX_FIXUPS 8 -#define MAX_NR_MAPS 14 +#define MAX_NR_MAPS 18 #define MAX_TEST_RUNS 8 #define POINTER_VALUE 0xcafe4all #define TEST_DATA_LEN 64 @@ -66,6 +68,7 @@ static int skips; struct bpf_test { const char *descr; struct bpf_insn insns[MAX_INSNS]; + struct bpf_insn *fill_insns; int fixup_map_hash_8b[MAX_FIXUPS]; int fixup_map_hash_48b[MAX_FIXUPS]; int fixup_map_hash_16b[MAX_FIXUPS]; @@ -80,9 +83,14 @@ struct bpf_test { int fixup_cgroup_storage[MAX_FIXUPS]; int fixup_percpu_cgroup_storage[MAX_FIXUPS]; int fixup_map_spin_lock[MAX_FIXUPS]; + int fixup_map_array_ro[MAX_FIXUPS]; + int fixup_map_array_wo[MAX_FIXUPS]; + int fixup_map_array_small[MAX_FIXUPS]; + int 
fixup_sk_storage_map[MAX_FIXUPS]; const char *errstr; const char *errstr_unpriv; uint32_t retval, retval_unpriv, insn_processed; + int prog_len; enum { UNDEF, ACCEPT, @@ -119,10 +127,11 @@ struct other_val { static void bpf_fill_ld_abs_vlan_push_pop(struct bpf_test *self) { - /* test: {skb->data[0], vlan_push} x 68 + {skb->data[0], vlan_pop} x 68 */ + /* test: {skb->data[0], vlan_push} x 51 + {skb->data[0], vlan_pop} x 51 */ #define PUSH_CNT 51 - unsigned int len = BPF_MAXINSNS; - struct bpf_insn *insn = self->insns; + /* jump range is limited to 16 bit. PUSH_CNT of ld_abs needs room */ + unsigned int len = (1 << 15) - PUSH_CNT * 2 * 5 * 6; + struct bpf_insn *insn = self->fill_insns; int i = 0, j, k = 0; insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1); @@ -156,12 +165,14 @@ loop: for (; i < len - 1; i++) insn[i] = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 0xbef); insn[len - 1] = BPF_EXIT_INSN(); + self->prog_len = len; } static void bpf_fill_jump_around_ld_abs(struct bpf_test *self) { - struct bpf_insn *insn = self->insns; - unsigned int len = BPF_MAXINSNS; + struct bpf_insn *insn = self->fill_insns; + /* jump range is limited to 16 bit. 
every ld_abs is replaced by 6 insns */ + unsigned int len = (1 << 15) / 6; int i = 0; insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1); @@ -171,11 +182,12 @@ static void bpf_fill_jump_around_ld_abs(struct bpf_test *self) while (i < len - 1) insn[i++] = BPF_LD_ABS(BPF_B, 1); insn[i] = BPF_EXIT_INSN(); + self->prog_len = i + 1; } static void bpf_fill_rand_ld_dw(struct bpf_test *self) { - struct bpf_insn *insn = self->insns; + struct bpf_insn *insn = self->fill_insns; uint64_t res = 0; int i = 0; @@ -193,12 +205,83 @@ static void bpf_fill_rand_ld_dw(struct bpf_test *self) insn[i++] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 32); insn[i++] = BPF_ALU64_REG(BPF_XOR, BPF_REG_0, BPF_REG_1); insn[i] = BPF_EXIT_INSN(); + self->prog_len = i + 1; res ^= (res >> 32); self->retval = (uint32_t)res; } +/* test the sequence of 1k jumps */ +static void bpf_fill_scale1(struct bpf_test *self) +{ + struct bpf_insn *insn = self->fill_insns; + int i = 0, k = 0; + + insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1); + /* test to check that the sequence of 1024 jumps is acceptable */ + while (k++ < 1024) { + insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_prandom_u32); + insn[i++] = BPF_JMP_IMM(BPF_JGT, BPF_REG_0, bpf_semi_rand_get(), 2); + insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_10); + insn[i++] = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, + -8 * (k % 64 + 1)); + } + /* every jump adds 1024 steps to insn_processed, so to stay exactly + * within 1m limit add MAX_TEST_INSNS - 1025 MOVs and 1 EXIT + */ + while (i < MAX_TEST_INSNS - 1025) + insn[i++] = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 42); + insn[i] = BPF_EXIT_INSN(); + self->prog_len = i + 1; + self->retval = 42; +} + +/* test the sequence of 1k jumps in inner most function (function depth 8)*/ +static void bpf_fill_scale2(struct bpf_test *self) +{ + struct bpf_insn *insn = self->fill_insns; + int i = 0, k = 0; + +#define FUNC_NEST 7 + for (k = 0; k < FUNC_NEST; k++) { + insn[i++] = BPF_CALL_REL(1); + insn[i++] = 
BPF_EXIT_INSN(); + } + insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1); + /* test to check that the sequence of 1024 jumps is acceptable */ + while (k++ < 1024) { + insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_prandom_u32); + insn[i++] = BPF_JMP_IMM(BPF_JGT, BPF_REG_0, bpf_semi_rand_get(), 2); + insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_10); + insn[i++] = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, + -8 * (k % (64 - 4 * FUNC_NEST) + 1)); + } + /* every jump adds 1024 steps to insn_processed, so to stay exactly + * within 1m limit add MAX_TEST_INSNS - 1025 MOVs and 1 EXIT + */ + while (i < MAX_TEST_INSNS - 1025) + insn[i++] = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 42); + insn[i] = BPF_EXIT_INSN(); + self->prog_len = i + 1; + self->retval = 42; +} + +static void bpf_fill_scale(struct bpf_test *self) +{ + switch (self->retval) { + case 1: + return bpf_fill_scale1(self); + case 2: + return bpf_fill_scale2(self); + default: + self->prog_len = 0; + break; + } +} + /* BPF_SK_LOOKUP contains 13 instructions, if you need to fix up maps */ -#define BPF_SK_LOOKUP \ +#define BPF_SK_LOOKUP(func) \ /* struct bpf_sock_tuple tuple = {} */ \ BPF_MOV64_IMM(BPF_REG_2, 0), \ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -8), \ @@ -207,13 +290,13 @@ static void bpf_fill_rand_ld_dw(struct bpf_test *self) BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -32), \ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -40), \ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -48), \ - /* sk = sk_lookup_tcp(ctx, &tuple, sizeof tuple, 0, 0) */ \ + /* sk = func(ctx, &tuple, sizeof tuple, 0, 0) */ \ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), \ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -48), \ BPF_MOV64_IMM(BPF_REG_3, sizeof(struct bpf_sock_tuple)), \ BPF_MOV64_IMM(BPF_REG_4, 0), \ BPF_MOV64_IMM(BPF_REG_5, 0), \ - BPF_EMIT_CALL(BPF_FUNC_sk_lookup_tcp) + BPF_EMIT_CALL(BPF_FUNC_ ## func) /* BPF_DIRECT_PKT_R2 contains 7 instructions, it initializes default return * value into 0 and does necessary preparation for direct 
packet access @@ -277,13 +360,15 @@ static bool skip_unsupported_map(enum bpf_map_type map_type) return false; } -static int create_map(uint32_t type, uint32_t size_key, - uint32_t size_value, uint32_t max_elem) +static int __create_map(uint32_t type, uint32_t size_key, + uint32_t size_value, uint32_t max_elem, + uint32_t extra_flags) { int fd; fd = bpf_create_map(type, size_key, size_value, max_elem, - type == BPF_MAP_TYPE_HASH ? BPF_F_NO_PREALLOC : 0); + (type == BPF_MAP_TYPE_HASH ? + BPF_F_NO_PREALLOC : 0) | extra_flags); if (fd < 0) { if (skip_unsupported_map(type)) return -1; @@ -293,6 +378,12 @@ static int create_map(uint32_t type, uint32_t size_key, return fd; } +static int create_map(uint32_t type, uint32_t size_key, + uint32_t size_value, uint32_t max_elem) +{ + return __create_map(type, size_key, size_value, max_elem, 0); +} + static void update_map(int fd, int index) { struct test_val value = { @@ -408,24 +499,6 @@ static int create_cgroup_storage(bool percpu) return fd; } -#define BTF_INFO_ENC(kind, kind_flag, vlen) \ - ((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN)) -#define BTF_TYPE_ENC(name, info, size_or_type) \ - (name), (info), (size_or_type) -#define BTF_INT_ENC(encoding, bits_offset, nr_bits) \ - ((encoding) << 24 | (bits_offset) << 16 | (nr_bits)) -#define BTF_TYPE_INT_ENC(name, encoding, bits_offset, bits, sz) \ - BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz), \ - BTF_INT_ENC(encoding, bits_offset, bits) -#define BTF_MEMBER_ENC(name, type, bits_offset) \ - (name), (type), (bits_offset) - -struct btf_raw_data { - __u32 raw_types[64]; - const char *str_sec; - __u32 str_sec_size; -}; - /* struct bpf_spin_lock { * int val; * }; @@ -500,6 +573,31 @@ static int create_map_spin_lock(void) return fd; } +static int create_sk_storage_map(void) +{ + struct bpf_create_map_attr attr = { + .name = "test_map", + .map_type = BPF_MAP_TYPE_SK_STORAGE, + .key_size = 4, + .value_size = 8, + .max_entries = 0, + .map_flags = 
BPF_F_NO_PREALLOC, + .btf_key_type_id = 1, + .btf_value_type_id = 3, + }; + int fd, btf_fd; + + btf_fd = load_btf(); + if (btf_fd < 0) + return -1; + attr.btf_fd = btf_fd; + fd = bpf_create_map_xattr(&attr); + close(attr.btf_fd); + if (fd < 0) + printf("Failed to create sk_storage_map\n"); + return fd; +} + static char bpf_vlog[UINT_MAX >> 8]; static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, @@ -519,9 +617,15 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, int *fixup_cgroup_storage = test->fixup_cgroup_storage; int *fixup_percpu_cgroup_storage = test->fixup_percpu_cgroup_storage; int *fixup_map_spin_lock = test->fixup_map_spin_lock; + int *fixup_map_array_ro = test->fixup_map_array_ro; + int *fixup_map_array_wo = test->fixup_map_array_wo; + int *fixup_map_array_small = test->fixup_map_array_small; + int *fixup_sk_storage_map = test->fixup_sk_storage_map; - if (test->fill_helper) + if (test->fill_helper) { + test->fill_insns = calloc(MAX_TEST_INSNS, sizeof(struct bpf_insn)); test->fill_helper(test); + } /* Allocating HTs with 1 elem is fine here, since we only test * for verifier and not do a runtime lookup, so the only thing @@ -642,6 +746,42 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, fixup_map_spin_lock++; } while (*fixup_map_spin_lock); } + if (*fixup_map_array_ro) { + map_fds[14] = __create_map(BPF_MAP_TYPE_ARRAY, sizeof(int), + sizeof(struct test_val), 1, + BPF_F_RDONLY_PROG); + update_map(map_fds[14], 0); + do { + prog[*fixup_map_array_ro].imm = map_fds[14]; + fixup_map_array_ro++; + } while (*fixup_map_array_ro); + } + if (*fixup_map_array_wo) { + map_fds[15] = __create_map(BPF_MAP_TYPE_ARRAY, sizeof(int), + sizeof(struct test_val), 1, + BPF_F_WRONLY_PROG); + update_map(map_fds[15], 0); + do { + prog[*fixup_map_array_wo].imm = map_fds[15]; + fixup_map_array_wo++; + } while (*fixup_map_array_wo); + } + if (*fixup_map_array_small) { + map_fds[16] = 
__create_map(BPF_MAP_TYPE_ARRAY, sizeof(int), + 1, 1, 0); + update_map(map_fds[16], 0); + do { + prog[*fixup_map_array_small].imm = map_fds[16]; + fixup_map_array_small++; + } while (*fixup_map_array_small); + } + if (*fixup_sk_storage_map) { + map_fds[17] = create_sk_storage_map(); + do { + prog[*fixup_sk_storage_map].imm = map_fds[17]; + fixup_sk_storage_map++; + } while (*fixup_sk_storage_map); + } } static int set_admin(bool admin) @@ -718,12 +858,17 @@ static void do_test_single(struct bpf_test *test, bool unpriv, prog_type = BPF_PROG_TYPE_SOCKET_FILTER; fixup_skips = skips; do_test_fixup(test, prog_type, prog, map_fds); + if (test->fill_insns) { + prog = test->fill_insns; + prog_len = test->prog_len; + } else { + prog_len = probe_filter_length(prog); + } /* If there were some map skips during fixup due to missing bpf * features, skip this test. */ if (fixup_skips != skips) return; - prog_len = probe_filter_length(prog); pflags = 0; if (test->flags & F_LOAD_WITH_STRICT_ALIGNMENT) @@ -731,7 +876,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS) pflags |= BPF_F_ANY_ALIGNMENT; fd_prog = bpf_verify_program(prog_type, prog, prog_len, pflags, - "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 1); + "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 4); if (fd_prog < 0 && !bpf_probe_prog_type(prog_type, 0)) { printf("SKIP (unsupported program type %d)\n", prog_type); skips++; @@ -830,6 +975,8 @@ static void do_test_single(struct bpf_test *test, bool unpriv, goto fail_log; } close_fds: + if (test->fill_insns) + free(test->fill_insns); close(fd_prog); for (i = 0; i < MAX_NR_MAPS; i++) close(map_fds[i]); diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index 4cdb63bf0521..9a9fc6c9b70b 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -52,6 +52,10 @@ struct ksym *ksym_search(long key) int start = 0, end = 
sym_cnt; int result; + /* kallsyms not loaded. return NULL */ + if (sym_cnt <= 0) + return NULL; + while (start < end) { size_t mid = start + (end - start) / 2; diff --git a/tools/testing/selftests/bpf/urandom_read.c b/tools/testing/selftests/bpf/urandom_read.c index 9de8b7cb4e6d..db781052758d 100644 --- a/tools/testing/selftests/bpf/urandom_read.c +++ b/tools/testing/selftests/bpf/urandom_read.c @@ -7,11 +7,19 @@ #define BUF_SIZE 256 +static __attribute__((noinline)) +void urandom_read(int fd, int count) +{ + char buf[BUF_SIZE]; + int i; + + for (i = 0; i < count; ++i) + read(fd, buf, BUF_SIZE); +} + int main(int argc, char *argv[]) { int fd = open("/dev/urandom", O_RDONLY); - int i; - char buf[BUF_SIZE]; int count = 4; if (fd < 0) @@ -20,8 +28,7 @@ int main(int argc, char *argv[]) if (argc == 2) count = atoi(argv[1]); - for (i = 0; i < count; ++i) - read(fd, buf, BUF_SIZE); + urandom_read(fd, count); close(fd); return 0; diff --git a/tools/testing/selftests/bpf/verifier/array_access.c b/tools/testing/selftests/bpf/verifier/array_access.c index 0dcecaf3ec6f..bcb83196e459 100644 --- a/tools/testing/selftests/bpf/verifier/array_access.c +++ b/tools/testing/selftests/bpf/verifier/array_access.c @@ -217,3 +217,162 @@ .result = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "valid read map access into a read-only array 1", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_ro = { 3 }, + .result = ACCEPT, + .retval = 28, +}, +{ + "valid read map access into a read-only array 2", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + 
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_csum_diff), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_array_ro = { 3 }, + .result = ACCEPT, + .retval = -29, +}, +{ + "invalid write map access into a read-only array 1", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42), + BPF_EXIT_INSN(), + }, + .fixup_map_array_ro = { 3 }, + .result = REJECT, + .errstr = "write into map forbidden", +}, +{ + "invalid write map access into a read-only array 2", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_4, 8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_skb_load_bytes), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_array_ro = { 4 }, + .result = REJECT, + .errstr = "write into map forbidden", +}, +{ + "valid write map access into a write-only array 1", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, 
BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_wo = { 3 }, + .result = ACCEPT, + .retval = 1, +}, +{ + "valid write map access into a write-only array 2", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_4, 8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_skb_load_bytes), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_array_wo = { 4 }, + .result = ACCEPT, + .retval = 0, +}, +{ + "invalid read map access into a write-only array 1", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_wo = { 3 }, + .result = REJECT, + .errstr = "read from map forbidden", +}, +{ + "invalid read map access into a write-only array 2", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, 
+ BPF_FUNC_csum_diff), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_array_wo = { 3 }, + .result = REJECT, + .errstr = "read from map forbidden", +}, diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index 4004891afa9c..fb11240b758b 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -908,6 +908,44 @@ .result = REJECT, }, { + "calls: stack depth check in dead code", + .insns = { + /* main */ + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call A */ + BPF_EXIT_INSN(), + /* A */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 2), /* call B */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* B */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call C */ + BPF_EXIT_INSN(), + /* C */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call D */ + BPF_EXIT_INSN(), + /* D */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call E */ + BPF_EXIT_INSN(), + /* E */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call F */ + BPF_EXIT_INSN(), + /* F */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call G */ + BPF_EXIT_INSN(), + /* G */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call H */ + BPF_EXIT_INSN(), + /* H */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .errstr = "call stack", + .result = REJECT, +}, +{ "calls: spill into caller stack frame", .insns = { BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), @@ -1940,3 +1978,28 @@ .errstr = "!read_ok", .result = REJECT, }, +{ + "calls: cross frame pruning - liveness propagation", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32), + BPF_MOV64_IMM(BPF_REG_8, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_MOV64_IMM(BPF_REG_8, 1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32), + BPF_MOV64_IMM(BPF_REG_9, 0), + 
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_MOV64_IMM(BPF_REG_9, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_8, 1, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, + .errstr_unpriv = "function calls to other bpf functions are allowed for root only", + .errstr = "!read_ok", + .result = REJECT, +}, diff --git a/tools/testing/selftests/bpf/verifier/ctx_skb.c b/tools/testing/selftests/bpf/verifier/ctx_skb.c index c660deb582f1..b0fda2877119 100644 --- a/tools/testing/selftests/bpf/verifier/ctx_skb.c +++ b/tools/testing/selftests/bpf/verifier/ctx_skb.c @@ -705,7 +705,6 @@ .errstr = "invalid bpf_context access", .result = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "check cb access: half, wrong type", diff --git a/tools/testing/selftests/bpf/verifier/direct_value_access.c b/tools/testing/selftests/bpf/verifier/direct_value_access.c new file mode 100644 index 000000000000..b9fb28e8e224 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/direct_value_access.c @@ -0,0 +1,347 @@ +{ + "direct map access, write test 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_LD_MAP_VALUE(BPF_REG_1, 0, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 4242), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 1 }, + .result = ACCEPT, + .retval = 1, +}, +{ + "direct map access, write test 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_LD_MAP_VALUE(BPF_REG_1, 0, 8), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 4242), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 1 }, + .result = ACCEPT, + .retval = 1, +}, +{ + "direct map access, write test 3", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_LD_MAP_VALUE(BPF_REG_1, 0, 8), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 4242), + BPF_EXIT_INSN(), + }, + 
.fixup_map_array_48b = { 1 }, + .result = ACCEPT, + .retval = 1, +}, +{ + "direct map access, write test 4", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_LD_MAP_VALUE(BPF_REG_1, 0, 40), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 4242), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 1 }, + .result = ACCEPT, + .retval = 1, +}, +{ + "direct map access, write test 5", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_LD_MAP_VALUE(BPF_REG_1, 0, 32), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 4242), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 1 }, + .result = ACCEPT, + .retval = 1, +}, +{ + "direct map access, write test 6", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_LD_MAP_VALUE(BPF_REG_1, 0, 40), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 4, 4242), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 1 }, + .result = REJECT, + .errstr = "R1 min value is outside of the array range", +}, +{ + "direct map access, write test 7", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_LD_MAP_VALUE(BPF_REG_1, 0, -1), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 4, 4242), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 1 }, + .result = REJECT, + .errstr = "direct value offset of 4294967295 is not allowed", +}, +{ + "direct map access, write test 8", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_LD_MAP_VALUE(BPF_REG_1, 0, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_1, -1, 4242), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 1 }, + .result = ACCEPT, + .retval = 1, +}, +{ + "direct map access, write test 9", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_LD_MAP_VALUE(BPF_REG_1, 0, 48), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 4242), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 1 }, + .result = REJECT, + .errstr = "invalid access to map value pointer", +}, +{ + "direct map access, write test 10", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_LD_MAP_VALUE(BPF_REG_1, 0, 47), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 4), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 1 }, + .result = ACCEPT, + .retval = 
1, +}, +{ + "direct map access, write test 11", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_LD_MAP_VALUE(BPF_REG_1, 0, 48), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 4), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 1 }, + .result = REJECT, + .errstr = "invalid access to map value pointer", +}, +{ + "direct map access, write test 12", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_LD_MAP_VALUE(BPF_REG_1, 0, (1<<29)), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 4), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 1 }, + .result = REJECT, + .errstr = "direct value offset of 536870912 is not allowed", +}, +{ + "direct map access, write test 13", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_LD_MAP_VALUE(BPF_REG_1, 0, (1<<29)-1), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 4), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 1 }, + .result = REJECT, + .errstr = "invalid access to map value pointer, value_size=48 off=536870911", +}, +{ + "direct map access, write test 14", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_LD_MAP_VALUE(BPF_REG_1, 0, 47), + BPF_LD_MAP_VALUE(BPF_REG_2, 0, 46), + BPF_ST_MEM(BPF_H, BPF_REG_2, 0, 0xffff), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 1, 3 }, + .result = ACCEPT, + .retval = 0xff, +}, +{ + "direct map access, write test 15", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_LD_MAP_VALUE(BPF_REG_1, 0, 46), + BPF_LD_MAP_VALUE(BPF_REG_2, 0, 46), + BPF_ST_MEM(BPF_H, BPF_REG_2, 0, 0xffff), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 1, 3 }, + .result = ACCEPT, + .retval = 0xffff, +}, +{ + "direct map access, write test 16", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_LD_MAP_VALUE(BPF_REG_1, 0, 46), + BPF_LD_MAP_VALUE(BPF_REG_2, 0, 47), + BPF_ST_MEM(BPF_H, BPF_REG_2, 0, 0xffff), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 1, 3 }, + .result = REJECT, + .errstr = "invalid access 
to map value, value_size=48 off=47 size=2", +}, +{ + "direct map access, write test 17", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_LD_MAP_VALUE(BPF_REG_1, 0, 46), + BPF_LD_MAP_VALUE(BPF_REG_2, 0, 46), + BPF_ST_MEM(BPF_H, BPF_REG_2, 1, 0xffff), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 1, 3 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=48 off=47 size=2", +}, +{ + "direct map access, write test 18", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_LD_MAP_VALUE(BPF_REG_1, 0, 0), + BPF_ST_MEM(BPF_H, BPF_REG_1, 0, 42), + BPF_EXIT_INSN(), + }, + .fixup_map_array_small = { 1 }, + .result = REJECT, + .errstr = "R1 min value is outside of the array range", +}, +{ + "direct map access, write test 19", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_LD_MAP_VALUE(BPF_REG_1, 0, 0), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), + BPF_EXIT_INSN(), + }, + .fixup_map_array_small = { 1 }, + .result = ACCEPT, + .retval = 1, +}, +{ + "direct map access, write test 20", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_LD_MAP_VALUE(BPF_REG_1, 0, 1), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), + BPF_EXIT_INSN(), + }, + .fixup_map_array_small = { 1 }, + .result = REJECT, + .errstr = "invalid access to map value pointer", +}, +{ + "direct map access, invalid insn test 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_VALUE, 0, 1, 0, 47), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 1 }, + .result = REJECT, + .errstr = "invalid bpf_ld_imm64 insn", +}, +{ + "direct map access, invalid insn test 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_VALUE, 1, 0, 0, 47), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 1 }, + .result = REJECT, + .errstr = "BPF_LD_IMM64 uses reserved fields", +}, +{ + "direct map access, invalid insn test 3", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + 
BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_VALUE, ~0, 0, 0, 47), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 1 }, + .result = REJECT, + .errstr = "BPF_LD_IMM64 uses reserved fields", +}, +{ + "direct map access, invalid insn test 4", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_VALUE, 0, ~0, 0, 47), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 1 }, + .result = REJECT, + .errstr = "invalid bpf_ld_imm64 insn", +}, +{ + "direct map access, invalid insn test 5", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_VALUE, ~0, ~0, 0, 47), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 1 }, + .result = REJECT, + .errstr = "invalid bpf_ld_imm64 insn", +}, +{ + "direct map access, invalid insn test 6", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_FD, ~0, 0, 0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 1 }, + .result = REJECT, + .errstr = "BPF_LD_IMM64 uses reserved fields", +}, +{ + "direct map access, invalid insn test 7", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_FD, 0, ~0, 0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 1 }, + .result = REJECT, + .errstr = "invalid bpf_ld_imm64 insn", +}, +{ + "direct map access, invalid insn test 8", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_FD, ~0, ~0, 0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 1 }, + .result = REJECT, + .errstr = "invalid bpf_ld_imm64 insn", +}, +{ + "direct map access, invalid insn test 9", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_FD, 0, 0, 0, 47), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 1 }, + .result = REJECT, + .errstr = "unrecognized bpf_ld_imm64 insn", +}, diff --git a/tools/testing/selftests/bpf/verifier/int_ptr.c 
b/tools/testing/selftests/bpf/verifier/int_ptr.c new file mode 100644 index 000000000000..ca3b4729df66 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/int_ptr.c @@ -0,0 +1,160 @@ +{ + "ARG_PTR_TO_LONG uninitialized", + .insns = { + /* bpf_strtoul arg1 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_IMM(BPF_REG_0, 0x00303036), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + + /* bpf_strtoul arg2 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_2, 4), + + /* bpf_strtoul arg3 (flags) */ + BPF_MOV64_IMM(BPF_REG_3, 0), + + /* bpf_strtoul arg4 (res) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), + + /* bpf_strtoul() */ + BPF_EMIT_CALL(BPF_FUNC_strtoul), + + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL, + .errstr = "invalid indirect read from stack off -16+0 size 8", +}, +{ + "ARG_PTR_TO_LONG half-uninitialized", + .insns = { + /* bpf_strtoul arg1 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_IMM(BPF_REG_0, 0x00303036), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + + /* bpf_strtoul arg2 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_2, 4), + + /* bpf_strtoul arg3 (flags) */ + BPF_MOV64_IMM(BPF_REG_3, 0), + + /* bpf_strtoul arg4 (res) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), + + /* bpf_strtoul() */ + BPF_EMIT_CALL(BPF_FUNC_strtoul), + + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL, + .errstr = "invalid indirect read from stack off -16+4 size 8", +}, +{ + "ARG_PTR_TO_LONG misaligned", + .insns = { + /* bpf_strtoul arg1 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + 
BPF_MOV64_IMM(BPF_REG_0, 0x00303036), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + + /* bpf_strtoul arg2 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_2, 4), + + /* bpf_strtoul arg3 (flags) */ + BPF_MOV64_IMM(BPF_REG_3, 0), + + /* bpf_strtoul arg4 (res) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -12), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 4), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), + + /* bpf_strtoul() */ + BPF_EMIT_CALL(BPF_FUNC_strtoul), + + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL, + .errstr = "misaligned stack access off (0x0; 0x0)+-20+0 size 8", +}, +{ + "ARG_PTR_TO_LONG size < sizeof(long)", + .insns = { + /* bpf_strtoul arg1 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -16), + BPF_MOV64_IMM(BPF_REG_0, 0x00303036), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + + /* bpf_strtoul arg2 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_2, 4), + + /* bpf_strtoul arg3 (flags) */ + BPF_MOV64_IMM(BPF_REG_3, 0), + + /* bpf_strtoul arg4 (res) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 12), + BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), + + /* bpf_strtoul() */ + BPF_EMIT_CALL(BPF_FUNC_strtoul), + + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL, + .errstr = "invalid stack type R4 off=-4 access_size=8", +}, +{ + "ARG_PTR_TO_LONG initialized", + .insns = { + /* bpf_strtoul arg1 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_IMM(BPF_REG_0, 0x00303036), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + + /* bpf_strtoul arg2 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_2, 4), + + /* bpf_strtoul arg3 (flags) */ + 
BPF_MOV64_IMM(BPF_REG_3, 0), + + /* bpf_strtoul arg4 (res) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), + + /* bpf_strtoul() */ + BPF_EMIT_CALL(BPF_FUNC_strtoul), + + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL, +}, diff --git a/tools/testing/selftests/bpf/verifier/ld_dw.c b/tools/testing/selftests/bpf/verifier/ld_dw.c index d2c75b889598..0f18e62f0099 100644 --- a/tools/testing/selftests/bpf/verifier/ld_dw.c +++ b/tools/testing/selftests/bpf/verifier/ld_dw.c @@ -34,3 +34,12 @@ .result = ACCEPT, .retval = 5, }, +{ + "ld_dw: xor semi-random 64 bit imms, test 5", + .insns = { }, + .data = { }, + .fill_helper = bpf_fill_rand_ld_dw, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1000000 - 6, +}, diff --git a/tools/testing/selftests/bpf/verifier/raw_tp_writable.c b/tools/testing/selftests/bpf/verifier/raw_tp_writable.c new file mode 100644 index 000000000000..95b5d70a1dc1 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/raw_tp_writable.c @@ -0,0 +1,34 @@ +{ + "raw_tracepoint_writable: reject variable offset", + .insns = { + /* r6 is our tp buffer */ + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + + BPF_LD_MAP_FD(BPF_REG_1, 0), + /* move the key (== 0) to r10-8 */ + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), + /* lookup in the map */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + + /* exit clean if null */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + + /* shift the buffer pointer to a variable location */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0), + BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_0), + /* clobber whatever's there */ + BPF_MOV64_IMM(BPF_REG_7, 4242), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_7, 0), + + 
BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 1, }, + .prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, + .errstr = "R6 invalid variable buffer offset: off=0, var_off=(0x0; 0xffffffff)", +}, diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c index 3ed3593bd8b6..ebcbf154c460 100644 --- a/tools/testing/selftests/bpf/verifier/ref_tracking.c +++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c @@ -1,7 +1,18 @@ { "reference tracking: leak potential reference", .insns = { - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), /* leak reference */ + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "Unreleased reference", + .result = REJECT, +}, +{ + "reference tracking: leak potential reference to sock_common", + .insns = { + BPF_SK_LOOKUP(skc_lookup_tcp), BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), /* leak reference */ BPF_EXIT_INSN(), }, @@ -12,7 +23,7 @@ { "reference tracking: leak potential reference on stack", .insns = { - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0), @@ -26,7 +37,7 @@ { "reference tracking: leak potential reference on stack 2", .insns = { - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0), @@ -41,7 +52,18 @@ { "reference tracking: zero potential reference", .insns = { - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), + BPF_MOV64_IMM(BPF_REG_0, 0), /* leak reference */ + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "Unreleased reference", + .result = REJECT, +}, +{ + "reference tracking: zero potential reference to sock_common", + .insns = { + BPF_SK_LOOKUP(skc_lookup_tcp), BPF_MOV64_IMM(BPF_REG_0, 0), /* leak reference */ 
BPF_EXIT_INSN(), }, @@ -52,7 +74,7 @@ { "reference tracking: copy and zero potential references", .insns = { - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_MOV64_IMM(BPF_REG_7, 0), /* leak reference */ @@ -65,7 +87,7 @@ { "reference tracking: release reference without check", .insns = { - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), /* reference in r0 may be NULL */ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), BPF_MOV64_IMM(BPF_REG_2, 0), @@ -77,9 +99,35 @@ .result = REJECT, }, { + "reference tracking: release reference to sock_common without check", + .insns = { + BPF_SK_LOOKUP(skc_lookup_tcp), + /* reference in r0 may be NULL */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "type=sock_common_or_null expected=sock", + .result = REJECT, +}, +{ "reference tracking: release reference", .insns = { - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, +}, +{ + "reference tracking: release reference to sock_common", + .insns = { + BPF_SK_LOOKUP(skc_lookup_tcp), BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), BPF_EMIT_CALL(BPF_FUNC_sk_release), @@ -91,7 +139,7 @@ { "reference tracking: release reference 2", .insns = { - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), BPF_EXIT_INSN(), @@ -104,7 +152,7 @@ { "reference tracking: release reference twice", .insns = { - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), @@ -120,7 +168,7 @@ { "reference tracking: release 
reference twice inside branch", .insns = { - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), /* goto end */ @@ -147,7 +195,7 @@ BPF_EXIT_INSN(), BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_2, offsetof(struct __sk_buff, mark)), - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 1), /* mark == 0? */ /* Leak reference in R0 */ BPF_EXIT_INSN(), @@ -175,7 +223,7 @@ BPF_EXIT_INSN(), BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_2, offsetof(struct __sk_buff, mark)), - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 4), /* mark == 0? */ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), /* sk NULL? */ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), @@ -193,7 +241,7 @@ { "reference tracking in call: free reference in subprog", .insns = { - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), /* unchecked reference */ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), BPF_MOV64_IMM(BPF_REG_0, 0), @@ -211,7 +259,7 @@ { "reference tracking in call: free reference in subprog and outside", .insns = { - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), /* unchecked reference */ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), @@ -241,7 +289,7 @@ /* subprog 1 */ BPF_MOV64_REG(BPF_REG_6, BPF_REG_4), - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), /* spill unchecked sk_ptr into stack of caller */ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), @@ -262,7 +310,7 @@ BPF_EXIT_INSN(), /* subprog 1 */ - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_EXIT_INSN(), /* return sk */ }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, @@ -291,7 +339,7 @@ BPF_EXIT_INSN(), /* subprog 2 */ - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, @@ -324,7 +372,7 @@ BPF_EXIT_INSN(), /* 
subprog 2 */ - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, @@ -334,7 +382,7 @@ "reference tracking: allow LD_ABS", .insns = { BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), BPF_EMIT_CALL(BPF_FUNC_sk_release), @@ -350,7 +398,7 @@ "reference tracking: forbid LD_ABS while holding reference", .insns = { BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_LD_ABS(BPF_B, 0), BPF_LD_ABS(BPF_H, 0), BPF_LD_ABS(BPF_W, 0), @@ -367,7 +415,7 @@ "reference tracking: allow LD_IND", .insns = { BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), BPF_EMIT_CALL(BPF_FUNC_sk_release), @@ -384,7 +432,7 @@ "reference tracking: forbid LD_IND while holding reference", .insns = { BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), BPF_MOV64_IMM(BPF_REG_7, 1), BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000), @@ -402,7 +450,7 @@ "reference tracking: check reference or tail call", .insns = { BPF_MOV64_REG(BPF_REG_7, BPF_REG_1), - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), /* if (sk) bpf_sk_release() */ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 7), @@ -424,7 +472,7 @@ "reference tracking: release reference then tail call", .insns = { BPF_MOV64_REG(BPF_REG_7, BPF_REG_1), - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), /* if (sk) bpf_sk_release() */ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), @@ -446,7 +494,7 @@ .insns = { BPF_MOV64_REG(BPF_REG_7, BPF_REG_1), /* Look up socket and store in REG_6 */ - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), /* bpf_tail_call() */ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), BPF_MOV64_IMM(BPF_REG_3, 2), @@ -470,7 +518,7 @@ 
.insns = { BPF_MOV64_REG(BPF_REG_7, BPF_REG_1), /* Look up socket and store in REG_6 */ - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), /* if (!sk) goto end */ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), @@ -492,7 +540,7 @@ { "reference tracking: mangle and release sock_or_null", .insns = { - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 5), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), @@ -506,7 +554,7 @@ { "reference tracking: mangle and release sock", .insns = { - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 5), @@ -520,7 +568,7 @@ { "reference tracking: access member", .insns = { - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_0, 4), @@ -534,7 +582,7 @@ { "reference tracking: write to member", .insns = { - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), @@ -553,7 +601,7 @@ { "reference tracking: invalid 64-bit access of member", .insns = { - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), @@ -568,7 +616,7 @@ { "reference tracking: access after release", .insns = { - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), BPF_EMIT_CALL(BPF_FUNC_sk_release), @@ -605,3 +653,171 @@ .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, }, +{ + "reference tracking: use ptr from bpf_tcp_sock() after release", + .insns = { + BPF_SK_LOOKUP(sk_lookup_tcp), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + 
BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_tcp_sock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, offsetof(struct bpf_tcp_sock, snd_cwnd)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "invalid mem access", +}, +{ + "reference tracking: use ptr from bpf_sk_fullsock() after release", + .insns = { + BPF_SK_LOOKUP(sk_lookup_tcp), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, offsetof(struct bpf_sock, type)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "invalid mem access", +}, +{ + "reference tracking: use ptr from bpf_sk_fullsock(tp) after release", + .insns = { + BPF_SK_LOOKUP(sk_lookup_tcp), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_tcp_sock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, 
BPF_REG_6, offsetof(struct bpf_sock, type)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "invalid mem access", +}, +{ + "reference tracking: use sk after bpf_sk_release(tp)", + .insns = { + BPF_SK_LOOKUP(sk_lookup_tcp), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_tcp_sock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, offsetof(struct bpf_sock, type)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "invalid mem access", +}, +{ + "reference tracking: use ptr from bpf_get_listener_sock() after bpf_sk_release(sk)", + .insns = { + BPF_SK_LOOKUP(sk_lookup_tcp), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_get_listener_sock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, offsetof(struct bpf_sock, src_port)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, +}, +{ + "reference tracking: bpf_sk_release(listen_sk)", + .insns = { + BPF_SK_LOOKUP(sk_lookup_tcp), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_get_listener_sock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), 
+ BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, offsetof(struct bpf_sock, type)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "reference has not been acquired before", +}, +{ + /* !bpf_sk_fullsock(sk) is checked but !bpf_tcp_sock(sk) is not checked */ + "reference tracking: tp->snd_cwnd after bpf_sk_fullsock(sk) and bpf_tcp_sock(sk)", + .insns = { + BPF_SK_LOOKUP(sk_lookup_tcp), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_tcp_sock), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_8, offsetof(struct bpf_tcp_sock, snd_cwnd)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "invalid mem access", +}, diff --git a/tools/testing/selftests/bpf/verifier/scale.c b/tools/testing/selftests/bpf/verifier/scale.c new file mode 100644 index 000000000000..7f868d4802e0 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/scale.c @@ -0,0 +1,18 @@ +{ + "scale: scale test 1", + .insns = { }, + .data = { }, + .fill_helper = bpf_fill_scale, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, +}, +{ + "scale: scale test 2", + .insns = { }, + .data = { }, + .fill_helper = bpf_fill_scale, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 2, +}, diff --git a/tools/testing/selftests/bpf/verifier/sock.c 
b/tools/testing/selftests/bpf/verifier/sock.c index 0ddfdf76aba5..b31cd2cf50d0 100644 --- a/tools/testing/selftests/bpf/verifier/sock.c +++ b/tools/testing/selftests/bpf/verifier/sock.c @@ -342,7 +342,7 @@ }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, - .errstr = "type=sock_common expected=sock", + .errstr = "reference has not been acquired before", }, { "bpf_sk_release(bpf_sk_fullsock(skb->sk))", @@ -380,5 +380,121 @@ }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, - .errstr = "type=tcp_sock expected=sock", + .errstr = "reference has not been acquired before", +}, +{ + "sk_storage_get(map, skb->sk, NULL, 0): value == NULL", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_sk_storage_get), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_sk_storage_map = { 11 }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, +}, +{ + "sk_storage_get(map, skb->sk, 1, 1): value == 1", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_4, 1), + BPF_MOV64_IMM(BPF_REG_3, 1), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_sk_storage_get), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_sk_storage_map = { 11 }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "R3 
type=inv expected=fp", +}, +{ + "sk_storage_get(map, skb->sk, &stack_value, 1): stack_value", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_4, 1), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_sk_storage_get), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_sk_storage_map = { 14 }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, +}, +{ + "sk_storage_get(map, skb->sk, &stack_value, 1): partially init stack_value", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_4, 1), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_sk_storage_get), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_sk_storage_map = { 14 }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "invalid indirect read from stack", +}, +{ + "bpf_map_lookup_elem(smap, &key)", + .insns = { + BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_LD_MAP_FD(BPF_REG_1, 0), + 
BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_sk_storage_map = { 3 }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "cannot pass map_type 24 into func bpf_map_lookup_elem", }, diff --git a/tools/testing/selftests/bpf/verifier/unpriv.c b/tools/testing/selftests/bpf/verifier/unpriv.c index dbaf5be947b2..91bb77c24a2e 100644 --- a/tools/testing/selftests/bpf/verifier/unpriv.c +++ b/tools/testing/selftests/bpf/verifier/unpriv.c @@ -242,7 +242,7 @@ .insns = { BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), /* struct bpf_sock *sock = bpf_sock_lookup(...); */ - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), /* u64 foo; */ /* void *target = &foo; */ @@ -276,7 +276,7 @@ .insns = { BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), /* struct bpf_sock *sock = bpf_sock_lookup(...); */ - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), /* u64 foo; */ /* void *target = &foo; */ @@ -307,7 +307,7 @@ .insns = { BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), /* struct bpf_sock *sock = bpf_sock_lookup(...); */ - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), /* u64 foo; */ /* void *target = &foo; */ @@ -339,7 +339,7 @@ .insns = { BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), /* struct bpf_sock *sock = bpf_sock_lookup(...); */ - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), /* u64 foo; */ /* void *target = &foo; */ diff --git a/tools/testing/selftests/bpf/verifier/var_off.c b/tools/testing/selftests/bpf/verifier/var_off.c index 1e536ff121a5..8504ac937809 100644 --- a/tools/testing/selftests/bpf/verifier/var_off.c +++ b/tools/testing/selftests/bpf/verifier/var_off.c @@ -40,7 +40,35 @@ .prog_type = BPF_PROG_TYPE_LWT_IN, }, { - "indirect variable-offset stack access", + "indirect variable-offset stack access, unbounded", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 6), + BPF_MOV64_IMM(BPF_REG_3, 28), + /* 
Fill the top 16 bytes of the stack. */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + /* Get an unknown value. */ + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_1, offsetof(struct bpf_sock_ops, + bytes_received)), + /* Check the lower bound but don't check the upper one. */ + BPF_JMP_IMM(BPF_JSLT, BPF_REG_4, 0, 4), + /* Point the lower bound to initialized stack. Offset is now in range + * from fp-16 to fp+0x7fffffffffffffef, i.e. max value is unbounded. + */ + BPF_ALU64_IMM(BPF_SUB, BPF_REG_4, 16), + BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_10), + BPF_MOV64_IMM(BPF_REG_5, 8), + /* Dereference it indirectly. */ + BPF_EMIT_CALL(BPF_FUNC_getsockopt), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R4 unbounded indirect variable offset stack access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SOCK_OPS, +}, +{ + "indirect variable-offset stack access, max out of bound", .insns = { /* Fill the top 8 bytes of the stack */ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), @@ -60,7 +88,161 @@ BPF_EXIT_INSN(), }, .fixup_map_hash_8b = { 5 }, - .errstr = "variable stack read R2", + .errstr = "R2 max value is outside of stack bound", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_LWT_IN, +}, +{ + "indirect variable-offset stack access, min out of bound", + .insns = { + /* Fill the top 8 bytes of the stack */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + /* Get an unknown value */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), + /* Make it small and 4-byte aligned */ + BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 516), + /* add it to fp. 
We now have either fp-516 or fp-512, but + * we don't know which + */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10), + /* dereference it indirectly */ + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 5 }, + .errstr = "R2 min value is outside of stack bound", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_LWT_IN, +}, +{ + "indirect variable-offset stack access, max_off+size > max_initialized", + .insns = { + /* Fill only the second from top 8 bytes of the stack. */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0), + /* Get an unknown value. */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), + /* Make it small and 4-byte aligned. */ + BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16), + /* Add it to fp. We now have either fp-12 or fp-16, but we don't know + * which. fp-12 size 8 is partially uninitialized stack. + */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10), + /* Dereference it indirectly. */ + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 5 }, + .errstr = "invalid indirect read from stack var_off", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_LWT_IN, +}, +{ + "indirect variable-offset stack access, min_off < min_initialized", + .insns = { + /* Fill only the top 8 bytes of the stack. */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + /* Get an unknown value */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), + /* Make it small and 4-byte aligned. */ + BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16), + /* Add it to fp. We now have either fp-12 or fp-16, but we don't know + * which. fp-16 size 8 is partially uninitialized stack. + */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10), + /* Dereference it indirectly. 
*/ + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 5 }, + .errstr = "invalid indirect read from stack var_off", .result = REJECT, .prog_type = BPF_PROG_TYPE_LWT_IN, }, +{ + "indirect variable-offset stack access, priv vs unpriv", + .insns = { + /* Fill the top 16 bytes of the stack. */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + /* Get an unknown value. */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), + /* Make it small and 4-byte aligned. */ + BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16), + /* Add it to fp. We now have either fp-12 or fp-16, we don't know + * which, but either way it points to initialized stack. + */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10), + /* Dereference it indirectly. */ + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 6 }, + .errstr_unpriv = "R2 stack pointer arithmetic goes out of range, prohibited for !root", + .result_unpriv = REJECT, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "indirect variable-offset stack access, uninitialized", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 6), + BPF_MOV64_IMM(BPF_REG_3, 28), + /* Fill the top 16 bytes of the stack. */ + BPF_ST_MEM(BPF_W, BPF_REG_10, -16, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + /* Get an unknown value. */ + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, 0), + /* Make it small and 4-byte aligned. */ + BPF_ALU64_IMM(BPF_AND, BPF_REG_4, 4), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_4, 16), + /* Add it to fp. We now have either fp-12 or fp-16, we don't know + * which, but either way it points to initialized stack. + */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_10), + BPF_MOV64_IMM(BPF_REG_5, 8), + /* Dereference it indirectly. 
*/ + BPF_EMIT_CALL(BPF_FUNC_getsockopt), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid indirect read from stack var_off", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SOCK_OPS, +}, +{ + "indirect variable-offset stack access, ok", + .insns = { + /* Fill the top 16 bytes of the stack. */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + /* Get an unknown value. */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), + /* Make it small and 4-byte aligned. */ + BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16), + /* Add it to fp. We now have either fp-12 or fp-16, we don't know + * which, but either way it points to initialized stack. + */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10), + /* Dereference it indirectly. */ + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 6 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_LWT_IN, +}, diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh new file mode 100755 index 000000000000..6d1790b5de7a --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh @@ -0,0 +1,311 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# A test for strict prioritization of traffic in the switch. Run two streams of +# traffic, each through a different ingress port, one tagged with PCP of 1, the +# other with PCP of 2. Both streams converge at one egress port, where they are +# assigned TC of, respectively, 1 and 2, with strict priority configured between +# them. In H3, we expect to see (almost) exclusively the high-priority traffic. +# +# Please see qos_mc_aware.sh for an explanation of why we use mausezahn and +# counters instead of just running iperf3. 
+# +# +---------------------------+ +-----------------------------+ +# | H1 | | H2 | +# | $h1.111 + | | + $h2.222 | +# | 192.0.2.33/28 | | | | 192.0.2.65/28 | +# | e-qos-map 0:1 | | | | e-qos-map 0:2 | +# | | | | | | +# | $h1 + | | + $h2 | +# +-----------------|---------+ +---------|-------------------+ +# | | +# +-----------------|-------------------------------------|-------------------+ +# | $swp1 + + $swp2 | +# | >1Gbps | | >1Gbps | +# | +---------------|-----------+ +----------|----------------+ | +# | | $swp1.111 + | | + $swp2.222 | | +# | | BR111 | SW | BR222 | | +# | | $swp3.111 + | | + $swp3.222 | | +# | +---------------|-----------+ +----------|----------------+ | +# | \_____________________________________/ | +# | | | +# | + $swp3 | +# | | 1Gbps bottleneck | +# | | ETS: (up n->tc n for n in 0..7) | +# | | strict priority | +# +------------------------------------|--------------------------------------+ +# | +# +--------------------|--------------------+ +# | + $h3 H3 | +# | / \ | +# | / \ | +# | $h3.111 + + $h3.222 | +# | 192.0.2.34/28 192.0.2.66/28 | +# +-----------------------------------------+ + +ALL_TESTS=" + ping_ipv4 + test_ets_strict +" + +lib_dir=$(dirname $0)/../../../net/forwarding + +NUM_NETIFS=6 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh +source qos_lib.sh + +h1_create() +{ + simple_if_init $h1 + mtu_set $h1 10000 + + vlan_create $h1 111 v$h1 192.0.2.33/28 + ip link set dev $h1.111 type vlan egress-qos-map 0:1 +} + +h1_destroy() +{ + vlan_destroy $h1 111 + + mtu_restore $h1 + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 + mtu_set $h2 10000 + + vlan_create $h2 222 v$h2 192.0.2.65/28 + ip link set dev $h2.222 type vlan egress-qos-map 0:2 +} + +h2_destroy() +{ + vlan_destroy $h2 222 + + mtu_restore $h2 + simple_if_fini $h2 +} + +h3_create() +{ + simple_if_init $h3 + mtu_set $h3 10000 + + vlan_create $h3 111 v$h3 192.0.2.34/28 + vlan_create $h3 222 v$h3 192.0.2.66/28 +} + +h3_destroy() +{ + vlan_destroy $h3 222 + 
vlan_destroy $h3 111 + + mtu_restore $h3 + simple_if_fini $h3 +} + +switch_create() +{ + ip link set dev $swp1 up + mtu_set $swp1 10000 + + ip link set dev $swp2 up + mtu_set $swp2 10000 + + # prio n -> TC n, strict scheduling + lldptool -T -i $swp3 -V ETS-CFG up2tc=0:0,1:1,2:2,3:3,4:4,5:5,6:6,7:7 + lldptool -T -i $swp3 -V ETS-CFG tsa=$( + )"0:strict,"$( + )"1:strict,"$( + )"2:strict,"$( + )"3:strict,"$( + )"4:strict,"$( + )"5:strict,"$( + )"6:strict,"$( + )"7:strict" + sleep 1 + + ip link set dev $swp3 up + mtu_set $swp3 10000 + ethtool -s $swp3 speed 1000 autoneg off + + vlan_create $swp1 111 + vlan_create $swp2 222 + vlan_create $swp3 111 + vlan_create $swp3 222 + + ip link add name br111 up type bridge vlan_filtering 0 + ip link set dev $swp1.111 master br111 + ip link set dev $swp3.111 master br111 + + ip link add name br222 up type bridge vlan_filtering 0 + ip link set dev $swp2.222 master br222 + ip link set dev $swp3.222 master br222 + + # Make sure that ingress quotas are smaller than egress so that there is + # room for both streams of traffic to be admitted to shared buffer. 
+ devlink_pool_size_thtype_set 0 dynamic 10000000 + devlink_pool_size_thtype_set 4 dynamic 10000000 + + devlink_port_pool_th_set $swp1 0 6 + devlink_tc_bind_pool_th_set $swp1 1 ingress 0 6 + + devlink_port_pool_th_set $swp2 0 6 + devlink_tc_bind_pool_th_set $swp2 2 ingress 0 6 + + devlink_tc_bind_pool_th_set $swp3 1 egress 4 7 + devlink_tc_bind_pool_th_set $swp3 2 egress 4 7 + devlink_port_pool_th_set $swp3 4 7 +} + +switch_destroy() +{ + devlink_port_pool_th_restore $swp3 4 + devlink_tc_bind_pool_th_restore $swp3 2 egress + devlink_tc_bind_pool_th_restore $swp3 1 egress + + devlink_tc_bind_pool_th_restore $swp2 2 ingress + devlink_port_pool_th_restore $swp2 0 + + devlink_tc_bind_pool_th_restore $swp1 1 ingress + devlink_port_pool_th_restore $swp1 0 + + devlink_pool_size_thtype_restore 4 + devlink_pool_size_thtype_restore 0 + + ip link del dev br222 + ip link del dev br111 + + vlan_destroy $swp3 222 + vlan_destroy $swp3 111 + vlan_destroy $swp2 222 + vlan_destroy $swp1 111 + + ethtool -s $swp3 autoneg on + mtu_restore $swp3 + ip link set dev $swp3 down + lldptool -T -i $swp3 -V ETS-CFG up2tc=0:0,1:0,2:0,3:0,4:0,5:0,6:0,7:0 + + mtu_restore $swp2 + ip link set dev $swp2 down + + mtu_restore $swp1 + ip link set dev $swp1 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + h3mac=$(mac_get $h3) + + vrf_prepare + + h1_create + h2_create + h3_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h3_destroy + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.34 " from H1" + ping_test $h2 192.0.2.66 " from H2" +} + +rel() +{ + local old=$1; shift + local new=$1; shift + + bc <<< " + scale=2 + ret = 100 * $new / $old + if (ret > 0) { ret } else { 0 } + " +} + +test_ets_strict() +{ + RET=0 + + # Run high-prio traffic on its own. 
+ start_traffic $h2.222 192.0.2.65 192.0.2.66 $h3mac + local -a rate_2 + rate_2=($(measure_rate $swp2 $h3 rx_octets_prio_2 "prio 2")) + check_err $? "Could not get high enough prio-2 ingress rate" + local rate_2_in=${rate_2[0]} + local rate_2_eg=${rate_2[1]} + stop_traffic # $h2.222 + + # Start low-prio stream. + start_traffic $h1.111 192.0.2.33 192.0.2.34 $h3mac + + local -a rate_1 + rate_1=($(measure_rate $swp1 $h3 rx_octets_prio_1 "prio 1")) + check_err $? "Could not get high enough prio-1 ingress rate" + local rate_1_in=${rate_1[0]} + local rate_1_eg=${rate_1[1]} + + # High-prio and low-prio on their own should have about the same + # throughput. + local rel21=$(rel $rate_1_eg $rate_2_eg) + check_err $(bc <<< "$rel21 < 95") + check_err $(bc <<< "$rel21 > 105") + + # Start the high-prio stream--now both streams run. + start_traffic $h2.222 192.0.2.65 192.0.2.66 $h3mac + rate_3=($(measure_rate $swp2 $h3 rx_octets_prio_2 "prio 2 w/ 1")) + check_err $? "Could not get high enough prio-2 ingress rate with prio-1" + local rate_3_in=${rate_3[0]} + local rate_3_eg=${rate_3[1]} + stop_traffic # $h2.222 + + stop_traffic # $h1.111 + + # High-prio should have about the same throughput whether or not + # low-prio is in the system. 
+ local rel32=$(rel $rate_2_eg $rate_3_eg) + check_err $(bc <<< "$rel32 < 95") + + log_test "strict priority" + echo "Ingress to switch:" + echo " p1 in rate $(humanize $rate_1_in)" + echo " p2 in rate $(humanize $rate_2_in)" + echo " p2 in rate w/ p1 $(humanize $rate_3_in)" + echo "Egress from switch:" + echo " p1 eg rate $(humanize $rate_1_eg)" + echo " p2 eg rate $(humanize $rate_2_eg) ($rel21% of p1)" + echo " p2 eg rate w/ p1 $(humanize $rate_3_eg) ($rel32% of p2)" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh new file mode 100644 index 000000000000..e80be65799ad --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh @@ -0,0 +1,98 @@ +# SPDX-License-Identifier: GPL-2.0 + +humanize() +{ + local speed=$1; shift + + for unit in bps Kbps Mbps Gbps; do + if (($(echo "$speed < 1024" | bc))); then + break + fi + + speed=$(echo "scale=1; $speed / 1024" | bc) + done + + echo "$speed${unit}" +} + +rate() +{ + local t0=$1; shift + local t1=$1; shift + local interval=$1; shift + + echo $((8 * (t1 - t0) / interval)) +} + +start_traffic() +{ + local h_in=$1; shift # Where the traffic egresses the host + local sip=$1; shift + local dip=$1; shift + local dmac=$1; shift + + $MZ $h_in -p 8000 -A $sip -B $dip -c 0 \ + -a own -b $dmac -t udp -q & + sleep 1 +} + +stop_traffic() +{ + # Suppress noise from killing mausezahn. 
+ { kill %% && wait %%; } 2>/dev/null +} + +check_rate() +{ + local rate=$1; shift + local min=$1; shift + local what=$1; shift + + if ((rate > min)); then + return 0 + fi + + echo "$what $(humanize $ir) < $(humanize $min)" > /dev/stderr + return 1 +} + +measure_rate() +{ + local sw_in=$1; shift # Where the traffic ingresses the switch + local host_in=$1; shift # Where it ingresses another host + local counter=$1; shift # Counter to use for measurement + local what=$1; shift + + local interval=10 + local i + local ret=0 + + # Dips in performance might cause momentary ingress rate to drop below + # 1Gbps. That wouldn't saturate egress and MC would thus get through, + # seemingly winning bandwidth on account of UC. Demand at least 2Gbps + # average ingress rate to somewhat mitigate this. + local min_ingress=2147483648 + + for i in {5..0}; do + local t0=$(ethtool_stats_get $host_in $counter) + local u0=$(ethtool_stats_get $sw_in $counter) + sleep $interval + local t1=$(ethtool_stats_get $host_in $counter) + local u1=$(ethtool_stats_get $sw_in $counter) + + local ir=$(rate $u0 $u1 $interval) + local er=$(rate $t0 $t1 $interval) + + if check_rate $ir $min_ingress "$what ingress rate"; then + break + fi + + # Fail the test if we can't get the throughput. 
+ if ((i == 0)); then + ret=1 + fi + done + + echo $ir $er + return $ret +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh index 117f6f35d72f..71231ad2dbfb 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh @@ -67,6 +67,8 @@ lib_dir=$(dirname $0)/../../../net/forwarding NUM_NETIFS=6 source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh +source qos_lib.sh h1_create() { @@ -140,10 +142,28 @@ switch_create() ip link set dev br111 up ip link set dev $swp2.111 master br111 ip link set dev $swp3.111 master br111 + + # Make sure that ingress quotas are smaller than egress so that there is + # room for both streams of traffic to be admitted to shared buffer. + devlink_port_pool_th_set $swp1 0 5 + devlink_tc_bind_pool_th_set $swp1 0 ingress 0 5 + + devlink_port_pool_th_set $swp2 0 5 + devlink_tc_bind_pool_th_set $swp2 1 ingress 0 5 + + devlink_port_pool_th_set $swp3 4 12 } switch_destroy() { + devlink_port_pool_th_restore $swp3 4 + + devlink_tc_bind_pool_th_restore $swp2 1 ingress + devlink_port_pool_th_restore $swp2 0 + + devlink_tc_bind_pool_th_restore $swp1 0 ingress + devlink_port_pool_th_restore $swp1 0 + ip link del dev br111 ip link del dev br1 @@ -201,107 +221,28 @@ ping_ipv4() ping_test $h2 192.0.2.130 } -humanize() -{ - local speed=$1; shift - - for unit in bps Kbps Mbps Gbps; do - if (($(echo "$speed < 1024" | bc))); then - break - fi - - speed=$(echo "scale=1; $speed / 1024" | bc) - done - - echo "$speed${unit}" -} - -rate() -{ - local t0=$1; shift - local t1=$1; shift - local interval=$1; shift - - echo $((8 * (t1 - t0) / interval)) -} - -check_rate() -{ - local rate=$1; shift - local min=$1; shift - local what=$1; shift - - if ((rate > min)); then - return 0 - fi - - echo "$what $(humanize $ir) < $(humanize $min_ingress)" > /dev/stderr - return 1 -} - -measure_uc_rate() -{ - local what=$1; 
shift - - local interval=10 - local i - local ret=0 - - # Dips in performance might cause momentary ingress rate to drop below - # 1Gbps. That wouldn't saturate egress and MC would thus get through, - # seemingly winning bandwidth on account of UC. Demand at least 2Gbps - # average ingress rate to somewhat mitigate this. - local min_ingress=2147483648 - - $MZ $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \ - -a own -b $h3mac -t udp -q & - sleep 1 - - for i in {5..0}; do - local t0=$(ethtool_stats_get $h3 rx_octets_prio_1) - local u0=$(ethtool_stats_get $swp2 rx_octets_prio_1) - sleep $interval - local t1=$(ethtool_stats_get $h3 rx_octets_prio_1) - local u1=$(ethtool_stats_get $swp2 rx_octets_prio_1) - - local ir=$(rate $u0 $u1 $interval) - local er=$(rate $t0 $t1 $interval) - - if check_rate $ir $min_ingress "$what ingress rate"; then - break - fi - - # Fail the test if we can't get the throughput. - if ((i == 0)); then - ret=1 - fi - done - - # Suppress noise from killing mausezahn. - { kill %% && wait; } 2>/dev/null - - echo $ir $er - exit $ret -} - test_mc_aware() { RET=0 local -a uc_rate - uc_rate=($(measure_uc_rate "UC-only")) + start_traffic $h2.111 192.0.2.129 192.0.2.130 $h3mac + uc_rate=($(measure_rate $swp2 $h3 rx_octets_prio_1 "UC-only")) check_err $? "Could not get high enough UC-only ingress rate" + stop_traffic local ucth1=${uc_rate[1]} - $MZ $h1 -p 8000 -c 0 -a own -b bc -t udp -q & + start_traffic $h1 own bc bc local d0=$(date +%s) local t0=$(ethtool_stats_get $h3 rx_octets_prio_0) local u0=$(ethtool_stats_get $swp1 rx_octets_prio_0) local -a uc_rate_2 - uc_rate_2=($(measure_uc_rate "UC+MC")) + start_traffic $h2.111 192.0.2.129 192.0.2.130 $h3mac + uc_rate_2=($(measure_rate $swp2 $h3 rx_octets_prio_1 "UC+MC")) check_err $? 
"Could not get high enough UC+MC ingress rate" + stop_traffic local ucth2=${uc_rate_2[1]} local d1=$(date +%s) @@ -319,8 +260,7 @@ test_mc_aware() local mc_ir=$(rate $u0 $u1 $interval) local mc_er=$(rate $t0 $t1 $interval) - # Suppress noise from killing mausezahn. - { kill %% && wait; } 2>/dev/null + stop_traffic log_test "UC performace under MC overload" @@ -344,8 +284,7 @@ test_uc_aware() { RET=0 - $MZ $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \ - -a own -b $h3mac -t udp -q & + start_traffic $h2.111 192.0.2.129 192.0.2.130 $h3mac local d0=$(date +%s) local t0=$(ethtool_stats_get $h3 rx_octets_prio_1) @@ -375,8 +314,7 @@ test_uc_aware() ((attempts == passes)) check_err $? - # Suppress noise from killing mausezahn. - { kill %% && wait; } 2>/dev/null + stop_traffic log_test "MC performace under UC overload" echo " ingress UC throughput $(humanize ${uc_ir})" diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh index c4cf6e6d800e..1c30f302a1e7 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh @@ -11,6 +11,7 @@ lib_dir=$(dirname $0)/../../../net/forwarding ALL_TESTS=" rif_set_addr_test + rif_vrf_set_addr_test rif_inherit_bridge_addr_test rif_non_inherit_bridge_addr_test vlan_interface_deletion_test @@ -26,6 +27,7 @@ ALL_TESTS=" lag_dev_deletion_test vlan_interface_uppers_test bridge_extern_learn_test + neigh_offload_test devlink_reload_test " NUM_NETIFS=2 @@ -98,6 +100,25 @@ rif_set_addr_test() ip link set dev $swp1 addr $swp1_mac } +rif_vrf_set_addr_test() +{ + # Test that it is possible to set an IP address on a VRF upper despite + # its random MAC address. + RET=0 + + ip link add name vrf-test type vrf table 10 + ip link set dev $swp1 master vrf-test + + ip -4 address add 192.0.2.1/24 dev vrf-test + check_err $? 
"failed to set IPv4 address on VRF" + ip -6 address add 2001:db8:1::1/64 dev vrf-test + check_err $? "failed to set IPv6 address on VRF" + + log_test "RIF - setting IP address on VRF" + + ip link del dev vrf-test +} + rif_inherit_bridge_addr_test() { RET=0 @@ -561,6 +582,31 @@ bridge_extern_learn_test() ip link del dev br0 } +neigh_offload_test() +{ + # Test that IPv4 and IPv6 neighbour entries are marked as offloaded + RET=0 + + ip -4 address add 192.0.2.1/24 dev $swp1 + ip -6 address add 2001:db8:1::1/64 dev $swp1 + + ip -4 neigh add 192.0.2.2 lladdr de:ad:be:ef:13:37 nud perm dev $swp1 + ip -6 neigh add 2001:db8:1::2 lladdr de:ad:be:ef:13:37 nud perm \ + dev $swp1 + + ip -4 neigh show dev $swp1 | grep 192.0.2.2 | grep -q offload + check_err $? "ipv4 neigh entry not marked as offloaded when should" + ip -6 neigh show dev $swp1 | grep 2001:db8:1::2 | grep -q offload + check_err $? "ipv6 neigh entry not marked as offloaded when should" + + log_test "neighbour offload indication" + + ip -6 neigh del 2001:db8:1::2 dev $swp1 + ip -4 neigh del 192.0.2.2 dev $swp1 + ip -6 address del 2001:db8:1::1/64 dev $swp1 + ip -4 address del 192.0.2.1/24 dev $swp1 +} + devlink_reload_test() { # Test that after executing all the above configuration tests, a diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh index a372b2f60874..fb850e0ec837 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh @@ -12,6 +12,7 @@ ALL_TESTS="single_mask_test identical_filters_test two_masks_test \ delta_two_masks_one_key_test delta_simple_rehash_test \ bloom_simple_test bloom_complex_test bloom_delta_test" NUM_NETIFS=2 +source $lib_dir/lib.sh source $lib_dir/tc_common.sh source $lib_dir/devlink_lib.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_resources.sh 
b/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_resources.sh index b1fe960e398a..6f2683cbc7d5 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_resources.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_resources.sh @@ -1,7 +1,10 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +lib_dir=$(dirname $0)/../../../../net/forwarding + NUM_NETIFS=1 +source $lib_dir/lib.sh source devlink_lib_spectrum.sh setup_prepare() diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh index e7ffc79561b7..43ba1b438f6d 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh @@ -1,8 +1,11 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +lib_dir=$(dirname $0)/../../../../net/forwarding + NUM_NETIFS=6 -source ../../../../net/forwarding/tc_common.sh +source $lib_dir/lib.sh +source $lib_dir/tc_common.sh source devlink_lib_spectrum.sh current_test="" diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 3c1f4bdf9000..f8588cca2bef 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -1,3 +1,5 @@ +include ../../../../scripts/Kbuild.include + all: top_srcdir = ../../../.. 
@@ -17,6 +19,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/state_test TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test +TEST_GEN_PROGS_x86_64 += x86_64/smm_test TEST_GEN_PROGS_x86_64 += dirty_log_test TEST_GEN_PROGS_x86_64 += clear_dirty_log_test @@ -29,8 +32,12 @@ LIBKVM += $(LIBKVM_$(UNAME_M)) INSTALL_HDR_PATH = $(top_srcdir)/usr LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include -CFLAGS += -O2 -g -std=gnu99 -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -Iinclude/$(UNAME_M) -I.. -LDFLAGS += -pthread +CFLAGS += -O2 -g -std=gnu99 -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -Iinclude/$(UNAME_M) -I.. + +no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \ + $(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie) + +LDFLAGS += -pthread $(no-pie-option) # After inclusion, $(OUTPUT) is defined and # $(TEST_GEN_PROGS) starts with $(OUTPUT)/ diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index a84785b02557..07b71ad9734a 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -102,6 +102,7 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva); struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid); void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid); int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid); +void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid); void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_mp_state *mp_state); void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs); diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index e2884c2b81ff..6063d5b2f356 100644 --- 
a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -778,6 +778,33 @@ void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index, #define MSR_IA32_APICBASE_ENABLE (1<<11) #define MSR_IA32_APICBASE_BASE (0xfffff<<12) +#define APIC_BASE_MSR 0x800 +#define X2APIC_ENABLE (1UL << 10) +#define APIC_ICR 0x300 +#define APIC_DEST_SELF 0x40000 +#define APIC_DEST_ALLINC 0x80000 +#define APIC_DEST_ALLBUT 0xC0000 +#define APIC_ICR_RR_MASK 0x30000 +#define APIC_ICR_RR_INVALID 0x00000 +#define APIC_ICR_RR_INPROG 0x10000 +#define APIC_ICR_RR_VALID 0x20000 +#define APIC_INT_LEVELTRIG 0x08000 +#define APIC_INT_ASSERT 0x04000 +#define APIC_ICR_BUSY 0x01000 +#define APIC_DEST_LOGICAL 0x00800 +#define APIC_DEST_PHYSICAL 0x00000 +#define APIC_DM_FIXED 0x00000 +#define APIC_DM_FIXED_MASK 0x00700 +#define APIC_DM_LOWEST 0x00100 +#define APIC_DM_SMI 0x00200 +#define APIC_DM_REMRD 0x00300 +#define APIC_DM_NMI 0x00400 +#define APIC_DM_INIT 0x00500 +#define APIC_DM_STARTUP 0x00600 +#define APIC_DM_EXTINT 0x00700 +#define APIC_VECTOR_MASK 0x000FF +#define APIC_ICR2 0x310 + #define MSR_IA32_TSCDEADLINE 0x000006e0 #define MSR_IA32_UCODE_WRITE 0x00000079 diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index b52cfdefecbf..4ca96b228e46 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -91,6 +91,11 @@ static void vm_open(struct kvm_vm *vm, int perm, unsigned long type) if (vm->kvm_fd < 0) exit(KSFT_SKIP); + if (!kvm_check_cap(KVM_CAP_IMMEDIATE_EXIT)) { + fprintf(stderr, "immediate_exit not available, skipping test\n"); + exit(KSFT_SKIP); + } + vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, type); TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, " "rc: %i errno: %i", vm->fd, errno); @@ -1121,6 +1126,22 @@ int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid) return rc; } +void vcpu_run_complete_io(struct kvm_vm 
*vm, uint32_t vcpuid) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + int ret; + + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); + + vcpu->state->immediate_exit = 1; + ret = ioctl(vcpu->fd, KVM_RUN, NULL); + vcpu->state->immediate_exit = 0; + + TEST_ASSERT(ret == -1 && errno == EINTR, + "KVM_RUN IOCTL didn't exit immediately, rc: %i, errno: %i", + ret, errno); +} + /* * VM VCPU Set MP State * diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index f28127f4a3af..dc7fae9fa424 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -1030,6 +1030,14 @@ struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid) nested_size, sizeof(state->nested_)); } + /* + * When KVM exits to userspace with KVM_EXIT_IO, KVM guarantees + * guest state is consistent only after userspace re-enters the + * kernel with KVM_RUN. Complete IO prior to migrating state + * to a new VM. 
+ */ + vcpu_run_complete_io(vm, vcpuid); + nmsrs = kvm_get_num_msrs(vm); list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0])); list->nmsrs = nmsrs; @@ -1093,12 +1101,6 @@ void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *s struct vcpu *vcpu = vcpu_find(vm, vcpuid); int r; - if (state->nested.size) { - r = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, &state->nested); - TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_NESTED_STATE, r: %i", - r); - } - r = ioctl(vcpu->fd, KVM_SET_XSAVE, &state->xsave); TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i", r); @@ -1130,4 +1132,10 @@ void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *s r = ioctl(vcpu->fd, KVM_SET_REGS, &state->regs); TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_REGS, r: %i", r); + + if (state->nested.size) { + r = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, &state->nested); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_NESTED_STATE, r: %i", + r); + } } diff --git a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c index d503a51fad30..7c2c4d4055a8 100644 --- a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c +++ b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c @@ -87,22 +87,25 @@ int main(int argc, char *argv[]) while (1) { rc = _vcpu_run(vm, VCPU_ID); - if (run->exit_reason == KVM_EXIT_IO) { - switch (get_ucall(vm, VCPU_ID, &uc)) { - case UCALL_SYNC: - /* emulate hypervisor clearing CR4.OSXSAVE */ - vcpu_sregs_get(vm, VCPU_ID, &sregs); - sregs.cr4 &= ~X86_CR4_OSXSAVE; - vcpu_sregs_set(vm, VCPU_ID, &sregs); - break; - case UCALL_ABORT: - TEST_ASSERT(false, "Guest CR4 bit (OSXSAVE) unsynchronized with CPUID bit."); - break; - case UCALL_DONE: - goto done; - default: - TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd); - } + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Unexpected exit reason: %u (%s),\n", + run->exit_reason, + 
exit_reason_str(run->exit_reason)); + + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_SYNC: + /* emulate hypervisor clearing CR4.OSXSAVE */ + vcpu_sregs_get(vm, VCPU_ID, &sregs); + sregs.cr4 &= ~X86_CR4_OSXSAVE; + vcpu_sregs_set(vm, VCPU_ID, &sregs); + break; + case UCALL_ABORT: + TEST_ASSERT(false, "Guest CR4 bit (OSXSAVE) unsynchronized with CPUID bit."); + break; + case UCALL_DONE: + goto done; + default: + TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd); } } diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c index c49c2a28b0eb..36669684eca5 100644 --- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c +++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c @@ -123,8 +123,6 @@ int main(int argc, char *argv[]) stage, run->exit_reason, exit_reason_str(run->exit_reason)); - memset(&regs1, 0, sizeof(regs1)); - vcpu_regs_get(vm, VCPU_ID, &regs1); switch (get_ucall(vm, VCPU_ID, &uc)) { case UCALL_ABORT: TEST_ASSERT(false, "%s at %s:%d", (const char *)uc.args[0], @@ -144,6 +142,9 @@ int main(int argc, char *argv[]) stage, (ulong)uc.args[1]); state = vcpu_save_state(vm, VCPU_ID); + memset(&regs1, 0, sizeof(regs1)); + vcpu_regs_get(vm, VCPU_ID, &regs1); + kvm_vm_release(vm); /* Restore state in a new VM. */ diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c new file mode 100644 index 000000000000..fb8086964d83 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/smm_test.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2018, Red Hat, Inc. + * + * Tests for SMM. 
+ */ +#define _GNU_SOURCE /* for program_invocation_short_name */ +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <sys/ioctl.h> + +#include "test_util.h" + +#include "kvm_util.h" + +#include "vmx.h" + +#define VCPU_ID 1 + +#define PAGE_SIZE 4096 + +#define SMRAM_SIZE 65536 +#define SMRAM_MEMSLOT ((1 << 16) | 1) +#define SMRAM_PAGES (SMRAM_SIZE / PAGE_SIZE) +#define SMRAM_GPA 0x1000000 +#define SMRAM_STAGE 0xfe + +#define STR(x) #x +#define XSTR(s) STR(s) + +#define SYNC_PORT 0xe +#define DONE 0xff + +/* + * This is compiled as normal 64-bit code, however, SMI handler is executed + * in real-address mode. To stay simple we're limiting ourselves to a mode + * independent subset of asm here. + * SMI handler always report back fixed stage SMRAM_STAGE. + */ +uint8_t smi_handler[] = { + 0xb0, SMRAM_STAGE, /* mov $SMRAM_STAGE, %al */ + 0xe4, SYNC_PORT, /* in $SYNC_PORT, %al */ + 0x0f, 0xaa, /* rsm */ +}; + +void sync_with_host(uint64_t phase) +{ + asm volatile("in $" XSTR(SYNC_PORT)", %%al \n" + : : "a" (phase)); +} + +void self_smi(void) +{ + wrmsr(APIC_BASE_MSR + (APIC_ICR >> 4), + APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_SMI); +} + +void guest_code(struct vmx_pages *vmx_pages) +{ + uint64_t apicbase = rdmsr(MSR_IA32_APICBASE); + + sync_with_host(1); + + wrmsr(MSR_IA32_APICBASE, apicbase | X2APIC_ENABLE); + + sync_with_host(2); + + self_smi(); + + sync_with_host(4); + + if (vmx_pages) { + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + + sync_with_host(5); + + self_smi(); + + sync_with_host(7); + } + + sync_with_host(DONE); +} + +int main(int argc, char *argv[]) +{ + struct vmx_pages *vmx_pages = NULL; + vm_vaddr_t vmx_pages_gva = 0; + + struct kvm_regs regs; + struct kvm_vm *vm; + struct kvm_run *run; + struct kvm_x86_state *state; + int stage, stage_reported; + + /* Create VM */ + vm = vm_create_default(VCPU_ID, 0, guest_code); + + vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + + run = 
vcpu_state(vm, VCPU_ID); + + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, SMRAM_GPA, + SMRAM_MEMSLOT, SMRAM_PAGES, 0); + TEST_ASSERT(vm_phy_pages_alloc(vm, SMRAM_PAGES, SMRAM_GPA, SMRAM_MEMSLOT) + == SMRAM_GPA, "could not allocate guest physical addresses?"); + + memset(addr_gpa2hva(vm, SMRAM_GPA), 0x0, SMRAM_SIZE); + memcpy(addr_gpa2hva(vm, SMRAM_GPA) + 0x8000, smi_handler, + sizeof(smi_handler)); + + vcpu_set_msr(vm, VCPU_ID, MSR_IA32_SMBASE, SMRAM_GPA); + + if (kvm_check_cap(KVM_CAP_NESTED_STATE)) { + vmx_pages = vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); + } else { + printf("will skip SMM test with VMX enabled\n"); + vcpu_args_set(vm, VCPU_ID, 1, 0); + } + + for (stage = 1;; stage++) { + _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Stage %d: unexpected exit reason: %u (%s),\n", + stage, run->exit_reason, + exit_reason_str(run->exit_reason)); + + memset(&regs, 0, sizeof(regs)); + vcpu_regs_get(vm, VCPU_ID, &regs); + + stage_reported = regs.rax & 0xff; + + if (stage_reported == DONE) + goto done; + + TEST_ASSERT(stage_reported == stage || + stage_reported == SMRAM_STAGE, + "Unexpected stage: #%x, got %x", + stage, stage_reported); + + state = vcpu_save_state(vm, VCPU_ID); + kvm_vm_release(vm); + kvm_vm_restart(vm, O_RDWR); + vm_vcpu_add(vm, VCPU_ID, 0, 0); + vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + vcpu_load_state(vm, VCPU_ID, state); + run = vcpu_state(vm, VCPU_ID); + free(state); + } + +done: + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c index 4b3f556265f1..e0a3c0204b7c 100644 --- a/tools/testing/selftests/kvm/x86_64/state_test.c +++ b/tools/testing/selftests/kvm/x86_64/state_test.c @@ -156,8 +156,6 @@ int main(int argc, char *argv[]) stage, run->exit_reason, exit_reason_str(run->exit_reason)); - memset(&regs1, 0, sizeof(regs1)); - vcpu_regs_get(vm, VCPU_ID, &regs1); switch (get_ucall(vm, 
VCPU_ID, &uc)) { case UCALL_ABORT: TEST_ASSERT(false, "%s at %s:%d", (const char *)uc.args[0], @@ -177,6 +175,9 @@ int main(int argc, char *argv[]) stage, (ulong)uc.args[1]); state = vcpu_save_state(vm, VCPU_ID); + memset(&regs1, 0, sizeof(regs1)); + vcpu_regs_get(vm, VCPU_ID, &regs1); + kvm_vm_release(vm); /* Restore state in a new VM. */ diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index e9c860d00416..474040448601 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -7,9 +7,7 @@ CONFIG_NET_L3_MASTER_DEV=y CONFIG_IPV6=y CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_VETH=y -CONFIG_INET_XFRM_MODE_TUNNEL=y CONFIG_NET_IPVTI=y -CONFIG_INET6_XFRM_MODE_TUNNEL=y CONFIG_IPV6_VTI=y CONFIG_DUMMY=y CONFIG_BRIDGE=y diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 1080ff55a788..9457aaeae092 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -9,7 +9,8 @@ ret=0 ksft_skip=4 # all tests in this script. 
Can be overridden with -t option -TESTS="unregister down carrier nexthop ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics" +TESTS="unregister down carrier nexthop ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics ipv4_route_v6_gw" + VERBOSE=0 PAUSE_ON_FAIL=no PAUSE=no @@ -48,6 +49,7 @@ setup() { set -e ip netns add ns1 + ip netns set ns1 auto $IP link set dev lo up ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=1 ip netns exec ns1 sysctl -qw net.ipv6.conf.all.forwarding=1 @@ -605,6 +607,39 @@ run_cmd() return $rc } +check_expected() +{ + local out="$1" + local expected="$2" + local rc=0 + + [ "${out}" = "${expected}" ] && return 0 + + if [ -z "${out}" ]; then + if [ "$VERBOSE" = "1" ]; then + printf "\nNo route entry found\n" + printf "Expected:\n" + printf " ${expected}\n" + fi + return 1 + fi + + # tricky way to convert output to 1-line without ip's + # messy '\'; this drops all extra white space + out=$(echo ${out}) + if [ "${out}" != "${expected}" ]; then + rc=1 + if [ "${VERBOSE}" = "1" ]; then + printf " Unexpected route entry. Have:\n" + printf " ${out}\n" + printf " Expected:\n" + printf " ${expected}\n\n" + fi + fi + + return $rc +} + # add route for a prefix, flushing any existing routes first # expected to be the first step of a test add_route6() @@ -652,31 +687,7 @@ check_route6() pfx=$1 out=$($IP -6 ro ls match ${pfx} | sed -e 's/ pref medium//') - [ "${out}" = "${expected}" ] && return 0 - - if [ -z "${out}" ]; then - if [ "$VERBOSE" = "1" ]; then - printf "\nNo route entry found\n" - printf "Expected:\n" - printf " ${expected}\n" - fi - return 1 - fi - - # tricky way to convert output to 1-line without ip's - # messy '\'; this drops all extra white space - out=$(echo ${out}) - if [ "${out}" != "${expected}" ]; then - rc=1 - if [ "${VERBOSE}" = "1" ]; then - printf " Unexpected route entry. 
Have:\n" - printf " ${out}\n" - printf " Expected:\n" - printf " ${expected}\n\n" - fi - fi - - return $rc + check_expected "${out}" "${expected}" } route_cleanup() @@ -698,6 +709,7 @@ route_setup() set -e ip netns add ns2 + ip netns set ns2 auto ip -netns ns2 link set dev lo up ip netns exec ns2 sysctl -qw net.ipv4.ip_forward=1 ip netns exec ns2 sysctl -qw net.ipv6.conf.all.forwarding=1 @@ -725,7 +737,7 @@ route_setup() ip -netns ns2 addr add 172.16.103.2/24 dev veth4 ip -netns ns2 addr add 172.16.104.1/24 dev dummy1 - set +ex + set +e } # assumption is that basic add of a single path route works @@ -960,7 +972,8 @@ ipv6_addr_metric_test() run_cmd "$IP li set dev dummy2 down" rc=$? if [ $rc -eq 0 ]; then - check_route6 "" + out=$($IP -6 ro ls match 2001:db8:104::/64) + check_expected "${out}" "" rc=$? fi log_test $rc 0 "Prefix route removed on link down" @@ -1091,38 +1104,13 @@ check_route() local pfx local expected="$1" local out - local rc=0 set -- $expected pfx=$1 [ "${pfx}" = "unreachable" ] && pfx=$2 out=$($IP ro ls match ${pfx}) - [ "${out}" = "${expected}" ] && return 0 - - if [ -z "${out}" ]; then - if [ "$VERBOSE" = "1" ]; then - printf "\nNo route entry found\n" - printf "Expected:\n" - printf " ${expected}\n" - fi - return 1 - fi - - # tricky way to convert output to 1-line without ip's - # messy '\'; this drops all extra white space - out=$(echo ${out}) - if [ "${out}" != "${expected}" ]; then - rc=1 - if [ "${VERBOSE}" = "1" ]; then - printf " Unexpected route entry. Have:\n" - printf " ${out}\n" - printf " Expected:\n" - printf " ${expected}\n\n" - fi - fi - - return $rc + check_expected "${out}" "${expected}" } # assumption is that basic add of a single path route works @@ -1387,7 +1375,8 @@ ipv4_addr_metric_test() run_cmd "$IP li set dev dummy2 down" rc=$? if [ $rc -eq 0 ]; then - check_route "" + out=$($IP ro ls match 172.16.104.0/24) + check_expected "${out}" "" rc=$? 
fi log_test $rc 0 "Prefix route removed on link down" @@ -1442,6 +1431,70 @@ ipv4_route_metrics_test() route_cleanup } +ipv4_route_v6_gw_test() +{ + local rc + + echo + echo "IPv4 route with IPv6 gateway tests" + + route_setup + sleep 2 + + # + # single path route + # + run_cmd "$IP ro add 172.16.104.0/24 via inet6 2001:db8:101::2" + rc=$? + log_test $rc 0 "Single path route with IPv6 gateway" + if [ $rc -eq 0 ]; then + check_route "172.16.104.0/24 via inet6 2001:db8:101::2 dev veth1" + fi + + run_cmd "ip netns exec ns1 ping -w1 -c1 172.16.104.1" + log_test $rc 0 "Single path route with IPv6 gateway - ping" + + run_cmd "$IP ro del 172.16.104.0/24 via inet6 2001:db8:101::2" + rc=$? + log_test $rc 0 "Single path route delete" + if [ $rc -eq 0 ]; then + check_route "172.16.112.0/24" + fi + + # + # multipath - v6 then v4 + # + run_cmd "$IP ro add 172.16.104.0/24 nexthop via inet6 2001:db8:101::2 dev veth1 nexthop via 172.16.103.2 dev veth3" + rc=$? + log_test $rc 0 "Multipath route add - v6 nexthop then v4" + if [ $rc -eq 0 ]; then + check_route "172.16.104.0/24 nexthop via inet6 2001:db8:101::2 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1" + fi + + run_cmd "$IP ro del 172.16.104.0/24 nexthop via 172.16.103.2 dev veth3 nexthop via inet6 2001:db8:101::2 dev veth1" + log_test $? 2 " Multipath route delete - nexthops in wrong order" + + run_cmd "$IP ro del 172.16.104.0/24 nexthop via inet6 2001:db8:101::2 dev veth1 nexthop via 172.16.103.2 dev veth3" + log_test $? 0 " Multipath route delete exact match" + + # + # multipath - v4 then v6 + # + run_cmd "$IP ro add 172.16.104.0/24 nexthop via 172.16.103.2 dev veth3 nexthop via inet6 2001:db8:101::2 dev veth1" + rc=$? 
+ log_test $rc 0 "Multipath route add - v4 nexthop then v6" + if [ $rc -eq 0 ]; then + check_route "172.16.104.0/24 nexthop via 172.16.103.2 dev veth3 weight 1 nexthop via inet6 2001:db8:101::2 dev veth1 weight 1" + fi + + run_cmd "$IP ro del 172.16.104.0/24 nexthop via inet6 2001:db8:101::2 dev veth1 nexthop via 172.16.103.2 dev veth3" + log_test $? 2 " Multipath route delete - nexthops in wrong order" + + run_cmd "$IP ro del 172.16.104.0/24 nexthop via 172.16.103.2 dev veth3 nexthop via inet6 2001:db8:101::2 dev veth1" + log_test $? 0 " Multipath route delete exact match" + + route_cleanup +} ################################################################################ # usage @@ -1511,6 +1564,7 @@ do ipv4_addr_metric) ipv4_addr_metric_test;; ipv6_route_metrics) ipv6_route_metrics_test;; ipv4_route_metrics) ipv4_route_metrics_test;; + ipv4_route_v6_gw) ipv4_route_v6_gw_test;; help) echo "Test names: $TESTS"; exit 0;; esac diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh new file mode 100755 index 000000000000..88d2472ba151 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh @@ -0,0 +1,152 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS="reportleave_test" +NUM_NETIFS=4 +CHECK_TC="yes" +TEST_GROUP="239.10.10.10" +TEST_GROUP_MAC="01:00:5e:0a:0a:0a" +source lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64 +} + +h2_destroy() +{ + simple_if_fini $h2 192.0.2.2/24 2001:db8:1::2/64 +} + +switch_create() +{ + ip link add dev br0 type bridge mcast_snooping 1 mcast_querier 1 + + ip link set dev $swp1 master br0 + ip link set dev $swp2 master br0 + + ip link set dev br0 up + ip link set dev $swp1 up + ip link set dev $swp2 up +} + +switch_destroy() +{ + ip link set dev $swp2 down + ip link set 
dev $swp1 down + + ip link del dev br0 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + + h1_create + h2_create + + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + + # Always cleanup the mcast group + ip address del dev $h2 $TEST_GROUP/32 2>&1 1>/dev/null + + h2_destroy + h1_destroy + + vrf_cleanup +} + +# return 0 if the packet wasn't seen on host2_if or 1 if it was +mcast_packet_test() +{ + local mac=$1 + local ip=$2 + local host1_if=$3 + local host2_if=$4 + local seen=0 + + # Add an ACL on `host2_if` which will tell us whether the packet + # was received by it or not. + tc qdisc add dev $host2_if ingress + tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \ + flower dst_mac $mac action drop + + $MZ $host1_if -c 1 -p 64 -b $mac -B $ip -t udp "dp=4096,sp=2048" -q + sleep 1 + + tc -j -s filter show dev $host2_if ingress \ + | jq -e ".[] | select(.options.handle == 101) \ + | select(.options.actions[0].stats.packets == 1)" &> /dev/null + if [[ $? -eq 0 ]]; then + seen=1 + fi + + tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower + tc qdisc del dev $host2_if ingress + + return $seen +} + +reportleave_test() +{ + RET=0 + ip address add dev $h2 $TEST_GROUP/32 autojoin + check_err $? "Could not join $TEST_GROUP" + + sleep 5 + bridge mdb show dev br0 | grep $TEST_GROUP 1>/dev/null + check_err $? "Report didn't create mdb entry for $TEST_GROUP" + + mcast_packet_test $TEST_GROUP_MAC $TEST_GROUP $h1 $h2 + check_fail $? "Traffic to $TEST_GROUP wasn't forwarded" + + log_test "IGMP report $TEST_GROUP" + + RET=0 + bridge mdb show dev br0 | grep $TEST_GROUP 1>/dev/null + check_err $? "mdb entry for $TEST_GROUP is missing" + + ip address del dev $h2 $TEST_GROUP/32 + check_err $? "Could not leave $TEST_GROUP" + + sleep 5 + bridge mdb show dev br0 | grep $TEST_GROUP 1>/dev/null + check_fail $? 
"Leave didn't delete mdb entry for $TEST_GROUP" + + mcast_packet_test $TEST_GROUP_MAC $TEST_GROUP $h1 $h2 + check_err $? "Traffic to $TEST_GROUP was forwarded without mdb entry" + + log_test "IGMP leave $TEST_GROUP" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh index 57cf8914910d..8553a67a2322 100644 --- a/tools/testing/selftests/net/forwarding/devlink_lib.sh +++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh @@ -2,21 +2,10 @@ # SPDX-License-Identifier: GPL-2.0 ############################################################################## -# Source library - -relative_path="${BASH_SOURCE%/*}" -if [[ "$relative_path" == "${BASH_SOURCE}" ]]; then - relative_path="." -fi - -source "$relative_path/lib.sh" - -############################################################################## # Defines -DEVLINK_DEV=$(devlink port show | grep "${NETIFS[p1]}" | \ - grep -v "${NETIFS[p1]}[0-9]" | cut -d" " -f1 | \ - rev | cut -d"/" -f2- | rev) +DEVLINK_DEV=$(devlink port show "${NETIFS[p1]}" -j \ + | jq -r '.port | keys[]' | cut -d/ -f-2) if [ -z "$DEVLINK_DEV" ]; then echo "SKIP: ${NETIFS[p1]} has no devlink device registered for it" exit 1 @@ -106,3 +95,98 @@ devlink_reload() grep -c "size_new") check_err $still_pending "Failed reload - There are still unset sizes" } + +declare -A DEVLINK_ORIG + +devlink_port_pool_threshold() +{ + local port=$1; shift + local pool=$1; shift + + devlink sb port pool show $port pool $pool -j \ + | jq '.port_pool."'"$port"'"[].threshold' +} + +devlink_port_pool_th_set() +{ + local port=$1; shift + local pool=$1; shift + local th=$1; shift + local key="port_pool($port,$pool).threshold" + + DEVLINK_ORIG[$key]=$(devlink_port_pool_threshold $port $pool) + devlink sb port pool set $port pool $pool th $th +} + +devlink_port_pool_th_restore() +{ + local port=$1; shift + local 
pool=$1; shift + local key="port_pool($port,$pool).threshold" + + devlink sb port pool set $port pool $pool th ${DEVLINK_ORIG[$key]} +} + +devlink_pool_size_thtype() +{ + local pool=$1; shift + + devlink sb pool show "$DEVLINK_DEV" pool $pool -j \ + | jq -r '.pool[][] | (.size, .thtype)' +} + +devlink_pool_size_thtype_set() +{ + local pool=$1; shift + local thtype=$1; shift + local size=$1; shift + local key="pool($pool).size_thtype" + + DEVLINK_ORIG[$key]=$(devlink_pool_size_thtype $pool) + devlink sb pool set "$DEVLINK_DEV" pool $pool size $size thtype $thtype +} + +devlink_pool_size_thtype_restore() +{ + local pool=$1; shift + local key="pool($pool).size_thtype" + local -a orig=(${DEVLINK_ORIG[$key]}) + + devlink sb pool set "$DEVLINK_DEV" pool $pool \ + size ${orig[0]} thtype ${orig[1]} +} + +devlink_tc_bind_pool_th() +{ + local port=$1; shift + local tc=$1; shift + local dir=$1; shift + + devlink sb tc bind show $port tc $tc type $dir -j \ + | jq -r '.tc_bind[][] | (.pool, .threshold)' +} + +devlink_tc_bind_pool_th_set() +{ + local port=$1; shift + local tc=$1; shift + local dir=$1; shift + local pool=$1; shift + local th=$1; shift + local key="tc_bind($port,$dir,$tc).pool_th" + + DEVLINK_ORIG[$key]=$(devlink_tc_bind_pool_th $port $tc $dir) + devlink sb tc bind set $port tc $tc type $dir pool $pool th $th +} + +devlink_tc_bind_pool_th_restore() +{ + local port=$1; shift + local tc=$1; shift + local dir=$1; shift + local key="tc_bind($port,$dir,$tc).pool_th" + local -a orig=(${DEVLINK_ORIG[$key]}) + + devlink sb tc bind set $port tc $tc type $dir \ + pool ${orig[0]} th ${orig[1]} +} diff --git a/tools/testing/selftests/net/forwarding/router_multicast.sh b/tools/testing/selftests/net/forwarding/router_multicast.sh index 109e6d785169..57e90c873a2c 100755 --- a/tools/testing/selftests/net/forwarding/router_multicast.sh +++ b/tools/testing/selftests/net/forwarding/router_multicast.sh @@ -28,7 +28,7 @@ # +------------------+ +------------------+ # 
-ALL_TESTS="mcast_v4 mcast_v6" +ALL_TESTS="mcast_v4 mcast_v6 rpf_v4 rpf_v6" NUM_NETIFS=6 source lib.sh source tc_common.sh @@ -46,10 +46,14 @@ h1_create() ip route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::1 ip route add 2001:db8:3::/64 vrf v$h1 nexthop via 2001:db8:1::1 + + tc qdisc add dev $h1 ingress } h1_destroy() { + tc qdisc del dev $h1 ingress + ip route del 2001:db8:3::/64 vrf v$h1 ip route del 2001:db8:2::/64 vrf v$h1 @@ -124,10 +128,14 @@ router_create() ip address add 2001:db8:1::1/64 dev $rp1 ip address add 2001:db8:2::1/64 dev $rp2 ip address add 2001:db8:3::1/64 dev $rp3 + + tc qdisc add dev $rp3 ingress } router_destroy() { + tc qdisc del dev $rp3 ingress + ip address del 2001:db8:3::1/64 dev $rp3 ip address del 2001:db8:2::1/64 dev $rp2 ip address del 2001:db8:1::1/64 dev $rp1 @@ -301,6 +309,103 @@ mcast_v6() log_test "mcast IPv6" } +rpf_v4() +{ + # Add a multicast route from first router port to the other two. Send + # matching packets and test that both hosts receive them. Then, send + # the same packets via the third router port and test that they do not + # reach any host due to RPF check. A filter with 'skip_hw' is added to + # test that devices capable of multicast routing offload trap those + # packets. The filter is essentially a NOP in other scenarios. 
+ + RET=0 + + tc filter add dev $h1 ingress protocol ip pref 1 handle 1 flower \ + dst_ip 225.1.2.3 ip_proto udp dst_port 12345 action drop + tc filter add dev $h2 ingress protocol ip pref 1 handle 1 flower \ + dst_ip 225.1.2.3 ip_proto udp dst_port 12345 action drop + tc filter add dev $h3 ingress protocol ip pref 1 handle 1 flower \ + dst_ip 225.1.2.3 ip_proto udp dst_port 12345 action drop + tc filter add dev $rp3 ingress protocol ip pref 1 handle 1 flower \ + skip_hw dst_ip 225.1.2.3 ip_proto udp dst_port 12345 action pass + + create_mcast_sg $rp1 198.51.100.2 225.1.2.3 $rp2 $rp3 + + $MZ $h1 -c 5 -p 128 -t udp "ttl=10,sp=54321,dp=12345" \ + -a 00:11:22:33:44:55 -b 01:00:5e:01:02:03 \ + -A 198.51.100.2 -B 225.1.2.3 -q + + tc_check_packets "dev $h2 ingress" 1 5 + check_err $? "Multicast not received on first host" + tc_check_packets "dev $h3 ingress" 1 5 + check_err $? "Multicast not received on second host" + + $MZ $h3 -c 5 -p 128 -t udp "ttl=10,sp=54321,dp=12345" \ + -a 00:11:22:33:44:55 -b 01:00:5e:01:02:03 \ + -A 198.51.100.2 -B 225.1.2.3 -q + + tc_check_packets "dev $h1 ingress" 1 0 + check_err $? "Multicast received on first host when should not" + tc_check_packets "dev $h2 ingress" 1 5 + check_err $? "Multicast received on second host when should not" + tc_check_packets "dev $rp3 ingress" 1 5 + check_err $? 
"Packets not trapped due to RPF check" + + delete_mcast_sg $rp1 198.51.100.2 225.1.2.3 $rp2 $rp3 + + tc filter del dev $rp3 ingress protocol ip pref 1 handle 1 flower + tc filter del dev $h3 ingress protocol ip pref 1 handle 1 flower + tc filter del dev $h2 ingress protocol ip pref 1 handle 1 flower + tc filter del dev $h1 ingress protocol ip pref 1 handle 1 flower + + log_test "RPF IPv4" +} + +rpf_v6() +{ + RET=0 + + tc filter add dev $h1 ingress protocol ipv6 pref 1 handle 1 flower \ + dst_ip ff0e::3 ip_proto udp dst_port 12345 action drop + tc filter add dev $h2 ingress protocol ipv6 pref 1 handle 1 flower \ + dst_ip ff0e::3 ip_proto udp dst_port 12345 action drop + tc filter add dev $h3 ingress protocol ipv6 pref 1 handle 1 flower \ + dst_ip ff0e::3 ip_proto udp dst_port 12345 action drop + tc filter add dev $rp3 ingress protocol ipv6 pref 1 handle 1 flower \ + skip_hw dst_ip ff0e::3 ip_proto udp dst_port 12345 action pass + + create_mcast_sg $rp1 2001:db8:1::2 ff0e::3 $rp2 $rp3 + + $MZ $h1 -6 -c 5 -p 128 -t udp "ttl=10,sp=54321,dp=12345" \ + -a 00:11:22:33:44:55 -b 33:33:00:00:00:03 \ + -A 2001:db8:1::2 -B ff0e::3 -q + + tc_check_packets "dev $h2 ingress" 1 5 + check_err $? "Multicast not received on first host" + tc_check_packets "dev $h3 ingress" 1 5 + check_err $? "Multicast not received on second host" + + $MZ $h3 -6 -c 5 -p 128 -t udp "ttl=10,sp=54321,dp=12345" \ + -a 00:11:22:33:44:55 -b 33:33:00:00:00:03 \ + -A 2001:db8:1::2 -B ff0e::3 -q + + tc_check_packets "dev $h1 ingress" 1 0 + check_err $? "Multicast received on first host when should not" + tc_check_packets "dev $h2 ingress" 1 5 + check_err $? "Multicast received on second host when should not" + tc_check_packets "dev $rp3 ingress" 1 5 + check_err $? 
"Packets not trapped due to RPF check" + + delete_mcast_sg $rp1 2001:db8:1::2 ff0e::3 $rp2 $rp3 + + tc filter del dev $rp3 ingress protocol ipv6 pref 1 handle 1 flower + tc filter del dev $h3 ingress protocol ipv6 pref 1 handle 1 flower + tc filter del dev $h2 ingress protocol ipv6 pref 1 handle 1 flower + tc filter del dev $h1 ingress protocol ipv6 pref 1 handle 1 flower + + log_test "RPF IPv6" +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh index 20d1077e5a3d..29bcfa84aec7 100755 --- a/tools/testing/selftests/net/forwarding/tc_flower.sh +++ b/tools/testing/selftests/net/forwarding/tc_flower.sh @@ -2,7 +2,7 @@ # SPDX-License-Identifier: GPL-2.0 ALL_TESTS="match_dst_mac_test match_src_mac_test match_dst_ip_test \ - match_src_ip_test match_ip_flags_test" + match_src_ip_test match_ip_flags_test match_pcp_test match_vlan_test" NUM_NETIFS=2 source tc_common.sh source lib.sh @@ -219,6 +219,63 @@ match_ip_flags_test() log_test "ip_flags match ($tcflags)" } +match_pcp_test() +{ + RET=0 + + vlan_create $h2 85 v$h2 192.0.2.11/24 + + tc filter add dev $h2 ingress protocol 802.1q pref 1 handle 101 \ + flower vlan_prio 6 $tcflags dst_mac $h2mac action drop + tc filter add dev $h2 ingress protocol 802.1q pref 2 handle 102 \ + flower vlan_prio 7 $tcflags dst_mac $h2mac action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -B 192.0.2.11 -Q 7:85 -t ip -q + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -B 192.0.2.11 -Q 0:85 -t ip -q + + tc_check_packets "dev $h2 ingress" 101 0 + check_err $? "Matched on specified PCP when should not" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? 
"Did not match on specified PCP" + + tc filter del dev $h2 ingress protocol 802.1q pref 2 handle 102 flower + tc filter del dev $h2 ingress protocol 802.1q pref 1 handle 101 flower + + vlan_destroy $h2 85 + + log_test "PCP match ($tcflags)" +} + +match_vlan_test() +{ + RET=0 + + vlan_create $h2 85 v$h2 192.0.2.11/24 + vlan_create $h2 75 v$h2 192.0.2.10/24 + + tc filter add dev $h2 ingress protocol 802.1q pref 1 handle 101 \ + flower vlan_id 75 $tcflags action drop + tc filter add dev $h2 ingress protocol 802.1q pref 2 handle 102 \ + flower vlan_id 85 $tcflags action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -B 192.0.2.11 -Q 0:85 -t ip -q + + tc_check_packets "dev $h2 ingress" 101 0 + check_err $? "Matched on specified VLAN when should not" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match on specified VLAN" + + tc filter del dev $h2 ingress protocol 802.1q pref 2 handle 102 flower + tc filter del dev $h2 ingress protocol 802.1q pref 1 handle 101 flower + + vlan_destroy $h2 75 + vlan_destroy $h2 85 + + log_test "VLAN match ($tcflags)" +} + setup_prepare() { h1=${NETIFS[p1]} diff --git a/tools/testing/selftests/net/forwarding/tc_vlan_modify.sh b/tools/testing/selftests/net/forwarding/tc_vlan_modify.sh new file mode 100755 index 000000000000..45378905cb97 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/tc_vlan_modify.sh @@ -0,0 +1,164 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + vlan_modify_ingress + vlan_modify_egress +" + +NUM_NETIFS=4 +CHECK_TC="yes" +source lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64 + vlan_create $h1 85 v$h1 192.0.2.17/28 2001:db8:2::1/64 +} + +h1_destroy() +{ + vlan_destroy $h1 85 + simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/28 2001:db8:1::2/64 + vlan_create $h2 65 v$h2 192.0.2.18/28 2001:db8:2::2/64 +} + +h2_destroy() +{ + vlan_destroy $h2 65 + simple_if_fini $h2 192.0.2.2/28 
2001:db8:1::2/64 +} + +switch_create() +{ + ip link add dev br0 type bridge vlan_filtering 1 mcast_snooping 0 + + ip link set dev $swp1 master br0 + ip link set dev $swp2 master br0 + + ip link set dev br0 up + ip link set dev $swp1 up + ip link set dev $swp2 up + + bridge vlan add dev $swp1 vid 85 + bridge vlan add dev $swp2 vid 65 + + bridge vlan add dev $swp2 vid 85 + bridge vlan add dev $swp1 vid 65 + + tc qdisc add dev $swp1 clsact + tc qdisc add dev $swp2 clsact +} + +switch_destroy() +{ + tc qdisc del dev $swp2 clsact + tc qdisc del dev $swp1 clsact + + bridge vlan del vid 65 dev $swp1 + bridge vlan del vid 85 dev $swp2 + + bridge vlan del vid 65 dev $swp2 + bridge vlan del vid 85 dev $swp1 + + ip link set dev $swp2 down + ip link set dev $swp1 down + + ip link del dev br0 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + + h1_create + h2_create + + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +vlan_modify_ingress() +{ + RET=0 + + ping_do $h1.85 192.0.2.18 + check_fail $? "ping between two different vlans passed when should not" + + ping6_do $h1.85 2001:db8:2::2 + check_fail $? "ping6 between two different vlans passed when should not" + + tc filter add dev $swp1 ingress protocol all pref 1 handle 1 \ + flower action vlan modify id 65 + tc filter add dev $swp2 ingress protocol all pref 1 handle 1 \ + flower action vlan modify id 85 + + ping_do $h1.85 192.0.2.18 + check_err $? "ping between two different vlans failed when should not" + + ping6_do $h1.85 2001:db8:2::2 + check_err $? "ping6 between two different vlans failed when should not" + + log_test "VLAN modify at ingress" + + tc filter del dev $swp2 ingress protocol all pref 1 handle 1 flower + tc filter del dev $swp1 ingress protocol all pref 1 handle 1 flower +} + +vlan_modify_egress() +{ + RET=0 + + ping_do $h1.85 192.0.2.18 + check_fail $? 
"ping between two different vlans passed when should not" + + ping6_do $h1.85 2001:db8:2::2 + check_fail $? "ping6 between two different vlans passed when should not" + + tc filter add dev $swp1 egress protocol all pref 1 handle 1 \ + flower action vlan modify id 85 + tc filter add dev $swp2 egress protocol all pref 1 handle 1 \ + flower action vlan modify id 65 + + ping_do $h1.85 192.0.2.18 + check_err $? "ping between two different vlans failed when should not" + + ping6_do $h1.85 2001:db8:2::2 + check_err $? "ping6 between two different vlans failed when should not" + + log_test "VLAN modify at egress" + + tc filter del dev $swp2 egress protocol all pref 1 handle 1 flower + tc filter del dev $swp1 egress protocol all pref 1 handle 1 flower +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 912b2dc50be3..524b15dabb3c 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -116,6 +116,10 @@ # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 +PAUSE_ON_FAIL=no +VERBOSE=0 +TRACING=0 + # Some systems don't have a ping6 binary anymore which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping) @@ -222,6 +226,23 @@ err_flush() { err_buf= } +run_cmd() { + cmd="$*" + + if [ "$VERBOSE" = "1" ]; then + printf " COMMAND: $cmd\n" + fi + + out="$($cmd 2>&1)" + rc=$? 
+ if [ "$VERBOSE" = "1" -a -n "$out" ]; then + echo " $out" + echo + fi + + return $rc +} + # Find the auto-generated name for this namespace nsname() { eval echo \$NS_$1 @@ -258,22 +279,22 @@ setup_fou_or_gue() { fi fi - ${ns_a} ip fou add port 5555 ipproto ${ipproto} || return 2 - ${ns_a} ip link add ${encap}_a type ${type} ${mode} local ${a_addr} remote ${b_addr} encap ${encap} encap-sport auto encap-dport 5556 || return 2 + run_cmd ${ns_a} ip fou add port 5555 ipproto ${ipproto} || return 2 + run_cmd ${ns_a} ip link add ${encap}_a type ${type} ${mode} local ${a_addr} remote ${b_addr} encap ${encap} encap-sport auto encap-dport 5556 || return 2 - ${ns_b} ip fou add port 5556 ipproto ${ipproto} - ${ns_b} ip link add ${encap}_b type ${type} ${mode} local ${b_addr} remote ${a_addr} encap ${encap} encap-sport auto encap-dport 5555 + run_cmd ${ns_b} ip fou add port 5556 ipproto ${ipproto} + run_cmd ${ns_b} ip link add ${encap}_b type ${type} ${mode} local ${b_addr} remote ${a_addr} encap ${encap} encap-sport auto encap-dport 5555 if [ "${inner}" = "4" ]; then - ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${encap}_a - ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${encap}_b + run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${encap}_a + run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${encap}_b else - ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${encap}_a - ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${encap}_b + run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${encap}_a + run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${encap}_b fi - ${ns_a} ip link set ${encap}_a up - ${ns_b} ip link set ${encap}_b up + run_cmd ${ns_a} ip link set ${encap}_a up + run_cmd ${ns_b} ip link set ${encap}_b up } setup_fou44() { @@ -319,17 +340,17 @@ setup_namespaces() { } setup_veth() { - ${ns_a} ip link add veth_a type veth peer name veth_b || return 1 - ${ns_a} 
ip link set veth_b netns ${NS_B} + run_cmd ${ns_a} ip link add veth_a type veth peer name veth_b || return 1 + run_cmd ${ns_a} ip link set veth_b netns ${NS_B} - ${ns_a} ip addr add ${veth4_a_addr}/${veth4_mask} dev veth_a - ${ns_b} ip addr add ${veth4_b_addr}/${veth4_mask} dev veth_b + run_cmd ${ns_a} ip addr add ${veth4_a_addr}/${veth4_mask} dev veth_a + run_cmd ${ns_b} ip addr add ${veth4_b_addr}/${veth4_mask} dev veth_b - ${ns_a} ip addr add ${veth6_a_addr}/${veth6_mask} dev veth_a - ${ns_b} ip addr add ${veth6_b_addr}/${veth6_mask} dev veth_b + run_cmd ${ns_a} ip addr add ${veth6_a_addr}/${veth6_mask} dev veth_a + run_cmd ${ns_b} ip addr add ${veth6_b_addr}/${veth6_mask} dev veth_b - ${ns_a} ip link set veth_a up - ${ns_b} ip link set veth_b up + run_cmd ${ns_a} ip link set veth_a up + run_cmd ${ns_b} ip link set veth_b up } setup_vti() { @@ -342,14 +363,14 @@ setup_vti() { [ ${proto} -eq 6 ] && vti_type="vti6" || vti_type="vti" - ${ns_a} ip link add vti${proto}_a type ${vti_type} local ${veth_a_addr} remote ${veth_b_addr} key 10 || return 1 - ${ns_b} ip link add vti${proto}_b type ${vti_type} local ${veth_b_addr} remote ${veth_a_addr} key 10 + run_cmd ${ns_a} ip link add vti${proto}_a type ${vti_type} local ${veth_a_addr} remote ${veth_b_addr} key 10 || return 1 + run_cmd ${ns_b} ip link add vti${proto}_b type ${vti_type} local ${veth_b_addr} remote ${veth_a_addr} key 10 - ${ns_a} ip addr add ${vti_a_addr}/${vti_mask} dev vti${proto}_a - ${ns_b} ip addr add ${vti_b_addr}/${vti_mask} dev vti${proto}_b + run_cmd ${ns_a} ip addr add ${vti_a_addr}/${vti_mask} dev vti${proto}_a + run_cmd ${ns_b} ip addr add ${vti_b_addr}/${vti_mask} dev vti${proto}_b - ${ns_a} ip link set vti${proto}_a up - ${ns_b} ip link set vti${proto}_b up + run_cmd ${ns_a} ip link set vti${proto}_a up + run_cmd ${ns_b} ip link set vti${proto}_b up } setup_vti4() { @@ -375,17 +396,17 @@ setup_vxlan_or_geneve() { opts_b="" fi - ${ns_a} ip link add ${type}_a type ${type} id 1 ${opts_a} remote 
${b_addr} ${opts} || return 1 - ${ns_b} ip link add ${type}_b type ${type} id 1 ${opts_b} remote ${a_addr} ${opts} + run_cmd ${ns_a} ip link add ${type}_a type ${type} id 1 ${opts_a} remote ${b_addr} ${opts} || return 1 + run_cmd ${ns_b} ip link add ${type}_b type ${type} id 1 ${opts_b} remote ${a_addr} ${opts} - ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${type}_a - ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${type}_b + run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${type}_a + run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${type}_b - ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${type}_a - ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${type}_b + run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${type}_a + run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${type}_b - ${ns_a} ip link set ${type}_a up - ${ns_b} ip link set ${type}_b up + run_cmd ${ns_a} ip link set ${type}_a up + run_cmd ${ns_b} ip link set ${type}_b up } setup_geneve4() { @@ -409,15 +430,15 @@ setup_xfrm() { veth_a_addr="${2}" veth_b_addr="${3}" - ${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel || return 1 - ${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel - ${ns_a} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel - ${ns_a} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel + run_cmd "${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel" || return 1 + run_cmd "${ns_a} ip 
-${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel" + run_cmd "${ns_a} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel" + run_cmd "${ns_a} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel" - ${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel - ${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel - ${ns_b} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel - ${ns_b} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel + run_cmd "${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel" + run_cmd "${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel" + run_cmd "${ns_b} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel" + run_cmd "${ns_b} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel" } setup_xfrm4() { @@ -481,7 +502,7 @@ setup() { } trace() { - [ $tracing -eq 0 ] && return + [ $TRACING -eq 0 ] && return for arg do [ "${ns_cmd}" = "" ] && ns_cmd="${arg}" && continue @@ -597,8 +618,8 @@ test_pmtu_ipvX() { mtu "${ns_b}" veth_B-R2 1500 # Create route exceptions - ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 
${dst1} > /dev/null - ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst2} > /dev/null + run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst1} + run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst2} # Check that exceptions have been created with the correct PMTU pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})" @@ -630,7 +651,7 @@ test_pmtu_ipvX() { # Decrease remote MTU on path via R2, get new exception mtu "${ns_r2}" veth_R2-B 400 mtu "${ns_b}" veth_B-R2 400 - ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1400 ${dst2} > /dev/null + run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1400 ${dst2} pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})" check_pmtu_value "lock 552" "${pmtu_2}" "exceeding MTU, with MTU < min_pmtu" || return 1 @@ -647,7 +668,7 @@ test_pmtu_ipvX() { check_pmtu_value "1500" "${pmtu_2}" "increasing local MTU" || return 1 # Get new exception - ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1400 ${dst2} > /dev/null + run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1400 ${dst2} pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})" check_pmtu_value "lock 552" "${pmtu_2}" "exceeding MTU, with MTU < min_pmtu" || return 1 } @@ -696,7 +717,7 @@ test_pmtu_ipvX_over_vxlanY_or_geneveY_exception() { mtu "${ns_a}" ${type}_a $((${ll_mtu} + 1000)) mtu "${ns_b}" ${type}_b $((${ll_mtu} + 1000)) - ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst} > /dev/null + run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst} # Check that exception was created pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})" @@ -776,7 +797,7 @@ test_pmtu_ipvX_over_fouY_or_gueY() { mtu "${ns_a}" ${encap}_a $((${ll_mtu} + 1000)) mtu "${ns_b}" ${encap}_b $((${ll_mtu} + 1000)) - ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst} > /dev/null + run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst} # Check that exception was created 
pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})" @@ -834,13 +855,13 @@ test_pmtu_vti4_exception() { # Send DF packet without exceeding link layer MTU, check that no # exception is created - ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr} > /dev/null + run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr} pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})" check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1 # Now exceed link layer MTU by one byte, check that exception is created # with the right PMTU value - ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload + 1)) ${tunnel4_b_addr} > /dev/null + run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload + 1)) ${tunnel4_b_addr} pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})" check_pmtu_value "${esp_payload_rfc4106}" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106 + 1)))" } @@ -856,7 +877,7 @@ test_pmtu_vti6_exception() { mtu "${ns_b}" veth_b 4000 mtu "${ns_a}" vti6_a 5000 mtu "${ns_b}" vti6_b 5000 - ${ns_a} ${ping6} -q -i 0.1 -w 1 -s 60000 ${tunnel6_b_addr} > /dev/null + run_cmd ${ns_a} ${ping6} -q -i 0.1 -w 1 -s 60000 ${tunnel6_b_addr} # Check that exception was created pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" @@ -902,9 +923,9 @@ test_pmtu_vti6_default_mtu() { test_pmtu_vti4_link_add_mtu() { setup namespaces || return 2 - ${ns_a} ip link add vti4_a type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10 + run_cmd ${ns_a} ip link add vti4_a type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10 [ $? 
-ne 0 ] && err " vti not supported" && return 2 - ${ns_a} ip link del vti4_a + run_cmd ${ns_a} ip link del vti4_a fail=0 @@ -912,7 +933,7 @@ test_pmtu_vti4_link_add_mtu() { max=$((65535 - 20)) # Check invalid values first for v in $((min - 1)) $((max + 1)); do - ${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10 2>/dev/null + run_cmd ${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10 # This can fail, or MTU can be adjusted to a proper value [ $? -ne 0 ] && continue mtu="$(link_get_mtu "${ns_a}" vti4_a)" @@ -920,14 +941,14 @@ test_pmtu_vti4_link_add_mtu() { err " vti tunnel created with invalid MTU ${mtu}" fail=1 fi - ${ns_a} ip link del vti4_a + run_cmd ${ns_a} ip link del vti4_a done # Now check valid values for v in ${min} 1300 ${max}; do - ${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10 + run_cmd ${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10 mtu="$(link_get_mtu "${ns_a}" vti4_a)" - ${ns_a} ip link del vti4_a + run_cmd ${ns_a} ip link del vti4_a if [ "${mtu}" != "${v}" ]; then err " vti MTU ${mtu} doesn't match configured value ${v}" fail=1 @@ -940,9 +961,9 @@ test_pmtu_vti4_link_add_mtu() { test_pmtu_vti6_link_add_mtu() { setup namespaces || return 2 - ${ns_a} ip link add vti6_a type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10 + run_cmd ${ns_a} ip link add vti6_a type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10 [ $? 
-ne 0 ] && err " vti6 not supported" && return 2 - ${ns_a} ip link del vti6_a + run_cmd ${ns_a} ip link del vti6_a fail=0 @@ -950,7 +971,7 @@ test_pmtu_vti6_link_add_mtu() { max=$((65535 - 40)) # Check invalid values first for v in $((min - 1)) $((max + 1)); do - ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10 2>/dev/null + run_cmd ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10 # This can fail, or MTU can be adjusted to a proper value [ $? -ne 0 ] && continue mtu="$(link_get_mtu "${ns_a}" vti6_a)" @@ -958,14 +979,14 @@ test_pmtu_vti6_link_add_mtu() { err " vti6 tunnel created with invalid MTU ${v}" fail=1 fi - ${ns_a} ip link del vti6_a + run_cmd ${ns_a} ip link del vti6_a done # Now check valid values for v in 68 1280 1300 $((65535 - 40)); do - ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10 + run_cmd ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10 mtu="$(link_get_mtu "${ns_a}" vti6_a)" - ${ns_a} ip link del vti6_a + run_cmd ${ns_a} ip link del vti6_a if [ "${mtu}" != "${v}" ]; then err " vti6 MTU ${mtu} doesn't match configured value ${v}" fail=1 @@ -978,19 +999,19 @@ test_pmtu_vti6_link_add_mtu() { test_pmtu_vti6_link_change_mtu() { setup namespaces || return 2 - ${ns_a} ip link add dummy0 mtu 1500 type dummy + run_cmd ${ns_a} ip link add dummy0 mtu 1500 type dummy [ $? 
-ne 0 ] && err " dummy not supported" && return 2 - ${ns_a} ip link add dummy1 mtu 3000 type dummy - ${ns_a} ip link set dummy0 up - ${ns_a} ip link set dummy1 up + run_cmd ${ns_a} ip link add dummy1 mtu 3000 type dummy + run_cmd ${ns_a} ip link set dummy0 up + run_cmd ${ns_a} ip link set dummy1 up - ${ns_a} ip addr add ${dummy6_0_addr}/${dummy6_mask} dev dummy0 - ${ns_a} ip addr add ${dummy6_1_addr}/${dummy6_mask} dev dummy1 + run_cmd ${ns_a} ip addr add ${dummy6_0_addr}/${dummy6_mask} dev dummy0 + run_cmd ${ns_a} ip addr add ${dummy6_1_addr}/${dummy6_mask} dev dummy1 fail=0 # Create vti6 interface bound to device, passing MTU, check it - ${ns_a} ip link add vti6_a mtu 1300 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr} + run_cmd ${ns_a} ip link add vti6_a mtu 1300 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr} mtu="$(link_get_mtu "${ns_a}" vti6_a)" if [ ${mtu} -ne 1300 ]; then err " vti6 MTU ${mtu} doesn't match configured value 1300" @@ -999,7 +1020,7 @@ test_pmtu_vti6_link_change_mtu() { # Move to another device with different MTU, without passing MTU, check # MTU is adjusted - ${ns_a} ip link set vti6_a type vti6 remote ${dummy6_1_addr} local ${dummy6_1_addr} + run_cmd ${ns_a} ip link set vti6_a type vti6 remote ${dummy6_1_addr} local ${dummy6_1_addr} mtu="$(link_get_mtu "${ns_a}" vti6_a)" if [ ${mtu} -ne $((3000 - 40)) ]; then err " vti MTU ${mtu} is not dummy MTU 3000 minus IPv6 header length" @@ -1007,7 +1028,7 @@ test_pmtu_vti6_link_change_mtu() { fi # Move it back, passing MTU, check MTU is not overridden - ${ns_a} ip link set vti6_a mtu 1280 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr} + run_cmd ${ns_a} ip link set vti6_a mtu 1280 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr} mtu="$(link_get_mtu "${ns_a}" vti6_a)" if [ ${mtu} -ne 1280 ]; then err " vti6 MTU ${mtu} doesn't match configured value 1280" @@ -1052,7 +1073,7 @@ test_cleanup_vxlanX_exception() { # Fill exception cache for multiple CPUs (2) # we can 
always use inner IPv4 for that for cpu in ${cpu_list}; do - taskset --cpu-list ${cpu} ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${tunnel4_b_addr} > /dev/null + run_cmd taskset --cpu-list ${cpu} ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${tunnel4_b_addr} done ${ns_a} ip link del dev veth_A-R1 & @@ -1084,29 +1105,33 @@ usage() { exit 1 } +################################################################################ +# exitcode=0 desc=0 + +while getopts :ptv o +do + case $o in + p) PAUSE_ON_FAIL=yes;; + v) VERBOSE=1;; + t) if which tcpdump > /dev/null 2>&1; then + TRACING=1 + else + echo "=== tcpdump not available, tracing disabled" + fi + ;; + *) usage;; + esac +done +shift $(($OPTIND-1)) + IFS=" " -tracing=0 for arg do - if [ "${arg}" != "${arg#--*}" ]; then - opt="${arg#--}" - if [ "${opt}" = "trace" ]; then - if which tcpdump > /dev/null 2>&1; then - tracing=1 - else - echo "=== tcpdump not available, tracing disabled" - fi - else - usage - fi - else - # Check first that all requested tests are available before - # running any - command -v > /dev/null "test_${arg}" || { echo "=== Test ${arg} not found"; usage; } - fi + # Check first that all requested tests are available before running any + command -v > /dev/null "test_${arg}" || { echo "=== Test ${arg} not found"; usage; } done trap cleanup EXIT @@ -1124,6 +1149,11 @@ for t in ${tests}; do ( unset IFS + + if [ "$VERBOSE" = "1" ]; then + printf "\n##########################################################################\n\n" + fi + eval test_${name} ret=$? cleanup @@ -1132,6 +1162,11 @@ for t in ${tests}; do printf "TEST: %-60s [ OK ]\n" "${t}" elif [ $ret -eq 1 ]; then printf "TEST: %-60s [FAIL]\n" "${t}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "Pausing. 
Hit enter to continue" + read a + fi err_flush exit 1 elif [ $ret -eq 2 ]; then diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index b447803f3f8a..b25c9fe019d2 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -696,9 +696,9 @@ kci_test_ipsec_offload() algo="aead rfc4106(gcm(aes)) 0x3132333435363738393031323334353664636261 128" srcip=192.168.123.3 dstip=192.168.123.4 - dev=simx1 - sysfsd=/sys/kernel/debug/netdevsim/$dev + sysfsd=/sys/kernel/debug/netdevsim/netdevsim0/ports/0/ sysfsf=$sysfsd/ipsec + sysfsnet=/sys/bus/netdevsim/devices/netdevsim0/net/ # setup netdevsim since dummydev doesn't have offload support modprobe netdevsim @@ -708,7 +708,11 @@ kci_test_ipsec_offload() return 1 fi - ip link add $dev type netdevsim + echo "0" > /sys/bus/netdevsim/new_device + while [ ! -d $sysfsnet ] ; do :; done + udevadm settle + dev=`ls $sysfsnet` + ip addr add $srcip dev $dev ip link set $dev up if [ ! -d $sysfsd ] ; then @@ -781,7 +785,6 @@ EOF fi # clean up any leftovers - ip link del $dev rmmod netdevsim if [ $ret -ne 0 ]; then diff --git a/tools/testing/selftests/net/run_afpackettests b/tools/testing/selftests/net/run_afpackettests index 2dc95fda7ef7..ea5938ec009a 100755 --- a/tools/testing/selftests/net/run_afpackettests +++ b/tools/testing/selftests/net/run_afpackettests @@ -6,12 +6,14 @@ if [ $(id -u) != 0 ]; then exit 0 fi +ret=0 echo "--------------------" echo "running psock_fanout test" echo "--------------------" ./in_netns.sh ./psock_fanout if [ $? -ne 0 ]; then echo "[FAIL]" + ret=1 else echo "[PASS]" fi @@ -22,6 +24,7 @@ echo "--------------------" ./in_netns.sh ./psock_tpacket if [ $? -ne 0 ]; then echo "[FAIL]" + ret=1 else echo "[PASS]" fi @@ -32,6 +35,8 @@ echo "--------------------" ./in_netns.sh ./txring_overwrite if [ $? 
-ne 0 ]; then echo "[FAIL]" + ret=1 else echo "[PASS]" fi +exit $ret diff --git a/tools/testing/selftests/net/run_netsocktests b/tools/testing/selftests/net/run_netsocktests index b093f39c298c..14e41faf2c57 100755 --- a/tools/testing/selftests/net/run_netsocktests +++ b/tools/testing/selftests/net/run_netsocktests @@ -7,7 +7,7 @@ echo "--------------------" ./socket if [ $? -ne 0 ]; then echo "[FAIL]" + exit 1 else echo "[PASS]" fi - diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index c9ff2b47bd1c..3e6d1bcc2894 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for netfilter selftests -TEST_PROGS := nft_trans_stress.sh nft_nat.sh +TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \ + conntrack_icmp_related.sh include ../lib.mk diff --git a/tools/testing/selftests/netfilter/bridge_brouter.sh b/tools/testing/selftests/netfilter/bridge_brouter.sh new file mode 100755 index 000000000000..29f3955b9af7 --- /dev/null +++ b/tools/testing/selftests/netfilter/bridge_brouter.sh @@ -0,0 +1,146 @@ +#!/bin/bash +# +# This test is for bridge 'brouting', i.e. make some packets being routed +# rather than getting bridged even though they arrive on interface that is +# part of a bridge. + +# eth0 br0 eth0 +# setup is: ns1 <-> ns0 <-> ns2 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +ret=0 + +ebtables -V > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ebtables" + exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +ip netns add ns0 +ip netns add ns1 +ip netns add ns2 + +ip link add veth0 netns ns0 type veth peer name eth0 netns ns1 +if [ $? 
-ne 0 ]; then + echo "SKIP: Can't create veth device" + exit $ksft_skip +fi +ip link add veth1 netns ns0 type veth peer name eth0 netns ns2 + +ip -net ns0 link set lo up +ip -net ns0 link set veth0 up +ip -net ns0 link set veth1 up + +ip -net ns0 link add br0 type bridge +if [ $? -ne 0 ]; then + echo "SKIP: Can't create bridge br0" + exit $ksft_skip +fi + +ip -net ns0 link set veth0 master br0 +ip -net ns0 link set veth1 master br0 +ip -net ns0 link set br0 up +ip -net ns0 addr add 10.0.0.1/24 dev br0 + +# place both in same subnet, ns1 and ns2 connected via ns0:br0 +for i in 1 2; do + ip -net ns$i link set lo up + ip -net ns$i link set eth0 up + ip -net ns$i addr add 10.0.0.1$i/24 dev eth0 +done + +test_ebtables_broute() +{ + local cipt + + # redirect is needed so the dstmac is rewritten to the bridge itself, + # ip stack won't process OTHERHOST (foreign unicast mac) packets. + ip netns exec ns0 ebtables -t broute -A BROUTING -p ipv4 --ip-protocol icmp -j redirect --redirect-target=DROP + if [ $? -ne 0 ]; then + echo "SKIP: Could not add ebtables broute redirect rule" + return $ksft_skip + fi + + # ping netns1, expected to not work (ip forwarding is off) + ip netns exec ns1 ping -q -c 1 10.0.0.12 > /dev/null 2>&1 + if [ $? -eq 0 ]; then + echo "ERROR: ping works, should have failed" 1>&2 + return 1 + fi + + # enable forwarding on both interfaces. + # neither needs an ip address, but at least the bridge needs + # an ip address in same network segment as ns1 and ns2 (ns0 + # needs to be able to determine route for to-be-forwarded packet). + ip netns exec ns0 sysctl -q net.ipv4.conf.veth0.forwarding=1 + ip netns exec ns0 sysctl -q net.ipv4.conf.veth1.forwarding=1 + + sleep 1 + + ip netns exec ns1 ping -q -c 1 10.0.0.12 > /dev/null + if [ $? 
-ne 0 ]; then + echo "ERROR: ping did not work, but it should (broute+forward)" 1>&2 + return 1 + fi + + echo "PASS: ns1/ns2 connectivity with active broute rule" + ip netns exec ns0 ebtables -t broute -F + + # ping netns1, expected to work (frames are bridged) + ip netns exec ns1 ping -q -c 1 10.0.0.12 > /dev/null + if [ $? -ne 0 ]; then + echo "ERROR: ping did not work, but it should (bridged)" 1>&2 + return 1 + fi + + ip netns exec ns0 ebtables -t filter -A FORWARD -p ipv4 --ip-protocol icmp -j DROP + + # ping netns1, expected to not work (DROP in bridge forward) + ip netns exec ns1 ping -q -c 1 10.0.0.12 > /dev/null 2>&1 + if [ $? -eq 0 ]; then + echo "ERROR: ping works, should have failed (icmp forward drop)" 1>&2 + return 1 + fi + + # re-activate brouter + ip netns exec ns0 ebtables -t broute -A BROUTING -p ipv4 --ip-protocol icmp -j redirect --redirect-target=DROP + + ip netns exec ns2 ping -q -c 1 10.0.0.11 > /dev/null + if [ $? -ne 0 ]; then + echo "ERROR: ping did not work, but it should (broute+forward 2)" 1>&2 + return 1 + fi + + echo "PASS: ns1/ns2 connectivity with active broute rule and bridge forward drop" + return 0 +} + +# test basic connectivity +ip netns exec ns1 ping -c 1 -q 10.0.0.12 > /dev/null +if [ $? -ne 0 ]; then + echo "ERROR: Could not reach ns2 from ns1" 1>&2 + ret=1 +fi + +ip netns exec ns2 ping -c 1 -q 10.0.0.11 > /dev/null +if [ $? -ne 0 ]; then + echo "ERROR: Could not reach ns1 from ns2" 1>&2 + ret=1 +fi + +if [ $ret -eq 0 ];then + echo "PASS: netns connectivity: ns1 and ns2 can reach each other" +fi + +test_ebtables_broute +ret=$? 
+for i in 0 1 2; do ip netns del ns$i;done + +exit $ret diff --git a/tools/testing/selftests/netfilter/conntrack_icmp_related.sh b/tools/testing/selftests/netfilter/conntrack_icmp_related.sh new file mode 100755 index 000000000000..b48e1833bc89 --- /dev/null +++ b/tools/testing/selftests/netfilter/conntrack_icmp_related.sh @@ -0,0 +1,283 @@ +#!/bin/bash +# +# check that ICMP df-needed/pkttoobig icmp are set are set as related +# state +# +# Setup is: +# +# nsclient1 -> nsrouter1 -> nsrouter2 -> nsclient2 +# MTU 1500, except for nsrouter2 <-> nsclient2 link (1280). +# ping nsclient2 from nsclient1, checking that conntrack did set RELATED +# 'fragmentation needed' icmp packet. +# +# In addition, nsrouter1 will perform IP masquerading, i.e. also +# check the icmp errors are propagated to the correct host as per +# nat of "established" icmp-echo "connection". + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +ret=0 + +nft --version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without nft tool" + exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +cleanup() { + for i in 1 2;do ip netns del nsclient$i;done + for i in 1 2;do ip netns del nsrouter$i;done +} + +ipv4() { + echo -n 192.168.$1.2 +} + +ipv6 () { + echo -n dead:$1::2 +} + +check_counter() +{ + ns=$1 + name=$2 + expect=$3 + local lret=0 + + cnt=$(ip netns exec $ns nft list counter inet filter "$name" | grep -q "$expect") + if [ $? -ne 0 ]; then + echo "ERROR: counter $name in $ns has unexpected value (expected $expect)" 1>&2 + ip netns exec $ns nft list counter inet filter "$name" 1>&2 + lret=1 + fi + + return $lret +} + +check_unknown() +{ + expect="packets 0 bytes 0" + for n in nsclient1 nsclient2 nsrouter1 nsrouter2; do + check_counter $n "unknown" "$expect" + if [ $? 
-ne 0 ] ;then + return 1 + fi + done + + return 0 +} + +for n in nsclient1 nsclient2 nsrouter1 nsrouter2; do + ip netns add $n + ip -net $n link set lo up +done + +DEV=veth0 +ip link add $DEV netns nsclient1 type veth peer name eth1 netns nsrouter1 +DEV=veth0 +ip link add $DEV netns nsclient2 type veth peer name eth1 netns nsrouter2 + +DEV=veth0 +ip link add $DEV netns nsrouter1 type veth peer name eth2 netns nsrouter2 + +DEV=veth0 +for i in 1 2; do + ip -net nsclient$i link set $DEV up + ip -net nsclient$i addr add $(ipv4 $i)/24 dev $DEV + ip -net nsclient$i addr add $(ipv6 $i)/64 dev $DEV +done + +ip -net nsrouter1 link set eth1 up +ip -net nsrouter1 link set veth0 up + +ip -net nsrouter2 link set eth1 up +ip -net nsrouter2 link set eth2 up + +ip -net nsclient1 route add default via 192.168.1.1 +ip -net nsclient1 -6 route add default via dead:1::1 + +ip -net nsclient2 route add default via 192.168.2.1 +ip -net nsclient2 route add default via dead:2::1 + +i=3 +ip -net nsrouter1 addr add 192.168.1.1/24 dev eth1 +ip -net nsrouter1 addr add 192.168.3.1/24 dev veth0 +ip -net nsrouter1 addr add dead:1::1/64 dev eth1 +ip -net nsrouter1 addr add dead:3::1/64 dev veth0 +ip -net nsrouter1 route add default via 192.168.3.10 +ip -net nsrouter1 -6 route add default via dead:3::10 + +ip -net nsrouter2 addr add 192.168.2.1/24 dev eth1 +ip -net nsrouter2 addr add 192.168.3.10/24 dev eth2 +ip -net nsrouter2 addr add dead:2::1/64 dev eth1 +ip -net nsrouter2 addr add dead:3::10/64 dev eth2 +ip -net nsrouter2 route add default via 192.168.3.1 +ip -net nsrouter2 route add default via dead:3::1 + +sleep 2 +for i in 4 6; do + ip netns exec nsrouter1 sysctl -q net.ipv$i.conf.all.forwarding=1 + ip netns exec nsrouter2 sysctl -q net.ipv$i.conf.all.forwarding=1 +done + +for netns in nsrouter1 nsrouter2; do +ip netns exec $netns nft -f - <<EOF +table inet filter { + counter unknown { } + counter related { } + chain forward { + type filter hook forward priority 0; policy accept; + meta 
l4proto icmpv6 icmpv6 type "packet-too-big" ct state "related" counter name "related" accept + meta l4proto icmp icmp type "destination-unreachable" ct state "related" counter name "related" accept + meta l4proto { icmp, icmpv6 } ct state new,established accept + counter name "unknown" drop + } +} +EOF +done + +ip netns exec nsclient1 nft -f - <<EOF +table inet filter { + counter unknown { } + counter related { } + chain input { + type filter hook input priority 0; policy accept; + meta l4proto { icmp, icmpv6 } ct state established,untracked accept + + meta l4proto { icmp, icmpv6 } ct state "related" counter name "related" accept + counter name "unknown" drop + } +} +EOF + +ip netns exec nsclient2 nft -f - <<EOF +table inet filter { + counter unknown { } + counter new { } + counter established { } + + chain input { + type filter hook input priority 0; policy accept; + meta l4proto { icmp, icmpv6 } ct state established,untracked accept + + meta l4proto { icmp, icmpv6 } ct state "new" counter name "new" accept + meta l4proto { icmp, icmpv6 } ct state "established" counter name "established" accept + counter name "unknown" drop + } + chain output { + type filter hook output priority 0; policy accept; + meta l4proto { icmp, icmpv6 } ct state established,untracked accept + + meta l4proto { icmp, icmpv6 } ct state "new" counter name "new" + meta l4proto { icmp, icmpv6 } ct state "established" counter name "established" + counter name "unknown" drop + } +} +EOF + + +# make sure NAT core rewrites address of icmp error if nat is used according to +# conntrack nat information (icmp error will be directed at nsrouter1 address, +# but it needs to be routed to nsclient1 address). 
+ip netns exec nsrouter1 nft -f - <<EOF +table ip nat { + chain postrouting { + type nat hook postrouting priority 0; policy accept; + ip protocol icmp oifname "veth0" counter masquerade + } +} +table ip6 nat { + chain postrouting { + type nat hook postrouting priority 0; policy accept; + ip6 nexthdr icmpv6 oifname "veth0" counter masquerade + } +} +EOF + +ip netns exec nsrouter2 ip link set eth1 mtu 1280 +ip netns exec nsclient2 ip link set veth0 mtu 1280 +sleep 1 + +ip netns exec nsclient1 ping -c 1 -s 1000 -q -M do 192.168.2.2 >/dev/null +if [ $? -ne 0 ]; then + echo "ERROR: netns ip routing/connectivity broken" 1>&2 + cleanup + exit 1 +fi +ip netns exec nsclient1 ping6 -q -c 1 -s 1000 dead:2::2 >/dev/null +if [ $? -ne 0 ]; then + echo "ERROR: netns ipv6 routing/connectivity broken" 1>&2 + cleanup + exit 1 +fi + +check_unknown +if [ $? -ne 0 ]; then + ret=1 +fi + +expect="packets 0 bytes 0" +for netns in nsrouter1 nsrouter2 nsclient1;do + check_counter "$netns" "related" "$expect" + if [ $? -ne 0 ]; then + ret=1 + fi +done + +expect="packets 2 bytes 2076" +check_counter nsclient2 "new" "$expect" +if [ $? -ne 0 ]; then + ret=1 +fi + +ip netns exec nsclient1 ping -q -c 1 -s 1300 -M do 192.168.2.2 > /dev/null +if [ $? -eq 0 ]; then + echo "ERROR: ping should have failed with PMTU too big error" 1>&2 + ret=1 +fi + +# nsrouter2 should have generated the icmp error, so +# related counter should be 0 (its in forward). +expect="packets 0 bytes 0" +check_counter "nsrouter2" "related" "$expect" +if [ $? -ne 0 ]; then + ret=1 +fi + +# but nsrouter1 should have seen it, same for nsclient1. +expect="packets 1 bytes 576" +for netns in nsrouter1 nsclient1;do + check_counter "$netns" "related" "$expect" + if [ $? -ne 0 ]; then + ret=1 + fi +done + +ip netns exec nsclient1 ping6 -c 1 -s 1300 dead:2::2 > /dev/null +if [ $? 
-eq 0 ]; then + echo "ERROR: ping6 should have failed with PMTU too big error" 1>&2 + ret=1 +fi + +expect="packets 2 bytes 1856" +for netns in nsrouter1 nsclient1;do + check_counter "$netns" "related" "$expect" + if [ $? -ne 0 ]; then + ret=1 + fi +done + +if [ $ret -eq 0 ];then + echo "PASS: icmp mtu error had RELATED state" +else + echo "ERROR: icmp error RELATED state test has failed" +fi + +cleanup +exit $ret diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh index 8ec76681605c..21159f5f3362 100755 --- a/tools/testing/selftests/netfilter/nft_nat.sh +++ b/tools/testing/selftests/netfilter/nft_nat.sh @@ -6,6 +6,7 @@ # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 ret=0 +test_inet_nat=true nft --version > /dev/null 2>&1 if [ $? -ne 0 ];then @@ -141,17 +142,24 @@ reset_counters() test_local_dnat6() { + local family=$1 local lret=0 + local IPF="" + + if [ $family = "inet" ];then + IPF="ip6" + fi + ip netns exec ns0 nft -f - <<EOF -table ip6 nat { +table $family nat { chain output { type nat hook output priority 0; policy accept; - ip6 daddr dead:1::99 dnat to dead:2::99 + ip6 daddr dead:1::99 dnat $IPF to dead:2::99 } } EOF if [ $? 
-ne 0 ]; then - echo "SKIP: Could not add add ip6 dnat hook" + echo "SKIP: Could not add add $family dnat hook" return $ksft_skip fi @@ -201,7 +209,7 @@ EOF fi done - test $lret -eq 0 && echo "PASS: ipv6 ping to ns1 was NATted to ns2" + test $lret -eq 0 && echo "PASS: ipv6 ping to ns1 was $family NATted to ns2" ip netns exec ns0 nft flush chain ip6 nat output return $lret @@ -209,15 +217,32 @@ EOF test_local_dnat() { + local family=$1 local lret=0 -ip netns exec ns0 nft -f - <<EOF -table ip nat { + local IPF="" + + if [ $family = "inet" ];then + IPF="ip" + fi + +ip netns exec ns0 nft -f - <<EOF 2>/dev/null +table $family nat { chain output { type nat hook output priority 0; policy accept; - ip daddr 10.0.1.99 dnat to 10.0.2.99 + ip daddr 10.0.1.99 dnat $IPF to 10.0.2.99 } } EOF + if [ $? -ne 0 ]; then + if [ $family = "inet" ];then + echo "SKIP: inet nat tests" + test_inet_nat=false + return $ksft_skip + fi + echo "SKIP: Could not add add $family dnat hook" + return $ksft_skip + fi + # ping netns1, expect rewrite to netns2 ip netns exec ns0 ping -q -c 1 10.0.1.99 > /dev/null if [ $? 
-ne 0 ]; then @@ -264,9 +289,9 @@ EOF fi done - test $lret -eq 0 && echo "PASS: ping to ns1 was NATted to ns2" + test $lret -eq 0 && echo "PASS: ping to ns1 was $family NATted to ns2" - ip netns exec ns0 nft flush chain ip nat output + ip netns exec ns0 nft flush chain $family nat output reset_counters ip netns exec ns0 ping -q -c 1 10.0.1.99 > /dev/null @@ -313,7 +338,7 @@ EOF fi done - test $lret -eq 0 && echo "PASS: ping to ns1 OK after nat output chain flush" + test $lret -eq 0 && echo "PASS: ping to ns1 OK after $family nat output chain flush" return $lret } @@ -321,6 +346,8 @@ EOF test_masquerade6() { + local family=$1 + local natflags=$1 local lret=0 ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null @@ -351,16 +378,25 @@ test_masquerade6() # add masquerading rule ip netns exec ns0 nft -f - <<EOF -table ip6 nat { +table $family nat { chain postrouting { type nat hook postrouting priority 0; policy accept; - meta oif veth0 masquerade + meta oif veth0 masquerade $natflags } } EOF + if [ $? -ne 0 ]; then + echo "SKIP: Could not add add $family masquerade hook" + return $ksft_skip + fi + ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 if [ $? -ne 0 ] ; then - echo "ERROR: cannot ping ns1 from ns2 with active ipv6 masquerading" +<<<<<<< HEAD + echo "ERROR: cannot ping ns1 from ns2 with active $family masquerading" +======= + echo "ERROR: cannot ping ns1 from ns2 with active ipv6 masquerade $natflags" +>>>>>>> cd8dead0c39457e58ec1d36db93aedca811d48f1 lret=1 fi @@ -397,19 +433,38 @@ EOF fi done +<<<<<<< HEAD + ip netns exec ns0 nft flush chain $family nat postrouting +======= + ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2 with active ipv6 masquerade $natflags (attempt 2)" + lret=1 + fi + ip netns exec ns0 nft flush chain ip6 nat postrouting +>>>>>>> cd8dead0c39457e58ec1d36db93aedca811d48f1 if [ $? 
-ne 0 ]; then - echo "ERROR: Could not flush ip6 nat postrouting" 1>&2 + echo "ERROR: Could not flush $family nat postrouting" 1>&2 lret=1 fi - test $lret -eq 0 && echo "PASS: IPv6 masquerade for ns2" +<<<<<<< HEAD + test $lret -eq 0 && echo "PASS: $family IPv6 masquerade for ns2" +======= + test $lret -eq 0 && echo "PASS: IPv6 masquerade $natflags for ns2" +>>>>>>> cd8dead0c39457e58ec1d36db93aedca811d48f1 return $lret } test_masquerade() { +<<<<<<< HEAD + local family=$1 +======= + local natflags=$1 +>>>>>>> cd8dead0c39457e58ec1d36db93aedca811d48f1 local lret=0 ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null @@ -417,7 +472,7 @@ test_masquerade() ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 if [ $? -ne 0 ] ; then - echo "ERROR: canot ping ns1 from ns2" + echo "ERROR: cannot ping ns1 from ns2 $natflags" lret=1 fi @@ -440,16 +495,25 @@ test_masquerade() # add masquerading rule ip netns exec ns0 nft -f - <<EOF -table ip nat { +table $family nat { chain postrouting { type nat hook postrouting priority 0; policy accept; - meta oif veth0 masquerade + meta oif veth0 masquerade $natflags } } EOF + if [ $? -ne 0 ]; then + echo "SKIP: Could not add add $family masquerade hook" + return $ksft_skip + fi + ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 if [ $? -ne 0 ] ; then - echo "ERROR: cannot ping ns1 from ns2 with active ip masquerading" +<<<<<<< HEAD + echo "ERROR: cannot ping ns1 from ns2 with active $family masquerading" +======= + echo "ERROR: cannot ping ns1 from ns2 with active ip masquere $natflags" +>>>>>>> cd8dead0c39457e58ec1d36db93aedca811d48f1 lret=1 fi @@ -485,19 +549,34 @@ EOF fi done +<<<<<<< HEAD + ip netns exec ns0 nft flush chain $family nat postrouting +======= + ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 + if [ $? 
-ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2 with active ip masquerade $natflags (attempt 2)" + lret=1 + fi + ip netns exec ns0 nft flush chain ip nat postrouting +>>>>>>> cd8dead0c39457e58ec1d36db93aedca811d48f1 if [ $? -ne 0 ]; then - echo "ERROR: Could not flush nat postrouting" 1>&2 + echo "ERROR: Could not flush $family nat postrouting" 1>&2 lret=1 fi - test $lret -eq 0 && echo "PASS: IP masquerade for ns2" +<<<<<<< HEAD + test $lret -eq 0 && echo "PASS: $family IP masquerade for ns2" +======= + test $lret -eq 0 && echo "PASS: IP masquerade $natflags for ns2" +>>>>>>> cd8dead0c39457e58ec1d36db93aedca811d48f1 return $lret } test_redirect6() { + local family=$1 local lret=0 ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null @@ -527,16 +606,21 @@ test_redirect6() # add redirect rule ip netns exec ns0 nft -f - <<EOF -table ip6 nat { +table $family nat { chain prerouting { type nat hook prerouting priority 0; policy accept; meta iif veth1 meta l4proto icmpv6 ip6 saddr dead:2::99 ip6 daddr dead:1::99 redirect } } EOF + if [ $? -ne 0 ]; then + echo "SKIP: Could not add add $family redirect hook" + return $ksft_skip + fi + ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 if [ $? -ne 0 ] ; then - echo "ERROR: cannot ping ns1 from ns2 with active ip6 redirect" + echo "ERROR: cannot ping ns1 from ns2 via ipv6 with active $family redirect" lret=1 fi @@ -560,19 +644,20 @@ EOF fi done - ip netns exec ns0 nft delete table ip6 nat + ip netns exec ns0 nft delete table $family nat if [ $? 
-ne 0 ]; then - echo "ERROR: Could not delete ip6 nat table" 1>&2 + echo "ERROR: Could not delete $family nat table" 1>&2 lret=1 fi - test $lret -eq 0 && echo "PASS: IPv6 redirection for ns2" + test $lret -eq 0 && echo "PASS: $family IPv6 redirection for ns2" return $lret } test_redirect() { + local family=$1 local lret=0 ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null @@ -603,16 +688,21 @@ test_redirect() # add redirect rule ip netns exec ns0 nft -f - <<EOF -table ip nat { +table $family nat { chain prerouting { type nat hook prerouting priority 0; policy accept; meta iif veth1 ip protocol icmp ip saddr 10.0.2.99 ip daddr 10.0.1.99 redirect } } EOF + if [ $? -ne 0 ]; then + echo "SKIP: Could not add add $family redirect hook" + return $ksft_skip + fi + ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 if [ $? -ne 0 ] ; then - echo "ERROR: cannot ping ns1 from ns2 with active ip redirect" + echo "ERROR: cannot ping ns1 from ns2 with active $family ip redirect" lret=1 fi @@ -637,13 +727,13 @@ EOF fi done - ip netns exec ns0 nft delete table ip nat + ip netns exec ns0 nft delete table $family nat if [ $? 
-ne 0 ]; then - echo "ERROR: Could not delete nat table" 1>&2 + echo "ERROR: Could not delete $family nat table" 1>&2 lret=1 fi - test $lret -eq 0 && echo "PASS: IP redirection for ns2" + test $lret -eq 0 && echo "PASS: $family IP redirection for ns2" return $lret } @@ -746,16 +836,34 @@ if [ $ret -eq 0 ];then fi reset_counters -test_local_dnat -test_local_dnat6 +test_local_dnat ip +test_local_dnat6 ip6 +reset_counters +$test_inet_nat && test_local_dnat inet +$test_inet_nat && test_local_dnat6 inet reset_counters -test_masquerade -test_masquerade6 +<<<<<<< HEAD +test_masquerade ip +test_masquerade6 ip6 +reset_counters +$test_inet_nat && test_masquerade inet +$test_inet_nat && test_masquerade6 inet +======= +test_masquerade "" +test_masquerade6 "" reset_counters -test_redirect -test_redirect6 +test_masquerade "fully-random" +test_masquerade6 "fully-random" +>>>>>>> cd8dead0c39457e58ec1d36db93aedca811d48f1 + +reset_counters +test_redirect ip +test_redirect6 ip6 +reset_counters +$test_inet_nat && test_redirect inet +$test_inet_nat && test_redirect6 inet for i in 0 1 2; do ip netns del ns$i;done diff --git a/tools/testing/selftests/proc/proc-pid-vm.c b/tools/testing/selftests/proc/proc-pid-vm.c index 7202bbac976e..853aa164a401 100644 --- a/tools/testing/selftests/proc/proc-pid-vm.c +++ b/tools/testing/selftests/proc/proc-pid-vm.c @@ -187,8 +187,8 @@ static int make_exe(const uint8_t *payload, size_t len) ph.p_offset = 0; ph.p_vaddr = VADDR; ph.p_paddr = 0; - ph.p_filesz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + sizeof(payload); - ph.p_memsz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + sizeof(payload); + ph.p_filesz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len; + ph.p_memsz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len; ph.p_align = 4096; fd = openat(AT_FDCWD, "/tmp", O_WRONLY|O_EXCL|O_TMPFILE, 0700); diff --git a/tools/testing/selftests/proc/proc-self-map-files-002.c 
b/tools/testing/selftests/proc/proc-self-map-files-002.c index 762cb01f2ca7..47b7473dedef 100644 --- a/tools/testing/selftests/proc/proc-self-map-files-002.c +++ b/tools/testing/selftests/proc/proc-self-map-files-002.c @@ -46,12 +46,9 @@ static void fail(const char *fmt, unsigned long a, unsigned long b) int main(void) { - const unsigned int PAGE_SIZE = sysconf(_SC_PAGESIZE); -#ifdef __arm__ - unsigned long va = 2 * PAGE_SIZE; -#else - unsigned long va = 0; -#endif + const int PAGE_SIZE = sysconf(_SC_PAGESIZE); + const unsigned long va_max = 1UL << 32; + unsigned long va; void *p; int fd; unsigned long a, b; @@ -60,10 +57,13 @@ int main(void) if (fd == -1) return 1; - p = mmap((void *)va, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_FILE|MAP_FIXED, fd, 0); - if (p == MAP_FAILED) { - if (errno == EPERM) - return 4; + for (va = 0; va < va_max; va += PAGE_SIZE) { + p = mmap((void *)va, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_FILE|MAP_FIXED, fd, 0); + if (p == (void *)va) + break; + } + if (va == va_max) { + fprintf(stderr, "error: mmap doesn't like you\n"); return 1; } diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json index 5970cee6d05f..b074ea9b6fe8 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json @@ -286,5 +286,30 @@ "teardown": [ "$TC action flush action bpf" ] + }, + { + "id": "b8a1", + "name": "Replace bpf action with invalid goto_chain control", + "category": [ + "actions", + "bpf" + ], + "setup": [ + [ + "$TC actions flush action bpf", + 0, + 1, + 255 + ], + "$TC action add action bpf bytecode '1,6 0 0 4294967295' pass index 90" + ], + "cmdUnderTest": "$TC action replace action bpf bytecode '1,6 0 0 4294967295' goto chain 42 index 90 cookie c1a0c1a0", + "expExitCode": "255", + "verifyCmd": "$TC action list action bpf", + "matchPattern": "action order [0-9]*: bpf.* default-action pass.*index 
90", + "matchCount": "1", + "teardown": [ + "$TC action flush action bpf" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json b/tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json index 13147a1f5731..cadde8f41fcd 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json @@ -287,5 +287,30 @@ "teardown": [ "$TC actions flush action connmark" ] + }, + { + "id": "c506", + "name": "Replace connmark with invalid goto chain control", + "category": [ + "actions", + "connmark" + ], + "setup": [ + [ + "$TC actions flush action connmark", + 0, + 1, + 255 + ], + "$TC actions add action connmark pass index 90" + ], + "cmdUnderTest": "$TC actions replace action connmark goto chain 42 index 90 cookie c1a0c1a0", + "expExitCode": "255", + "verifyCmd": "$TC actions get action connmark index 90", + "matchPattern": "action order [0-9]+: connmark zone 0 pass.*index 90 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action connmark" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json b/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json index a022792d392a..ddabb2fbb7c7 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json @@ -500,5 +500,30 @@ "matchPattern": "^[ \t]+index [0-9]+ ref", "matchCount": "0", "teardown": [] + }, + { + "id": "d128", + "name": "Replace csum action with invalid goto chain control", + "category": [ + "actions", + "csum" + ], + "setup": [ + [ + "$TC actions flush action csum", + 0, + 1, + 255 + ], + "$TC actions add action csum iph index 90" + ], + "cmdUnderTest": "$TC actions replace action csum iph goto chain 42 index 90 cookie c1a0c1a0", + "expExitCode": "255", + "verifyCmd": "$TC actions get action csum index 90", + "matchPattern": "action order [0-9]*: csum \\(iph\\) action 
pass.*index 90 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action csum" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json index 89189a03ce3d..814b7a8a478b 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json @@ -560,5 +560,30 @@ "teardown": [ "$TC actions flush action gact" ] + }, + { + "id": "ca89", + "name": "Replace gact action with invalid goto chain control", + "category": [ + "actions", + "gact" + ], + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ], + "$TC actions add action pass random determ drop 2 index 90" + ], + "cmdUnderTest": "$TC actions replace action goto chain 42 random determ drop 5 index 90 cookie c1a0c1a0", + "expExitCode": "255", + "verifyCmd": "$TC actions list action gact", + "matchPattern": "action order [0-9]*: gact action pass.*random type determ drop val 2.*index 90 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action gact" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json b/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json index 0da3545cabdb..c13a68b98fc7 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json @@ -1060,5 +1060,30 @@ "matchPattern": "action order [0-9]*: ife encode action pipe.*allow prio.*index 4", "matchCount": "0", "teardown": [] + }, + { + "id": "a0e2", + "name": "Replace ife encode action with invalid goto chain control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ], + "$TC actions add action ife encode allow mark pass index 90" + ], + "cmdUnderTest": "$TC actions replace action ife encode allow mark goto chain 42 index 90 cookie c1a0c1a0", + "expExitCode": "255", + "verifyCmd": 
"$TC actions get action ife index 90", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E .*allow mark.*index 90 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json index db49fd0f8445..6e5fb3d25681 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json @@ -434,5 +434,30 @@ "teardown": [ "$TC actions flush action mirred" ] + }, + { + "id": "2a9a", + "name": "Replace mirred action with invalid goto chain control", + "category": [ + "actions", + "mirred" + ], + "setup": [ + [ + "$TC actions flush action mirred", + 0, + 1, + 255 + ], + "$TC actions add action mirred ingress mirror dev lo drop index 90" + ], + "cmdUnderTest": "$TC actions replace action mirred ingress mirror dev lo goto chain 42 index 90 cookie c1a0c1a0", + "expExitCode": "255", + "verifyCmd": "$TC actions get action mirred index 90", + "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) drop.*index 90 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action mirred" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json b/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json index 0080dc2fd41c..bc12c1ccad30 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json @@ -589,5 +589,30 @@ "teardown": [ "$TC actions flush action nat" ] + }, + { + "id": "4b12", + "name": "Replace nat action with invalid goto chain control", + "category": [ + "actions", + "nat" + ], + "setup": [ + [ + "$TC actions flush action nat", + 0, + 1, + 255 + ], + "$TC actions add action nat ingress 1.18.1.1 1.18.2.2 drop index 90" + ], + "cmdUnderTest": "$TC actions replace action nat 
ingress 1.18.1.1 1.18.2.2 goto chain 42 index 90 cookie c1a0c1a0", + "expExitCode": "255", + "verifyCmd": "$TC actions get action nat index 90", + "matchPattern": "action order [0-9]+: nat ingress 1.18.1.1/32 1.18.2.2 drop.*index 90 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action nat" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json b/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json new file mode 100644 index 000000000000..0d319f1d01db --- /dev/null +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json @@ -0,0 +1,954 @@ +[ + { + "id": "319a", + "name": "Add pedit action that mangles IP TTL", + "category": [ + "actions", + "pedit" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit ex munge ip ttl set 10", + "expExitCode": "0", + "verifyCmd": "$TC actions ls action pedit", + "matchPattern": "action order [0-9]+: pedit action pass keys 1.*index 1 ref.*key #0 at ipv4\\+8: val 0a000000 mask 00ffffff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "7e67", + "name": "Replace pedit action with invalid goto chain", + "category": [ + "actions", + "pedit" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ], + "$TC actions add action pedit ex munge ip ttl set 10 pass index 90" + ], + "cmdUnderTest": "$TC actions replace action pedit ex munge ip ttl set 10 goto chain 42 index 90 cookie c1a0c1a0", + "expExitCode": "255", + "verifyCmd": "$TC actions ls action pedit", + "matchPattern": "action order [0-9]+: pedit action pass keys 1.*index 90 ref.*key #0 at ipv4\\+8: val 0a000000 mask 00ffffff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "377e", + "name": "Add pedit action with RAW_OP offset u32", + "category": [ + "actions", + "pedit", + "raw_op" + ], + "setup": [ + [ + "$TC actions flush action 
pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit munge offset 12 u32 set 0x90abcdef", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit | grep 'key '", + "matchPattern": "12: val 90abcdef mask 00000000", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "a0ca", + "name": "Add pedit action with RAW_OP offset u32 (INVALID)", + "category": [ + "actions", + "pedit", + "raw_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit munge offset 2 u32 set 0x12345678", + "expExitCode": "255", + "verifyCmd": "/bin/true", + "matchPattern": " ", + "matchCount": "0", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "dd8a", + "name": "Add pedit action with RAW_OP offset u16 u16", + "category": [ + "actions", + "pedit", + "raw_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit munge offset 12 u16 set 0x1234 munge offset 14 u16 set 0x5678", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit | grep 'key '", + "matchPattern": "val 12340000 mask 0000ffff.*val 00005678 mask ffff0000", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "53db", + "name": "Add pedit action with RAW_OP offset u16 (INVALID)", + "category": [ + "actions", + "pedit", + "raw_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit munge offset 15 u16 set 0x1234", + "expExitCode": "255", + "verifyCmd": "/bin/true", + "matchPattern": " ", + "matchCount": "0", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "5c7e", + "name": "Add pedit action with RAW_OP offset u8 add value", + "category": [ + "actions", + "pedit", + "raw_op" + ], + "setup": [ + [ + "$TC actions flush action 
pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit ex munge offset 16 u8 add 0xf", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit | grep 'key '", + "matchPattern": " 16: add 0f000000 mask 00ffffff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "2893", + "name": "Add pedit action with RAW_OP offset u8 quad", + "category": [ + "actions", + "pedit", + "raw_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit munge offset 12 u8 set 0x12 munge offset 13 u8 set 0x34 munge offset 14 u8 set 0x56 munge offset 15 u8 set 0x78", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit | grep 'key '", + "matchPattern": "val 12000000 mask 00ffffff.*val 00340000 mask ff00ffff.*val 00005600 mask ffff00ff.*val 00000078 mask ffffff00", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "3a07", + "name": "Add pedit action with RAW_OP offset u8-u16-u8", + "category": [ + "actions", + "pedit", + "raw_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit munge offset 0 u8 set 0x12 munge offset 1 u16 set 0x3456 munge offset 3 u8 set 0x78", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit | grep 'key '", + "matchPattern": "val 12000000 mask 00ffffff.*val 00345600 mask ff0000ff.*val 00000078 mask ffffff00", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "ab0f", + "name": "Add pedit action with RAW_OP offset u16-u8-u8", + "category": [ + "actions", + "pedit", + "raw_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit munge offset 0 u16 set 0x1234 munge offset 2 u8 set 0x56 munge offset 3 u8 set 0x78", + "expExitCode": "0", + 
"verifyCmd": "$TC actions list action pedit | grep 'key '", + "matchPattern": "val 12340000 mask 0000ffff.*val 00005600 mask ffff00ff.*val 00000078 mask ffffff00", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "9d12", + "name": "Add pedit action with RAW_OP offset u32 set u16 clear u8 invert", + "category": [ + "actions", + "pedit", + "raw_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit munge offset 0 u32 set 0x12345678 munge offset 1 u16 clear munge offset 2 u8 invert", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit | grep 'key '", + "matchPattern": "val 12345678 mask 00000000.*val 00000000 mask ff0000ff.*val 0000ff00 mask ffffffff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "ebfa", + "name": "Add pedit action with RAW_OP offset overflow u32 (INVALID)", + "category": [ + "actions", + "pedit", + "raw_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit munge offset 0xffffffffffffffffffffffffffffffffffffffffff u32 set 0x1", + "expExitCode": "255", + "verifyCmd": "/bin/true", + "matchPattern": " ", + "matchCount": "0", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "f512", + "name": "Add pedit action with RAW_OP offset u16 at offmask shift set", + "category": [ + "actions", + "pedit", + "raw_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit munge offset 12 u16 at 12 ffff 1 set 0xaaaa", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit | grep 'key '", + "matchPattern": " 12: val aaaa0000 mask 0000ffff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "c2cb", + "name": "Add pedit action with RAW_OP offset u32 retain 
value", + "category": [ + "actions", + "pedit", + "raw_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit munge offset 12 u32 set 0x12345678 retain 0xff00", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit | grep 'key '", + "matchPattern": " 12: val 00005600 mask ffff00ff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "86d4", + "name": "Add pedit action with LAYERED_OP eth set src & dst", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit ex munge eth src set 11:22:33:44:55:66 munge eth dst set ff:ee:dd:cc:bb:aa", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit | grep 'key '", + "matchPattern": "eth\\+4: val 00001122 mask ffff0000.*eth\\+8: val 33445566 mask 00000000.*eth\\+0: val ffeeddcc mask 00000000.*eth\\+4: val bbaa0000 mask 0000ffff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "c715", + "name": "Add pedit action with LAYERED_OP eth set src (INVALID)", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit ex munge eth src set %e:11:m2:33:x4:-5", + "expExitCode": "255", + "verifyCmd": "/bin/true", + "matchPattern": " ", + "matchCount": "0", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "ba22", + "name": "Add pedit action with LAYERED_OP eth type set/clear sequence", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit ex munge eth type set 0x1 munge eth type clear munge eth type set 0x1 munge eth type clear munge eth type set 
0x1 munge eth type clear munge eth type set 0x1 munge eth type clear munge eth type set 0x1 munge eth type clear munge eth type set 0x1 munge eth type clear", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit | grep 'key '", + "matchPattern": "eth\\+12: val 00010000 mask 0000ffff.*eth\\+12: val 00000000 mask 0000ffff.*eth\\+12: val 00010000 mask 0000ffff.*eth\\+12: val 00000000 mask 0000ffff.*eth\\+12: val 00010000 mask 0000ffff.*eth\\+12: val 00000000 mask 0000ffff.*eth\\+12: val 00010000 mask 0000ffff.*eth\\+12: val 00000000 mask 0000ffff.*eth\\+12: val 00010000 mask 0000ffff.*eth\\+12: val 00000000 mask 0000ffff.*eth\\+12: val 00010000 mask 0000ffff.*eth\\+12: val 00000000 mask 0000ffff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "5810", + "name": "Add pedit action with LAYERED_OP ip set src & dst", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit munge ip src set 18.52.86.120 munge ip dst set 18.52.86.120", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit | grep 'key '", + "matchPattern": " 12: val 12345678 mask 00000000.* 16: val 12345678 mask 00000000", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "1092", + "name": "Add pedit action with LAYERED_OP ip set ihl & dsfield", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit munge ip ihl set 0xff munge ip dsfield set 0xff", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit | grep 'key '", + "matchPattern": " 0: val 0f000000 mask f0ffffff.* 0: val 00ff0000 mask ff00ffff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "02d8", + "name": "Add pedit 
action with LAYERED_OP ip set ttl & protocol", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit munge ip ttl set 0x1 munge ip protocol set 0xff", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit | grep 'key '", + "matchPattern": " 8: val 01000000 mask 00ffffff.* 8: val 00ff0000 mask ff00ffff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "3e2d", + "name": "Add pedit action with LAYERED_OP ip set ttl (INVALID)", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit munge ip ttl set 300", + "expExitCode": "255", + "verifyCmd": "/bin/true", + "matchPattern": " ", + "matchCount": "0", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "31ae", + "name": "Add pedit action with LAYERED_OP ip ttl clear/set", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit munge ip ttl clear munge ip ttl set 0x1", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit | grep 'key '", + "matchPattern": " 8: val 00000000 mask 00ffffff.* 8: val 01000000 mask 00ffffff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "486f", + "name": "Add pedit action with LAYERED_OP ip set duplicate fields", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit munge ip ttl set 0x1 munge ip ttl set 0x1", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit | grep 'key '", + "matchPattern": " 8: val 01000000 mask 00ffffff.* 
8: val 01000000 mask 00ffffff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "e790", + "name": "Add pedit action with LAYERED_OP ip set ce, df, mf, firstfrag, nofrag fields", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit munge ip ce set 0xff munge ip df set 0xff munge ip mf set 0xff munge ip firstfrag set 0xff munge ip nofrag set 0xff", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit | grep 'key '", + "matchPattern": " 4: val 00008000 mask ffff7fff.* 4: val 00004000 mask ffffbfff.* 4: val 00002000 mask ffffdfff.* 4: val 00001f00 mask ffffe0ff.* 4: val 00003f00 mask ffffc0ff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "6829", + "name": "Add pedit action with LAYERED_OP beyond ip set dport & sport", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit munge ip dport set 0x1234 munge ip sport set 0x5678", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit | grep 'key '", + "matchPattern": " 20: val 00001234 mask ffff0000.* 20: val 56780000 mask 0000ffff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "afd8", + "name": "Add pedit action with LAYERED_OP beyond ip set icmp_type & icmp_code", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit munge ip icmp_type set 0xff munge ip icmp_code set 0xff", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit | grep 'key '", + "matchPattern": " 20: val ff000000 mask 00ffffff.* 20: val ff000000 mask 00ffffff", + "matchCount": "1", + 
"teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "3143", + "name": "Add pedit action with LAYERED_OP beyond ip set dport (INVALID)", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit ex munge ip dport set 0x1234", + "expExitCode": "255", + "verifyCmd": "/bin/true", + "matchPattern": " ", + "matchCount": "0", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "fc1f", + "name": "Add pedit action with LAYERED_OP ip6 set src & dst", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit ex munge ip6 src set 2001:0db8:0:f101::1 munge ip6 dst set 2001:0db8:0:f101::1", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit | grep 'key '", + "matchPattern": "ipv6\\+8: val 20010db8 mask 00000000.*ipv6\\+12: val 0000f101 mask 00000000.*ipv6\\+16: val 00000000 mask 00000000.*ipv6\\+20: val 00000001 mask 00000000.*ipv6\\+24: val 20010db8 mask 00000000.*ipv6\\+28: val 0000f101 mask 00000000.*ipv6\\+32: val 00000000 mask 00000000.*ipv6\\+36: val 00000001 mask 00000000", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "6d34", + "name": "Add pedit action with LAYERED_OP ip6 dst retain value (INVALID)", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit ex munge ip6 dst set 2001:0db8:0:f101::1 retain 0xff0000", + "expExitCode": "255", + "verifyCmd": "/bin/true", + "matchPattern": " ", + "matchCount": "0", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "6f5e", + "name": "Add pedit action with LAYERED_OP ip6 flow_lbl", + "category": [ + "actions", + "pedit", + 
"layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit ex munge ip6 flow_lbl set 0xfffff", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit | grep 'key '", + "matchPattern": "ipv6\\+0: val 0007ffff mask fff80000", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "6795", + "name": "Add pedit action with LAYERED_OP ip6 set payload_len, nexthdr, hoplimit", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit ex munge ip6 payload_len set 0xffff munge ip6 nexthdr set 0xff munge ip6 hoplimit set 0xff", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit | grep 'key '", + "matchPattern": "ipv6\\+4: val ffff0000 mask 0000ffff.*ipv6\\+4: val 0000ff00 mask ffff00ff.*ipv6\\+4: val 000000ff mask ffffff00", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "1442", + "name": "Add pedit action with LAYERED_OP tcp set dport & sport", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit ex munge tcp dport set 4789 munge tcp sport set 1", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit | grep 'key '", + "matchPattern": "tcp\\+0: val 000012b5 mask ffff0000.*tcp\\+0: val 00010000 mask 0000ffff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "b7ac", + "name": "Add pedit action with LAYERED_OP tcp sport set (INVALID)", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit ex munge tcp sport set -200", + "expExitCode": 
"255", + "verifyCmd": "/bin/true", + "matchPattern": " ", + "matchCount": "0", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "cfcc", + "name": "Add pedit action with LAYERED_OP tcp flags set", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit ex munge tcp flags set 0x16", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit | grep 'key '", + "matchPattern": "tcp\\+12: val 00160000 mask ff00ffff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "3bc4", + "name": "Add pedit action with LAYERED_OP tcp set dport, sport & flags fields", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit ex munge tcp dport set 4789 munge tcp sport set 1 munge tcp flags set 0x1", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit | grep 'key '", + "matchPattern": "tcp\\+0: val 000012b5 mask ffff0000.*tcp\\+0: val 00010000 mask 0000ffff.*tcp\\+12: val 00010000 mask ff00ffff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "f1c8", + "name": "Add pedit action with LAYERED_OP udp set dport & sport", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit ex munge udp dport set 4789 munge udp sport set 4789", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit | grep 'key '", + "matchPattern": "udp\\+0: val 000012b5 mask ffff0000.*udp\\+0: val 12b50000 mask 0000ffff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "d784", + "name": "Add pedit action with mixed RAW/LAYERED_OP #1", + "category": 
[ + "actions", + "pedit", + "layered_op", + "raw_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit ex munge eth src set 11:22:33:44:55:66 munge ip ttl set 0xff munge tcp flags clear munge offset 15 u8 add 40 retain 0xf0 munge udp dport add 1", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit | grep 'key '", + "matchPattern": "eth\\+4: val 00001122 mask ffff0000.*eth\\+8: val 33445566 mask 00000000.*ipv4\\+8: val ff000000 mask 00ffffff.*tcp\\+12: val 00000000 mask ff00ffff.* 12: add 00000020 mask ffffff0f.*udp\\+0: add 00000001 mask ffff0000", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "70ca", + "name": "Add pedit action with mixed RAW/LAYERED_OP #2", + "category": [ + "actions", + "pedit", + "layered_op", + "raw_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit ex munge eth src set 11:22:33:44:55:66 munge eth dst set ff:ee:dd:cc:bb:aa munge ip6 payload_len set 0xffff munge ip6 nexthdr set 0xff munge ip6 hoplimit preserve munge offset 0 u8 set 0x12 munge offset 1 u16 set 0x3456 munge offset 3 u8 set 0x78 munge ip ttl set 0xaa munge ip protocol set 0xff", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit | grep 'key '", + "matchPattern": "eth\\+4: val 00001122 mask ffff0000.*eth\\+8: val 33445566 mask 00000000.*eth\\+0: val ffeeddcc mask 00000000.*eth\\+4: val bbaa0000 mask 0000ffff.*ipv6\\+4: val ffff0000 mask 0000ffff.*ipv6\\+4: val 0000ff00 mask ffff00ff.*ipv6\\+4: val 00000000 mask ffffffff.* 0: val 12000000 mask 00ffffff.* 0: val 00345600 mask ff0000ff.* 0: val 00000078 mask ffffff00.*ipv4\\+8: val aa000000 mask 00ffffff.*ipv4\\+8: val 00ff0000 mask ff00ffff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + } + +] diff --git 
a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json index 4086a50a670e..b8268da5adaa 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json @@ -739,5 +739,30 @@ "teardown": [ "$TC actions flush action police" ] + }, + { + "id": "689e", + "name": "Replace police action with invalid goto chain control", + "category": [ + "actions", + "police" + ], + "setup": [ + [ + "$TC actions flush action police", + 0, + 1, + 255 + ], + "$TC actions add action police rate 3mbit burst 250k drop index 90" + ], + "cmdUnderTest": "$TC actions replace action police rate 3mbit burst 250k goto chain 42 index 90 cookie c1a0c1a0", + "expExitCode": "255", + "verifyCmd": "$TC actions get action police index 90", + "matchPattern": "action order [0-9]*: police 0x5a rate 3Mbit burst 250Kb mtu 2Kb action drop", + "matchCount": "1", + "teardown": [ + "$TC actions flush action police" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json b/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json index 3aca33c00039..ddabb160a11b 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json @@ -144,6 +144,30 @@ ] }, { + "id": "7571", + "name": "Add sample action with invalid rate", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action sample rate 0 group 1 index 2", + "expExitCode": "255", + "verifyCmd": "$TC actions get action sample index 2", + "matchPattern": "action order [0-9]+: sample rate 1/0 group 1.*index 2 ref", + "matchCount": "0", + "teardown": [ + "$TC actions flush action sample" + ] + }, + { "id": "b6d4", "name": "Add sample action with mandatory arguments and invalid control action", 
"category": [ @@ -584,5 +608,30 @@ "teardown": [ "$TC actions flush action sample" ] + }, + { + "id": "0a6e", + "name": "Replace sample action with invalid goto chain control", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ], + "$TC actions add action sample rate 1024 group 4 pass index 90" + ], + "cmdUnderTest": "$TC actions replace action sample rate 1024 group 7 goto chain 42 index 90 cookie c1a0c1a0", + "expExitCode": "255", + "verifyCmd": "$TC actions list action sample", + "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 pass.*index 90", + "matchCount": "1", + "teardown": [ + "$TC actions flush action sample" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json b/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json index e89a7aa4012d..8e8c1ae12260 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json @@ -126,5 +126,30 @@ "teardown": [ "" ] + }, + { + "id": "b776", + "name": "Replace simple action with invalid goto chain control", + "category": [ + "actions", + "simple" + ], + "setup": [ + [ + "$TC actions flush action simple", + 0, + 1, + 255 + ], + "$TC actions add action simple sdata \"hello\" pass index 90" + ], + "cmdUnderTest": "$TC actions replace action simple sdata \"world\" goto chain 42 index 90 cookie c1a0c1a0", + "expExitCode": "255", + "verifyCmd": "$TC actions list action simple", + "matchPattern": "action order [0-9]*: Simple <hello>.*index 90 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action simple" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json b/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json index 5aaf593b914a..ecd96eda7f6a 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json +++ 
b/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json @@ -484,5 +484,30 @@ "teardown": [ "$TC actions flush action skbedit" ] + }, + { + "id": "1b2b", + "name": "Replace skbedit action with invalid goto_chain control", + "category": [ + "actions", + "skbedit" + ], + "setup": [ + [ + "$TC actions flush action skbedit", + 0, + 1, + 255 + ], + "$TC actions add action skbedit ptype host pass index 90" + ], + "cmdUnderTest": "$TC actions replace action skbedit ptype host goto chain 42 index 90 cookie c1a0c1a0", + "expExitCode": "255", + "verifyCmd": "$TC actions list action skbedit", + "matchPattern": "action order [0-9]*: skbedit ptype host pass.*index 90 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json b/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json index fe3326e939c1..6eb4c4f97060 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json @@ -392,5 +392,30 @@ "teardown": [ "$TC actions flush action skbmod" ] + }, + { + "id": "b651", + "name": "Replace skbmod action with invalid goto_chain control", + "category": [ + "actions", + "skbmod" + ], + "setup": [ + [ + "$TC actions flush action skbmod", + 0, + 1, + 255 + ], + "$TC actions add action skbmod set etype 0x1111 pass index 90" + ], + "cmdUnderTest": "$TC actions replace action skbmod set etype 0x1111 goto chain 42 index 90 cookie c1a0c1a0", + "expExitCode": "255", + "verifyCmd": "$TC actions ls action skbmod", + "matchPattern": "action order [0-9]*: skbmod pass set etype 0x1111\\s+index 90 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbmod" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json b/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json index e7e15a7336b6..28453a445fdb 100644 --- 
a/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json @@ -884,5 +884,30 @@ "teardown": [ "$TC actions flush action tunnel_key" ] + }, + { + "id": "8242", + "name": "Replace tunnel_key set action with invalid goto chain", + "category": [ + "actions", + "tunnel_key" + ], + "setup": [ + [ + "$TC actions flush action tunnel_key", + 0, + 1, + 255 + ], + "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 dst_port 3128 nocsum id 1 pass index 90" + ], + "cmdUnderTest": "$TC actions replace action tunnel_key set src_ip 10.10.10.2 dst_ip 20.20.20.1 dst_port 3129 id 2 csum goto chain 42 index 90 cookie c1a0c1a0", + "expExitCode": "255", + "verifyCmd": "$TC actions get action tunnel_key index 90", + "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 10.10.10.1.*dst_ip 20.20.20.2.*key_id 1.*dst_port 3128.*csum pass.*index 90 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action tunnel_key" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json index 69ea09eefffc..cc7c7d758008 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json @@ -688,5 +688,30 @@ "teardown": [ "$TC actions flush action vlan" ] + }, + { + "id": "e394", + "name": "Replace vlan push action with invalid goto chain control", + "category": [ + "actions", + "vlan" + ], + "setup": [ + [ + "$TC actions flush action vlan", + 0, + 1, + 255 + ], + "$TC actions add action vlan push id 500 pass index 90" + ], + "cmdUnderTest": "$TC actions replace action vlan push id 500 goto chain 42 index 90 cookie c1a0c1a0", + "expExitCode": "255", + "verifyCmd": "$TC actions get action vlan index 90", + "matchPattern": "action order [0-9]+: vlan.*push id 500 protocol 802.1Q priority 0 pass.*index 90 ref", + 
"matchCount": "1", + "teardown": [ + "$TC actions flush action vlan" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json index 99a5ffca1088..e2f92cefb8d5 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json @@ -19,6 +19,26 @@ ] }, { + "id": "2638", + "name": "Add matchall and try to get it", + "category": [ + "filter", + "matchall" + ], + "setup": [ + "$TC qdisc add dev $DEV1 clsact", + "$TC filter add dev $DEV1 protocol all pref 1 ingress handle 0x1234 matchall action ok" + ], + "cmdUnderTest": "$TC filter get dev $DEV1 protocol all pref 1 ingress handle 0x1234 matchall", + "expExitCode": "0", + "verifyCmd": "$TC filter show dev $DEV1 ingress", + "matchPattern": "filter protocol all pref 1 matchall chain 0 handle 0x1234", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 clsact" + ] + }, + { "id": "d052", "name": "Add 1M filters with the same action", "category": [ @@ -38,5 +58,25 @@ "$TC qdisc del dev $DEV2 ingress", "/bin/rm $BATCH_FILE" ] + }, + { + "id": "4cbd", + "name": "Try to add filter with duplicate key", + "category": [ + "filter", + "flower" + ], + "setup": [ + "$TC qdisc add dev $DEV2 ingress", + "$TC filter add dev $DEV2 protocol ip prio 1 parent ffff: flower dst_mac e4:11:22:11:4a:51 src_mac e4:11:22:11:4a:50 ip_proto tcp src_ip 1.1.1.1 dst_ip 2.2.2.2 action drop" + ], + "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip prio 1 parent ffff: flower dst_mac e4:11:22:11:4a:51 src_mac e4:11:22:11:4a:50 ip_proto tcp src_ip 1.1.1.1 dst_ip 2.2.2.2 action drop", + "expExitCode": "2", + "verifyCmd": "$TC -s filter show dev $DEV2 ingress", + "matchPattern": "filter protocol ip pref 1 flower chain 0 handle", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV2 ingress" + ] } ] diff --git a/tools/testing/selftests/tpm2/tpm2.py 
b/tools/testing/selftests/tpm2/tpm2.py index 40ea95ce2ead..828c18584624 100644 --- a/tools/testing/selftests/tpm2/tpm2.py +++ b/tools/testing/selftests/tpm2/tpm2.py @@ -22,6 +22,7 @@ TPM2_CC_UNSEAL = 0x015E TPM2_CC_FLUSH_CONTEXT = 0x0165 TPM2_CC_START_AUTH_SESSION = 0x0176 TPM2_CC_GET_CAPABILITY = 0x017A +TPM2_CC_GET_RANDOM = 0x017B TPM2_CC_PCR_READ = 0x017E TPM2_CC_POLICY_PCR = 0x017F TPM2_CC_PCR_EXTEND = 0x0182 @@ -357,9 +358,9 @@ class Client: self.flags = flags if (self.flags & Client.FLAG_SPACE) == 0: - self.tpm = open('/dev/tpm0', 'r+b') + self.tpm = open('/dev/tpm0', 'r+b', buffering=0) else: - self.tpm = open('/dev/tpmrm0', 'r+b') + self.tpm = open('/dev/tpmrm0', 'r+b', buffering=0) def close(self): self.tpm.close() diff --git a/tools/testing/selftests/tpm2/tpm2_tests.py b/tools/testing/selftests/tpm2/tpm2_tests.py index 3bb066fea4a0..d4973be53493 100644 --- a/tools/testing/selftests/tpm2/tpm2_tests.py +++ b/tools/testing/selftests/tpm2/tpm2_tests.py @@ -158,6 +158,69 @@ class SmokeTest(unittest.TestCase): pass self.assertEqual(rejected, True) + def test_read_partial_resp(self): + try: + fmt = '>HIIH' + cmd = struct.pack(fmt, + tpm2.TPM2_ST_NO_SESSIONS, + struct.calcsize(fmt), + tpm2.TPM2_CC_GET_RANDOM, + 0x20) + self.client.tpm.write(cmd) + hdr = self.client.tpm.read(10) + sz = struct.unpack('>I', hdr[2:6])[0] + rsp = self.client.tpm.read() + except: + pass + self.assertEqual(sz, 10 + 2 + 32) + self.assertEqual(len(rsp), 2 + 32) + + def test_read_partial_overwrite(self): + try: + fmt = '>HIIH' + cmd = struct.pack(fmt, + tpm2.TPM2_ST_NO_SESSIONS, + struct.calcsize(fmt), + tpm2.TPM2_CC_GET_RANDOM, + 0x20) + self.client.tpm.write(cmd) + # Read part of the respone + rsp1 = self.client.tpm.read(15) + + # Send a new cmd + self.client.tpm.write(cmd) + + # Read the whole respone + rsp2 = self.client.tpm.read() + except: + pass + self.assertEqual(len(rsp1), 15) + self.assertEqual(len(rsp2), 10 + 2 + 32) + + def test_send_two_cmds(self): + rejected = False + try: + 
fmt = '>HIIH' + cmd = struct.pack(fmt, + tpm2.TPM2_ST_NO_SESSIONS, + struct.calcsize(fmt), + tpm2.TPM2_CC_GET_RANDOM, + 0x20) + self.client.tpm.write(cmd) + + # expect the second one to raise -EBUSY error + self.client.tpm.write(cmd) + rsp = self.client.tpm.read() + + except IOError, e: + # read the response + rsp = self.client.tpm.read() + rejected = True + pass + except: + pass + self.assertEqual(rejected, True) + class SpaceTest(unittest.TestCase): def setUp(self): logging.basicConfig(filename='SpaceTest.log', level=logging.DEBUG) |