diff options
Diffstat (limited to 'tools')
563 files changed, 26597 insertions, 5463 deletions
diff --git a/tools/Makefile b/tools/Makefile index 85af6ebbce91..7e9d34ddd74c 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -31,6 +31,7 @@ help: @echo ' bootconfig - boot config tool' @echo ' spi - spi tools' @echo ' tmon - thermal monitoring and tuning tool' + @echo ' tracing - misc tracing tools' @echo ' turbostat - Intel CPU idle stats and freq reporting tool' @echo ' usb - USB testing tools' @echo ' virtio - vhost test module' @@ -64,7 +65,7 @@ acpi: FORCE cpupower: FORCE $(call descend,power/$@) -cgroup firewire hv guest bootconfig spi usb virtio vm bpf iio gpio objtool leds wmi pci firmware debugging: FORCE +cgroup firewire hv guest bootconfig spi usb virtio vm bpf iio gpio objtool leds wmi pci firmware debugging tracing: FORCE $(call descend,$@) bpf/%: FORCE @@ -103,7 +104,7 @@ all: acpi cgroup cpupower gpio hv firewire liblockdep \ perf selftests bootconfig spi turbostat usb \ virtio vm bpf x86_energy_perf_policy \ tmon freefall iio objtool kvm_stat wmi \ - pci debugging + pci debugging tracing acpi_install: $(call descend,power/$(@:_install=),install) @@ -111,7 +112,7 @@ acpi_install: cpupower_install: $(call descend,power/$(@:_install=),install) -cgroup_install firewire_install gpio_install hv_install iio_install perf_install bootconfig_install spi_install usb_install virtio_install vm_install bpf_install objtool_install wmi_install pci_install debugging_install: +cgroup_install firewire_install gpio_install hv_install iio_install perf_install bootconfig_install spi_install usb_install virtio_install vm_install bpf_install objtool_install wmi_install pci_install debugging_install tracing_install: $(call descend,$(@:_install=),install) liblockdep_install: @@ -137,7 +138,8 @@ install: acpi_install cgroup_install cpupower_install gpio_install \ perf_install selftests_install turbostat_install usb_install \ virtio_install vm_install bpf_install x86_energy_perf_policy_install \ tmon_install freefall_install objtool_install kvm_stat_install \ - wmi_install pci_install debugging_install intel-speed-select_install + wmi_install pci_install debugging_install intel-speed-select_install \ + tracing_install acpi_clean: $(call descend,power/acpi,clean) @@ -145,7 +147,7 @@ acpi_clean: cpupower_clean: $(call descend,power/cpupower,clean) -cgroup_clean hv_clean firewire_clean bootconfig_clean spi_clean usb_clean virtio_clean vm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean pci_clean firmware_clean debugging_clean: +cgroup_clean hv_clean firewire_clean bootconfig_clean spi_clean usb_clean virtio_clean vm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean pci_clean firmware_clean debugging_clean tracing_clean: $(call descend,$(@:_clean=),clean) liblockdep_clean: @@ -184,6 +186,6 @@ clean: acpi_clean cgroup_clean cpupower_clean hv_clean firewire_clean \ vm_clean bpf_clean iio_clean x86_energy_perf_policy_clean tmon_clean \ freefall_clean build_clean libbpf_clean libsubcmd_clean liblockdep_clean \ gpio_clean objtool_clean leds_clean wmi_clean pci_clean firmware_clean debugging_clean \ - intel-speed-select_clean + intel-speed-select_clean tracing_clean .PHONY: FORCE diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h index c3af3f324c5a..9f18fa090f1f 100644 --- a/tools/arch/powerpc/include/uapi/asm/kvm.h +++ b/tools/arch/powerpc/include/uapi/asm/kvm.h @@ -644,6 +644,8 @@ struct kvm_ppc_cpu_char { #define KVM_REG_PPC_MMCR3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc1) #define KVM_REG_PPC_SIER2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc2) #define KVM_REG_PPC_SIER3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc3) +#define KVM_REG_PPC_DAWR1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc4) +#define KVM_REG_PPC_DAWRX1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc5) /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs diff --git a/tools/arch/powerpc/include/uapi/asm/perf_regs.h b/tools/arch/powerpc/include/uapi/asm/perf_regs.h index bdf5f10f8b9f..578b3ee86105 100644 --- a/tools/arch/powerpc/include/uapi/asm/perf_regs.h +++ b/tools/arch/powerpc/include/uapi/asm/perf_regs.h @@ -55,17 +55,33 @@ enum perf_event_powerpc_regs { PERF_REG_POWERPC_MMCR3, PERF_REG_POWERPC_SIER2, PERF_REG_POWERPC_SIER3, + PERF_REG_POWERPC_PMC1, + PERF_REG_POWERPC_PMC2, + PERF_REG_POWERPC_PMC3, + PERF_REG_POWERPC_PMC4, + PERF_REG_POWERPC_PMC5, + PERF_REG_POWERPC_PMC6, /* Max regs without the extended regs */ PERF_REG_POWERPC_MAX = PERF_REG_POWERPC_MMCRA + 1, }; #define PERF_REG_PMU_MASK ((1ULL << PERF_REG_POWERPC_MAX) - 1) -/* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_300 */ -#define PERF_REG_PMU_MASK_300 (((1ULL << (PERF_REG_POWERPC_MMCR2 + 1)) - 1) - PERF_REG_PMU_MASK) -/* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_31 */ -#define PERF_REG_PMU_MASK_31 (((1ULL << (PERF_REG_POWERPC_SIER3 + 1)) - 1) - PERF_REG_PMU_MASK) +/* Exclude MMCR3, SIER2, SIER3 for CPU_FTR_ARCH_300 */ +#define PERF_EXCLUDE_REG_EXT_300 (7ULL << PERF_REG_POWERPC_MMCR3) -#define PERF_REG_MAX_ISA_300 (PERF_REG_POWERPC_MMCR2 + 1) -#define PERF_REG_MAX_ISA_31 (PERF_REG_POWERPC_SIER3 + 1) +/* + * PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_300 + * includes 9 SPRS from MMCR0 to PMC6 excluding the + * unsupported SPRS in PERF_EXCLUDE_REG_EXT_300. + */ +#define PERF_REG_PMU_MASK_300 ((0xfffULL << PERF_REG_POWERPC_MMCR0) - PERF_EXCLUDE_REG_EXT_300) + +/* + * PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_31 + * includes 12 SPRs from MMCR0 to PMC6. + */ +#define PERF_REG_PMU_MASK_31 (0xfffULL << PERF_REG_POWERPC_MMCR0) + +#define PERF_REG_EXTENDED_MAX (PERF_REG_POWERPC_PMC6 + 1) #endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */ diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h index 7947cb1782da..b7dd944dc867 100644 --- a/tools/arch/x86/include/asm/disabled-features.h +++ b/tools/arch/x86/include/asm/disabled-features.h @@ -91,6 +91,7 @@ DISABLE_ENQCMD) #define DISABLED_MASK17 0 #define DISABLED_MASK18 0 -#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) +#define DISABLED_MASK19 0 +#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 20) #endif /* _ASM_X86_DISABLED_FEATURES_H */ diff --git a/tools/arch/x86/include/asm/insn.h b/tools/arch/x86/include/asm/insn.h index 52c6262e6bfd..cc777c185212 100644 --- a/tools/arch/x86/include/asm/insn.h +++ b/tools/arch/x86/include/asm/insn.h @@ -7,9 +7,12 @@ * Copyright (C) IBM Corporation, 2009 */ +#include <asm/byteorder.h> /* insn_attr_t is defined in inat.h */ #include "inat.h" +#if defined(__BYTE_ORDER) ? __BYTE_ORDER == __LITTLE_ENDIAN : defined(__LITTLE_ENDIAN) + struct insn_field { union { insn_value_t value; @@ -20,6 +23,48 @@ struct insn_field { unsigned char nbytes; }; +static inline void insn_field_set(struct insn_field *p, insn_value_t v, + unsigned char n) +{ + p->value = v; + p->nbytes = n; +} + +static inline void insn_set_byte(struct insn_field *p, unsigned char n, + insn_byte_t v) +{ + p->bytes[n] = v; +} + +#else + +struct insn_field { + insn_value_t value; + union { + insn_value_t little; + insn_byte_t bytes[4]; + }; + /* !0 if we've run insn_get_xxx() for this field */ + unsigned char got; + unsigned char nbytes; +}; + +static inline void insn_field_set(struct insn_field *p, insn_value_t v, + unsigned char n) +{ + p->value = v; + p->little = __cpu_to_le32(v); + p->nbytes = n; +} + +static inline void insn_set_byte(struct insn_field *p, unsigned char n, + insn_byte_t v) +{ + p->bytes[n] = v; + p->value = __le32_to_cpu(p->little); +} +#endif + struct insn { struct insn_field prefixes; /* * Prefixes diff --git a/tools/arch/x86/include/asm/orc_types.h b/tools/arch/x86/include/asm/orc_types.h index fdbffec4cfde..5a2baf28a1dc 100644 --- a/tools/arch/x86/include/asm/orc_types.h +++ b/tools/arch/x86/include/asm/orc_types.h @@ -40,6 +40,8 @@ #define ORC_REG_MAX 15 #ifndef __ASSEMBLY__ +#include <asm/byteorder.h> + /* * This struct is more or less a vastly simplified version of the DWARF Call * Frame Information standard. It contains only the necessary parts of DWARF @@ -51,10 +53,18 @@ struct orc_entry { s16 sp_offset; s16 bp_offset; +#if defined(__LITTLE_ENDIAN_BITFIELD) unsigned sp_reg:4; unsigned bp_reg:4; unsigned type:2; unsigned end:1; +#elif defined(__BIG_ENDIAN_BITFIELD) + unsigned bp_reg:4; + unsigned sp_reg:4; + unsigned unused:5; + unsigned end:1; + unsigned type:2; +#endif } __packed; #endif /* __ASSEMBLY__ */ diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h index 3ff0d48469f2..b2d504f11937 100644 --- a/tools/arch/x86/include/asm/required-features.h +++ b/tools/arch/x86/include/asm/required-features.h @@ -101,6 +101,7 @@ #define REQUIRED_MASK16 0 #define REQUIRED_MASK17 0 #define REQUIRED_MASK18 0 -#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) +#define REQUIRED_MASK19 0 +#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 20) #endif /* _ASM_X86_REQUIRED_FEATURES_H */ diff --git a/tools/arch/x86/lib/insn.c b/tools/arch/x86/lib/insn.c index 0151dfc6da61..3d9355ed1246 100644 --- a/tools/arch/x86/lib/insn.c +++ b/tools/arch/x86/lib/insn.c @@ -5,6 +5,7 @@ * Copyright (C) IBM Corporation, 2002, 2004, 2009 */ +#include <linux/kernel.h> #ifdef __KERNEL__ #include <linux/string.h> #else @@ -15,15 +16,28 @@ #include "../include/asm/emulate_prefix.h" +#define leXX_to_cpu(t, r) \ +({ \ + __typeof__(t) v; \ + switch (sizeof(t)) { \ + case 4: v = le32_to_cpu(r); break; \ + case 2: v = le16_to_cpu(r); break; \ + case 1: v = r; break; \ + default: \ + BUILD_BUG(); break; \ + } \ + v; \ +}) + /* Verify next sizeof(t) bytes can be on the same instruction */ #define validate_next(t, insn, n) \ ((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr) #define __get_next(t, insn) \ - ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; }) + ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); leXX_to_cpu(t, r); }) #define __peek_nbyte_next(t, insn, n) \ - ({ t r = *(t*)((insn)->next_byte + n); r; }) + ({ t r = *(t*)((insn)->next_byte + n); leXX_to_cpu(t, r); }) #define get_next(t, insn) \ ({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); }) @@ -147,9 +161,9 @@ found: b = insn->prefixes.bytes[3]; for (i = 0; i < nb; i++) if (prefixes->bytes[i] == lb) - prefixes->bytes[i] = b; + insn_set_byte(prefixes, i, b); } - insn->prefixes.bytes[3] = lb; + insn_set_byte(&insn->prefixes, 3, lb); } /* Decode REX prefix */ @@ -157,8 +171,7 @@ found: b = peek_next(insn_byte_t, insn); attr = inat_get_opcode_attribute(b); if (inat_is_rex_prefix(attr)) { - insn->rex_prefix.value = b; - insn->rex_prefix.nbytes = 1; + insn_field_set(&insn->rex_prefix, b, 1); insn->next_byte++; if (X86_REX_W(b)) /* REX.W overrides opnd_size */ @@ -181,13 +194,13 @@ found: if (X86_MODRM_MOD(b2) != 3) goto vex_end; } - insn->vex_prefix.bytes[0] = b; - insn->vex_prefix.bytes[1] = b2; + insn_set_byte(&insn->vex_prefix, 0, b); + insn_set_byte(&insn->vex_prefix, 1, b2); if (inat_is_evex_prefix(attr)) { b2 = peek_nbyte_next(insn_byte_t, insn, 2); - insn->vex_prefix.bytes[2] = b2; + insn_set_byte(&insn->vex_prefix, 2, b2); b2 = peek_nbyte_next(insn_byte_t, insn, 3); - insn->vex_prefix.bytes[3] = b2; + insn_set_byte(&insn->vex_prefix, 3, b2); insn->vex_prefix.nbytes = 4; insn->next_byte += 4; if (insn->x86_64 && X86_VEX_W(b2)) @@ -195,7 +208,7 @@ found: insn->opnd_bytes = 8; } else if (inat_is_vex3_prefix(attr)) { b2 = peek_nbyte_next(insn_byte_t, insn, 2); - insn->vex_prefix.bytes[2] = b2; + insn_set_byte(&insn->vex_prefix, 2, b2); insn->vex_prefix.nbytes = 3; insn->next_byte += 3; if (insn->x86_64 && X86_VEX_W(b2)) @@ -207,7 +220,7 @@ found: * Makes it easier to decode vex.W, vex.vvvv, * vex.L and vex.pp. Masking with 0x7f sets vex.W == 0. */ - insn->vex_prefix.bytes[2] = b2 & 0x7f; + insn_set_byte(&insn->vex_prefix, 2, b2 & 0x7f); insn->vex_prefix.nbytes = 2; insn->next_byte += 2; } @@ -243,7 +256,7 @@ void insn_get_opcode(struct insn *insn) /* Get first opcode */ op = get_next(insn_byte_t, insn); - opcode->bytes[0] = op; + insn_set_byte(opcode, 0, op); opcode->nbytes = 1; /* Check if there is VEX prefix or not */ @@ -295,8 +308,7 @@ void insn_get_modrm(struct insn *insn) if (inat_has_modrm(insn->attr)) { mod = get_next(insn_byte_t, insn); - modrm->value = mod; - modrm->nbytes = 1; + insn_field_set(modrm, mod, 1); if (inat_is_group(insn->attr)) { pfx_id = insn_last_prefix_id(insn); insn->attr = inat_get_group_attribute(mod, pfx_id, @@ -334,7 +346,7 @@ int insn_rip_relative(struct insn *insn) * For rip-relative instructions, the mod field (top 2 bits) * is zero and the r/m field (bottom 3 bits) is 0x5. */ - return (modrm->nbytes && (modrm->value & 0xc7) == 0x5); + return (modrm->nbytes && (modrm->bytes[0] & 0xc7) == 0x5); } /** @@ -353,11 +365,11 @@ void insn_get_sib(struct insn *insn) if (!insn->modrm.got) insn_get_modrm(insn); if (insn->modrm.nbytes) { - modrm = (insn_byte_t)insn->modrm.value; + modrm = insn->modrm.bytes[0]; if (insn->addr_bytes != 2 && X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) { - insn->sib.value = get_next(insn_byte_t, insn); - insn->sib.nbytes = 1; + insn_field_set(&insn->sib, + get_next(insn_byte_t, insn), 1); } } insn->sib.got = 1; @@ -407,19 +419,18 @@ void insn_get_displacement(struct insn *insn) if (mod == 3) goto out; if (mod == 1) { - insn->displacement.value = get_next(signed char, insn); - insn->displacement.nbytes = 1; + insn_field_set(&insn->displacement, + get_next(signed char, insn), 1); } else if (insn->addr_bytes == 2) { if ((mod == 0 && rm == 6) || mod == 2) { - insn->displacement.value = - get_next(short, insn); - insn->displacement.nbytes = 2; + insn_field_set(&insn->displacement, + get_next(short, insn), 2); } } else { if ((mod == 0 && rm == 5) || mod == 2 || (mod == 0 && base == 5)) { - insn->displacement.value = get_next(int, insn); - insn->displacement.nbytes = 4; + insn_field_set(&insn->displacement, + get_next(int, insn), 4); } } } @@ -435,18 +446,14 @@ static int __get_moffset(struct insn *insn) { switch (insn->addr_bytes) { case 2: - insn->moffset1.value = get_next(short, insn); - insn->moffset1.nbytes = 2; + insn_field_set(&insn->moffset1, get_next(short, insn), 2); break; case 4: - insn->moffset1.value = get_next(int, insn); - insn->moffset1.nbytes = 4; + insn_field_set(&insn->moffset1, get_next(int, insn), 4); break; case 8: - insn->moffset1.value = get_next(int, insn); - insn->moffset1.nbytes = 4; - insn->moffset2.value = get_next(int, insn); - insn->moffset2.nbytes = 4; + insn_field_set(&insn->moffset1, get_next(int, insn), 4); + insn_field_set(&insn->moffset2, get_next(int, insn), 4); break; default: /* opnd_bytes must be modified manually */ goto err_out; @@ -464,13 +471,11 @@ static int __get_immv32(struct insn *insn) { switch (insn->opnd_bytes) { case 2: - insn->immediate.value = get_next(short, insn); - insn->immediate.nbytes = 2; + insn_field_set(&insn->immediate, get_next(short, insn), 2); break; case 4: case 8: - insn->immediate.value = get_next(int, insn); - insn->immediate.nbytes = 4; + insn_field_set(&insn->immediate, get_next(int, insn), 4); break; default: /* opnd_bytes must be modified manually */ goto err_out; @@ -487,18 +492,15 @@ static int __get_immv(struct insn *insn) { switch (insn->opnd_bytes) { case 2: - insn->immediate1.value = get_next(short, insn); - insn->immediate1.nbytes = 2; + insn_field_set(&insn->immediate1, get_next(short, insn), 2); break; case 4: - insn->immediate1.value = get_next(int, insn); + insn_field_set(&insn->immediate1, get_next(int, insn), 4); insn->immediate1.nbytes = 4; break; case 8: - insn->immediate1.value = get_next(int, insn); - insn->immediate1.nbytes = 4; - insn->immediate2.value = get_next(int, insn); - insn->immediate2.nbytes = 4; + insn_field_set(&insn->immediate1, get_next(int, insn), 4); + insn_field_set(&insn->immediate2, get_next(int, insn), 4); break; default: /* opnd_bytes must be modified manually */ goto err_out; @@ -515,12 +517,10 @@ static int __get_immptr(struct insn *insn) { switch (insn->opnd_bytes) { case 2: - insn->immediate1.value = get_next(short, insn); - insn->immediate1.nbytes = 2; + insn_field_set(&insn->immediate1, get_next(short, insn), 2); break; case 4: - insn->immediate1.value = get_next(int, insn); - insn->immediate1.nbytes = 4; + insn_field_set(&insn->immediate1, get_next(int, insn), 4); break; case 8: /* ptr16:64 is not exist (no segment) */ @@ -528,8 +528,7 @@ static int __get_immptr(struct insn *insn) default: /* opnd_bytes must be modified manually */ goto err_out; } - insn->immediate2.value = get_next(unsigned short, insn); - insn->immediate2.nbytes = 2; + insn_field_set(&insn->immediate2, get_next(unsigned short, insn), 2); insn->immediate1.got = insn->immediate2.got = 1; return 1; @@ -565,22 +564,17 @@ void insn_get_immediate(struct insn *insn) switch (inat_immediate_size(insn->attr)) { case INAT_IMM_BYTE: - insn->immediate.value = get_next(signed char, insn); - insn->immediate.nbytes = 1; + insn_field_set(&insn->immediate, get_next(signed char, insn), 1); break; case INAT_IMM_WORD: - insn->immediate.value = get_next(short, insn); - insn->immediate.nbytes = 2; + insn_field_set(&insn->immediate, get_next(short, insn), 2); break; case INAT_IMM_DWORD: - insn->immediate.value = get_next(int, insn); - insn->immediate.nbytes = 4; + insn_field_set(&insn->immediate, get_next(int, insn), 4); break; case INAT_IMM_QWORD: - insn->immediate1.value = get_next(int, insn); - insn->immediate1.nbytes = 4; - insn->immediate2.value = get_next(int, insn); - insn->immediate2.nbytes = 4; + insn_field_set(&insn->immediate1, get_next(int, insn), 4); + insn_field_set(&insn->immediate2, get_next(int, insn), 4); break; case INAT_IMM_PTR: if (!__get_immptr(insn)) @@ -599,8 +593,7 @@ void insn_get_immediate(struct insn *insn) goto err_out; } if (inat_has_second_immediate(insn->attr)) { - insn->immediate2.value = get_next(signed char, insn); - insn->immediate2.nbytes = 1; + insn_field_set(&insn->immediate2, get_next(signed char, insn), 1); } done: insn->immediate.got = 1; diff --git a/tools/bootconfig/scripts/bconf2ftrace.sh b/tools/bootconfig/scripts/bconf2ftrace.sh index 595e164dc352..feb30c2c7881 100755 --- a/tools/bootconfig/scripts/bconf2ftrace.sh +++ b/tools/bootconfig/scripts/bconf2ftrace.sh @@ -152,6 +152,7 @@ setup_instance() { # [instance] set_array_of ${instance}.options ${instancedir}/trace_options set_value_of ${instance}.trace_clock ${instancedir}/trace_clock set_value_of ${instance}.cpumask ${instancedir}/tracing_cpumask + set_value_of ${instance}.tracing_on ${instancedir}/tracing_on set_value_of ${instance}.tracer ${instancedir}/current_tracer set_array_of ${instance}.ftrace.filters \ ${instancedir}/set_ftrace_filter diff --git a/tools/bootconfig/scripts/ftrace2bconf.sh b/tools/bootconfig/scripts/ftrace2bconf.sh index 6c0d4b61e0c2..a0c3bcc6da4f 100755 --- a/tools/bootconfig/scripts/ftrace2bconf.sh +++ b/tools/bootconfig/scripts/ftrace2bconf.sh @@ -221,6 +221,10 @@ instance_options() { # [instance-name] if [ `echo $val | sed -e s/f//g`x != x ]; then emit_kv $PREFIX.cpumask = $val fi + val=`cat $INSTANCE/tracing_on` + if [ `echo $val | sed -e s/f//g`x != x ]; then + emit_kv $PREFIX.tracing_on = $val + fi val= for i in `cat $INSTANCE/set_event`; do diff --git a/tools/bpf/bpf_dbg.c b/tools/bpf/bpf_dbg.c index a0ebcdf59c31..a07dfc479270 100644 --- a/tools/bpf/bpf_dbg.c +++ b/tools/bpf/bpf_dbg.c @@ -890,7 +890,7 @@ static int bpf_run_stepping(struct sock_filter *f, uint16_t bpf_len, bool stop = false; int i = 1; - while (bpf_curr.Rs == false && stop == false) { + while (!bpf_curr.Rs && !stop) { bpf_safe_regs(); if (i++ == next) diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index f897cb5fb12d..b3073ae84018 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -75,8 +75,6 @@ endif INSTALL ?= install RM ?= rm -f -CLANG ?= clang -LLVM_STRIP ?= llvm-strip FEATURE_USER = .bpftool FEATURE_TESTS = libbfd disassembler-four-args reallocarray zlib libcap \ @@ -148,6 +146,8 @@ VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ /boot/vmlinux-$(shell uname -r) VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) +bootstrap: $(BPFTOOL_BOOTSTRAP) + ifneq ($(VMLINUX_BTF)$(VMLINUX_H),) ifeq ($(feature-clang-bpf-co-re),1) @@ -166,7 +166,7 @@ $(OUTPUT)%.bpf.o: skeleton/%.bpf.c $(OUTPUT)vmlinux.h $(LIBBPF) -I$(srctree)/tools/include/uapi/ \ -I$(LIBBPF_PATH) \ -I$(srctree)/tools/lib \ - -g -O2 -target bpf -c $< -o $@ && $(LLVM_STRIP) -g $@ + -g -O2 -Wall -target bpf -c $< -o $@ && $(LLVM_STRIP) -g $@ $(OUTPUT)%.skel.h: $(OUTPUT)%.bpf.o $(BPFTOOL_BOOTSTRAP) $(QUIET_GEN)$(BPFTOOL_BOOTSTRAP) gen skeleton $< > $@ diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c index 3fae61ef6339..ff3aa0cf3997 100644 --- a/tools/bpf/bpftool/net.c +++ b/tools/bpf/bpftool/net.c @@ -11,7 +11,6 @@ #include <bpf/bpf.h> #include <bpf/libbpf.h> #include <net/if.h> -#include <linux/if.h> #include <linux/rtnetlink.h> #include <linux/socket.h> #include <linux/tc_act/tc_bpf.h> diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 1fe3ba255bad..f2b915b20546 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -368,6 +368,8 @@ static void print_prog_header_json(struct bpf_prog_info *info) jsonw_uint_field(json_wtr, "run_time_ns", info->run_time_ns); jsonw_uint_field(json_wtr, "run_cnt", info->run_cnt); } + if (info->recursion_misses) + jsonw_uint_field(json_wtr, "recursion_misses", info->recursion_misses); } static void print_prog_json(struct bpf_prog_info *info, int fd) @@ -446,6 +448,8 @@ static void print_prog_header_plain(struct bpf_prog_info *info) if (info->run_time_ns) printf(" run_time_ns %lld run_cnt %lld", info->run_time_ns, info->run_cnt); + if (info->recursion_misses) + printf(" recursion_misses %lld", info->recursion_misses); printf("\n"); } diff --git a/tools/bpf/resolve_btfids/.gitignore b/tools/bpf/resolve_btfids/.gitignore index a026df7dc280..16913fffc985 100644 --- a/tools/bpf/resolve_btfids/.gitignore +++ b/tools/bpf/resolve_btfids/.gitignore @@ -1,4 +1,3 @@ -/FEATURE-DUMP.libbpf -/bpf_helper_defs.h /fixdep /resolve_btfids +/libbpf/ diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile index bf656432ad73..bb9fa8de7e62 100644 --- a/tools/bpf/resolve_btfids/Makefile +++ b/tools/bpf/resolve_btfids/Makefile @@ -2,11 +2,7 @@ include ../../scripts/Makefile.include include ../../scripts/Makefile.arch -ifeq ($(srctree),) -srctree := $(patsubst %/,%,$(dir $(CURDIR))) -srctree := $(patsubst %/,%,$(dir $(srctree))) -srctree := $(patsubst %/,%,$(dir $(srctree))) -endif +srctree := $(abspath $(CURDIR)/../../../) ifeq ($(V),1) Q = @@ -22,28 +18,29 @@ AR = $(HOSTAR) CC = $(HOSTCC) LD = $(HOSTLD) ARCH = $(HOSTARCH) +RM ?= rm OUTPUT ?= $(srctree)/tools/bpf/resolve_btfids/ LIBBPF_SRC := $(srctree)/tools/lib/bpf/ SUBCMD_SRC := $(srctree)/tools/lib/subcmd/ -BPFOBJ := $(OUTPUT)/libbpf.a -SUBCMDOBJ := $(OUTPUT)/libsubcmd.a +BPFOBJ := $(OUTPUT)/libbpf/libbpf.a +SUBCMDOBJ := $(OUTPUT)/libsubcmd/libsubcmd.a BINARY := $(OUTPUT)/resolve_btfids BINARY_IN := $(BINARY)-in.o all: $(BINARY) -$(OUTPUT): +$(OUTPUT) $(OUTPUT)/libbpf $(OUTPUT)/libsubcmd: $(call msg,MKDIR,,$@) - $(Q)mkdir -p $(OUTPUT) + $(Q)mkdir -p $(@) -$(SUBCMDOBJ): fixdep FORCE - $(Q)$(MAKE) -C $(SUBCMD_SRC) OUTPUT=$(OUTPUT) +$(SUBCMDOBJ): fixdep FORCE | $(OUTPUT)/libsubcmd + $(Q)$(MAKE) -C $(SUBCMD_SRC) OUTPUT=$(abspath $(dir $@))/ $(abspath $@) -$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(OUTPUT) +$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(OUTPUT)/libbpf $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(abspath $(dir $@))/ $(abspath $@) CFLAGS := -g \ @@ -57,24 +54,27 @@ LIBS = -lelf -lz export srctree OUTPUT CFLAGS Q include $(srctree)/tools/build/Makefile.include -$(BINARY_IN): fixdep FORCE +$(BINARY_IN): fixdep FORCE | $(OUTPUT) $(Q)$(MAKE) $(build)=resolve_btfids $(BINARY): $(BPFOBJ) $(SUBCMDOBJ) $(BINARY_IN) $(call msg,LINK,$@) $(Q)$(CC) $(BINARY_IN) $(LDFLAGS) -o $@ $(BPFOBJ) $(SUBCMDOBJ) $(LIBS) -libsubcmd-clean: - $(Q)$(MAKE) -C $(SUBCMD_SRC) OUTPUT=$(OUTPUT) clean - -libbpf-clean: - $(Q)$(MAKE) -C $(LIBBPF_SRC) OUTPUT=$(OUTPUT) clean +clean_objects := $(wildcard $(OUTPUT)/*.o \ + $(OUTPUT)/.*.o.cmd \ + $(OUTPUT)/.*.o.d \ + $(OUTPUT)/libbpf \ + $(OUTPUT)/libsubcmd \ + $(OUTPUT)/resolve_btfids) -clean: libsubcmd-clean libbpf-clean fixdep-clean +ifneq ($(clean_objects),) +clean: fixdep-clean $(call msg,CLEAN,$(BINARY)) - $(Q)$(RM) -f $(BINARY); \ - $(RM) -rf $(if $(OUTPUT),$(OUTPUT),.)/feature; \ - find $(if $(OUTPUT),$(OUTPUT),.) -name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM) + $(Q)$(RM) -rf $(clean_objects) +else +clean: +endif tags: $(call msg,GEN,,tags) diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index e3ea569ee125..7409d7860aa6 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -139,6 +139,8 @@ int eprintf(int level, int var, const char *fmt, ...) #define pr_debug2(fmt, ...) pr_debugN(2, pr_fmt(fmt), ##__VA_ARGS__) #define pr_err(fmt, ...) \ eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__) +#define pr_info(fmt, ...) \ + eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__) static bool is_btf_id(const char *name) { @@ -472,7 +474,7 @@ static int symbols_resolve(struct object *obj) int nr_funcs = obj->nr_funcs; int err, type_id; struct btf *btf; - __u32 nr; + __u32 nr_types; btf = btf__parse(obj->btf ?: obj->path, NULL); err = libbpf_get_error(btf); @@ -483,12 +485,12 @@ static int symbols_resolve(struct object *obj) } err = -1; - nr = btf__get_nr_types(btf); + nr_types = btf__get_nr_types(btf); /* * Iterate all the BTF types and search for collected symbol IDs. */ - for (type_id = 1; type_id <= nr; type_id++) { + for (type_id = 1; type_id <= nr_types; type_id++) { const struct btf_type *type; struct rb_root *root; struct btf_id *id; @@ -526,8 +528,13 @@ static int symbols_resolve(struct object *obj) id = btf_id__find(root, str); if (id) { - id->id = type_id; - (*nr)--; + if (id->id) { + pr_info("WARN: multiple IDs found for '%s': %d, %d - using %d\n", + str, id->id, type_id, id->id); + } else { + id->id = type_id; + (*nr)--; + } } } diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile index 4d5ca54fcd4c..9d9fb6209be1 100644 --- a/tools/bpf/runqslower/Makefile +++ b/tools/bpf/runqslower/Makefile @@ -3,9 +3,6 @@ include ../../scripts/Makefile.include OUTPUT ?= $(abspath .output)/ -CLANG ?= clang -LLC ?= llc -LLVM_STRIP ?= llvm-strip BPFTOOL_OUTPUT := $(OUTPUT)bpftool/ DEFAULT_BPFTOOL := $(BPFTOOL_OUTPUT)bpftool BPFTOOL ?= $(DEFAULT_BPFTOOL) diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 97cbfb31b762..74e255d58d8d 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -99,7 +99,9 @@ FEATURE_TESTS_EXTRA := \ clang \ libbpf \ libpfm4 \ - libdebuginfod + libdebuginfod \ + clang-bpf-co-re + FEATURE_TESTS ?= $(FEATURE_TESTS_BASIC) diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 89ba522e377d..3e55edb3ea54 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -1,4 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 +include ../../scripts/Makefile.include + FILES= \ test-all.bin \ test-backtrace.bin \ @@ -76,8 +78,6 @@ FILES= \ FILES := $(addprefix $(OUTPUT),$(FILES)) PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config -LLVM_CONFIG ?= llvm-config -CLANG ?= clang all: $(FILES) diff --git a/tools/build/feature/test-libopencsd.c b/tools/build/feature/test-libopencsd.c index 1547bc2c0950..52c790b0317b 100644 --- a/tools/build/feature/test-libopencsd.c +++ b/tools/build/feature/test-libopencsd.c @@ -4,9 +4,9 @@ /* * Check OpenCSD library version is sufficient to provide required features */ -#define OCSD_MIN_VER ((0 << 16) | (14 << 8) | (0)) +#define OCSD_MIN_VER ((1 << 16) | (0 << 8) | (0)) #if !defined(OCSD_VER_NUM) || (OCSD_VER_NUM < OCSD_MIN_VER) -#error "OpenCSD >= 0.14.0 is required" +#error "OpenCSD >= 1.0.0 is required" #endif int main(void) diff --git a/tools/gpio/gpio-event-mon.c b/tools/gpio/gpio-event-mon.c index cacd66ad7926..a2b233fdb572 100644 --- a/tools/gpio/gpio-event-mon.c +++ b/tools/gpio/gpio-event-mon.c @@ -107,8 +107,8 @@ int monitor_device(const char *device_name, ret = -EIO; break; } - fprintf(stdout, "GPIO EVENT at %llu on line %d (%d|%d) ", - event.timestamp_ns, event.offset, event.line_seqno, + fprintf(stdout, "GPIO EVENT at %" PRIu64 " on line %d (%d|%d) ", + (uint64_t)event.timestamp_ns, event.offset, event.line_seqno, event.seqno); switch (event.id) { case GPIO_V2_LINE_EVENT_RISING_EDGE: diff --git a/tools/gpio/gpio-utils.c b/tools/gpio/gpio-utils.c index 37187e056c8b..1639b4d832cd 100644 --- a/tools/gpio/gpio-utils.c +++ b/tools/gpio/gpio-utils.c @@ -32,74 +32,6 @@ * following api will request gpio lines, do the operation and then * release these lines. */ -/** - * gpiotools_request_linehandle() - request gpio lines in a gpiochip - * @device_name: The name of gpiochip without prefix "/dev/", - * such as "gpiochip0" - * @lines: An array desired lines, specified by offset - * index for the associated GPIO device. - * @num_lines: The number of lines to request. - * @flag: The new flag for requsted gpio. Reference - * "linux/gpio.h" for the meaning of flag. - * @data: Default value will be set to gpio when flag is - * GPIOHANDLE_REQUEST_OUTPUT. - * @consumer_label: The name of consumer, such as "sysfs", - * "powerkey". This is useful for other users to - * know who is using. - * - * Request gpio lines through the ioctl provided by chardev. User - * could call gpiotools_set_values() and gpiotools_get_values() to - * read and write respectively through the returned fd. Call - * gpiotools_release_linehandle() to release these lines after that. - * - * Return: On success return the fd; - * On failure return the errno. - */ -int gpiotools_request_linehandle(const char *device_name, unsigned int *lines, - unsigned int num_lines, unsigned int flag, - struct gpiohandle_data *data, - const char *consumer_label) -{ - struct gpiohandle_request req; - char *chrdev_name; - int fd; - int i; - int ret; - - ret = asprintf(&chrdev_name, "/dev/%s", device_name); - if (ret < 0) - return -ENOMEM; - - fd = open(chrdev_name, 0); - if (fd == -1) { - ret = -errno; - fprintf(stderr, "Failed to open %s, %s\n", - chrdev_name, strerror(errno)); - goto exit_free_name; - } - - for (i = 0; i < num_lines; i++) - req.lineoffsets[i] = lines[i]; - - req.flags = flag; - strcpy(req.consumer_label, consumer_label); - req.lines = num_lines; - if (flag & GPIOHANDLE_REQUEST_OUTPUT) - memcpy(req.default_values, data, sizeof(req.default_values)); - - ret = ioctl(fd, GPIO_GET_LINEHANDLE_IOCTL, &req); - if (ret == -1) { - ret = -errno; - fprintf(stderr, "Failed to issue %s (%d), %s\n", - "GPIO_GET_LINEHANDLE_IOCTL", ret, strerror(errno)); - } - - if (close(fd) == -1) - perror("Failed to close GPIO character device file"); -exit_free_name: - free(chrdev_name); - return ret < 0 ? ret : req.fd; -} /** * gpiotools_request_line() - request gpio lines in a gpiochip @@ -216,27 +148,6 @@ int gpiotools_get_values(const int fd, struct gpio_v2_line_values *values) } /** - * gpiotools_release_linehandle(): Release the line(s) of gpiochip - * @fd: The fd returned by - * gpiotools_request_linehandle(). - * - * Return: On success return 0; - * On failure return the errno. - */ -int gpiotools_release_linehandle(const int fd) -{ - int ret; - - ret = close(fd); - if (ret == -1) { - perror("Failed to close GPIO LINEHANDLE device file"); - ret = -errno; - } - - return ret; -} - -/** * gpiotools_release_line(): Release the line(s) of gpiochip * @fd: The fd returned by * gpiotools_request_line(). diff --git a/tools/gpio/gpio-utils.h b/tools/gpio/gpio-utils.h index 6c69a9f1c253..8af7c8ee19ce 100644 --- a/tools/gpio/gpio-utils.h +++ b/tools/gpio/gpio-utils.h @@ -24,12 +24,6 @@ static inline int check_prefix(const char *str, const char *prefix) strncmp(str, prefix, strlen(prefix)) == 0; } -int gpiotools_request_linehandle(const char *device_name, unsigned int *lines, - unsigned int num_lines, unsigned int flag, - struct gpiohandle_data *data, - const char *consumer_label); -int gpiotools_release_linehandle(const int fd); - int gpiotools_request_line(const char *device_name, unsigned int *lines, unsigned int num_lines, diff --git a/tools/gpio/gpio-watch.c b/tools/gpio/gpio-watch.c index f229ec62301b..41e76d244192 100644 --- a/tools/gpio/gpio-watch.c +++ b/tools/gpio/gpio-watch.c @@ -10,6 +10,7 @@ #include <ctype.h> #include <errno.h> #include <fcntl.h> +#include <inttypes.h> #include <linux/gpio.h> #include <poll.h> #include <stdbool.h> @@ -86,8 +87,8 @@ int main(int argc, char **argv) return EXIT_FAILURE; } - printf("line %u: %s at %llu\n", - chg.info.offset, event, chg.timestamp_ns); + printf("line %u: %s at %" PRIu64 "\n", + chg.info.offset, event, (uint64_t)chg.timestamp_ns); } } diff --git a/tools/include/linux/build_bug.h b/tools/include/linux/build_bug.h index ce365d212768..cc7070c7439b 100644 --- a/tools/include/linux/build_bug.h +++ b/tools/include/linux/build_bug.h @@ -79,9 +79,4 @@ #define __static_assert(expr, msg, ...) _Static_assert(expr, msg) #endif // static_assert -#ifdef __GENKSYMS__ -/* genksyms gets confused by _Static_assert */ -#define _Static_assert(expr, ...) -#endif - #endif /* _LINUX_BUILD_BUG_H */ diff --git a/tools/include/linux/export.h b/tools/include/linux/export.h index d07e586b9ba0..acb6f4daa2f0 100644 --- a/tools/include/linux/export.h +++ b/tools/include/linux/export.h @@ -3,8 +3,5 @@ #define EXPORT_SYMBOL(sym) #define EXPORT_SYMBOL_GPL(sym) -#define EXPORT_SYMBOL_GPL_FUTURE(sym) -#define EXPORT_UNUSED_SYMBOL(sym) -#define EXPORT_UNUSED_SYMBOL_GPL(sym) #endif diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h index ca28b6ab8db7..736bdeccdfe4 100644 --- a/tools/include/linux/filter.h +++ b/tools/include/linux/filter.h @@ -169,15 +169,31 @@ .off = OFF, \ .imm = 0 }) -/* Atomic memory add, *(uint *)(dst_reg + off16) += src_reg */ +/* + * Atomic operations: + * + * BPF_ADD *(uint *) (dst_reg + off16) += src_reg + * BPF_AND *(uint *) (dst_reg + off16) &= src_reg + * BPF_OR *(uint *) (dst_reg + off16) |= src_reg + * BPF_XOR *(uint *) (dst_reg + off16) ^= src_reg + * BPF_ADD | BPF_FETCH src_reg = atomic_fetch_add(dst_reg + off16, src_reg); + * BPF_AND | BPF_FETCH src_reg = atomic_fetch_and(dst_reg + off16, src_reg); + * BPF_OR | BPF_FETCH src_reg = atomic_fetch_or(dst_reg + off16, src_reg); + * BPF_XOR | BPF_FETCH src_reg = atomic_fetch_xor(dst_reg + off16, src_reg); + * BPF_XCHG src_reg = atomic_xchg(dst_reg + off16, src_reg) + * BPF_CMPXCHG r0 = atomic_cmpxchg(dst_reg + off16, r0, src_reg) + */ -#define BPF_STX_XADD(SIZE, DST, SRC, OFF) \ +#define BPF_ATOMIC_OP(SIZE, OP, DST, SRC, OFF) \ ((struct bpf_insn) { \ - .code = BPF_STX | BPF_SIZE(SIZE) | BPF_XADD, \ + .code = BPF_STX | BPF_SIZE(SIZE) | BPF_ATOMIC, \ .dst_reg = DST, \ .src_reg = SRC, \ .off = OFF, \ - .imm = 0 }) + .imm = OP }) + +/* Legacy alias */ +#define BPF_STX_XADD(SIZE, DST, SRC, OFF) BPF_ATOMIC_OP(SIZE, BPF_ADD, DST, SRC, OFF) /* Memory store, *(uint *) (dst_reg + off16) = imm32 */ diff --git a/tools/include/linux/objtool.h b/tools/include/linux/objtool.h index 577f51436cf9..7e72d975cb76 100644 --- a/tools/include/linux/objtool.h +++ b/tools/include/linux/objtool.h @@ -29,11 +29,14 @@ struct unwind_hint { * * UNWIND_HINT_TYPE_REGS_PARTIAL: Used in entry code to indicate that * sp_reg+sp_offset points to the iret return frame. + * + * UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function. + * Useful for code which doesn't have an ELF function annotation. */ #define UNWIND_HINT_TYPE_CALL 0 #define UNWIND_HINT_TYPE_REGS 1 #define UNWIND_HINT_TYPE_REGS_PARTIAL 2 -#define UNWIND_HINT_TYPE_RET_OFFSET 3 +#define UNWIND_HINT_TYPE_FUNC 3 #ifdef CONFIG_STACK_VALIDATION @@ -109,6 +112,12 @@ struct unwind_hint { .popsection .endm +.macro STACK_FRAME_NON_STANDARD func:req + .pushsection .discard.func_stack_frame_non_standard, "aw" + .long \func - . + .popsection +.endm + #endif /* __ASSEMBLY__ */ #else /* !CONFIG_STACK_VALIDATION */ @@ -122,6 +131,8 @@ struct unwind_hint { #define ANNOTATE_INTRA_FUNCTION_CALL .macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 .endm +.macro STACK_FRAME_NON_STANDARD func:req +.endm #endif #endif /* CONFIG_STACK_VALIDATION */ diff --git a/tools/include/linux/rbtree.h b/tools/include/linux/rbtree.h index 30dd21f976c3..2680f2edb837 100644 --- a/tools/include/linux/rbtree.h +++ b/tools/include/linux/rbtree.h @@ -152,4 +152,194 @@ static inline void rb_replace_node_cached(struct rb_node *victim, rb_replace_node(victim, new, &root->rb_root); } -#endif /* __TOOLS_LINUX_PERF_RBTREE_H */ +/* + * The below helper functions use 2 operators with 3 different + * calling conventions. The operators are related like: + * + * comp(a->key,b) < 0 := less(a,b) + * comp(a->key,b) > 0 := less(b,a) + * comp(a->key,b) == 0 := !less(a,b) && !less(b,a) + * + * If these operators define a partial order on the elements we make no + * guarantee on which of the elements matching the key is found. See + * rb_find(). + * + * The reason for this is to allow the find() interface without requiring an + * on-stack dummy object, which might not be feasible due to object size. + */ + +/** + * rb_add_cached() - insert @node into the leftmost cached tree @tree + * @node: node to insert + * @tree: leftmost cached tree to insert @node into + * @less: operator defining the (partial) node order + */ +static __always_inline void +rb_add_cached(struct rb_node *node, struct rb_root_cached *tree, + bool (*less)(struct rb_node *, const struct rb_node *)) +{ + struct rb_node **link = &tree->rb_root.rb_node; + struct rb_node *parent = NULL; + bool leftmost = true; + + while (*link) { + parent = *link; + if (less(node, parent)) { + link = &parent->rb_left; + } else { + link = &parent->rb_right; + leftmost = false; + } + } + + rb_link_node(node, parent, link); + rb_insert_color_cached(node, tree, leftmost); +} + +/** + * rb_add() - insert @node into @tree + * @node: node to insert + * @tree: tree to insert @node into + * @less: operator defining the (partial) node order + */ +static __always_inline void +rb_add(struct rb_node *node, struct rb_root *tree, + bool (*less)(struct rb_node *, const struct rb_node *)) +{ + struct rb_node **link = &tree->rb_node; + struct rb_node *parent = NULL; + + while (*link) { + parent = *link; + if (less(node, parent)) + link = &parent->rb_left; + else + link = &parent->rb_right; + } + + rb_link_node(node, parent, link); + rb_insert_color(node, tree); +} + +/** + * rb_find_add() - find equivalent @node in @tree, or add @node + * @node: node to look-for / insert + * @tree: tree to search / modify + * @cmp: operator defining the node order + * + * Returns the rb_node matching @node, or NULL when no match is found and @node + * is inserted. + */ +static __always_inline struct rb_node * +rb_find_add(struct rb_node *node, struct rb_root *tree, + int (*cmp)(struct rb_node *, const struct rb_node *)) +{ + struct rb_node **link = &tree->rb_node; + struct rb_node *parent = NULL; + int c; + + while (*link) { + parent = *link; + c = cmp(node, parent); + + if (c < 0) + link = &parent->rb_left; + else if (c > 0) + link = &parent->rb_right; + else + return parent; + } + + rb_link_node(node, parent, link); + rb_insert_color(node, tree); + return NULL; +} + +/** + * rb_find() - find @key in tree @tree + * @key: key to match + * @tree: tree to search + * @cmp: operator defining the node order + * + * Returns the rb_node matching @key or NULL. + */ +static __always_inline struct rb_node * +rb_find(const void *key, const struct rb_root *tree, + int (*cmp)(const void *key, const struct rb_node *)) +{ + struct rb_node *node = tree->rb_node; + + while (node) { + int c = cmp(key, node); + + if (c < 0) + node = node->rb_left; + else if (c > 0) + node = node->rb_right; + else + return node; + } + + return NULL; +} + +/** + * rb_find_first() - find the first @key in @tree + * @key: key to match + * @tree: tree to search + * @cmp: operator defining node order + * + * Returns the leftmost node matching @key, or NULL. + */ +static __always_inline struct rb_node * +rb_find_first(const void *key, const struct rb_root *tree, + int (*cmp)(const void *key, const struct rb_node *)) +{ + struct rb_node *node = tree->rb_node; + struct rb_node *match = NULL; + + while (node) { + int c = cmp(key, node); + + if (c <= 0) { + if (!c) + match = node; + node = node->rb_left; + } else if (c > 0) { + node = node->rb_right; + } + } + + return match; +} + +/** + * rb_next_match() - find the next @key in @tree + * @key: key to match + * @tree: tree to search + * @cmp: operator defining node order + * + * Returns the next node matching @key, or NULL. + */ +static __always_inline struct rb_node * +rb_next_match(const void *key, struct rb_node *node, + int (*cmp)(const void *key, const struct rb_node *)) +{ + node = rb_next(node); + if (node && cmp(key, node)) + node = NULL; + return node; +} + +/** + * rb_for_each() - iterates a subtree matching @key + * @node: iterator + * @key: key to match + * @tree: tree to search + * @cmp: operator defining node order + */ +#define rb_for_each(node, key, tree, cmp) \ + for ((node) = rb_find_first((key), (tree), (cmp)); \ + (node); (node) = rb_next_match((key), (node), (cmp))) + +#endif /* __TOOLS_LINUX_PERF_RBTREE_H */ diff --git a/tools/include/linux/static_call_types.h b/tools/include/linux/static_call_types.h index 89135bb35bf7..ae5662d368b9 100644 --- a/tools/include/linux/static_call_types.h +++ b/tools/include/linux/static_call_types.h @@ -4,11 +4,13 @@ #include <linux/types.h> #include <linux/stringify.h> +#include <linux/compiler.h> #define STATIC_CALL_KEY_PREFIX __SCK__ #define STATIC_CALL_KEY_PREFIX_STR __stringify(STATIC_CALL_KEY_PREFIX) #define STATIC_CALL_KEY_PREFIX_LEN (sizeof(STATIC_CALL_KEY_PREFIX_STR) - 1) #define STATIC_CALL_KEY(name) __PASTE(STATIC_CALL_KEY_PREFIX, name) +#define STATIC_CALL_KEY_STR(name) __stringify(STATIC_CALL_KEY(name)) #define STATIC_CALL_TRAMP_PREFIX __SCT__ #define STATIC_CALL_TRAMP_PREFIX_STR __stringify(STATIC_CALL_TRAMP_PREFIX) @@ -32,4 +34,52 @@ struct static_call_site { s32 key; }; +#define DECLARE_STATIC_CALL(name, func) \ + extern struct static_call_key STATIC_CALL_KEY(name); \ + extern typeof(func) STATIC_CALL_TRAMP(name); + +#ifdef CONFIG_HAVE_STATIC_CALL + +#define __raw_static_call(name) (&STATIC_CALL_TRAMP(name)) + +#ifdef CONFIG_HAVE_STATIC_CALL_INLINE + +/* + * __ADDRESSABLE() is used to ensure the key symbol doesn't get stripped from + * the symbol table so that objtool can reference it when it generates the + * .static_call_sites section. + */ +#define __STATIC_CALL_ADDRESSABLE(name) \ + __ADDRESSABLE(STATIC_CALL_KEY(name)) + +#define __static_call(name) \ +({ \ + __STATIC_CALL_ADDRESSABLE(name); \ + __raw_static_call(name); \ +}) + +#else /* !CONFIG_HAVE_STATIC_CALL_INLINE */ + +#define __STATIC_CALL_ADDRESSABLE(name) +#define __static_call(name) __raw_static_call(name) + +#endif /* CONFIG_HAVE_STATIC_CALL_INLINE */ + +#ifdef MODULE +#define __STATIC_CALL_MOD_ADDRESSABLE(name) +#define static_call_mod(name) __raw_static_call(name) +#else +#define __STATIC_CALL_MOD_ADDRESSABLE(name) __STATIC_CALL_ADDRESSABLE(name) +#define static_call_mod(name) __static_call(name) +#endif + +#define static_call(name) __static_call(name) + +#else + +#define static_call(name) \ + ((typeof(STATIC_CALL_TRAMP(name))*)(STATIC_CALL_KEY(name).func)) + +#endif /* CONFIG_HAVE_STATIC_CALL */ + #endif /* _STATIC_CALL_TYPES_H */ diff --git a/tools/include/linux/types.h b/tools/include/linux/types.h index 154eb4e3ca7c..e9c5a215837d 100644 --- a/tools/include/linux/types.h +++ b/tools/include/linux/types.h @@ -6,7 +6,10 @@ #include <stddef.h> #include <stdint.h> +#ifndef __SANE_USERSPACE_TYPES__ #define __SANE_USERSPACE_TYPES__ /* For PPC64, to get LL64 types */ +#endif + #include <asm/types.h> #include <asm/posix_types.h> diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h index e61d36cd4e50..8b7a9830dd22 100644 --- a/tools/include/nolibc/nolibc.h +++ b/tools/include/nolibc/nolibc.h @@ -71,7 +71,7 @@ * * A simple static executable may be built this way : * $ gcc -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \ - * -static -include nolibc.h -lgcc -o hello hello.c + * -static -include nolibc.h -o hello hello.c -lgcc * * A very useful calling convention table may be found here : * http://man7.org/linux/man-pages/man2/syscall.2.html @@ -81,19 +81,12 @@ * */ -/* Some archs (at least aarch64) don't expose the regular syscalls anymore by - * default, either because they have an "_at" replacement, or because there are - * more modern alternatives. For now we'd rather still use them. - */ -#define __ARCH_WANT_SYSCALL_NO_AT -#define __ARCH_WANT_SYSCALL_NO_FLAGS -#define __ARCH_WANT_SYSCALL_DEPRECATED - #include <asm/unistd.h> #include <asm/ioctls.h> #include <asm/errno.h> #include <linux/fs.h> #include <linux/loop.h> +#include <linux/time.h> #define NOLIBC @@ -152,24 +145,6 @@ struct pollfd { short int revents; }; -/* for select() */ -struct timeval { - long tv_sec; - long tv_usec; -}; - -/* for pselect() */ -struct timespec { - long tv_sec; - long tv_nsec; -}; - -/* for gettimeofday() */ -struct timezone { - int tz_minuteswest; - int tz_dsttime; -}; - /* for getdents64() */ struct linux_dirent64 { uint64_t d_ino; @@ -271,6 +246,8 @@ struct stat { #define WEXITSTATUS(status) (((status) & 0xff00) >> 8) #define WIFEXITED(status) (((status) & 0x7f) == 0) +/* for SIGCHLD */ +#include <asm/signal.h> /* Below comes the architecture-specific code. For each architecture, we have * the syscall declarations and the _start code definition. This is the only @@ -1469,8 +1446,10 @@ int sys_chmod(const char *path, mode_t mode) { #ifdef __NR_fchmodat return my_syscall4(__NR_fchmodat, AT_FDCWD, path, mode, 0); -#else +#elif defined(__NR_chmod) return my_syscall2(__NR_chmod, path, mode); +#else +#error Neither __NR_fchmodat nor __NR_chmod defined, cannot implement sys_chmod() #endif } @@ -1479,8 +1458,10 @@ int sys_chown(const char *path, uid_t owner, gid_t group) { #ifdef __NR_fchownat return my_syscall5(__NR_fchownat, AT_FDCWD, path, owner, group, 0); -#else +#elif defined(__NR_chown) return my_syscall3(__NR_chown, path, owner, group); +#else +#error Neither __NR_fchownat nor __NR_chown defined, cannot implement sys_chown() #endif } @@ -1502,10 +1483,24 @@ int sys_dup(int fd) return my_syscall1(__NR_dup, fd); } +#ifdef __NR_dup3 +static __attribute__((unused)) +int sys_dup3(int old, int new, int flags) +{ + return my_syscall3(__NR_dup3, old, new, flags); +} +#endif + static __attribute__((unused)) int sys_dup2(int old, int new) { +#ifdef __NR_dup3 + return my_syscall3(__NR_dup3, old, new, 0); +#elif defined(__NR_dup2) return my_syscall2(__NR_dup2, old, new); +#else +#error Neither __NR_dup3 nor __NR_dup2 defined, cannot implement sys_dup2() +#endif } static __attribute__((unused)) @@ -1517,7 +1512,17 @@ int sys_execve(const char *filename, char *const argv[], char *const envp[]) static __attribute__((unused)) pid_t sys_fork(void) { +#ifdef __NR_clone + /* note: some archs only have clone() and not fork(). Different archs + * have a different API, but most archs have the flags on first arg and + * will not use the rest with no other flag. + */ + return my_syscall5(__NR_clone, SIGCHLD, 0, 0, 0, 0); +#elif defined(__NR_fork) return my_syscall0(__NR_fork); +#else +#error Neither __NR_clone nor __NR_fork defined, cannot implement sys_fork() +#endif } static __attribute__((unused)) @@ -1533,9 +1538,15 @@ int sys_getdents64(int fd, struct linux_dirent64 *dirp, int count) } static __attribute__((unused)) +pid_t sys_getpgid(pid_t pid) +{ + return my_syscall1(__NR_getpgid, pid); +} + +static __attribute__((unused)) pid_t sys_getpgrp(void) { - return my_syscall0(__NR_getpgrp); + return sys_getpgid(0); } static __attribute__((unused)) @@ -1567,8 +1578,10 @@ int sys_link(const char *old, const char *new) { #ifdef __NR_linkat return my_syscall5(__NR_linkat, AT_FDCWD, old, AT_FDCWD, new, 0); -#else +#elif defined(__NR_link) return my_syscall2(__NR_link, old, new); +#else +#error Neither __NR_linkat nor __NR_link defined, cannot implement sys_link() #endif } @@ -1583,8 +1596,10 @@ int sys_mkdir(const char *path, mode_t mode) { #ifdef __NR_mkdirat return my_syscall3(__NR_mkdirat, AT_FDCWD, path, mode); -#else +#elif defined(__NR_mkdir) return my_syscall2(__NR_mkdir, path, mode); +#else +#error Neither __NR_mkdirat nor __NR_mkdir defined, cannot implement sys_mkdir() #endif } @@ -1593,8 +1608,10 @@ long sys_mknod(const char *path, mode_t mode, dev_t dev) { #ifdef __NR_mknodat return my_syscall4(__NR_mknodat, AT_FDCWD, path, mode, dev); -#else +#elif defined(__NR_mknod) return my_syscall3(__NR_mknod, path, mode, dev); +#else +#error Neither __NR_mknodat nor __NR_mknod defined, cannot implement sys_mknod() #endif } @@ -1610,8 +1627,10 @@ int sys_open(const char *path, int flags, mode_t mode) { #ifdef __NR_openat return my_syscall4(__NR_openat, AT_FDCWD, path, flags, mode); -#else +#elif defined(__NR_open) return my_syscall3(__NR_open, path, flags, mode); +#else +#error Neither __NR_openat nor __NR_open defined, cannot implement sys_open() #endif } @@ -1624,7 +1643,19 @@ int sys_pivot_root(const char *new, const char *old) static __attribute__((unused)) int sys_poll(struct pollfd *fds, int nfds, int timeout) { +#if defined(__NR_ppoll) + struct timespec t; + + if (timeout >= 0) { + t.tv_sec = timeout / 1000; + t.tv_nsec = (timeout % 1000) * 1000000; + } + return my_syscall4(__NR_ppoll, fds, nfds, (timeout >= 0) ? &t : NULL, NULL); +#elif defined(__NR_poll) return my_syscall3(__NR_poll, fds, nfds, timeout); +#else +#error Neither __NR_ppoll nor __NR_poll defined, cannot implement sys_poll() +#endif } static __attribute__((unused)) @@ -1663,11 +1694,13 @@ int sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeva t.tv_nsec = timeout->tv_usec * 1000; } return my_syscall6(__NR_pselect6, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL); -#else +#elif defined(__NR__newselect) || defined(__NR_select) #ifndef __NR__newselect #define __NR__newselect __NR_select #endif return my_syscall5(__NR__newselect, nfds, rfds, wfds, efds, timeout); +#else +#error None of __NR_select, __NR_pselect6, nor __NR__newselect defined, cannot implement sys_select() #endif } @@ -1692,8 +1725,10 @@ int sys_stat(const char *path, struct stat *buf) #ifdef __NR_newfstatat /* only solution for arm64 */ ret = my_syscall4(__NR_newfstatat, AT_FDCWD, path, &stat, 0); -#else +#elif defined(__NR_stat) ret = my_syscall2(__NR_stat, path, &stat); +#else +#error Neither __NR_newfstatat nor __NR_stat defined, cannot implement sys_stat() #endif buf->st_dev = stat.st_dev; buf->st_ino = stat.st_ino; @@ -1717,8 +1752,10 @@ int sys_symlink(const char *old, const char *new) { #ifdef __NR_symlinkat return my_syscall3(__NR_symlinkat, old, AT_FDCWD, new); -#else +#elif defined(__NR_symlink) return my_syscall2(__NR_symlink, old, new); +#else +#error Neither __NR_symlinkat nor __NR_symlink defined, cannot implement sys_symlink() #endif } @@ -1739,8 +1776,10 @@ int sys_unlink(const char *path) { #ifdef __NR_unlinkat return my_syscall3(__NR_unlinkat, AT_FDCWD, path, 0); -#else +#elif defined(__NR_unlink) return my_syscall1(__NR_unlink, path); +#else +#error Neither __NR_unlinkat nor __NR_unlink defined, cannot implement sys_unlink() #endif } @@ -1853,6 +1892,18 @@ int close(int fd) } static __attribute__((unused)) +int dup(int fd) +{ + int ret = sys_dup(fd); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) int dup2(int old, int new) { int ret = sys_dup2(old, new); @@ -1864,6 +1915,20 @@ int dup2(int old, int new) return ret; } +#ifdef __NR_dup3 +static __attribute__((unused)) +int dup3(int old, int new, int flags) +{ + int ret = sys_dup3(old, new, flags); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} +#endif + static __attribute__((unused)) int execve(const char *filename, char *const argv[], char *const envp[]) { @@ -1913,6 +1978,18 @@ int getdents64(int fd, struct linux_dirent64 *dirp, int count) } static __attribute__((unused)) +pid_t getpgid(pid_t pid) +{ + pid_t ret = sys_getpgid(pid); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) pid_t getpgrp(void) { pid_t ret = sys_getpgrp(); diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index 728752917785..ce58cff99b66 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -861,9 +861,11 @@ __SYSCALL(__NR_faccessat2, sys_faccessat2) __SYSCALL(__NR_process_madvise, sys_process_madvise) #define __NR_epoll_pwait2 441 __SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2) +#define __NR_mount_setattr 442 +__SYSCALL(__NR_mount_setattr, sys_mount_setattr) #undef __NR_syscalls -#define __NR_syscalls 442 +#define __NR_syscalls 443 /* * 32 bit systems traditionally used different diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 77d7c1bb2923..4c24daa43bac 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -19,7 +19,8 @@ /* ld/ldx fields */ #define BPF_DW 0x18 /* double word (64-bit) */ -#define BPF_XADD 0xc0 /* exclusive add */ +#define BPF_ATOMIC 0xc0 /* atomic memory ops - op type in immediate */ +#define BPF_XADD 0xc0 /* exclusive add - legacy name */ /* alu/jmp fields */ #define BPF_MOV 0xb0 /* mov reg to reg */ @@ -43,6 +44,11 @@ #define BPF_CALL 0x80 /* function call */ #define BPF_EXIT 0x90 /* function return */ +/* atomic op type fields (stored in immediate) */ +#define BPF_FETCH 0x01 /* not an opcode on its own, used to build others */ +#define BPF_XCHG (0xe0 | BPF_FETCH) /* atomic exchange */ +#define BPF_CMPXCHG (0xf0 | BPF_FETCH) /* atomic compare-and-write */ + /* Register numbers */ enum { BPF_REG_0 = 0, @@ -1650,22 +1656,30 @@ union bpf_attr { * networking traffic statistics as it provides a global socket * identifier that can be assumed unique. * Return - * A 8-byte long non-decreasing number on success, or 0 if the - * socket field is missing inside *skb*. + * A 8-byte long unique number on success, or 0 if the socket + * field is missing inside *skb*. * * u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx) * Description * Equivalent to bpf_get_socket_cookie() helper that accepts * *skb*, but gets socket from **struct bpf_sock_addr** context. * Return - * A 8-byte long non-decreasing number. + * A 8-byte long unique number. * * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx) * Description * Equivalent to **bpf_get_socket_cookie**\ () helper that accepts * *skb*, but gets socket from **struct bpf_sock_ops** context. * Return - * A 8-byte long non-decreasing number. + * A 8-byte long unique number. + * + * u64 bpf_get_socket_cookie(struct sock *sk) + * Description + * Equivalent to **bpf_get_socket_cookie**\ () helper that accepts + * *sk*, but gets socket from a BTF **struct sock**. This helper + * also works for sleepable programs. + * Return + * A 8-byte long unique number or 0 if *sk* is NULL. * * u32 bpf_get_socket_uid(struct sk_buff *skb) * Return @@ -2225,6 +2239,9 @@ union bpf_attr { * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the * packet is not forwarded or needs assist from full stack * + * If lookup fails with BPF_FIB_LKUP_RET_FRAG_NEEDED, then the MTU + * was exceeded and output params->mtu_result contains the MTU. + * * long bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) * Description * Add an entry to, or update a sockhash *map* referencing sockets. @@ -2448,7 +2465,7 @@ union bpf_attr { * running simultaneously. * * A user should care about the synchronization by himself. - * For example, by using the **BPF_STX_XADD** instruction to alter + * For example, by using the **BPF_ATOMIC** instructions to alter * the shared data. * Return * A pointer to the local storage area. @@ -2993,10 +3010,10 @@ union bpf_attr { * string length is larger than *size*, just *size*-1 bytes are * copied and the last byte is set to NUL. * - * On success, the length of the copied string is returned. This - * makes this helper useful in tracing programs for reading - * strings, and more importantly to get its length at runtime. See - * the following snippet: + * On success, returns the number of bytes that were written, + * including the terminal NUL. This makes this helper useful in + * tracing programs for reading strings, and more importantly to + * get its length at runtime. See the following snippet: * * :: * @@ -3024,7 +3041,7 @@ union bpf_attr { * **->mm->env_start**: using this helper and the return value, * one can quickly iterate at the right offset of the memory area. * Return - * On success, the strictly positive length of the string, + * On success, the strictly positive length of the output string, * including the trailing NUL character. On error, a negative * value. * @@ -3830,6 +3847,69 @@ union bpf_attr { * Return * A pointer to a struct socket on success or NULL if the file is * not a socket. + * + * long bpf_check_mtu(void *ctx, u32 ifindex, u32 *mtu_len, s32 len_diff, u64 flags) + * Description + + * Check ctx packet size against exceeding MTU of net device (based + * on *ifindex*). This helper will likely be used in combination + * with helpers that adjust/change the packet size. + * + * The argument *len_diff* can be used for querying with a planned + * size change. This allows to check MTU prior to changing packet + * ctx. Providing an *len_diff* adjustment that is larger than the + * actual packet size (resulting in negative packet size) will in + * principle not exceed the MTU, why it is not considered a + * failure. Other BPF-helpers are needed for performing the + * planned size change, why the responsability for catch a negative + * packet size belong in those helpers. + * + * Specifying *ifindex* zero means the MTU check is performed + * against the current net device. This is practical if this isn't + * used prior to redirect. + * + * The Linux kernel route table can configure MTUs on a more + * specific per route level, which is not provided by this helper. + * For route level MTU checks use the **bpf_fib_lookup**\ () + * helper. + * + * *ctx* is either **struct xdp_md** for XDP programs or + * **struct sk_buff** for tc cls_act programs. + * + * The *flags* argument can be a combination of one or more of the + * following values: + * + * **BPF_MTU_CHK_SEGS** + * This flag will only works for *ctx* **struct sk_buff**. + * If packet context contains extra packet segment buffers + * (often knows as GSO skb), then MTU check is harder to + * check at this point, because in transmit path it is + * possible for the skb packet to get re-segmented + * (depending on net device features). This could still be + * a MTU violation, so this flag enables performing MTU + * check against segments, with a different violation + * return code to tell it apart. Check cannot use len_diff. + * + * On return *mtu_len* pointer contains the MTU value of the net + * device. Remember the net device configured MTU is the L3 size, + * which is returned here and XDP and TX length operate at L2. + * Helper take this into account for you, but remember when using + * MTU value in your BPF-code. On input *mtu_len* must be a valid + * pointer and be initialized (to zero), else verifier will reject + * BPF program. + * + * Return + * * 0 on success, and populate MTU value in *mtu_len* pointer. + * + * * < 0 if any input argument is invalid (*mtu_len* not updated) + * + * MTU violations return positive values, but also populate MTU + * value in *mtu_len* pointer, as this can be needed for + * implementing PMTU handing: + * + * * **BPF_MTU_CHK_RET_FRAG_NEEDED** + * * **BPF_MTU_CHK_RET_SEGS_TOOBIG** + * */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3995,6 +4075,7 @@ union bpf_attr { FN(ktime_get_coarse_ns), \ FN(ima_inode_hash), \ FN(sock_from_file), \ + FN(check_mtu), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -4495,6 +4576,7 @@ struct bpf_prog_info { __aligned_u64 prog_tags; __u64 run_time_ns; __u64 run_cnt; + __u64 recursion_misses; } __attribute__((aligned(8))); struct bpf_map_info { @@ -4975,9 +5057,13 @@ struct bpf_fib_lookup { __be16 sport; __be16 dport; - /* total length of packet from network header - used for MTU check */ - __u16 tot_len; + union { /* used for MTU check */ + /* input to lookup */ + __u16 tot_len; /* L3 length from network hdr (iph->tot_len) */ + /* output: MTU value */ + __u16 mtu_result; + }; /* input: L3 device index for lookup * output: device index from FIB lookup */ @@ -5023,6 +5109,17 @@ struct bpf_redir_neigh { }; }; +/* bpf_check_mtu flags*/ +enum bpf_check_mtu_flags { + BPF_MTU_CHK_SEGS = (1U << 0), +}; + +enum bpf_check_mtu_ret { + BPF_MTU_CHK_RET_SUCCESS, /* check and lookup successful */ + BPF_MTU_CHK_RET_FRAG_NEEDED, /* fragmentation required to fwd */ + BPF_MTU_CHK_RET_SEGS_TOOBIG, /* GSO re-segmentation needed to fwd */ +}; + enum bpf_task_fd_type { BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */ BPF_FD_TYPE_TRACEPOINT, /* tp name */ diff --git a/tools/include/uapi/linux/bpf_perf_event.h b/tools/include/uapi/linux/bpf_perf_event.h index 8f95303f9d80..eb1b9d21250c 100644 --- a/tools/include/uapi/linux/bpf_perf_event.h +++ b/tools/include/uapi/linux/bpf_perf_event.h @@ -13,6 +13,7 @@ struct bpf_perf_event_data { bpf_user_pt_regs_t regs; __u64 sample_period; + __u64 addr; }; #endif /* _UAPI__LINUX_BPF_PERF_EVENT_H__ */ diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 886802b8ffba..abb89bbe5635 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -251,6 +251,7 @@ struct kvm_hyperv_exit { #define KVM_EXIT_X86_RDMSR 29 #define KVM_EXIT_X86_WRMSR 30 #define KVM_EXIT_DIRTY_RING_FULL 31 +#define KVM_EXIT_AP_RESET_HOLD 32 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -573,6 +574,7 @@ struct kvm_vapic_addr { #define KVM_MP_STATE_CHECK_STOP 6 #define KVM_MP_STATE_OPERATING 7 #define KVM_MP_STATE_LOAD 8 +#define KVM_MP_STATE_AP_RESET_HOLD 9 struct kvm_mp_state { __u32 mp_state; @@ -1056,6 +1058,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190 #define KVM_CAP_SYS_HYPERV_CPUID 191 #define KVM_CAP_DIRTY_LOG_RING 192 +#define KVM_CAP_PPC_DAWR1 194 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index b15e3447cd9f..ad15e40d7f5d 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -145,12 +145,14 @@ enum perf_event_sample_format { PERF_SAMPLE_CGROUP = 1U << 21, PERF_SAMPLE_DATA_PAGE_SIZE = 1U << 22, PERF_SAMPLE_CODE_PAGE_SIZE = 1U << 23, + PERF_SAMPLE_WEIGHT_STRUCT = 1U << 24, - PERF_SAMPLE_MAX = 1U << 24, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 25, /* non-ABI */ __PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */ }; +#define PERF_SAMPLE_WEIGHT_TYPE (PERF_SAMPLE_WEIGHT | PERF_SAMPLE_WEIGHT_STRUCT) /* * values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set * @@ -386,7 +388,8 @@ struct perf_event_attr { aux_output : 1, /* generate AUX records instead of events */ cgroup : 1, /* include cgroup events */ text_poke : 1, /* include text poke events */ - __reserved_1 : 30; + build_id : 1, /* use build id in mmap2 events */ + __reserved_1 : 29; union { __u32 wakeup_events; /* wakeup every n events */ @@ -659,6 +662,22 @@ struct perf_event_mmap_page { __u64 aux_size; }; +/* + * The current state of perf_event_header::misc bits usage: + * ('|' used bit, '-' unused bit) + * + * 012 CDEF + * |||---------|||| + * + * Where: + * 0-2 CPUMODE_MASK + * + * C PROC_MAP_PARSE_TIMEOUT + * D MMAP_DATA / COMM_EXEC / FORK_EXEC / SWITCH_OUT + * E MMAP_BUILD_ID / EXACT_IP / SCHED_OUT_PREEMPT + * F (reserved) + */ + #define PERF_RECORD_MISC_CPUMODE_MASK (7 << 0) #define PERF_RECORD_MISC_CPUMODE_UNKNOWN (0 << 0) #define PERF_RECORD_MISC_KERNEL (1 << 0) @@ -690,6 +709,7 @@ struct perf_event_mmap_page { * * PERF_RECORD_MISC_EXACT_IP - PERF_RECORD_SAMPLE of precise events * PERF_RECORD_MISC_SWITCH_OUT_PREEMPT - PERF_RECORD_SWITCH* events + * PERF_RECORD_MISC_MMAP_BUILD_ID - PERF_RECORD_MMAP2 event * * * PERF_RECORD_MISC_EXACT_IP: @@ -699,9 +719,13 @@ struct perf_event_mmap_page { * * PERF_RECORD_MISC_SWITCH_OUT_PREEMPT: * Indicates that thread was preempted in TASK_RUNNING state. + * + * PERF_RECORD_MISC_MMAP_BUILD_ID: + * Indicates that mmap2 event carries build id data. */ #define PERF_RECORD_MISC_EXACT_IP (1 << 14) #define PERF_RECORD_MISC_SWITCH_OUT_PREEMPT (1 << 14) +#define PERF_RECORD_MISC_MMAP_BUILD_ID (1 << 14) /* * Reserve the last bit to indicate some extended misc field */ @@ -890,7 +914,24 @@ enum perf_event_type { * char data[size]; * u64 dyn_size; } && PERF_SAMPLE_STACK_USER * - * { u64 weight; } && PERF_SAMPLE_WEIGHT + * { union perf_sample_weight + * { + * u64 full; && PERF_SAMPLE_WEIGHT + * #if defined(__LITTLE_ENDIAN_BITFIELD) + * struct { + * u32 var1_dw; + * u16 var2_w; + * u16 var3_w; + * } && PERF_SAMPLE_WEIGHT_STRUCT + * #elif defined(__BIG_ENDIAN_BITFIELD) + * struct { + * u16 var3_w; + * u16 var2_w; + * u32 var1_dw; + * } && PERF_SAMPLE_WEIGHT_STRUCT + * #endif + * } + * } * { u64 data_src; } && PERF_SAMPLE_DATA_SRC * { u64 transaction; } && PERF_SAMPLE_TRANSACTION * { u64 abi; # enum perf_sample_regs_abi @@ -915,10 +956,20 @@ enum perf_event_type { * u64 addr; * u64 len; * u64 pgoff; - * u32 maj; - * u32 min; - * u64 ino; - * u64 ino_generation; + * union { + * struct { + * u32 maj; + * u32 min; + * u64 ino; + * u64 ino_generation; + * }; + * struct { + * u8 build_id_size; + * u8 __reserved_1; + * u16 __reserved_2; + * u8 build_id[20]; + * }; + * }; * u32 prot, flags; * char filename[]; * struct sample_id sample_id; @@ -1127,14 +1178,16 @@ union perf_mem_data_src { mem_lvl_num:4, /* memory hierarchy level number */ mem_remote:1, /* remote */ mem_snoopx:2, /* snoop mode, ext */ - mem_rsvd:24; + mem_blk:3, /* access blocked */ + mem_rsvd:21; }; }; #elif defined(__BIG_ENDIAN_BITFIELD) union perf_mem_data_src { __u64 val; struct { - __u64 mem_rsvd:24, + __u64 mem_rsvd:21, + mem_blk:3, /* access blocked */ mem_snoopx:2, /* snoop mode, ext */ mem_remote:1, /* remote */ mem_lvl_num:4, /* memory hierarchy level number */ @@ -1217,6 +1270,12 @@ union perf_mem_data_src { #define PERF_MEM_TLB_OS 0x40 /* OS fault handler */ #define PERF_MEM_TLB_SHIFT 26 +/* Access blocked */ +#define PERF_MEM_BLK_NA 0x01 /* not available */ +#define PERF_MEM_BLK_DATA 0x02 /* data could not be forwarded */ +#define PERF_MEM_BLK_ADDR 0x04 /* address conflict */ +#define PERF_MEM_BLK_SHIFT 40 + #define PERF_MEM_S(a, s) \ (((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT) @@ -1248,4 +1307,23 @@ struct perf_branch_entry { reserved:40; }; +union perf_sample_weight { + __u64 full; +#if defined(__LITTLE_ENDIAN_BITFIELD) + struct { + __u32 var1_dw; + __u16 var2_w; + __u16 var3_w; + }; +#elif defined(__BIG_ENDIAN_BITFIELD) + struct { + __u16 var3_w; + __u16 var2_w; + __u32 var1_dw; + }; +#else +#error "Unknown endianness" +#endif +}; + #endif /* _UAPI_LINUX_PERF_EVENT_H */ diff --git a/tools/include/uapi/linux/pkt_sched.h b/tools/include/uapi/linux/pkt_sched.h index 0d18b1d1fbbc..5c903abc9fa5 100644 --- a/tools/include/uapi/linux/pkt_sched.h +++ b/tools/include/uapi/linux/pkt_sched.h @@ -414,6 +414,7 @@ enum { TCA_HTB_RATE64, TCA_HTB_CEIL64, TCA_HTB_PAD, + TCA_HTB_OFFLOAD, __TCA_HTB_MAX, }; diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index 90deb41c8a34..667f1aed091c 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -251,5 +251,8 @@ struct prctl_mm_map { #define PR_SET_SYSCALL_USER_DISPATCH 59 # define PR_SYS_DISPATCH_OFF 0 # define PR_SYS_DISPATCH_ON 1 +/* The control values for the user space selector when dispatch is enabled */ +# define SYSCALL_DISPATCH_FILTER_ALLOW 0 +# define SYSCALL_DISPATCH_FILTER_BLOCK 1 #endif /* _LINUX_PRCTL_H */ diff --git a/tools/include/uapi/linux/tcp.h b/tools/include/uapi/linux/tcp.h new file mode 100644 index 000000000000..13ceeb395eb8 --- /dev/null +++ b/tools/include/uapi/linux/tcp.h @@ -0,0 +1,357 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Definitions for the TCP protocol. + * + * Version: @(#)tcp.h 1.0.2 04/28/93 + * + * Author: Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _UAPI_LINUX_TCP_H +#define _UAPI_LINUX_TCP_H + +#include <linux/types.h> +#include <asm/byteorder.h> +#include <linux/socket.h> + +struct tcphdr { + __be16 source; + __be16 dest; + __be32 seq; + __be32 ack_seq; +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u16 res1:4, + doff:4, + fin:1, + syn:1, + rst:1, + psh:1, + ack:1, + urg:1, + ece:1, + cwr:1; +#elif defined(__BIG_ENDIAN_BITFIELD) + __u16 doff:4, + res1:4, + cwr:1, + ece:1, + urg:1, + ack:1, + psh:1, + rst:1, + syn:1, + fin:1; +#else +#error "Adjust your <asm/byteorder.h> defines" +#endif + __be16 window; + __sum16 check; + __be16 urg_ptr; +}; + +/* + * The union cast uses a gcc extension to avoid aliasing problems + * (union is compatible to any of its members) + * This means this part of the code is -fstrict-aliasing safe now. + */ +union tcp_word_hdr { + struct tcphdr hdr; + __be32 words[5]; +}; + +#define tcp_flag_word(tp) ( ((union tcp_word_hdr *)(tp))->words [3]) + +enum { + TCP_FLAG_CWR = __constant_cpu_to_be32(0x00800000), + TCP_FLAG_ECE = __constant_cpu_to_be32(0x00400000), + TCP_FLAG_URG = __constant_cpu_to_be32(0x00200000), + TCP_FLAG_ACK = __constant_cpu_to_be32(0x00100000), + TCP_FLAG_PSH = __constant_cpu_to_be32(0x00080000), + TCP_FLAG_RST = __constant_cpu_to_be32(0x00040000), + TCP_FLAG_SYN = __constant_cpu_to_be32(0x00020000), + TCP_FLAG_FIN = __constant_cpu_to_be32(0x00010000), + TCP_RESERVED_BITS = __constant_cpu_to_be32(0x0F000000), + TCP_DATA_OFFSET = __constant_cpu_to_be32(0xF0000000) +}; + +/* + * TCP general constants + */ +#define TCP_MSS_DEFAULT 536U /* IPv4 (RFC1122, RFC2581) */ +#define TCP_MSS_DESIRED 1220U /* IPv6 (tunneled), EDNS0 (RFC3226) */ + +/* TCP socket options */ +#define TCP_NODELAY 1 /* Turn off Nagle's algorithm. */ +#define TCP_MAXSEG 2 /* Limit MSS */ +#define TCP_CORK 3 /* Never send partially complete segments */ +#define TCP_KEEPIDLE 4 /* Start keeplives after this period */ +#define TCP_KEEPINTVL 5 /* Interval between keepalives */ +#define TCP_KEEPCNT 6 /* Number of keepalives before death */ +#define TCP_SYNCNT 7 /* Number of SYN retransmits */ +#define TCP_LINGER2 8 /* Life time of orphaned FIN-WAIT-2 state */ +#define TCP_DEFER_ACCEPT 9 /* Wake up listener only when data arrive */ +#define TCP_WINDOW_CLAMP 10 /* Bound advertised window */ +#define TCP_INFO 11 /* Information about this connection. */ +#define TCP_QUICKACK 12 /* Block/reenable quick acks */ +#define TCP_CONGESTION 13 /* Congestion control algorithm */ +#define TCP_MD5SIG 14 /* TCP MD5 Signature (RFC2385) */ +#define TCP_THIN_LINEAR_TIMEOUTS 16 /* Use linear timeouts for thin streams*/ +#define TCP_THIN_DUPACK 17 /* Fast retrans. after 1 dupack */ +#define TCP_USER_TIMEOUT 18 /* How long for loss retry before timeout */ +#define TCP_REPAIR 19 /* TCP sock is under repair right now */ +#define TCP_REPAIR_QUEUE 20 +#define TCP_QUEUE_SEQ 21 +#define TCP_REPAIR_OPTIONS 22 +#define TCP_FASTOPEN 23 /* Enable FastOpen on listeners */ +#define TCP_TIMESTAMP 24 +#define TCP_NOTSENT_LOWAT 25 /* limit number of unsent bytes in write queue */ +#define TCP_CC_INFO 26 /* Get Congestion Control (optional) info */ +#define TCP_SAVE_SYN 27 /* Record SYN headers for new connections */ +#define TCP_SAVED_SYN 28 /* Get SYN headers recorded for connection */ +#define TCP_REPAIR_WINDOW 29 /* Get/set window parameters */ +#define TCP_FASTOPEN_CONNECT 30 /* Attempt FastOpen with connect */ +#define TCP_ULP 31 /* Attach a ULP to a TCP connection */ +#define TCP_MD5SIG_EXT 32 /* TCP MD5 Signature with extensions */ +#define TCP_FASTOPEN_KEY 33 /* Set the key for Fast Open (cookie) */ +#define TCP_FASTOPEN_NO_COOKIE 34 /* Enable TFO without a TFO cookie */ +#define TCP_ZEROCOPY_RECEIVE 35 +#define TCP_INQ 36 /* Notify bytes available to read as a cmsg on read */ + +#define TCP_CM_INQ TCP_INQ + +#define TCP_TX_DELAY 37 /* delay outgoing packets by XX usec */ + + +#define TCP_REPAIR_ON 1 +#define TCP_REPAIR_OFF 0 +#define TCP_REPAIR_OFF_NO_WP -1 /* Turn off without window probes */ + +struct tcp_repair_opt { + __u32 opt_code; + __u32 opt_val; +}; + +struct tcp_repair_window { + __u32 snd_wl1; + __u32 snd_wnd; + __u32 max_window; + + __u32 rcv_wnd; + __u32 rcv_wup; +}; + +enum { + TCP_NO_QUEUE, + TCP_RECV_QUEUE, + TCP_SEND_QUEUE, + TCP_QUEUES_NR, +}; + +/* why fastopen failed from client perspective */ +enum tcp_fastopen_client_fail { + TFO_STATUS_UNSPEC, /* catch-all */ + TFO_COOKIE_UNAVAILABLE, /* if not in TFO_CLIENT_NO_COOKIE mode */ + TFO_DATA_NOT_ACKED, /* SYN-ACK did not ack SYN data */ + TFO_SYN_RETRANSMITTED, /* SYN-ACK did not ack SYN data after timeout */ +}; + +/* for TCP_INFO socket option */ +#define TCPI_OPT_TIMESTAMPS 1 +#define TCPI_OPT_SACK 2 +#define TCPI_OPT_WSCALE 4 +#define TCPI_OPT_ECN 8 /* ECN was negociated at TCP session init */ +#define TCPI_OPT_ECN_SEEN 16 /* we received at least one packet with ECT */ +#define TCPI_OPT_SYN_DATA 32 /* SYN-ACK acked data in SYN sent or rcvd */ + +/* + * Sender's congestion state indicating normal or abnormal situations + * in the last round of packets sent. The state is driven by the ACK + * information and timer events. + */ +enum tcp_ca_state { + /* + * Nothing bad has been observed recently. + * No apparent reordering, packet loss, or ECN marks. + */ + TCP_CA_Open = 0, +#define TCPF_CA_Open (1<<TCP_CA_Open) + /* + * The sender enters disordered state when it has received DUPACKs or + * SACKs in the last round of packets sent. This could be due to packet + * loss or reordering but needs further information to confirm packets + * have been lost. + */ + TCP_CA_Disorder = 1, +#define TCPF_CA_Disorder (1<<TCP_CA_Disorder) + /* + * The sender enters Congestion Window Reduction (CWR) state when it + * has received ACKs with ECN-ECE marks, or has experienced congestion + * or packet discard on the sender host (e.g. qdisc). + */ + TCP_CA_CWR = 2, +#define TCPF_CA_CWR (1<<TCP_CA_CWR) + /* + * The sender is in fast recovery and retransmitting lost packets, + * typically triggered by ACK events. + */ + TCP_CA_Recovery = 3, +#define TCPF_CA_Recovery (1<<TCP_CA_Recovery) + /* + * The sender is in loss recovery triggered by retransmission timeout. + */ + TCP_CA_Loss = 4 +#define TCPF_CA_Loss (1<<TCP_CA_Loss) +}; + +struct tcp_info { + __u8 tcpi_state; + __u8 tcpi_ca_state; + __u8 tcpi_retransmits; + __u8 tcpi_probes; + __u8 tcpi_backoff; + __u8 tcpi_options; + __u8 tcpi_snd_wscale : 4, tcpi_rcv_wscale : 4; + __u8 tcpi_delivery_rate_app_limited:1, tcpi_fastopen_client_fail:2; + + __u32 tcpi_rto; + __u32 tcpi_ato; + __u32 tcpi_snd_mss; + __u32 tcpi_rcv_mss; + + __u32 tcpi_unacked; + __u32 tcpi_sacked; + __u32 tcpi_lost; + __u32 tcpi_retrans; + __u32 tcpi_fackets; + + /* Times. */ + __u32 tcpi_last_data_sent; + __u32 tcpi_last_ack_sent; /* Not remembered, sorry. */ + __u32 tcpi_last_data_recv; + __u32 tcpi_last_ack_recv; + + /* Metrics. */ + __u32 tcpi_pmtu; + __u32 tcpi_rcv_ssthresh; + __u32 tcpi_rtt; + __u32 tcpi_rttvar; + __u32 tcpi_snd_ssthresh; + __u32 tcpi_snd_cwnd; + __u32 tcpi_advmss; + __u32 tcpi_reordering; + + __u32 tcpi_rcv_rtt; + __u32 tcpi_rcv_space; + + __u32 tcpi_total_retrans; + + __u64 tcpi_pacing_rate; + __u64 tcpi_max_pacing_rate; + __u64 tcpi_bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked */ + __u64 tcpi_bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived */ + __u32 tcpi_segs_out; /* RFC4898 tcpEStatsPerfSegsOut */ + __u32 tcpi_segs_in; /* RFC4898 tcpEStatsPerfSegsIn */ + + __u32 tcpi_notsent_bytes; + __u32 tcpi_min_rtt; + __u32 tcpi_data_segs_in; /* RFC4898 tcpEStatsDataSegsIn */ + __u32 tcpi_data_segs_out; /* RFC4898 tcpEStatsDataSegsOut */ + + __u64 tcpi_delivery_rate; + + __u64 tcpi_busy_time; /* Time (usec) busy sending data */ + __u64 tcpi_rwnd_limited; /* Time (usec) limited by receive window */ + __u64 tcpi_sndbuf_limited; /* Time (usec) limited by send buffer */ + + __u32 tcpi_delivered; + __u32 tcpi_delivered_ce; + + __u64 tcpi_bytes_sent; /* RFC4898 tcpEStatsPerfHCDataOctetsOut */ + __u64 tcpi_bytes_retrans; /* RFC4898 tcpEStatsPerfOctetsRetrans */ + __u32 tcpi_dsack_dups; /* RFC4898 tcpEStatsStackDSACKDups */ + __u32 tcpi_reord_seen; /* reordering events seen */ + + __u32 tcpi_rcv_ooopack; /* Out-of-order packets received */ + + __u32 tcpi_snd_wnd; /* peer's advertised receive window after + * scaling (bytes) + */ +}; + +/* netlink attributes types for SCM_TIMESTAMPING_OPT_STATS */ +enum { + TCP_NLA_PAD, + TCP_NLA_BUSY, /* Time (usec) busy sending data */ + TCP_NLA_RWND_LIMITED, /* Time (usec) limited by receive window */ + TCP_NLA_SNDBUF_LIMITED, /* Time (usec) limited by send buffer */ + TCP_NLA_DATA_SEGS_OUT, /* Data pkts sent including retransmission */ + TCP_NLA_TOTAL_RETRANS, /* Data pkts retransmitted */ + TCP_NLA_PACING_RATE, /* Pacing rate in bytes per second */ + TCP_NLA_DELIVERY_RATE, /* Delivery rate in bytes per second */ + TCP_NLA_SND_CWND, /* Sending congestion window */ + TCP_NLA_REORDERING, /* Reordering metric */ + TCP_NLA_MIN_RTT, /* minimum RTT */ + TCP_NLA_RECUR_RETRANS, /* Recurring retransmits for the current pkt */ + TCP_NLA_DELIVERY_RATE_APP_LMT, /* delivery rate application limited ? */ + TCP_NLA_SNDQ_SIZE, /* Data (bytes) pending in send queue */ + TCP_NLA_CA_STATE, /* ca_state of socket */ + TCP_NLA_SND_SSTHRESH, /* Slow start size threshold */ + TCP_NLA_DELIVERED, /* Data pkts delivered incl. out-of-order */ + TCP_NLA_DELIVERED_CE, /* Like above but only ones w/ CE marks */ + TCP_NLA_BYTES_SENT, /* Data bytes sent including retransmission */ + TCP_NLA_BYTES_RETRANS, /* Data bytes retransmitted */ + TCP_NLA_DSACK_DUPS, /* DSACK blocks received */ + TCP_NLA_REORD_SEEN, /* reordering events seen */ + TCP_NLA_SRTT, /* smoothed RTT in usecs */ + TCP_NLA_TIMEOUT_REHASH, /* Timeout-triggered rehash attempts */ + TCP_NLA_BYTES_NOTSENT, /* Bytes in write queue not yet sent */ + TCP_NLA_EDT, /* Earliest departure time (CLOCK_MONOTONIC) */ +}; + +/* for TCP_MD5SIG socket option */ +#define TCP_MD5SIG_MAXKEYLEN 80 + +/* tcp_md5sig extension flags for TCP_MD5SIG_EXT */ +#define TCP_MD5SIG_FLAG_PREFIX 0x1 /* address prefix length */ +#define TCP_MD5SIG_FLAG_IFINDEX 0x2 /* ifindex set */ + +struct tcp_md5sig { + struct __kernel_sockaddr_storage tcpm_addr; /* address associated */ + __u8 tcpm_flags; /* extension flags */ + __u8 tcpm_prefixlen; /* address prefix */ + __u16 tcpm_keylen; /* key length */ + int tcpm_ifindex; /* device index for scope */ + __u8 tcpm_key[TCP_MD5SIG_MAXKEYLEN]; /* key (binary) */ +}; + +/* INET_DIAG_MD5SIG */ +struct tcp_diag_md5sig { + __u8 tcpm_family; + __u8 tcpm_prefixlen; + __u16 tcpm_keylen; + __be32 tcpm_addr[4]; + __u8 tcpm_key[TCP_MD5SIG_MAXKEYLEN]; +}; + +/* setsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, ...) */ + +#define TCP_RECEIVE_ZEROCOPY_FLAG_TLB_CLEAN_HINT 0x1 +struct tcp_zerocopy_receive { + __u64 address; /* in: address of mapping */ + __u32 length; /* in/out: number of bytes to map/mapped */ + __u32 recv_skip_hint; /* out: amount of bytes to skip */ + __u32 inq; /* out: amount of bytes in read queue */ + __s32 err; /* out: socket error */ + __u64 copybuf_address; /* in: copybuf address (small reads) */ + __s32 copybuf_len; /* in/out: copybuf bytes avail/used or error */ + __u32 flags; /* in: flags */ +}; +#endif /* _UAPI_LINUX_TCP_H */ diff --git a/tools/lib/api/fs/cgroup.c b/tools/lib/api/fs/cgroup.c index 889a6eb4aaca..1573dae4259d 100644 --- a/tools/lib/api/fs/cgroup.c +++ b/tools/lib/api/fs/cgroup.c @@ -8,12 +8,29 @@ #include <string.h> #include "fs.h" +struct cgroupfs_cache_entry { + char subsys[32]; + char mountpoint[PATH_MAX]; +}; + +/* just cache last used one */ +static struct cgroupfs_cache_entry cached; + int cgroupfs_find_mountpoint(char *buf, size_t maxlen, const char *subsys) { FILE *fp; - char mountpoint[PATH_MAX + 1], tokens[PATH_MAX + 1], type[PATH_MAX + 1]; - char path_v1[PATH_MAX + 1], path_v2[PATH_MAX + 2], *path; - char *token, *saved_ptr = NULL; + char *line = NULL; + size_t len = 0; + char *p, *path; + char mountpoint[PATH_MAX]; + + if (!strcmp(cached.subsys, subsys)) { + if (strlen(cached.mountpoint) < maxlen) { + strcpy(buf, cached.mountpoint); + return 0; + } + return -1; + } fp = fopen("/proc/mounts", "r"); if (!fp) @@ -22,45 +39,63 @@ int cgroupfs_find_mountpoint(char *buf, size_t maxlen, const char *subsys) /* * in order to handle split hierarchy, we need to scan /proc/mounts * and inspect every cgroupfs mount point to find one that has - * perf_event subsystem + * the given subsystem. If we found v1, just use it. If not we can + * use v2 path as a fallback. */ - path_v1[0] = '\0'; - path_v2[0] = '\0'; + mountpoint[0] = '\0'; - while (fscanf(fp, "%*s %"__stringify(PATH_MAX)"s %"__stringify(PATH_MAX)"s %" - __stringify(PATH_MAX)"s %*d %*d\n", - mountpoint, type, tokens) == 3) { + /* + * The /proc/mounts has the follow format: + * + * <devname> <mount point> <fs type> <options> ... + * + */ + while (getline(&line, &len, fp) != -1) { + /* skip devname */ + p = strchr(line, ' '); + if (p == NULL) + continue; + + /* save the mount point */ + path = ++p; + p = strchr(p, ' '); + if (p == NULL) + continue; - if (!path_v1[0] && !strcmp(type, "cgroup")) { + *p++ = '\0'; - token = strtok_r(tokens, ",", &saved_ptr); + /* check filesystem type */ + if (strncmp(p, "cgroup", 6)) + continue; - while (token != NULL) { - if (subsys && !strcmp(token, subsys)) { - strcpy(path_v1, mountpoint); - break; - } - token = strtok_r(NULL, ",", &saved_ptr); - } + if (p[6] == '2') { + /* save cgroup v2 path */ + strcpy(mountpoint, path); + continue; } - if (!path_v2[0] && !strcmp(type, "cgroup2")) - strcpy(path_v2, mountpoint); + /* now we have cgroup v1, check the options for subsystem */ + p += 7; - if (path_v1[0] && path_v2[0]) - break; + p = strstr(p, subsys); + if (p == NULL) + continue; + + /* sanity check: it should be separated by a space or a comma */ + if (!strchr(" ,", p[-1]) || !strchr(" ,", p[strlen(subsys)])) + continue; + + strcpy(mountpoint, path); + break; } + free(line); fclose(fp); - if (path_v1[0]) - path = path_v1; - else if (path_v2[0]) - path = path_v2; - else - return -1; + strncpy(cached.subsys, subsys, sizeof(cached.subsys) - 1); + strcpy(cached.mountpoint, mountpoint); - if (strlen(path) < maxlen) { - strcpy(buf, path); + if (mountpoint[0] && strlen(mountpoint) < maxlen) { + strcpy(buf, mountpoint); return 0; } return -1; diff --git a/tools/lib/bpf/.gitignore b/tools/lib/bpf/.gitignore index 8a81b3679d2b..5d4cfac671d5 100644 --- a/tools/lib/bpf/.gitignore +++ b/tools/lib/bpf/.gitignore @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only libbpf_version.h libbpf.pc -FEATURE-DUMP.libbpf libbpf.so.* TAGS tags diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 55bd78b3496f..887a494ad5fc 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -58,28 +58,7 @@ ifndef VERBOSE VERBOSE = 0 endif -FEATURE_USER = .libbpf -FEATURE_TESTS = libelf zlib bpf -FEATURE_DISPLAY = libelf zlib bpf - INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/include/uapi -FEATURE_CHECK_CFLAGS-bpf = $(INCLUDES) - -check_feat := 1 -NON_CHECK_FEAT_TARGETS := clean TAGS tags cscope help -ifdef MAKECMDGOALS -ifeq ($(filter-out $(NON_CHECK_FEAT_TARGETS),$(MAKECMDGOALS)),) - check_feat := 0 -endif -endif - -ifeq ($(check_feat),1) -ifeq ($(FEATURES_DUMP),) -include $(srctree)/tools/build/Makefile.feature -else -include $(FEATURES_DUMP) -endif -endif export prefix libdir src obj @@ -157,7 +136,7 @@ all: fixdep all_cmd: $(CMD_TARGETS) check -$(BPF_IN_SHARED): force elfdep zdep bpfdep $(BPF_HELPER_DEFS) +$(BPF_IN_SHARED): force $(BPF_HELPER_DEFS) @(test -f ../../include/uapi/linux/bpf.h -a -f ../../../include/uapi/linux/bpf.h && ( \ (diff -B ../../include/uapi/linux/bpf.h ../../../include/uapi/linux/bpf.h >/dev/null) || \ echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf.h' differs from latest version at 'include/uapi/linux/bpf.h'" >&2 )) || true @@ -175,7 +154,7 @@ $(BPF_IN_SHARED): force elfdep zdep bpfdep $(BPF_HELPER_DEFS) echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_xdp.h' differs from latest version at 'include/uapi/linux/if_xdp.h'" >&2 )) || true $(Q)$(MAKE) $(build)=libbpf OUTPUT=$(SHARED_OBJDIR) CFLAGS="$(CFLAGS) $(SHLIB_FLAGS)" -$(BPF_IN_STATIC): force elfdep zdep bpfdep $(BPF_HELPER_DEFS) +$(BPF_IN_STATIC): force $(BPF_HELPER_DEFS) $(Q)$(MAKE) $(build)=libbpf OUTPUT=$(STATIC_OBJDIR) $(BPF_HELPER_DEFS): $(srctree)/tools/include/uapi/linux/bpf.h @@ -264,34 +243,16 @@ install_pkgconfig: $(PC_FILE) install: install_lib install_pkgconfig install_headers -### Cleaning rules - -config-clean: - $(call QUIET_CLEAN, feature-detect) - $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null - -clean: config-clean +clean: $(call QUIET_CLEAN, libbpf) $(RM) -rf $(CMD_TARGETS) \ *~ .*.d .*.cmd LIBBPF-CFLAGS $(BPF_HELPER_DEFS) \ $(SHARED_OBJDIR) $(STATIC_OBJDIR) \ $(addprefix $(OUTPUT), \ *.o *.a *.so *.so.$(LIBBPF_MAJOR_VERSION) *.pc) - $(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf - - -PHONY += force elfdep zdep bpfdep cscope tags +PHONY += force cscope tags force: -elfdep: - @if [ "$(feature-libelf)" != "1" ]; then echo "No libelf found"; exit 1 ; fi - -zdep: - @if [ "$(feature-zlib)" != "1" ]; then echo "No zlib found"; exit 1 ; fi - -bpfdep: - @if [ "$(feature-bpf)" != "1" ]; then echo "BPF API too old"; exit 1 ; fi - cscope: ls *.c *.h > cscope.files cscope -b -q -I $(srctree)/include -f cscope.out diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h index bbcefb3ff5a5..53b3e199fb25 100644 --- a/tools/lib/bpf/bpf_core_read.h +++ b/tools/lib/bpf/bpf_core_read.h @@ -195,17 +195,22 @@ enum bpf_enum_value_kind { * (local) BTF, used to record relocation. */ #define bpf_core_read(dst, sz, src) \ - bpf_probe_read_kernel(dst, sz, \ - (const void *)__builtin_preserve_access_index(src)) + bpf_probe_read_kernel(dst, sz, (const void *)__builtin_preserve_access_index(src)) +/* NOTE: see comments for BPF_CORE_READ_USER() about the proper types use. */ +#define bpf_core_read_user(dst, sz, src) \ + bpf_probe_read_user(dst, sz, (const void *)__builtin_preserve_access_index(src)) /* * bpf_core_read_str() is a thin wrapper around bpf_probe_read_str() * additionally emitting BPF CO-RE field relocation for specified source * argument. */ #define bpf_core_read_str(dst, sz, src) \ - bpf_probe_read_kernel_str(dst, sz, \ - (const void *)__builtin_preserve_access_index(src)) + bpf_probe_read_kernel_str(dst, sz, (const void *)__builtin_preserve_access_index(src)) + +/* NOTE: see comments for BPF_CORE_READ_USER() about the proper types use. */ +#define bpf_core_read_user_str(dst, sz, src) \ + bpf_probe_read_user_str(dst, sz, (const void *)__builtin_preserve_access_index(src)) #define ___concat(a, b) a ## b #define ___apply(fn, n) ___concat(fn, n) @@ -264,30 +269,29 @@ enum bpf_enum_value_kind { read_fn((void *)(dst), sizeof(*(dst)), &((src_type)(src))->accessor) /* "recursively" read a sequence of inner pointers using local __t var */ -#define ___rd_first(src, a) ___read(bpf_core_read, &__t, ___type(src), src, a); -#define ___rd_last(...) \ - ___read(bpf_core_read, &__t, \ - ___type(___nolast(__VA_ARGS__)), __t, ___last(__VA_ARGS__)); -#define ___rd_p1(...) const void *__t; ___rd_first(__VA_ARGS__) -#define ___rd_p2(...) ___rd_p1(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) -#define ___rd_p3(...) ___rd_p2(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) -#define ___rd_p4(...) ___rd_p3(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) -#define ___rd_p5(...) ___rd_p4(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) -#define ___rd_p6(...) ___rd_p5(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) -#define ___rd_p7(...) ___rd_p6(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) -#define ___rd_p8(...) ___rd_p7(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) -#define ___rd_p9(...) ___rd_p8(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) -#define ___read_ptrs(src, ...) \ - ___apply(___rd_p, ___narg(__VA_ARGS__))(src, __VA_ARGS__) - -#define ___core_read0(fn, dst, src, a) \ +#define ___rd_first(fn, src, a) ___read(fn, &__t, ___type(src), src, a); +#define ___rd_last(fn, ...) \ + ___read(fn, &__t, ___type(___nolast(__VA_ARGS__)), __t, ___last(__VA_ARGS__)); +#define ___rd_p1(fn, ...) const void *__t; ___rd_first(fn, __VA_ARGS__) +#define ___rd_p2(fn, ...) ___rd_p1(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__) +#define ___rd_p3(fn, ...) ___rd_p2(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__) +#define ___rd_p4(fn, ...) ___rd_p3(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__) +#define ___rd_p5(fn, ...) ___rd_p4(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__) +#define ___rd_p6(fn, ...) ___rd_p5(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__) +#define ___rd_p7(fn, ...) ___rd_p6(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__) +#define ___rd_p8(fn, ...) ___rd_p7(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__) +#define ___rd_p9(fn, ...) ___rd_p8(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__) +#define ___read_ptrs(fn, src, ...) \ + ___apply(___rd_p, ___narg(__VA_ARGS__))(fn, src, __VA_ARGS__) + +#define ___core_read0(fn, fn_ptr, dst, src, a) \ ___read(fn, dst, ___type(src), src, a); -#define ___core_readN(fn, dst, src, ...) \ - ___read_ptrs(src, ___nolast(__VA_ARGS__)) \ +#define ___core_readN(fn, fn_ptr, dst, src, ...) \ + ___read_ptrs(fn_ptr, src, ___nolast(__VA_ARGS__)) \ ___read(fn, dst, ___type(src, ___nolast(__VA_ARGS__)), __t, \ ___last(__VA_ARGS__)); -#define ___core_read(fn, dst, src, a, ...) \ - ___apply(___core_read, ___empty(__VA_ARGS__))(fn, dst, \ +#define ___core_read(fn, fn_ptr, dst, src, a, ...) \ + ___apply(___core_read, ___empty(__VA_ARGS__))(fn, fn_ptr, dst, \ src, a, ##__VA_ARGS__) /* @@ -295,20 +299,73 @@ enum bpf_enum_value_kind { * BPF_CORE_READ(), in which final field is read into user-provided storage. * See BPF_CORE_READ() below for more details on general usage. */ -#define BPF_CORE_READ_INTO(dst, src, a, ...) \ - ({ \ - ___core_read(bpf_core_read, dst, (src), a, ##__VA_ARGS__) \ - }) +#define BPF_CORE_READ_INTO(dst, src, a, ...) ({ \ + ___core_read(bpf_core_read, bpf_core_read, \ + dst, (src), a, ##__VA_ARGS__) \ +}) + +/* + * Variant of BPF_CORE_READ_INTO() for reading from user-space memory. + * + * NOTE: see comments for BPF_CORE_READ_USER() about the proper types use. + */ +#define BPF_CORE_READ_USER_INTO(dst, src, a, ...) ({ \ + ___core_read(bpf_core_read_user, bpf_core_read_user, \ + dst, (src), a, ##__VA_ARGS__) \ +}) + +/* Non-CO-RE variant of BPF_CORE_READ_INTO() */ +#define BPF_PROBE_READ_INTO(dst, src, a, ...) ({ \ + ___core_read(bpf_probe_read, bpf_probe_read, \ + dst, (src), a, ##__VA_ARGS__) \ +}) + +/* Non-CO-RE variant of BPF_CORE_READ_USER_INTO(). + * + * As no CO-RE relocations are emitted, source types can be arbitrary and are + * not restricted to kernel types only. + */ +#define BPF_PROBE_READ_USER_INTO(dst, src, a, ...) ({ \ + ___core_read(bpf_probe_read_user, bpf_probe_read_user, \ + dst, (src), a, ##__VA_ARGS__) \ +}) /* * BPF_CORE_READ_STR_INTO() does same "pointer chasing" as * BPF_CORE_READ() for intermediate pointers, but then executes (and returns * corresponding error code) bpf_core_read_str() for final string read. */ -#define BPF_CORE_READ_STR_INTO(dst, src, a, ...) \ - ({ \ - ___core_read(bpf_core_read_str, dst, (src), a, ##__VA_ARGS__)\ - }) +#define BPF_CORE_READ_STR_INTO(dst, src, a, ...) ({ \ + ___core_read(bpf_core_read_str, bpf_core_read, \ + dst, (src), a, ##__VA_ARGS__) \ +}) + +/* + * Variant of BPF_CORE_READ_STR_INTO() for reading from user-space memory. + * + * NOTE: see comments for BPF_CORE_READ_USER() about the proper types use. + */ +#define BPF_CORE_READ_USER_STR_INTO(dst, src, a, ...) ({ \ + ___core_read(bpf_core_read_user_str, bpf_core_read_user, \ + dst, (src), a, ##__VA_ARGS__) \ +}) + +/* Non-CO-RE variant of BPF_CORE_READ_STR_INTO() */ +#define BPF_PROBE_READ_STR_INTO(dst, src, a, ...) ({ \ + ___core_read(bpf_probe_read_str, bpf_probe_read, \ + dst, (src), a, ##__VA_ARGS__) \ +}) + +/* + * Non-CO-RE variant of BPF_CORE_READ_USER_STR_INTO(). + * + * As no CO-RE relocations are emitted, source types can be arbitrary and are + * not restricted to kernel types only. + */ +#define BPF_PROBE_READ_USER_STR_INTO(dst, src, a, ...) ({ \ + ___core_read(bpf_probe_read_user_str, bpf_probe_read_user, \ + dst, (src), a, ##__VA_ARGS__) \ +}) /* * BPF_CORE_READ() is used to simplify BPF CO-RE relocatable read, especially @@ -334,12 +391,46 @@ enum bpf_enum_value_kind { * N.B. Only up to 9 "field accessors" are supported, which should be more * than enough for any practical purpose. */ -#define BPF_CORE_READ(src, a, ...) \ - ({ \ - ___type((src), a, ##__VA_ARGS__) __r; \ - BPF_CORE_READ_INTO(&__r, (src), a, ##__VA_ARGS__); \ - __r; \ - }) +#define BPF_CORE_READ(src, a, ...) ({ \ + ___type((src), a, ##__VA_ARGS__) __r; \ + BPF_CORE_READ_INTO(&__r, (src), a, ##__VA_ARGS__); \ + __r; \ +}) + +/* + * Variant of BPF_CORE_READ() for reading from user-space memory. + * + * NOTE: all the source types involved are still *kernel types* and need to + * exist in kernel (or kernel module) BTF, otherwise CO-RE relocation will + * fail. Custom user types are not relocatable with CO-RE. + * The typical situation in which BPF_CORE_READ_USER() might be used is to + * read kernel UAPI types from the user-space memory passed in as a syscall + * input argument. + */ +#define BPF_CORE_READ_USER(src, a, ...) ({ \ + ___type((src), a, ##__VA_ARGS__) __r; \ + BPF_CORE_READ_USER_INTO(&__r, (src), a, ##__VA_ARGS__); \ + __r; \ +}) + +/* Non-CO-RE variant of BPF_CORE_READ() */ +#define BPF_PROBE_READ(src, a, ...) ({ \ + ___type((src), a, ##__VA_ARGS__) __r; \ + BPF_PROBE_READ_INTO(&__r, (src), a, ##__VA_ARGS__); \ + __r; \ +}) + +/* + * Non-CO-RE variant of BPF_CORE_READ_USER(). + * + * As no CO-RE relocations are emitted, source types can be arbitrary and are + * not restricted to kernel types only. + */ +#define BPF_PROBE_READ_USER(src, a, ...) ({ \ + ___type((src), a, ##__VA_ARGS__) __r; \ + BPF_PROBE_READ_USER_INTO(&__r, (src), a, ##__VA_ARGS__); \ + __r; \ +}) #endif diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index 72b251110c4d..ae6c975e0b87 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -30,7 +30,7 @@ #define SEC(NAME) __attribute__((section(NAME), used)) #ifndef __always_inline -#define __always_inline __attribute__((always_inline)) +#define __always_inline inline __attribute__((always_inline)) #endif #ifndef __noinline #define __noinline __attribute__((noinline)) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 3c3f2bc6c652..d9c10830d749 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -240,11 +240,6 @@ static int btf_parse_hdr(struct btf *btf) } meta_left = btf->raw_size - sizeof(*hdr); - if (!meta_left) { - pr_debug("BTF has no data\n"); - return -EINVAL; - } - if (meta_left < hdr->str_off + hdr->str_len) { pr_debug("Invalid BTF total size:%u\n", btf->raw_size); return -EINVAL; @@ -863,6 +858,7 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf, Elf_Scn *scn = NULL; Elf *elf = NULL; GElf_Ehdr ehdr; + size_t shstrndx; if (elf_version(EV_CURRENT) == EV_NONE) { pr_warn("failed to init libelf for %s\n", path); @@ -887,7 +883,14 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf, pr_warn("failed to get EHDR from %s\n", path); goto done; } - if (!elf_rawdata(elf_getscn(elf, ehdr.e_shstrndx), NULL)) { + + if (elf_getshdrstrndx(elf, &shstrndx)) { + pr_warn("failed to get section names section index for %s\n", + path); + goto done; + } + + if (!elf_rawdata(elf_getscn(elf, shstrndx), NULL)) { pr_warn("failed to get e_shstrndx from %s\n", path); goto done; } @@ -902,7 +905,7 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf, idx, path); goto done; } - name = elf_strptr(elf, ehdr.e_shstrndx, sh.sh_name); + name = elf_strptr(elf, shstrndx, sh.sh_name); if (!name) { pr_warn("failed to get section(%d) name from %s\n", idx, path); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 6ae748f6ea11..d43cc3f29dae 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -395,7 +395,8 @@ struct extern_desc { unsigned long long addr; /* target btf_id of the corresponding kernel var. */ - int vmlinux_btf_id; + int kernel_btf_obj_fd; + int kernel_btf_id; /* local btf_id of the ksym extern's type. */ __u32 type_id; @@ -883,24 +884,24 @@ static int bpf_map__init_kern_struct_ops(struct bpf_map *map, if (btf_is_ptr(mtype)) { struct bpf_program *prog; - mtype = skip_mods_and_typedefs(btf, mtype->type, &mtype_id); + prog = st_ops->progs[i]; + if (!prog) + continue; + kern_mtype = skip_mods_and_typedefs(kern_btf, kern_mtype->type, &kern_mtype_id); - if (!btf_is_func_proto(mtype) || - !btf_is_func_proto(kern_mtype)) { - pr_warn("struct_ops init_kern %s: non func ptr %s is not supported\n", + + /* mtype->type must be a func_proto which was + * guaranteed in bpf_object__collect_st_ops_relos(), + * so only check kern_mtype for func_proto here. + */ + if (!btf_is_func_proto(kern_mtype)) { + pr_warn("struct_ops init_kern %s: kernel member %s is not a func ptr\n", map->name, mname); return -ENOTSUP; } - prog = st_ops->progs[i]; - if (!prog) { - pr_debug("struct_ops init_kern %s: func ptr %s is not set\n", - map->name, mname); - continue; - } - prog->attach_btf_id = kern_type_id; prog->expected_attach_type = kern_member_idx; @@ -6162,7 +6163,8 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) } else /* EXT_KSYM */ { if (ext->ksym.type_id) { /* typed ksyms */ insn[0].src_reg = BPF_PSEUDO_BTF_ID; - insn[0].imm = ext->ksym.vmlinux_btf_id; + insn[0].imm = ext->ksym.kernel_btf_id; + insn[1].imm = ext->ksym.kernel_btf_obj_fd; } else { /* typeless ksyms */ insn[0].imm = (__u32)ext->ksym.addr; insn[1].imm = ext->ksym.addr >> 32; @@ -7319,7 +7321,8 @@ out: static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj) { struct extern_desc *ext; - int i, id; + struct btf *btf; + int i, j, id, btf_fd, err; for (i = 0; i < obj->nr_extern; i++) { const struct btf_type *targ_var, *targ_type; @@ -7331,10 +7334,25 @@ static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj) if (ext->type != EXT_KSYM || !ext->ksym.type_id) continue; - id = btf__find_by_name_kind(obj->btf_vmlinux, ext->name, - BTF_KIND_VAR); + btf = obj->btf_vmlinux; + btf_fd = 0; + id = btf__find_by_name_kind(btf, ext->name, BTF_KIND_VAR); + if (id == -ENOENT) { + err = load_module_btfs(obj); + if (err) + return err; + + for (j = 0; j < obj->btf_module_cnt; j++) { + btf = obj->btf_modules[j].btf; + /* we assume module BTF FD is always >0 */ + btf_fd = obj->btf_modules[j].fd; + id = btf__find_by_name_kind(btf, ext->name, BTF_KIND_VAR); + if (id != -ENOENT) + break; + } + } if (id <= 0) { - pr_warn("extern (ksym) '%s': failed to find BTF ID in vmlinux BTF.\n", + pr_warn("extern (ksym) '%s': failed to find BTF ID in kernel BTF(s).\n", ext->name); return -ESRCH; } @@ -7343,24 +7361,19 @@ static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj) local_type_id = ext->ksym.type_id; /* find target type_id */ - targ_var = btf__type_by_id(obj->btf_vmlinux, id); - targ_var_name = btf__name_by_offset(obj->btf_vmlinux, - targ_var->name_off); - targ_type = skip_mods_and_typedefs(obj->btf_vmlinux, - targ_var->type, - &targ_type_id); + targ_var = btf__type_by_id(btf, id); + targ_var_name = btf__name_by_offset(btf, targ_var->name_off); + targ_type = skip_mods_and_typedefs(btf, targ_var->type, &targ_type_id); ret = bpf_core_types_are_compat(obj->btf, local_type_id, - obj->btf_vmlinux, targ_type_id); + btf, targ_type_id); if (ret <= 0) { const struct btf_type *local_type; const char *targ_name, *local_name; local_type = btf__type_by_id(obj->btf, local_type_id); - local_name = btf__name_by_offset(obj->btf, - local_type->name_off); - targ_name = btf__name_by_offset(obj->btf_vmlinux, - targ_type->name_off); + local_name = btf__name_by_offset(obj->btf, local_type->name_off); + targ_name = btf__name_by_offset(btf, targ_type->name_off); pr_warn("extern (ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n", ext->name, local_type_id, @@ -7370,7 +7383,8 @@ static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj) } ext->is_set = true; - ext->ksym.vmlinux_btf_id = id; + ext->ksym.kernel_btf_obj_fd = btf_fd; + ext->ksym.kernel_btf_id = id; pr_debug("extern (ksym) '%s': resolved to [%d] %s %s\n", ext->name, id, btf_kind_str(targ_var), targ_var_name); } diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index e3e41ceeb1bc..ffbb588724d8 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -46,6 +46,11 @@ #define PF_XDP AF_XDP #endif +enum xsk_prog { + XSK_PROG_FALLBACK, + XSK_PROG_REDIRECT_FLAGS, +}; + struct xsk_umem { struct xsk_ring_prod *fill_save; struct xsk_ring_cons *comp_save; @@ -351,6 +356,54 @@ int xsk_umem__create_v0_0_2(struct xsk_umem **umem_ptr, void *umem_area, COMPAT_VERSION(xsk_umem__create_v0_0_2, xsk_umem__create, LIBBPF_0.0.2) DEFAULT_VERSION(xsk_umem__create_v0_0_4, xsk_umem__create, LIBBPF_0.0.4) +static enum xsk_prog get_xsk_prog(void) +{ + enum xsk_prog detected = XSK_PROG_FALLBACK; + struct bpf_load_program_attr prog_attr; + struct bpf_create_map_attr map_attr; + __u32 size_out, retval, duration; + char data_in = 0, data_out; + struct bpf_insn insns[] = { + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, XDP_PASS), + BPF_EMIT_CALL(BPF_FUNC_redirect_map), + BPF_EXIT_INSN(), + }; + int prog_fd, map_fd, ret; + + memset(&map_attr, 0, sizeof(map_attr)); + map_attr.map_type = BPF_MAP_TYPE_XSKMAP; + map_attr.key_size = sizeof(int); + map_attr.value_size = sizeof(int); + map_attr.max_entries = 1; + + map_fd = bpf_create_map_xattr(&map_attr); + if (map_fd < 0) + return detected; + + insns[0].imm = map_fd; + + memset(&prog_attr, 0, sizeof(prog_attr)); + prog_attr.prog_type = BPF_PROG_TYPE_XDP; + prog_attr.insns = insns; + prog_attr.insns_cnt = ARRAY_SIZE(insns); + prog_attr.license = "GPL"; + + prog_fd = bpf_load_program_xattr(&prog_attr, NULL, 0); + if (prog_fd < 0) { + close(map_fd); + return detected; + } + + ret = bpf_prog_test_run(prog_fd, 0, &data_in, 1, &data_out, &size_out, &retval, &duration); + if (!ret && retval == XDP_PASS) + detected = XSK_PROG_REDIRECT_FLAGS; + close(prog_fd); + close(map_fd); + return detected; +} + static int xsk_load_xdp_prog(struct xsk_socket *xsk) { static const int log_buf_size = 16 * 1024; @@ -358,7 +411,7 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk) char log_buf[log_buf_size]; int err, prog_fd; - /* This is the C-program: + /* This is the fallback C-program: * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx) * { * int ret, index = ctx->rx_queue_index; @@ -414,9 +467,31 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk) /* The jumps are to this instruction */ BPF_EXIT_INSN(), }; - size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); - prog_fd = bpf_load_program(BPF_PROG_TYPE_XDP, prog, insns_cnt, + /* This is the post-5.3 kernel C-program: + * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx) + * { + * return bpf_redirect_map(&xsks_map, ctx->rx_queue_index, XDP_PASS); + * } + */ + struct bpf_insn prog_redirect_flags[] = { + /* r2 = *(u32 *)(r1 + 16) */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 16), + /* r1 = xskmap[] */ + BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd), + /* r3 = XDP_PASS */ + BPF_MOV64_IMM(BPF_REG_3, 2), + /* call bpf_redirect_map */ + BPF_EMIT_CALL(BPF_FUNC_redirect_map), + BPF_EXIT_INSN(), + }; + size_t insns_cnt[] = {sizeof(prog) / sizeof(struct bpf_insn), + sizeof(prog_redirect_flags) / sizeof(struct bpf_insn), + }; + struct bpf_insn *progs[] = {prog, prog_redirect_flags}; + enum xsk_prog option = get_xsk_prog(); + + prog_fd = bpf_load_program(BPF_PROG_TYPE_XDP, progs[option], insns_cnt[option], "LGPL-2.1 or BSD-2-Clause", 0, log_buf, log_buf_size); if (prog_fd < 0) { @@ -442,7 +517,7 @@ static int xsk_get_max_queues(struct xsk_socket *xsk) struct ifreq ifr = {}; int fd, err, ret; - fd = socket(AF_INET, SOCK_DGRAM, 0); + fd = socket(AF_LOCAL, SOCK_DGRAM, 0); if (fd < 0) return -errno; diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index cfcdbd7be066..17465d454a0e 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -367,21 +367,13 @@ static struct perf_mmap* perf_evlist__alloc_mmap(struct perf_evlist *evlist, boo return map; } -static void perf_evlist__set_sid_idx(struct perf_evlist *evlist, - struct perf_evsel *evsel, int idx, int cpu, - int thread) +static void perf_evsel__set_sid_idx(struct perf_evsel *evsel, int idx, int cpu, int thread) { struct perf_sample_id *sid = SID(evsel, cpu, thread); sid->idx = idx; - if (evlist->cpus && cpu >= 0) - sid->cpu = evlist->cpus->map[cpu]; - else - sid->cpu = -1; - if (!evsel->system_wide && evlist->threads && thread >= 0) - sid->tid = perf_thread_map__pid(evlist->threads, thread); - else - sid->tid = -1; + sid->cpu = perf_cpu_map__cpu(evsel->cpus, cpu); + sid->tid = perf_thread_map__pid(evsel->threads, thread); } static struct perf_mmap* @@ -500,8 +492,7 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread, fd) < 0) return -1; - perf_evlist__set_sid_idx(evlist, evsel, idx, cpu, - thread); + perf_evsel__set_sid_idx(evsel, idx, cpu, thread); } } diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h index 988c539bedb6..d82054225fcc 100644 --- a/tools/lib/perf/include/perf/event.h +++ b/tools/lib/perf/include/perf/event.h @@ -23,10 +23,20 @@ struct perf_record_mmap2 { __u64 start; __u64 len; __u64 pgoff; - __u32 maj; - __u32 min; - __u64 ino; - __u64 ino_generation; + union { + struct { + __u32 maj; + __u32 min; + __u64 ino; + __u64 ino_generation; + }; + struct { + __u8 build_id_size; + __u8 __reserved_1; + __u16 __reserved_2; + __u8 build_id[20]; + }; + }; __u32 prot; __u32 flags; char filename[PATH_MAX]; diff --git a/tools/lib/perf/tests/test-cpumap.c b/tools/lib/perf/tests/test-cpumap.c index c8d45091e7c2..c70e9e03af3e 100644 --- a/tools/lib/perf/tests/test-cpumap.c +++ b/tools/lib/perf/tests/test-cpumap.c @@ -27,5 +27,5 @@ int main(int argc, char **argv) perf_cpu_map__put(cpus); __T_END; - return 0; + return tests_failed == 0 ? 0 : -1; } diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c index 6d8ebe0c2504..e2ac0b7f432e 100644 --- a/tools/lib/perf/tests/test-evlist.c +++ b/tools/lib/perf/tests/test-evlist.c @@ -208,13 +208,13 @@ static int test_mmap_thread(void) char path[PATH_MAX]; int id, err, pid, go_pipe[2]; union perf_event *event; - char bf; int count = 0; snprintf(path, PATH_MAX, "%s/kernel/debug/tracing/events/syscalls/sys_enter_prctl/id", sysfs__mountpoint()); if (filename__read_int(path, &id)) { + tests_failed++; fprintf(stderr, "error: failed to get tracepoint id: %s\n", path); return -1; } @@ -229,6 +229,7 @@ static int test_mmap_thread(void) pid = fork(); if (!pid) { int i; + char bf; read(go_pipe[0], &bf, 1); @@ -266,7 +267,7 @@ static int test_mmap_thread(void) perf_evlist__enable(evlist); /* kick the child and wait for it to finish */ - write(go_pipe[1], &bf, 1); + write(go_pipe[1], "A", 1); waitpid(pid, NULL, 0); /* @@ -409,5 +410,5 @@ int main(int argc, char **argv) test_mmap_cpus(); __T_END; - return 0; + return tests_failed == 0 ? 0 : -1; } diff --git a/tools/lib/perf/tests/test-evsel.c b/tools/lib/perf/tests/test-evsel.c index 135722ac965b..0ad82d7a2a51 100644 --- a/tools/lib/perf/tests/test-evsel.c +++ b/tools/lib/perf/tests/test-evsel.c @@ -131,5 +131,5 @@ int main(int argc, char **argv) test_stat_thread_enable(); __T_END; - return 0; + return tests_failed == 0 ? 0 : -1; } diff --git a/tools/lib/perf/tests/test-threadmap.c b/tools/lib/perf/tests/test-threadmap.c index 7dc4d6fbedde..384471441b48 100644 --- a/tools/lib/perf/tests/test-threadmap.c +++ b/tools/lib/perf/tests/test-threadmap.c @@ -27,5 +27,5 @@ int main(int argc, char **argv) perf_thread_map__put(threads); __T_END; - return 0; + return tests_failed == 0 ? 0 : -1; } diff --git a/tools/memory-model/Documentation/glossary.txt b/tools/memory-model/Documentation/glossary.txt index 79acb75d56ea..b2da6365be63 100644 --- a/tools/memory-model/Documentation/glossary.txt +++ b/tools/memory-model/Documentation/glossary.txt @@ -33,10 +33,11 @@ Acquire: With respect to a lock, acquiring that lock, for example, acquire loads. When an acquire load returns the value stored by a release store - to that same variable, then all operations preceding that store - happen before any operations following that load acquire. + to that same variable, (in other words, the acquire load "reads + from" the release store), then all operations preceding that + store "happen before" any operations following that load acquire. - See also "Relaxed" and "Release". + See also "Happens-Before", "Reads-From", "Relaxed", and "Release". Coherence (co): When one CPU's store to a given variable overwrites either the value from another CPU's store or some later value, @@ -119,6 +120,11 @@ Fully Ordered: An operation such as smp_mb() that orders all of that orders all of its CPU's prior accesses, itself, and all of its CPU's subsequent accesses. +Happens-Before (hb): A relation between two accesses in which LKMM + guarantees the first access precedes the second. For more + detail, please see the "THE HAPPENS-BEFORE RELATION: hb" + section of explanation.txt. + Marked Access: An access to a variable that uses an special function or macro such as "r1 = READ_ONCE(x)" or "smp_store_release(&a, 1)". diff --git a/tools/memory-model/README b/tools/memory-model/README index 39d08d1f0443..9a84c45504ab 100644 --- a/tools/memory-model/README +++ b/tools/memory-model/README @@ -51,7 +51,7 @@ klitmus7 Compatibility Table ============ ========== target Linux herdtools7 ------------ ---------- - -- 4.18 7.48 -- + -- 4.14 7.48 -- 4.15 -- 4.19 7.49 -- 4.20 -- 5.5 7.54 -- 5.6 -- 7.56 -- diff --git a/tools/memory-model/litmus-tests/CoRR+poonceonce+Once.litmus b/tools/memory-model/litmus-tests/CoRR+poonceonce+Once.litmus index 772544f03fb5..967f9f2a6226 100644 --- a/tools/memory-model/litmus-tests/CoRR+poonceonce+Once.litmus +++ b/tools/memory-model/litmus-tests/CoRR+poonceonce+Once.litmus @@ -7,9 +7,7 @@ C CoRR+poonceonce+Once * reads from the same variable are ordered. *) -{ - int x; -} +{} P0(int *x) { diff --git a/tools/memory-model/litmus-tests/CoRW+poonceonce+Once.litmus b/tools/memory-model/litmus-tests/CoRW+poonceonce+Once.litmus index 5faae98f7ffb..4635739f3974 100644 --- a/tools/memory-model/litmus-tests/CoRW+poonceonce+Once.litmus +++ b/tools/memory-model/litmus-tests/CoRW+poonceonce+Once.litmus @@ -7,9 +7,7 @@ C CoRW+poonceonce+Once * a given variable and a later write to that same variable are ordered. *) -{ - int x; -} +{} P0(int *x) { diff --git a/tools/memory-model/litmus-tests/CoWR+poonceonce+Once.litmus b/tools/memory-model/litmus-tests/CoWR+poonceonce+Once.litmus index 77c9cc9f8dc6..bb068c92d8da 100644 --- a/tools/memory-model/litmus-tests/CoWR+poonceonce+Once.litmus +++ b/tools/memory-model/litmus-tests/CoWR+poonceonce+Once.litmus @@ -7,9 +7,7 @@ C CoWR+poonceonce+Once * given variable and a later read from that same variable are ordered. *) -{ - int x; -} +{} P0(int *x) { diff --git a/tools/memory-model/litmus-tests/CoWW+poonceonce.litmus b/tools/memory-model/litmus-tests/CoWW+poonceonce.litmus index 85ef746f511a..0d9f0a958799 100644 --- a/tools/memory-model/litmus-tests/CoWW+poonceonce.litmus +++ b/tools/memory-model/litmus-tests/CoWW+poonceonce.litmus @@ -7,9 +7,7 @@ C CoWW+poonceonce * writes to the same variable are ordered. *) -{ - int x; -} +{} P0(int *x) { diff --git a/tools/memory-model/litmus-tests/IRIW+fencembonceonces+OnceOnce.litmus b/tools/memory-model/litmus-tests/IRIW+fencembonceonces+OnceOnce.litmus index 87aa900125ab..e729d2776e89 100644 --- a/tools/memory-model/litmus-tests/IRIW+fencembonceonces+OnceOnce.litmus +++ b/tools/memory-model/litmus-tests/IRIW+fencembonceonces+OnceOnce.litmus @@ -10,10 +10,7 @@ C IRIW+fencembonceonces+OnceOnce * process? This litmus test exercises LKMM's "propagation" rule. *) -{ - int x; - int y; -} +{} P0(int *x) { diff --git a/tools/memory-model/litmus-tests/IRIW+poonceonces+OnceOnce.litmus b/tools/memory-model/litmus-tests/IRIW+poonceonces+OnceOnce.litmus index f84022dca555..4b54dd6a6cd9 100644 --- a/tools/memory-model/litmus-tests/IRIW+poonceonces+OnceOnce.litmus +++ b/tools/memory-model/litmus-tests/IRIW+poonceonces+OnceOnce.litmus @@ -10,10 +10,7 @@ C IRIW+poonceonces+OnceOnce * different process? *) -{ - int x; - int y; -} +{} P0(int *x) { diff --git a/tools/memory-model/litmus-tests/ISA2+pooncelock+pooncelock+pombonce.litmus b/tools/memory-model/litmus-tests/ISA2+pooncelock+pooncelock+pombonce.litmus index 398f624daa77..094d58df7789 100644 --- a/tools/memory-model/litmus-tests/ISA2+pooncelock+pooncelock+pombonce.litmus +++ b/tools/memory-model/litmus-tests/ISA2+pooncelock+pooncelock+pombonce.litmus @@ -7,12 +7,7 @@ C ISA2+pooncelock+pooncelock+pombonce * (in P0() and P1()) is visible to external process P2(). *) -{ - spinlock_t mylock; - int x; - int y; - int z; -} +{} P0(int *x, int *y, spinlock_t *mylock) { diff --git a/tools/memory-model/litmus-tests/ISA2+poonceonces.litmus b/tools/memory-model/litmus-tests/ISA2+poonceonces.litmus index 212a432ba16b..b321aa6f4ea5 100644 --- a/tools/memory-model/litmus-tests/ISA2+poonceonces.litmus +++ b/tools/memory-model/litmus-tests/ISA2+poonceonces.litmus @@ -9,11 +9,7 @@ C ISA2+poonceonces * of the smp_load_acquire() invocations are replaced by READ_ONCE()? *) -{ - int x; - int y; - int z; -} +{} P0(int *x, int *y) { diff --git a/tools/memory-model/litmus-tests/ISA2+pooncerelease+poacquirerelease+poacquireonce.litmus b/tools/memory-model/litmus-tests/ISA2+pooncerelease+poacquirerelease+poacquireonce.litmus index 7afd85672ccd..025b0462ec9b 100644 --- a/tools/memory-model/litmus-tests/ISA2+pooncerelease+poacquirerelease+poacquireonce.litmus +++ b/tools/memory-model/litmus-tests/ISA2+pooncerelease+poacquirerelease+poacquireonce.litmus @@ -11,11 +11,7 @@ C ISA2+pooncerelease+poacquirerelease+poacquireonce * (AKA non-rf) link, so release-acquire is all that is needed. *) -{ - int x; - int y; - int z; -} +{} P0(int *x, int *y) { diff --git a/tools/memory-model/litmus-tests/LB+fencembonceonce+ctrlonceonce.litmus b/tools/memory-model/litmus-tests/LB+fencembonceonce+ctrlonceonce.litmus index c8a93c7ee556..4727f5aaf03b 100644 --- a/tools/memory-model/litmus-tests/LB+fencembonceonce+ctrlonceonce.litmus +++ b/tools/memory-model/litmus-tests/LB+fencembonceonce+ctrlonceonce.litmus @@ -11,10 +11,7 @@ C LB+fencembonceonce+ctrlonceonce * another control dependency and order would still be maintained.) *) -{ - int x; - int y; -} +{} P0(int *x, int *y) { diff --git a/tools/memory-model/litmus-tests/LB+poacquireonce+pooncerelease.litmus b/tools/memory-model/litmus-tests/LB+poacquireonce+pooncerelease.litmus index 2fa029568fa1..07b9904b0e49 100644 --- a/tools/memory-model/litmus-tests/LB+poacquireonce+pooncerelease.litmus +++ b/tools/memory-model/litmus-tests/LB+poacquireonce+pooncerelease.litmus @@ -8,10 +8,7 @@ C LB+poacquireonce+pooncerelease * to the other? *) -{ - int x; - int y; -} +{} P0(int *x, int *y) { diff --git a/tools/memory-model/litmus-tests/LB+poonceonces.litmus b/tools/memory-model/litmus-tests/LB+poonceonces.litmus index 2107306e8625..74c49cb3c37b 100644 --- a/tools/memory-model/litmus-tests/LB+poonceonces.litmus +++ b/tools/memory-model/litmus-tests/LB+poonceonces.litmus @@ -7,10 +7,7 @@ C LB+poonceonces * be prevented even with no explicit ordering? *) -{ - int x; - int y; -} +{} P0(int *x, int *y) { diff --git a/tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus b/tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus index c5c168d92973..f8ca1229857a 100644 --- a/tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus +++ b/tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus @@ -8,10 +8,7 @@ C MP+fencewmbonceonce+fencermbonceonce * is usually better to use smp_store_release() and smp_load_acquire(). *) -{ - int buf; - int flag; -} +{} P0(int *buf, int *flag) // Producer { diff --git a/tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus b/tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus index 20ff62649f1e..d84160b9c1ae 100644 --- a/tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus +++ b/tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus @@ -10,9 +10,7 @@ C MP+onceassign+derefonce *) { - int *p=y; - int x; - int y=0; +p=y; } P0(int *x, int **p) // Producer diff --git a/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus b/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus index 153917ad5dc9..ba91cc63e148 100644 --- a/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus +++ b/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus @@ -10,10 +10,7 @@ C MP+polockmbonce+poacquiresilsil * executed before the lock was acquired (loosely speaking). *) -{ - spinlock_t lo; - int x; -} +{} P0(spinlock_t *lo, int *x) // Producer { diff --git a/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus b/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus index aad64397bb8c..a5ea3ed8f52e 100644 --- a/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus +++ b/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus @@ -10,10 +10,7 @@ C MP+polockonce+poacquiresilsil * speaking). *) -{ - spinlock_t lo; - int x; -} +{} P0(spinlock_t *lo, int *x) // Producer { diff --git a/tools/memory-model/litmus-tests/MP+polocks.litmus b/tools/memory-model/litmus-tests/MP+polocks.litmus index 21cbca6f3be4..e6af05f70069 100644 --- a/tools/memory-model/litmus-tests/MP+polocks.litmus +++ b/tools/memory-model/litmus-tests/MP+polocks.litmus @@ -11,11 +11,7 @@ C MP+polocks * to see all prior accesses by those other CPUs. *) -{ - spinlock_t mylock; - int buf; - int flag; -} +{} P0(int *buf, int *flag, spinlock_t *mylock) // Producer { diff --git a/tools/memory-model/litmus-tests/MP+poonceonces.litmus b/tools/memory-model/litmus-tests/MP+poonceonces.litmus index 9f9769d647c7..ba9c99c6cf65 100644 --- a/tools/memory-model/litmus-tests/MP+poonceonces.litmus +++ b/tools/memory-model/litmus-tests/MP+poonceonces.litmus @@ -7,10 +7,7 @@ C MP+poonceonces * no ordering at all? *) -{ - int buf; - int flag; -} +{} P0(int *buf, int *flag) // Producer { diff --git a/tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus b/tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus index cbe28e733443..f174bfe61702 100644 --- a/tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus +++ b/tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus @@ -8,10 +8,7 @@ C MP+pooncerelease+poacquireonce * pattern. *) -{ - int buf; - int flag; -} +{} P0(int *buf, int *flag) // Producer { diff --git a/tools/memory-model/litmus-tests/MP+porevlocks.litmus b/tools/memory-model/litmus-tests/MP+porevlocks.litmus index 012041bd4feb..b9599141160e 100644 --- a/tools/memory-model/litmus-tests/MP+porevlocks.litmus +++ b/tools/memory-model/litmus-tests/MP+porevlocks.litmus @@ -11,11 +11,7 @@ C MP+porevlocks * see all prior accesses by those other CPUs. *) -{ - spinlock_t mylock; - int buf; - int flag; -} +{} P0(int *buf, int *flag, spinlock_t *mylock) // Consumer { diff --git a/tools/memory-model/litmus-tests/R+fencembonceonces.litmus b/tools/memory-model/litmus-tests/R+fencembonceonces.litmus index af9463b39b4a..222a0b850b4a 100644 --- a/tools/memory-model/litmus-tests/R+fencembonceonces.litmus +++ b/tools/memory-model/litmus-tests/R+fencembonceonces.litmus @@ -9,10 +9,7 @@ C R+fencembonceonces * cause the resulting test to be allowed. *) -{ - int x; - int y; -} +{} P0(int *x, int *y) { diff --git a/tools/memory-model/litmus-tests/R+poonceonces.litmus b/tools/memory-model/litmus-tests/R+poonceonces.litmus index bcd5574e304a..5386f128a131 100644 --- a/tools/memory-model/litmus-tests/R+poonceonces.litmus +++ b/tools/memory-model/litmus-tests/R+poonceonces.litmus @@ -8,10 +8,7 @@ C R+poonceonces * store propagation delays. *) -{ - int x; - int y; -} +{} P0(int *x, int *y) { diff --git a/tools/memory-model/litmus-tests/S+fencewmbonceonce+poacquireonce.litmus b/tools/memory-model/litmus-tests/S+fencewmbonceonce+poacquireonce.litmus index c36341d1aed6..18479823cd6c 100644 --- a/tools/memory-model/litmus-tests/S+fencewmbonceonce+poacquireonce.litmus +++ b/tools/memory-model/litmus-tests/S+fencewmbonceonce+poacquireonce.litmus @@ -7,10 +7,7 @@ C S+fencewmbonceonce+poacquireonce * store against a subsequent store? *) -{ - int x; - int y; -} +{} P0(int *x, int *y) { diff --git a/tools/memory-model/litmus-tests/S+poonceonces.litmus b/tools/memory-model/litmus-tests/S+poonceonces.litmus index 7775c23143a0..8c9c2f81a580 100644 --- a/tools/memory-model/litmus-tests/S+poonceonces.litmus +++ b/tools/memory-model/litmus-tests/S+poonceonces.litmus @@ -9,10 +9,7 @@ C S+poonceonces * READ_ONCE(), is ordering preserved? *) -{ - int x; - int y; -} +{} P0(int *x, int *y) { diff --git a/tools/memory-model/litmus-tests/SB+fencembonceonces.litmus b/tools/memory-model/litmus-tests/SB+fencembonceonces.litmus index 833cdfeb7c09..ed5fff18d223 100644 --- a/tools/memory-model/litmus-tests/SB+fencembonceonces.litmus +++ b/tools/memory-model/litmus-tests/SB+fencembonceonces.litmus @@ -9,10 +9,7 @@ C SB+fencembonceonces * suffice, but not much else.) *) -{ - int x; - int y; -} +{} P0(int *x, int *y) { diff --git a/tools/memory-model/litmus-tests/SB+poonceonces.litmus b/tools/memory-model/litmus-tests/SB+poonceonces.litmus index c92211ecbfdf..10d550730b25 100644 --- a/tools/memory-model/litmus-tests/SB+poonceonces.litmus +++ b/tools/memory-model/litmus-tests/SB+poonceonces.litmus @@ -8,10 +8,7 @@ C SB+poonceonces * variable that the preceding process reads. *) -{ - int x; - int y; -} +{} P0(int *x, int *y) { diff --git a/tools/memory-model/litmus-tests/SB+rfionceonce-poonceonces.litmus b/tools/memory-model/litmus-tests/SB+rfionceonce-poonceonces.litmus index 84344b455eb7..04a16603660b 100644 --- a/tools/memory-model/litmus-tests/SB+rfionceonce-poonceonces.litmus +++ b/tools/memory-model/litmus-tests/SB+rfionceonce-poonceonces.litmus @@ -6,10 +6,7 @@ C SB+rfionceonce-poonceonces * This litmus test demonstrates that LKMM is not fully multicopy atomic. *) -{ - int x; - int y; -} +{} P0(int *x, int *y) { diff --git a/tools/memory-model/litmus-tests/WRC+poonceonces+Once.litmus b/tools/memory-model/litmus-tests/WRC+poonceonces+Once.litmus index 431494708611..6a2bc12a1af1 100644 --- a/tools/memory-model/litmus-tests/WRC+poonceonces+Once.litmus +++ b/tools/memory-model/litmus-tests/WRC+poonceonces+Once.litmus @@ -8,10 +8,7 @@ C WRC+poonceonces+Once * test has no ordering at all. *) -{ - int x; - int y; -} +{} P0(int *x) { diff --git a/tools/memory-model/litmus-tests/WRC+pooncerelease+fencermbonceonce+Once.litmus b/tools/memory-model/litmus-tests/WRC+pooncerelease+fencermbonceonce+Once.litmus index 554999c64db5..e9947250d7de 100644 --- a/tools/memory-model/litmus-tests/WRC+pooncerelease+fencermbonceonce+Once.litmus +++ b/tools/memory-model/litmus-tests/WRC+pooncerelease+fencermbonceonce+Once.litmus @@ -10,10 +10,7 @@ C WRC+pooncerelease+fencermbonceonce+Once * is A-cumulative in LKMM. *) -{ - int x; - int y; -} +{} P0(int *x) { diff --git a/tools/memory-model/litmus-tests/Z6.0+pooncelock+poonceLock+pombonce.litmus b/tools/memory-model/litmus-tests/Z6.0+pooncelock+poonceLock+pombonce.litmus index 265a95ffef13..415248fb6699 100644 --- a/tools/memory-model/litmus-tests/Z6.0+pooncelock+poonceLock+pombonce.litmus +++ b/tools/memory-model/litmus-tests/Z6.0+pooncelock+poonceLock+pombonce.litmus @@ -9,12 +9,7 @@ C Z6.0+pooncelock+poonceLock+pombonce * by CPUs not holding that lock. *) -{ - spinlock_t mylock; - int x; - int y; - int z; -} +{} P0(int *x, int *y, spinlock_t *mylock) { diff --git a/tools/memory-model/litmus-tests/Z6.0+pooncelock+pooncelock+pombonce.litmus b/tools/memory-model/litmus-tests/Z6.0+pooncelock+pooncelock+pombonce.litmus index 0c9aea8e80df..10a2aa04cd07 100644 --- a/tools/memory-model/litmus-tests/Z6.0+pooncelock+pooncelock+pombonce.litmus +++ b/tools/memory-model/litmus-tests/Z6.0+pooncelock+pooncelock+pombonce.litmus @@ -8,12 +8,7 @@ C Z6.0+pooncelock+pooncelock+pombonce * seen as ordered by a third process not holding that lock. *) -{ - spinlock_t mylock; - int x; - int y; - int z; -} +{} P0(int *x, int *y, spinlock_t *mylock) { diff --git a/tools/memory-model/litmus-tests/Z6.0+pooncerelease+poacquirerelease+fencembonceonce.litmus b/tools/memory-model/litmus-tests/Z6.0+pooncerelease+poacquirerelease+fencembonceonce.litmus index 661f9aaa5791..88e70b87a683 100644 --- a/tools/memory-model/litmus-tests/Z6.0+pooncerelease+poacquirerelease+fencembonceonce.litmus +++ b/tools/memory-model/litmus-tests/Z6.0+pooncerelease+poacquirerelease+fencembonceonce.litmus @@ -14,11 +14,7 @@ C Z6.0+pooncerelease+poacquirerelease+fencembonceonce * involving locking.) *) -{ - int x; - int y; - int z; -} +{} P0(int *x, int *y) { diff --git a/tools/objtool/.gitignore b/tools/objtool/.gitignore index 45cefda24c7b..14236db3677f 100644 --- a/tools/objtool/.gitignore +++ b/tools/objtool/.gitignore @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only arch/x86/lib/inat-tables.c -objtool +/objtool fixdep diff --git a/tools/objtool/Documentation/stack-validation.txt b/tools/objtool/Documentation/stack-validation.txt index 0542e46c7552..30f38fdc0d56 100644 --- a/tools/objtool/Documentation/stack-validation.txt +++ b/tools/objtool/Documentation/stack-validation.txt @@ -315,13 +315,15 @@ they mean, and suggestions for how to fix them. function tracing inserts additional calls, which is not obvious from the sources). -10. file.o: warning: func()+0x5c: alternative modifies stack - - This means that an alternative includes instructions that modify the - stack. The problem is that there is only one ORC unwind table, this means - that the ORC unwind entries must be valid for each of the alternatives. - The easiest way to enforce this is to ensure alternatives do not contain - any ORC entries, which in turn implies the above constraint. +10. file.o: warning: func()+0x5c: stack layout conflict in alternatives + + This means that in the use of the alternative() or ALTERNATIVE() + macro, the code paths have conflicting modifications to the stack. + The problem is that there is only one ORC unwind table, which means + that the ORC unwind entries must be consistent for all possible + instruction boundaries regardless of which code has been patched. + This limitation can be overcome by massaging the alternatives with + NOPs to shift the stack changes around so they no longer conflict. 11. file.o: warning: unannotated intra-function call diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile index 5cdb19036d7f..92ce4fce7bc7 100644 --- a/tools/objtool/Makefile +++ b/tools/objtool/Makefile @@ -27,6 +27,7 @@ all: $(OBJTOOL) INCLUDES := -I$(srctree)/tools/include \ -I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi \ -I$(srctree)/tools/arch/$(SRCARCH)/include \ + -I$(srctree)/tools/objtool/include \ -I$(srctree)/tools/objtool/arch/$(SRCARCH)/include WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -Wno-packed -Wno-nested-externs CFLAGS := -Werror $(WARNINGS) $(KBUILD_HOSTCFLAGS) -g $(INCLUDES) $(LIBELF_FLAGS) @@ -46,10 +47,6 @@ ifeq ($(SRCARCH),x86) SUBCMD_ORC := y endif -ifeq ($(SUBCMD_ORC),y) - CFLAGS += -DINSN_USE_ORC -endif - export SUBCMD_CHECK SUBCMD_ORC export srctree OUTPUT CFLAGS SRCARCH AWK include $(srctree)/tools/build/Makefile.include diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index cde9c36e40ae..549813cff8ab 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -11,11 +11,11 @@ #include "../../../arch/x86/lib/inat.c" #include "../../../arch/x86/lib/insn.c" -#include "../../check.h" -#include "../../elf.h" -#include "../../arch.h" -#include "../../warn.h" #include <asm/orc_types.h> +#include <objtool/check.h> +#include <objtool/elf.h> +#include <objtool/arch.h> +#include <objtool/warn.h> static unsigned char op_to_cfi_reg[][2] = { {CFI_AX, CFI_R8}, @@ -222,15 +222,38 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, break; case 0x89: - if (rex_w && !rex_r && modrm_mod == 3 && modrm_reg == 4) { + if (rex_w && !rex_r && modrm_reg == 4) { - /* mov %rsp, reg */ - ADD_OP(op) { - op->src.type = OP_SRC_REG; - op->src.reg = CFI_SP; - op->dest.type = OP_DEST_REG; - op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b]; + if (modrm_mod == 3) { + /* mov %rsp, reg */ + ADD_OP(op) { + op->src.type = OP_SRC_REG; + op->src.reg = CFI_SP; + op->dest.type = OP_DEST_REG; + op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b]; + } + break; + + } else { + /* skip nontrivial SIB */ + if (modrm_rm == 4 && !(sib == 0x24 && rex_b == rex_x)) + break; + + /* skip RIP relative displacement */ + if (modrm_rm == 5 && modrm_mod == 0) + break; + + /* mov %rsp, disp(%reg) */ + ADD_OP(op) { + op->src.type = OP_SRC_REG; + op->src.reg = CFI_SP; + op->dest.type = OP_DEST_REG_INDIRECT; + op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b]; + op->dest.offset = insn.displacement.value; + } + break; } + break; } @@ -259,8 +282,10 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, op->dest.reg = CFI_BP; op->dest.offset = insn.displacement.value; } + break; + } - } else if (rex_w && !rex_b && modrm_rm == 4 && sib == 0x24) { + if (rex_w && !rex_b && modrm_rm == 4 && sib == 0x24) { /* mov reg, disp(%rsp) */ ADD_OP(op) { @@ -270,6 +295,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, op->dest.reg = CFI_SP; op->dest.offset = insn.displacement.value; } + break; } break; @@ -563,8 +589,8 @@ void arch_initial_func_cfi_state(struct cfi_init_state *state) state->cfa.offset = 8; /* initial RA (return address) */ - state->regs[16].base = CFI_CFA; - state->regs[16].offset = -8; + state->regs[CFI_RA].base = CFI_CFA; + state->regs[CFI_RA].offset = -8; } const char *arch_nop_insn(int len) diff --git a/tools/objtool/arch/x86/include/cfi_regs.h b/tools/objtool/arch/x86/include/arch/cfi_regs.h index 79bc517efba8..79bc517efba8 100644 --- a/tools/objtool/arch/x86/include/cfi_regs.h +++ b/tools/objtool/arch/x86/include/arch/cfi_regs.h diff --git a/tools/objtool/arch/x86/include/arch_elf.h b/tools/objtool/arch/x86/include/arch/elf.h index 69cc4264b28a..69cc4264b28a 100644 --- a/tools/objtool/arch/x86/include/arch_elf.h +++ b/tools/objtool/arch/x86/include/arch/elf.h diff --git a/tools/objtool/arch/x86/include/arch/endianness.h b/tools/objtool/arch/x86/include/arch/endianness.h new file mode 100644 index 000000000000..7c362527da20 --- /dev/null +++ b/tools/objtool/arch/x86/include/arch/endianness.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _ARCH_ENDIANNESS_H +#define _ARCH_ENDIANNESS_H + +#include <endian.h> + +#define __TARGET_BYTE_ORDER __LITTLE_ENDIAN + +#endif /* _ARCH_ENDIANNESS_H */ diff --git a/tools/objtool/arch/x86/include/arch_special.h b/tools/objtool/arch/x86/include/arch/special.h index d818b2bffa02..d818b2bffa02 100644 --- a/tools/objtool/arch/x86/include/arch_special.h +++ b/tools/objtool/arch/x86/include/arch/special.h diff --git a/tools/objtool/arch/x86/special.c b/tools/objtool/arch/x86/special.c index fd4af88c0ea5..e707d9bcd161 100644 --- a/tools/objtool/arch/x86/special.c +++ b/tools/objtool/arch/x86/special.c @@ -1,8 +1,8 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include <string.h> -#include "../../special.h" -#include "../../builtin.h" +#include <objtool/special.h> +#include <objtool/builtin.h> #define X86_FEATURE_POPCNT (4 * 32 + 23) #define X86_FEATURE_SMAP (9 * 32 + 20) @@ -48,7 +48,7 @@ bool arch_support_alt_relocation(struct special_alt *special_alt, * replacement group. */ return insn->offset == special_alt->new_off && - (insn->type == INSN_CALL || is_static_jump(insn)); + (insn->type == INSN_CALL || is_jump(insn)); } /* diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index b84cdc72b51f..c3a85d8f6c5c 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -15,8 +15,8 @@ #include <subcmd/parse-options.h> #include <string.h> -#include "builtin.h" -#include "objtool.h" +#include <objtool/builtin.h> +#include <objtool/objtool.h> bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux, mcount, noinstr; diff --git a/tools/objtool/builtin-orc.c b/tools/objtool/builtin-orc.c index 7b31121fa60b..8273bbf7cebb 100644 --- a/tools/objtool/builtin-orc.c +++ b/tools/objtool/builtin-orc.c @@ -13,8 +13,8 @@ */ #include <string.h> -#include "builtin.h" -#include "objtool.h" +#include <objtool/builtin.h> +#include <objtool/objtool.h> static const char *orc_usage[] = { "objtool orc generate [<options>] file.o", @@ -51,11 +51,7 @@ int cmd_orc(int argc, const char **argv) if (list_empty(&file->insn_list)) return 0; - ret = create_orc(file); - if (ret) - return ret; - - ret = create_orc_sections(file); + ret = orc_create(file); if (ret) return ret; diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 85993e606782..080a3d6cbd75 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -6,21 +6,20 @@ #include <string.h> #include <stdlib.h> -#include "builtin.h" -#include "cfi.h" -#include "arch.h" -#include "check.h" -#include "special.h" -#include "warn.h" -#include "arch_elf.h" +#include <arch/elf.h> +#include <objtool/builtin.h> +#include <objtool/cfi.h> +#include <objtool/arch.h> +#include <objtool/check.h> +#include <objtool/special.h> +#include <objtool/warn.h> +#include <objtool/endianness.h> #include <linux/objtool.h> #include <linux/hashtable.h> #include <linux/kernel.h> #include <linux/static_call_types.h> -#define FAKE_JUMP_OFFSET -1 - struct alternative { struct list_head list; struct instruction *insn; @@ -111,15 +110,20 @@ static struct instruction *prev_insn_same_sym(struct objtool_file *file, static bool is_sibling_call(struct instruction *insn) { + /* + * Assume only ELF functions can make sibling calls. This ensures + * sibling call detection consistency between vmlinux.o and individual + * objects. + */ + if (!insn->func) + return false; + /* An indirect jump is either a sibling call or a jump to a table. */ if (insn->type == INSN_JUMP_DYNAMIC) return list_empty(&insn->alts); - if (!is_static_jump(insn)) - return false; - /* add_jump_destinations() sets insn->call_dest for sibling calls. */ - return !!insn->call_dest; + return (is_static_jump(insn) && insn->call_dest); } /* @@ -156,6 +160,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func, "machine_real_restart", "rewind_stack_do_exit", "kunit_try_catch_throw", + "xen_start_kernel", }; if (!func) @@ -502,8 +507,21 @@ static int create_static_call_sections(struct objtool_file *file) key_sym = find_symbol_by_name(file->elf, tmp); if (!key_sym) { - WARN("static_call: can't find static_call_key symbol: %s", tmp); - return -1; + if (!module) { + WARN("static_call: can't find static_call_key symbol: %s", tmp); + return -1; + } + + /* + * For modules(), the key might not be exported, which + * means the module can make static calls but isn't + * allowed to change them. + * + * In that case we temporarily set the key to be the + * trampoline address. This is fixed up in + * static_call_add_module(). + */ + key_sym = insn->call_dest; } free(key_name); @@ -846,22 +864,16 @@ static int add_jump_destinations(struct objtool_file *file) if (!is_static_jump(insn)) continue; - if (insn->offset == FAKE_JUMP_OFFSET) - continue; - reloc = find_reloc_by_dest_range(file->elf, insn->sec, - insn->offset, insn->len); + insn->offset, insn->len); if (!reloc) { dest_sec = insn->sec; dest_off = arch_jump_destination(insn); } else if (reloc->sym->type == STT_SECTION) { dest_sec = reloc->sym->sec; dest_off = arch_dest_reloc_offset(reloc->addend); - } else if (reloc->sym->sec->idx) { - dest_sec = reloc->sym->sec; - dest_off = reloc->sym->sym.st_value + - arch_dest_reloc_offset(reloc->addend); - } else if (strstr(reloc->sym->name, "_indirect_thunk_")) { + } else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21) || + !strncmp(reloc->sym->name, "__x86_retpoline_", 16)) { /* * Retpoline jumps are really dynamic jumps in * disguise, so convert them accordingly. @@ -873,14 +885,21 @@ static int add_jump_destinations(struct objtool_file *file) insn->retpoline_safe = true; continue; - } else { - /* external sibling call */ + } else if (insn->func) { + /* internal or external sibling call (with reloc) */ insn->call_dest = reloc->sym; if (insn->call_dest->static_call_tramp) { list_add_tail(&insn->static_call_node, &file->static_call_list); } continue; + } else if (reloc->sym->sec->idx) { + dest_sec = reloc->sym->sec; + dest_off = reloc->sym->sym.st_value + + arch_dest_reloc_offset(reloc->addend); + } else { + /* non-func asm code jumping to another file */ + continue; } insn->jump_dest = find_insn(file, dest_sec, dest_off); @@ -921,15 +940,15 @@ static int add_jump_destinations(struct objtool_file *file) * case where the parent function's only reference to a * subfunction is through a jump table. */ - if (!strstr(insn->func->name, ".cold.") && - strstr(insn->jump_dest->func->name, ".cold.")) { + if (!strstr(insn->func->name, ".cold") && + strstr(insn->jump_dest->func->name, ".cold")) { insn->func->cfunc = insn->jump_dest->func; insn->jump_dest->func->pfunc = insn->func; } else if (insn->jump_dest->func->pfunc != insn->func->pfunc && insn->jump_dest->offset == insn->jump_dest->func->offset) { - /* internal sibling call */ + /* internal sibling call (without reloc) */ insn->call_dest = insn->jump_dest->func; if (insn->call_dest->static_call_tramp) { list_add_tail(&insn->static_call_node, @@ -1058,73 +1077,83 @@ static int add_call_destinations(struct objtool_file *file) } /* - * The .alternatives section requires some extra special care, over and above - * what other special sections require: - * - * 1. Because alternatives are patched in-place, we need to insert a fake jump - * instruction at the end so that validate_branch() skips all the original - * replaced instructions when validating the new instruction path. - * - * 2. An added wrinkle is that the new instruction length might be zero. In - * that case the old instructions are replaced with noops. We simulate that - * by creating a fake jump as the only new instruction. - * - * 3. In some cases, the alternative section includes an instruction which - * conditionally jumps to the _end_ of the entry. We have to modify these - * jumps' destinations to point back to .text rather than the end of the - * entry in .altinstr_replacement. + * The .alternatives section requires some extra special care over and above + * other special sections because alternatives are patched in place. */ static int handle_group_alt(struct objtool_file *file, struct special_alt *special_alt, struct instruction *orig_insn, struct instruction **new_insn) { - static unsigned int alt_group_next_index = 1; - struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump = NULL; - unsigned int alt_group = alt_group_next_index++; + struct instruction *last_orig_insn, *last_new_insn = NULL, *insn, *nop = NULL; + struct alt_group *orig_alt_group, *new_alt_group; unsigned long dest_off; + + orig_alt_group = malloc(sizeof(*orig_alt_group)); + if (!orig_alt_group) { + WARN("malloc failed"); + return -1; + } + orig_alt_group->cfi = calloc(special_alt->orig_len, + sizeof(struct cfi_state *)); + if (!orig_alt_group->cfi) { + WARN("calloc failed"); + return -1; + } + last_orig_insn = NULL; insn = orig_insn; sec_for_each_insn_from(file, insn) { if (insn->offset >= special_alt->orig_off + special_alt->orig_len) break; - insn->alt_group = alt_group; + insn->alt_group = orig_alt_group; last_orig_insn = insn; } + orig_alt_group->orig_group = NULL; + orig_alt_group->first_insn = orig_insn; + orig_alt_group->last_insn = last_orig_insn; + + + new_alt_group = malloc(sizeof(*new_alt_group)); + if (!new_alt_group) { + WARN("malloc failed"); + return -1; + } - if (next_insn_same_sec(file, last_orig_insn)) { - fake_jump = malloc(sizeof(*fake_jump)); - if (!fake_jump) { + if (special_alt->new_len < special_alt->orig_len) { + /* + * Insert a fake nop at the end to make the replacement + * alt_group the same size as the original. This is needed to + * allow propagate_alt_cfi() to do its magic. When the last + * instruction affects the stack, the instruction after it (the + * nop) will propagate the new state to the shared CFI array. + */ + nop = malloc(sizeof(*nop)); + if (!nop) { WARN("malloc failed"); return -1; } - memset(fake_jump, 0, sizeof(*fake_jump)); - INIT_LIST_HEAD(&fake_jump->alts); - INIT_LIST_HEAD(&fake_jump->stack_ops); - init_cfi_state(&fake_jump->cfi); + memset(nop, 0, sizeof(*nop)); + INIT_LIST_HEAD(&nop->alts); + INIT_LIST_HEAD(&nop->stack_ops); + init_cfi_state(&nop->cfi); - fake_jump->sec = special_alt->new_sec; - fake_jump->offset = FAKE_JUMP_OFFSET; - fake_jump->type = INSN_JUMP_UNCONDITIONAL; - fake_jump->jump_dest = list_next_entry(last_orig_insn, list); - fake_jump->func = orig_insn->func; + nop->sec = special_alt->new_sec; + nop->offset = special_alt->new_off + special_alt->new_len; + nop->len = special_alt->orig_len - special_alt->new_len; + nop->type = INSN_NOP; + nop->func = orig_insn->func; + nop->alt_group = new_alt_group; + nop->ignore = orig_insn->ignore_alts; } if (!special_alt->new_len) { - if (!fake_jump) { - WARN("%s: empty alternative at end of section", - special_alt->orig_sec->name); - return -1; - } - - *new_insn = fake_jump; - return 0; + *new_insn = nop; + goto end; } - last_new_insn = NULL; - alt_group = alt_group_next_index++; insn = *new_insn; sec_for_each_insn_from(file, insn) { struct reloc *alt_reloc; @@ -1136,7 +1165,7 @@ static int handle_group_alt(struct objtool_file *file, insn->ignore = orig_insn->ignore_alts; insn->func = orig_insn->func; - insn->alt_group = alt_group; + insn->alt_group = new_alt_group; /* * Since alternative replacement code is copy/pasted by the @@ -1163,14 +1192,8 @@ static int handle_group_alt(struct objtool_file *file, continue; dest_off = arch_jump_destination(insn); - if (dest_off == special_alt->new_off + special_alt->new_len) { - if (!fake_jump) { - WARN("%s: alternative jump to end of section", - special_alt->orig_sec->name); - return -1; - } - insn->jump_dest = fake_jump; - } + if (dest_off == special_alt->new_off + special_alt->new_len) + insn->jump_dest = next_insn_same_sec(file, last_orig_insn); if (!insn->jump_dest) { WARN_FUNC("can't find alternative jump destination", @@ -1185,9 +1208,13 @@ static int handle_group_alt(struct objtool_file *file, return -1; } - if (fake_jump) - list_add(&fake_jump->list, &last_new_insn->list); - + if (nop) + list_add(&nop->list, &last_new_insn->list); +end: + new_alt_group->orig_group = orig_alt_group; + new_alt_group->first_insn = *new_insn; + new_alt_group->last_insn = nop ? : last_new_insn; + new_alt_group->cfi = orig_alt_group->cfi; return 0; } @@ -1479,13 +1506,20 @@ static int add_jump_table_alts(struct objtool_file *file) return 0; } +static void set_func_state(struct cfi_state *state) +{ + state->cfa = initial_func_cfi.cfa; + memcpy(&state->regs, &initial_func_cfi.regs, + CFI_NUM_REGS * sizeof(struct cfi_reg)); + state->stack_size = initial_func_cfi.cfa.offset; +} + static int read_unwind_hints(struct objtool_file *file) { struct section *sec, *relocsec; struct reloc *reloc; struct unwind_hint *hint; struct instruction *insn; - struct cfi_reg *cfa; int i; sec = find_section_by_name(file->elf, ".discard.unwind_hints"); @@ -1520,22 +1554,20 @@ static int read_unwind_hints(struct objtool_file *file) return -1; } - cfa = &insn->cfi.cfa; + insn->hint = true; - if (hint->type == UNWIND_HINT_TYPE_RET_OFFSET) { - insn->ret_offset = hint->sp_offset; + if (hint->type == UNWIND_HINT_TYPE_FUNC) { + set_func_state(&insn->cfi); continue; } - insn->hint = true; - if (arch_decode_hint_reg(insn, hint->sp_reg)) { WARN_FUNC("unsupported unwind_hint sp base reg %d", insn->sec, insn->offset, hint->sp_reg); return -1; } - cfa->offset = hint->sp_offset; + insn->cfi.cfa.offset = bswap_if_needed(hint->sp_offset); insn->cfi.type = hint->type; insn->cfi.end = hint->end; } @@ -1791,27 +1823,18 @@ static bool is_fentry_call(struct instruction *insn) static bool has_modified_stack_frame(struct instruction *insn, struct insn_state *state) { - u8 ret_offset = insn->ret_offset; struct cfi_state *cfi = &state->cfi; int i; if (cfi->cfa.base != initial_func_cfi.cfa.base || cfi->drap) return true; - if (cfi->cfa.offset != initial_func_cfi.cfa.offset + ret_offset) + if (cfi->cfa.offset != initial_func_cfi.cfa.offset) return true; - if (cfi->stack_size != initial_func_cfi.cfa.offset + ret_offset) + if (cfi->stack_size != initial_func_cfi.cfa.offset) return true; - /* - * If there is a ret offset hint then don't check registers - * because a callee-saved register might have been pushed on - * the stack. - */ - if (ret_offset) - return false; - for (i = 0; i < CFI_NUM_REGS; i++) { if (cfi->regs[i].base != initial_func_cfi.regs[i].base || cfi->regs[i].offset != initial_func_cfi.regs[i].offset) @@ -1821,12 +1844,20 @@ static bool has_modified_stack_frame(struct instruction *insn, struct insn_state return false; } +static bool check_reg_frame_pos(const struct cfi_reg *reg, + int expected_offset) +{ + return reg->base == CFI_CFA && + reg->offset == expected_offset; +} + static bool has_valid_stack_frame(struct insn_state *state) { struct cfi_state *cfi = &state->cfi; - if (cfi->cfa.base == CFI_BP && cfi->regs[CFI_BP].base == CFI_CFA && - cfi->regs[CFI_BP].offset == -16) + if (cfi->cfa.base == CFI_BP && + check_reg_frame_pos(&cfi->regs[CFI_BP], -cfi->cfa.offset) && + check_reg_frame_pos(&cfi->regs[CFI_RA], -cfi->cfa.offset + 8)) return true; if (cfi->drap && cfi->regs[CFI_BP].base == CFI_BP) @@ -1955,8 +1986,7 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi, case OP_SRC_REG: if (op->src.reg == CFI_SP && op->dest.reg == CFI_BP && cfa->base == CFI_SP && - regs[CFI_BP].base == CFI_CFA && - regs[CFI_BP].offset == -cfa->offset) { + check_reg_frame_pos(®s[CFI_BP], -cfa->offset)) { /* mov %rsp, %rbp */ cfa->base = op->dest.reg; @@ -2016,6 +2046,38 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi, cfa->offset = -cfi->vals[op->src.reg].offset; cfi->stack_size = cfa->offset; + } else if (cfa->base == CFI_SP && + cfi->vals[op->src.reg].base == CFI_SP_INDIRECT && + cfi->vals[op->src.reg].offset == cfa->offset) { + + /* + * Stack swizzle: + * + * 1: mov %rsp, (%[tos]) + * 2: mov %[tos], %rsp + * ... + * 3: pop %rsp + * + * Where: + * + * 1 - places a pointer to the previous + * stack at the Top-of-Stack of the + * new stack. + * + * 2 - switches to the new stack. + * + * 3 - pops the Top-of-Stack to restore + * the original stack. + * + * Note: we set base to SP_INDIRECT + * here and preserve offset. Therefore + * when the unwinder reaches ToS it + * will dereference SP and then add the + * offset to find the next frame, IOW: + * (%rsp) + offset. + */ + cfa->base = CFI_SP_INDIRECT; + } else { cfa->base = CFI_UNDEFINED; cfa->offset = 0; @@ -2041,6 +2103,17 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi, break; } + if (!cfi->drap && op->src.reg == CFI_SP && + op->dest.reg == CFI_BP && cfa->base == CFI_SP && + check_reg_frame_pos(®s[CFI_BP], -cfa->offset + op->src.offset)) { + + /* lea disp(%rsp), %rbp */ + cfa->base = CFI_BP; + cfa->offset -= op->src.offset; + cfi->bp_scratch = false; + break; + } + if (op->src.reg == CFI_SP && cfa->base == CFI_SP) { /* drap: lea disp(%rsp), %drap */ @@ -2107,6 +2180,13 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi, case OP_SRC_POP: case OP_SRC_POPF: + if (op->dest.reg == CFI_SP && cfa->base == CFI_SP_INDIRECT) { + + /* pop %rsp; # restore from a stack swizzle */ + cfa->base = CFI_SP; + break; + } + if (!cfi->drap && op->dest.reg == cfa->base) { /* pop %rbp */ @@ -2135,6 +2215,14 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi, break; case OP_SRC_REG_INDIRECT: + if (!cfi->drap && op->dest.reg == cfa->base && + op->dest.reg == CFI_BP) { + + /* mov disp(%rsp), %rbp */ + cfa->base = CFI_SP; + cfa->offset = cfi->stack_size; + } + if (cfi->drap && op->src.reg == CFI_BP && op->src.offset == cfi->drap_offset) { @@ -2156,6 +2244,12 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi, /* mov disp(%rbp), %reg */ /* mov disp(%rsp), %reg */ restore_reg(cfi, op->dest.reg); + + } else if (op->src.reg == CFI_SP && + op->src.offset == regs[op->dest.reg].offset + cfi->stack_size) { + + /* mov disp(%rsp), %reg */ + restore_reg(cfi, op->dest.reg); } break; @@ -2233,6 +2327,18 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi, /* mov reg, disp(%rsp) */ save_reg(cfi, op->src.reg, CFI_CFA, op->dest.offset - cfi->cfa.offset); + + } else if (op->dest.reg == CFI_SP) { + + /* mov reg, disp(%rsp) */ + save_reg(cfi, op->src.reg, CFI_CFA, + op->dest.offset - cfi->stack_size); + + } else if (op->src.reg == CFI_SP && op->dest.offset == 0) { + + /* mov %rsp, (%reg); # setup a stack swizzle. */ + cfi->vals[op->dest.reg].base = CFI_SP_INDIRECT; + cfi->vals[op->dest.reg].offset = cfa->offset; } break; @@ -2280,22 +2386,47 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi, return 0; } -static int handle_insn_ops(struct instruction *insn, struct insn_state *state) +/* + * The stack layouts of alternatives instructions can sometimes diverge when + * they have stack modifications. That's fine as long as the potential stack + * layouts don't conflict at any given potential instruction boundary. + * + * Flatten the CFIs of the different alternative code streams (both original + * and replacement) into a single shared CFI array which can be used to detect + * conflicts and nicely feed a linear array of ORC entries to the unwinder. + */ +static int propagate_alt_cfi(struct objtool_file *file, struct instruction *insn) { - struct stack_op *op; + struct cfi_state **alt_cfi; + int group_off; - list_for_each_entry(op, &insn->stack_ops, list) { - struct cfi_state old_cfi = state->cfi; - int res; + if (!insn->alt_group) + return 0; - res = update_cfi_state(insn, &state->cfi, op); - if (res) - return res; + alt_cfi = insn->alt_group->cfi; + group_off = insn->offset - insn->alt_group->first_insn->offset; - if (insn->alt_group && memcmp(&state->cfi, &old_cfi, sizeof(struct cfi_state))) { - WARN_FUNC("alternative modifies stack", insn->sec, insn->offset); + if (!alt_cfi[group_off]) { + alt_cfi[group_off] = &insn->cfi; + } else { + if (memcmp(alt_cfi[group_off], &insn->cfi, sizeof(struct cfi_state))) { + WARN_FUNC("stack layout conflict in alternatives", + insn->sec, insn->offset); return -1; } + } + + return 0; +} + +static int handle_insn_ops(struct instruction *insn, struct insn_state *state) +{ + struct stack_op *op; + + list_for_each_entry(op, &insn->stack_ops, list) { + + if (update_cfi_state(insn, &state->cfi, op)) + return 1; if (op->dest.type == OP_DEST_PUSHF) { if (!state->uaccess_stack) { @@ -2485,28 +2616,20 @@ static int validate_return(struct symbol *func, struct instruction *insn, struct return 0; } -/* - * Alternatives should not contain any ORC entries, this in turn means they - * should not contain any CFI ops, which implies all instructions should have - * the same same CFI state. - * - * It is possible to constuct alternatives that have unreachable holes that go - * unreported (because they're NOPs), such holes would result in CFI_UNDEFINED - * states which then results in ORC entries, which we just said we didn't want. - * - * Avoid them by copying the CFI entry of the first instruction into the whole - * alternative. - */ -static void fill_alternative_cfi(struct objtool_file *file, struct instruction *insn) +static struct instruction *next_insn_to_validate(struct objtool_file *file, + struct instruction *insn) { - struct instruction *first_insn = insn; - int alt_group = insn->alt_group; + struct alt_group *alt_group = insn->alt_group; - sec_for_each_insn_continue(file, insn) { - if (insn->alt_group != alt_group) - break; - insn->cfi = first_insn->cfi; - } + /* + * Simulate the fact that alternatives are patched in-place. When the + * end of a replacement alt_group is reached, redirect objtool flow to + * the end of the original alt_group. + */ + if (alt_group && insn == alt_group->last_insn && alt_group->orig_group) + return next_insn_same_sec(file, alt_group->orig_group->last_insn); + + return next_insn_same_sec(file, insn); } /* @@ -2527,7 +2650,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, sec = insn->sec; while (1) { - next_insn = next_insn_same_sec(file, insn); + next_insn = next_insn_to_validate(file, insn); if (file->c_file && func && insn->func && func != insn->func->pfunc) { WARN("%s() falls through to next function %s()", @@ -2560,6 +2683,9 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, insn->visited |= visited; + if (propagate_alt_cfi(file, insn)) + return 1; + if (!insn->ignore_alts && !list_empty(&insn->alts)) { bool skip_orig = false; @@ -2575,9 +2701,6 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, } } - if (insn->alt_group) - fill_alternative_cfi(file, insn); - if (skip_orig) return 0; } @@ -2615,7 +2738,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, case INSN_JUMP_CONDITIONAL: case INSN_JUMP_UNCONDITIONAL: - if (func && is_sibling_call(insn)) { + if (is_sibling_call(insn)) { ret = validate_sibling_call(insn, &state); if (ret) return ret; @@ -2637,7 +2760,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, case INSN_JUMP_DYNAMIC: case INSN_JUMP_DYNAMIC_CONDITIONAL: - if (func && is_sibling_call(insn)) { + if (is_sibling_call(insn)) { ret = validate_sibling_call(insn, &state); if (ret) return ret; @@ -2680,15 +2803,19 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, break; case INSN_STD: - if (state.df) + if (state.df) { WARN_FUNC("recursive STD", sec, insn->offset); + return 1; + } state.df = true; break; case INSN_CLD: - if (!state.df && func) + if (!state.df && func) { WARN_FUNC("redundant CLD", sec, insn->offset); + return 1; + } state.df = false; break; @@ -2811,9 +2938,6 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio !strcmp(insn->sec->name, ".altinstr_aux")) return true; - if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->offset == FAKE_JUMP_OFFSET) - return true; - if (!insn->func) return false; @@ -2899,10 +3023,7 @@ static int validate_section(struct objtool_file *file, struct section *sec) continue; init_insn_state(&state, sec); - state.cfi.cfa = initial_func_cfi.cfa; - memcpy(&state.cfi.regs, &initial_func_cfi.regs, - CFI_NUM_REGS * sizeof(struct cfi_reg)); - state.cfi.stack_size = initial_func_cfi.cfa.offset; + set_func_state(&state.cfi); warnings += validate_symbol(file, sec, func, &state); } @@ -3023,14 +3144,10 @@ int check(struct objtool_file *file) } out: - if (ret < 0) { - /* - * Fatal error. The binary is corrupt or otherwise broken in - * some way, or objtool itself is broken. Fail the kernel - * build. - */ - return ret; - } - + /* + * For now, don't fail the kernel build on fatal warnings. These + * errors are still fairly common due to the growing matrix of + * supported toolchains and their recent pace of change. + */ return 0; } diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index be89c741ba9a..93fa833a49a5 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -15,10 +15,10 @@ #include <string.h> #include <unistd.h> #include <errno.h> -#include "builtin.h" +#include <objtool/builtin.h> -#include "elf.h" -#include "warn.h" +#include <objtool/elf.h> +#include <objtool/warn.h> #define MAX_NAME_LEN 128 @@ -43,75 +43,24 @@ static void elf_hash_init(struct hlist_head *table) #define elf_hash_for_each_possible(name, obj, member, key) \ hlist_for_each_entry(obj, &name[hash_min(key, elf_hash_bits())], member) -static void rb_add(struct rb_root *tree, struct rb_node *node, - int (*cmp)(struct rb_node *, const struct rb_node *)) -{ - struct rb_node **link = &tree->rb_node; - struct rb_node *parent = NULL; - - while (*link) { - parent = *link; - if (cmp(node, parent) < 0) - link = &parent->rb_left; - else - link = &parent->rb_right; - } - - rb_link_node(node, parent, link); - rb_insert_color(node, tree); -} - -static struct rb_node *rb_find_first(const struct rb_root *tree, const void *key, - int (*cmp)(const void *key, const struct rb_node *)) -{ - struct rb_node *node = tree->rb_node; - struct rb_node *match = NULL; - - while (node) { - int c = cmp(key, node); - if (c <= 0) { - if (!c) - match = node; - node = node->rb_left; - } else if (c > 0) { - node = node->rb_right; - } - } - - return match; -} - -static struct rb_node *rb_next_match(struct rb_node *node, const void *key, - int (*cmp)(const void *key, const struct rb_node *)) -{ - node = rb_next(node); - if (node && cmp(key, node)) - node = NULL; - return node; -} - -#define rb_for_each(tree, node, key, cmp) \ - for ((node) = rb_find_first((tree), (key), (cmp)); \ - (node); (node) = rb_next_match((node), (key), (cmp))) - -static int symbol_to_offset(struct rb_node *a, const struct rb_node *b) +static bool symbol_to_offset(struct rb_node *a, const struct rb_node *b) { struct symbol *sa = rb_entry(a, struct symbol, node); struct symbol *sb = rb_entry(b, struct symbol, node); if (sa->offset < sb->offset) - return -1; + return true; if (sa->offset > sb->offset) - return 1; + return false; if (sa->len < sb->len) - return -1; + return true; if (sa->len > sb->len) - return 1; + return false; sa->alias = sb; - return 0; + return false; } static int symbol_by_offset(const void *key, const struct rb_node *node) @@ -165,7 +114,7 @@ struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset) { struct rb_node *node; - rb_for_each(&sec->symbol_tree, node, &offset, symbol_by_offset) { + rb_for_each(node, &offset, &sec->symbol_tree, symbol_by_offset) { struct symbol *s = rb_entry(node, struct symbol, node); if (s->offset == offset && s->type != STT_SECTION) @@ -179,7 +128,7 @@ struct symbol *find_func_by_offset(struct section *sec, unsigned long offset) { struct rb_node *node; - rb_for_each(&sec->symbol_tree, node, &offset, symbol_by_offset) { + rb_for_each(node, &offset, &sec->symbol_tree, symbol_by_offset) { struct symbol *s = rb_entry(node, struct symbol, node); if (s->offset == offset && s->type == STT_FUNC) @@ -193,7 +142,7 @@ struct symbol *find_symbol_containing(const struct section *sec, unsigned long o { struct rb_node *node; - rb_for_each(&sec->symbol_tree, node, &offset, symbol_by_offset) { + rb_for_each(node, &offset, &sec->symbol_tree, symbol_by_offset) { struct symbol *s = rb_entry(node, struct symbol, node); if (s->type != STT_SECTION) @@ -207,7 +156,7 @@ struct symbol *find_func_containing(struct section *sec, unsigned long offset) { struct rb_node *node; - rb_for_each(&sec->symbol_tree, node, &offset, symbol_by_offset) { + rb_for_each(node, &offset, &sec->symbol_tree, symbol_by_offset) { struct symbol *s = rb_entry(node, struct symbol, node); if (s->type == STT_FUNC) @@ -380,8 +329,11 @@ static int read_symbols(struct elf *elf) symtab = find_section_by_name(elf, ".symtab"); if (!symtab) { - WARN("missing symbol table"); - return -1; + /* + * A missing symbol table is actually possible if it's an empty + * .o file. This can happen for thunk_64.o. + */ + return 0; } symtab_shndx = find_section_by_name(elf, ".symtab_shndx"); @@ -439,7 +391,7 @@ static int read_symbols(struct elf *elf) sym->offset = sym->sym.st_value; sym->len = sym->sym.st_size; - rb_add(&sym->sec->symbol_tree, &sym->node, symbol_to_offset); + rb_add(&sym->node, &sym->sec->symbol_tree, symbol_to_offset); pnode = rb_prev(&sym->node); if (pnode) entry = &rb_entry(pnode, struct symbol, node)->list; @@ -448,6 +400,13 @@ static int read_symbols(struct elf *elf) list_add(&sym->list, entry); elf_hash_add(elf->symbol_hash, &sym->hash, sym->idx); elf_hash_add(elf->symbol_name_hash, &sym->name_hash, str_hash(sym->name)); + + /* + * Don't store empty STT_NOTYPE symbols in the rbtree. They + * can exist within a function, confusing the sorting. + */ + if (!sym->len) + rb_erase(&sym->node, &sym->sec->symbol_tree); } if (stats) @@ -855,25 +814,27 @@ static int elf_rebuild_rel_reloc_section(struct section *sec, int nr) { struct reloc *reloc; int idx = 0, size; - GElf_Rel *relocs; + void *buf; /* Allocate a buffer for relocations */ - size = nr * sizeof(*relocs); - relocs = malloc(size); - if (!relocs) { + size = nr * sizeof(GElf_Rel); + buf = malloc(size); + if (!buf) { perror("malloc"); return -1; } - sec->data->d_buf = relocs; + sec->data->d_buf = buf; sec->data->d_size = size; + sec->data->d_type = ELF_T_REL; sec->sh.sh_size = size; idx = 0; list_for_each_entry(reloc, &sec->reloc_list, list) { - relocs[idx].r_offset = reloc->offset; - relocs[idx].r_info = GELF_R_INFO(reloc->sym->idx, reloc->type); + reloc->rel.r_offset = reloc->offset; + reloc->rel.r_info = GELF_R_INFO(reloc->sym->idx, reloc->type); + gelf_update_rel(sec->data, idx, &reloc->rel); idx++; } @@ -884,26 +845,28 @@ static int elf_rebuild_rela_reloc_section(struct section *sec, int nr) { struct reloc *reloc; int idx = 0, size; - GElf_Rela *relocs; + void *buf; /* Allocate a buffer for relocations with addends */ - size = nr * sizeof(*relocs); - relocs = malloc(size); - if (!relocs) { + size = nr * sizeof(GElf_Rela); + buf = malloc(size); + if (!buf) { perror("malloc"); return -1; } - sec->data->d_buf = relocs; + sec->data->d_buf = buf; sec->data->d_size = size; + sec->data->d_type = ELF_T_RELA; sec->sh.sh_size = size; idx = 0; list_for_each_entry(reloc, &sec->reloc_list, list) { - relocs[idx].r_offset = reloc->offset; - relocs[idx].r_addend = reloc->addend; - relocs[idx].r_info = GELF_R_INFO(reloc->sym->idx, reloc->type); + reloc->rela.r_offset = reloc->offset; + reloc->rela.r_addend = reloc->addend; + reloc->rela.r_info = GELF_R_INFO(reloc->sym->idx, reloc->type); + gelf_update_rela(sec->data, idx, &reloc->rela); idx++; } diff --git a/tools/objtool/arch.h b/tools/objtool/include/objtool/arch.h index 4a84c3081b8e..6ff0685f5cc5 100644 --- a/tools/objtool/arch.h +++ b/tools/objtool/include/objtool/arch.h @@ -8,12 +8,8 @@ #include <stdbool.h> #include <linux/list.h> -#include "objtool.h" -#include "cfi.h" - -#ifdef INSN_USE_ORC -#include <asm/orc_types.h> -#endif +#include <objtool/objtool.h> +#include <objtool/cfi.h> enum insn_type { INSN_JUMP_CONDITIONAL, diff --git a/tools/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h index 2502bb27de17..2502bb27de17 100644 --- a/tools/objtool/builtin.h +++ b/tools/objtool/include/objtool/builtin.h diff --git a/tools/objtool/cfi.h b/tools/objtool/include/objtool/cfi.h index c7c59c6a44ee..fd5cb0bed9bf 100644 --- a/tools/objtool/cfi.h +++ b/tools/objtool/include/objtool/cfi.h @@ -6,7 +6,7 @@ #ifndef _OBJTOOL_CFI_H #define _OBJTOOL_CFI_H -#include "cfi_regs.h" +#include <arch/cfi_regs.h> #define CFI_UNDEFINED -1 #define CFI_CFA -2 diff --git a/tools/objtool/check.h b/tools/objtool/include/objtool/check.h index 070baec2050a..f5be798107bc 100644 --- a/tools/objtool/check.h +++ b/tools/objtool/include/objtool/check.h @@ -7,8 +7,8 @@ #define _CHECK_H #include <stdbool.h> -#include "cfi.h" -#include "arch.h" +#include <objtool/cfi.h> +#include <objtool/arch.h> struct insn_state { struct cfi_state cfi; @@ -19,6 +19,23 @@ struct insn_state { s8 instr; }; +struct alt_group { + /* + * Pointer from a replacement group to the original group. NULL if it + * *is* the original group. + */ + struct alt_group *orig_group; + + /* First and last instructions in the group */ + struct instruction *first_insn, *last_insn; + + /* + * Byte-offset-addressed len-sized array of pointers to CFI structs. + * This is shared with the other alt_groups in the same alternative. + */ + struct cfi_state **cfi; +}; + struct instruction { struct list_head list; struct hlist_node hash; @@ -34,8 +51,7 @@ struct instruction { bool retpoline_safe; s8 instr; u8 visited; - u8 ret_offset; - int alt_group; + struct alt_group *alt_group; struct symbol *call_dest; struct instruction *jump_dest; struct instruction *first_jump_src; @@ -44,9 +60,6 @@ struct instruction { struct symbol *func; struct list_head stack_ops; struct cfi_state cfi; -#ifdef INSN_USE_ORC - struct orc_entry orc; -#endif }; static inline bool is_static_jump(struct instruction *insn) @@ -55,6 +68,17 @@ static inline bool is_static_jump(struct instruction *insn) insn->type == INSN_JUMP_UNCONDITIONAL; } +static inline bool is_dynamic_jump(struct instruction *insn) +{ + return insn->type == INSN_JUMP_DYNAMIC || + insn->type == INSN_JUMP_DYNAMIC_CONDITIONAL; +} + +static inline bool is_jump(struct instruction *insn) +{ + return is_static_jump(insn) || is_dynamic_jump(insn); +} + struct instruction *find_insn(struct objtool_file *file, struct section *sec, unsigned long offset); diff --git a/tools/objtool/elf.h b/tools/objtool/include/objtool/elf.h index e6890cc70a25..e6890cc70a25 100644 --- a/tools/objtool/elf.h +++ b/tools/objtool/include/objtool/elf.h diff --git a/tools/objtool/include/objtool/endianness.h b/tools/objtool/include/objtool/endianness.h new file mode 100644 index 000000000000..10241341eff3 --- /dev/null +++ b/tools/objtool/include/objtool/endianness.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _OBJTOOL_ENDIANNESS_H +#define _OBJTOOL_ENDIANNESS_H + +#include <arch/endianness.h> +#include <linux/kernel.h> +#include <endian.h> + +#ifndef __TARGET_BYTE_ORDER +#error undefined arch __TARGET_BYTE_ORDER +#endif + +#if __BYTE_ORDER != __TARGET_BYTE_ORDER +#define __NEED_BSWAP 1 +#else +#define __NEED_BSWAP 0 +#endif + +/* + * Does a byte swap if target endianness doesn't match the host, i.e. cross + * compilation for little endian on big endian and vice versa. + * To be used for multi-byte values conversion, which are read from / about + * to be written to a target native endianness ELF file. + */ +#define bswap_if_needed(val) \ +({ \ + __typeof__(val) __ret; \ + switch (sizeof(val)) { \ + case 8: __ret = __NEED_BSWAP ? bswap_64(val) : (val); break; \ + case 4: __ret = __NEED_BSWAP ? bswap_32(val) : (val); break; \ + case 2: __ret = __NEED_BSWAP ? bswap_16(val) : (val); break; \ + default: \ + BUILD_BUG(); break; \ + } \ + __ret; \ +}) + +#endif /* _OBJTOOL_ENDIANNESS_H */ diff --git a/tools/objtool/objtool.h b/tools/objtool/include/objtool/objtool.h index cf004dd60c2b..e68e37476c15 100644 --- a/tools/objtool/objtool.h +++ b/tools/objtool/include/objtool/objtool.h @@ -10,7 +10,7 @@ #include <linux/list.h> #include <linux/hashtable.h> -#include "elf.h" +#include <objtool/elf.h> #define __weak __attribute__((weak)) @@ -27,7 +27,6 @@ struct objtool_file *objtool_open_read(const char *_objname); int check(struct objtool_file *file); int orc_dump(const char *objname); -int create_orc(struct objtool_file *file); -int create_orc_sections(struct objtool_file *file); +int orc_create(struct objtool_file *file); #endif /* _OBJTOOL_H */ diff --git a/tools/objtool/special.h b/tools/objtool/include/objtool/special.h index abddf38ef334..8a09f4e9d480 100644 --- a/tools/objtool/special.h +++ b/tools/objtool/include/objtool/special.h @@ -7,8 +7,8 @@ #define _SPECIAL_H #include <stdbool.h> -#include "check.h" -#include "elf.h" +#include <objtool/check.h> +#include <objtool/elf.h> #define C_JUMP_TABLE_SECTION ".rodata..c_jump_table" diff --git a/tools/objtool/warn.h b/tools/objtool/include/objtool/warn.h index 7799f60de80a..d99c4675e4a5 100644 --- a/tools/objtool/warn.h +++ b/tools/objtool/include/objtool/warn.h @@ -11,7 +11,7 @@ #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> -#include "elf.h" +#include <objtool/elf.h> extern const char *objname; diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c index c1819a6f2a18..7b97ce499405 100644 --- a/tools/objtool/objtool.c +++ b/tools/objtool/objtool.c @@ -21,9 +21,9 @@ #include <subcmd/pager.h> #include <linux/kernel.h> -#include "builtin.h" -#include "objtool.h" -#include "warn.h" +#include <objtool/builtin.h> +#include <objtool/objtool.h> +#include <objtool/warn.h> struct cmd_struct { const char *name; diff --git a/tools/objtool/orc_dump.c b/tools/objtool/orc_dump.c index 5e6a95368d35..f5a8508c42d6 100644 --- a/tools/objtool/orc_dump.c +++ b/tools/objtool/orc_dump.c @@ -6,8 +6,9 @@ #include <unistd.h> #include <linux/objtool.h> #include <asm/orc_types.h> -#include "objtool.h" -#include "warn.h" +#include <objtool/objtool.h> +#include <objtool/warn.h> +#include <objtool/endianness.h> static const char *reg_name(unsigned int reg) { @@ -54,7 +55,7 @@ static void print_reg(unsigned int reg, int offset) if (reg == ORC_REG_BP_INDIRECT) printf("(bp%+d)", offset); else if (reg == ORC_REG_SP_INDIRECT) - printf("(sp%+d)", offset); + printf("(sp)%+d", offset); else if (reg == ORC_REG_UNDEFINED) printf("(und)"); else @@ -197,11 +198,11 @@ int orc_dump(const char *_objname) printf(" sp:"); - print_reg(orc[i].sp_reg, orc[i].sp_offset); + print_reg(orc[i].sp_reg, bswap_if_needed(orc[i].sp_offset)); printf(" bp:"); - print_reg(orc[i].bp_reg, orc[i].bp_offset); + print_reg(orc[i].bp_reg, bswap_if_needed(orc[i].bp_offset)); printf(" type:%s end:%d\n", orc_type_name(orc[i].type), orc[i].end); diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c index 9ce68b385a1b..738aa5021bc4 100644 --- a/tools/objtool/orc_gen.c +++ b/tools/objtool/orc_gen.c @@ -9,93 +9,91 @@ #include <linux/objtool.h> #include <asm/orc_types.h> -#include "check.h" -#include "warn.h" +#include <objtool/check.h> +#include <objtool/warn.h> +#include <objtool/endianness.h> -int create_orc(struct objtool_file *file) +static int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi) { - struct instruction *insn; + struct instruction *insn = container_of(cfi, struct instruction, cfi); + struct cfi_reg *bp = &cfi->regs[CFI_BP]; - for_each_insn(file, insn) { - struct orc_entry *orc = &insn->orc; - struct cfi_reg *cfa = &insn->cfi.cfa; - struct cfi_reg *bp = &insn->cfi.regs[CFI_BP]; + memset(orc, 0, sizeof(*orc)); - if (!insn->sec->text) - continue; - - orc->end = insn->cfi.end; - - if (cfa->base == CFI_UNDEFINED) { - orc->sp_reg = ORC_REG_UNDEFINED; - continue; - } + orc->end = cfi->end; - switch (cfa->base) { - case CFI_SP: - orc->sp_reg = ORC_REG_SP; - break; - case CFI_SP_INDIRECT: - orc->sp_reg = ORC_REG_SP_INDIRECT; - break; - case CFI_BP: - orc->sp_reg = ORC_REG_BP; - break; - case CFI_BP_INDIRECT: - orc->sp_reg = ORC_REG_BP_INDIRECT; - break; - case CFI_R10: - orc->sp_reg = ORC_REG_R10; - break; - case CFI_R13: - orc->sp_reg = ORC_REG_R13; - break; - case CFI_DI: - orc->sp_reg = ORC_REG_DI; - break; - case CFI_DX: - orc->sp_reg = ORC_REG_DX; - break; - default: - WARN_FUNC("unknown CFA base reg %d", - insn->sec, insn->offset, cfa->base); - return -1; - } + if (cfi->cfa.base == CFI_UNDEFINED) { + orc->sp_reg = ORC_REG_UNDEFINED; + return 0; + } - switch(bp->base) { - case CFI_UNDEFINED: - orc->bp_reg = ORC_REG_UNDEFINED; - break; - case CFI_CFA: - orc->bp_reg = ORC_REG_PREV_SP; - break; - case CFI_BP: - orc->bp_reg = ORC_REG_BP; - break; - default: - WARN_FUNC("unknown BP base reg %d", - insn->sec, insn->offset, bp->base); - return -1; - } + switch (cfi->cfa.base) { + case CFI_SP: + orc->sp_reg = ORC_REG_SP; + break; + case CFI_SP_INDIRECT: + orc->sp_reg = ORC_REG_SP_INDIRECT; + break; + case CFI_BP: + orc->sp_reg = ORC_REG_BP; + break; + case CFI_BP_INDIRECT: + orc->sp_reg = ORC_REG_BP_INDIRECT; + break; + case CFI_R10: + orc->sp_reg = ORC_REG_R10; + break; + case CFI_R13: + orc->sp_reg = ORC_REG_R13; + break; + case CFI_DI: + orc->sp_reg = ORC_REG_DI; + break; + case CFI_DX: + orc->sp_reg = ORC_REG_DX; + break; + default: + WARN_FUNC("unknown CFA base reg %d", + insn->sec, insn->offset, cfi->cfa.base); + return -1; + } - orc->sp_offset = cfa->offset; - orc->bp_offset = bp->offset; - orc->type = insn->cfi.type; + switch (bp->base) { + case CFI_UNDEFINED: + orc->bp_reg = ORC_REG_UNDEFINED; + break; + case CFI_CFA: + orc->bp_reg = ORC_REG_PREV_SP; + break; + case CFI_BP: + orc->bp_reg = ORC_REG_BP; + break; + default: + WARN_FUNC("unknown BP base reg %d", + insn->sec, insn->offset, bp->base); + return -1; } + orc->sp_offset = cfi->cfa.offset; + orc->bp_offset = bp->offset; + orc->type = cfi->type; + return 0; } -static int create_orc_entry(struct elf *elf, struct section *u_sec, struct section *ip_relocsec, - unsigned int idx, struct section *insn_sec, - unsigned long insn_off, struct orc_entry *o) +static int write_orc_entry(struct elf *elf, struct section *orc_sec, + struct section *ip_rsec, unsigned int idx, + struct section *insn_sec, unsigned long insn_off, + struct orc_entry *o) { struct orc_entry *orc; struct reloc *reloc; /* populate ORC data */ - orc = (struct orc_entry *)u_sec->data->d_buf + idx; + orc = (struct orc_entry *)orc_sec->data->d_buf + idx; memcpy(orc, o, sizeof(*orc)); + orc->sp_offset = bswap_if_needed(orc->sp_offset); + orc->bp_offset = bswap_if_needed(orc->bp_offset); /* populate reloc for ip */ reloc = malloc(sizeof(*reloc)); @@ -114,102 +112,149 @@ static int create_orc_entry(struct elf *elf, struct section *u_sec, struct secti reloc->type = R_X86_64_PC32; reloc->offset = idx * sizeof(int); - reloc->sec = ip_relocsec; + reloc->sec = ip_rsec; elf_add_reloc(elf, reloc); return 0; } -int create_orc_sections(struct objtool_file *file) -{ - struct instruction *insn, *prev_insn; - struct section *sec, *u_sec, *ip_relocsec; - unsigned int idx; +struct orc_list_entry { + struct list_head list; + struct orc_entry orc; + struct section *insn_sec; + unsigned long insn_off; +}; - struct orc_entry empty = { - .sp_reg = ORC_REG_UNDEFINED, - .bp_reg = ORC_REG_UNDEFINED, - .type = UNWIND_HINT_TYPE_CALL, - }; +static int orc_list_add(struct list_head *orc_list, struct orc_entry *orc, + struct section *sec, unsigned long offset) +{ + struct orc_list_entry *entry = malloc(sizeof(*entry)); - sec = find_section_by_name(file->elf, ".orc_unwind"); - if (sec) { - WARN("file already has .orc_unwind section, skipping"); + if (!entry) { + WARN("malloc failed"); return -1; } - /* count the number of needed orcs */ - idx = 0; - for_each_sec(file, sec) { - if (!sec->text) - continue; - - prev_insn = NULL; - sec_for_each_insn(file, sec, insn) { - if (!prev_insn || - memcmp(&insn->orc, &prev_insn->orc, - sizeof(struct orc_entry))) { - idx++; - } - prev_insn = insn; - } - - /* section terminator */ - if (prev_insn) - idx++; - } - if (!idx) - return -1; + entry->orc = *orc; + entry->insn_sec = sec; + entry->insn_off = offset; + list_add_tail(&entry->list, orc_list); + return 0; +} - /* create .orc_unwind_ip and .rela.orc_unwind_ip sections */ - sec = elf_create_section(file->elf, ".orc_unwind_ip", 0, sizeof(int), idx); - if (!sec) - return -1; +static unsigned long alt_group_len(struct alt_group *alt_group) +{ + return alt_group->last_insn->offset + + alt_group->last_insn->len - + alt_group->first_insn->offset; +} - ip_relocsec = elf_create_reloc_section(file->elf, sec, SHT_RELA); - if (!ip_relocsec) - return -1; +int orc_create(struct objtool_file *file) +{ + struct section *sec, *ip_rsec, *orc_sec; + unsigned int nr = 0, idx = 0; + struct orc_list_entry *entry; + struct list_head orc_list; - /* create .orc_unwind section */ - u_sec = elf_create_section(file->elf, ".orc_unwind", 0, - sizeof(struct orc_entry), idx); + struct orc_entry null = { + .sp_reg = ORC_REG_UNDEFINED, + .bp_reg = ORC_REG_UNDEFINED, + .type = UNWIND_HINT_TYPE_CALL, + }; - /* populate sections */ - idx = 0; + /* Build a deduplicated list of ORC entries: */ + INIT_LIST_HEAD(&orc_list); for_each_sec(file, sec) { + struct orc_entry orc, prev_orc = {0}; + struct instruction *insn; + bool empty = true; + if (!sec->text) continue; - prev_insn = NULL; sec_for_each_insn(file, sec, insn) { - if (!prev_insn || memcmp(&insn->orc, &prev_insn->orc, - sizeof(struct orc_entry))) { + struct alt_group *alt_group = insn->alt_group; + int i; - if (create_orc_entry(file->elf, u_sec, ip_relocsec, idx, - insn->sec, insn->offset, - &insn->orc)) + if (!alt_group) { + if (init_orc_entry(&orc, &insn->cfi)) return -1; + if (!memcmp(&prev_orc, &orc, sizeof(orc))) + continue; + if (orc_list_add(&orc_list, &orc, sec, + insn->offset)) + return -1; + nr++; + prev_orc = orc; + empty = false; + continue; + } - idx++; + /* + * Alternatives can have different stack layout + * possibilities (but they shouldn't conflict). + * Instead of traversing the instructions, use the + * alt_group's flattened byte-offset-addressed CFI + * array. + */ + for (i = 0; i < alt_group_len(alt_group); i++) { + struct cfi_state *cfi = alt_group->cfi[i]; + if (!cfi) + continue; + if (init_orc_entry(&orc, cfi)) + return -1; + if (!memcmp(&prev_orc, &orc, sizeof(orc))) + continue; + if (orc_list_add(&orc_list, &orc, insn->sec, + insn->offset + i)) + return -1; + nr++; + prev_orc = orc; + empty = false; } - prev_insn = insn; - } - /* section terminator */ - if (prev_insn) { - if (create_orc_entry(file->elf, u_sec, ip_relocsec, idx, - prev_insn->sec, - prev_insn->offset + prev_insn->len, - &empty)) - return -1; + /* Skip to the end of the alt_group */ + insn = alt_group->last_insn; + } - idx++; + /* Add a section terminator */ + if (!empty) { + orc_list_add(&orc_list, &null, sec, sec->len); + nr++; } } + if (!nr) + return 0; + + /* Create .orc_unwind, .orc_unwind_ip and .rela.orc_unwind_ip sections: */ + sec = find_section_by_name(file->elf, ".orc_unwind"); + if (sec) { + WARN("file already has .orc_unwind section, skipping"); + return -1; + } + orc_sec = elf_create_section(file->elf, ".orc_unwind", 0, + sizeof(struct orc_entry), nr); + if (!orc_sec) + return -1; + + sec = elf_create_section(file->elf, ".orc_unwind_ip", 0, sizeof(int), nr); + if (!sec) + return -1; + ip_rsec = elf_create_reloc_section(file->elf, sec, SHT_RELA); + if (!ip_rsec) + return -1; + + /* Write ORC entries to sections: */ + list_for_each_entry(entry, &orc_list, list) { + if (write_orc_entry(file->elf, orc_sec, ip_rsec, idx++, + entry->insn_sec, entry->insn_off, + &entry->orc)) + return -1; + } - if (elf_rebuild_reloc_section(file->elf, ip_relocsec)) + if (elf_rebuild_reloc_section(file->elf, ip_rsec)) return -1; return 0; diff --git a/tools/objtool/special.c b/tools/objtool/special.c index 1a2420febd08..2c7fbda7b055 100644 --- a/tools/objtool/special.c +++ b/tools/objtool/special.c @@ -11,10 +11,11 @@ #include <stdlib.h> #include <string.h> -#include "builtin.h" -#include "special.h" -#include "warn.h" -#include "arch_special.h" +#include <arch/special.h> +#include <objtool/builtin.h> +#include <objtool/special.h> +#include <objtool/warn.h> +#include <objtool/endianness.h> struct special_entry { const char *sec; @@ -77,8 +78,9 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry, if (entry->feature) { unsigned short feature; - feature = *(unsigned short *)(sec->data->d_buf + offset + - entry->feature); + feature = bswap_if_needed(*(unsigned short *)(sec->data->d_buf + + offset + + entry->feature)); arch_handle_alternative(feature, alt); } diff --git a/tools/objtool/weak.c b/tools/objtool/weak.c index 7843e9a7a72f..8314e824db4a 100644 --- a/tools/objtool/weak.c +++ b/tools/objtool/weak.c @@ -7,7 +7,7 @@ #include <stdbool.h> #include <errno.h> -#include "objtool.h" +#include <objtool/objtool.h> #define UNSUPPORTED(name) \ ({ \ @@ -25,12 +25,7 @@ int __weak orc_dump(const char *_objname) UNSUPPORTED("orc"); } -int __weak create_orc(struct objtool_file *file) -{ - UNSUPPORTED("orc"); -} - -int __weak create_orc_sections(struct objtool_file *file) +int __weak orc_create(struct objtool_file *file) { UNSUPPORTED("orc"); } diff --git a/tools/perf/Build b/tools/perf/Build index 5f392dbb88fc..db61dbe2b543 100644 --- a/tools/perf/Build +++ b/tools/perf/Build @@ -24,6 +24,7 @@ perf-y += builtin-mem.o perf-y += builtin-data.o perf-y += builtin-version.o perf-y += builtin-c2c.o +perf-y += builtin-daemon.o perf-$(CONFIG_TRACE) += builtin-trace.o perf-$(CONFIG_LIBELF) += builtin-probe.o diff --git a/tools/perf/Documentation/examples.txt b/tools/perf/Documentation/examples.txt index a4e392156488..c0d22fbe9201 100644 --- a/tools/perf/Documentation/examples.txt +++ b/tools/perf/Documentation/examples.txt @@ -3,7 +3,7 @@ ****** perf by examples ****** ------------------------------ -[ From an e-mail by Ingo Molnar, http://lkml.org/lkml/2009/8/4/346 ] +[ From an e-mail by Ingo Molnar, https://lore.kernel.org/lkml/20090804195717.GA5998@elte.hu ] First, discovery/enumeration of available counters can be done via diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt index 079cdfabb352..0f1005209a2b 100644 --- a/tools/perf/Documentation/itrace.txt +++ b/tools/perf/Documentation/itrace.txt @@ -4,7 +4,7 @@ r synthesize branches events (returns only) x synthesize transactions events w synthesize ptwrite events - p synthesize power events + p synthesize power events (incl. PSB events for Intel PT) o synthesize other events recorded due to the use of aux-output (refer to perf record) e synthesize error events diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt index f6de0952ff3c..bb167e32a1d7 100644 --- a/tools/perf/Documentation/perf-buildid-cache.txt +++ b/tools/perf/Documentation/perf-buildid-cache.txt @@ -74,6 +74,12 @@ OPTIONS used when creating a uprobe for a process that resides in a different mount namespace from the perf(1) utility. +--debuginfod=URLs:: + Specify debuginfod URL to be used when retrieving perf.data binaries, + it follows the same syntax as the DEBUGINFOD_URLS variable, like: + + buildid-cache.debuginfod=http://192.168.122.174:8002 + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-buildid-list[1] diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 5c379adf8304..153bde14bbe0 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -238,6 +238,13 @@ buildid.*:: cache location, or to disable it altogether. If you want to disable it, set buildid.dir to /dev/null. The default is $HOME/.debug +buildid-cache.*:: + buildid-cache.debuginfod=URLs + Specify debuginfod URLs to be used when retrieving perf.data binaries, + it follows the same syntax as the DEBUGINFOD_URLS variable, like: + + buildid-cache.debuginfod=http://192.168.122.174:8002 + annotate.*:: These are in control of addresses, jump function, source code in lines of assembly code from a specific program. @@ -552,11 +559,12 @@ kmem.*:: record.*:: record.build-id:: - This option can be 'cache', 'no-cache' or 'skip'. + This option can be 'cache', 'no-cache', 'skip' or 'mmap'. 'cache' is to post-process data and save/update the binaries into the build-id cache (in ~/.debug). This is the default. But if this option is 'no-cache', it will not update the build-id cache. 'skip' skips post-processing and does not update the cache. + 'mmap' skips post-processing and reads build-ids from MMAP events. record.call-graph:: This is identical to 'call-graph.record-mode', except it is @@ -695,6 +703,20 @@ auxtrace.*:: If the directory does not exist or has the wrong file type, the current directory is used. +daemon.*:: + + daemon.base:: + Base path for daemon data. All sessions data are stored under + this path. + +session-<NAME>.*:: + + session-<NAME>.run:: + + Defines new record session for daemon. The value is record's + command line without the 'record' keyword. + + SEE ALSO -------- linkperf:perf[1] diff --git a/tools/perf/Documentation/perf-daemon.txt b/tools/perf/Documentation/perf-daemon.txt new file mode 100644 index 000000000000..f558f8e4bc9b --- /dev/null +++ b/tools/perf/Documentation/perf-daemon.txt @@ -0,0 +1,208 @@ +perf-daemon(1) +============== + + +NAME +---- +perf-daemon - Run record sessions on background + + +SYNOPSIS +-------- +[verse] +'perf daemon' +'perf daemon' [<options>] +'perf daemon start' [<options>] +'perf daemon stop' [<options>] +'perf daemon signal' [<options>] +'perf daemon ping' [<options>] + + +DESCRIPTION +----------- +This command allows to run simple daemon process that starts and +monitors configured record sessions. + +You can imagine 'perf daemon' of background process with several +'perf record' child tasks, like: + + # ps axjf + ... + 1 916507 ... perf daemon start + 916507 916508 ... \_ perf record --control=fifo:control,ack -m 10M -e cycles --overwrite --switch-output -a + 916507 916509 ... \_ perf record --control=fifo:control,ack -m 20M -e sched:* --overwrite --switch-output -a + +Not every 'perf record' session is suitable for running under daemon. +User need perf session that either produces data on query, like the +flight recorder sessions in above example or session that is configured +to produce data periodically, like with --switch-output configuration +for time and size. + +Each session is started with control setup (with perf record --control +options). + +Sessions are configured through config file, see CONFIG FILE section +with EXAMPLES. + + +OPTIONS +------- +-v:: +--verbose:: + Be more verbose. + +--config=<PATH>:: + Config file path. If not provided, perf will check system and default + locations (/etc/perfconfig, $HOME/.perfconfig). + +--base=<PATH>:: + Base directory path. Each daemon instance is running on top + of base directory. Only one instance of server can run on + top of one directory at the time. + +All generic options are available also under commands. + + +START COMMAND +------------- +The start command creates the daemon process. + +-f:: +--foreground:: + Do not put the process in background. + + +STOP COMMAND +------------ +The stop command stops all the session and the daemon process. + + +SIGNAL COMMAND +-------------- +The signal command sends signal to configured sessions. + +--session:: + Send signal to specific session. + + +PING COMMAND +------------ +The ping command sends control ping to configured sessions. + +--session:: + Send ping to specific session. + + +CONFIG FILE +----------- +The daemon is configured within standard perf config file by +following new variables: + +daemon.base: + Base path for daemon data. All sessions data are + stored under this path. + +session-<NAME>.run: + Defines new record session. The value is record's command + line without the 'record' keyword. + +Each perf record session is run in daemon.base/<NAME> directory. + + +EXAMPLES +-------- +Example with 2 record sessions: + + # cat ~/.perfconfig + [daemon] + base=/opt/perfdata + + [session-cycles] + run = -m 10M -e cycles --overwrite --switch-output -a + + [session-sched] + run = -m 20M -e sched:* --overwrite --switch-output -a + + +Starting the daemon: + + # perf daemon start + + +Check sessions: + + # perf daemon + [603349:daemon] base: /opt/perfdata + [603350:cycles] perf record -m 10M -e cycles --overwrite --switch-output -a + [603351:sched] perf record -m 20M -e sched:* --overwrite --switch-output -a + +First line is daemon process info with configured daemon base. + + +Check sessions with more info: + + # perf daemon -v + [603349:daemon] base: /opt/perfdata + output: /opt/perfdata/output + lock: /opt/perfdata/lock + up: 1 minutes + [603350:cycles] perf record -m 10M -e cycles --overwrite --switch-output -a + base: /opt/perfdata/session-cycles + output: /opt/perfdata/session-cycles/output + control: /opt/perfdata/session-cycles/control + ack: /opt/perfdata/session-cycles/ack + up: 1 minutes + [603351:sched] perf record -m 20M -e sched:* --overwrite --switch-output -a + base: /opt/perfdata/session-sched + output: /opt/perfdata/session-sched/output + control: /opt/perfdata/session-sched/control + ack: /opt/perfdata/session-sched/ack + up: 1 minutes + +The 'base' path is daemon/session base. +The 'lock' file is daemon's lock file guarding that no other +daemon is running on top of the base. +The 'output' file is perf record output for specific session. +The 'control' and 'ack' files are perf control files. +The 'up' number shows minutes daemon/session is running. + + +Make sure control session is online: + + # perf daemon ping + OK cycles + OK sched + + +Send USR2 signal to session 'cycles' to generate perf.data file: + + # perf daemon signal --session cycles + signal 12 sent to session 'cycles [603452]' + + # tail -2 /opt/perfdata/session-cycles/output + [ perf record: dump data: Woken up 1 times ] + [ perf record: Dump perf.data.2020123017013149 ] + + +Send USR2 signal to all sessions: + + # perf daemon signal + signal 12 sent to session 'cycles [603452]' + signal 12 sent to session 'sched [603453]' + + # tail -2 /opt/perfdata/session-cycles/output + [ perf record: dump data: Woken up 1 times ] + [ perf record: Dump perf.data.2020123017024689 ] + # tail -2 /opt/perfdata/session-sched/output + [ perf record: dump data: Woken up 1 times ] + [ perf record: Dump perf.data.2020123017024713 ] + + +Stop daemon: + + # perf daemon stop + + +SEE ALSO +-------- +linkperf:perf-record[1], linkperf:perf-config[1] diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt index cd362dc2af07..1dcec73c910c 100644 --- a/tools/perf/Documentation/perf-intel-pt.txt +++ b/tools/perf/Documentation/perf-intel-pt.txt @@ -858,7 +858,7 @@ The letters are: b synthesize "branches" events x synthesize "transactions" events w synthesize "ptwrite" events - p synthesize "power" events + p synthesize "power" events (incl. PSB events) c synthesize branches events (calls only) r synthesize branches events (returns only) e synthesize tracing error events @@ -913,6 +913,11 @@ Where: For more details refer to the Intel 64 and IA-32 Architectures Software Developer Manuals. +PSB events show when a PSB+ occurred and also the byte-offset in the trace. +Emitting a PSB+ can cause a CPU a slight delay. When doing timing analysis +of code with Intel PT, it is useful to know if a timing bubble was caused +by Intel PT or not. + Error events show where the decoder lost the trace. Error events are quite important. Users must know if what they are seeing is a complete picture or not. The "e" option may be followed by flags which affect what errors @@ -1141,6 +1146,88 @@ XED include::build-xed.txt[] + +Tracing Virtual Machines +------------------------ + +Currently, only kernel tracing is supported and only with "timeless" decoding +i.e. no TSC timestamps + +Other limitations and caveats + + VMX controls may suppress packets needed for decoding resulting in decoding errors + VMX controls may block the perf NMI to the host potentially resulting in lost trace data + Guest kernel self-modifying code (e.g. jump labels or JIT-compiled eBPF) will result in decoding errors + Guest thread information is unknown + Guest VCPU is unknown but may be able to be inferred from the host thread + Callchains are not supported + +Example + +Start VM + + $ sudo virsh start kubuntu20.04 + Domain kubuntu20.04 started + +Mount the guest file system. Note sshfs needs -o direct_io to enable reading of proc files. root access is needed to read /proc/kcore. + + $ mkdir vm0 + $ sshfs -o direct_io root@vm0:/ vm0 + +Copy the guest /proc/kallsyms, /proc/modules and /proc/kcore + + $ perf buildid-cache -v --kcore vm0/proc/kcore + kcore added to build-id cache directory /home/user/.debug/[kernel.kcore]/9600f316a53a0f54278885e8d9710538ec5f6a08/2021021807494306 + $ KALLSYMS=/home/user/.debug/[kernel.kcore]/9600f316a53a0f54278885e8d9710538ec5f6a08/2021021807494306/kallsyms + +Find the VM process + + $ ps -eLl | grep 'KVM\|PID' + F S UID PID PPID LWP C PRI NI ADDR SZ WCHAN TTY TIME CMD + 3 S 64055 1430 1 1440 1 80 0 - 1921718 - ? 00:02:47 CPU 0/KVM + 3 S 64055 1430 1 1441 1 80 0 - 1921718 - ? 00:02:41 CPU 1/KVM + 3 S 64055 1430 1 1442 1 80 0 - 1921718 - ? 00:02:38 CPU 2/KVM + 3 S 64055 1430 1 1443 2 80 0 - 1921718 - ? 00:03:18 CPU 3/KVM + +Start an open-ended perf record, tracing the VM process, do something on the VM, and then ctrl-C to stop. +TSC is not supported and tsc=0 must be specified. That means mtc is useless, so add mtc=0. +However, IPC can still be determined, hence cyc=1 can be added. +Only kernel decoding is supported, so 'k' must be specified. +Intel PT traces both the host and the guest so --guest and --host need to be specified. +Without timestamps, --per-thread must be specified to distinguish threads. + + $ sudo perf kvm --guest --host --guestkallsyms $KALLSYMS record --kcore -e intel_pt/tsc=0,mtc=0,cyc=1/k -p 1430 --per-thread + ^C + [ perf record: Woken up 1 times to write data ] + [ perf record: Captured and wrote 5.829 MB ] + +perf script can be used to provide an instruction trace + + $ perf script --guestkallsyms $KALLSYMS --insn-trace --xed -F+ipc | grep -C10 vmresume | head -21 + CPU 0/KVM 1440 ffffffff82133cdd __vmx_vcpu_run+0x3d ([kernel.kallsyms]) movq 0x48(%rax), %r9 + CPU 0/KVM 1440 ffffffff82133ce1 __vmx_vcpu_run+0x41 ([kernel.kallsyms]) movq 0x50(%rax), %r10 + CPU 0/KVM 1440 ffffffff82133ce5 __vmx_vcpu_run+0x45 ([kernel.kallsyms]) movq 0x58(%rax), %r11 + CPU 0/KVM 1440 ffffffff82133ce9 __vmx_vcpu_run+0x49 ([kernel.kallsyms]) movq 0x60(%rax), %r12 + CPU 0/KVM 1440 ffffffff82133ced __vmx_vcpu_run+0x4d ([kernel.kallsyms]) movq 0x68(%rax), %r13 + CPU 0/KVM 1440 ffffffff82133cf1 __vmx_vcpu_run+0x51 ([kernel.kallsyms]) movq 0x70(%rax), %r14 + CPU 0/KVM 1440 ffffffff82133cf5 __vmx_vcpu_run+0x55 ([kernel.kallsyms]) movq 0x78(%rax), %r15 + CPU 0/KVM 1440 ffffffff82133cf9 __vmx_vcpu_run+0x59 ([kernel.kallsyms]) movq (%rax), %rax + CPU 0/KVM 1440 ffffffff82133cfc __vmx_vcpu_run+0x5c ([kernel.kallsyms]) callq 0xffffffff82133c40 + CPU 0/KVM 1440 ffffffff82133c40 vmx_vmenter+0x0 ([kernel.kallsyms]) jz 0xffffffff82133c46 + CPU 0/KVM 1440 ffffffff82133c42 vmx_vmenter+0x2 ([kernel.kallsyms]) vmresume IPC: 0.11 (50/445) + :1440 1440 ffffffffbb678b06 native_write_msr+0x6 ([guest.kernel.kallsyms]) nopl %eax, (%rax,%rax,1) + :1440 1440 ffffffffbb678b0b native_write_msr+0xb ([guest.kernel.kallsyms]) retq IPC: 0.04 (2/41) + :1440 1440 ffffffffbb666646 lapic_next_deadline+0x26 ([guest.kernel.kallsyms]) data16 nop + :1440 1440 ffffffffbb666648 lapic_next_deadline+0x28 ([guest.kernel.kallsyms]) xor %eax, %eax + :1440 1440 ffffffffbb66664a lapic_next_deadline+0x2a ([guest.kernel.kallsyms]) popq %rbp + :1440 1440 ffffffffbb66664b lapic_next_deadline+0x2b ([guest.kernel.kallsyms]) retq IPC: 0.16 (4/25) + :1440 1440 ffffffffbb74607f clockevents_program_event+0x8f ([guest.kernel.kallsyms]) test %eax, %eax + :1440 1440 ffffffffbb746081 clockevents_program_event+0x91 ([guest.kernel.kallsyms]) jz 0xffffffffbb74603c IPC: 0.06 (2/30) + :1440 1440 ffffffffbb74603c clockevents_program_event+0x4c ([guest.kernel.kallsyms]) popq %rbx + :1440 1440 ffffffffbb74603d clockevents_program_event+0x4d ([guest.kernel.kallsyms]) popq %r12 + + + SEE ALSO -------- diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt index 199ea0f0a6c0..66177511c5c4 100644 --- a/tools/perf/Documentation/perf-mem.txt +++ b/tools/perf/Documentation/perf-mem.txt @@ -63,6 +63,9 @@ OPTIONS --phys-data:: Record/Report sample physical addresses +--data-page-size:: + Record/Report sample data address page size + RECORD OPTIONS -------------- -e:: diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 34cf651ee237..f3161c9673e9 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -296,6 +296,9 @@ OPTIONS --data-page-size:: Record the sampled data address data page size. +--code-page-size:: + Record the sampled code address (ip) page size + -T:: --timestamp:: Record the sample timestamps. Use it with 'perf report -D' to see the @@ -485,6 +488,9 @@ Specify vmlinux path which has debuginfo. --buildid-all:: Record build-id of all DSOs regardless whether it's actually hit or not. +--buildid-mmap:: +Record build ids in mmap2 events, disables build id cache (implies --no-buildid). + --aio[=n]:: Use <n> control blocks in asynchronous (Posix AIO) trace writing mode (default: 1, max: 4). Asynchronous mode is supported only when linking Perf tool with libc library @@ -640,9 +646,18 @@ ctl-fifo / ack-fifo are opened and used as ctl-fd / ack-fd as follows. Listen on ctl-fd descriptor for command to control measurement. Available commands: - 'enable' : enable events - 'disable' : disable events - 'snapshot': AUX area tracing snapshot). + 'enable' : enable events + 'disable' : disable events + 'enable name' : enable event 'name' + 'disable name' : disable event 'name' + 'snapshot' : AUX area tracing snapshot). + 'stop' : stop perf record + 'ping' : ping + + 'evlist [-v|-g|-F] : display all events + -F Show just the sample frequency used for each event. + -v Show all fields. + -g Show event group information. Measurements can be started with events disabled using --delay=-1 option. Optionally send control command completion ('ack\n') to ack-fd descriptor to synchronize with the diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 8f7f4e9605d8..f546b5e9db05 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -108,6 +108,10 @@ OPTIONS - period: Raw number of event count of sample - time: Separate the samples by time stamp with the resolution specified by --time-quantum (default 100ms). Specify with overhead and before it. + - code_page_size: the code page size of sampled code address (ip) + - ins_lat: Instruction latency in core cycles. This is the global instruction + latency + - local_ins_lat: Local instruction latency version By default, comm, dso and symbol keys are used. (i.e. --sort comm,dso,symbol) @@ -139,7 +143,7 @@ OPTIONS If the --mem-mode option is used, the following sort keys are also available (incompatible with --branch-stack): - symbol_daddr, dso_daddr, locked, tlb, mem, snoop, dcacheline. + symbol_daddr, dso_daddr, locked, tlb, mem, snoop, dcacheline, blocked. - symbol_daddr: name of data symbol being executed on at the time of sample - dso_daddr: name of library or module containing the data being executed @@ -151,9 +155,11 @@ OPTIONS - dcacheline: the cacheline the data address is on at the time of the sample - phys_daddr: physical address of data being executed on at the time of sample - data_page_size: the data page size of data being executed on at the time of sample + - blocked: reason of blocked load access for the data at the time of the sample And the default sort keys are changed to local_weight, mem, sym, dso, - symbol_daddr, dso_daddr, snoop, tlb, locked, see '--mem-mode'. + symbol_daddr, dso_daddr, snoop, tlb, locked, blocked, local_ins_lat, + see '--mem-mode'. If the data file has tracepoint event(s), following (dynamic) sort keys are also available: diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 44d37210fc8f..5b8b61075039 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -118,7 +118,7 @@ OPTIONS comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, brstackinsn, brstackoff, callindent, insn, insnlen, synth, phys_addr, - metric, misc, srccode, ipc, data_page_size. + metric, misc, srccode, ipc, data_page_size, code_page_size. Field list can be prepended with the type, trace, sw or hw, to indicate to which event type the field list applies. e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace @@ -422,9 +422,32 @@ include::itrace.txt[] Only consider the listed symbols. Symbols are typically a name but they may also be hexadecimal address. + The hexadecimal address may be the start address of a symbol or + any other address to filter the trace records + For example, to select the symbol noploop or the address 0x4007a0: perf script --symbols=noploop,0x4007a0 + Support filtering trace records by symbol name, start address of + symbol, any hexadecimal address and address range. + + The comparison order is: + + 1. symbol name comparison + 2. symbol start address comparison. + 3. any hexadecimal address comparison. + 4. address range comparison (see --addr-range). + +--addr-range:: + Use with -S or --symbols to list traced records within address range. + + For example, to list the traced records within the address range + [0x4007a0, 0x0x4007a9]: + perf script -S 0x4007a0 --addr-range 10 + +--dsos=:: + Only consider symbols in these DSOs. + --call-trace:: Show call stream for intel_pt traces. The CPUs are interleaved, but can be filtered with -C. diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 5d4a673d7621..08a1714494f8 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -75,6 +75,24 @@ report:: --tid=<tid>:: stat events on existing thread id (comma separated list) +-b:: +--bpf-prog:: + stat events on existing bpf program id (comma separated list), + requiring root rights. bpftool-prog could be used to find program + id all bpf programs in the system. For example: + + # bpftool prog | head -n 1 + 17247: tracepoint name sys_enter tag 192d548b9d754067 gpl + + # perf stat -e cycles,instructions --bpf-prog 17247 --timeout 1000 + + Performance counter stats for 'BPF program(s) 17247': + + 85,967 cycles + 28,982 instructions # 0.34 insn per cycle + + 1.102235068 seconds time elapsed + ifdef::HAVE_LIBPFM[] --pfm-events events:: Select a PMU event using libpfm4 syntax (see http://perfmon2.sf.net) @@ -358,7 +376,7 @@ See perf list output for the possble metrics and metricgroups. Do not aggregate counts across all monitored CPUs. --topdown:: -Print top down level 1 metrics if supported by the CPU. This allows to +Print complete top-down metrics supported by the CPU. This allows to determine bottle necks in the CPU pipeline for CPU bound workloads, by breaking the cycles consumed down into frontend bound, backend bound, bad speculation and retiring. @@ -393,6 +411,18 @@ To interpret the results it is usually needed to know on which CPUs the workload runs on. If needed the CPUs can be forced using taskset. +--td-level:: +Print the top-down statistics that equal to or lower than the input level. +It allows users to print the interested top-down metrics level instead of +the complete top-down metrics. + +The availability of the top-down metrics level depends on the hardware. For +example, Ice Lake only supports L1 top-down metrics. The Sapphire Rapids +supports both L1 and L2 top-down metrics. + +Default: 0 means the max level that the current hardware support. +Error out if the input is higher than the supported max level. + --no-merge:: Do not merge results from same PMUs. diff --git a/tools/perf/Documentation/topdown.txt b/tools/perf/Documentation/topdown.txt index 3c39bb3dc5fa..10f07f9455b8 100644 --- a/tools/perf/Documentation/topdown.txt +++ b/tools/perf/Documentation/topdown.txt @@ -121,7 +121,7 @@ to read slots and the topdown metrics at different points of the program: #define RDPMC_METRIC (1 << 29) /* return metric counters */ #define FIXED_COUNTER_SLOTS 3 -#define METRIC_COUNTER_TOPDOWN_L1 0 +#define METRIC_COUNTER_TOPDOWN_L1_L2 0 static inline uint64_t read_slots(void) { @@ -130,7 +130,7 @@ static inline uint64_t read_slots(void) static inline uint64_t read_metrics(void) { - return _rdpmc(RDPMC_METRIC | METRIC_COUNTER_TOPDOWN_L1); + return _rdpmc(RDPMC_METRIC | METRIC_COUNTER_TOPDOWN_L1_L2); } Then the program can be instrumented to read these metrics at different @@ -152,11 +152,21 @@ The binary ratios in the metric value can be converted to float ratios: #define GET_METRIC(m, i) (((m) >> (i*8)) & 0xff) +/* L1 Topdown metric events */ #define TOPDOWN_RETIRING(val) ((float)GET_METRIC(val, 0) / 0xff) #define TOPDOWN_BAD_SPEC(val) ((float)GET_METRIC(val, 1) / 0xff) #define TOPDOWN_FE_BOUND(val) ((float)GET_METRIC(val, 2) / 0xff) #define TOPDOWN_BE_BOUND(val) ((float)GET_METRIC(val, 3) / 0xff) +/* + * L2 Topdown metric events. + * Available on Sapphire Rapids and later platforms. + */ +#define TOPDOWN_HEAVY_OPS(val) ((float)GET_METRIC(val, 4) / 0xff) +#define TOPDOWN_BR_MISPREDICT(val) ((float)GET_METRIC(val, 5) / 0xff) +#define TOPDOWN_FETCH_LAT(val) ((float)GET_METRIC(val, 6) / 0xff) +#define TOPDOWN_MEM_BOUND(val) ((float)GET_METRIC(val, 7) / 0xff) + and then converted to percent for printing. The ratios in the metric accumulate for the time when the counter @@ -190,8 +200,8 @@ for that time period. fe_bound_slots = GET_METRIC(metric_b, 2) * slots_b - fe_bound_slots_a be_bound_slots = GET_METRIC(metric_b, 3) * slots_b - be_bound_slots_a -Later the individual ratios for the measurement period can be recreated -from these counts. +Later the individual ratios of L1 metric events for the measurement period can +be recreated from these counts. slots_delta = slots_b - slots_a retiring_ratio = (float)retiring_slots / slots_delta @@ -205,6 +215,48 @@ from these counts. fe_bound_ratio * 100., be_bound_ratio * 100.); +The individual ratios of L2 metric events for the measurement period can be +recreated from L1 and L2 metric counters. (Available on Sapphire Rapids and +later platforms) + + # compute scaled metrics for measurement a + heavy_ops_slots_a = GET_METRIC(metric_a, 4) * slots_a + br_mispredict_slots_a = GET_METRIC(metric_a, 5) * slots_a + fetch_lat_slots_a = GET_METRIC(metric_a, 6) * slots_a + mem_bound_slots_a = GET_METRIC(metric_a, 7) * slots_a + + # compute delta scaled metrics between b and a + heavy_ops_slots = GET_METRIC(metric_b, 4) * slots_b - heavy_ops_slots_a + br_mispredict_slots = GET_METRIC(metric_b, 5) * slots_b - br_mispredict_slots_a + fetch_lat_slots = GET_METRIC(metric_b, 6) * slots_b - fetch_lat_slots_a + mem_bound_slots = GET_METRIC(metric_b, 7) * slots_b - mem_bound_slots_a + + slots_delta = slots_b - slots_a + heavy_ops_ratio = (float)heavy_ops_slots / slots_delta + light_ops_ratio = retiring_ratio - heavy_ops_ratio; + + br_mispredict_ratio = (float)br_mispredict_slots / slots_delta + machine_clears_ratio = bad_spec_ratio - br_mispredict_ratio; + + fetch_lat_ratio = (float)fetch_lat_slots / slots_delta + fetch_bw_ratio = fe_bound_ratio - fetch_lat_ratio; + + mem_bound_ratio = (float)mem_bound_slots / slota_delta + core_bound_ratio = be_bound_ratio - mem_bound_ratio; + + printf("Heavy Operations %.2f%% Light Operations %.2f%% " + "Branch Mispredict %.2f%% Machine Clears %.2f%% " + "Fetch Latency %.2f%% Fetch Bandwidth %.2f%% " + "Mem Bound %.2f%% Core Bound %.2f%%\n", + heavy_ops_ratio * 100., + light_ops_ratio * 100., + br_mispredict_ratio * 100., + machine_clears_ratio * 100., + fetch_lat_ratio * 100., + fetch_bw_ratio * 100., + mem_bound_ratio * 100., + core_bound_ratio * 100.); + Resetting metrics counters ========================== @@ -248,6 +300,24 @@ a sampling read group. Since the SLOTS event must be the leader of a TopDown group, the second event of the group is the sampling event. For example, perf record -e '{slots, $sampling_event, topdown-retiring}:S' +Extension on Sapphire Rapids Server +=================================== +The metrics counter is extended to support TMA method level 2 metrics. +The lower half of the register is the TMA level 1 metrics (legacy). +The upper half is also divided into four 8-bit fields for the new level 2 +metrics. Four more TopDown metric events are exposed for the end-users, +topdown-heavy-ops, topdown-br-mispredict, topdown-fetch-lat and +topdown-mem-bound. + +Each of the new level 2 metrics in the upper half is a subset of the +corresponding level 1 metric in the lower half. Software can deduce the +other four level 2 metrics by subtracting corresponding metrics as below. + + Light_Operations = Retiring - Heavy_Operations + Machine_Clears = Bad_Speculation - Branch_Mispredicts + Fetch_Bandwidth = Frontend_Bound - Fetch_Latency + Core_Bound = Backend_Bound - Memory_Bound + [1] https://software.intel.com/en-us/top-down-microarchitecture-analysis-method-win [2] https://github.com/andikleen/pmu-tools/wiki/toplev-manual diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index ce8516e4de34..d8e59d31399a 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -621,6 +621,15 @@ ifndef NO_LIBBPF endif endif +ifdef BUILD_BPF_SKEL + $(call feature_check,clang-bpf-co-re) + ifeq ($(feature-clang-bpf-co-re), 0) + dummy := $(error Error: clang too old. Please install recent clang) + endif + $(call detected,CONFIG_PERF_BPF_SKEL) + CFLAGS += -DHAVE_BPF_SKEL +endif + dwarf-post-unwind := 1 dwarf-post-unwind-text := BUG diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 62f3deb1d3a8..5345ac70cd83 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -126,6 +126,8 @@ include ../scripts/utilities.mak # # Define NO_LIBDEBUGINFOD if you do not want support debuginfod # +# Define BUILD_BPF_SKEL to enable BPF skeletons +# # As per kernel Makefile, avoid funny character set dependencies unexport LC_ALL @@ -175,8 +177,13 @@ endef LD += $(EXTRA_LDFLAGS) +HOSTCC ?= gcc +HOSTLD ?= ld +HOSTAR ?= ar +CLANG ?= clang +LLVM_STRIP ?= llvm-strip + PKG_CONFIG = $(CROSS_COMPILE)pkg-config -LLVM_CONFIG ?= llvm-config RM = rm -f LN = ln -f @@ -731,7 +738,8 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc $(x86_arch_prctl_code_array) \ $(rename_flags_array) \ $(arch_errno_name_array) \ - $(sync_file_range_arrays) + $(sync_file_range_arrays) \ + bpf-skel $(OUTPUT)%.o: %.c prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ @@ -1004,7 +1012,43 @@ config-clean: python-clean: $(python-clean) -clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBPERF)-clean config-clean fixdep-clean python-clean +SKEL_OUT := $(abspath $(OUTPUT)util/bpf_skel) +SKEL_TMP_OUT := $(abspath $(SKEL_OUT)/.tmp) +SKELETONS := $(SKEL_OUT)/bpf_prog_profiler.skel.h + +ifdef BUILD_BPF_SKEL +BPFTOOL := $(SKEL_TMP_OUT)/bootstrap/bpftool +LIBBPF_SRC := $(abspath ../lib/bpf) +BPF_INCLUDE := -I$(SKEL_TMP_OUT)/.. -I$(BPF_PATH) -I$(LIBBPF_SRC)/.. + +$(SKEL_TMP_OUT): + $(Q)$(MKDIR) -p $@ + +$(BPFTOOL): | $(SKEL_TMP_OUT) + CFLAGS= $(MAKE) -C ../bpf/bpftool \ + OUTPUT=$(SKEL_TMP_OUT)/ bootstrap + +$(SKEL_TMP_OUT)/%.bpf.o: util/bpf_skel/%.bpf.c $(LIBBPF) | $(SKEL_TMP_OUT) + $(QUIET_CLANG)$(CLANG) -g -O2 -target bpf $(BPF_INCLUDE) \ + -c $(filter util/bpf_skel/%.bpf.c,$^) -o $@ && $(LLVM_STRIP) -g $@ + +$(SKEL_OUT)/%.skel.h: $(SKEL_TMP_OUT)/%.bpf.o | $(BPFTOOL) + $(QUIET_GENSKEL)$(BPFTOOL) gen skeleton $< > $@ + +bpf-skel: $(SKELETONS) + +.PRECIOUS: $(SKEL_TMP_OUT)/%.bpf.o + +else # BUILD_BPF_SKEL + +bpf-skel: + +endif # BUILD_BPF_SKEL + +bpf-skel-clean: + $(call QUIET_CLEAN, bpf-skel) $(RM) -r $(SKEL_TMP_OUT) $(SKELETONS) + +clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBPERF)-clean config-clean fixdep-clean python-clean bpf-skel-clean $(call QUIET_CLEAN, core-objs) $(RM) $(LIBPERF_A) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS) $(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete $(Q)$(RM) $(OUTPUT).config-detected diff --git a/tools/perf/arch/arm/include/perf_regs.h b/tools/perf/arch/arm/include/perf_regs.h index ed20e0253e25..4085419283d0 100644 --- a/tools/perf/arch/arm/include/perf_regs.h +++ b/tools/perf/arch/arm/include/perf_regs.h @@ -15,7 +15,7 @@ void perf_regs_load(u64 *regs); #define PERF_REG_IP PERF_REG_ARM_PC #define PERF_REG_SP PERF_REG_ARM_SP -static inline const char *perf_reg_name(int id) +static inline const char *__perf_reg_name(int id) { switch (id) { case PERF_REG_ARM_R0: diff --git a/tools/perf/arch/arm64/include/perf_regs.h b/tools/perf/arch/arm64/include/perf_regs.h index baaa5e64a3fb..fa3e07459f76 100644 --- a/tools/perf/arch/arm64/include/perf_regs.h +++ b/tools/perf/arch/arm64/include/perf_regs.h @@ -15,7 +15,7 @@ void perf_regs_load(u64 *regs); #define PERF_REG_IP PERF_REG_ARM64_PC #define PERF_REG_SP PERF_REG_ARM64_SP -static inline const char *perf_reg_name(int id) +static inline const char *__perf_reg_name(int id) { switch (id) { case PERF_REG_ARM64_X0: diff --git a/tools/perf/arch/arm64/util/machine.c b/tools/perf/arch/arm64/util/machine.c index d41b27e781d3..40c5e0b5bda8 100644 --- a/tools/perf/arch/arm64/util/machine.c +++ b/tools/perf/arch/arm64/util/machine.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 +#include <inttypes.h> #include <stdio.h> #include <string.h> #include "debug.h" @@ -23,5 +24,5 @@ void arch__symbols__fixup_end(struct symbol *p, struct symbol *c) p->end += SYMBOL_LIMIT; else p->end = c->start; - pr_debug4("%s sym:%s end:%#lx\n", __func__, p->name, p->end); + pr_debug4("%s sym:%s end:%#" PRIx64 "\n", __func__, p->name, p->end); } diff --git a/tools/perf/arch/arm64/util/perf_regs.c b/tools/perf/arch/arm64/util/perf_regs.c index 54efa12fdbea..2518cde18b34 100644 --- a/tools/perf/arch/arm64/util/perf_regs.c +++ b/tools/perf/arch/arm64/util/perf_regs.c @@ -1,4 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 +#include <errno.h> +#include <regex.h> +#include <string.h> +#include <linux/kernel.h> +#include <linux/zalloc.h> + +#include "../../../util/debug.h" +#include "../../../util/event.h" #include "../../../util/perf_regs.h" const struct sample_reg sample_reg_masks[] = { @@ -37,3 +45,89 @@ const struct sample_reg sample_reg_masks[] = { SMPL_REG(pc, PERF_REG_ARM64_PC), SMPL_REG_END }; + +/* %xNUM */ +#define SDT_OP_REGEX1 "^(x[1-2]?[0-9]|3[0-1])$" + +/* [sp], [sp, NUM] */ +#define SDT_OP_REGEX2 "^\\[sp(, )?([0-9]+)?\\]$" + +static regex_t sdt_op_regex1, sdt_op_regex2; + +static int sdt_init_op_regex(void) +{ + static int initialized; + int ret = 0; + + if (initialized) + return 0; + + ret = regcomp(&sdt_op_regex1, SDT_OP_REGEX1, REG_EXTENDED); + if (ret) + goto error; + + ret = regcomp(&sdt_op_regex2, SDT_OP_REGEX2, REG_EXTENDED); + if (ret) + goto free_regex1; + + initialized = 1; + return 0; + +free_regex1: + regfree(&sdt_op_regex1); +error: + pr_debug4("Regex compilation error.\n"); + return ret; +} + +/* + * SDT marker arguments on Arm64 uses %xREG or [sp, NUM], currently + * support these two formats. + */ +int arch_sdt_arg_parse_op(char *old_op, char **new_op) +{ + int ret, new_len; + regmatch_t rm[5]; + + ret = sdt_init_op_regex(); + if (ret < 0) + return ret; + + if (!regexec(&sdt_op_regex1, old_op, 3, rm, 0)) { + /* Extract xNUM */ + new_len = 2; /* % NULL */ + new_len += (int)(rm[1].rm_eo - rm[1].rm_so); + + *new_op = zalloc(new_len); + if (!*new_op) + return -ENOMEM; + + scnprintf(*new_op, new_len, "%%%.*s", + (int)(rm[1].rm_eo - rm[1].rm_so), old_op + rm[1].rm_so); + } else if (!regexec(&sdt_op_regex2, old_op, 5, rm, 0)) { + /* [sp], [sp, NUM] or [sp,NUM] */ + new_len = 7; /* + ( % s p ) NULL */ + + /* If the arugment is [sp], need to fill offset '0' */ + if (rm[2].rm_so == -1) + new_len += 1; + else + new_len += (int)(rm[2].rm_eo - rm[2].rm_so); + + *new_op = zalloc(new_len); + if (!*new_op) + return -ENOMEM; + + if (rm[2].rm_so == -1) + scnprintf(*new_op, new_len, "+0(%%sp)"); + else + scnprintf(*new_op, new_len, "+%.*s(%%sp)", + (int)(rm[2].rm_eo - rm[2].rm_so), + old_op + rm[2].rm_so); + } else { + pr_debug4("Skipping unsupported SDT argument: %s\n", old_op); + return SDT_ARG_SKIP; + } + + return SDT_ARG_VALID; +} diff --git a/tools/perf/arch/csky/include/perf_regs.h b/tools/perf/arch/csky/include/perf_regs.h index 8f336ea1161a..25ac3bdcb9d1 100644 --- a/tools/perf/arch/csky/include/perf_regs.h +++ b/tools/perf/arch/csky/include/perf_regs.h @@ -15,7 +15,7 @@ #define PERF_REG_IP PERF_REG_CSKY_PC #define PERF_REG_SP PERF_REG_CSKY_SP -static inline const char *perf_reg_name(int id) +static inline const char *__perf_reg_name(int id) { switch (id) { case PERF_REG_CSKY_A0: diff --git a/tools/perf/arch/powerpc/include/perf_regs.h b/tools/perf/arch/powerpc/include/perf_regs.h index 63f3ac91049f..04e5dc07e93f 100644 --- a/tools/perf/arch/powerpc/include/perf_regs.h +++ b/tools/perf/arch/powerpc/include/perf_regs.h @@ -71,9 +71,15 @@ static const char *reg_names[] = { [PERF_REG_POWERPC_MMCR3] = "mmcr3", [PERF_REG_POWERPC_SIER2] = "sier2", [PERF_REG_POWERPC_SIER3] = "sier3", + [PERF_REG_POWERPC_PMC1] = "pmc1", + [PERF_REG_POWERPC_PMC2] = "pmc2", + [PERF_REG_POWERPC_PMC3] = "pmc3", + [PERF_REG_POWERPC_PMC4] = "pmc4", + [PERF_REG_POWERPC_PMC5] = "pmc5", + [PERF_REG_POWERPC_PMC6] = "pmc6", }; -static inline const char *perf_reg_name(int id) +static inline const char *__perf_reg_name(int id) { return reg_names[id]; } diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build index e86e210bf514..b7945e5a543b 100644 --- a/tools/perf/arch/powerpc/util/Build +++ b/tools/perf/arch/powerpc/util/Build @@ -1,4 +1,5 @@ perf-y += header.o +perf-y += machine.o perf-y += kvm-stat.o perf-y += perf_regs.o perf-y += mem-events.o diff --git a/tools/perf/arch/powerpc/util/machine.c b/tools/perf/arch/powerpc/util/machine.c new file mode 100644 index 000000000000..e652a1aa8132 --- /dev/null +++ b/tools/perf/arch/powerpc/util/machine.c @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <inttypes.h> +#include <stdio.h> +#include <string.h> +#include <internal/lib.h> // page_size +#include "debug.h" +#include "symbol.h" + +/* On powerpc kernel text segment start at memory addresses, 0xc000000000000000 + * whereas the modules are located at very high memory addresses, + * for example 0xc00800000xxxxxxx. The gap between end of kernel text segment + * and beginning of first module's text segment is very high. + * Therefore do not fill this gap and do not assign it to the kernel dso map. + */ + +void arch__symbols__fixup_end(struct symbol *p, struct symbol *c) +{ + if (strchr(p->name, '[') == NULL && strchr(c->name, '[')) + /* Limit the range of last kernel symbol */ + p->end += page_size; + else + p->end = c->start; + pr_debug4("%s sym:%s end:%#" PRIx64 "\n", __func__, p->name, p->end); +} diff --git a/tools/perf/arch/powerpc/util/perf_regs.c b/tools/perf/arch/powerpc/util/perf_regs.c index 2b6d4704e3aa..8116a253f91f 100644 --- a/tools/perf/arch/powerpc/util/perf_regs.c +++ b/tools/perf/arch/powerpc/util/perf_regs.c @@ -68,6 +68,12 @@ const struct sample_reg sample_reg_masks[] = { SMPL_REG(mmcr3, PERF_REG_POWERPC_MMCR3), SMPL_REG(sier2, PERF_REG_POWERPC_SIER2), SMPL_REG(sier3, PERF_REG_POWERPC_SIER3), + SMPL_REG(pmc1, PERF_REG_POWERPC_PMC1), + SMPL_REG(pmc2, PERF_REG_POWERPC_PMC2), + SMPL_REG(pmc3, PERF_REG_POWERPC_PMC3), + SMPL_REG(pmc4, PERF_REG_POWERPC_PMC4), + SMPL_REG(pmc5, PERF_REG_POWERPC_PMC5), + SMPL_REG(pmc6, PERF_REG_POWERPC_PMC6), SMPL_REG_END }; diff --git a/tools/perf/arch/riscv/include/perf_regs.h b/tools/perf/arch/riscv/include/perf_regs.h index 7a8bcde7a2b1..6b02a767c918 100644 --- a/tools/perf/arch/riscv/include/perf_regs.h +++ b/tools/perf/arch/riscv/include/perf_regs.h @@ -19,7 +19,7 @@ #define PERF_REG_IP PERF_REG_RISCV_PC #define PERF_REG_SP PERF_REG_RISCV_SP -static inline const char *perf_reg_name(int id) +static inline const char *__perf_reg_name(int id) { switch (id) { case PERF_REG_RISCV_PC: diff --git a/tools/perf/arch/s390/include/perf_regs.h b/tools/perf/arch/s390/include/perf_regs.h index bcfbaed78cc2..ce3031526623 100644 --- a/tools/perf/arch/s390/include/perf_regs.h +++ b/tools/perf/arch/s390/include/perf_regs.h @@ -14,7 +14,7 @@ void perf_regs_load(u64 *regs); #define PERF_REG_IP PERF_REG_S390_PC #define PERF_REG_SP PERF_REG_S390_R15 -static inline const char *perf_reg_name(int id) +static inline const char *__perf_reg_name(int id) { switch (id) { case PERF_REG_S390_R0: diff --git a/tools/perf/arch/s390/util/machine.c b/tools/perf/arch/s390/util/machine.c index 724efb2d842d..7644a4f6d4a4 100644 --- a/tools/perf/arch/s390/util/machine.c +++ b/tools/perf/arch/s390/util/machine.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include <inttypes.h> #include <unistd.h> #include <stdio.h> #include <string.h> @@ -48,5 +49,5 @@ void arch__symbols__fixup_end(struct symbol *p, struct symbol *c) p->end = roundup(p->end, page_size); else p->end = c->start; - pr_debug4("%s sym:%s end:%#lx\n", __func__, p->name, p->end); + pr_debug4("%s sym:%s end:%#" PRIx64 "\n", __func__, p->name, p->end); } diff --git a/tools/perf/arch/x86/include/perf_regs.h b/tools/perf/arch/x86/include/perf_regs.h index b7321337d100..cddc4cdc0d9b 100644 --- a/tools/perf/arch/x86/include/perf_regs.h +++ b/tools/perf/arch/x86/include/perf_regs.h @@ -23,7 +23,7 @@ void perf_regs_load(u64 *regs); #define PERF_REG_IP PERF_REG_X86_IP #define PERF_REG_SP PERF_REG_X86_SP -static inline const char *perf_reg_name(int id) +static inline const char *__perf_reg_name(int id) { switch (id) { case PERF_REG_X86_AX: diff --git a/tools/perf/arch/x86/tests/insn-x86.c b/tools/perf/arch/x86/tests/insn-x86.c index 745f29adb14b..f782ef8c5982 100644 --- a/tools/perf/arch/x86/tests/insn-x86.c +++ b/tools/perf/arch/x86/tests/insn-x86.c @@ -48,6 +48,7 @@ static int get_op(const char *op_str) {"int", INTEL_PT_OP_INT}, {"syscall", INTEL_PT_OP_SYSCALL}, {"sysret", INTEL_PT_OP_SYSRET}, + {"vmentry", INTEL_PT_OP_VMENTRY}, {NULL, 0}, }; struct val_data *val; diff --git a/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c b/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c index 901bf1f449c4..c933e3dcd0a8 100644 --- a/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c +++ b/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c @@ -66,8 +66,8 @@ struct test_data { {7, {0x9d, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_FUP, 4, 0x60504030201}, 0, 0 }, {9, {0xdd, 1, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_FUP, 6, 0x807060504030201}, 0, 0 }, /* Paging Information Packet */ - {8, {0x02, 0x43, 2, 4, 6, 8, 10, 12}, 0, {INTEL_PT_PIP, 0, 0x60504030201}, 0, 0 }, - {8, {0x02, 0x43, 3, 4, 6, 8, 10, 12}, 0, {INTEL_PT_PIP, 0, 0x60504030201 | (1ULL << 63)}, 0, 0 }, + {8, {0x02, 0x43, 2, 4, 6, 8, 10, 12}, 0, {INTEL_PT_PIP, 0, 0xC0A08060402}, 0, 0 }, + {8, {0x02, 0x43, 3, 4, 6, 8, 10, 12}, 0, {INTEL_PT_PIP, 0, 0xC0A08060403}, 0, 0 }, /* Mode Exec Packet */ {2, {0x99, 0x00}, 0, {INTEL_PT_MODE_EXEC, 0, 16}, 0, 0 }, {2, {0x99, 0x01}, 0, {INTEL_PT_MODE_EXEC, 0, 64}, 0, 0 }, diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build index 347c39b960eb..0c72d418932e 100644 --- a/tools/perf/arch/x86/util/Build +++ b/tools/perf/arch/x86/util/Build @@ -6,6 +6,9 @@ perf-y += perf_regs.o perf-y += topdown.o perf-y += machine.o perf-y += event.o +perf-y += evlist.o +perf-y += mem-events.o +perf-y += evsel.o perf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c index 047dc00eafa6..9b31734ee968 100644 --- a/tools/perf/arch/x86/util/event.c +++ b/tools/perf/arch/x86/util/event.c @@ -75,3 +75,28 @@ int perf_event__synthesize_extra_kmaps(struct perf_tool *tool, } #endif + +void arch_perf_parse_sample_weight(struct perf_sample *data, + const __u64 *array, u64 type) +{ + union perf_sample_weight weight; + + weight.full = *array; + if (type & PERF_SAMPLE_WEIGHT) + data->weight = weight.full; + else { + data->weight = weight.var1_dw; + data->ins_lat = weight.var2_w; + } +} + +void arch_perf_synthesize_sample_weight(const struct perf_sample *data, + __u64 *array, u64 type) +{ + *array = data->weight; + + if (type & PERF_SAMPLE_WEIGHT_STRUCT) { + *array &= 0xffffffff; + *array |= ((u64)data->ins_lat << 32); + } +} diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c new file mode 100644 index 000000000000..8c6732cc7794 --- /dev/null +++ b/tools/perf/arch/x86/util/evlist.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdio.h> +#include "util/pmu.h" +#include "util/evlist.h" +#include "util/parse-events.h" + +#define TOPDOWN_L1_EVENTS "{slots,topdown-retiring,topdown-bad-spec,topdown-fe-bound,topdown-be-bound}" + +int arch_evlist__add_default_attrs(struct evlist *evlist) +{ + if (!pmu_have_event("cpu", "slots")) + return 0; + + return parse_events(evlist, TOPDOWN_L1_EVENTS, NULL); +} diff --git a/tools/perf/arch/x86/util/evsel.c b/tools/perf/arch/x86/util/evsel.c new file mode 100644 index 000000000000..2f733cdc8dbb --- /dev/null +++ b/tools/perf/arch/x86/util/evsel.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdio.h> +#include "util/evsel.h" + +void arch_evsel__set_sample_weight(struct evsel *evsel) +{ + evsel__set_sample_bit(evsel, WEIGHT_STRUCT); +} diff --git a/tools/perf/arch/x86/util/mem-events.c b/tools/perf/arch/x86/util/mem-events.c new file mode 100644 index 000000000000..588110fd8904 --- /dev/null +++ b/tools/perf/arch/x86/util/mem-events.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "util/pmu.h" +#include "map_symbol.h" +#include "mem-events.h" + +static char mem_loads_name[100]; +static bool mem_loads_name__init; + +#define MEM_LOADS_AUX 0x8203 +#define MEM_LOADS_AUX_NAME "{cpu/mem-loads-aux/,cpu/mem-loads,ldlat=%u/pp}:S" + +bool is_mem_loads_aux_event(struct evsel *leader) +{ + if (!pmu_have_event("cpu", "mem-loads-aux")) + return false; + + return leader->core.attr.config == MEM_LOADS_AUX; +} + +char *perf_mem_events__name(int i) +{ + struct perf_mem_event *e = perf_mem_events__ptr(i); + + if (!e) + return NULL; + + if (i == PERF_MEM_EVENTS__LOAD) { + if (mem_loads_name__init) + return mem_loads_name; + + mem_loads_name__init = true; + + if (pmu_have_event("cpu", "mem-loads-aux")) { + scnprintf(mem_loads_name, sizeof(mem_loads_name), + MEM_LOADS_AUX_NAME, perf_mem_events__loads_ldlat); + } else { + scnprintf(mem_loads_name, sizeof(mem_loads_name), + e->name, perf_mem_events__loads_ldlat); + } + return mem_loads_name; + } + + return (char *)e->name; +} diff --git a/tools/perf/bench/epoll-ctl.c b/tools/perf/bench/epoll-ctl.c index ca2d591aad8a..ddaca75c3bc0 100644 --- a/tools/perf/bench/epoll-ctl.c +++ b/tools/perf/bench/epoll-ctl.c @@ -21,7 +21,6 @@ #include <sys/resource.h> #include <sys/epoll.h> #include <sys/eventfd.h> -#include <internal/cpumap.h> #include <perf/cpumap.h> #include "../util/stat.h" diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c index 75dca9773186..0a0ff1247c83 100644 --- a/tools/perf/bench/epoll-wait.c +++ b/tools/perf/bench/epoll-wait.c @@ -76,7 +76,6 @@ #include <sys/epoll.h> #include <sys/eventfd.h> #include <sys/types.h> -#include <internal/cpumap.h> #include <perf/cpumap.h> #include "../util/stat.h" diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index 915bf3da7ce2..b65373ce5c4f 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -20,7 +20,6 @@ #include <linux/kernel.h> #include <linux/zalloc.h> #include <sys/time.h> -#include <internal/cpumap.h> #include <perf/cpumap.h> #include "../util/stat.h" diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index bb25d8beb3b8..89c6d160379c 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -14,7 +14,6 @@ #include <linux/kernel.h> #include <linux/zalloc.h> #include <errno.h> -#include <internal/cpumap.h> #include <perf/cpumap.h> #include "bench.h" #include "futex.h" diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c index 7a15c2e61022..5fa23295ee5f 100644 --- a/tools/perf/bench/futex-requeue.c +++ b/tools/perf/bench/futex-requeue.c @@ -20,7 +20,6 @@ #include <linux/kernel.h> #include <linux/time64.h> #include <errno.h> -#include <internal/cpumap.h> #include <perf/cpumap.h> #include "bench.h" #include "futex.h" diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c index cd2b81a845ac..6e6f5247e1fe 100644 --- a/tools/perf/bench/futex-wake-parallel.c +++ b/tools/perf/bench/futex-wake-parallel.c @@ -29,7 +29,6 @@ int bench_futex_wake_parallel(int argc __maybe_unused, const char **argv __maybe #include <linux/time64.h> #include <errno.h> #include "futex.h" -#include <internal/cpumap.h> #include <perf/cpumap.h> #include <err.h> diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c index 2dfcef3e371e..6d217868f53c 100644 --- a/tools/perf/bench/futex-wake.c +++ b/tools/perf/bench/futex-wake.c @@ -20,7 +20,6 @@ #include <linux/kernel.h> #include <linux/time64.h> #include <errno.h> -#include <internal/cpumap.h> #include <perf/cpumap.h> #include "bench.h" #include "futex.h" diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index a25411926e48..ecd0d3cb6f5c 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -27,6 +27,7 @@ #include "util/time-utils.h" #include "util/util.h" #include "util/probe-file.h" +#include "util/config.h" #include <linux/string.h> #include <linux/err.h> @@ -348,12 +349,21 @@ static int build_id_cache__show_all(void) return 0; } +static int perf_buildid_cache_config(const char *var, const char *value, void *cb) +{ + const char **debuginfod = cb; + + if (!strcmp(var, "buildid-cache.debuginfod")) + *debuginfod = strdup(value); + + return 0; +} + int cmd_buildid_cache(int argc, const char **argv) { struct strlist *list; struct str_node *pos; - int ret = 0; - int ns_id = -1; + int ret, ns_id = -1; bool force = false; bool list_files = false; bool opts_flag = false; @@ -363,7 +373,8 @@ int cmd_buildid_cache(int argc, const char **argv) *purge_name_list_str = NULL, *missing_filename = NULL, *update_name_list_str = NULL, - *kcore_filename = NULL; + *kcore_filename = NULL, + *debuginfod = NULL; char sbuf[STRERR_BUFSIZE]; struct perf_data data = { @@ -388,6 +399,8 @@ int cmd_buildid_cache(int argc, const char **argv) OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), OPT_STRING('u', "update", &update_name_list_str, "file list", "file(s) to update"), + OPT_STRING(0, "debuginfod", &debuginfod, "debuginfod url", + "set debuginfod url"), OPT_INCR('v', "verbose", &verbose, "be more verbose"), OPT_INTEGER(0, "target-ns", &ns_id, "target pid for namespace context"), OPT_END() @@ -397,6 +410,10 @@ int cmd_buildid_cache(int argc, const char **argv) NULL }; + ret = perf_config(perf_buildid_cache_config, &debuginfod); + if (ret) + return ret; + argc = parse_options(argc, argv, buildid_cache_options, buildid_cache_usage, 0); @@ -408,6 +425,11 @@ int cmd_buildid_cache(int argc, const char **argv) if (argc || !(list_files || opts_flag)) usage_with_options(buildid_cache_usage, buildid_cache_options); + if (debuginfod) { + pr_debug("DEBUGINFOD_URLS=%s\n", debuginfod); + setenv("DEBUGINFOD_URLS", debuginfod, 1); + } + /* -l is exclusive. It can not be used with other options. */ if (list_files && opts_flag) { usage_with_options_msg(buildid_cache_usage, diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c index e3ef75583514..87f5b1a4a7fa 100644 --- a/tools/perf/builtin-buildid-list.c +++ b/tools/perf/builtin-buildid-list.c @@ -77,6 +77,9 @@ static int perf_session__list_build_ids(bool force, bool with_hits) perf_header__has_feat(&session->header, HEADER_AUXTRACE)) with_hits = false; + if (!perf_header__has_feat(&session->header, HEADER_BUILD_ID)) + with_hits = true; + /* * in pipe-mode, the only way to get the buildids is to parse * the record stream. Buildids are stored as RECORD_HEADER_BUILD_ID diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index c5babeaa3b38..e3b9d63077ef 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -97,8 +97,8 @@ struct perf_c2c { bool symbol_full; bool stitch_lbr; - /* HITM shared clines stats */ - struct c2c_stats hitm_stats; + /* Shared cache line stats */ + struct c2c_stats shared_clines_stats; int shared_clines; int display; @@ -876,7 +876,7 @@ static struct c2c_stats *total_stats(struct hist_entry *he) return &hists->stats; } -static double percent(int st, int tot) +static double percent(u32 st, u32 tot) { return tot ? 100. * (double) st / (double) tot : 0; } @@ -1048,6 +1048,19 @@ empty_cmp(struct perf_hpp_fmt *fmt __maybe_unused, return 0; } +static int display_metrics(struct perf_hpp *hpp, u32 val, u32 sum) +{ + int ret; + + if (sum != 0) + ret = scnprintf(hpp->buf, hpp->size, "%5.1f%% ", + percent(val, sum)); + else + ret = scnprintf(hpp->buf, hpp->size, "%6s ", "n/a"); + + return ret; +} + static int node_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, struct hist_entry *he) @@ -1091,29 +1104,23 @@ node_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, ret = scnprintf(hpp->buf, hpp->size, "%2d{%2d ", node, num); advance_hpp(hpp, ret); - #define DISPLAY_HITM(__h) \ - if (c2c_he->stats.__h> 0) { \ - ret = scnprintf(hpp->buf, hpp->size, "%5.1f%% ", \ - percent(stats->__h, c2c_he->stats.__h));\ - } else { \ - ret = scnprintf(hpp->buf, hpp->size, "%6s ", "n/a"); \ - } - switch (c2c.display) { case DISPLAY_RMT: - DISPLAY_HITM(rmt_hitm); + ret = display_metrics(hpp, stats->rmt_hitm, + c2c_he->stats.rmt_hitm); break; case DISPLAY_LCL: - DISPLAY_HITM(lcl_hitm); + ret = display_metrics(hpp, stats->lcl_hitm, + c2c_he->stats.lcl_hitm); break; case DISPLAY_TOT: - DISPLAY_HITM(tot_hitm); + ret = display_metrics(hpp, stats->tot_hitm, + c2c_he->stats.tot_hitm); + break; default: break; } - #undef DISPLAY_HITM - advance_hpp(hpp, ret); if (c2c_he->stats.store > 0) { @@ -1851,53 +1858,69 @@ static int c2c_hists__reinit(struct c2c_hists *c2c_hists, #define DISPLAY_LINE_LIMIT 0.001 +static u8 filter_display(u32 val, u32 sum) +{ + if (sum == 0 || ((double)val / sum) < DISPLAY_LINE_LIMIT) + return HIST_FILTER__C2C; + + return 0; +} + static bool he__display(struct hist_entry *he, struct c2c_stats *stats) { struct c2c_hist_entry *c2c_he; - double ld_dist; if (c2c.show_all) return true; c2c_he = container_of(he, struct c2c_hist_entry, he); -#define FILTER_HITM(__h) \ - if (stats->__h) { \ - ld_dist = ((double)c2c_he->stats.__h / stats->__h); \ - if (ld_dist < DISPLAY_LINE_LIMIT) \ - he->filtered = HIST_FILTER__C2C; \ - } else { \ - he->filtered = HIST_FILTER__C2C; \ - } - switch (c2c.display) { case DISPLAY_LCL: - FILTER_HITM(lcl_hitm); + he->filtered = filter_display(c2c_he->stats.lcl_hitm, + stats->lcl_hitm); break; case DISPLAY_RMT: - FILTER_HITM(rmt_hitm); + he->filtered = filter_display(c2c_he->stats.rmt_hitm, + stats->rmt_hitm); break; case DISPLAY_TOT: - FILTER_HITM(tot_hitm); + he->filtered = filter_display(c2c_he->stats.tot_hitm, + stats->tot_hitm); + break; default: break; } -#undef FILTER_HITM - return he->filtered == 0; } -static inline int valid_hitm_or_store(struct hist_entry *he) +static inline bool is_valid_hist_entry(struct hist_entry *he) { struct c2c_hist_entry *c2c_he; - bool has_hitm; + bool has_record = false; c2c_he = container_of(he, struct c2c_hist_entry, he); - has_hitm = c2c.display == DISPLAY_TOT ? c2c_he->stats.tot_hitm : - c2c.display == DISPLAY_LCL ? c2c_he->stats.lcl_hitm : - c2c_he->stats.rmt_hitm; - return has_hitm || c2c_he->stats.store; + + /* It's a valid entry if contains stores */ + if (c2c_he->stats.store) + return true; + + switch (c2c.display) { + case DISPLAY_LCL: + has_record = !!c2c_he->stats.lcl_hitm; + break; + case DISPLAY_RMT: + has_record = !!c2c_he->stats.rmt_hitm; + break; + case DISPLAY_TOT: + has_record = !!c2c_he->stats.tot_hitm; + break; + default: + break; + } + + return has_record; } static void set_node_width(struct c2c_hist_entry *c2c_he, int len) @@ -1951,7 +1974,7 @@ static int filter_cb(struct hist_entry *he, void *arg __maybe_unused) calc_width(c2c_he); - if (!valid_hitm_or_store(he)) + if (!is_valid_hist_entry(he)) he->filtered = HIST_FILTER__C2C; return 0; @@ -1961,7 +1984,7 @@ static int resort_cl_cb(struct hist_entry *he, void *arg __maybe_unused) { struct c2c_hist_entry *c2c_he; struct c2c_hists *c2c_hists; - bool display = he__display(he, &c2c.hitm_stats); + bool display = he__display(he, &c2c.shared_clines_stats); c2c_he = container_of(he, struct c2c_hist_entry, he); c2c_hists = c2c_he->hists; @@ -2048,14 +2071,14 @@ static int setup_nodes(struct perf_session *session) #define HAS_HITMS(__h) ((__h)->stats.lcl_hitm || (__h)->stats.rmt_hitm) -static int resort_hitm_cb(struct hist_entry *he, void *arg __maybe_unused) +static int resort_shared_cl_cb(struct hist_entry *he, void *arg __maybe_unused) { struct c2c_hist_entry *c2c_he; c2c_he = container_of(he, struct c2c_hist_entry, he); if (HAS_HITMS(c2c_he)) { c2c.shared_clines++; - c2c_add_stats(&c2c.hitm_stats, &c2c_he->stats); + c2c_add_stats(&c2c.shared_clines_stats, &c2c_he->stats); } return 0; @@ -2111,6 +2134,8 @@ static void print_c2c__display_stats(FILE *out) fprintf(out, " Load MESI State Exclusive : %10d\n", stats->ld_excl); fprintf(out, " Load MESI State Shared : %10d\n", stats->ld_shared); fprintf(out, " Load LLC Misses : %10d\n", llc_misses); + fprintf(out, " Load access blocked by data : %10d\n", stats->blk_data); + fprintf(out, " Load access blocked by address : %10d\n", stats->blk_addr); fprintf(out, " LLC Misses to Local DRAM : %10.1f%%\n", ((double)stats->lcl_dram/(double)llc_misses) * 100.); fprintf(out, " LLC Misses to Remote DRAM : %10.1f%%\n", ((double)stats->rmt_dram/(double)llc_misses) * 100.); fprintf(out, " LLC Misses to Remote cache (HIT) : %10.1f%%\n", ((double)stats->rmt_hit /(double)llc_misses) * 100.); @@ -2126,7 +2151,7 @@ static void print_c2c__display_stats(FILE *out) static void print_shared_cacheline_info(FILE *out) { - struct c2c_stats *stats = &c2c.hitm_stats; + struct c2c_stats *stats = &c2c.shared_clines_stats; int hitm_cnt = stats->lcl_hitm + stats->rmt_hitm; fprintf(out, "=================================================\n"); @@ -2139,6 +2164,7 @@ static void print_shared_cacheline_info(FILE *out) fprintf(out, " L2D hits on shared lines : %10d\n", stats->ld_l2hit); fprintf(out, " LLC hits on shared lines : %10d\n", stats->ld_llchit + stats->lcl_hitm); fprintf(out, " Locked Access on shared lines : %10d\n", stats->locks); + fprintf(out, " Blocked Access on shared lines : %10d\n", stats->blk_data + stats->blk_addr); fprintf(out, " Store HITs on shared lines : %10d\n", stats->store); fprintf(out, " Store L1D hits on shared lines : %10d\n", stats->st_l1hit); fprintf(out, " Total Merged records : %10d\n", hitm_cnt + stats->store); @@ -2176,16 +2202,17 @@ static void print_pareto(FILE *out) struct perf_hpp_list hpp_list; struct rb_node *nd; int ret; + const char *cl_output; + + cl_output = "cl_num," + "cl_rmt_hitm," + "cl_lcl_hitm," + "cl_stores_l1hit," + "cl_stores_l1miss," + "dcacheline"; perf_hpp_list__init(&hpp_list); - ret = hpp_list__parse(&hpp_list, - "cl_num," - "cl_rmt_hitm," - "cl_lcl_hitm," - "cl_stores_l1hit," - "cl_stores_l1miss," - "dcacheline", - NULL); + ret = hpp_list__parse(&hpp_list, cl_output, NULL); if (WARN_ONCE(ret, "failed to setup sort entries\n")) return; @@ -2729,6 +2756,7 @@ static int perf_c2c__report(int argc, const char **argv) OPT_END() }; int err = 0; + const char *output_str, *sort_str = NULL; argc = parse_options(argc, argv, options, report_c2c_usage, PARSE_OPT_STOP_AT_NON_OPTION); @@ -2805,29 +2833,34 @@ static int perf_c2c__report(int argc, const char **argv) goto out_mem2node; } - c2c_hists__reinit(&c2c.hists, - "cl_idx," - "dcacheline," - "dcacheline_node," - "dcacheline_count," - "percent_hitm," - "tot_hitm,lcl_hitm,rmt_hitm," - "tot_recs," - "tot_loads," - "tot_stores," - "stores_l1hit,stores_l1miss," - "ld_fbhit,ld_l1hit,ld_l2hit," - "ld_lclhit,lcl_hitm," - "ld_rmthit,rmt_hitm," - "dram_lcl,dram_rmt", - c2c.display == DISPLAY_TOT ? "tot_hitm" : - c2c.display == DISPLAY_LCL ? "lcl_hitm" : "rmt_hitm" - ); + output_str = "cl_idx," + "dcacheline," + "dcacheline_node," + "dcacheline_count," + "percent_hitm," + "tot_hitm,lcl_hitm,rmt_hitm," + "tot_recs," + "tot_loads," + "tot_stores," + "stores_l1hit,stores_l1miss," + "ld_fbhit,ld_l1hit,ld_l2hit," + "ld_lclhit,lcl_hitm," + "ld_rmthit,rmt_hitm," + "dram_lcl,dram_rmt"; + + if (c2c.display == DISPLAY_TOT) + sort_str = "tot_hitm"; + else if (c2c.display == DISPLAY_RMT) + sort_str = "rmt_hitm"; + else if (c2c.display == DISPLAY_LCL) + sort_str = "lcl_hitm"; + + c2c_hists__reinit(&c2c.hists, output_str, sort_str); ui_progress__init(&prog, c2c.hists.hists.nr_entries, "Sorting..."); hists__collapse_resort(&c2c.hists.hists, NULL); - hists__output_resort_cb(&c2c.hists.hists, &prog, resort_hitm_cb); + hists__output_resort_cb(&c2c.hists.hists, &prog, resort_shared_cl_cb); hists__iterate_cb(&c2c.hists.hists, resort_cl_cb); ui_progress__finish(); diff --git a/tools/perf/builtin-daemon.c b/tools/perf/builtin-daemon.c new file mode 100644 index 000000000000..617feaf020f6 --- /dev/null +++ b/tools/perf/builtin-daemon.c @@ -0,0 +1,1521 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <internal/lib.h> +#include <subcmd/parse-options.h> +#include <api/fd/array.h> +#include <api/fs/fs.h> +#include <linux/zalloc.h> +#include <linux/string.h> +#include <linux/limits.h> +#include <linux/string.h> +#include <string.h> +#include <sys/file.h> +#include <signal.h> +#include <stdlib.h> +#include <time.h> +#include <stdio.h> +#include <unistd.h> +#include <errno.h> +#include <sys/inotify.h> +#include <libgen.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/un.h> +#include <sys/stat.h> +#include <sys/signalfd.h> +#include <sys/wait.h> +#include <poll.h> +#include <sys/stat.h> +#include <time.h> +#include "builtin.h" +#include "perf.h" +#include "debug.h" +#include "config.h" +#include "util.h" + +#define SESSION_OUTPUT "output" +#define SESSION_CONTROL "control" +#define SESSION_ACK "ack" + +/* + * Session states: + * + * OK - session is up and running + * RECONFIG - session is pending for reconfiguration, + * new values are already loaded in session object + * KILL - session is pending to be killed + * + * Session object life and its state is maintained by + * following functions: + * + * setup_server_config + * - reads config file and setup session objects + * with following states: + * + * OK - no change needed + * RECONFIG - session needs to be changed + * (run variable changed) + * KILL - session needs to be killed + * (session is no longer in config file) + * + * daemon__reconfig + * - scans session objects and does following actions + * for states: + * + * OK - skip + * RECONFIG - session is killed and re-run with new config + * KILL - session is killed + * + * - all sessions have OK state on the function exit + */ +enum daemon_session_state { + OK, + RECONFIG, + KILL, +}; + +struct daemon_session { + char *base; + char *name; + char *run; + char *control; + int pid; + struct list_head list; + enum daemon_session_state state; + time_t start; +}; + +struct daemon { + const char *config; + char *config_real; + char *config_base; + const char *csv_sep; + const char *base_user; + char *base; + struct list_head sessions; + FILE *out; + char perf[PATH_MAX]; + int signal_fd; + time_t start; +}; + +static struct daemon __daemon = { + .sessions = LIST_HEAD_INIT(__daemon.sessions), +}; + +static const char * const daemon_usage[] = { + "perf daemon start [<options>]", + "perf daemon [<options>]", + NULL +}; + +static bool done; + +static void sig_handler(int sig __maybe_unused) +{ + done = true; +} + +static struct daemon_session *daemon__add_session(struct daemon *config, char *name) +{ + struct daemon_session *session = zalloc(sizeof(*session)); + + if (!session) + return NULL; + + session->name = strdup(name); + if (!session->name) { + free(session); + return NULL; + } + + session->pid = -1; + list_add_tail(&session->list, &config->sessions); + return session; +} + +static struct daemon_session *daemon__find_session(struct daemon *daemon, char *name) +{ + struct daemon_session *session; + + list_for_each_entry(session, &daemon->sessions, list) { + if (!strcmp(session->name, name)) + return session; + } + + return NULL; +} + +static int get_session_name(const char *var, char *session, int len) +{ + const char *p = var + sizeof("session-") - 1; + + while (*p != '.' && *p != 0x0 && len--) + *session++ = *p++; + + *session = 0; + return *p == '.' ? 0 : -EINVAL; +} + +static int session_config(struct daemon *daemon, const char *var, const char *value) +{ + struct daemon_session *session; + char name[100]; + + if (get_session_name(var, name, sizeof(name))) + return -EINVAL; + + var = strchr(var, '.'); + if (!var) + return -EINVAL; + + var++; + + session = daemon__find_session(daemon, name); + + if (!session) { + /* New session is defined. */ + session = daemon__add_session(daemon, name); + if (!session) + return -ENOMEM; + + pr_debug("reconfig: found new session %s\n", name); + + /* Trigger reconfig to start it. */ + session->state = RECONFIG; + } else if (session->state == KILL) { + /* Current session is defined, no action needed. */ + pr_debug("reconfig: found current session %s\n", name); + session->state = OK; + } + + if (!strcmp(var, "run")) { + bool same = false; + + if (session->run) + same = !strcmp(session->run, value); + + if (!same) { + if (session->run) { + free(session->run); + pr_debug("reconfig: session %s is changed\n", name); + } + + session->run = strdup(value); + if (!session->run) + return -ENOMEM; + + /* + * Either new or changed run value is defined, + * trigger reconfig for the session. + */ + session->state = RECONFIG; + } + } + + return 0; +} + +static int server_config(const char *var, const char *value, void *cb) +{ + struct daemon *daemon = cb; + + if (strstarts(var, "session-")) { + return session_config(daemon, var, value); + } else if (!strcmp(var, "daemon.base") && !daemon->base_user) { + if (daemon->base && strcmp(daemon->base, value)) { + pr_err("failed: can't redefine base, bailing out\n"); + return -EINVAL; + } + daemon->base = strdup(value); + if (!daemon->base) + return -ENOMEM; + } + + return 0; +} + +static int client_config(const char *var, const char *value, void *cb) +{ + struct daemon *daemon = cb; + + if (!strcmp(var, "daemon.base") && !daemon->base_user) { + daemon->base = strdup(value); + if (!daemon->base) + return -ENOMEM; + } + + return 0; +} + +static int check_base(struct daemon *daemon) +{ + struct stat st; + + if (!daemon->base) { + pr_err("failed: base not defined\n"); + return -EINVAL; + } + + if (stat(daemon->base, &st)) { + switch (errno) { + case EACCES: + pr_err("failed: permission denied for '%s' base\n", + daemon->base); + return -EACCES; + case ENOENT: + pr_err("failed: base '%s' does not exists\n", + daemon->base); + return -EACCES; + default: + pr_err("failed: can't access base '%s': %s\n", + daemon->base, strerror(errno)); + return -errno; + } + } + + if ((st.st_mode & S_IFMT) != S_IFDIR) { + pr_err("failed: base '%s' is not directory\n", + daemon->base); + return -EINVAL; + } + + return 0; +} + +static int setup_client_config(struct daemon *daemon) +{ + struct perf_config_set *set = perf_config_set__load_file(daemon->config_real); + int err = -ENOMEM; + + if (set) { + err = perf_config_set(set, client_config, daemon); + perf_config_set__delete(set); + } + + return err ?: check_base(daemon); +} + +static int setup_server_config(struct daemon *daemon) +{ + struct perf_config_set *set; + struct daemon_session *session; + int err = -ENOMEM; + + pr_debug("reconfig: started\n"); + + /* + * Mark all sessions for kill, the server config + * will set following states, see explanation at + * enum daemon_session_state declaration. + */ + list_for_each_entry(session, &daemon->sessions, list) + session->state = KILL; + + set = perf_config_set__load_file(daemon->config_real); + if (set) { + err = perf_config_set(set, server_config, daemon); + perf_config_set__delete(set); + } + + return err ?: check_base(daemon); +} + +static int daemon_session__run(struct daemon_session *session, + struct daemon *daemon) +{ + char buf[PATH_MAX]; + char **argv; + int argc, fd; + + if (asprintf(&session->base, "%s/session-%s", + daemon->base, session->name) < 0) { + perror("failed: asprintf"); + return -1; + } + + if (mkdir(session->base, 0755) && errno != EEXIST) { + perror("failed: mkdir"); + return -1; + } + + session->start = time(NULL); + + session->pid = fork(); + if (session->pid < 0) + return -1; + if (session->pid > 0) { + pr_info("reconfig: ruining session [%s:%d]: %s\n", + session->name, session->pid, session->run); + return 0; + } + + if (chdir(session->base)) { + perror("failed: chdir"); + return -1; + } + + fd = open("/dev/null", O_RDONLY); + if (fd < 0) { + perror("failed: open /dev/null"); + return -1; + } + + dup2(fd, 0); + close(fd); + + fd = open(SESSION_OUTPUT, O_RDWR|O_CREAT|O_TRUNC, 0644); + if (fd < 0) { + perror("failed: open session output"); + return -1; + } + + dup2(fd, 1); + dup2(fd, 2); + close(fd); + + if (mkfifo(SESSION_CONTROL, O_RDWR) && errno != EEXIST) { + perror("failed: create control fifo"); + return -1; + } + + if (mkfifo(SESSION_ACK, O_RDWR) && errno != EEXIST) { + perror("failed: create ack fifo"); + return -1; + } + + scnprintf(buf, sizeof(buf), "%s record --control=fifo:%s,%s %s", + daemon->perf, SESSION_CONTROL, SESSION_ACK, session->run); + + argv = argv_split(buf, &argc); + if (!argv) + exit(-1); + + exit(execve(daemon->perf, argv, NULL)); + return -1; +} + +static pid_t handle_signalfd(struct daemon *daemon) +{ + struct daemon_session *session; + struct signalfd_siginfo si; + ssize_t err; + int status; + pid_t pid; + + err = read(daemon->signal_fd, &si, sizeof(struct signalfd_siginfo)); + if (err != sizeof(struct signalfd_siginfo)) + return -1; + + list_for_each_entry(session, &daemon->sessions, list) { + + if (session->pid != (int) si.ssi_pid) + continue; + + pid = waitpid(session->pid, &status, 0); + if (pid == session->pid) { + if (WIFEXITED(status)) { + pr_info("session '%s' exited, status=%d\n", + session->name, WEXITSTATUS(status)); + } else if (WIFSIGNALED(status)) { + pr_info("session '%s' killed (signal %d)\n", + session->name, WTERMSIG(status)); + } else if (WIFSTOPPED(status)) { + pr_info("session '%s' stopped (signal %d)\n", + session->name, WSTOPSIG(status)); + } else { + pr_info("session '%s' Unexpected status (0x%x)\n", + session->name, status); + } + } + + session->state = KILL; + session->pid = -1; + return pid; + } + + return 0; +} + +static int daemon_session__wait(struct daemon_session *session, struct daemon *daemon, + int secs) +{ + struct pollfd pollfd = { + .fd = daemon->signal_fd, + .events = POLLIN, + }; + pid_t wpid = 0, pid = session->pid; + time_t start; + + start = time(NULL); + + do { + int err = poll(&pollfd, 1, 1000); + + if (err > 0) { + wpid = handle_signalfd(daemon); + } else if (err < 0) { + perror("failed: poll\n"); + return -1; + } + + if (start + secs < time(NULL)) + return -1; + } while (wpid != pid); + + return 0; +} + +static bool daemon__has_alive_session(struct daemon *daemon) +{ + struct daemon_session *session; + + list_for_each_entry(session, &daemon->sessions, list) { + if (session->pid != -1) + return true; + } + + return false; +} + +static int daemon__wait(struct daemon *daemon, int secs) +{ + struct pollfd pollfd = { + .fd = daemon->signal_fd, + .events = POLLIN, + }; + time_t start; + + start = time(NULL); + + do { + int err = poll(&pollfd, 1, 1000); + + if (err > 0) { + handle_signalfd(daemon); + } else if (err < 0) { + perror("failed: poll\n"); + return -1; + } + + if (start + secs < time(NULL)) + return -1; + } while (daemon__has_alive_session(daemon)); + + return 0; +} + +static int daemon_session__control(struct daemon_session *session, + const char *msg, bool do_ack) +{ + struct pollfd pollfd = { .events = POLLIN, }; + char control_path[PATH_MAX]; + char ack_path[PATH_MAX]; + int control, ack = -1, len; + char buf[20]; + int ret = -1; + ssize_t err; + + /* open the control file */ + scnprintf(control_path, sizeof(control_path), "%s/%s", + session->base, SESSION_CONTROL); + + control = open(control_path, O_WRONLY|O_NONBLOCK); + if (!control) + return -1; + + if (do_ack) { + /* open the ack file */ + scnprintf(ack_path, sizeof(ack_path), "%s/%s", + session->base, SESSION_ACK); + + ack = open(ack_path, O_RDONLY, O_NONBLOCK); + if (!ack) { + close(control); + return -1; + } + } + + /* write the command */ + len = strlen(msg); + + err = writen(control, msg, len); + if (err != len) { + pr_err("failed: write to control pipe: %d (%s)\n", + errno, control_path); + goto out; + } + + if (!do_ack) + goto out; + + /* wait for an ack */ + pollfd.fd = ack; + + if (!poll(&pollfd, 1, 2000)) { + pr_err("failed: control ack timeout\n"); + goto out; + } + + if (!(pollfd.revents & POLLIN)) { + pr_err("failed: did not received an ack\n"); + goto out; + } + + err = read(ack, buf, sizeof(buf)); + if (err > 0) + ret = strcmp(buf, "ack\n"); + else + perror("failed: read ack %d\n"); + +out: + if (ack != -1) + close(ack); + + close(control); + return ret; +} + +static int setup_server_socket(struct daemon *daemon) +{ + struct sockaddr_un addr; + char path[PATH_MAX]; + int fd = socket(AF_UNIX, SOCK_STREAM, 0); + + if (fd < 0) { + fprintf(stderr, "socket: %s\n", strerror(errno)); + return -1; + } + + if (fcntl(fd, F_SETFD, FD_CLOEXEC)) { + perror("failed: fcntl FD_CLOEXEC"); + close(fd); + return -1; + } + + scnprintf(path, sizeof(path), "%s/control", daemon->base); + + if (strlen(path) + 1 >= sizeof(addr.sun_path)) { + pr_err("failed: control path too long '%s'\n", path); + close(fd); + return -1; + } + + memset(&addr, 0, sizeof(addr)); + addr.sun_family = AF_UNIX; + + strlcpy(addr.sun_path, path, sizeof(addr.sun_path) - 1); + unlink(path); + + if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) == -1) { + perror("failed: bind"); + close(fd); + return -1; + } + + if (listen(fd, 1) == -1) { + perror("failed: listen"); + close(fd); + return -1; + } + + return fd; +} + +enum { + CMD_LIST = 0, + CMD_SIGNAL = 1, + CMD_STOP = 2, + CMD_PING = 3, + CMD_MAX, +}; + +#define SESSION_MAX 64 + +union cmd { + int cmd; + + /* CMD_LIST */ + struct { + int cmd; + int verbose; + char csv_sep; + } list; + + /* CMD_SIGNAL */ + struct { + int cmd; + int sig; + char name[SESSION_MAX]; + } signal; + + /* CMD_PING */ + struct { + int cmd; + char name[SESSION_MAX]; + } ping; +}; + +enum { + PING_OK = 0, + PING_FAIL = 1, + PING_MAX, +}; + +static int daemon_session__ping(struct daemon_session *session) +{ + return daemon_session__control(session, "ping", true) ? PING_FAIL : PING_OK; +} + +static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out) +{ + char csv_sep = cmd->list.csv_sep; + struct daemon_session *session; + time_t curr = time(NULL); + + if (csv_sep) { + fprintf(out, "%d%c%s%c%s%c%s/%s", + /* pid daemon */ + getpid(), csv_sep, "daemon", + /* base */ + csv_sep, daemon->base, + /* output */ + csv_sep, daemon->base, SESSION_OUTPUT); + + fprintf(out, "%c%s/%s", + /* lock */ + csv_sep, daemon->base, "lock"); + + fprintf(out, "%c%lu", + /* session up time */ + csv_sep, (curr - daemon->start) / 60); + + fprintf(out, "\n"); + } else { + fprintf(out, "[%d:daemon] base: %s\n", getpid(), daemon->base); + if (cmd->list.verbose) { + fprintf(out, " output: %s/%s\n", + daemon->base, SESSION_OUTPUT); + fprintf(out, " lock: %s/lock\n", + daemon->base); + fprintf(out, " up: %lu minutes\n", + (curr - daemon->start) / 60); + } + } + + list_for_each_entry(session, &daemon->sessions, list) { + if (csv_sep) { + fprintf(out, "%d%c%s%c%s", + /* pid */ + session->pid, + /* name */ + csv_sep, session->name, + /* base */ + csv_sep, session->run); + + fprintf(out, "%c%s%c%s/%s", + /* session dir */ + csv_sep, session->base, + /* session output */ + csv_sep, session->base, SESSION_OUTPUT); + + fprintf(out, "%c%s/%s%c%s/%s", + /* session control */ + csv_sep, session->base, SESSION_CONTROL, + /* session ack */ + csv_sep, session->base, SESSION_ACK); + + fprintf(out, "%c%lu", + /* session up time */ + csv_sep, (curr - session->start) / 60); + + fprintf(out, "\n"); + } else { + fprintf(out, "[%d:%s] perf record %s\n", + session->pid, session->name, session->run); + if (!cmd->list.verbose) + continue; + fprintf(out, " base: %s\n", + session->base); + fprintf(out, " output: %s/%s\n", + session->base, SESSION_OUTPUT); + fprintf(out, " control: %s/%s\n", + session->base, SESSION_CONTROL); + fprintf(out, " ack: %s/%s\n", + session->base, SESSION_ACK); + fprintf(out, " up: %lu minutes\n", + (curr - session->start) / 60); + } + } + + return 0; +} + +static int daemon_session__signal(struct daemon_session *session, int sig) +{ + if (session->pid < 0) + return -1; + return kill(session->pid, sig); +} + +static int cmd_session_kill(struct daemon *daemon, union cmd *cmd, FILE *out) +{ + struct daemon_session *session; + bool all = false; + + all = !strcmp(cmd->signal.name, "all"); + + list_for_each_entry(session, &daemon->sessions, list) { + if (all || !strcmp(cmd->signal.name, session->name)) { + daemon_session__signal(session, cmd->signal.sig); + fprintf(out, "signal %d sent to session '%s [%d]'\n", + cmd->signal.sig, session->name, session->pid); + } + } + + return 0; +} + +static const char *ping_str[PING_MAX] = { + [PING_OK] = "OK", + [PING_FAIL] = "FAIL", +}; + +static int cmd_session_ping(struct daemon *daemon, union cmd *cmd, FILE *out) +{ + struct daemon_session *session; + bool all = false, found = false; + + all = !strcmp(cmd->ping.name, "all"); + + list_for_each_entry(session, &daemon->sessions, list) { + if (all || !strcmp(cmd->ping.name, session->name)) { + int state = daemon_session__ping(session); + + fprintf(out, "%-4s %s\n", ping_str[state], session->name); + found = true; + } + } + + if (!found && !all) { + fprintf(out, "%-4s %s (not found)\n", + ping_str[PING_FAIL], cmd->ping.name); + } + return 0; +} + +static int handle_server_socket(struct daemon *daemon, int sock_fd) +{ + int ret = -1, fd; + FILE *out = NULL; + union cmd cmd; + + fd = accept(sock_fd, NULL, NULL); + if (fd < 0) { + perror("failed: accept"); + return -1; + } + + if (sizeof(cmd) != readn(fd, &cmd, sizeof(cmd))) { + perror("failed: read"); + goto out; + } + + out = fdopen(fd, "w"); + if (!out) { + perror("failed: fdopen"); + goto out; + } + + switch (cmd.cmd) { + case CMD_LIST: + ret = cmd_session_list(daemon, &cmd, out); + break; + case CMD_SIGNAL: + ret = cmd_session_kill(daemon, &cmd, out); + break; + case CMD_STOP: + done = 1; + ret = 0; + pr_debug("perf daemon is exciting\n"); + break; + case CMD_PING: + ret = cmd_session_ping(daemon, &cmd, out); + break; + default: + break; + } + + fclose(out); +out: + /* If out is defined, then fd is closed via fclose. */ + if (!out) + close(fd); + return ret; +} + +static int setup_client_socket(struct daemon *daemon) +{ + struct sockaddr_un addr; + char path[PATH_MAX]; + int fd = socket(AF_UNIX, SOCK_STREAM, 0); + + if (fd == -1) { + perror("failed: socket"); + return -1; + } + + scnprintf(path, sizeof(path), "%s/control", daemon->base); + + if (strlen(path) + 1 >= sizeof(addr.sun_path)) { + pr_err("failed: control path too long '%s'\n", path); + close(fd); + return -1; + } + + memset(&addr, 0, sizeof(addr)); + addr.sun_family = AF_UNIX; + strlcpy(addr.sun_path, path, sizeof(addr.sun_path) - 1); + + if (connect(fd, (struct sockaddr *) &addr, sizeof(addr)) == -1) { + perror("failed: connect"); + close(fd); + return -1; + } + + return fd; +} + +static void daemon_session__kill(struct daemon_session *session, + struct daemon *daemon) +{ + int how = 0; + + do { + switch (how) { + case 0: + daemon_session__control(session, "stop", false); + break; + case 1: + daemon_session__signal(session, SIGTERM); + break; + case 2: + daemon_session__signal(session, SIGKILL); + break; + default: + break; + } + how++; + + } while (daemon_session__wait(session, daemon, 10)); +} + +static void daemon__signal(struct daemon *daemon, int sig) +{ + struct daemon_session *session; + + list_for_each_entry(session, &daemon->sessions, list) + daemon_session__signal(session, sig); +} + +static void daemon_session__delete(struct daemon_session *session) +{ + free(session->base); + free(session->name); + free(session->run); + free(session); +} + +static void daemon_session__remove(struct daemon_session *session) +{ + list_del(&session->list); + daemon_session__delete(session); +} + +static void daemon__stop(struct daemon *daemon) +{ + struct daemon_session *session; + + list_for_each_entry(session, &daemon->sessions, list) + daemon_session__control(session, "stop", false); +} + +static void daemon__kill(struct daemon *daemon) +{ + int how = 0; + + do { + switch (how) { + case 0: + daemon__stop(daemon); + break; + case 1: + daemon__signal(daemon, SIGTERM); + break; + case 2: + daemon__signal(daemon, SIGKILL); + break; + default: + break; + } + how++; + + } while (daemon__wait(daemon, 10)); +} + +static void daemon__exit(struct daemon *daemon) +{ + struct daemon_session *session, *h; + + list_for_each_entry_safe(session, h, &daemon->sessions, list) + daemon_session__remove(session); + + free(daemon->config_real); + free(daemon->config_base); + free(daemon->base); +} + +static int daemon__reconfig(struct daemon *daemon) +{ + struct daemon_session *session, *n; + + list_for_each_entry_safe(session, n, &daemon->sessions, list) { + /* No change. */ + if (session->state == OK) + continue; + + /* Remove session. */ + if (session->state == KILL) { + if (session->pid > 0) { + daemon_session__kill(session, daemon); + pr_info("reconfig: session '%s' killed\n", session->name); + } + daemon_session__remove(session); + continue; + } + + /* Reconfig session. */ + if (session->pid > 0) { + daemon_session__kill(session, daemon); + pr_info("reconfig: session '%s' killed\n", session->name); + } + if (daemon_session__run(session, daemon)) + return -1; + + session->state = OK; + } + + return 0; +} + +static int setup_config_changes(struct daemon *daemon) +{ + char *basen = strdup(daemon->config_real); + char *dirn = strdup(daemon->config_real); + char *base, *dir; + int fd, wd = -1; + + if (!dirn || !basen) + goto out; + + fd = inotify_init1(IN_NONBLOCK|O_CLOEXEC); + if (fd < 0) { + perror("failed: inotify_init"); + goto out; + } + + dir = dirname(dirn); + base = basename(basen); + pr_debug("config file: %s, dir: %s\n", base, dir); + + wd = inotify_add_watch(fd, dir, IN_CLOSE_WRITE); + if (wd >= 0) { + daemon->config_base = strdup(base); + if (!daemon->config_base) { + close(fd); + wd = -1; + } + } else { + perror("failed: inotify_add_watch"); + } + +out: + free(basen); + free(dirn); + return wd < 0 ? -1 : fd; +} + +static bool process_inotify_event(struct daemon *daemon, char *buf, ssize_t len) +{ + char *p = buf; + + while (p < (buf + len)) { + struct inotify_event *event = (struct inotify_event *) p; + + /* + * We monitor config directory, check if our + * config file was changes. + */ + if ((event->mask & IN_CLOSE_WRITE) && + !(event->mask & IN_ISDIR)) { + if (!strcmp(event->name, daemon->config_base)) + return true; + } + p += sizeof(*event) + event->len; + } + return false; +} + +static int handle_config_changes(struct daemon *daemon, int conf_fd, + bool *config_changed) +{ + char buf[4096]; + ssize_t len; + + while (!(*config_changed)) { + len = read(conf_fd, buf, sizeof(buf)); + if (len == -1) { + if (errno != EAGAIN) { + perror("failed: read"); + return -1; + } + return 0; + } + *config_changed = process_inotify_event(daemon, buf, len); + } + return 0; +} + +static int setup_config(struct daemon *daemon) +{ + if (daemon->base_user) { + daemon->base = strdup(daemon->base_user); + if (!daemon->base) + return -ENOMEM; + } + + if (daemon->config) { + char *real = realpath(daemon->config, NULL); + + if (!real) { + perror("failed: realpath"); + return -1; + } + daemon->config_real = real; + return 0; + } + + if (perf_config_system() && !access(perf_etc_perfconfig(), R_OK)) + daemon->config_real = strdup(perf_etc_perfconfig()); + else if (perf_config_global() && perf_home_perfconfig()) + daemon->config_real = strdup(perf_home_perfconfig()); + + return daemon->config_real ? 0 : -1; +} + +#ifndef F_TLOCK +#define F_TLOCK 2 + +#include <sys/file.h> + +static int lockf(int fd, int cmd, off_t len) +{ + if (cmd != F_TLOCK || len != 0) + return -1; + + return flock(fd, LOCK_EX | LOCK_NB); +} +#endif // F_TLOCK + +/* + * Each daemon tries to create and lock BASE/lock file, + * if it's successful we are sure we're the only daemon + * running over the BASE. + * + * Once daemon is finished, file descriptor to lock file + * is closed and lock is released. + */ +static int check_lock(struct daemon *daemon) +{ + char path[PATH_MAX]; + char buf[20]; + int fd, pid; + ssize_t len; + + scnprintf(path, sizeof(path), "%s/lock", daemon->base); + + fd = open(path, O_RDWR|O_CREAT|O_CLOEXEC, 0640); + if (fd < 0) + return -1; + + if (lockf(fd, F_TLOCK, 0) < 0) { + filename__read_int(path, &pid); + fprintf(stderr, "failed: another perf daemon (pid %d) owns %s\n", + pid, daemon->base); + close(fd); + return -1; + } + + scnprintf(buf, sizeof(buf), "%d", getpid()); + len = strlen(buf); + + if (write(fd, buf, len) != len) { + perror("failed: write"); + close(fd); + return -1; + } + + if (ftruncate(fd, len)) { + perror("failed: ftruncate"); + close(fd); + return -1; + } + + return 0; +} + +static int go_background(struct daemon *daemon) +{ + int pid, fd; + + pid = fork(); + if (pid < 0) + return -1; + + if (pid > 0) + return 1; + + if (setsid() < 0) + return -1; + + if (check_lock(daemon)) + return -1; + + umask(0); + + if (chdir(daemon->base)) { + perror("failed: chdir"); + return -1; + } + + fd = open("output", O_RDWR|O_CREAT|O_TRUNC, 0644); + if (fd < 0) { + perror("failed: open"); + return -1; + } + + if (fcntl(fd, F_SETFD, FD_CLOEXEC)) { + perror("failed: fcntl FD_CLOEXEC"); + close(fd); + return -1; + } + + close(0); + dup2(fd, 1); + dup2(fd, 2); + close(fd); + + daemon->out = fdopen(1, "w"); + if (!daemon->out) { + close(1); + close(2); + return -1; + } + + setbuf(daemon->out, NULL); + return 0; +} + +static int setup_signalfd(struct daemon *daemon) +{ + sigset_t mask; + + sigemptyset(&mask); + sigaddset(&mask, SIGCHLD); + + if (sigprocmask(SIG_BLOCK, &mask, NULL) == -1) + return -1; + + daemon->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC); + return daemon->signal_fd; +} + +static int __cmd_start(struct daemon *daemon, struct option parent_options[], + int argc, const char **argv) +{ + bool foreground = false; + struct option start_options[] = { + OPT_BOOLEAN('f', "foreground", &foreground, "stay on console"), + OPT_PARENT(parent_options), + OPT_END() + }; + int sock_fd = -1, conf_fd = -1, signal_fd = -1; + int sock_pos, file_pos, signal_pos; + struct fdarray fda; + int err = 0; + + argc = parse_options(argc, argv, start_options, daemon_usage, 0); + if (argc) + usage_with_options(daemon_usage, start_options); + + daemon->start = time(NULL); + + if (setup_config(daemon)) { + pr_err("failed: config not found\n"); + return -1; + } + + if (setup_server_config(daemon)) + return -1; + + if (foreground && check_lock(daemon)) + return -1; + + if (!foreground) { + err = go_background(daemon); + if (err) { + /* original process, exit normally */ + if (err == 1) + err = 0; + daemon__exit(daemon); + return err; + } + } + + debug_set_file(daemon->out); + debug_set_display_time(true); + + pr_info("daemon started (pid %d)\n", getpid()); + + fdarray__init(&fda, 3); + + sock_fd = setup_server_socket(daemon); + if (sock_fd < 0) + goto out; + + conf_fd = setup_config_changes(daemon); + if (conf_fd < 0) + goto out; + + signal_fd = setup_signalfd(daemon); + if (signal_fd < 0) + goto out; + + sock_pos = fdarray__add(&fda, sock_fd, POLLIN|POLLERR|POLLHUP, 0); + if (sock_pos < 0) + goto out; + + file_pos = fdarray__add(&fda, conf_fd, POLLIN|POLLERR|POLLHUP, 0); + if (file_pos < 0) + goto out; + + signal_pos = fdarray__add(&fda, signal_fd, POLLIN|POLLERR|POLLHUP, 0); + if (signal_pos < 0) + goto out; + + signal(SIGINT, sig_handler); + signal(SIGTERM, sig_handler); + signal(SIGPIPE, SIG_IGN); + + while (!done && !err) { + err = daemon__reconfig(daemon); + + if (!err && fdarray__poll(&fda, -1)) { + bool reconfig = false; + + if (fda.entries[sock_pos].revents & POLLIN) + err = handle_server_socket(daemon, sock_fd); + if (fda.entries[file_pos].revents & POLLIN) + err = handle_config_changes(daemon, conf_fd, &reconfig); + if (fda.entries[signal_pos].revents & POLLIN) + err = handle_signalfd(daemon) < 0; + + if (reconfig) + err = setup_server_config(daemon); + } + } + +out: + fdarray__exit(&fda); + + daemon__kill(daemon); + daemon__exit(daemon); + + if (sock_fd != -1) + close(sock_fd); + if (conf_fd != -1) + close(conf_fd); + if (conf_fd != -1) + close(signal_fd); + + pr_info("daemon exited\n"); + fclose(daemon->out); + return err; +} + +static int send_cmd(struct daemon *daemon, union cmd *cmd) +{ + int ret = -1, fd; + char *line = NULL; + size_t len = 0; + ssize_t nread; + FILE *in = NULL; + + if (setup_client_config(daemon)) + return -1; + + fd = setup_client_socket(daemon); + if (fd < 0) + return -1; + + if (sizeof(*cmd) != writen(fd, cmd, sizeof(*cmd))) { + perror("failed: write"); + goto out; + } + + in = fdopen(fd, "r"); + if (!in) { + perror("failed: fdopen"); + goto out; + } + + while ((nread = getline(&line, &len, in)) != -1) { + if (fwrite(line, nread, 1, stdout) != 1) + goto out_fclose; + fflush(stdout); + } + + ret = 0; +out_fclose: + fclose(in); + free(line); +out: + /* If in is defined, then fd is closed via fclose. */ + if (!in) + close(fd); + return ret; +} + +static int send_cmd_list(struct daemon *daemon) +{ + union cmd cmd = { .cmd = CMD_LIST, }; + + cmd.list.verbose = verbose; + cmd.list.csv_sep = daemon->csv_sep ? *daemon->csv_sep : 0; + + return send_cmd(daemon, &cmd); +} + +static int __cmd_signal(struct daemon *daemon, struct option parent_options[], + int argc, const char **argv) +{ + const char *name = "all"; + struct option start_options[] = { + OPT_STRING(0, "session", &name, "session", + "Sent signal to specific session"), + OPT_PARENT(parent_options), + OPT_END() + }; + union cmd cmd; + + argc = parse_options(argc, argv, start_options, daemon_usage, 0); + if (argc) + usage_with_options(daemon_usage, start_options); + + if (setup_config(daemon)) { + pr_err("failed: config not found\n"); + return -1; + } + + cmd.signal.cmd = CMD_SIGNAL, + cmd.signal.sig = SIGUSR2; + strncpy(cmd.signal.name, name, sizeof(cmd.signal.name) - 1); + + return send_cmd(daemon, &cmd); +} + +static int __cmd_stop(struct daemon *daemon, struct option parent_options[], + int argc, const char **argv) +{ + struct option start_options[] = { + OPT_PARENT(parent_options), + OPT_END() + }; + union cmd cmd = { .cmd = CMD_STOP, }; + + argc = parse_options(argc, argv, start_options, daemon_usage, 0); + if (argc) + usage_with_options(daemon_usage, start_options); + + if (setup_config(daemon)) { + pr_err("failed: config not found\n"); + return -1; + } + + return send_cmd(daemon, &cmd); +} + +static int __cmd_ping(struct daemon *daemon, struct option parent_options[], + int argc, const char **argv) +{ + const char *name = "all"; + struct option ping_options[] = { + OPT_STRING(0, "session", &name, "session", + "Ping to specific session"), + OPT_PARENT(parent_options), + OPT_END() + }; + union cmd cmd = { .cmd = CMD_PING, }; + + argc = parse_options(argc, argv, ping_options, daemon_usage, 0); + if (argc) + usage_with_options(daemon_usage, ping_options); + + if (setup_config(daemon)) { + pr_err("failed: config not found\n"); + return -1; + } + + scnprintf(cmd.ping.name, sizeof(cmd.ping.name), "%s", name); + return send_cmd(daemon, &cmd); +} + +int cmd_daemon(int argc, const char **argv) +{ + struct option daemon_options[] = { + OPT_INCR('v', "verbose", &verbose, "be more verbose"), + OPT_STRING(0, "config", &__daemon.config, + "config file", "config file path"), + OPT_STRING(0, "base", &__daemon.base_user, + "directory", "base directory"), + OPT_STRING_OPTARG('x', "field-separator", &__daemon.csv_sep, + "field separator", "print counts with custom separator", ","), + OPT_END() + }; + + perf_exe(__daemon.perf, sizeof(__daemon.perf)); + __daemon.out = stdout; + + argc = parse_options(argc, argv, daemon_options, daemon_usage, + PARSE_OPT_STOP_AT_NON_OPTION); + + if (argc) { + if (!strcmp(argv[0], "start")) + return __cmd_start(&__daemon, daemon_options, argc, argv); + if (!strcmp(argv[0], "signal")) + return __cmd_signal(&__daemon, daemon_options, argc, argv); + else if (!strcmp(argv[0], "stop")) + return __cmd_stop(&__daemon, daemon_options, argc, argv); + else if (!strcmp(argv[0], "ping")) + return __cmd_ping(&__daemon, daemon_options, argc, argv); + + pr_err("failed: unknown command '%s'\n", argv[0]); + return -1; + } + + if (setup_config(&__daemon)) { + pr_err("failed: config not found\n"); + return -1; + } + + return send_cmd_list(&__daemon); +} diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 43937f4b399a..6fe44d97fde5 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -313,7 +313,7 @@ static int perf_event__jit_repipe_mmap(struct perf_tool *tool, * if jit marker, then inject jit mmaps and generate ELF images */ ret = jit_process(inject->session, &inject->output, machine, - event->mmap.filename, event->mmap.pid, &n); + event->mmap.filename, event->mmap.pid, event->mmap.tid, &n); if (ret < 0) return ret; if (ret) { @@ -413,7 +413,7 @@ static int perf_event__jit_repipe_mmap2(struct perf_tool *tool, * if jit marker, then inject jit mmaps and generate ELF images */ ret = jit_process(inject->session, &inject->output, machine, - event->mmap2.filename, event->mmap2.pid, &n); + event->mmap2.filename, event->mmap2.pid, event->mmap2.tid, &n); if (ret < 0) return ret; if (ret) { diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 823742036ddb..cdd2b9f643f6 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -30,6 +30,7 @@ struct perf_mem { bool dump_raw; bool force; bool phys_addr; + bool data_page_size; int operation; const char *cpu_list; DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); @@ -124,6 +125,9 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) if (mem->phys_addr) rec_argv[i++] = "--phys-data"; + if (mem->data_page_size) + rec_argv[i++] = "--data-page-size"; + for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { e = perf_mem_events__ptr(j); if (!e->record) @@ -172,7 +176,8 @@ dump_raw_samples(struct perf_tool *tool, { struct perf_mem *mem = container_of(tool, struct perf_mem, tool); struct addr_location al; - const char *fmt; + const char *fmt, *field_sep; + char str[PAGE_SIZE_NAME_LEN]; if (machine__resolve(machine, &al, sample) < 0) { fprintf(stderr, "problem processing %d event, skipping it.\n", @@ -186,60 +191,47 @@ dump_raw_samples(struct perf_tool *tool, if (al.map != NULL) al.map->dso->hit = 1; - if (mem->phys_addr) { - if (symbol_conf.field_sep) { - fmt = "%d%s%d%s0x%"PRIx64"%s0x%"PRIx64"%s0x%016"PRIx64 - "%s%"PRIu64"%s0x%"PRIx64"%s%s:%s\n"; - } else { - fmt = "%5d%s%5d%s0x%016"PRIx64"%s0x016%"PRIx64 - "%s0x%016"PRIx64"%s%5"PRIu64"%s0x%06"PRIx64 - "%s%s:%s\n"; - symbol_conf.field_sep = " "; - } + field_sep = symbol_conf.field_sep; + if (field_sep) { + fmt = "%d%s%d%s0x%"PRIx64"%s0x%"PRIx64"%s"; + } else { + fmt = "%5d%s%5d%s0x%016"PRIx64"%s0x016%"PRIx64"%s"; + symbol_conf.field_sep = " "; + } + printf(fmt, + sample->pid, + symbol_conf.field_sep, + sample->tid, + symbol_conf.field_sep, + sample->ip, + symbol_conf.field_sep, + sample->addr, + symbol_conf.field_sep); - printf(fmt, - sample->pid, - symbol_conf.field_sep, - sample->tid, - symbol_conf.field_sep, - sample->ip, - symbol_conf.field_sep, - sample->addr, - symbol_conf.field_sep, + if (mem->phys_addr) { + printf("0x%016"PRIx64"%s", sample->phys_addr, - symbol_conf.field_sep, - sample->weight, - symbol_conf.field_sep, - sample->data_src, - symbol_conf.field_sep, - al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???", - al.sym ? al.sym->name : "???"); - } else { - if (symbol_conf.field_sep) { - fmt = "%d%s%d%s0x%"PRIx64"%s0x%"PRIx64"%s%"PRIu64 - "%s0x%"PRIx64"%s%s:%s\n"; - } else { - fmt = "%5d%s%5d%s0x%016"PRIx64"%s0x016%"PRIx64 - "%s%5"PRIu64"%s0x%06"PRIx64"%s%s:%s\n"; - symbol_conf.field_sep = " "; - } + symbol_conf.field_sep); + } - printf(fmt, - sample->pid, - symbol_conf.field_sep, - sample->tid, - symbol_conf.field_sep, - sample->ip, - symbol_conf.field_sep, - sample->addr, - symbol_conf.field_sep, - sample->weight, - symbol_conf.field_sep, - sample->data_src, - symbol_conf.field_sep, - al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???", - al.sym ? al.sym->name : "???"); + if (mem->data_page_size) { + printf("%s%s", + get_page_size_name(sample->data_page_size, str), + symbol_conf.field_sep); } + + if (field_sep) + fmt = "%"PRIu64"%s0x%"PRIx64"%s%s:%s\n"; + else + fmt = "%5"PRIu64"%s0x%06"PRIx64"%s%s:%s\n"; + + printf(fmt, + sample->weight, + symbol_conf.field_sep, + sample->data_src, + symbol_conf.field_sep, + al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???", + al.sym ? al.sym->name : "???"); out_put: addr_location__put(&al); return 0; @@ -287,10 +279,15 @@ static int report_raw_events(struct perf_mem *mem) if (ret < 0) goto out_delete; + printf("# PID, TID, IP, ADDR, "); + if (mem->phys_addr) - printf("# PID, TID, IP, ADDR, PHYS ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n"); - else - printf("# PID, TID, IP, ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n"); + printf("PHYS ADDR, "); + + if (mem->data_page_size) + printf("DATA PAGE SIZE, "); + + printf("LOCAL WEIGHT, DSRC, SYMBOL\n"); ret = perf_session__process_events(session); @@ -300,7 +297,7 @@ out_delete: } static char *get_sort_order(struct perf_mem *mem) { - bool has_extra_options = mem->phys_addr ? true : false; + bool has_extra_options = (mem->phys_addr | mem->data_page_size) ? true : false; char sort[128]; /* @@ -312,13 +309,16 @@ static char *get_sort_order(struct perf_mem *mem) "dso_daddr,tlb,locked"); } else if (has_extra_options) { strcpy(sort, "--sort=local_weight,mem,sym,dso,symbol_daddr," - "dso_daddr,snoop,tlb,locked"); + "dso_daddr,snoop,tlb,locked,blocked"); } else return NULL; if (mem->phys_addr) strcat(sort, ",phys_daddr"); + if (mem->data_page_size) + strcat(sort, ",data_page_size"); + return strdup(sort); } @@ -464,6 +464,7 @@ int cmd_mem(int argc, const char **argv) " between columns '.' is reserved."), OPT_BOOLEAN('f', "force", &mem.force, "don't complain, do it"), OPT_BOOLEAN('p', "phys-data", &mem.phys_addr, "Record/Report sample physical addresses"), + OPT_BOOLEAN(0, "data-page-size", &mem.data_page_size, "Record/Report sample data address page size"), OPT_END() }; const char *const mem_subcommands[] = { "record", "report", NULL }; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index fd3911650612..35465d1db6dd 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -102,6 +102,7 @@ struct record { bool no_buildid_cache; bool no_buildid_cache_set; bool buildid_all; + bool buildid_mmap; bool timestamp_filename; bool timestamp_boundary; struct switch_output switch_output; @@ -730,6 +731,8 @@ static int record__auxtrace_init(struct record *rec) if (err) return err; + auxtrace_regroup_aux_output(rec->evlist); + return auxtrace_parse_filters(rec->evlist); } @@ -1663,7 +1666,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) status = -1; goto out_delete_session; } - err = evlist__add_pollfd(rec->evlist, done_fd); + err = evlist__add_wakeup_eventfd(rec->evlist, done_fd); if (err < 0) { pr_err("Failed to add wakeup eventfd to poll list\n"); status = err; @@ -1937,18 +1940,19 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) { switch (cmd) { - case EVLIST_CTL_CMD_ENABLE: - pr_info(EVLIST_ENABLED_MSG); - break; - case EVLIST_CTL_CMD_DISABLE: - pr_info(EVLIST_DISABLED_MSG); - break; case EVLIST_CTL_CMD_SNAPSHOT: hit_auxtrace_snapshot_trigger(rec); evlist__ctlfd_ack(rec->evlist); break; + case EVLIST_CTL_CMD_STOP: + done = 1; + break; case EVLIST_CTL_CMD_ACK: case EVLIST_CTL_CMD_UNSUPPORTED: + case EVLIST_CTL_CMD_ENABLE: + case EVLIST_CTL_CMD_DISABLE: + case EVLIST_CTL_CMD_EVLIST: + case EVLIST_CTL_CMD_PING: default: break; } @@ -2135,6 +2139,8 @@ static int perf_record_config(const char *var, const char *value, void *cb) rec->no_buildid_cache = true; else if (!strcmp(value, "skip")) rec->no_buildid = true; + else if (!strcmp(value, "mmap")) + rec->buildid_mmap = true; else return -1; return 0; @@ -2474,6 +2480,8 @@ static struct option __record_options[] = { "Record the sample physical addresses"), OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size, "Record the sampled data address data page size"), + OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size, + "Record the sampled code address (ip) page size"), OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"), OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time, &record.opts.sample_time_set, @@ -2552,6 +2560,8 @@ static struct option __record_options[] = { "file", "vmlinux pathname"), OPT_BOOLEAN(0, "buildid-all", &record.buildid_all, "Record build-id of all DSOs regardless of hits"), + OPT_BOOLEAN(0, "buildid-mmap", &record.buildid_mmap, + "Record build-id in map events"), OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename, "append timestamp to output filename"), OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary, @@ -2655,6 +2665,21 @@ int cmd_record(int argc, const char **argv) } + if (rec->buildid_mmap) { + if (!perf_can_record_build_id()) { + pr_err("Failed: no support to record build id in mmap events, update your kernel.\n"); + err = -EINVAL; + goto out_opts; + } + pr_debug("Enabling build id in mmap2 events.\n"); + /* Enable mmap build id synthesizing. */ + symbol_conf.buildid_mmap2 = true; + /* Enable perf_event_attr::build_id bit. */ + rec->opts.build_id = true; + /* Disable build id cache. */ + rec->no_buildid = true; + } + if (rec->opts.kcore) rec->data.is_dir = true; diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index edacfa98d073..5915f19cee55 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -117,6 +117,7 @@ enum perf_output_field { PERF_OUTPUT_IPC = 1ULL << 31, PERF_OUTPUT_TOD = 1ULL << 32, PERF_OUTPUT_DATA_PAGE_SIZE = 1ULL << 33, + PERF_OUTPUT_CODE_PAGE_SIZE = 1ULL << 34, }; struct perf_script { @@ -182,10 +183,12 @@ struct output_option { {.str = "ipc", .field = PERF_OUTPUT_IPC}, {.str = "tod", .field = PERF_OUTPUT_TOD}, {.str = "data_page_size", .field = PERF_OUTPUT_DATA_PAGE_SIZE}, + {.str = "code_page_size", .field = PERF_OUTPUT_CODE_PAGE_SIZE}, }; enum { OUTPUT_TYPE_SYNTH = PERF_TYPE_MAX, + OUTPUT_TYPE_OTHER, OUTPUT_TYPE_MAX }; @@ -255,7 +258,7 @@ static struct { PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD | PERF_OUTPUT_ADDR | PERF_OUTPUT_DATA_SRC | PERF_OUTPUT_WEIGHT | PERF_OUTPUT_PHYS_ADDR | - PERF_OUTPUT_DATA_PAGE_SIZE, + PERF_OUTPUT_DATA_PAGE_SIZE | PERF_OUTPUT_CODE_PAGE_SIZE, .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT, }, @@ -283,6 +286,18 @@ static struct { .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT, }, + + [OUTPUT_TYPE_OTHER] = { + .user_set = false, + + .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID | + PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | + PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP | + PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET | + PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD, + + .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT, + }, }; struct evsel_script { @@ -343,8 +358,11 @@ static inline int output_type(unsigned int type) case PERF_TYPE_SYNTH: return OUTPUT_TYPE_SYNTH; default: - return type; + if (type < PERF_TYPE_MAX) + return type; } + + return OUTPUT_TYPE_OTHER; } static inline unsigned int attr_type(unsigned int type) @@ -507,6 +525,10 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session) evsel__check_stype(evsel, PERF_SAMPLE_DATA_PAGE_SIZE, "DATA_PAGE_SIZE", PERF_OUTPUT_DATA_PAGE_SIZE)) return -EINVAL; + if (PRINT_FIELD(CODE_PAGE_SIZE) && + evsel__check_stype(evsel, PERF_SAMPLE_CODE_PAGE_SIZE, "CODE_PAGE_SIZE", PERF_OUTPUT_CODE_PAGE_SIZE)) + return -EINVAL; + return 0; } @@ -1515,6 +1537,8 @@ static struct { {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT, "tx abrt"}, {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TRACE_BEGIN, "tr strt"}, {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TRACE_END, "tr end"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_VMENTRY, "vmentry"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_VMEXIT, "vmexit"}, {0, NULL} }; @@ -1744,6 +1768,18 @@ static int perf_sample__fprintf_synth_cbr(struct perf_sample *sample, FILE *fp) return len + perf_sample__fprintf_pt_spacing(len, fp); } +static int perf_sample__fprintf_synth_psb(struct perf_sample *sample, FILE *fp) +{ + struct perf_synth_intel_psb *data = perf_sample__synth_ptr(sample); + int len; + + if (perf_sample__bad_synth_size(sample, *data)) + return 0; + + len = fprintf(fp, " psb offs: %#" PRIx64, data->offset); + return len + perf_sample__fprintf_pt_spacing(len, fp); +} + static int perf_sample__fprintf_synth(struct perf_sample *sample, struct evsel *evsel, FILE *fp) { @@ -1760,6 +1796,8 @@ static int perf_sample__fprintf_synth(struct perf_sample *sample, return perf_sample__fprintf_synth_pwrx(sample, fp); case PERF_SYNTH_INTEL_CBR: return perf_sample__fprintf_synth_cbr(sample, fp); + case PERF_SYNTH_INTEL_PSB: + return perf_sample__fprintf_synth_psb(sample, fp); default: break; } @@ -2020,6 +2058,9 @@ static void process_event(struct perf_script *script, if (PRINT_FIELD(DATA_PAGE_SIZE)) fprintf(fp, " %s", get_page_size_name(sample->data_page_size, str)); + if (PRINT_FIELD(CODE_PAGE_SIZE)) + fprintf(fp, " %s", get_page_size_name(sample->code_page_size, str)); + perf_sample__fprintf_ipc(sample, attr, fp); fprintf(fp, "\n"); @@ -2770,7 +2811,7 @@ parse: break; } if (i == imax && strcmp(tok, "flags") == 0) { - print_flags = change == REMOVE ? false : true; + print_flags = change != REMOVE; continue; } if (i == imax) { @@ -3218,7 +3259,7 @@ static char *get_script_path(const char *script_root, const char *suffix) static bool is_top_script(const char *script_path) { - return ends_with(script_path, "top") == NULL ? false : true; + return ends_with(script_path, "top") != NULL; } static int has_required_arg(char *script_path) @@ -3519,12 +3560,16 @@ int cmd_script(int argc, const char **argv) "addr,symoff,srcline,period,iregs,uregs,brstack," "brstacksym,flags,bpf-output,brstackinsn,brstackoff," "callindent,insn,insnlen,synth,phys_addr,metric,misc,ipc,tod," - "data_page_size", + "data_page_size,code_page_size", parse_output_fields), OPT_BOOLEAN('a', "all-cpus", &system_wide, "system-wide collection from all CPUs"), + OPT_STRING(0, "dsos", &symbol_conf.dso_list_str, "dso[,dso...]", + "only consider symbols in these DSOs"), OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", "only consider these symbols"), + OPT_INTEGER(0, "addr-range", &symbol_conf.addr_range, + "Use with -S to list traced records within address range"), OPT_CALLBACK_OPTARG(0, "insn-trace", &itrace_synth_opts, NULL, NULL, "Decode instructions from itrace", parse_insn_trace), OPT_CALLBACK_OPTARG(0, "xed", NULL, NULL, NULL, diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 8cc24967bc27..2e2e4a8345ea 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -67,6 +67,7 @@ #include "util/top.h" #include "util/affinity.h" #include "util/pfm.h" +#include "util/bpf_counter.h" #include "asm/bug.h" #include <linux/time64.h> @@ -137,6 +138,19 @@ static const char *topdown_metric_attrs[] = { NULL, }; +static const char *topdown_metric_L2_attrs[] = { + "slots", + "topdown-retiring", + "topdown-bad-spec", + "topdown-fe-bound", + "topdown-be-bound", + "topdown-heavy-ops", + "topdown-br-mispredict", + "topdown-fetch-lat", + "topdown-mem-bound", + NULL, +}; + static const char *smi_cost_attrs = { "{" "msr/aperf/," @@ -409,12 +423,32 @@ static int read_affinity_counters(struct timespec *rs) return 0; } +static int read_bpf_map_counters(void) +{ + struct evsel *counter; + int err; + + evlist__for_each_entry(evsel_list, counter) { + err = bpf_counter__read(counter); + if (err) + return err; + } + return 0; +} + static void read_counters(struct timespec *rs) { struct evsel *counter; + int err; - if (!stat_config.stop_read_counter && (read_affinity_counters(rs) < 0)) - return; + if (!stat_config.stop_read_counter) { + if (target__has_bpf(&target)) + err = read_bpf_map_counters(); + else + err = read_affinity_counters(rs); + if (err < 0) + return; + } evlist__for_each_entry(evsel_list, counter) { if (counter->err) @@ -496,11 +530,22 @@ static bool handle_interval(unsigned int interval, int *times) return false; } -static void enable_counters(void) +static int enable_counters(void) { + struct evsel *evsel; + int err; + + if (target__has_bpf(&target)) { + evlist__for_each_entry(evsel_list, evsel) { + err = bpf_counter__enable(evsel); + if (err) + return err; + } + } + if (stat_config.initial_delay < 0) { pr_info(EVLIST_DISABLED_MSG); - return; + return 0; } if (stat_config.initial_delay > 0) { @@ -518,6 +563,7 @@ static void enable_counters(void) if (stat_config.initial_delay > 0) pr_info(EVLIST_ENABLED_MSG); } + return 0; } static void disable_counters(void) @@ -578,18 +624,19 @@ static void process_evlist(struct evlist *evlist, unsigned int interval) if (evlist__ctlfd_process(evlist, &cmd) > 0) { switch (cmd) { case EVLIST_CTL_CMD_ENABLE: - pr_info(EVLIST_ENABLED_MSG); if (interval) process_interval(); break; case EVLIST_CTL_CMD_DISABLE: if (interval) process_interval(); - pr_info(EVLIST_DISABLED_MSG); break; case EVLIST_CTL_CMD_SNAPSHOT: case EVLIST_CTL_CMD_ACK: case EVLIST_CTL_CMD_UNSUPPORTED: + case EVLIST_CTL_CMD_EVLIST: + case EVLIST_CTL_CMD_STOP: + case EVLIST_CTL_CMD_PING: default: break; } @@ -720,7 +767,7 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) const bool forks = (argc > 0); bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false; struct affinity affinity; - int i, cpu; + int i, cpu, err; bool second_pass = false; if (forks) { @@ -737,6 +784,13 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) if (affinity__setup(&affinity) < 0) return -1; + if (target__has_bpf(&target)) { + evlist__for_each_entry(evsel_list, counter) { + if (bpf_counter__load(counter, &target)) + return -1; + } + } + evlist__for_each_cpu (evsel_list, i, cpu) { affinity__set(&affinity, cpu); @@ -850,7 +904,7 @@ try_again_reset: } if (STAT_RECORD) { - int err, fd = perf_data__fd(&perf_stat.data); + int fd = perf_data__fd(&perf_stat.data); if (is_pipe) { err = perf_header__write_pipe(perf_data__fd(&perf_stat.data)); @@ -876,7 +930,9 @@ try_again_reset: if (forks) { evlist__start_workload(evsel_list); - enable_counters(); + err = enable_counters(); + if (err) + return -1; if (interval || timeout || evlist__ctlfd_initialized(evsel_list)) status = dispatch_events(forks, timeout, interval, ×); @@ -895,7 +951,9 @@ try_again_reset: if (WIFSIGNALED(status)) psignal(WTERMSIG(status), argv[0]); } else { - enable_counters(); + err = enable_counters(); + if (err) + return -1; status = dispatch_events(forks, timeout, interval, ×); } @@ -1085,6 +1143,10 @@ static struct option stat_options[] = { "stat events on existing process id"), OPT_STRING('t', "tid", &target.tid, "tid", "stat events on existing thread id"), +#ifdef HAVE_BPF_SKEL + OPT_STRING('b', "bpf-prog", &target.bpf_str, "bpf-prog-id", + "stat events on existing bpf program id"), +#endif OPT_BOOLEAN('a', "all-cpus", &target.system_wide, "system-wide collection from all CPUs"), OPT_BOOLEAN('g', "group", &group, @@ -1153,7 +1215,9 @@ static struct option stat_options[] = { OPT_BOOLEAN(0, "metric-no-merge", &stat_config.metric_no_merge, "don't try to share events between metrics in a group"), OPT_BOOLEAN(0, "topdown", &topdown_run, - "measure topdown level 1 statistics"), + "measure top-down statistics"), + OPT_UINTEGER(0, "td-level", &stat_config.topdown_level, + "Set the metrics level for the top-down statistics (0: max level)"), OPT_BOOLEAN(0, "smi-cost", &smi_cost, "measure SMI cost"), OPT_CALLBACK('M', "metrics", &evsel_list, "metric/metric group list", @@ -1706,17 +1770,30 @@ static int add_default_attributes(void) } if (topdown_run) { + const char **metric_attrs = topdown_metric_attrs; + unsigned int max_level = 1; char *str = NULL; bool warn = false; if (!force_metric_only) stat_config.metric_only = true; - if (topdown_filter_events(topdown_metric_attrs, &str, 1) < 0) { + if (pmu_have_event("cpu", topdown_metric_L2_attrs[5])) { + metric_attrs = topdown_metric_L2_attrs; + max_level = 2; + } + + if (stat_config.topdown_level > max_level) { + pr_err("Invalid top-down metrics level. The max level is %u.\n", max_level); + return -1; + } else if (!stat_config.topdown_level) + stat_config.topdown_level = max_level; + + if (topdown_filter_events(metric_attrs, &str, 1) < 0) { pr_err("Out of memory\n"); return -1; } - if (topdown_metric_attrs[0] && str) { + if (metric_attrs[0] && str) { if (!stat_config.interval && !stat_config.metric_only) { fprintf(stat_config.output, "Topdown accuracy may decrease when measuring long periods.\n" @@ -1779,6 +1856,9 @@ setup_metrics: } if (evlist__add_default_attrs(evsel_list, default_attrs1) < 0) return -1; + + if (arch_evlist__add_default_attrs(evsel_list) < 0) + return -1; } /* Detailed events get appended to the event list: */ @@ -2064,11 +2144,12 @@ int cmd_stat(int argc, const char **argv) "perf stat [<options>] [<command>]", NULL }; - int status = -EINVAL, run_idx; + int status = -EINVAL, run_idx, err; const char *mode; FILE *output = stderr; unsigned int interval, timeout; const char * const stat_subcommands[] = { "record", "report" }; + char errbuf[BUFSIZ]; setlocale(LC_ALL, ""); @@ -2179,6 +2260,12 @@ int cmd_stat(int argc, const char **argv) } else if (big_num_opt == 0) /* User passed --no-big-num */ stat_config.big_num = false; + err = target__validate(&target); + if (err) { + target__strerror(&target, err, errbuf, BUFSIZ); + pr_warning("%s\n", errbuf); + } + setup_system_wide(argc); /* @@ -2252,8 +2339,6 @@ int cmd_stat(int argc, const char **argv) } } - target__validate(&target); - if ((stat_config.aggr_mode == AGGR_THREAD) && (target.system_wide)) target.per_thread = true; @@ -2384,9 +2469,10 @@ int cmd_stat(int argc, const char **argv) * tools remain -acme */ int fd = perf_data__fd(&perf_stat.data); - int err = perf_event__synthesize_kernel_mmap((void *)&perf_stat, - process_synthesized_event, - &perf_stat.session->machines.host); + + err = perf_event__synthesize_kernel_mmap((void *)&perf_stat, + process_synthesized_event, + &perf_stat.session->machines.host); if (err) { pr_warning("Couldn't synthesize the kernel mmap record, harmless, " "older tools may produce warnings about this file\n."); diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h index 14a2db622a7b..7303e80a639c 100644 --- a/tools/perf/builtin.h +++ b/tools/perf/builtin.h @@ -37,6 +37,7 @@ int cmd_inject(int argc, const char **argv); int cmd_mem(int argc, const char **argv); int cmd_data(int argc, const char **argv); int cmd_ftrace(int argc, const char **argv); +int cmd_daemon(int argc, const char **argv); int find_scripts(char **scripts_array, char **scripts_path_array, int num, int pathlen); diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt index bc6c585f74fc..825a12e8d694 100644 --- a/tools/perf/command-list.txt +++ b/tools/perf/command-list.txt @@ -31,3 +31,4 @@ perf-timechart mainporcelain common perf-top mainporcelain common perf-trace mainporcelain audit perf-version mainporcelain common +perf-daemon mainporcelain common diff --git a/tools/perf/examples/bpf/5sec.c b/tools/perf/examples/bpf/5sec.c index 65c4ff6892d9..e6b6181c6dc6 100644 --- a/tools/perf/examples/bpf/5sec.c +++ b/tools/perf/examples/bpf/5sec.c @@ -39,7 +39,7 @@ Copyright (C) 2018 Red Hat, Inc., Arnaldo Carvalho de Melo <acme@redhat.com> */ -#include <bpf/bpf.h> +#include <bpf.h> #define NSEC_PER_SEC 1000000000L diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 27f94b0bb874..20cb91ef06ff 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -88,6 +88,7 @@ static struct cmd_struct commands[] = { { "mem", cmd_mem, 0 }, { "data", cmd_data, 0 }, { "ftrace", cmd_ftrace, 0 }, + { "daemon", cmd_daemon, 0 }, }; struct pager_config { diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/branch.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/branch.json index 2d15b11e5383..5c69c1e82ef8 100644 --- a/tools/perf/pmu-events/arch/arm64/ampere/emag/branch.json +++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/branch.json @@ -9,15 +9,11 @@ "ArchStdEvent": "BR_INDIRECT_SPEC" }, { - "PublicDescription": "Mispredicted or not predicted branch speculatively executed", - "EventCode": "0x10", - "EventName": "BR_MIS_PRED", + "ArchStdEvent": "BR_MIS_PRED", "BriefDescription": "Branch mispredicted" }, { - "PublicDescription": "Predictable branch speculatively executed", - "EventCode": "0x12", - "EventName": "BR_PRED", + "ArchStdEvent": "BR_PRED", "BriefDescription": "Predictable branch" } ] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/bus.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/bus.json index 5c1a9a922ca4..9bea1ba1c4d2 100644 --- a/tools/perf/pmu-events/arch/arm64/ampere/emag/bus.json +++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/bus.json @@ -18,9 +18,6 @@ "ArchStdEvent": "BUS_ACCESS_PERIPH" }, { - "PublicDescription": "Bus access", - "EventCode": "0x19", - "EventName": "BUS_ACCESS", - "BriefDescription": "Bus access" + "ArchStdEvent": "BUS_ACCESS", } ] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/cache.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/cache.json index 40010a8724b3..1e25f2ae4ae0 100644 --- a/tools/perf/pmu-events/arch/arm64/ampere/emag/cache.json +++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/cache.json @@ -39,70 +39,40 @@ "ArchStdEvent": "L2D_CACHE_INVAL" }, { - "PublicDescription": "Level 1 instruction cache refill", - "EventCode": "0x01", - "EventName": "L1I_CACHE_REFILL", - "BriefDescription": "L1I cache refill" + "ArchStdEvent": "L1I_CACHE_REFILL", }, { - "PublicDescription": "Level 1 instruction TLB refill", - "EventCode": "0x02", - "EventName": "L1I_TLB_REFILL", - "BriefDescription": "L1I TLB refill" + "ArchStdEvent": "L1I_TLB_REFILL", }, { - "PublicDescription": "Level 1 data cache refill", - "EventCode": "0x03", - "EventName": "L1D_CACHE_REFILL", - "BriefDescription": "L1D cache refill" + "ArchStdEvent": "L1D_CACHE_REFILL", }, { - "PublicDescription": "Level 1 data cache access", - "EventCode": "0x04", - "EventName": "L1D_CACHE_ACCESS", - "BriefDescription": "L1D cache access" + "ArchStdEvent": "L1D_CACHE", }, { - "PublicDescription": "Level 1 data TLB refill", - "EventCode": "0x05", - "EventName": "L1D_TLB_REFILL", - "BriefDescription": "L1D TLB refill" + "ArchStdEvent": "L1D_TLB_REFILL", }, { - "PublicDescription": "Level 1 instruction cache access", - "EventCode": "0x14", - "EventName": "L1I_CACHE_ACCESS", - "BriefDescription": "L1I cache access" + "ArchStdEvent": "L1I_CACHE", }, { - "PublicDescription": "Level 2 data cache access", - "EventCode": "0x16", - "EventName": "L2D_CACHE_ACCESS", - "BriefDescription": "L2D cache access" + "ArchStdEvent": "L2D_CACHE", }, { - "PublicDescription": "Level 2 data refill", - "EventCode": "0x17", - "EventName": "L2D_CACHE_REFILL", - "BriefDescription": "L2D cache refill" + "ArchStdEvent": "L2D_CACHE_REFILL", }, { - "PublicDescription": "Level 2 data cache, Write-Back", - "EventCode": "0x18", - "EventName": "L2D_CACHE_WB", - "BriefDescription": "L2D cache Write-Back" + "ArchStdEvent": "L2D_CACHE_WB", }, { - "PublicDescription": "Level 1 data TLB access. This event counts any load or store operation which accesses the data L1 TLB", - "EventCode": "0x25", - "EventName": "L1D_TLB_ACCESS", + "PublicDescription": "This event counts any load or store operation which accesses the data L1 TLB", + "ArchStdEvent": "L1D_TLB", "BriefDescription": "L1D TLB access" }, { - "PublicDescription": "Level 1 instruction TLB access. This event counts any instruction fetch which accesses the instruction L1 TLB", - "EventCode": "0x26", - "EventName": "L1I_TLB_ACCESS", - "BriefDescription": "L1I TLB access" + "PublicDescription": "This event counts any instruction fetch which accesses the instruction L1 TLB", + "ArchStdEvent": "L1I_TLB", }, { "PublicDescription": "Level 2 access to data TLB that caused a page table walk. This event counts on any data access which causes L2D_TLB_REFILL to count", @@ -114,7 +84,7 @@ "PublicDescription": "Level 2 access to instruciton TLB that caused a page table walk. This event counts on any instruciton access which causes L2I_TLB_REFILL to count", "EventCode": "0x35", "EventName": "L2I_TLB_ACCESS", - "BriefDescription": "L2D TLB access" + "BriefDescription": "L2I TLB access" }, { "PublicDescription": "Branch target buffer misprediction", diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/clock.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/clock.json index 51d1dc1519b2..9076ca2daf9e 100644 --- a/tools/perf/pmu-events/arch/arm64/ampere/emag/clock.json +++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/clock.json @@ -1,9 +1,7 @@ [ { "PublicDescription": "The number of core clock cycles", - "EventCode": "0x11", - "EventName": "CPU_CYCLES", - "BriefDescription": "Clock cycles" + "ArchStdEvent": "CPU_CYCLES", }, { "PublicDescription": "FSU clocking gated off cycle", diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/exception.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/exception.json index 66e51bc64b22..9761433ad329 100644 --- a/tools/perf/pmu-events/arch/arm64/ampere/emag/exception.json +++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/exception.json @@ -36,15 +36,9 @@ "ArchStdEvent": "EXC_TRAP_FIQ" }, { - "PublicDescription": "Exception taken", - "EventCode": "0x09", - "EventName": "EXC_TAKEN", - "BriefDescription": "Exception taken" + "ArchStdEvent": "EXC_TAKEN", }, { - "PublicDescription": "Instruction architecturally executed, condition check pass, exception return", - "EventCode": "0x0a", - "EventName": "EXC_RETURN", - "BriefDescription": "Exception return" + "ArchStdEvent": "EXC_RETURN", } ] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/instruction.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/instruction.json index 0d3e46776642..482aa3f19e58 100644 --- a/tools/perf/pmu-events/arch/arm64/ampere/emag/instruction.json +++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/instruction.json @@ -40,45 +40,29 @@ }, { "PublicDescription": "Instruction architecturally executed, software increment", - "EventCode": "0x00", - "EventName": "SW_INCR", + "ArchStdEvent": "SW_INCR", "BriefDescription": "Software increment" }, { - "PublicDescription": "Instruction architecturally executed", - "EventCode": "0x08", - "EventName": "INST_RETIRED", - "BriefDescription": "Instruction retired" + "ArchStdEvent": "INST_RETIRED", }, { - "PublicDescription": "Instruction architecturally executed, condition code check pass, write to CONTEXTIDR", - "EventCode": "0x0b", - "EventName": "CID_WRITE_RETIRED", + "ArchStdEvent": "CID_WRITE_RETIRED", "BriefDescription": "Write to CONTEXTIDR" }, { - "PublicDescription": "Operation speculatively executed", - "EventCode": "0x1b", - "EventName": "INST_SPEC", - "BriefDescription": "Speculatively executed" + "ArchStdEvent": "INST_SPEC", }, { - "PublicDescription": "Instruction architecturally executed (condition check pass), write to TTBR", - "EventCode": "0x1c", - "EventName": "TTBR_WRITE_RETIRED", - "BriefDescription": "Instruction executed, TTBR write" + "ArchStdEvent": "TTBR_WRITE_RETIRED", }, { - "PublicDescription": "Instruction architecturally executed, branch. This event counts all branches, taken or not. This excludes exception entries, debug entries and CCFAIL branches", - "EventCode": "0x21", - "EventName": "BR_RETIRED", - "BriefDescription": "Branch retired" + "PublicDescription": "This event counts all branches, taken or not. This excludes exception entries, debug entries and CCFAIL branches", + "ArchStdEvent": "BR_RETIRED", }, { - "PublicDescription": "Instruction architecturally executed, mispredicted branch. This event counts any branch counted by BR_RETIRED which is not correctly predicted and causes a pipeline flush", - "EventCode": "0x22", - "EventName": "BR_MISPRED_RETIRED", - "BriefDescription": "Mispredicted branch retired" + "PublicDescription": "This event counts any branch counted by BR_RETIRED which is not correctly predicted and causes a pipeline flush", + "ArchStdEvent": "BR_MIS_PRED_RETIRED", }, { "PublicDescription": "Operation speculatively executed, NOP", diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/memory.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/memory.json index c2fe674df960..2e7555696caf 100644 --- a/tools/perf/pmu-events/arch/arm64/ampere/emag/memory.json +++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/memory.json @@ -15,15 +15,10 @@ "ArchStdEvent": "UNALIGNED_LDST_SPEC" }, { - "PublicDescription": "Data memory access", - "EventCode": "0x13", - "EventName": "MEM_ACCESS", - "BriefDescription": "Memory access" + "ArchStdEvent": "MEM_ACCESS", }, { - "PublicDescription": "Local memory error. This event counts any correctable or uncorrectable memory error (ECC or parity) in the protected core RAMs", - "EventCode": "0x1a", - "EventName": "MEM_ERROR", - "BriefDescription": "Memory error" + "PublicDescription": "This event counts any correctable or uncorrectable memory error (ECC or parity) in the protected core RAMs", + "ArchStdEvent": "MEMORY_ERROR", } ] diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/branch.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/branch.json index b5e5d055c70d..ec0dc92288ab 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/branch.json +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/branch.json @@ -1,14 +1,10 @@ [ { - "PublicDescription": "Mispredicted or not predicted branch speculatively executed. This event counts any predictable branch instruction which is mispredicted either due to dynamic misprediction or because the MMU is off and the branches are statically predicted not taken.", - "EventCode": "0x10", - "EventName": "BR_MIS_PRED", - "BriefDescription": "Mispredicted or not predicted branch speculatively executed." + "PublicDescription": "This event counts any predictable branch instruction which is mispredicted either due to dynamic misprediction or because the MMU is off and the branches are statically predicted not taken", + "ArchStdEvent": "BR_MIS_PRED", }, { - "PublicDescription": "Predictable branch speculatively executed. This event counts all predictable branches.", - "EventCode": "0x12", - "EventName": "BR_PRED", - "BriefDescription": "Predictable branch speculatively executed." + "PublicDescription": "This event counts all predictable branches.", + "ArchStdEvent": "BR_PRED", } ] diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/bus.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/bus.json index fce7309ae624..6263929efce2 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/bus.json +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/bus.json @@ -1,24 +1,21 @@ [ { - "EventCode": "0x11", - "EventName": "CPU_CYCLES", + "PublicDescription": "The number of core clock cycles" + "ArchStdEvent": "CPU_CYCLES", "BriefDescription": "The number of core clock cycles." }, { - "PublicDescription": "Bus access. This event counts for every beat of data transferred over the data channels between the core and the SCU. If both read and write data beats are transferred on a given cycle, this event is counted twice on that cycle. This event counts the sum of BUS_ACCESS_RD and BUS_ACCESS_WR.", - "EventCode": "0x19", - "EventName": "BUS_ACCESS", - "BriefDescription": "Bus access." + "PublicDescription": "This event counts for every beat of data transferred over the data channels between the core and the SCU. If both read and write data beats are transferred on a given cycle, this event is counted twice on that cycle. This event counts the sum of BUS_ACCESS_RD and BUS_ACCESS_WR.", + "ArchStdEvent": "BUS_ACCESS", }, { - "EventCode": "0x1D", - "EventName": "BUS_CYCLES", - "BriefDescription": "Bus cycles. This event duplicates CPU_CYCLES." + "PublicDescription": "This event duplicates CPU_CYCLES." + "ArchStdEvent": "BUS_CYCLES", }, { - "ArchStdEvent": "BUS_ACCESS_RD" + "ArchStdEvent": "BUS_ACCESS_RD", }, { - "ArchStdEvent": "BUS_ACCESS_WR" + "ArchStdEvent": "BUS_ACCESS_WR", } ] diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/cache.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/cache.json index 24594081c199..cd67bb9df139 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/cache.json +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/cache.json @@ -1,133 +1,95 @@ [ { - "PublicDescription": "L1 instruction cache refill. This event counts any instruction fetch which misses in the cache.", - "EventCode": "0x01", - "EventName": "L1I_CACHE_REFILL", - "BriefDescription": "L1 instruction cache refill" + "PublicDescription": "This event counts any instruction fetch which misses in the cache.", + "ArchStdEvent": "L1I_CACHE_REFILL", }, { - "PublicDescription": "L1 instruction TLB refill. This event counts any refill of the instruction L1 TLB from the L2 TLB. This includes refills that result in a translation fault.", - "EventCode": "0x02", - "EventName": "L1I_TLB_REFILL", - "BriefDescription": "L1 instruction TLB refill" + "PublicDescription": "This event counts any refill of the instruction L1 TLB from the L2 TLB. This includes refills that result in a translation fault.", + "ArchStdEvent": "L1I_TLB_REFILL", }, { - "PublicDescription": "L1 data cache refill. This event counts any load or store operation or page table walk access which causes data to be read from outside the L1, including accesses which do not allocate into L1.", - "EventCode": "0x03", - "EventName": "L1D_CACHE_REFILL", - "BriefDescription": "L1 data cache refill" + "PublicDescription": "This event counts any load or store operation or page table walk access which causes data to be read from outside the L1, including accesses which do not allocate into L1.", + "ArchStdEvent": "L1D_CACHE_REFILL", }, { - "PublicDescription": "L1 data cache access. This event counts any load or store operation or page table walk access which looks up in the L1 data cache. In particular, any access which could count the L1D_CACHE_REFILL event causes this event to count.", - "EventCode": "0x04", - "EventName": "L1D_CACHE", - "BriefDescription": "L1 data cache access" + "PublicDescription": "This event counts any load or store operation or page table walk access which looks up in the L1 data cache. In particular, any access which could count the L1D_CACHE_REFILL event causes this event to count.", + "ArchStdEvent": "L1D_CACHE", }, { - "PublicDescription": "L1 data TLB refill. This event counts any refill of the data L1 TLB from the L2 TLB. This includes refills that result in a translation fault.", - "EventCode": "0x05", - "EventName": "L1D_TLB_REFILL", - "BriefDescription": "L1 data TLB refill" + "PublicDescription": "This event counts any refill of the data L1 TLB from the L2 TLB. This includes refills that result in a translation fault.", + "ArchStdEvent": "L1D_TLB_REFILL", }, - { + {, "PublicDescription": "Level 1 instruction cache access or Level 0 Macro-op cache access. This event counts any instruction fetch which accesses the L1 instruction cache or L0 Macro-op cache.", - "EventCode": "0x14", - "EventName": "L1I_CACHE", - "BriefDescription": "L1 instruction cache access" + "ArchStdEvent": "L1I_CACHE", }, { - "PublicDescription": "L1 data cache Write-Back. This event counts any write-back of data from the L1 data cache to L2 or L3. This counts both victim line evictions and snoops, including cache maintenance operations.", - "EventCode": "0x15", - "EventName": "L1D_CACHE_WB", - "BriefDescription": "L1 data cache Write-Back" + "PublicDescription": "This event counts any write-back of data from the L1 data cache to L2 or L3. This counts both victim line evictions and snoops, including cache maintenance operations.", + "ArchStdEvent": "L1D_CACHE_WB", }, { - "PublicDescription": "L2 data cache access. This event counts any transaction from L1 which looks up in the L2 cache, and any write-back from the L1 to the L2. Snoops from outside the core and cache maintenance operations are not counted.", - "EventCode": "0x16", - "EventName": "L2D_CACHE", - "BriefDescription": "L2 data cache access" + "PublicDescription": "This event counts any transaction from L1 which looks up in the L2 cache, and any write-back from the L1 to the L2. Snoops from outside the core and cache maintenance operations are not counted.", + "ArchStdEvent": "L2D_CACHE", }, { "PublicDescription": "L2 data cache refill. This event counts any cacheable transaction from L1 which causes data to be read from outside the core. L2 refills caused by stashes into L2 should not be counted", - "EventCode": "0x17", - "EventName": "L2D_CACHE_REFILL", - "BriefDescription": "L2 data cache refill" + "ArchStdEvent": "L2D_CACHE_REFILL", }, { - "PublicDescription": "L2 data cache write-back. This event counts any write-back of data from the L2 cache to outside the core. This includes snoops to the L2 which return data, regardless of whether they cause an invalidation. Invalidations from the L2 which do not write data outside of the core and snoops which return data from the L1 are not counted", - "EventCode": "0x18", - "EventName": "L2D_CACHE_WB", - "BriefDescription": "L2 data cache write-back" + "PublicDescription": "This event counts any write-back of data from the L2 cache to outside the core. This includes snoops to the L2 which return data, regardless of whether they cause an invalidation. Invalidations from the L2 which do not write data outside of the core and snoops which return data from the L1 are not counted", + "ArchStdEvent": "L2D_CACHE_WB", }, { - "PublicDescription": "L2 data cache allocation without refill. This event counts any full cache line write into the L2 cache which does not cause a linefill, including write-backs from L1 to L2 and full-line writes which do not allocate into L1.", - "EventCode": "0x20", - "EventName": "L2D_CACHE_ALLOCATE", - "BriefDescription": "L2 data cache allocation without refill" + "PublicDescription": "This event counts any full cache line write into the L2 cache which does not cause a linefill, including write-backs from L1 to L2 and full-line writes which do not allocate into L1.", + "ArchStdEvent": "L2D_CACHE_ALLOCATE", }, { - "PublicDescription": "Level 1 data TLB access. This event counts any load or store operation which accesses the data L1 TLB. If both a load and a store are executed on a cycle, this event counts twice. This event counts regardless of whether the MMU is enabled.", - "EventCode": "0x25", - "EventName": "L1D_TLB", + "PublicDescription": "This event counts any load or store operation which accesses the data L1 TLB. If both a load and a store are executed on a cycle, this event counts twice. This event counts regardless of whether the MMU is enabled.", + "ArchStdEvent": "L1D_TLB", "BriefDescription": "Level 1 data TLB access." }, { - "PublicDescription": "Level 1 instruction TLB access. This event counts any instruction fetch which accesses the instruction L1 TLB.This event counts regardless of whether the MMU is enabled.", - "EventCode": "0x26", - "EventName": "L1I_TLB", + "PublicDescription": "This event counts any instruction fetch which accesses the instruction L1 TLB.This event counts regardless of whether the MMU is enabled.", + "ArchStdEvent": "L1I_TLB", "BriefDescription": "Level 1 instruction TLB access" }, { "PublicDescription": "This event counts any full cache line write into the L3 cache which does not cause a linefill, including write-backs from L2 to L3 and full-line writes which do not allocate into L2", - "EventCode": "0x29", - "EventName": "L3D_CACHE_ALLOCATE", + "ArchStdEvent": "L3D_CACHE_ALLOCATE", "BriefDescription": "Allocation without refill" }, { - "PublicDescription": "Attributable Level 3 unified cache refill. This event counts for any cacheable read transaction returning datafrom the SCU for which the data source was outside the cluster. Transactions such as ReadUnique are counted here as 'read' transactions, even though they can be generated by store instructions.", - "EventCode": "0x2A", - "EventName": "L3D_CACHE_REFILL", + "PublicDescription": "This event counts for any cacheable read transaction returning datafrom the SCU for which the data source was outside the cluster. Transactions such as ReadUnique are counted here as 'read' transactions, even though they can be generated by store instructions.", + "ArchStdEvent": "L3D_CACHE_REFILL", "BriefDescription": "Attributable Level 3 unified cache refill." }, { - "PublicDescription": "Attributable Level 3 unified cache access. This event counts for any cacheable read transaction returning datafrom the SCU, or for any cacheable write to the SCU.", - "EventCode": "0x2B", - "EventName": "L3D_CACHE", + "PublicDescription": "This event counts for any cacheable read transaction returning datafrom the SCU, or for any cacheable write to the SCU.", + "ArchStdEvent": "L3D_CACHE", "BriefDescription": "Attributable Level 3 unified cache access." }, { - "PublicDescription": "Attributable L2 data or unified TLB refill. This event counts on anyrefill of the L2 TLB, caused by either an instruction or data access.This event does not count if the MMU is disabled.", - "EventCode": "0x2D", - "EventName": "L2D_TLB_REFILL", + "PublicDescription": "This event counts on anyrefill of the L2 TLB, caused by either an instruction or data access.This event does not count if the MMU is disabled.", + "ArchStdEvent": "L2D_TLB_REFILL", "BriefDescription": "Attributable L2 data or unified TLB refill" }, { - "PublicDescription": "Attributable L2 data or unified TLB access. This event counts on any access to the L2 TLB (caused by a refill of any of the L1 TLBs). This event does not count if the MMU is disabled.", - "EventCode": "0x2F", - "EventName": "L2D_TLB", - "BriefDescription": "Attributable L2 data or unified TLB access" + "PublicDescription": "This event counts on any access to the L2 TLB (caused by a refill of any of the L1 TLBs). This event does not count if the MMU is disabled.", + "ArchStdEvent": "L2D_TLB", }, { - "PublicDescription": "Access to data TLB that caused a page table walk. This event counts on any data access which causes L2D_TLB_REFILL to count.", - "EventCode": "0x34", - "EventName": "DTLB_WALK", - "BriefDescription": "Access to data TLB that caused a page table walk." + "PublicDescription": "This event counts on any data access which causes L2D_TLB_REFILL to count.", + "ArchStdEvent": "DTLB_WALK", }, { - "PublicDescription": "Access to instruction TLB that caused a page table walk. This event counts on any instruction access which causes L2D_TLB_REFILL to count.", - "EventCode": "0x35", - "EventName": "ITLB_WALK", - "BriefDescription": "Access to instruction TLB that caused a page table walk." + "PublicDescription": "This event counts on any instruction access which causes L2D_TLB_REFILL to count.", + "ArchStdEvent": "ITLB_WALK", }, { - "EventCode": "0x36", - "EventName": "LL_CACHE_RD", - "BriefDescription": "Last level cache access, read" + "ArchStdEvent": "LL_CACHE_RD", }, { - "EventCode": "0x37", - "EventName": "LL_CACHE_MISS_RD", - "BriefDescription": "Last level cache miss, read" + "ArchStdEvent": "LL_CACHE_MISS_RD", }, { "ArchStdEvent": "L1D_CACHE_INVAL" diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/exception.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/exception.json index 98d29c862320..ea4631db41b5 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/exception.json +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/exception.json @@ -1,14 +1,10 @@ [ { - "EventCode": "0x09", - "EventName": "EXC_TAKEN", - "BriefDescription": "Exception taken." + "ArchStdEvent": "EXC_TAKEN", }, { - "PublicDescription": "Local memory error. This event counts any correctable or uncorrectable memory error (ECC or parity) in the protected core RAMs", - "EventCode": "0x1A", - "EventName": "MEMORY_ERROR", - "BriefDescription": "Local memory error." + "PublicDescription": "This event counts any correctable or uncorrectable memory error (ECC or parity) in the protected core RAMs", + "ArchStdEvent": "MEMORY_ERROR", }, { "ArchStdEvent": "EXC_DABORT" diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/instruction.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/instruction.json index c153ac706d8d..8e59566cba8b 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/instruction.json +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/instruction.json @@ -1,49 +1,32 @@ [ { - "PublicDescription": "Software increment. Instruction architecturally executed (condition code check pass).", - "EventCode": "0x00", - "EventName": "SW_INCR", - "BriefDescription": "Software increment." + "ArchStdEvent": "SW_INCR", }, { - "PublicDescription": "Instruction architecturally executed. This event counts all retired instructions, including those that fail their condition check.", - "EventCode": "0x08", - "EventName": "INST_RETIRED", - "BriefDescription": "Instruction architecturally executed." + "PublicDescription": "This event counts all retired instructions, including those that fail their condition check.", + "ArchStdEvent": "INST_RETIRED", }, { - "EventCode": "0x0A", - "EventName": "EXC_RETURN", - "BriefDescription": "Instruction architecturally executed, condition code check pass, exception return." + "ArchStdEvent": "EXC_RETURN", }, { - "PublicDescription": "Instruction architecturally executed, condition code check pass, write to CONTEXTIDR. This event only counts writes to CONTEXTIDR in AArch32 state, and via the CONTEXTIDR_EL1 mnemonic in AArch64 state.", - "EventCode": "0x0B", - "EventName": "CID_WRITE_RETIRED", - "BriefDescription": "Instruction architecturally executed, condition code check pass, write to CONTEXTIDR." + "PublicDescription": "This event only counts writes to CONTEXTIDR in AArch32 state, and via the CONTEXTIDR_EL1 mnemonic in AArch64 state.", + "ArchStdEvent": "CID_WRITE_RETIRED", }, { - "EventCode": "0x1B", - "EventName": "INST_SPEC", - "BriefDescription": "Operation speculatively executed" + "ArchStdEvent": "INST_SPEC", }, { - "PublicDescription": "Instruction architecturally executed, condition code check pass, write to TTBR. This event only counts writes to TTBR0/TTBR1 in AArch32 state and TTBR0_EL1/TTBR1_EL1 in AArch64 state.", - "EventCode": "0x1C", - "EventName": "TTBR_WRITE_RETIRED", - "BriefDescription": "Instruction architecturally executed, condition code check pass, write to TTBR" + "PublicDescription": "This event only counts writes to TTBR0/TTBR1 in AArch32 state and TTBR0_EL1/TTBR1_EL1 in AArch64 state.", + "ArchStdEvent": "TTBR_WRITE_RETIRED", }, - { - "PublicDescription": "Instruction architecturally executed, branch. This event counts all branches, taken or not. This excludes exception entries, debug entries and CCFAIL branches.", - "EventCode": "0x21", - "EventName": "BR_RETIRED", - "BriefDescription": "Instruction architecturally executed, branch." + {, + "PublicDescription": "This event counts all branches, taken or not. This excludes exception entries, debug entries and CCFAIL branches.", + "ArchStdEvent": "BR_RETIRED", }, { - "PublicDescription": "Instruction architecturally executed, mispredicted branch. This event counts any branch counted by BR_RETIRED which is not correctly predicted and causes a pipeline flush.", - "EventCode": "0x22", - "EventName": "BR_MIS_PRED_RETIRED", - "BriefDescription": "Instruction architecturally executed, mispredicted branch." + "PublicDescription": "This event counts any branch counted by BR_RETIRED which is not correctly predicted and causes a pipeline flush.", + "ArchStdEvent": "BR_MIS_PRED_RETIRED", }, { "ArchStdEvent": "ASE_SPEC" diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/memory.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/memory.json index b86643253f19..f06f399051c1 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/memory.json +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/memory.json @@ -1,9 +1,7 @@ [ { - "PublicDescription": "Data memory access. This event counts memory accesses due to load or store instructions. This event counts the sum of MEM_ACCESS_RD and MEM_ACCESS_WR.", - "EventCode": "0x13", - "EventName": "MEM_ACCESS", - "BriefDescription": "Data memory access" + "PublicDescription": "This event counts memory accesses due to load or store instructions. This event counts the sum of MEM_ACCESS_RD and MEM_ACCESS_WR.", + "ArchStdEvent": "MEM_ACCESS", }, { "ArchStdEvent": "MEM_ACCESS_RD" diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/other.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/other.json index 8bde029a62d5..c2ccbf6fbfa0 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/other.json +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/other.json @@ -1,7 +1,5 @@ [ { - "EventCode": "0x31", - "EventName": "REMOTE_ACCESS", - "BriefDescription": "Access to another socket in a multi-socket system" + "ArchStdEvent": "REMOTE_ACCESS", } ] diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/pipeline.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/pipeline.json index 010a647f9d02..d79f0aeaf7f1 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/pipeline.json +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/pipeline.json @@ -1,14 +1,10 @@ [ { - "PublicDescription": "No operation issued because of the frontend. The counter counts on any cycle when there are no fetched instructions available to dispatch.", - "EventCode": "0x23", - "EventName": "STALL_FRONTEND", - "BriefDescription": "No operation issued because of the frontend." + "PublicDescription": "The counter counts on any cycle when there are no fetched instructions available to dispatch.", + "ArchStdEvent": "STALL_FRONTEND", }, { - "PublicDescription": "No operation issued because of the backend. The counter counts on any cycle fetched instructions are not dispatched due to resource constraints.", - "EventCode": "0x24", - "EventName": "STALL_BACKEND", - "BriefDescription": "No operation issued because of the backend." + "PublicDescription": "The counter counts on any cycle fetched instructions are not dispatched due to resource constraints.", + "ArchStdEvent": "STALL_BACKEND", } ] diff --git a/tools/perf/pmu-events/arch/arm64/armv8-common-and-microarch.json b/tools/perf/pmu-events/arch/arm64/armv8-common-and-microarch.json new file mode 100644 index 000000000000..75376c7cc072 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/armv8-common-and-microarch.json @@ -0,0 +1,248 @@ +[ + { + "PublicDescription": "Instruction architecturally executed, Condition code check pass, software increment", + "EventCode": "0x00", + "EventName": "SW_INCR", + "BriefDescription": "Instruction architecturally executed, Condition code check pass, software increment" + }, + { + "PublicDescription": "Level 1 instruction cache refill", + "EventCode": "0x01", + "EventName": "L1I_CACHE_REFILL", + "BriefDescription": "Level 1 instruction cache refill" + }, + { + "PublicDescription": "Attributable Level 1 instruction TLB refill", + "EventCode": "0x02", + "EventName": "L1I_TLB_REFILL", + "BriefDescription": "Attributable Level 1 instruction TLB refill" + }, + { + "PublicDescription": "Level 1 data cache refill", + "EventCode": "0x03", + "EventName": "L1D_CACHE_REFILL", + "BriefDescription": "Level 1 data cache refill" + }, + { + "PublicDescription": "Level 1 data cache access", + "EventCode": "0x04", + "EventName": "L1D_CACHE", + "BriefDescription": "Level 1 data cache access" + }, + { + "PublicDescription": "Attributable Level 1 data TLB refill", + "EventCode": "0x05", + "EventName": "L1D_TLB_REFILL", + "BriefDescription": "Attributable Level 1 data TLB refill" + }, + { + "PublicDescription": "Instruction architecturally executed", + "EventCode": "0x08", + "EventName": "INST_RETIRED", + "BriefDescription": "Instruction architecturally executed" + }, + { + "PublicDescription": "Exception taken", + "EventCode": "0x09", + "EventName": "EXC_TAKEN", + "BriefDescription": "Exception taken" + }, + { + "PublicDescription": "Instruction architecturally executed, condition check pass, exception return", + "EventCode": "0x0a", + "EventName": "EXC_RETURN", + "BriefDescription": "Instruction architecturally executed, condition check pass, exception return" + }, + { + "PublicDescription": "Instruction architecturally executed, condition code check pass, write to CONTEXTIDR", + "EventCode": "0x0b", + "EventName": "CID_WRITE_RETIRED", + "BriefDescription": "Instruction architecturally executed, condition code check pass, write to CONTEXTIDR" + }, + { + "PublicDescription": "Mispredicted or not predicted branch speculatively executed", + "EventCode": "0x10", + "EventName": "BR_MIS_PRED", + "BriefDescription": "Mispredicted or not predicted branch speculatively executed" + }, + { + "PublicDescription": "Cycle", + "EventCode": "0x11", + "EventName": "CPU_CYCLES", + "BriefDescription": "Cycle" + }, + { + "PublicDescription": "Predictable branch speculatively executed", + "EventCode": "0x12", + "EventName": "BR_PRED", + "BriefDescription": "Predictable branch speculatively executed" + }, + { + "PublicDescription": "Data memory access", + "EventCode": "0x13", + "EventName": "MEM_ACCESS", + "BriefDescription": "Data memory access" + }, + { + "PublicDescription": "Attributable Level 1 instruction cache access", + "EventCode": "0x14", + "EventName": "L1I_CACHE", + "BriefDescription": "Attributable Level 1 instruction cache access" + }, + { + "PublicDescription": "Attributable Level 1 data cache write-back", + "EventCode": "0x15", + "EventName": "L1D_CACHE_WB", + "BriefDescription": "Attributable Level 1 data cache write-back" + }, + { + "PublicDescription": "Level 2 data cache access", + "EventCode": "0x16", + "EventName": "L2D_CACHE", + "BriefDescription": "Level 2 data cache access" + }, + { + "PublicDescription": "Level 2 data refill", + "EventCode": "0x17", + "EventName": "L2D_CACHE_REFILL", + "BriefDescription": "Level 2 data refill" + }, + { + "PublicDescription": "Attributable Level 2 data cache write-back", + "EventCode": "0x18", + "EventName": "L2D_CACHE_WB", + "BriefDescription": "Attributable Level 2 data cache write-back" + }, + { + "PublicDescription": "Attributable Bus access", + "EventCode": "0x19", + "EventName": "BUS_ACCESS", + "BriefDescription": "Attributable Bus access" + }, + { + "PublicDescription": "Local memory error", + "EventCode": "0x1a", + "EventName": "MEMORY_ERROR", + "BriefDescription": "Local memory error" + }, + { + "PublicDescription": "Operation speculatively executed", + "EventCode": "0x1b", + "EventName": "INST_SPEC", + "BriefDescription": "Operation speculatively executed" + }, + { + "PublicDescription": "Instruction architecturally executed, Condition code check pass, write to TTBR", + "EventCode": "0x1c", + "EventName": "TTBR_WRITE_RETIRED", + "BriefDescription": "Instruction architecturally executed, Condition code check pass, write to TTBR" + }, + { + "PublicDescription": "Bus cycle", + "EventCode": "0x1D", + "EventName": "BUS_CYCLES", + "BriefDescription": "Bus cycle" + }, + { + "PublicDescription": "Attributable Level 2 data cache allocation without refill", + "EventCode": "0x20", + "EventName": "L2D_CACHE_ALLOCATE", + "BriefDescription": "Attributable Level 2 data cache allocation without refill" + }, + { + "PublicDescription": "Instruction architecturally executed, branch", + "EventCode": "0x21", + "EventName": "BR_RETIRED", + "BriefDescription": "Instruction architecturally executed, branch" + }, + { + "PublicDescription": "Instruction architecturally executed, mispredicted branch", + "EventCode": "0x22", + "EventName": "BR_MIS_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, mispredicted branch" + }, + { + "PublicDescription": "No operation issued because of the frontend", + "EventCode": "0x23", + "EventName": "STALL_FRONTEND", + "BriefDescription": "No operation issued because of the frontend" + }, + { + "PublicDescription": "No operation issued due to the backend", + "EventCode": "0x24", + "EventName": "STALL_BACKEND", + "BriefDescription": "No operation issued due to the backend" + }, + { + "PublicDescription": "Attributable Level 1 data or unified TLB access", + "EventCode": "0x25", + "EventName": "L1D_TLB", + "BriefDescription": "Attributable Level 1 data or unified TLB access" + }, + { + "PublicDescription": "Attributable Level 1 instruction TLB access", + "EventCode": "0x26", + "EventName": "L1I_TLB", + "BriefDescription": "Attributable Level 1 instruction TLB access" + }, + { + "PublicDescription": "Attributable Level 3 data cache allocation without refill", + "EventCode": "0x29", + "EventName": "L3D_CACHE_ALLOCATE", + "BriefDescription": "Attributable Level 3 data cache allocation without refill" + }, + { + "PublicDescription": "Attributable Level 3 data cache refill", + "EventCode": "0x2A", + "EventName": "L3D_CACHE_REFILL", + "BriefDescription": "Attributable Level 3 data cache refill" + }, + { + "PublicDescription": "Attributable Level 3 data cache access", + "EventCode": "0x2B", + "EventName": "L3D_CACHE", + "BriefDescription": "Attributable Level 3 data cache access" + }, + { + "PublicDescription": "Attributable Level 2 data TLB refill", + "EventCode": "0x2D", + "EventName": "L2D_TLB_REFILL", + "BriefDescription": "Attributable Level 2 data TLB refill" + }, + { + "PublicDescription": "Attributable Level 2 data or unified TLB access", + "EventCode": "0x2F", + "EventName": "L2D_TLB", + "BriefDescription": "Attributable Level 2 data or unified TLB access" + }, + { + "PublicDescription": "Access to another socket in a multi-socket system", + "EventCode": "0x31", + "EventName": "REMOTE_ACCESS", + "BriefDescription": "Access to another socket in a multi-socket system" + }, + { + "PublicDescription": "Access to data TLB causes a translation table walk", + "EventCode": "0x34", + "EventName": "DTLB_WALK", + "BriefDescription": "Access to data TLB causes a translation table walk" + }, + { + "PublicDescription": "Access to instruction TLB that causes a translation table walk", + "EventCode": "0x35", + "EventName": "ITLB_WALK", + "BriefDescription": "Access to instruction TLB that causes a translation table walk" + }, + { + "PublicDescription": "Attributable Last level cache memory read", + "EventCode": "0x36", + "EventName": "LL_CACHE_RD", + "BriefDescription": "Attributable Last level cache memory read" + }, + { + "PublicDescription": "Last level cache miss, read", + "EventCode": "0x37", + "EventName": "LL_CACHE_MISS_RD", + "BriefDescription": "Last level cache miss, read" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/freescale/imx8mm/sys/metrics.json b/tools/perf/pmu-events/arch/arm64/freescale/imx8mm/sys/metrics.json index 8e553b67cae6..f416fa052337 100644 --- a/tools/perf/pmu-events/arch/arm64/freescale/imx8mm/sys/metrics.json +++ b/tools/perf/pmu-events/arch/arm64/freescale/imx8mm/sys/metrics.json @@ -6,7 +6,7 @@ "ScaleUnit": "9.765625e-4KB", "Unit": "imx8_ddr", "Compat": "i.MX8MM" - }, + }, { "BriefDescription": "bytes all masters write to ddr based on write-cycles event", "MetricName": "imx8mm_ddr_write.all", @@ -14,5 +14,5 @@ "ScaleUnit": "9.765625e-4KB", "Unit": "imx8_ddr", "Compat": "i.MX8MM" - } + } ] diff --git a/tools/perf/pmu-events/arch/arm64/freescale/imx8mn/sys/ddrc.json b/tools/perf/pmu-events/arch/arm64/freescale/imx8mn/sys/ddrc.json new file mode 100644 index 000000000000..8352e73d6d35 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/freescale/imx8mn/sys/ddrc.json @@ -0,0 +1,37 @@ +[ + { + "BriefDescription": "ddr cycles event", + "EventCode": "0x00", + "EventName": "imx8mn_ddr.cycles", + "Unit": "imx8_ddr", + "Compat": "i.MX8MN" + }, + { + "BriefDescription": "ddr read-cycles event", + "EventCode": "0x2a", + "EventName": "imx8mn_ddr.read_cycles", + "Unit": "imx8_ddr", + "Compat": "i.MX8MN" + }, + { + "BriefDescription": "ddr write-cycles event", + "EventCode": "0x2b", + "EventName": "imx8mn_ddr.write_cycles", + "Unit": "imx8_ddr", + "Compat": "i.MX8MN" + }, + { + "BriefDescription": "ddr read event", + "EventCode": "0x35", + "EventName": "imx8mn_ddr.read", + "Unit": "imx8_ddr", + "Compat": "i.MX8MN" + }, + { + "BriefDescription": "ddr write event", + "EventCode": "0x38", + "EventName": "imx8mn_ddr.write", + "Unit": "imx8_ddr", + "Compat": "i.MX8MN" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/freescale/imx8mn/sys/metrics.json b/tools/perf/pmu-events/arch/arm64/freescale/imx8mn/sys/metrics.json new file mode 100644 index 000000000000..2bbba4d8ea5b --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/freescale/imx8mn/sys/metrics.json @@ -0,0 +1,18 @@ +[ + { + "BriefDescription": "bytes all masters read from ddr based on read-cycles event", + "MetricName": "imx8mn_ddr_read.all", + "MetricExpr": "imx8mn_ddr.read_cycles * 4 * 2", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MN" + }, + { + "BriefDescription": "bytes all masters write to ddr based on write-cycles event", + "MetricName": "imx8mn_ddr_write.all", + "MetricExpr": "imx8mn_ddr.write_cycles * 4 * 2", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MN" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/freescale/imx8mp/sys/ddrc.json b/tools/perf/pmu-events/arch/arm64/freescale/imx8mp/sys/ddrc.json new file mode 100644 index 000000000000..f9a89efc9b24 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/freescale/imx8mp/sys/ddrc.json @@ -0,0 +1,37 @@ +[ + { + "BriefDescription": "ddr cycles event", + "EventCode": "0x00", + "EventName": "imx8mp_ddr.cycles", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "ddr read-cycles event", + "EventCode": "0x2a", + "EventName": "imx8mp_ddr.read_cycles", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "ddr write-cycles event", + "EventCode": "0x2b", + "EventName": "imx8mp_ddr.write_cycles", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "ddr read event", + "EventCode": "0x35", + "EventName": "imx8mp_ddr.read", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "ddr write event", + "EventCode": "0x38", + "EventName": "imx8mp_ddr.write", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/freescale/imx8mp/sys/metrics.json b/tools/perf/pmu-events/arch/arm64/freescale/imx8mp/sys/metrics.json new file mode 100644 index 000000000000..8b9544424b3f --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/freescale/imx8mp/sys/metrics.json @@ -0,0 +1,466 @@ +[ + { + "BriefDescription": "bytes of all masters read from ddr", + "MetricName": "imx8mp_ddr_read.all", + "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0xffff\\,axi_id\\=0x0000@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of all masters write to ddr", + "MetricName": "imx8mp_ddr_write.all", + "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0xffff\\,axi_id\\=0x0000@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of a53 core read from ddr", + "MetricName": "imx8mp_ddr_read.a53", + "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x0000@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of a53 core write to ddr", + "MetricName": "imx8mp_ddr_write.a53", + "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x0000@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of supermix(m7) core read from ddr", + "MetricName": "imx8mp_ddr_read.supermix", + "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x000f\\,axi_id\\=0x0020@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of supermix(m7) write to ddr", + "MetricName": "imx8mp_ddr_write.supermix", + "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x000f\\,axi_id\\=0x0020@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of gpu 3d read from ddr", + "MetricName": "imx8mp_ddr_read.3d", + "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x0070@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of gpu 3d write to ddr", + "MetricName": "imx8mp_ddr_write.3d", + "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x0070@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of gpu 2d read from ddr", + "MetricName": "imx8mp_ddr_read.2d", + "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x0071@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of gpu 2d write to ddr", + "MetricName": "imx8mp_ddr_write.2d", + "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x0071@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of display lcdif1 read from ddr", + "MetricName": "imx8mp_ddr_read.lcdif1", + "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x0068@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of display lcdif1 write to ddr", + "MetricName": "imx8mp_ddr_write.lcdif1", + "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x0068@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of display lcdif2 read from ddr", + "MetricName": "imx8mp_ddr_read.lcdif2", + "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x0069@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of display lcdif2 write to ddr", + "MetricName": "imx8mp_ddr_write.lcdif2", + "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x0069@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of display isi1 read from ddr", + "MetricName": "imx8mp_ddr_read.isi1", + "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x006a@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of display isi1 write to ddr", + "MetricName": "imx8mp_ddr_write.isi1", + "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x006a@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of display isi2 read from ddr", + "MetricName": "imx8mp_ddr_read.isi2", + "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x006b@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of display isi2 write to ddr", + "MetricName": "imx8mp_ddr_write.isi2", + "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x006b@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of display isi3 read from ddr", + "MetricName": "imx8mp_ddr_read.isi3", + "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x006c@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of display isi3 write to ddr", + "MetricName": "imx8mp_ddr_write.isi3", + "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x006c@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of display isp1 read from ddr", + "MetricName": "imx8mp_ddr_read.isp1", + "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x006d@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of display isp1 write to ddr", + "MetricName": "imx8mp_ddr_write.isp1", + "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x006d@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of display isp2 read from ddr", + "MetricName": "imx8mp_ddr_read.isp2", + "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x006e@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of display isp2 write to ddr", + "MetricName": "imx8mp_ddr_write.isp2", + "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x006e@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of display dewarp read from ddr", + "MetricName": "imx8mp_ddr_read.dewarp", + "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x006f@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of display dewarp write to ddr", + "MetricName": "imx8mp_ddr_write.dewarp", + "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x006f@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of vpu1 read from ddr", + "MetricName": "imx8mp_ddr_read.vpu1", + "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x007c@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of vpu1 write to ddr", + "MetricName": "imx8mp_ddr_write.vpu1", + "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x007c@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of vpu2 read from ddr", + "MetricName": "imx8mp_ddr_read.vpu2", + "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x007d@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of vpu2 write to ddr", + "MetricName": "imx8mp_ddr_write.vpu2", + "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x007d@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of vpu3 read from ddr", + "MetricName": "imx8mp_ddr_read.vpu3", + "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x007e@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of vpu3 write to ddr", + "MetricName": "imx8mp_ddr_write.vpu3", + "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x007e@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of npu read from ddr", + "MetricName": "imx8mp_ddr_read.npu", + "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x0073@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of npu write to ddr", + "MetricName": "imx8mp_ddr_write.npu", + "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x0073@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of hsio usb1 read from ddr", + "MetricName": "imx8mp_ddr_read.usb1", + "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x0078@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of hsio usb1 write to ddr", + "MetricName": "imx8mp_ddr_write.usb1", + "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x0078@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of hsio usb2 read from ddr", + "MetricName": "imx8mp_ddr_read.usb2", + "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x0079@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of hsio usb2 write to ddr", + "MetricName": "imx8mp_ddr_write.usb2", + "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x0079@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of hsio pci read from ddr", + "MetricName": "imx8mp_ddr_read.pci", + "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x007a@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of hsio pci write to ddr", + "MetricName": "imx8mp_ddr_write.pci", + "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x007a@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of hdmi_tx hrv_mwr read from ddr", + "MetricName": "imx8mp_ddr_read.hdmi_hrv_mwr", + "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x0074@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of hdmi_tx hrv_mwr write to ddr", + "MetricName": "imx8mp_ddr_write.hdmi_hrv_mwr", + "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x0074@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of hdmi_tx lcdif read from ddr", + "MetricName": "imx8mp_ddr_read.hdmi_lcdif", + "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x0075@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of hdmi_tx lcdif write to ddr", + "MetricName": "imx8mp_ddr_write.hdmi_lcdif", + "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x0075@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of hdmi_tx tx_hdcp read from ddr", + "MetricName": "imx8mp_ddr_read.hdmi_hdcp", + "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x0076@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of hdmi_tx tx_hdcp write to ddr", + "MetricName": "imx8mp_ddr_write.hdmi_hdcp", + "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x0076@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of audio dsp read from ddr", + "MetricName": "imx8mp_ddr_read.audio_dsp", + "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x0041@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of audio dsp write to ddr", + "MetricName": "imx8mp_ddr_write.audio_dsp", + "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x0041@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of audio sdma2_per read from ddr", + "MetricName": "imx8mp_ddr_read.audio_sdma2_per", + "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x0062@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of audio sdma2_per write to ddr", + "MetricName": "imx8mp_ddr_write.audio_sdma2_per", + "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x0062@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of audio sdma2_burst read from ddr", + "MetricName": "imx8mp_ddr_read.audio_sdma2_burst", + "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x0063@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of audio sdma2_burst write to ddr", + "MetricName": "imx8mp_ddr_write.audio_sdma2_burst", + "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x0063@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of audio sdma3_per read from ddr", + "MetricName": "imx8mp_ddr_read.audio_sdma3_per", + "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x0064@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of audio sdma3_per write to ddr", + "MetricName": "imx8mp_ddr_write.audio_sdma3_per", + "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x0064@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of audio sdma3_burst read from ddr", + "MetricName": "imx8mp_ddr_read.audio_sdma3_burst", + "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x0065@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of audio sdma3_burst write to ddr", + "MetricName": "imx8mp_ddr_write.audio_sdma3_burst", + "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x0065@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of audio sdma_pif read from ddr", + "MetricName": "imx8mp_ddr_read.audio_sdma_pif", + "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x0066@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + }, + { + "BriefDescription": "bytes of audio sdma_pif write to ddr", + "MetricName": "imx8mp_ddr_write.audio_sdma_pif", + "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x0066@", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MP" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/freescale/imx8mq/sys/ddrc.json b/tools/perf/pmu-events/arch/arm64/freescale/imx8mq/sys/ddrc.json new file mode 100644 index 000000000000..c8682728ddad --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/freescale/imx8mq/sys/ddrc.json @@ -0,0 +1,37 @@ +[ + { + "BriefDescription": "ddr cycles event", + "EventCode": "0x00", + "EventName": "imx8mq_ddr.cycles", + "Unit": "imx8_ddr", + "Compat": "i.MX8MQ" + }, + { + "BriefDescription": "ddr read-cycles event", + "EventCode": "0x2a", + "EventName": "imx8mq_ddr.read_cycles", + "Unit": "imx8_ddr", + "Compat": "i.MX8MQ" + }, + { + "BriefDescription": "ddr write-cycles event", + "EventCode": "0x2b", + "EventName": "imx8mq_ddr.write_cycles", + "Unit": "imx8_ddr", + "Compat": "i.MX8MQ" + }, + { + "BriefDescription": "ddr read event", + "EventCode": "0x35", + "EventName": "imx8mq_ddr.read", + "Unit": "imx8_ddr", + "Compat": "i.MX8MQ" + }, + { + "BriefDescription": "ddr write event", + "EventCode": "0x38", + "EventName": "imx8mq_ddr.write", + "Unit": "imx8_ddr", + "Compat": "i.MX8MQ" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/freescale/imx8mq/sys/metrics.json b/tools/perf/pmu-events/arch/arm64/freescale/imx8mq/sys/metrics.json new file mode 100644 index 000000000000..862c98171e0d --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/freescale/imx8mq/sys/metrics.json @@ -0,0 +1,18 @@ +[ + { + "BriefDescription": "bytes all masters read from ddr based on read-cycles event", + "MetricName": "imx8mq_ddr_read.all", + "MetricExpr": "imx8mq_ddr.read_cycles * 4 * 4", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MQ" + }, + { + "BriefDescription": "bytes all masters write to ddr based on write-cycles event", + "MetricName": "imx8mq_ddr_write.all", + "MetricExpr": "imx8mq_ddr.write_cycles * 4 * 4", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MQ" + } +] diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index aa4dc4f5abde..650aec19d490 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -58,6 +58,7 @@ perf-y += time-utils-test.o perf-y += genelf.o perf-y += api-io.o perf-y += demangle-java-test.o +perf-y += demangle-ocaml-test.o perf-y += pfm.o perf-y += parse-metric.o perf-y += pe-file-parsing.o diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 7273823d0d02..c4b888f18e9c 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -339,6 +339,10 @@ static struct test generic_tests[] = { .func = test__demangle_java, }, { + .desc = "Demangle OCaml", + .func = test__demangle_ocaml, + }, + { .desc = "Parse and process metrics", .func = test__parse_metric, }, diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 7c098d49c77e..280f0348a09c 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -26,6 +26,7 @@ #include "event.h" #include "record.h" #include "util/mmap.h" +#include "util/string2.h" #include "util/synthetic-events.h" #include "thread.h" @@ -41,15 +42,6 @@ struct state { size_t done_cnt; }; -static unsigned int hex(char c) -{ - if (c >= '0' && c <= '9') - return c - '0'; - if (c >= 'a' && c <= 'f') - return c - 'a' + 10; - return c - 'A' + 10; -} - static size_t read_objdump_chunk(const char **line, unsigned char **buf, size_t *buf_len) { diff --git a/tools/perf/tests/demangle-ocaml-test.c b/tools/perf/tests/demangle-ocaml-test.c new file mode 100644 index 000000000000..a273ed5163d7 --- /dev/null +++ b/tools/perf/tests/demangle-ocaml-test.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <string.h> +#include <stdlib.h> +#include <stdio.h> +#include "tests.h" +#include "session.h" +#include "debug.h" +#include "demangle-ocaml.h" + +int test__demangle_ocaml(struct test *test __maybe_unused, int subtest __maybe_unused) +{ + int ret = TEST_OK; + char *buf = NULL; + size_t i; + + struct { + const char *mangled, *demangled; + } test_cases[] = { + { "main", + NULL }, + { "camlStdlib__array__map_154", + "Stdlib.array.map" }, + { "camlStdlib__anon_fn$5bstdlib$2eml$3a334$2c0$2d$2d54$5d_1453", + "Stdlib.anon_fn[stdlib.ml:334,0--54]" }, + { "camlStdlib__bytes__$2b$2b_2205", + "Stdlib.bytes.++" }, + }; + + for (i = 0; i < sizeof(test_cases) / sizeof(test_cases[0]); i++) { + buf = ocaml_demangle_sym(test_cases[i].mangled); + if ((buf == NULL && test_cases[i].demangled != NULL) + || (buf != NULL && test_cases[i].demangled == NULL) + || (buf != NULL && strcmp(buf, test_cases[i].demangled))) { + pr_debug("FAILED: %s: %s != %s\n", test_cases[i].mangled, + buf == NULL ? "(null)" : buf, + test_cases[i].demangled == NULL ? "(null)" : test_cases[i].demangled); + ret = TEST_FAIL; + } + free(buf); + } + + return ret; +} diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c index 71f85e2cc127..f7dd6c463f04 100644 --- a/tools/perf/tests/openat-syscall-all-cpus.c +++ b/tools/perf/tests/openat-syscall-all-cpus.c @@ -15,7 +15,6 @@ #include "tests.h" #include "thread_map.h" #include <perf/cpumap.h> -#include <internal/cpumap.h> #include "debug.h" #include "stat.h" #include "util/counts.h" diff --git a/tools/perf/tests/parse-metric.c b/tools/perf/tests/parse-metric.c index ce7be37f0d88..6dc1db1626ad 100644 --- a/tools/perf/tests/parse-metric.c +++ b/tools/perf/tests/parse-metric.c @@ -70,6 +70,10 @@ static struct pmu_event pme_test[] = { .metric_name = "M3", }, { + .metric_expr = "64 * l1d.replacement / 1000000000 / duration_time", + .metric_name = "L1D_Cache_Fill_BW", +}, +{ .name = NULL, } }; @@ -107,6 +111,8 @@ static void load_runtime_stat(struct runtime_stat *st, struct evlist *evlist, evlist__for_each_entry(evlist, evsel) { count = find_value(evsel->name, vals); perf_stat__update_shadow_stats(evsel, count, 0, st); + if (!strcmp(evsel->name, "duration_time")) + update_stats(&walltime_nsecs_stats, count); } } @@ -321,6 +327,23 @@ static int test_recursion_fail(void) return 0; } +static int test_memory_bandwidth(void) +{ + double ratio; + struct value vals[] = { + { .event = "l1d.replacement", .val = 4000000 }, + { .event = "duration_time", .val = 200000000 }, + { .event = NULL, }, + }; + + TEST_ASSERT_VAL("failed to compute metric", + compute_metric("L1D_Cache_Fill_BW", vals, &ratio) == 0); + TEST_ASSERT_VAL("L1D_Cache_Fill_BW, wrong ratio", + 1.28 == ratio); + + return 0; +} + static int test_metric_group(void) { double ratio1, ratio2; @@ -353,5 +376,6 @@ int test__parse_metric(struct test *test __maybe_unused, int subtest __maybe_unu TEST_ASSERT_VAL("DCache_L2 failed", test_dcache_l2() == 0); TEST_ASSERT_VAL("recursion fail failed", test_recursion_fail() == 0); TEST_ASSERT_VAL("test metric group", test_metric_group() == 0); + TEST_ASSERT_VAL("Memory bandwidth", test_memory_bandwidth() == 0); return 0; } diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index 2393916f6128..0dbe3aa99853 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -129,6 +129,9 @@ static bool samples_same(const struct perf_sample *s1, if (type & PERF_SAMPLE_WEIGHT) COMP(weight); + if (type & PERF_SAMPLE_WEIGHT_STRUCT) + COMP(ins_lat); + if (type & PERF_SAMPLE_DATA_SRC) COMP(data_src); @@ -157,6 +160,9 @@ static bool samples_same(const struct perf_sample *s1, if (type & PERF_SAMPLE_DATA_PAGE_SIZE) COMP(data_page_size); + if (type & PERF_SAMPLE_CODE_PAGE_SIZE) + COMP(code_page_size); + if (type & PERF_SAMPLE_AUX) { COMP(aux_sample.size); if (memcmp(s1->aux_sample.data, s2->aux_sample.data, @@ -196,7 +202,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format) .data = {1, -1ULL, 211, 212, 213}, }; u64 regs[64]; - const u64 raw_data[] = {0x123456780a0b0c0dULL, 0x1102030405060708ULL}; + const u32 raw_data[] = {0x12345678, 0x0a0b0c0d, 0x11020304, 0x05060708, 0 }; const u64 data[] = {0x2211443366558877ULL, 0, 0xaabbccddeeff4321ULL}; const u64 aux_data[] = {0xa55a, 0, 0xeeddee, 0x0282028202820282}; struct perf_sample sample = { @@ -238,6 +244,8 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format) .phys_addr = 113, .cgroup = 114, .data_page_size = 115, + .code_page_size = 116, + .ins_lat = 117, .aux_sample = { .size = sizeof(aux_data), .data = (void *)aux_data, @@ -344,7 +352,7 @@ int test__sample_parsing(struct test *test __maybe_unused, int subtest __maybe_u * were added. Please actually update the test rather than just change * the condition below. */ - if (PERF_SAMPLE_MAX > PERF_SAMPLE_CODE_PAGE_SIZE << 1) { + if (PERF_SAMPLE_MAX > PERF_SAMPLE_WEIGHT_STRUCT << 1) { pr_debug("sample format has changed, some new PERF_SAMPLE_ bit was introduced - test needs updating\n"); return -1; } @@ -374,8 +382,12 @@ int test__sample_parsing(struct test *test __maybe_unused, int subtest __maybe_u return err; } - /* Test all sample format bits together */ - sample_type = PERF_SAMPLE_MAX - 1; + /* + * Test all sample format bits together + * Note: PERF_SAMPLE_WEIGHT and PERF_SAMPLE_WEIGHT_STRUCT cannot + * be set simultaneously. + */ + sample_type = (PERF_SAMPLE_MAX - 1) & ~PERF_SAMPLE_WEIGHT; sample_regs = 0x3fff; /* shared yb intr and user regs */ for (i = 0; i < ARRAY_SIZE(rf); i++) { err = do_test(sample_type, sample_regs, rf[i]); diff --git a/tools/perf/tests/shell/buildid.sh b/tools/perf/tests/shell/buildid.sh index 4861a20edee2..416af614bbe0 100755 --- a/tools/perf/tests/shell/buildid.sh +++ b/tools/perf/tests/shell/buildid.sh @@ -50,6 +50,12 @@ check() exit 1 fi + ${perf} buildid-cache -l | grep $id + if [ $? -ne 0 ]; then + echo "failed: ${id} is not reported by \"perf buildid-cache -l\"" + exit 1 + fi + echo "OK for ${1}" } diff --git a/tools/perf/tests/shell/daemon.sh b/tools/perf/tests/shell/daemon.sh new file mode 100755 index 000000000000..e5b824dd08d9 --- /dev/null +++ b/tools/perf/tests/shell/daemon.sh @@ -0,0 +1,475 @@ +#!/bin/sh +# daemon operations +# SPDX-License-Identifier: GPL-2.0 + +check_line_first() +{ + local line=$1 + local name=$2 + local base=$3 + local output=$4 + local lock=$5 + local up=$6 + + local line_name=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $2 }'` + local line_base=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $3 }'` + local line_output=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $4 }'` + local line_lock=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $5 }'` + local line_up=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $6 }'` + + if [ "${name}" != "${line_name}" ]; then + echo "FAILED: wrong name" + error=1 + fi + + if [ "${base}" != "${line_base}" ]; then + echo "FAILED: wrong base" + error=1 + fi + + if [ "${output}" != "${line_output}" ]; then + echo "FAILED: wrong output" + error=1 + fi + + if [ "${lock}" != "${line_lock}" ]; then + echo "FAILED: wrong lock" + error=1 + fi + + if [ "${up}" != "${line_up}" ]; then + echo "FAILED: wrong up" + error=1 + fi +} + +check_line_other() +{ + local line=$1 + local name=$2 + local run=$3 + local base=$4 + local output=$5 + local control=$6 + local ack=$7 + local up=$8 + + local line_name=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $2 }'` + local line_run=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $3 }'` + local line_base=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $4 }'` + local line_output=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $5 }'` + local line_control=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $6 }'` + local line_ack=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $7 }'` + local line_up=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $8 }'` + + if [ "${name}" != "${line_name}" ]; then + echo "FAILED: wrong name" + error=1 + fi + + if [ "${run}" != "${line_run}" ]; then + echo "FAILED: wrong run" + error=1 + fi + + if [ "${base}" != "${line_base}" ]; then + echo "FAILED: wrong base" + error=1 + fi + + if [ "${output}" != "${line_output}" ]; then + echo "FAILED: wrong output" + error=1 + fi + + if [ "${control}" != "${line_control}" ]; then + echo "FAILED: wrong control" + error=1 + fi + + if [ "${ack}" != "${line_ack}" ]; then + echo "FAILED: wrong ack" + error=1 + fi + + if [ "${up}" != "${line_up}" ]; then + echo "FAILED: wrong up" + error=1 + fi +} + +daemon_start() +{ + local config=$1 + local session=$2 + + perf daemon start --config ${config} + + # wait for the session to ping + local state="FAIL" + while [ "${state}" != "OK" ]; do + state=`perf daemon ping --config ${config} --session ${session} | awk '{ print $1 }'` + sleep 0.05 + done +} + +daemon_exit() +{ + local base=$1 + local config=$2 + + local line=`perf daemon --config ${config} -x: | head -1` + local pid=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $1 }'` + + # stop daemon + perf daemon stop --config ${config} + + # ... and wait for the pid to go away + tail --pid=${pid} -f /dev/null +} + +test_list() +{ + echo "test daemon list" + + local config=$(mktemp /tmp/perf.daemon.config.XXX) + local base=$(mktemp -d /tmp/perf.daemon.base.XXX) + + cat <<EOF > ${config} +[daemon] +base=BASE + +[session-size] +run = -e cpu-clock + +[session-time] +run = -e task-clock +EOF + + sed -i -e "s|BASE|${base}|" ${config} + + # start daemon + daemon_start ${config} size + + # check first line + # pid:daemon:base:base/output:base/lock + local line=`perf daemon --config ${config} -x: | head -1` + check_line_first ${line} daemon ${base} ${base}/output ${base}/lock "0" + + # check 1st session + # pid:size:-e cpu-clock:base/size:base/size/output:base/size/control:base/size/ack:0 + local line=`perf daemon --config ${config} -x: | head -2 | tail -1` + check_line_other "${line}" size "-e cpu-clock" ${base}/session-size \ + ${base}/session-size/output ${base}/session-size/control \ + ${base}/session-size/ack "0" + + # check 2nd session + # pid:time:-e task-clock:base/time:base/time/output:base/time/control:base/time/ack:0 + local line=`perf daemon --config ${config} -x: | head -3 | tail -1` + check_line_other "${line}" time "-e task-clock" ${base}/session-time \ + ${base}/session-time/output ${base}/session-time/control \ + ${base}/session-time/ack "0" + + # stop daemon + daemon_exit ${base} ${config} + + rm -rf ${base} + rm -f ${config} +} + +test_reconfig() +{ + echo "test daemon reconfig" + + local config=$(mktemp /tmp/perf.daemon.config.XXX) + local base=$(mktemp -d /tmp/perf.daemon.base.XXX) + + # prepare config + cat <<EOF > ${config} +[daemon] +base=BASE + +[session-size] +run = -e cpu-clock + +[session-time] +run = -e task-clock +EOF + + sed -i -e "s|BASE|${base}|" ${config} + + # start daemon + daemon_start ${config} size + + # check 2nd session + # pid:time:-e task-clock:base/time:base/time/output:base/time/control:base/time/ack:0 + local line=`perf daemon --config ${config} -x: | head -3 | tail -1` + check_line_other "${line}" time "-e task-clock" ${base}/session-time \ + ${base}/session-time/output ${base}/session-time/control ${base}/session-time/ack "0" + local pid=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $1 }'` + + # prepare new config + local config_new=${config}.new + cat <<EOF > ${config_new} +[daemon] +base=BASE + +[session-size] +run = -e cpu-clock + +[session-time] +run = -e cpu-clock +EOF + + # TEST 1 - change config + + sed -i -e "s|BASE|${base}|" ${config_new} + cp ${config_new} ${config} + + # wait for old session to finish + tail --pid=${pid} -f /dev/null + + # wait for new one to start + local state="FAIL" + while [ "${state}" != "OK" ]; do + state=`perf daemon ping --config ${config} --session time | awk '{ print $1 }'` + done + + # check reconfigured 2nd session + # pid:time:-e task-clock:base/time:base/time/output:base/time/control:base/time/ack:0 + local line=`perf daemon --config ${config} -x: | head -3 | tail -1` + check_line_other "${line}" time "-e cpu-clock" ${base}/session-time \ + ${base}/session-time/output ${base}/session-time/control ${base}/session-time/ack "0" + + # TEST 2 - empty config + + local config_empty=${config}.empty + cat <<EOF > ${config_empty} +[daemon] +base=BASE +EOF + + # change config + sed -i -e "s|BASE|${base}|" ${config_empty} + cp ${config_empty} ${config} + + # wait for sessions to finish + local state="OK" + while [ "${state}" != "FAIL" ]; do + state=`perf daemon ping --config ${config} --session time | awk '{ print $1 }'` + done + + local state="OK" + while [ "${state}" != "FAIL" ]; do + state=`perf daemon ping --config ${config} --session size | awk '{ print $1 }'` + done + + local one=`perf daemon --config ${config} -x: | wc -l` + + if [ ${one} -ne "1" ]; then + echo "FAILED: wrong list output" + error=1 + fi + + # TEST 3 - config again + + cp ${config_new} ${config} + + # wait for size to start + local state="FAIL" + while [ "${state}" != "OK" ]; do + state=`perf daemon ping --config ${config} --session size | awk '{ print $1 }'` + done + + # wait for time to start + local state="FAIL" + while [ "${state}" != "OK" ]; do + state=`perf daemon ping --config ${config} --session time | awk '{ print $1 }'` + done + + # stop daemon + daemon_exit ${base} ${config} + + rm -rf ${base} + rm -f ${config} + rm -f ${config_new} + rm -f ${config_empty} +} + +test_stop() +{ + echo "test daemon stop" + + local config=$(mktemp /tmp/perf.daemon.config.XXX) + local base=$(mktemp -d /tmp/perf.daemon.base.XXX) + + # prepare config + cat <<EOF > ${config} +[daemon] +base=BASE + +[session-size] +run = -e cpu-clock + +[session-time] +run = -e task-clock +EOF + + sed -i -e "s|BASE|${base}|" ${config} + + # start daemon + daemon_start ${config} size + + local pid_size=`perf daemon --config ${config} -x: | head -2 | tail -1 | awk 'BEGIN { FS = ":" } ; { print $1 }'` + local pid_time=`perf daemon --config ${config} -x: | head -3 | tail -1 | awk 'BEGIN { FS = ":" } ; { print $1 }'` + + # check that sessions are running + if [ ! -d "/proc/${pid_size}" ]; then + echo "FAILED: session size not up" + fi + + if [ ! -d "/proc/${pid_time}" ]; then + echo "FAILED: session time not up" + fi + + # stop daemon + daemon_exit ${base} ${config} + + # check that sessions are gone + if [ -d "/proc/${pid_size}" ]; then + echo "FAILED: session size still up" + fi + + if [ -d "/proc/${pid_time}" ]; then + echo "FAILED: session time still up" + fi + + rm -rf ${base} + rm -f ${config} +} + +test_signal() +{ + echo "test daemon signal" + + local config=$(mktemp /tmp/perf.daemon.config.XXX) + local base=$(mktemp -d /tmp/perf.daemon.base.XXX) + + # prepare config + cat <<EOF > ${config} +[daemon] +base=BASE + +[session-test] +run = -e cpu-clock --switch-output +EOF + + sed -i -e "s|BASE|${base}|" ${config} + + # start daemon + daemon_start ${config} test + + # send 2 signals + perf daemon signal --config ${config} --session test + perf daemon signal --config ${config} + + # stop daemon + daemon_exit ${base} ${config} + + # count is 2 perf.data for signals and 1 for perf record finished + count=`ls ${base}/session-test/ | grep perf.data | wc -l` + if [ ${count} -ne 3 ]; then + error=1 + echo "FAILED: perf data no generated" + fi + + rm -rf ${base} + rm -f ${config} +} + +test_ping() +{ + echo "test daemon ping" + + local config=$(mktemp /tmp/perf.daemon.config.XXX) + local base=$(mktemp -d /tmp/perf.daemon.base.XXX) + + # prepare config + cat <<EOF > ${config} +[daemon] +base=BASE + +[session-size] +run = -e cpu-clock + +[session-time] +run = -e task-clock +EOF + + sed -i -e "s|BASE|${base}|" ${config} + + # start daemon + daemon_start ${config} size + + size=`perf daemon ping --config ${config} --session size | awk '{ print $1 }'` + type=`perf daemon ping --config ${config} --session time | awk '{ print $1 }'` + + if [ ${size} != "OK" -o ${type} != "OK" ]; then + error=1 + echo "FAILED: daemon ping failed" + fi + + # stop daemon + daemon_exit ${base} ${config} + + rm -rf ${base} + rm -f ${config} +} + +test_lock() +{ + echo "test daemon lock" + + local config=$(mktemp /tmp/perf.daemon.config.XXX) + local base=$(mktemp -d /tmp/perf.daemon.base.XXX) + + # prepare config + cat <<EOF > ${config} +[daemon] +base=BASE + +[session-size] +run = -e cpu-clock +EOF + + sed -i -e "s|BASE|${base}|" ${config} + + # start daemon + daemon_start ${config} size + + # start second daemon over the same config/base + failed=`perf daemon start --config ${config} 2>&1 | awk '{ print $1 }'` + + # check that we failed properly + if [ ${failed} != "failed:" ]; then + error=1 + echo "FAILED: daemon lock failed" + fi + + # stop daemon + daemon_exit ${base} ${config} + + rm -rf ${base} + rm -f ${config} +} + +error=0 + +test_list +test_reconfig +test_stop +test_signal +test_ping +test_lock + +exit ${error} diff --git a/tools/perf/tests/shell/stat+shadow_stat.sh b/tools/perf/tests/shell/stat+shadow_stat.sh index 249dfe48cf6a..ebebd3596cf9 100755 --- a/tools/perf/tests/shell/stat+shadow_stat.sh +++ b/tools/perf/tests/shell/stat+shadow_stat.sh @@ -9,31 +9,29 @@ perf stat -a true > /dev/null 2>&1 || exit 2 test_global_aggr() { - local cyc - perf stat -a --no-big-num -e cycles,instructions sleep 1 2>&1 | \ grep -e cycles -e instructions | \ while read num evt hash ipc rest do # skip not counted events - if [[ $num == "<not" ]]; then + if [ "$num" = "<not" ]; then continue fi # save cycles count - if [[ $evt == "cycles" ]]; then + if [ "$evt" = "cycles" ]; then cyc=$num continue fi # skip if no cycles - if [[ -z $cyc ]]; then + if [ -z "$cyc" ]; then continue fi # use printf for rounding and a leading zero - local res=`printf "%.2f" $(echo "scale=6; $num / $cyc" | bc -q)` - if [[ $ipc != $res ]]; then + res=`printf "%.2f" $(echo "scale=6; $num / $cyc" | bc -q)` + if [ "$ipc" != "$res" ]; then echo "IPC is different: $res != $ipc ($num / $cyc)" exit 1 fi @@ -42,32 +40,32 @@ test_global_aggr() test_no_aggr() { - declare -A results - perf stat -a -A --no-big-num -e cycles,instructions sleep 1 2>&1 | \ grep ^CPU | \ while read cpu num evt hash ipc rest do # skip not counted events - if [[ $num == "<not" ]]; then + if [ "$num" = "<not" ]; then continue fi # save cycles count - if [[ $evt == "cycles" ]]; then - results[$cpu]=$num + if [ "$evt" = "cycles" ]; then + results="$results $cpu:$num" continue fi + cyc=${results##* $cpu:} + cyc=${cyc%% *} + # skip if no cycles - local cyc=${results[$cpu]} - if [[ -z $cyc ]]; then + if [ -z "$cyc" ]; then continue fi # use printf for rounding and a leading zero - local res=`printf "%.2f" $(echo "scale=6; $num / $cyc" | bc -q)` - if [[ $ipc != $res ]]; then + res=`printf "%.2f" $(echo "scale=6; $num / $cyc" | bc -q)` + if [ "$ipc" != "$res" ]; then echo "IPC is different for $cpu: $res != $ipc ($num / $cyc)" exit 1 fi diff --git a/tools/perf/tests/shell/test_arm_coresight.sh b/tools/perf/tests/shell/test_arm_coresight.sh index 18fde2f179cd..c9eef0bba6f1 100755 --- a/tools/perf/tests/shell/test_arm_coresight.sh +++ b/tools/perf/tests/shell/test_arm_coresight.sh @@ -11,6 +11,7 @@ perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX) file=$(mktemp /tmp/temporary_file.XXXXX) +glb_err=0 skip_if_no_cs_etm_event() { perf list | grep -q 'cs_etm//' && return 0 @@ -33,7 +34,7 @@ record_touch_file() { echo "Recording trace (only user mode) with path: CPU$2 => $1" rm -f $file perf record -o ${perfdata} -e cs_etm/@$1/u --per-thread \ - -- taskset -c $2 touch $file + -- taskset -c $2 touch $file > /dev/null 2>&1 } perf_script_branch_samples() { @@ -43,8 +44,8 @@ perf_script_branch_samples() { # touch 6512 1 branches:u: ffffb220824c strcmp+0xc (/lib/aarch64-linux-gnu/ld-2.27.so) # touch 6512 1 branches:u: ffffb22082e0 strcmp+0xa0 (/lib/aarch64-linux-gnu/ld-2.27.so) # touch 6512 1 branches:u: ffffb2208320 strcmp+0xe0 (/lib/aarch64-linux-gnu/ld-2.27.so) - perf script -F,-time -i ${perfdata} | \ - egrep " +$1 +[0-9]+ .* +branches:(.*:)? +" + perf script -F,-time -i ${perfdata} 2>&1 | \ + egrep " +$1 +[0-9]+ .* +branches:(.*:)? +" > /dev/null 2>&1 } perf_report_branch_samples() { @@ -54,8 +55,8 @@ perf_report_branch_samples() { # 73.04% 73.04% touch libc-2.27.so [.] _dl_addr # 7.71% 7.71% touch libc-2.27.so [.] getenv # 2.59% 2.59% touch ld-2.27.so [.] strcmp - perf report --stdio -i ${perfdata} | \ - egrep " +[0-9]+\.[0-9]+% +[0-9]+\.[0-9]+% +$1 " + perf report --stdio -i ${perfdata} 2>&1 | \ + egrep " +[0-9]+\.[0-9]+% +[0-9]+\.[0-9]+% +$1 " > /dev/null 2>&1 } perf_report_instruction_samples() { @@ -65,8 +66,17 @@ perf_report_instruction_samples() { # 68.12% touch libc-2.27.so [.] _dl_addr # 5.80% touch libc-2.27.so [.] getenv # 4.35% touch ld-2.27.so [.] _dl_fixup - perf report --itrace=i1000i --stdio -i ${perfdata} | \ - egrep " +[0-9]+\.[0-9]+% +$1" + perf report --itrace=i1000i --stdio -i ${perfdata} 2>&1 | \ + egrep " +[0-9]+\.[0-9]+% +$1" > /dev/null 2>&1 +} + +arm_cs_report() { + if [ $2 != 0 ]; then + echo "$1: FAIL" + glb_err=$2 + else + echo "$1: PASS" + fi } is_device_sink() { @@ -113,9 +123,7 @@ arm_cs_iterate_devices() { perf_report_instruction_samples touch err=$? - - # Exit when find failure - [ $err != 0 ] && exit $err + arm_cs_report "CoreSight path testing (CPU$2 -> $device_name)" $err fi arm_cs_iterate_devices $dev $2 @@ -129,9 +137,6 @@ arm_cs_etm_traverse_path_test() { # Find the ETM device belonging to which CPU cpu=`cat $dev/cpu` - echo $dev - echo $cpu - # Use depth-first search (DFS) to iterate outputs arm_cs_iterate_devices $dev $cpu done @@ -139,22 +144,20 @@ arm_cs_etm_traverse_path_test() { arm_cs_etm_system_wide_test() { echo "Recording trace with system wide mode" - perf record -o ${perfdata} -e cs_etm// -a -- ls + perf record -o ${perfdata} -e cs_etm// -a -- ls > /dev/null 2>&1 perf_script_branch_samples perf && perf_report_branch_samples perf && perf_report_instruction_samples perf err=$? - - # Exit when find failure - [ $err != 0 ] && exit $err + arm_cs_report "CoreSight system wide testing" $err } arm_cs_etm_snapshot_test() { echo "Recording trace with snapshot mode" perf record -o ${perfdata} -e cs_etm// -S \ - -- dd if=/dev/zero of=/dev/null & + -- dd if=/dev/zero of=/dev/null > /dev/null 2>&1 & PERFPID=$! # Wait for perf program @@ -172,12 +175,10 @@ arm_cs_etm_snapshot_test() { perf_report_instruction_samples dd err=$? - - # Exit when find failure - [ $err != 0 ] && exit $err + arm_cs_report "CoreSight snapshot testing" $err } arm_cs_etm_traverse_path_test arm_cs_etm_system_wide_test arm_cs_etm_snapshot_test -exit 0 +exit $glb_err diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 8e24a61fe4c2..b85f005308a3 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -119,6 +119,7 @@ int test__time_utils(struct test *t, int subtest); int test__jit_write_elf(struct test *test, int subtest); int test__api_io(struct test *test, int subtest); int test__demangle_java(struct test *test, int subtest); +int test__demangle_ocaml(struct test *test, int subtest); int test__pfm(struct test *test, int subtest); const char *test__pfm_subtest_get_desc(int subtest); int test__pfm_subtest_get_nr(void); diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index bd77825fd5a1..35b82caf8090 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -759,7 +759,7 @@ static int annotate_browser__run(struct annotate_browser *browser, continue; case 'k': notes->options->show_linenr = !notes->options->show_linenr; - break; + continue; case 'H': nd = browser->curr_hot; break; diff --git a/tools/perf/util/Build b/tools/perf/util/Build index e2563d0154eb..e3e12f9d4733 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -135,6 +135,7 @@ perf-y += clockid.o perf-$(CONFIG_LIBBPF) += bpf-loader.o perf-$(CONFIG_LIBBPF) += bpf_map.o +perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o perf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o perf-$(CONFIG_LIBELF) += symbol-elf.o perf-$(CONFIG_LIBELF) += probe-file.o @@ -172,6 +173,7 @@ perf-$(CONFIG_ZSTD) += zstd.o perf-$(CONFIG_LIBCAP) += cap.o +perf-y += demangle-ocaml.o perf-y += demangle-java.o perf-y += demangle-rust.o diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index ce8c07bc8c56..e60841b86d27 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -321,12 +321,18 @@ bool ins__is_call(const struct ins *ins) /* * Prevents from matching commas in the comment section, e.g.: * ffff200008446e70: b.cs ffff2000084470f4 <generic_exec_single+0x314> // b.hs, b.nlast + * + * and skip comma as part of function arguments, e.g.: + * 1d8b4ac <linemap_lookup(line_maps const*, unsigned int)+0xcc> */ static inline const char *validate_comma(const char *c, struct ins_operands *ops) { if (ops->raw_comment && c > ops->raw_comment) return NULL; + if (ops->raw_func_start && c > ops->raw_func_start) + return NULL; + return c; } @@ -341,6 +347,8 @@ static int jump__parse(struct arch *arch, struct ins_operands *ops, struct map_s u64 start, end; ops->raw_comment = strchr(ops->raw, arch->objdump.comment_char); + ops->raw_func_start = strchr(ops->raw, '<'); + c = validate_comma(c, ops); /* diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 0a0cd4f32175..096cdaf21b01 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -32,6 +32,7 @@ struct ins { struct ins_operands { char *raw; char *raw_comment; + char *raw_func_start; struct { char *raw; char *name; diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c index 90d575cee1b9..32fe41835fa6 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c @@ -172,12 +172,22 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder) decoder->record.from_ip = ip; else if (idx == SPE_ADDR_PKT_HDR_INDEX_BRANCH) decoder->record.to_ip = ip; + else if (idx == SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT) + decoder->record.virt_addr = ip; + else if (idx == SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS) + decoder->record.phys_addr = ip; break; case ARM_SPE_COUNTER: break; case ARM_SPE_CONTEXT: break; case ARM_SPE_OP_TYPE: + if (idx == SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC) { + if (payload & 0x1) + decoder->record.op = ARM_SPE_ST; + else + decoder->record.op = ARM_SPE_LD; + } break; case ARM_SPE_EVENTS: if (payload & BIT(EV_L1D_REFILL)) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h index 24727b8ca7ff..59bdb7309674 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h @@ -24,12 +24,20 @@ enum arm_spe_sample_type { ARM_SPE_REMOTE_ACCESS = 1 << 7, }; +enum arm_spe_op_type { + ARM_SPE_LD = 1 << 0, + ARM_SPE_ST = 1 << 1, +}; + struct arm_spe_record { enum arm_spe_sample_type type; int err; + u32 op; u64 from_ip; u64 to_ip; u64 timestamp; + u64 virt_addr; + u64 phys_addr; }; struct arm_spe_insn; diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index 8901a1656a41..2539d4baec44 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -53,6 +53,7 @@ struct arm_spe { u8 sample_tlb; u8 sample_branch; u8 sample_remote_access; + u8 sample_memory; u64 l1d_miss_id; u64 l1d_access_id; @@ -62,6 +63,7 @@ struct arm_spe { u64 tlb_access_id; u64 branch_miss_id; u64 remote_access_id; + u64 memory_id; u64 kernel_start; @@ -235,7 +237,6 @@ static void arm_spe_prep_sample(struct arm_spe *spe, sample->cpumode = arm_spe_cpumode(spe, sample->ip); sample->pid = speq->pid; sample->tid = speq->tid; - sample->addr = record->to_ip; sample->period = 1; sample->cpu = speq->cpu; @@ -259,11 +260,11 @@ arm_spe_deliver_synth_event(struct arm_spe *spe, return ret; } -static int -arm_spe_synth_spe_events_sample(struct arm_spe_queue *speq, - u64 spe_events_id) +static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq, + u64 spe_events_id, u64 data_src) { struct arm_spe *spe = speq->spe; + struct arm_spe_record *record = &speq->decoder->record; union perf_event *event = speq->event_buf; struct perf_sample sample = { .ip = 0, }; @@ -271,27 +272,102 @@ arm_spe_synth_spe_events_sample(struct arm_spe_queue *speq, sample.id = spe_events_id; sample.stream_id = spe_events_id; + sample.addr = record->virt_addr; + sample.phys_addr = record->phys_addr; + sample.data_src = data_src; return arm_spe_deliver_synth_event(spe, speq, event, &sample); } +static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq, + u64 spe_events_id) +{ + struct arm_spe *spe = speq->spe; + struct arm_spe_record *record = &speq->decoder->record; + union perf_event *event = speq->event_buf; + struct perf_sample sample = { .ip = 0, }; + + arm_spe_prep_sample(spe, speq, event, &sample); + + sample.id = spe_events_id; + sample.stream_id = spe_events_id; + sample.addr = record->to_ip; + + return arm_spe_deliver_synth_event(spe, speq, event, &sample); +} + +#define SPE_MEM_TYPE (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS | \ + ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS | \ + ARM_SPE_REMOTE_ACCESS) + +static bool arm_spe__is_memory_event(enum arm_spe_sample_type type) +{ + if (type & SPE_MEM_TYPE) + return true; + + return false; +} + +static u64 arm_spe__synth_data_source(const struct arm_spe_record *record) +{ + union perf_mem_data_src data_src = { 0 }; + + if (record->op == ARM_SPE_LD) + data_src.mem_op = PERF_MEM_OP_LOAD; + else + data_src.mem_op = PERF_MEM_OP_STORE; + + if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) { + data_src.mem_lvl = PERF_MEM_LVL_L3; + + if (record->type & ARM_SPE_LLC_MISS) + data_src.mem_lvl |= PERF_MEM_LVL_MISS; + else + data_src.mem_lvl |= PERF_MEM_LVL_HIT; + } else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) { + data_src.mem_lvl = PERF_MEM_LVL_L1; + + if (record->type & ARM_SPE_L1D_MISS) + data_src.mem_lvl |= PERF_MEM_LVL_MISS; + else + data_src.mem_lvl |= PERF_MEM_LVL_HIT; + } + + if (record->type & ARM_SPE_REMOTE_ACCESS) + data_src.mem_lvl |= PERF_MEM_LVL_REM_CCE1; + + if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) { + data_src.mem_dtlb = PERF_MEM_TLB_WK; + + if (record->type & ARM_SPE_TLB_MISS) + data_src.mem_dtlb |= PERF_MEM_TLB_MISS; + else + data_src.mem_dtlb |= PERF_MEM_TLB_HIT; + } + + return data_src.val; +} + static int arm_spe_sample(struct arm_spe_queue *speq) { const struct arm_spe_record *record = &speq->decoder->record; struct arm_spe *spe = speq->spe; + u64 data_src; int err; + data_src = arm_spe__synth_data_source(record); + if (spe->sample_flc) { if (record->type & ARM_SPE_L1D_MISS) { - err = arm_spe_synth_spe_events_sample( - speq, spe->l1d_miss_id); + err = arm_spe__synth_mem_sample(speq, spe->l1d_miss_id, + data_src); if (err) return err; } if (record->type & ARM_SPE_L1D_ACCESS) { - err = arm_spe_synth_spe_events_sample( - speq, spe->l1d_access_id); + err = arm_spe__synth_mem_sample(speq, spe->l1d_access_id, + data_src); if (err) return err; } @@ -299,15 +375,15 @@ static int arm_spe_sample(struct arm_spe_queue *speq) if (spe->sample_llc) { if (record->type & ARM_SPE_LLC_MISS) { - err = arm_spe_synth_spe_events_sample( - speq, spe->llc_miss_id); + err = arm_spe__synth_mem_sample(speq, spe->llc_miss_id, + data_src); if (err) return err; } if (record->type & ARM_SPE_LLC_ACCESS) { - err = arm_spe_synth_spe_events_sample( - speq, spe->llc_access_id); + err = arm_spe__synth_mem_sample(speq, spe->llc_access_id, + data_src); if (err) return err; } @@ -315,31 +391,36 @@ static int arm_spe_sample(struct arm_spe_queue *speq) if (spe->sample_tlb) { if (record->type & ARM_SPE_TLB_MISS) { - err = arm_spe_synth_spe_events_sample( - speq, spe->tlb_miss_id); + err = arm_spe__synth_mem_sample(speq, spe->tlb_miss_id, + data_src); if (err) return err; } if (record->type & ARM_SPE_TLB_ACCESS) { - err = arm_spe_synth_spe_events_sample( - speq, spe->tlb_access_id); + err = arm_spe__synth_mem_sample(speq, spe->tlb_access_id, + data_src); if (err) return err; } } if (spe->sample_branch && (record->type & ARM_SPE_BRANCH_MISS)) { - err = arm_spe_synth_spe_events_sample(speq, - spe->branch_miss_id); + err = arm_spe__synth_branch_sample(speq, spe->branch_miss_id); if (err) return err; } if (spe->sample_remote_access && (record->type & ARM_SPE_REMOTE_ACCESS)) { - err = arm_spe_synth_spe_events_sample(speq, - spe->remote_access_id); + err = arm_spe__synth_mem_sample(speq, spe->remote_access_id, + data_src); + if (err) + return err; + } + + if (spe->sample_memory && arm_spe__is_memory_event(record->type)) { + err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src); if (err) return err; } @@ -803,7 +884,7 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) attr.type = PERF_TYPE_HARDWARE; attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK; attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | - PERF_SAMPLE_PERIOD; + PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC; if (spe->timeless_decoding) attr.sample_type &= ~(u64)PERF_SAMPLE_TIME; else @@ -907,6 +988,16 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) id += 1; } + if (spe->synth_opts.mem) { + spe->sample_memory = true; + + err = arm_spe_synth_event(session, &attr, id); + if (err) + return err; + spe->memory_id = id; + arm_spe_set_event_name(evlist, id, "memory"); + } + return 0; } diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index a60878498139..953f4afacd3b 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -788,6 +788,21 @@ no_opt: return auxtrace_validate_aux_sample_size(evlist, opts); } +void auxtrace_regroup_aux_output(struct evlist *evlist) +{ + struct evsel *evsel, *aux_evsel = NULL; + struct evsel_config_term *term; + + evlist__for_each_entry(evlist, evsel) { + if (evsel__is_aux_event(evsel)) + aux_evsel = evsel; + term = evsel__get_config_term(evsel, AUX_OUTPUT); + /* If possible, group with the AUX event */ + if (term && aux_evsel) + evlist__regroup(evlist, aux_evsel, evsel); + } +} + struct auxtrace_record *__weak auxtrace_record__init(struct evlist *evlist __maybe_unused, int *err) { diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 7e5c9e1552bd..a4fbb33b7245 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -559,6 +559,7 @@ int auxtrace_parse_snapshot_options(struct auxtrace_record *itr, int auxtrace_parse_sample_options(struct auxtrace_record *itr, struct evlist *evlist, struct record_opts *opts, const char *str); +void auxtrace_regroup_aux_output(struct evlist *evlist); int auxtrace_record__options(struct auxtrace_record *itr, struct evlist *evlist, struct record_opts *opts); @@ -741,6 +742,11 @@ int auxtrace_parse_sample_options(struct auxtrace_record *itr __maybe_unused, } static inline +void auxtrace_regroup_aux_output(struct evlist *evlist __maybe_unused) +{ +} + +static inline int auxtrace__process_event(struct perf_session *session __maybe_unused, union perf_event *event __maybe_unused, struct perf_sample *sample __maybe_unused, diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c new file mode 100644 index 000000000000..04f89120b323 --- /dev/null +++ b/tools/perf/util/bpf_counter.c @@ -0,0 +1,314 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Copyright (c) 2019 Facebook */ + +#include <assert.h> +#include <limits.h> +#include <unistd.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <linux/err.h> +#include <linux/zalloc.h> +#include <bpf/bpf.h> +#include <bpf/btf.h> +#include <bpf/libbpf.h> + +#include "bpf_counter.h" +#include "counts.h" +#include "debug.h" +#include "evsel.h" +#include "target.h" + +#include "bpf_skel/bpf_prog_profiler.skel.h" + +static inline void *u64_to_ptr(__u64 ptr) +{ + return (void *)(unsigned long)ptr; +} + +static void set_max_rlimit(void) +{ + struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY }; + + setrlimit(RLIMIT_MEMLOCK, &rinf); +} + +static struct bpf_counter *bpf_counter_alloc(void) +{ + struct bpf_counter *counter; + + counter = zalloc(sizeof(*counter)); + if (counter) + INIT_LIST_HEAD(&counter->list); + return counter; +} + +static int bpf_program_profiler__destroy(struct evsel *evsel) +{ + struct bpf_counter *counter, *tmp; + + list_for_each_entry_safe(counter, tmp, + &evsel->bpf_counter_list, list) { + list_del_init(&counter->list); + bpf_prog_profiler_bpf__destroy(counter->skel); + free(counter); + } + assert(list_empty(&evsel->bpf_counter_list)); + + return 0; +} + +static char *bpf_target_prog_name(int tgt_fd) +{ + struct bpf_prog_info_linear *info_linear; + struct bpf_func_info *func_info; + const struct btf_type *t; + char *name = NULL; + struct btf *btf; + + info_linear = bpf_program__get_prog_info_linear( + tgt_fd, 1UL << BPF_PROG_INFO_FUNC_INFO); + if (IS_ERR_OR_NULL(info_linear)) { + pr_debug("failed to get info_linear for prog FD %d\n", tgt_fd); + return NULL; + } + + if (info_linear->info.btf_id == 0 || + btf__get_from_id(info_linear->info.btf_id, &btf)) { + pr_debug("prog FD %d doesn't have valid btf\n", tgt_fd); + goto out; + } + + func_info = u64_to_ptr(info_linear->info.func_info); + t = btf__type_by_id(btf, func_info[0].type_id); + if (!t) { + pr_debug("btf %d doesn't have type %d\n", + info_linear->info.btf_id, func_info[0].type_id); + goto out; + } + name = strdup(btf__name_by_offset(btf, t->name_off)); +out: + free(info_linear); + return name; +} + +static int bpf_program_profiler_load_one(struct evsel *evsel, u32 prog_id) +{ + struct bpf_prog_profiler_bpf *skel; + struct bpf_counter *counter; + struct bpf_program *prog; + char *prog_name; + int prog_fd; + int err; + + prog_fd = bpf_prog_get_fd_by_id(prog_id); + if (prog_fd < 0) { + pr_err("Failed to open fd for bpf prog %u\n", prog_id); + return -1; + } + counter = bpf_counter_alloc(); + if (!counter) { + close(prog_fd); + return -1; + } + + skel = bpf_prog_profiler_bpf__open(); + if (!skel) { + pr_err("Failed to open bpf skeleton\n"); + goto err_out; + } + + skel->rodata->num_cpu = evsel__nr_cpus(evsel); + + bpf_map__resize(skel->maps.events, evsel__nr_cpus(evsel)); + bpf_map__resize(skel->maps.fentry_readings, 1); + bpf_map__resize(skel->maps.accum_readings, 1); + + prog_name = bpf_target_prog_name(prog_fd); + if (!prog_name) { + pr_err("Failed to get program name for bpf prog %u. Does it have BTF?\n", prog_id); + goto err_out; + } + + bpf_object__for_each_program(prog, skel->obj) { + err = bpf_program__set_attach_target(prog, prog_fd, prog_name); + if (err) { + pr_err("bpf_program__set_attach_target failed.\n" + "Does bpf prog %u have BTF?\n", prog_id); + goto err_out; + } + } + set_max_rlimit(); + err = bpf_prog_profiler_bpf__load(skel); + if (err) { + pr_err("bpf_prog_profiler_bpf__load failed\n"); + goto err_out; + } + + assert(skel != NULL); + counter->skel = skel; + list_add(&counter->list, &evsel->bpf_counter_list); + close(prog_fd); + return 0; +err_out: + bpf_prog_profiler_bpf__destroy(skel); + free(counter); + close(prog_fd); + return -1; +} + +static int bpf_program_profiler__load(struct evsel *evsel, struct target *target) +{ + char *bpf_str, *bpf_str_, *tok, *saveptr = NULL, *p; + u32 prog_id; + int ret; + + bpf_str_ = bpf_str = strdup(target->bpf_str); + if (!bpf_str) + return -1; + + while ((tok = strtok_r(bpf_str, ",", &saveptr)) != NULL) { + prog_id = strtoul(tok, &p, 10); + if (prog_id == 0 || prog_id == UINT_MAX || + (*p != '\0' && *p != ',')) { + pr_err("Failed to parse bpf prog ids %s\n", + target->bpf_str); + return -1; + } + + ret = bpf_program_profiler_load_one(evsel, prog_id); + if (ret) { + bpf_program_profiler__destroy(evsel); + free(bpf_str_); + return -1; + } + bpf_str = NULL; + } + free(bpf_str_); + return 0; +} + +static int bpf_program_profiler__enable(struct evsel *evsel) +{ + struct bpf_counter *counter; + int ret; + + list_for_each_entry(counter, &evsel->bpf_counter_list, list) { + assert(counter->skel != NULL); + ret = bpf_prog_profiler_bpf__attach(counter->skel); + if (ret) { + bpf_program_profiler__destroy(evsel); + return ret; + } + } + return 0; +} + +static int bpf_program_profiler__read(struct evsel *evsel) +{ + // perf_cpu_map uses /sys/devices/system/cpu/online + int num_cpu = evsel__nr_cpus(evsel); + // BPF_MAP_TYPE_PERCPU_ARRAY uses /sys/devices/system/cpu/possible + // Sometimes possible > online, like on a Ryzen 3900X that has 24 + // threads but its possible showed 0-31 -acme + int num_cpu_bpf = libbpf_num_possible_cpus(); + struct bpf_perf_event_value values[num_cpu_bpf]; + struct bpf_counter *counter; + int reading_map_fd; + __u32 key = 0; + int err, cpu; + + if (list_empty(&evsel->bpf_counter_list)) + return -EAGAIN; + + for (cpu = 0; cpu < num_cpu; cpu++) { + perf_counts(evsel->counts, cpu, 0)->val = 0; + perf_counts(evsel->counts, cpu, 0)->ena = 0; + perf_counts(evsel->counts, cpu, 0)->run = 0; + } + list_for_each_entry(counter, &evsel->bpf_counter_list, list) { + struct bpf_prog_profiler_bpf *skel = counter->skel; + + assert(skel != NULL); + reading_map_fd = bpf_map__fd(skel->maps.accum_readings); + + err = bpf_map_lookup_elem(reading_map_fd, &key, values); + if (err) { + pr_err("failed to read value\n"); + return err; + } + + for (cpu = 0; cpu < num_cpu; cpu++) { + perf_counts(evsel->counts, cpu, 0)->val += values[cpu].counter; + perf_counts(evsel->counts, cpu, 0)->ena += values[cpu].enabled; + perf_counts(evsel->counts, cpu, 0)->run += values[cpu].running; + } + } + return 0; +} + +static int bpf_program_profiler__install_pe(struct evsel *evsel, int cpu, + int fd) +{ + struct bpf_prog_profiler_bpf *skel; + struct bpf_counter *counter; + int ret; + + list_for_each_entry(counter, &evsel->bpf_counter_list, list) { + skel = counter->skel; + assert(skel != NULL); + + ret = bpf_map_update_elem(bpf_map__fd(skel->maps.events), + &cpu, &fd, BPF_ANY); + if (ret) + return ret; + } + return 0; +} + +struct bpf_counter_ops bpf_program_profiler_ops = { + .load = bpf_program_profiler__load, + .enable = bpf_program_profiler__enable, + .read = bpf_program_profiler__read, + .destroy = bpf_program_profiler__destroy, + .install_pe = bpf_program_profiler__install_pe, +}; + +int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd) +{ + if (list_empty(&evsel->bpf_counter_list)) + return 0; + return evsel->bpf_counter_ops->install_pe(evsel, cpu, fd); +} + +int bpf_counter__load(struct evsel *evsel, struct target *target) +{ + if (target__has_bpf(target)) + evsel->bpf_counter_ops = &bpf_program_profiler_ops; + + if (evsel->bpf_counter_ops) + return evsel->bpf_counter_ops->load(evsel, target); + return 0; +} + +int bpf_counter__enable(struct evsel *evsel) +{ + if (list_empty(&evsel->bpf_counter_list)) + return 0; + return evsel->bpf_counter_ops->enable(evsel); +} + +int bpf_counter__read(struct evsel *evsel) +{ + if (list_empty(&evsel->bpf_counter_list)) + return -EAGAIN; + return evsel->bpf_counter_ops->read(evsel); +} + +void bpf_counter__destroy(struct evsel *evsel) +{ + if (list_empty(&evsel->bpf_counter_list)) + return; + evsel->bpf_counter_ops->destroy(evsel); + evsel->bpf_counter_ops = NULL; +} diff --git a/tools/perf/util/bpf_counter.h b/tools/perf/util/bpf_counter.h new file mode 100644 index 000000000000..2eca210e5dc1 --- /dev/null +++ b/tools/perf/util/bpf_counter.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_BPF_COUNTER_H +#define __PERF_BPF_COUNTER_H 1 + +#include <linux/list.h> + +struct evsel; +struct target; +struct bpf_counter; + +typedef int (*bpf_counter_evsel_op)(struct evsel *evsel); +typedef int (*bpf_counter_evsel_target_op)(struct evsel *evsel, + struct target *target); +typedef int (*bpf_counter_evsel_install_pe_op)(struct evsel *evsel, + int cpu, + int fd); + +struct bpf_counter_ops { + bpf_counter_evsel_target_op load; + bpf_counter_evsel_op enable; + bpf_counter_evsel_op read; + bpf_counter_evsel_op destroy; + bpf_counter_evsel_install_pe_op install_pe; +}; + +struct bpf_counter { + void *skel; + struct list_head list; +}; + +#ifdef HAVE_BPF_SKEL + +int bpf_counter__load(struct evsel *evsel, struct target *target); +int bpf_counter__enable(struct evsel *evsel); +int bpf_counter__read(struct evsel *evsel); +void bpf_counter__destroy(struct evsel *evsel); +int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd); + +#else /* HAVE_BPF_SKEL */ + +#include<linux/err.h> + +static inline int bpf_counter__load(struct evsel *evsel __maybe_unused, + struct target *target __maybe_unused) +{ + return 0; +} + +static inline int bpf_counter__enable(struct evsel *evsel __maybe_unused) +{ + return 0; +} + +static inline int bpf_counter__read(struct evsel *evsel __maybe_unused) +{ + return -EAGAIN; +} + +static inline void bpf_counter__destroy(struct evsel *evsel __maybe_unused) +{ +} + +static inline int bpf_counter__install_pe(struct evsel *evsel __maybe_unused, + int cpu __maybe_unused, + int fd __maybe_unused) +{ + return 0; +} + +#endif /* HAVE_BPF_SKEL */ + +#endif /* __PERF_BPF_COUNTER_H */ diff --git a/tools/perf/util/bpf_skel/.gitignore b/tools/perf/util/bpf_skel/.gitignore new file mode 100644 index 000000000000..5263e9e6c5d8 --- /dev/null +++ b/tools/perf/util/bpf_skel/.gitignore @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only +.tmp +*.skel.h
\ No newline at end of file diff --git a/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c b/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c new file mode 100644 index 000000000000..c7cec92d0236 --- /dev/null +++ b/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +// Copyright (c) 2020 Facebook +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +/* map of perf event fds, num_cpu * num_metric entries */ +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(int)); +} events SEC(".maps"); + +/* readings at fentry */ +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(struct bpf_perf_event_value)); + __uint(max_entries, 1); +} fentry_readings SEC(".maps"); + +/* accumulated readings */ +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(struct bpf_perf_event_value)); + __uint(max_entries, 1); +} accum_readings SEC(".maps"); + +const volatile __u32 num_cpu = 1; + +SEC("fentry/XXX") +int BPF_PROG(fentry_XXX) +{ + __u32 key = bpf_get_smp_processor_id(); + struct bpf_perf_event_value *ptr; + __u32 zero = 0; + long err; + + /* look up before reading, to reduce error */ + ptr = bpf_map_lookup_elem(&fentry_readings, &zero); + if (!ptr) + return 0; + + err = bpf_perf_event_read_value(&events, key, ptr, sizeof(*ptr)); + if (err) + return 0; + + return 0; +} + +static inline void +fexit_update_maps(struct bpf_perf_event_value *after) +{ + struct bpf_perf_event_value *before, diff, *accum; + __u32 zero = 0; + + before = bpf_map_lookup_elem(&fentry_readings, &zero); + /* only account samples with a valid fentry_reading */ + if (before && before->counter) { + struct bpf_perf_event_value *accum; + + diff.counter = after->counter - before->counter; + diff.enabled = after->enabled - before->enabled; + diff.running = after->running - before->running; + + accum = bpf_map_lookup_elem(&accum_readings, &zero); + if (accum) { + accum->counter += diff.counter; + accum->enabled += diff.enabled; + accum->running += diff.running; + } + } +} + +SEC("fexit/XXX") +int BPF_PROG(fexit_XXX) +{ + struct bpf_perf_event_value reading; + __u32 cpu = bpf_get_smp_processor_id(); + __u32 one = 1, zero = 0; + int err; + + /* read all events before updating the maps, to reduce error */ + err = bpf_perf_event_read_value(&events, cpu, &reading, sizeof(reading)); + if (err) + return 0; + + fexit_update_maps(&reading); + return 0; +} + +char LICENSE[] SEC("license") = "Dual BSD/GPL"; diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 02df36b30ac5..e32e8f2ff3bd 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -448,7 +448,8 @@ static bool lsdir_bid_tail_filter(const char *name __maybe_unused, int i = 0; while (isxdigit(d->d_name[i]) && i < SBUILD_ID_SIZE - 3) i++; - return (i == SBUILD_ID_SIZE - 3) && (d->d_name[i] == '\0'); + return (i >= SBUILD_ID_MIN_SIZE - 3) && (i <= SBUILD_ID_SIZE - 3) && + (d->d_name[i] == '\0'); } struct strlist *build_id_cache__list_all(bool validonly) @@ -490,7 +491,7 @@ struct strlist *build_id_cache__list_all(bool validonly) } strlist__for_each_entry(nd2, linklist) { if (snprintf(sbuild_id, SBUILD_ID_SIZE, "%s%s", - nd->s, nd2->s) != SBUILD_ID_SIZE - 1) + nd->s, nd2->s) > SBUILD_ID_SIZE - 1) goto err_out; if (validonly && !build_id_cache__valid_id(sbuild_id)) continue; diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index 02613f4b2c29..c19617151670 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -2,8 +2,10 @@ #ifndef PERF_BUILD_ID_H_ #define PERF_BUILD_ID_H_ 1 -#define BUILD_ID_SIZE 20 +#define BUILD_ID_SIZE 20 /* SHA-1 length in bytes */ +#define BUILD_ID_MIN_SIZE 16 /* MD5/UUID/GUID length in bytes */ #define SBUILD_ID_SIZE (BUILD_ID_SIZE * 2 + 1) +#define SBUILD_ID_MIN_SIZE (BUILD_ID_MIN_SIZE * 2 + 1) #include "machine.h" #include "tool.h" diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index 5dff7e489921..f24ab4585553 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -161,7 +161,7 @@ void evlist__set_default_cgroup(struct evlist *evlist, struct cgroup *cgroup) /* helper function for ftw() in match_cgroups and list_cgroups */ static int add_cgroup_name(const char *fpath, const struct stat *sb __maybe_unused, - int typeflag) + int typeflag, struct FTW *ftwbuf __maybe_unused) { struct cgroup_name *cn; @@ -209,12 +209,12 @@ static int list_cgroups(const char *str) if (!s) return -1; /* pretend if it's added by ftw() */ - ret = add_cgroup_name(s, NULL, FTW_D); + ret = add_cgroup_name(s, NULL, FTW_D, NULL); free(s); if (ret) return -1; } else { - if (add_cgroup_name("", NULL, FTW_D) < 0) + if (add_cgroup_name("", NULL, FTW_D, NULL) < 0) return -1; } @@ -247,7 +247,7 @@ static int match_cgroups(const char *str) prefix_len = strlen(mnt); /* collect all cgroups in the cgroup_list */ - if (ftw(mnt, add_cgroup_name, 20) < 0) + if (nftw(mnt, add_cgroup_name, 20, 0) < 0) return -1; for (;;) { diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 6969f82843ee..6984c77068a3 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -489,7 +489,7 @@ int perf_default_config(const char *var, const char *value, return 0; } -int perf_config_from_file(config_fn_t fn, const char *filename, void *data) +static int perf_config_from_file(config_fn_t fn, const char *filename, void *data) { int ret; FILE *f = fopen(filename, "r"); @@ -521,16 +521,66 @@ static int perf_env_bool(const char *k, int def) return v ? perf_config_bool(k, v) : def; } -static int perf_config_system(void) +int perf_config_system(void) { return !perf_env_bool("PERF_CONFIG_NOSYSTEM", 0); } -static int perf_config_global(void) +int perf_config_global(void) { return !perf_env_bool("PERF_CONFIG_NOGLOBAL", 0); } +static char *home_perfconfig(void) +{ + const char *home = NULL; + char *config; + struct stat st; + + home = getenv("HOME"); + + /* + * Skip reading user config if: + * - there is no place to read it from (HOME) + * - we are asked not to (PERF_CONFIG_NOGLOBAL=1) + */ + if (!home || !*home || !perf_config_global()) + return NULL; + + config = strdup(mkpath("%s/.perfconfig", home)); + if (config == NULL) { + pr_warning("Not enough memory to process %s/.perfconfig, ignoring it.", home); + return NULL; + } + + if (stat(config, &st) < 0) + goto out_free; + + if (st.st_uid && (st.st_uid != geteuid())) { + pr_warning("File %s not owned by current user or root, ignoring it.", config); + goto out_free; + } + + if (st.st_size) + return config; + +out_free: + free(config); + return NULL; +} + +const char *perf_home_perfconfig(void) +{ + static const char *config; + static bool failed; + + config = failed ? NULL : home_perfconfig(); + if (!config) + failed = true; + + return config; +} + static struct perf_config_section *find_section(struct list_head *sections, const char *section_name) { @@ -676,9 +726,6 @@ int perf_config_set__collect(struct perf_config_set *set, const char *file_name, static int perf_config_set__init(struct perf_config_set *set) { int ret = -1; - const char *home = NULL; - char *user_config; - struct stat st; /* Setting $PERF_CONFIG makes perf read _only_ the given config file. */ if (config_exclusive_filename) @@ -687,41 +734,11 @@ static int perf_config_set__init(struct perf_config_set *set) if (perf_config_from_file(collect_config, perf_etc_perfconfig(), set) < 0) goto out; } - - home = getenv("HOME"); - - /* - * Skip reading user config if: - * - there is no place to read it from (HOME) - * - we are asked not to (PERF_CONFIG_NOGLOBAL=1) - */ - if (!home || !*home || !perf_config_global()) - return 0; - - user_config = strdup(mkpath("%s/.perfconfig", home)); - if (user_config == NULL) { - pr_warning("Not enough memory to process %s/.perfconfig, ignoring it.", home); - goto out; - } - - if (stat(user_config, &st) < 0) { - if (errno == ENOENT) - ret = 0; - goto out_free; - } - - ret = 0; - - if (st.st_uid && (st.st_uid != geteuid())) { - pr_warning("File %s not owned by current user or root, ignoring it.", user_config); - goto out_free; + if (perf_config_global() && perf_home_perfconfig()) { + if (perf_config_from_file(collect_config, perf_home_perfconfig(), set) < 0) + goto out; } - if (st.st_size) - ret = perf_config_from_file(collect_config, user_config, set); - -out_free: - free(user_config); out: return ret; } @@ -738,6 +755,18 @@ struct perf_config_set *perf_config_set__new(void) return set; } +struct perf_config_set *perf_config_set__load_file(const char *file) +{ + struct perf_config_set *set = zalloc(sizeof(*set)); + + if (set) { + INIT_LIST_HEAD(&set->sections); + perf_config_from_file(collect_config, file, set); + } + + return set; +} + static int perf_config__init(void) { if (config_set == NULL) @@ -746,17 +775,15 @@ static int perf_config__init(void) return config_set == NULL; } -int perf_config(config_fn_t fn, void *data) +int perf_config_set(struct perf_config_set *set, + config_fn_t fn, void *data) { int ret = 0; char key[BUFSIZ]; struct perf_config_section *section; struct perf_config_item *item; - if (config_set == NULL && perf_config__init()) - return -1; - - perf_config_set__for_each_entry(config_set, section, item) { + perf_config_set__for_each_entry(set, section, item) { char *value = item->value; if (value) { @@ -778,6 +805,14 @@ out: return ret; } +int perf_config(config_fn_t fn, void *data) +{ + if (config_set == NULL && perf_config__init()) + return -1; + + return perf_config_set(config_set, fn, data); +} + void perf_config__exit(void) { perf_config_set__delete(config_set); diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h index 8c881e3a3ec3..2fd77aaff4d2 100644 --- a/tools/perf/util/config.h +++ b/tools/perf/util/config.h @@ -27,17 +27,22 @@ extern const char *config_exclusive_filename; typedef int (*config_fn_t)(const char *, const char *, void *); -int perf_config_from_file(config_fn_t fn, const char *filename, void *data); int perf_default_config(const char *, const char *, void *); int perf_config(config_fn_t fn, void *); +int perf_config_set(struct perf_config_set *set, + config_fn_t fn, void *data); int perf_config_int(int *dest, const char *, const char *); int perf_config_u8(u8 *dest, const char *name, const char *value); int perf_config_u64(u64 *dest, const char *, const char *); int perf_config_bool(const char *, const char *); int config_error_nonbool(const char *); const char *perf_etc_perfconfig(void); +const char *perf_home_perfconfig(void); +int perf_config_system(void); +int perf_config_global(void); struct perf_config_set *perf_config_set__new(void); +struct perf_config_set *perf_config_set__load_file(const char *file); void perf_config_set__delete(struct perf_config_set *set); int perf_config_set__collect(struct perf_config_set *set, const char *file_name, const char *var, const char *value); diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index cd007cc9c283..3f4bc4050477 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -419,19 +419,10 @@ cs_etm_decoder__buffer_range(struct cs_etm_queue *etmq, packet->last_instr_subtype = elem->last_i_subtype; packet->last_instr_cond = elem->last_instr_cond; - switch (elem->last_i_type) { - case OCSD_INSTR_BR: - case OCSD_INSTR_BR_INDIRECT: + if (elem->last_i_type == OCSD_INSTR_BR || elem->last_i_type == OCSD_INSTR_BR_INDIRECT) packet->last_instr_taken_branch = elem->last_instr_exec; - break; - case OCSD_INSTR_ISB: - case OCSD_INSTR_DSB_DMB: - case OCSD_INSTR_WFI_WFE: - case OCSD_INSTR_OTHER: - default: + else packet->last_instr_taken_branch = false; - break; - } packet->last_instr_size = elem->last_instr_sz; @@ -572,6 +563,8 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( case OCSD_GEN_TRC_ELEM_EVENT: case OCSD_GEN_TRC_ELEM_SWTRACE: case OCSD_GEN_TRC_ELEM_CUSTOM: + case OCSD_GEN_TRC_ELEM_SYNC_MARKER: + case OCSD_GEN_TRC_ELEM_MEMTRANS: default: break; } diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 27c5fef9ad54..8b67bd97d122 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -948,7 +948,7 @@ static char *change_name(char *name, char *orig_name, int dup) goto out; /* * Add '_' prefix to potential keywork. According to - * Mathieu Desnoyers (https://lkml.org/lkml/2015/1/23/652), + * Mathieu Desnoyers (https://lore.kernel.org/lkml/1074266107.40857.1422045946295.JavaMail.zimbra@efficios.com), * futher CTF spec updating may require us to use '$'. */ if (dup < 0) diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c index db7447154622..5cd189172525 100644 --- a/tools/perf/util/db-export.c +++ b/tools/perf/util/db-export.c @@ -438,6 +438,8 @@ static struct { {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT, "transaction abort"}, {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TRACE_BEGIN, "trace begin"}, {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TRACE_END, "trace end"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_VMENTRY, "vm entry"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_VMEXIT, "vm exit"}, {0, NULL} }; diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 50fd6a4be4e0..2c06abf6dcd2 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -10,6 +10,7 @@ #include <api/debug.h> #include <linux/kernel.h> #include <linux/time64.h> +#include <sys/time.h> #ifdef HAVE_BACKTRACE_SUPPORT #include <execinfo.h> #endif @@ -31,21 +32,48 @@ int debug_ordered_events; static int redirect_to_stderr; int debug_data_convert; static FILE *debug_file; +bool debug_display_time; void debug_set_file(FILE *file) { debug_file = file; } +void debug_set_display_time(bool set) +{ + debug_display_time = set; +} + +static int fprintf_time(FILE *file) +{ + struct timeval tod; + struct tm ltime; + char date[64]; + + if (!debug_display_time) + return 0; + + if (gettimeofday(&tod, NULL) != 0) + return 0; + + if (localtime_r(&tod.tv_sec, <ime) == NULL) + return 0; + + strftime(date, sizeof(date), "%F %H:%M:%S", <ime); + return fprintf(file, "[%s.%06lu] ", date, (long)tod.tv_usec); +} + int veprintf(int level, int var, const char *fmt, va_list args) { int ret = 0; if (var >= level) { - if (use_browser >= 1 && !redirect_to_stderr) + if (use_browser >= 1 && !redirect_to_stderr) { ui_helpline__vshow(fmt, args); - else - ret = vfprintf(debug_file, fmt, args); + } else { + ret = fprintf_time(debug_file); + ret += vfprintf(debug_file, fmt, args); + } } return ret; diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index 43f712295645..48f631966067 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -64,6 +64,7 @@ int veprintf(int level, int var, const char *fmt, va_list args); int perf_debug_option(const char *str); void debug_set_file(FILE *file); +void debug_set_display_time(bool set); void perf_debug_setup(void); int perf_quiet_option(void); diff --git a/tools/perf/util/demangle-ocaml.c b/tools/perf/util/demangle-ocaml.c new file mode 100644 index 000000000000..3df14e67c622 --- /dev/null +++ b/tools/perf/util/demangle-ocaml.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <string.h> +#include <stdlib.h> +#include "util/string2.h" + +#include "demangle-ocaml.h" + +#include <linux/ctype.h> + +static const char *caml_prefix = "caml"; +static const size_t caml_prefix_len = 4; + +/* mangled OCaml symbols start with "caml" followed by an upper-case letter */ +static bool +ocaml_is_mangled(const char *sym) +{ + return 0 == strncmp(sym, caml_prefix, caml_prefix_len) + && isupper(sym[caml_prefix_len]); +} + +/* + * input: + * sym: a symbol which may have been mangled by the OCaml compiler + * return: + * if the input doesn't look like a mangled OCaml symbol, NULL is returned + * otherwise, a newly allocated string containing the demangled symbol is returned + */ +char * +ocaml_demangle_sym(const char *sym) +{ + char *result; + int j = 0; + int i; + int len; + + if (!ocaml_is_mangled(sym)) { + return NULL; + } + + len = strlen(sym); + + /* the demangled symbol is always smaller than the mangled symbol */ + result = malloc(len + 1); + if (!result) + return NULL; + + /* skip "caml" prefix */ + i = caml_prefix_len; + + while (i < len) { + if (sym[i] == '_' && sym[i + 1] == '_') { + /* "__" -> "." */ + result[j++] = '.'; + i += 2; + } + else if (sym[i] == '$' && isxdigit(sym[i + 1]) && isxdigit(sym[i + 2])) { + /* "$xx" is a hex-encoded character */ + result[j++] = (hex(sym[i + 1]) << 4) | hex(sym[i + 2]); + i += 3; + } + else { + result[j++] = sym[i++]; + } + } + result[j] = '\0'; + + /* scan backwards to remove an "_" followed by decimal digits */ + if (j != 0 && isdigit(result[j - 1])) { + while (--j) { + if (!isdigit(result[j])) { + break; + } + } + if (result[j] == '_') { + result[j] = '\0'; + } + } + + return result; +} diff --git a/tools/perf/util/demangle-ocaml.h b/tools/perf/util/demangle-ocaml.h new file mode 100644 index 000000000000..843cc4fa10a6 --- /dev/null +++ b/tools/perf/util/demangle-ocaml.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_DEMANGLE_OCAML +#define __PERF_DEMANGLE_OCAML 1 + +char * ocaml_demangle_sym(const char *str); + +#endif /* __PERF_DEMANGLE_OCAML */ diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 05616d4138a9..ac706304afe9 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -288,17 +288,36 @@ size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp) size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp) { - return fprintf(fp, " %d/%d: [%#" PRI_lx64 "(%#" PRI_lx64 ") @ %#" PRI_lx64 - " %02x:%02x %"PRI_lu64" %"PRI_lu64"]: %c%c%c%c %s\n", - event->mmap2.pid, event->mmap2.tid, event->mmap2.start, - event->mmap2.len, event->mmap2.pgoff, event->mmap2.maj, - event->mmap2.min, event->mmap2.ino, - event->mmap2.ino_generation, - (event->mmap2.prot & PROT_READ) ? 'r' : '-', - (event->mmap2.prot & PROT_WRITE) ? 'w' : '-', - (event->mmap2.prot & PROT_EXEC) ? 'x' : '-', - (event->mmap2.flags & MAP_SHARED) ? 's' : 'p', - event->mmap2.filename); + if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) { + char sbuild_id[SBUILD_ID_SIZE]; + struct build_id bid; + + build_id__init(&bid, event->mmap2.build_id, + event->mmap2.build_id_size); + build_id__sprintf(&bid, sbuild_id); + + return fprintf(fp, " %d/%d: [%#" PRI_lx64 "(%#" PRI_lx64 ") @ %#" PRI_lx64 + " <%s>]: %c%c%c%c %s\n", + event->mmap2.pid, event->mmap2.tid, event->mmap2.start, + event->mmap2.len, event->mmap2.pgoff, sbuild_id, + (event->mmap2.prot & PROT_READ) ? 'r' : '-', + (event->mmap2.prot & PROT_WRITE) ? 'w' : '-', + (event->mmap2.prot & PROT_EXEC) ? 'x' : '-', + (event->mmap2.flags & MAP_SHARED) ? 's' : 'p', + event->mmap2.filename); + } else { + return fprintf(fp, " %d/%d: [%#" PRI_lx64 "(%#" PRI_lx64 ") @ %#" PRI_lx64 + " %02x:%02x %"PRI_lu64" %"PRI_lu64"]: %c%c%c%c %s\n", + event->mmap2.pid, event->mmap2.tid, event->mmap2.start, + event->mmap2.len, event->mmap2.pgoff, event->mmap2.maj, + event->mmap2.min, event->mmap2.ino, + event->mmap2.ino_generation, + (event->mmap2.prot & PROT_READ) ? 'r' : '-', + (event->mmap2.prot & PROT_WRITE) ? 'w' : '-', + (event->mmap2.prot & PROT_EXEC) ? 'x' : '-', + (event->mmap2.flags & MAP_SHARED) ? 's' : 'p', + event->mmap2.filename); + } } size_t perf_event__fprintf_thread_map(union perf_event *event, FILE *fp) @@ -626,6 +645,19 @@ struct symbol *thread__find_symbol_fb(struct thread *thread, u8 cpumode, return al->sym; } +static bool check_address_range(struct intlist *addr_list, int addr_range, + unsigned long addr) +{ + struct int_node *pos; + + intlist__for_each_entry(pos, addr_list) { + if (addr >= pos->i && addr < pos->i + addr_range) + return true; + } + + return false; +} + /* * Callers need to drop the reference to al->thread, obtained in * machine__findnew_thread() @@ -673,6 +705,8 @@ int machine__resolve(struct machine *machine, struct addr_location *al, } al->sym = map__find_symbol(al->map, al->addr); + } else if (symbol_conf.dso_list) { + al->filtered |= (1 << HIST_FILTER__DSO); } if (symbol_conf.sym_list) { @@ -690,6 +724,17 @@ int machine__resolve(struct machine *machine, struct addr_location *al, ret = strlist__has_entry(symbol_conf.sym_list, al_addr_str); } + if (!ret && symbol_conf.addr_list && al->map) { + unsigned long addr = al->map->unmap_ip(al->map, al->addr); + + ret = intlist__has_entry(symbol_conf.addr_list, addr); + if (!ret && symbol_conf.addr_range) { + ret = check_address_range(symbol_conf.addr_list, + symbol_conf.addr_range, + addr); + } + } + if (!ret) al->filtered |= (1 << HIST_FILTER__SYMBOL); } diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index ff403ea578e1..f603edbbbc6f 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -96,6 +96,8 @@ enum { PERF_IP_FLAG_TRACE_BEGIN = 1ULL << 8, PERF_IP_FLAG_TRACE_END = 1ULL << 9, PERF_IP_FLAG_IN_TX = 1ULL << 10, + PERF_IP_FLAG_VMENTRY = 1ULL << 11, + PERF_IP_FLAG_VMEXIT = 1ULL << 12, }; #define PERF_IP_FLAG_CHARS "bcrosyiABEx" @@ -110,7 +112,9 @@ enum { PERF_IP_FLAG_INTERRUPT |\ PERF_IP_FLAG_TX_ABORT |\ PERF_IP_FLAG_TRACE_BEGIN |\ - PERF_IP_FLAG_TRACE_END) + PERF_IP_FLAG_TRACE_END |\ + PERF_IP_FLAG_VMENTRY |\ + PERF_IP_FLAG_VMEXIT) #define MAX_INSN 16 @@ -136,11 +140,13 @@ struct perf_sample { u64 data_src; u64 phys_addr; u64 data_page_size; + u64 code_page_size; u64 cgroup; u32 flags; u16 insn_len; u8 cpumode; u16 misc; + u16 ins_lat; bool no_hw_idx; /* No hw_idx collected in branch_stack */ char insn[MAX_INSN]; void *raw_data; @@ -171,6 +177,7 @@ enum perf_synth_id { PERF_SYNTH_INTEL_EXSTOP, PERF_SYNTH_INTEL_PWRX, PERF_SYNTH_INTEL_CBR, + PERF_SYNTH_INTEL_PSB, }; /* @@ -263,6 +270,12 @@ struct perf_synth_intel_cbr { u32 reserved3; }; +struct perf_synth_intel_psb { + u32 padding; + u32 reserved; + u64 offset; +}; + /* * raw_data is always 4 bytes from an 8-byte boundary, so subtract 4 to get * 8-byte alignment. @@ -412,4 +425,7 @@ extern unsigned int proc_map_timeout; #define PAGE_SIZE_NAME_LEN 32 char *get_page_size_name(u64 size, char *str); +void arch_perf_parse_sample_weight(struct perf_sample *data, const __u64 *array, u64 type); +void arch_perf_synthesize_sample_weight(const struct perf_sample *data, __u64 *array, u64 type); + #endif /* __PERF_RECORD_H */ diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 05363a7247c4..5121b4db66fe 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -24,6 +24,7 @@ #include "bpf-event.h" #include "util/string2.h" #include "util/perf_api_probe.h" +#include "util/evsel_fprintf.h" #include <signal.h> #include <unistd.h> #include <sched.h> @@ -303,6 +304,11 @@ int __evlist__add_default_attrs(struct evlist *evlist, struct perf_event_attr *a return evlist__add_attrs(evlist, attrs, nr_attrs); } +__weak int arch_evlist__add_default_attrs(struct evlist *evlist __maybe_unused) +{ + return 0; +} + struct evsel *evlist__find_tracepoint_by_id(struct evlist *evlist, int id) { struct evsel *evsel; @@ -572,6 +578,14 @@ int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask) return perf_evlist__filter_pollfd(&evlist->core, revents_and_mask); } +#ifdef HAVE_EVENTFD_SUPPORT +int evlist__add_wakeup_eventfd(struct evlist *evlist, int fd) +{ + return perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN, + fdarray_flag__nonfilterable); +} +#endif + int evlist__poll(struct evlist *evlist, int timeout) { return perf_evlist__poll(&evlist->core, timeout); @@ -1936,6 +1950,15 @@ static int evlist__ctlfd_recv(struct evlist *evlist, enum evlist_ctl_cmd *cmd, (sizeof(EVLIST_CTL_CMD_SNAPSHOT_TAG)-1))) { *cmd = EVLIST_CTL_CMD_SNAPSHOT; pr_debug("is snapshot\n"); + } else if (!strncmp(cmd_data, EVLIST_CTL_CMD_EVLIST_TAG, + (sizeof(EVLIST_CTL_CMD_EVLIST_TAG)-1))) { + *cmd = EVLIST_CTL_CMD_EVLIST; + } else if (!strncmp(cmd_data, EVLIST_CTL_CMD_STOP_TAG, + (sizeof(EVLIST_CTL_CMD_STOP_TAG)-1))) { + *cmd = EVLIST_CTL_CMD_STOP; + } else if (!strncmp(cmd_data, EVLIST_CTL_CMD_PING_TAG, + (sizeof(EVLIST_CTL_CMD_PING_TAG)-1))) { + *cmd = EVLIST_CTL_CMD_PING; } } @@ -1957,6 +1980,98 @@ int evlist__ctlfd_ack(struct evlist *evlist) return err; } +static int get_cmd_arg(char *cmd_data, size_t cmd_size, char **arg) +{ + char *data = cmd_data + cmd_size; + + /* no argument */ + if (!*data) + return 0; + + /* there's argument */ + if (*data == ' ') { + *arg = data + 1; + return 1; + } + + /* malformed */ + return -1; +} + +static int evlist__ctlfd_enable(struct evlist *evlist, char *cmd_data, bool enable) +{ + struct evsel *evsel; + char *name; + int err; + + err = get_cmd_arg(cmd_data, + enable ? sizeof(EVLIST_CTL_CMD_ENABLE_TAG) - 1 : + sizeof(EVLIST_CTL_CMD_DISABLE_TAG) - 1, + &name); + if (err < 0) { + pr_info("failed: wrong command\n"); + return -1; + } + + if (err) { + evsel = evlist__find_evsel_by_str(evlist, name); + if (evsel) { + if (enable) + evlist__enable_evsel(evlist, name); + else + evlist__disable_evsel(evlist, name); + pr_info("Event %s %s\n", evsel->name, + enable ? "enabled" : "disabled"); + } else { + pr_info("failed: can't find '%s' event\n", name); + } + } else { + if (enable) { + evlist__enable(evlist); + pr_info(EVLIST_ENABLED_MSG); + } else { + evlist__disable(evlist); + pr_info(EVLIST_DISABLED_MSG); + } + } + + return 0; +} + +static int evlist__ctlfd_list(struct evlist *evlist, char *cmd_data) +{ + struct perf_attr_details details = { .verbose = false, }; + struct evsel *evsel; + char *arg; + int err; + + err = get_cmd_arg(cmd_data, + sizeof(EVLIST_CTL_CMD_EVLIST_TAG) - 1, + &arg); + if (err < 0) { + pr_info("failed: wrong command\n"); + return -1; + } + + if (err) { + if (!strcmp(arg, "-v")) { + details.verbose = true; + } else if (!strcmp(arg, "-g")) { + details.event_group = true; + } else if (!strcmp(arg, "-F")) { + details.freq = true; + } else { + pr_info("failed: wrong command\n"); + return -1; + } + } + + evlist__for_each_entry(evlist, evsel) + evsel__fprintf(evsel, &details, stderr); + + return 0; +} + int evlist__ctlfd_process(struct evlist *evlist, enum evlist_ctl_cmd *cmd) { int err = 0; @@ -1973,12 +2088,16 @@ int evlist__ctlfd_process(struct evlist *evlist, enum evlist_ctl_cmd *cmd) if (err > 0) { switch (*cmd) { case EVLIST_CTL_CMD_ENABLE: - evlist__enable(evlist); - break; case EVLIST_CTL_CMD_DISABLE: - evlist__disable(evlist); + err = evlist__ctlfd_enable(evlist, cmd_data, + *cmd == EVLIST_CTL_CMD_ENABLE); + break; + case EVLIST_CTL_CMD_EVLIST: + err = evlist__ctlfd_list(evlist, cmd_data); break; case EVLIST_CTL_CMD_SNAPSHOT: + case EVLIST_CTL_CMD_STOP: + case EVLIST_CTL_CMD_PING: break; case EVLIST_CTL_CMD_ACK: case EVLIST_CTL_CMD_UNSUPPORTED: diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 1aae75895dea..b695ffaae519 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -110,6 +110,8 @@ int __evlist__add_default_attrs(struct evlist *evlist, #define evlist__add_default_attrs(evlist, array) \ __evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array)) +int arch_evlist__add_default_attrs(struct evlist *evlist); + int evlist__add_dummy(struct evlist *evlist); int evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *attr, @@ -142,6 +144,10 @@ struct evsel *evlist__find_tracepoint_by_name(struct evlist *evlist, const char int evlist__add_pollfd(struct evlist *evlist, int fd); int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask); +#ifdef HAVE_EVENTFD_SUPPORT +int evlist__add_wakeup_eventfd(struct evlist *evlist, int fd); +#endif + int evlist__poll(struct evlist *evlist, int timeout); struct evsel *evlist__id2evsel(struct evlist *evlist, u64 id); @@ -330,6 +336,9 @@ struct evsel *evlist__reset_weak_group(struct evlist *evlist, struct evsel *evse #define EVLIST_CTL_CMD_DISABLE_TAG "disable" #define EVLIST_CTL_CMD_ACK_TAG "ack\n" #define EVLIST_CTL_CMD_SNAPSHOT_TAG "snapshot" +#define EVLIST_CTL_CMD_EVLIST_TAG "evlist" +#define EVLIST_CTL_CMD_STOP_TAG "stop" +#define EVLIST_CTL_CMD_PING_TAG "ping" #define EVLIST_CTL_CMD_MAX_LEN 64 @@ -339,6 +348,9 @@ enum evlist_ctl_cmd { EVLIST_CTL_CMD_DISABLE, EVLIST_CTL_CMD_ACK, EVLIST_CTL_CMD_SNAPSHOT, + EVLIST_CTL_CMD_EVLIST, + EVLIST_CTL_CMD_STOP, + EVLIST_CTL_CMD_PING, }; int evlist__parse_control(const char *str, int *ctl_fd, int *ctl_fd_ack, bool *ctl_fd_close); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index c26ea82220bd..1bf76864c4f2 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -25,6 +25,7 @@ #include <stdlib.h> #include <perf/evsel.h> #include "asm/bug.h" +#include "bpf_counter.h" #include "callchain.h" #include "cgroup.h" #include "counts.h" @@ -247,6 +248,7 @@ void evsel__init(struct evsel *evsel, evsel->bpf_obj = NULL; evsel->bpf_fd = -1; INIT_LIST_HEAD(&evsel->config_terms); + INIT_LIST_HEAD(&evsel->bpf_counter_list); perf_evsel__object.init(evsel); evsel->sample_size = __evsel__sample_size(attr->sample_type); evsel__calc_id_pos(evsel); @@ -1012,6 +1014,11 @@ struct evsel_config_term *__evsel__get_config_term(struct evsel *evsel, enum evs return found_term; } +void __weak arch_evsel__set_sample_weight(struct evsel *evsel) +{ + evsel__set_sample_bit(evsel, WEIGHT); +} + /* * The enable_on_exec/disabled value strategy: * @@ -1166,12 +1173,14 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, } if (opts->sample_weight) - evsel__set_sample_bit(evsel, WEIGHT); + arch_evsel__set_sample_weight(evsel); + + attr->task = track; + attr->mmap = track; + attr->mmap2 = track && !perf_missing_features.mmap2; + attr->comm = track; + attr->build_id = track && opts->build_id; - attr->task = track; - attr->mmap = track; - attr->mmap2 = track && !perf_missing_features.mmap2; - attr->comm = track; /* * ksymbol is tracked separately with text poke because it needs to be * system wide and enabled immediately. @@ -1191,6 +1200,9 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, if (opts->sample_data_page_size) evsel__set_sample_bit(evsel, DATA_PAGE_SIZE); + if (opts->sample_code_page_size) + evsel__set_sample_bit(evsel, CODE_PAGE_SIZE); + if (opts->record_switch_events) attr->context_switch = track; @@ -1366,6 +1378,7 @@ void evsel__exit(struct evsel *evsel) { assert(list_empty(&evsel->core.node)); assert(evsel->evlist == NULL); + bpf_counter__destroy(evsel); evsel__free_counts(evsel); perf_evsel__free_fd(&evsel->core); perf_evsel__free_id(&evsel->core); @@ -1735,6 +1748,10 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, } fallback_missing_features: + if (perf_missing_features.weight_struct) { + evsel__set_sample_bit(evsel, WEIGHT); + evsel__reset_sample_bit(evsel, WEIGHT_STRUCT); + } if (perf_missing_features.clockid_wrong) evsel->core.attr.clockid = CLOCK_MONOTONIC; /* should always work */ if (perf_missing_features.clockid) { @@ -1781,6 +1798,8 @@ retry_open: FD(evsel, cpu, thread) = fd; + bpf_counter__install_pe(evsel, cpu, fd); + if (unlikely(test_attr__enabled)) { test_attr__open(&evsel->core.attr, pid, cpus->map[cpu], fd, group_fd, flags); @@ -1873,7 +1892,17 @@ try_fallback: * Must probe features in the order they were added to the * perf_event_attr interface. */ - if (!perf_missing_features.data_page_size && + if (!perf_missing_features.weight_struct && + (evsel->core.attr.sample_type & PERF_SAMPLE_WEIGHT_STRUCT)) { + perf_missing_features.weight_struct = true; + pr_debug2("switching off weight struct support\n"); + goto fallback_missing_features; + } else if (!perf_missing_features.code_page_size && + (evsel->core.attr.sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)) { + perf_missing_features.code_page_size = true; + pr_debug2_peo("Kernel has no PERF_SAMPLE_CODE_PAGE_SIZE support, bailing out\n"); + goto out_close; + } else if (!perf_missing_features.data_page_size && (evsel->core.attr.sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)) { perf_missing_features.data_page_size = true; pr_debug2_peo("Kernel has no PERF_SAMPLE_DATA_PAGE_SIZE support, bailing out\n"); @@ -2076,6 +2105,13 @@ perf_event__check_size(union perf_event *event, unsigned int sample_size) return 0; } +void __weak arch_perf_parse_sample_weight(struct perf_sample *data, + const __u64 *array, + u64 type __maybe_unused) +{ + data->weight = *array; +} + int evsel__parse_sample(struct evsel *evsel, union perf_event *event, struct perf_sample *data) { @@ -2316,9 +2352,9 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, } } - if (type & PERF_SAMPLE_WEIGHT) { + if (type & PERF_SAMPLE_WEIGHT_TYPE) { OVERFLOW_CHECK_u64(array); - data->weight = *array; + arch_perf_parse_sample_weight(data, array, type); array++; } @@ -2369,6 +2405,12 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, array++; } + data->code_page_size = 0; + if (type & PERF_SAMPLE_CODE_PAGE_SIZE) { + data->code_page_size = *array; + array++; + } + if (type & PERF_SAMPLE_AUX) { OVERFLOW_CHECK_u64(array); sz = *array++; @@ -2678,6 +2720,8 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target, "We found oprofile daemon running, please stop it and try again."); break; case EINVAL: + if (evsel->core.attr.sample_type & PERF_SAMPLE_CODE_PAGE_SIZE && perf_missing_features.code_page_size) + return scnprintf(msg, size, "Asking for the code page size isn't supported by this kernel."); if (evsel->core.attr.sample_type & PERF_SAMPLE_DATA_PAGE_SIZE && perf_missing_features.data_page_size) return scnprintf(msg, size, "Asking for the data page size isn't supported by this kernel."); if (evsel->core.attr.write_backward && perf_missing_features.write_backward) @@ -2689,6 +2733,9 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target, if (perf_missing_features.aux_output) return scnprintf(msg, size, "The 'aux_output' feature is not supported, update the kernel."); break; + case ENODATA: + return scnprintf(msg, size, "Cannot collect data source with the load latency event alone. " + "Please add an auxiliary event in front of the load latency event."); default: break; } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index cd1d8dd43199..4e8e49fb7e9d 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -17,6 +17,8 @@ struct cgroup; struct perf_counts; struct perf_stat_evsel; union perf_event; +struct bpf_counter_ops; +struct target; typedef int (evsel__sb_cb_t)(union perf_event *event, void *data); @@ -127,6 +129,8 @@ struct evsel { * See also evsel__has_callchain(). */ __u64 synth_sample_type; + struct list_head bpf_counter_list; + struct bpf_counter_ops *bpf_counter_ops; }; struct perf_missing_features { @@ -145,6 +149,8 @@ struct perf_missing_features { bool branch_hw_idx; bool cgroup; bool data_page_size; + bool code_page_size; + bool weight_struct; }; extern struct perf_missing_features perf_missing_features; @@ -239,6 +245,8 @@ void __evsel__reset_sample_bit(struct evsel *evsel, enum perf_event_sample_forma void evsel__set_sample_id(struct evsel *evsel, bool use_sample_identifier); +void arch_evsel__set_sample_weight(struct evsel *evsel); + int evsel__set_filter(struct evsel *evsel, const char *filter); int evsel__append_tp_filter(struct evsel *evsel, const char *filter); int evsel__append_addr_filter(struct evsel *evsel, const char *filter); @@ -424,4 +432,5 @@ static inline bool evsel__is_dummy_event(struct evsel *evsel) struct perf_env *evsel__env(struct evsel *evsel); int evsel__store_ids(struct evsel *evsel, struct evlist *evlist); + #endif /* __PERF_EVSEL_H */ diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c index fb498a723a00..bfedd7b23521 100644 --- a/tools/perf/util/evsel_fprintf.c +++ b/tools/perf/util/evsel_fprintf.c @@ -100,6 +100,7 @@ out: return ++printed; } +#ifndef PYTHON_PERF int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, unsigned int print_opts, struct callchain_cursor *cursor, struct strlist *bt_stop_list, FILE *fp) @@ -239,3 +240,4 @@ int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al, return printed; } +#endif /* PYTHON_PERF */ diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 062383e225a3..4fe9e2a54346 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -3323,6 +3323,14 @@ int perf_session__write_header(struct perf_session *session, attr_offset = lseek(ff.fd, 0, SEEK_CUR); evlist__for_each_entry(evlist, evsel) { + if (evsel->core.attr.size < sizeof(evsel->core.attr)) { + /* + * We are likely in "perf inject" and have read + * from an older file. Update attr size so that + * reader gets the right offset to the ids. + */ + evsel->core.attr.size = sizeof(evsel->core.attr); + } f_attr = (struct perf_file_attr){ .attr = evsel->core.attr, .ids = { @@ -3798,7 +3806,7 @@ int perf_session__read_header(struct perf_session *session) * check for the pipe header regardless of source. */ err = perf_header__read_pipe(session); - if (!err || (err && perf_data__is_pipe(data))) { + if (!err || perf_data__is_pipe(data)) { data->is_pipe = true; return err; } diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index a08fb9ea411b..c82f5fc26af8 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -208,10 +208,14 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) hists__new_col_len(hists, HISTC_MEM_LVL, 21 + 3); hists__new_col_len(hists, HISTC_LOCAL_WEIGHT, 12); hists__new_col_len(hists, HISTC_GLOBAL_WEIGHT, 12); + hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10); + hists__new_col_len(hists, HISTC_LOCAL_INS_LAT, 13); + hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13); if (symbol_conf.nanosecs) hists__new_col_len(hists, HISTC_TIME, 16); else hists__new_col_len(hists, HISTC_TIME, 12); + hists__new_col_len(hists, HISTC_CODE_PAGE_SIZE, 6); if (h->srcline) { len = MAX(strlen(h->srcline), strlen(sort_srcline.se_header)); @@ -285,12 +289,13 @@ static long hist_time(unsigned long htime) } static void he_stat__add_period(struct he_stat *he_stat, u64 period, - u64 weight) + u64 weight, u64 ins_lat) { he_stat->period += period; he_stat->weight += weight; he_stat->nr_events += 1; + he_stat->ins_lat += ins_lat; } static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src) @@ -302,6 +307,7 @@ static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src) dest->period_guest_us += src->period_guest_us; dest->nr_events += src->nr_events; dest->weight += src->weight; + dest->ins_lat += src->ins_lat; } static void he_stat__decay(struct he_stat *he_stat) @@ -590,6 +596,7 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, int64_t cmp; u64 period = entry->stat.period; u64 weight = entry->stat.weight; + u64 ins_lat = entry->stat.ins_lat; bool leftmost = true; p = &hists->entries_in->rb_root.rb_node; @@ -608,11 +615,11 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, if (!cmp) { if (sample_self) { - he_stat__add_period(&he->stat, period, weight); + he_stat__add_period(&he->stat, period, weight, ins_lat); hist_entry__add_callchain_period(he, period); } if (symbol_conf.cumulate_callchain) - he_stat__add_period(he->stat_acc, period, weight); + he_stat__add_period(he->stat_acc, period, weight, ins_lat); /* * This mem info was allocated from sample__resolve_mem @@ -718,10 +725,12 @@ __hists__add_entry(struct hists *hists, .cpumode = al->cpumode, .ip = al->addr, .level = al->level, + .code_page_size = sample->code_page_size, .stat = { .nr_events = 1, .period = sample->period, .weight = sample->weight, + .ins_lat = sample->ins_lat, }, .parent = sym_parent, .filtered = symbol__parent_filter(sym_parent) | al->filtered, diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 14f66330923d..3c537232294b 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -53,6 +53,7 @@ enum hist_column { HISTC_DSO_TO, HISTC_LOCAL_WEIGHT, HISTC_GLOBAL_WEIGHT, + HISTC_CODE_PAGE_SIZE, HISTC_MEM_DADDR_SYMBOL, HISTC_MEM_DADDR_DSO, HISTC_MEM_PHYS_DADDR, @@ -71,6 +72,9 @@ enum hist_column { HISTC_SYM_SIZE, HISTC_DSO_SIZE, HISTC_SYMBOL_IPC, + HISTC_MEM_BLOCKED, + HISTC_LOCAL_INS_LAT, + HISTC_GLOBAL_INS_LAT, HISTC_NR_COLS, /* Last entry */ }; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 697513f35154..8c59677bee13 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -24,6 +24,13 @@ #include "intel-pt-decoder.h" #include "intel-pt-log.h" +#define BITULL(x) (1ULL << (x)) + +/* IA32_RTIT_CTL MSR bits */ +#define INTEL_PT_CYC_ENABLE BITULL(1) +#define INTEL_PT_CYC_THRESHOLD (BITULL(22) | BITULL(21) | BITULL(20) | BITULL(19)) +#define INTEL_PT_CYC_THRESHOLD_SHIFT 19 + #define INTEL_PT_BLK_SIZE 1024 #define BIT63 (((uint64_t)1 << 63)) @@ -55,6 +62,7 @@ enum intel_pt_pkt_state { INTEL_PT_STATE_TIP_PGD, INTEL_PT_STATE_FUP, INTEL_PT_STATE_FUP_NO_TIP, + INTEL_PT_STATE_FUP_IN_PSB, INTEL_PT_STATE_RESAMPLE, }; @@ -73,6 +81,7 @@ static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state) case INTEL_PT_STATE_TIP_PGD: case INTEL_PT_STATE_FUP: case INTEL_PT_STATE_FUP_NO_TIP: + case INTEL_PT_STATE_FUP_IN_PSB: return false; default: return true; @@ -112,13 +121,14 @@ struct intel_pt_decoder { bool have_last_ip; bool in_psb; bool hop; - bool hop_psb_fup; bool leap; + bool nr; + bool next_nr; enum intel_pt_param_flags flags; uint64_t pos; uint64_t last_ip; uint64_t ip; - uint64_t cr3; + uint64_t pip_payload; uint64_t timestamp; uint64_t tsc_timestamp; uint64_t ref_timestamp; @@ -167,6 +177,8 @@ struct intel_pt_decoder { uint64_t sample_tot_cyc_cnt; uint64_t base_cyc_cnt; uint64_t cyc_cnt_timestamp; + uint64_t ctl; + uint64_t cyc_threshold; double tsc_to_cyc; bool continuous_period; bool overflow; @@ -189,6 +201,7 @@ struct intel_pt_decoder { int no_progress; int stuck_ip_prd; int stuck_ip_cnt; + uint64_t psb_ip; const unsigned char *next_buf; size_t next_len; unsigned char temp_buf[INTEL_PT_PKT_MAX_SZ]; @@ -204,6 +217,14 @@ static uint64_t intel_pt_lower_power_of_2(uint64_t x) return x << i; } +static uint64_t intel_pt_cyc_threshold(uint64_t ctl) +{ + if (!(ctl & INTEL_PT_CYC_ENABLE)) + return 0; + + return (ctl & INTEL_PT_CYC_THRESHOLD) >> INTEL_PT_CYC_THRESHOLD_SHIFT; +} + static void intel_pt_setup_period(struct intel_pt_decoder *decoder) { if (decoder->period_type == INTEL_PT_PERIOD_TICKS) { @@ -245,12 +266,15 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params) decoder->flags = params->flags; + decoder->ctl = params->ctl; decoder->period = params->period; decoder->period_type = params->period_type; decoder->max_non_turbo_ratio = params->max_non_turbo_ratio; decoder->max_non_turbo_ratio_fp = params->max_non_turbo_ratio; + decoder->cyc_threshold = intel_pt_cyc_threshold(decoder->ctl); + intel_pt_setup_period(decoder); decoder->mtc_shift = params->mtc_period; @@ -481,6 +505,28 @@ static inline void intel_pt_update_in_tx(struct intel_pt_decoder *decoder) decoder->tx_flags = decoder->packet.payload & INTEL_PT_IN_TX; } +static inline void intel_pt_update_pip(struct intel_pt_decoder *decoder) +{ + decoder->pip_payload = decoder->packet.payload; +} + +static inline void intel_pt_update_nr(struct intel_pt_decoder *decoder) +{ + decoder->next_nr = decoder->pip_payload & 1; +} + +static inline void intel_pt_set_nr(struct intel_pt_decoder *decoder) +{ + decoder->nr = decoder->pip_payload & 1; + decoder->next_nr = decoder->nr; +} + +static inline void intel_pt_set_pip(struct intel_pt_decoder *decoder) +{ + intel_pt_update_pip(decoder); + intel_pt_set_nr(decoder); +} + static int intel_pt_bad_packet(struct intel_pt_decoder *decoder) { intel_pt_clear_tx_flags(decoder); @@ -1218,6 +1264,7 @@ static int intel_pt_walk_tip(struct intel_pt_decoder *decoder) decoder->continuous_period = false; decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; decoder->state.type |= INTEL_PT_TRACE_END; + intel_pt_update_nr(decoder); return 0; } if (err == INTEL_PT_RETURN) @@ -1225,6 +1272,8 @@ static int intel_pt_walk_tip(struct intel_pt_decoder *decoder) if (err) return err; + intel_pt_update_nr(decoder); + if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) { if (decoder->pkt_state == INTEL_PT_STATE_TIP_PGD) { decoder->pge = false; @@ -1337,6 +1386,7 @@ static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder) decoder->state.from_ip = decoder->ip; decoder->state.to_ip = decoder->last_ip; decoder->ip = decoder->last_ip; + intel_pt_update_nr(decoder); return 0; } @@ -1461,6 +1511,7 @@ static int intel_pt_overflow(struct intel_pt_decoder *decoder) { intel_pt_log("ERROR: Buffer overflow\n"); intel_pt_clear_tx_flags(decoder); + intel_pt_set_nr(decoder); decoder->timestamp_insn_cnt = 0; decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; decoder->overflow = true; @@ -1735,18 +1786,14 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder) break; case INTEL_PT_PIP: - decoder->cr3 = decoder->packet.payload & (BIT63 - 1); + intel_pt_set_pip(decoder); break; case INTEL_PT_FUP: decoder->pge = true; if (decoder->packet.count) { intel_pt_set_last_ip(decoder); - if (decoder->hop) { - /* Act on FUP at PSBEND */ - decoder->ip = decoder->last_ip; - decoder->hop_psb_fup = true; - } + decoder->psb_ip = decoder->last_ip; } break; @@ -1761,6 +1808,9 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder) break; case INTEL_PT_CYC: + intel_pt_calc_cyc_timestamp(decoder); + break; + case INTEL_PT_VMCS: case INTEL_PT_MNT: case INTEL_PT_PAD: @@ -1835,6 +1885,7 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder) decoder->pge = false; decoder->continuous_period = false; decoder->state.type |= INTEL_PT_TRACE_END; + intel_pt_update_nr(decoder); return 0; case INTEL_PT_TIP_PGE: @@ -1850,6 +1901,7 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder) } decoder->state.type |= INTEL_PT_TRACE_BEGIN; intel_pt_mtc_cyc_cnt_pge(decoder); + intel_pt_set_nr(decoder); return 0; case INTEL_PT_TIP: @@ -1860,10 +1912,11 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder) intel_pt_set_ip(decoder); decoder->state.to_ip = decoder->ip; } + intel_pt_update_nr(decoder); return 0; case INTEL_PT_PIP: - decoder->cr3 = decoder->packet.payload & (BIT63 - 1); + intel_pt_update_pip(decoder); break; case INTEL_PT_MTC: @@ -1922,21 +1975,27 @@ static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, in return HOP_IGNORE; case INTEL_PT_TIP_PGD: - if (!decoder->packet.count) + if (!decoder->packet.count) { + intel_pt_set_nr(decoder); return HOP_IGNORE; + } intel_pt_set_ip(decoder); decoder->state.type |= INTEL_PT_TRACE_END; decoder->state.from_ip = 0; decoder->state.to_ip = decoder->ip; + intel_pt_update_nr(decoder); return HOP_RETURN; case INTEL_PT_TIP: - if (!decoder->packet.count) + if (!decoder->packet.count) { + intel_pt_set_nr(decoder); return HOP_IGNORE; + } intel_pt_set_ip(decoder); decoder->state.type = INTEL_PT_INSTRUCTION; decoder->state.from_ip = decoder->ip; decoder->state.to_ip = 0; + intel_pt_update_nr(decoder); return HOP_RETURN; case INTEL_PT_FUP: @@ -1959,26 +2018,23 @@ static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, in return HOP_RETURN; case INTEL_PT_PSB: + decoder->state.psb_offset = decoder->pos; + decoder->psb_ip = 0; decoder->last_ip = 0; decoder->have_last_ip = true; - decoder->hop_psb_fup = false; *err = intel_pt_walk_psbend(decoder); if (*err == -EAGAIN) return HOP_AGAIN; if (*err) return HOP_RETURN; - if (decoder->hop_psb_fup) { - decoder->hop_psb_fup = false; - decoder->state.type = INTEL_PT_INSTRUCTION; - decoder->state.from_ip = decoder->ip; - decoder->state.to_ip = 0; - return HOP_RETURN; + decoder->state.type = INTEL_PT_PSB_EVT; + if (decoder->psb_ip) { + decoder->state.type |= INTEL_PT_INSTRUCTION; + decoder->ip = decoder->psb_ip; } - if (decoder->cbr != decoder->cbr_seen) { - decoder->state.type = 0; - return HOP_RETURN; - } - return HOP_IGNORE; + decoder->state.from_ip = decoder->psb_ip; + decoder->state.to_ip = 0; + return HOP_RETURN; case INTEL_PT_BAD: case INTEL_PT_PAD: @@ -2012,8 +2068,151 @@ static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, in } } +struct intel_pt_psb_info { + struct intel_pt_pkt fup_packet; + bool fup; + int after_psbend; +}; + +/* Lookahead and get the FUP packet from PSB+ */ +static int intel_pt_psb_lookahead_cb(struct intel_pt_pkt_info *pkt_info) +{ + struct intel_pt_psb_info *data = pkt_info->data; + + switch (pkt_info->packet.type) { + case INTEL_PT_PAD: + case INTEL_PT_MNT: + case INTEL_PT_TSC: + case INTEL_PT_TMA: + case INTEL_PT_MODE_EXEC: + case INTEL_PT_MODE_TSX: + case INTEL_PT_MTC: + case INTEL_PT_CYC: + case INTEL_PT_VMCS: + case INTEL_PT_CBR: + case INTEL_PT_PIP: + if (data->after_psbend) { + data->after_psbend -= 1; + if (!data->after_psbend) + return 1; + } + break; + + case INTEL_PT_FUP: + if (data->after_psbend) + return 1; + if (data->fup || pkt_info->packet.count == 0) + return 1; + data->fup_packet = pkt_info->packet; + data->fup = true; + break; + + case INTEL_PT_PSBEND: + if (!data->fup) + return 1; + /* Keep going to check for a TIP.PGE */ + data->after_psbend = 6; + break; + + case INTEL_PT_TIP_PGE: + /* Ignore FUP in PSB+ if followed by TIP.PGE */ + if (data->after_psbend) + data->fup = false; + return 1; + + case INTEL_PT_PTWRITE: + case INTEL_PT_PTWRITE_IP: + case INTEL_PT_EXSTOP: + case INTEL_PT_EXSTOP_IP: + case INTEL_PT_MWAIT: + case INTEL_PT_PWRE: + case INTEL_PT_PWRX: + case INTEL_PT_BBP: + case INTEL_PT_BIP: + case INTEL_PT_BEP: + case INTEL_PT_BEP_IP: + if (data->after_psbend) { + data->after_psbend -= 1; + if (!data->after_psbend) + return 1; + break; + } + return 1; + + case INTEL_PT_OVF: + case INTEL_PT_BAD: + case INTEL_PT_TNT: + case INTEL_PT_TIP_PGD: + case INTEL_PT_TIP: + case INTEL_PT_PSB: + case INTEL_PT_TRACESTOP: + default: + return 1; + } + + return 0; +} + +static int intel_pt_psb(struct intel_pt_decoder *decoder) +{ + int err; + + decoder->last_ip = 0; + decoder->psb_ip = 0; + decoder->have_last_ip = true; + intel_pt_clear_stack(&decoder->stack); + err = intel_pt_walk_psbend(decoder); + if (err) + return err; + decoder->state.type = INTEL_PT_PSB_EVT; + decoder->state.from_ip = decoder->psb_ip; + decoder->state.to_ip = 0; + return 0; +} + +static int intel_pt_fup_in_psb(struct intel_pt_decoder *decoder) +{ + int err; + + if (decoder->ip != decoder->last_ip) { + err = intel_pt_walk_fup(decoder); + if (!err || err != -EAGAIN) + return err; + } + + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + err = intel_pt_psb(decoder); + if (err) { + decoder->pkt_state = INTEL_PT_STATE_ERR3; + return -ENOENT; + } + + return 0; +} + +static bool intel_pt_psb_with_fup(struct intel_pt_decoder *decoder, int *err) +{ + struct intel_pt_psb_info data = { .fup = false }; + + if (!decoder->branch_enable || !decoder->pge) + return false; + + intel_pt_pkt_lookahead(decoder, intel_pt_psb_lookahead_cb, &data); + if (!data.fup) + return false; + + decoder->packet = data.fup_packet; + intel_pt_set_last_ip(decoder); + decoder->pkt_state = INTEL_PT_STATE_FUP_IN_PSB; + + *err = intel_pt_fup_in_psb(decoder); + + return true; +} + static int intel_pt_walk_trace(struct intel_pt_decoder *decoder) { + int last_packet_type = INTEL_PT_PAD; bool no_tip = false; int err; @@ -2022,6 +2221,12 @@ static int intel_pt_walk_trace(struct intel_pt_decoder *decoder) if (err) return err; next: + if (decoder->cyc_threshold) { + if (decoder->sample_cyc && last_packet_type != INTEL_PT_CYC) + decoder->sample_cyc = false; + last_packet_type = decoder->packet.type; + } + if (decoder->hop) { switch (intel_pt_hop_trace(decoder, &no_tip, &err)) { case HOP_IGNORE: @@ -2055,6 +2260,7 @@ next: case INTEL_PT_TIP_PGE: { decoder->pge = true; intel_pt_mtc_cyc_cnt_pge(decoder); + intel_pt_set_nr(decoder); if (decoder->packet.count == 0) { intel_pt_log_at("Skipping zero TIP.PGE", decoder->pos); @@ -2120,27 +2326,17 @@ next: break; case INTEL_PT_PSB: - decoder->last_ip = 0; - decoder->have_last_ip = true; - intel_pt_clear_stack(&decoder->stack); - err = intel_pt_walk_psbend(decoder); + decoder->state.psb_offset = decoder->pos; + decoder->psb_ip = 0; + if (intel_pt_psb_with_fup(decoder, &err)) + return err; + err = intel_pt_psb(decoder); if (err == -EAGAIN) goto next; - if (err) - return err; - /* - * PSB+ CBR will not have changed but cater for the - * possibility of another CBR change that gets caught up - * in the PSB+. - */ - if (decoder->cbr != decoder->cbr_seen) { - decoder->state.type = 0; - return 0; - } - break; + return err; case INTEL_PT_PIP: - decoder->cr3 = decoder->packet.payload & (BIT63 - 1); + intel_pt_update_pip(decoder); break; case INTEL_PT_MTC: @@ -2351,6 +2547,7 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder) uint64_t current_ip = decoder->ip; intel_pt_set_ip(decoder); + decoder->psb_ip = decoder->ip; if (current_ip) intel_pt_log_to("Setting IP", decoder->ip); @@ -2378,7 +2575,7 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder) break; case INTEL_PT_PIP: - decoder->cr3 = decoder->packet.payload & (BIT63 - 1); + intel_pt_set_pip(decoder); break; case INTEL_PT_MODE_EXEC: @@ -2497,7 +2694,7 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) break; case INTEL_PT_PIP: - decoder->cr3 = decoder->packet.payload & (BIT63 - 1); + intel_pt_set_pip(decoder); break; case INTEL_PT_MODE_EXEC: @@ -2522,18 +2719,18 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) break; case INTEL_PT_PSB: + decoder->state.psb_offset = decoder->pos; + decoder->psb_ip = 0; decoder->last_ip = 0; decoder->have_last_ip = true; intel_pt_clear_stack(&decoder->stack); err = intel_pt_walk_psb(decoder); if (err) return err; - if (decoder->ip) { - /* Do not have a sample */ - decoder->state.type = 0; - return 0; - } - break; + decoder->state.type = INTEL_PT_PSB_EVT; + decoder->state.from_ip = decoder->psb_ip; + decoder->state.to_ip = 0; + return 0; case INTEL_PT_TNT: case INTEL_PT_PSBEND: @@ -2577,7 +2774,7 @@ static int intel_pt_sync_ip(struct intel_pt_decoder *decoder) intel_pt_log("Scanning for full IP\n"); err = intel_pt_walk_to_ip(decoder); - if (err) + if (err || ((decoder->state.type & INTEL_PT_PSB_EVT) && !decoder->ip)) return err; /* In hop mode, resample to get the to_ip as an "instruction" sample */ @@ -2689,10 +2886,10 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder) decoder->continuous_period = false; decoder->have_last_ip = false; decoder->last_ip = 0; + decoder->psb_ip = 0; decoder->ip = 0; intel_pt_clear_stack(&decoder->stack); -leap: err = intel_pt_scan_for_psb(decoder); if (err) return err; @@ -2704,8 +2901,11 @@ leap: if (err) return err; + decoder->state.type = INTEL_PT_PSB_EVT; /* Only PSB sample */ + decoder->state.from_ip = decoder->psb_ip; + decoder->state.to_ip = 0; + if (decoder->ip) { - decoder->state.type = 0; /* Do not have a sample */ /* * In hop mode, resample to get the PSB FUP ip as an * "instruction" sample. @@ -2714,14 +2914,6 @@ leap: decoder->pkt_state = INTEL_PT_STATE_RESAMPLE; else decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; - } else if (decoder->leap) { - /* - * In leap mode, only PSB+ is decoded, so keeping leaping to the - * next PSB until there is an ip. - */ - goto leap; - } else { - return intel_pt_sync_ip(decoder); } return 0; @@ -2783,6 +2975,9 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) if (err == -EAGAIN) err = intel_pt_walk_trace(decoder); break; + case INTEL_PT_STATE_FUP_IN_PSB: + err = intel_pt_fup_in_psb(decoder); + break; case INTEL_PT_STATE_RESAMPLE: err = intel_pt_resample(decoder); break; @@ -2797,6 +2992,7 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) decoder->state.from_ip = decoder->ip; intel_pt_update_sample_time(decoder); decoder->sample_tot_cyc_cnt = decoder->tot_cyc_cnt; + intel_pt_set_nr(decoder); } else { decoder->state.err = 0; if (decoder->cbr != decoder->cbr_seen) { @@ -2811,14 +3007,30 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) } if (intel_pt_sample_time(decoder->pkt_state)) { intel_pt_update_sample_time(decoder); - if (decoder->sample_cyc) + if (decoder->sample_cyc) { decoder->sample_tot_cyc_cnt = decoder->tot_cyc_cnt; + decoder->state.flags |= INTEL_PT_SAMPLE_IPC; + decoder->sample_cyc = false; + } } + /* + * When using only TSC/MTC to compute cycles, IPC can be + * sampled as soon as the cycle count changes. + */ + if (!decoder->have_cyc) + decoder->state.flags |= INTEL_PT_SAMPLE_IPC; } + /* Let PSB event always have TSC timestamp */ + if ((decoder->state.type & INTEL_PT_PSB_EVT) && decoder->tsc_timestamp) + decoder->sample_timestamp = decoder->tsc_timestamp; + + decoder->state.from_nr = decoder->nr; + decoder->state.to_nr = decoder->next_nr; + decoder->nr = decoder->next_nr; + decoder->state.timestamp = decoder->sample_timestamp; decoder->state.est_timestamp = intel_pt_est_timestamp(decoder); - decoder->state.cr3 = decoder->cr3; decoder->state.tot_insn_cnt = decoder->tot_insn_cnt; decoder->state.tot_cyc_cnt = decoder->sample_tot_cyc_cnt; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h index 8645fc265481..d9e62a7f6f0e 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h @@ -17,6 +17,7 @@ #define INTEL_PT_ABORT_TX (1 << 1) #define INTEL_PT_ASYNC (1 << 2) #define INTEL_PT_FUP_IP (1 << 3) +#define INTEL_PT_SAMPLE_IPC (1 << 4) enum intel_pt_sample_type { INTEL_PT_BRANCH = 1 << 0, @@ -31,6 +32,7 @@ enum intel_pt_sample_type { INTEL_PT_TRACE_BEGIN = 1 << 9, INTEL_PT_TRACE_END = 1 << 10, INTEL_PT_BLK_ITEMS = 1 << 11, + INTEL_PT_PSB_EVT = 1 << 12, }; enum intel_pt_period_type { @@ -199,10 +201,11 @@ struct intel_pt_blk_items { struct intel_pt_state { enum intel_pt_sample_type type; + bool from_nr; + bool to_nr; int err; uint64_t from_ip; uint64_t to_ip; - uint64_t cr3; uint64_t tot_insn_cnt; uint64_t tot_cyc_cnt; uint64_t timestamp; @@ -213,6 +216,7 @@ struct intel_pt_state { uint64_t pwre_payload; uint64_t pwrx_payload; uint64_t cbr_payload; + uint64_t psb_offset; uint32_t cbr; uint32_t flags; enum intel_pt_insn_op insn_op; @@ -243,6 +247,7 @@ struct intel_pt_params { void *data; bool return_compression; bool branch_enable; + uint64_t ctl; uint64_t period; enum intel_pt_period_type period_type; unsigned max_non_turbo_ratio; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c index fb8a3558d3d5..2f6cc7eea251 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c @@ -43,6 +43,17 @@ static void intel_pt_insn_decoder(struct insn *insn, switch (insn->opcode.bytes[0]) { case 0xf: switch (insn->opcode.bytes[1]) { + case 0x01: + switch (insn->modrm.bytes[0]) { + case 0xc2: /* vmlaunch */ + case 0xc3: /* vmresume */ + op = INTEL_PT_OP_VMENTRY; + branch = INTEL_PT_BR_INDIRECT; + break; + default: + break; + } + break; case 0x05: /* syscall */ case 0x34: /* sysenter */ op = INTEL_PT_OP_SYSCALL; @@ -213,6 +224,7 @@ const char *branch_name[] = { [INTEL_PT_OP_INT] = "Int", [INTEL_PT_OP_SYSCALL] = "Syscall", [INTEL_PT_OP_SYSRET] = "Sysret", + [INTEL_PT_OP_VMENTRY] = "VMentry", }; const char *intel_pt_insn_name(enum intel_pt_insn_op op) @@ -267,6 +279,9 @@ int intel_pt_insn_type(enum intel_pt_insn_op op) case INTEL_PT_OP_SYSRET: return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN | PERF_IP_FLAG_SYSCALLRET; + case INTEL_PT_OP_VMENTRY: + return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | + PERF_IP_FLAG_VMENTRY; default: return 0; } diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h index 95a1eb0141ff..c2861cfdd768 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h @@ -24,6 +24,7 @@ enum intel_pt_insn_op { INTEL_PT_OP_INT, INTEL_PT_OP_SYSCALL, INTEL_PT_OP_SYSRET, + INTEL_PT_OP_VMENTRY, }; enum intel_pt_insn_branch { diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c index 4ce109993e74..02a3395d6ce3 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c @@ -16,8 +16,6 @@ #define BIT63 ((uint64_t)1 << 63) -#define NR_FLAG BIT63 - #if __BYTE_ORDER == __BIG_ENDIAN #define le16_to_cpu bswap_16 #define le32_to_cpu bswap_32 @@ -106,9 +104,7 @@ static int intel_pt_get_pip(const unsigned char *buf, size_t len, packet->type = INTEL_PT_PIP; memcpy_le64(&payload, buf + 2, 6); - packet->payload = payload >> 1; - if (payload & 1) - packet->payload |= NR_FLAG; + packet->payload = payload; return 8; } @@ -719,10 +715,10 @@ int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf, name, (unsigned)(payload >> 1) & 1, (unsigned)payload & 1); case INTEL_PT_PIP: - nr = packet->payload & NR_FLAG ? 1 : 0; - payload &= ~NR_FLAG; + nr = packet->payload & INTEL_PT_VMX_NR_FLAG ? 1 : 0; + payload &= ~INTEL_PT_VMX_NR_FLAG; ret = snprintf(buf, buf_len, "%s 0x%llx (NR=%d)", - name, payload, nr); + name, payload >> 1, nr); return ret; case INTEL_PT_PTWRITE: return snprintf(buf, buf_len, "%s 0x%llx IP:0", name, payload); diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h index 17ca9b56d72f..996090cb84f6 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h @@ -21,6 +21,8 @@ #define INTEL_PT_PKT_MAX_SZ 16 +#define INTEL_PT_VMX_NR_FLAG 1 + enum intel_pt_pkt_type { INTEL_PT_BAD, INTEL_PT_PAD, diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 60214de42f31..f6e28ac231b7 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -108,6 +108,7 @@ struct intel_pt { u64 exstop_id; u64 pwrx_id; u64 cbr_id; + u64 psb_id; bool sample_pebs; struct evsel *pebs_evsel; @@ -162,6 +163,9 @@ struct intel_pt_queue { int switch_state; pid_t next_tid; struct thread *thread; + struct machine *guest_machine; + struct thread *unknown_guest_thread; + pid_t guest_machine_pid; bool exclude_kernel; bool have_sample; u64 time; @@ -549,13 +553,59 @@ static void intel_pt_cache_invalidate(struct dso *dso, struct machine *machine, auxtrace_cache__remove(dso->auxtrace_cache, offset); } -static inline u8 intel_pt_cpumode(struct intel_pt *pt, uint64_t ip) +static inline bool intel_pt_guest_kernel_ip(uint64_t ip) { - return ip >= pt->kernel_start ? + /* Assumes 64-bit kernel */ + return ip & (1ULL << 63); +} + +static inline u8 intel_pt_nr_cpumode(struct intel_pt_queue *ptq, uint64_t ip, bool nr) +{ + if (nr) { + return intel_pt_guest_kernel_ip(ip) ? + PERF_RECORD_MISC_GUEST_KERNEL : + PERF_RECORD_MISC_GUEST_USER; + } + + return ip >= ptq->pt->kernel_start ? PERF_RECORD_MISC_KERNEL : PERF_RECORD_MISC_USER; } +static inline u8 intel_pt_cpumode(struct intel_pt_queue *ptq, uint64_t from_ip, uint64_t to_ip) +{ + /* No support for non-zero CS base */ + if (from_ip) + return intel_pt_nr_cpumode(ptq, from_ip, ptq->state->from_nr); + return intel_pt_nr_cpumode(ptq, to_ip, ptq->state->to_nr); +} + +static int intel_pt_get_guest(struct intel_pt_queue *ptq) +{ + struct machines *machines = &ptq->pt->session->machines; + struct machine *machine; + pid_t pid = ptq->pid <= 0 ? DEFAULT_GUEST_KERNEL_ID : ptq->pid; + + if (ptq->guest_machine && pid == ptq->guest_machine_pid) + return 0; + + ptq->guest_machine = NULL; + thread__zput(ptq->unknown_guest_thread); + + machine = machines__find_guest(machines, pid); + if (!machine) + return -1; + + ptq->unknown_guest_thread = machine__idle_thread(machine); + if (!ptq->unknown_guest_thread) + return -1; + + ptq->guest_machine = machine; + ptq->guest_machine_pid = pid; + + return 0; +} + static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip, uint64_t max_insn_cnt, @@ -572,19 +622,29 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, u64 offset, start_offset, start_ip; u64 insn_cnt = 0; bool one_map = true; + bool nr; intel_pt_insn->length = 0; if (to_ip && *ip == to_ip) goto out_no_cache; - cpumode = intel_pt_cpumode(ptq->pt, *ip); + nr = ptq->state->to_nr; + cpumode = intel_pt_nr_cpumode(ptq, *ip, nr); - thread = ptq->thread; - if (!thread) { - if (cpumode != PERF_RECORD_MISC_KERNEL) + if (nr) { + if (cpumode != PERF_RECORD_MISC_GUEST_KERNEL || + intel_pt_get_guest(ptq)) return -EINVAL; - thread = ptq->pt->unknown_thread; + machine = ptq->guest_machine; + thread = ptq->unknown_guest_thread; + } else { + thread = ptq->thread; + if (!thread) { + if (cpumode != PERF_RECORD_MISC_KERNEL) + return -EINVAL; + thread = ptq->pt->unknown_thread; + } } while (1) { @@ -732,8 +792,14 @@ static int __intel_pt_pgd_ip(uint64_t ip, void *data) u8 cpumode; u64 offset; - if (ip >= ptq->pt->kernel_start) + if (ptq->state->to_nr) { + if (intel_pt_guest_kernel_ip(ip)) + return intel_pt_match_pgd_ip(ptq->pt, ip, ip, NULL); + /* No support for decoding guest user space */ + return -EINVAL; + } else if (ip >= ptq->pt->kernel_start) { return intel_pt_match_pgd_ip(ptq->pt, ip, ip, NULL); + } cpumode = PERF_RECORD_MISC_USER; @@ -893,6 +959,18 @@ static bool intel_pt_sampling_mode(struct intel_pt *pt) return false; } +static u64 intel_pt_ctl(struct intel_pt *pt) +{ + struct evsel *evsel; + u64 config; + + evlist__for_each_entry(pt->session->evlist, evsel) { + if (intel_pt_get_config(pt, &evsel->core.attr, &config)) + return config; + } + return 0; +} + static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns) { u64 quot, rem; @@ -1026,6 +1104,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, params.data = ptq; params.return_compression = intel_pt_return_compression(pt); params.branch_enable = intel_pt_branch_enable(pt); + params.ctl = intel_pt_ctl(pt); params.max_non_turbo_ratio = pt->max_non_turbo_ratio; params.mtc_period = intel_pt_mtc_period(pt); params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n; @@ -1087,6 +1166,7 @@ static void intel_pt_free_queue(void *priv) if (!ptq) return; thread__zput(ptq->thread); + thread__zput(ptq->unknown_guest_thread); intel_pt_decoder_free(ptq->decoder); zfree(&ptq->event_buf); zfree(&ptq->last_branch); @@ -1121,13 +1201,16 @@ static void intel_pt_sample_flags(struct intel_pt_queue *ptq) if (ptq->state->flags & INTEL_PT_ABORT_TX) { ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT; } else if (ptq->state->flags & INTEL_PT_ASYNC) { - if (ptq->state->to_ip) + if (!ptq->state->to_ip) + ptq->flags = PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_TRACE_END; + else if (ptq->state->from_nr && !ptq->state->to_nr) + ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | + PERF_IP_FLAG_VMEXIT; + else ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC | PERF_IP_FLAG_INTERRUPT; - else - ptq->flags = PERF_IP_FLAG_BRANCH | - PERF_IP_FLAG_TRACE_END; ptq->insn_len = 0; } else { if (ptq->state->from_ip) @@ -1301,8 +1384,8 @@ static void intel_pt_prep_b_sample(struct intel_pt *pt, sample->time = tsc_to_perf_time(ptq->timestamp, &pt->tc); sample->ip = ptq->state->from_ip; - sample->cpumode = intel_pt_cpumode(pt, sample->ip); sample->addr = ptq->state->to_ip; + sample->cpumode = intel_pt_cpumode(ptq, sample->ip, sample->addr); sample->period = 1; sample->flags = ptq->flags; @@ -1381,7 +1464,8 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) sample.branch_stack = (struct branch_stack *)&dummy_bs; } - sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_br_cyc_cnt; + if (ptq->state->flags & INTEL_PT_SAMPLE_IPC) + sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_br_cyc_cnt; if (sample.cyc_cnt) { sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_br_insn_cnt; ptq->last_br_insn_cnt = ptq->ipc_insn_cnt; @@ -1431,7 +1515,8 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) else sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt; - sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt; + if (ptq->state->flags & INTEL_PT_SAMPLE_IPC) + sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt; if (sample.cyc_cnt) { sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_in_insn_cnt; ptq->last_in_insn_cnt = ptq->ipc_insn_cnt; @@ -1533,6 +1618,32 @@ static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq) pt->pwr_events_sample_type); } +static int intel_pt_synth_psb_sample(struct intel_pt_queue *ptq) +{ + struct intel_pt *pt = ptq->pt; + union perf_event *event = ptq->event_buf; + struct perf_sample sample = { .ip = 0, }; + struct perf_synth_intel_psb raw; + + if (intel_pt_skip_event(pt)) + return 0; + + intel_pt_prep_p_sample(pt, ptq, event, &sample); + + sample.id = ptq->pt->psb_id; + sample.stream_id = ptq->pt->psb_id; + sample.flags = 0; + + raw.reserved = 0; + raw.offset = ptq->state->psb_offset; + + sample.raw_size = perf_synth__raw_size(raw); + sample.raw_data = perf_synth__raw_data(&raw); + + return intel_pt_deliver_synth_event(pt, event, &sample, + pt->pwr_events_sample_type); +} + static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; @@ -1791,10 +1902,7 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) else sample.ip = ptq->state->from_ip; - /* No support for guest mode at this time */ - cpumode = sample.ip < ptq->pt->kernel_start ? - PERF_RECORD_MISC_USER : - PERF_RECORD_MISC_KERNEL; + cpumode = intel_pt_cpumode(ptq, sample.ip, 0); event->sample.header.misc = cpumode | PERF_RECORD_MISC_EXACT_IP; @@ -1853,13 +1961,30 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) if (sample_type & PERF_SAMPLE_ADDR && items->has_mem_access_address) sample.addr = items->mem_access_address; - if (sample_type & PERF_SAMPLE_WEIGHT) { + if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) { /* * Refer kernel's setup_pebs_adaptive_sample_data() and * intel_hsw_weight(). */ - if (items->has_mem_access_latency) - sample.weight = items->mem_access_latency; + if (items->has_mem_access_latency) { + u64 weight = items->mem_access_latency >> 32; + + /* + * Starts from SPR, the mem access latency field + * contains both cache latency [47:32] and instruction + * latency [15:0]. The cache latency is the same as the + * mem access latency on previous platforms. + * + * In practice, no memory access could last than 4G + * cycles. Use latency >> 32 to distinguish the + * different format of the mem access latency field. + */ + if (weight > 0) { + sample.weight = weight & 0xffff; + sample.ins_lat = items->mem_access_latency & 0xffff; + } else + sample.weight = items->mem_access_latency; + } if (!sample.weight && items->has_tsx_aux_info) { /* Cycles last block */ sample.weight = (u32)items->tsx_aux_info; @@ -1966,14 +2091,8 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) ptq->have_sample = false; - if (ptq->state->tot_cyc_cnt > ptq->ipc_cyc_cnt) { - /* - * Cycle count and instruction count only go together to create - * a valid IPC ratio when the cycle count changes. - */ - ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt; - ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt; - } + ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt; + ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt; /* * Do PEBS first to allow for the possibility that the PEBS timestamp @@ -1986,6 +2105,11 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) } if (pt->sample_pwr_events) { + if (state->type & INTEL_PT_PSB_EVT) { + err = intel_pt_synth_psb_sample(ptq); + if (err) + return err; + } if (ptq->state->cbr != ptq->cbr_seen) { err = intel_pt_synth_cbr_sample(ptq); if (err) @@ -2047,7 +2171,27 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) } if (pt->sample_branches) { - err = intel_pt_synth_branch_sample(ptq); + if (state->from_nr != state->to_nr && + state->from_ip && state->to_ip) { + struct intel_pt_state *st = (struct intel_pt_state *)state; + u64 to_ip = st->to_ip; + u64 from_ip = st->from_ip; + + /* + * perf cannot handle having different machines for ip + * and addr, so create 2 branches. + */ + st->to_ip = 0; + err = intel_pt_synth_branch_sample(ptq); + if (err) + return err; + st->from_ip = 0; + st->to_ip = to_ip; + err = intel_pt_synth_branch_sample(ptq); + st->from_ip = from_ip; + } else { + err = intel_pt_synth_branch_sample(ptq); + } if (err) return err; } @@ -3083,6 +3227,14 @@ static int intel_pt_synth_events(struct intel_pt *pt, pt->cbr_id = id; intel_pt_set_event_name(evlist, id, "cbr"); id += 1; + + attr.config = PERF_SYNTH_INTEL_PSB; + err = intel_pt_synth_event(session, "psb", &attr, id); + if (err) + return err; + pt->psb_id = id; + intel_pt_set_event_name(evlist, id, "psb"); + id += 1; } if (pt->synth_opts.pwr_events && (evsel->core.attr.config & 0x10)) { diff --git a/tools/perf/util/intlist.c b/tools/perf/util/intlist.c index 84e5304e151a..934092199f89 100644 --- a/tools/perf/util/intlist.c +++ b/tools/perf/util/intlist.c @@ -13,7 +13,7 @@ static struct rb_node *intlist__node_new(struct rblist *rblist __maybe_unused, const void *entry) { - int i = (int)((long)entry); + unsigned long i = (unsigned long)entry; struct rb_node *rc = NULL; struct int_node *node = malloc(sizeof(*node)); @@ -41,15 +41,20 @@ static void intlist__node_delete(struct rblist *rblist __maybe_unused, static int intlist__node_cmp(struct rb_node *rb_node, const void *entry) { - int i = (int)((long)entry); + unsigned long i = (unsigned long)entry; struct int_node *node = container_of(rb_node, struct int_node, rb_node); - return node->i - i; + if (node->i > i) + return 1; + else if (node->i < i) + return -1; + + return 0; } -int intlist__add(struct intlist *ilist, int i) +int intlist__add(struct intlist *ilist, unsigned long i) { - return rblist__add_node(&ilist->rblist, (void *)((long)i)); + return rblist__add_node(&ilist->rblist, (void *)i); } void intlist__remove(struct intlist *ilist, struct int_node *node) @@ -58,7 +63,7 @@ void intlist__remove(struct intlist *ilist, struct int_node *node) } static struct int_node *__intlist__findnew(struct intlist *ilist, - int i, bool create) + unsigned long i, bool create) { struct int_node *node = NULL; struct rb_node *rb_node; @@ -67,9 +72,9 @@ static struct int_node *__intlist__findnew(struct intlist *ilist, return NULL; if (create) - rb_node = rblist__findnew(&ilist->rblist, (void *)((long)i)); + rb_node = rblist__findnew(&ilist->rblist, (void *)i); else - rb_node = rblist__find(&ilist->rblist, (void *)((long)i)); + rb_node = rblist__find(&ilist->rblist, (void *)i); if (rb_node) node = container_of(rb_node, struct int_node, rb_node); @@ -77,12 +82,12 @@ static struct int_node *__intlist__findnew(struct intlist *ilist, return node; } -struct int_node *intlist__find(struct intlist *ilist, int i) +struct int_node *intlist__find(struct intlist *ilist, unsigned long i) { return __intlist__findnew(ilist, i, false); } -struct int_node *intlist__findnew(struct intlist *ilist, int i) +struct int_node *intlist__findnew(struct intlist *ilist, unsigned long i) { return __intlist__findnew(ilist, i, true); } @@ -93,7 +98,7 @@ static int intlist__parse_list(struct intlist *ilist, const char *s) int err; do { - long value = strtol(s, &sep, 10); + unsigned long value = strtol(s, &sep, 10); err = -EINVAL; if (*sep != ',' && *sep != '\0') break; diff --git a/tools/perf/util/intlist.h b/tools/perf/util/intlist.h index 5c19ee001299..e336b174d0c7 100644 --- a/tools/perf/util/intlist.h +++ b/tools/perf/util/intlist.h @@ -9,7 +9,7 @@ struct int_node { struct rb_node rb_node; - int i; + unsigned long i; void *priv; }; @@ -21,13 +21,13 @@ struct intlist *intlist__new(const char *slist); void intlist__delete(struct intlist *ilist); void intlist__remove(struct intlist *ilist, struct int_node *in); -int intlist__add(struct intlist *ilist, int i); +int intlist__add(struct intlist *ilist, unsigned long i); struct int_node *intlist__entry(const struct intlist *ilist, unsigned int idx); -struct int_node *intlist__find(struct intlist *ilist, int i); -struct int_node *intlist__findnew(struct intlist *ilist, int i); +struct int_node *intlist__find(struct intlist *ilist, unsigned long i); +struct int_node *intlist__findnew(struct intlist *ilist, unsigned long i); -static inline bool intlist__has_entry(struct intlist *ilist, int i) +static inline bool intlist__has_entry(struct intlist *ilist, unsigned long i) { return intlist__find(ilist, i) != NULL; } diff --git a/tools/perf/util/jit.h b/tools/perf/util/jit.h index 6817ffc2a059..fb810e1b2de7 100644 --- a/tools/perf/util/jit.h +++ b/tools/perf/util/jit.h @@ -5,7 +5,7 @@ #include <data.h> int jit_process(struct perf_session *session, struct perf_data *output, - struct machine *machine, char *filename, pid_t pid, u64 *nbytes); + struct machine *machine, char *filename, pid_t pid, pid_t tid, u64 *nbytes); int jit_inject_record(const char *filename); diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c index 055bab7a92b3..9760d8e7b386 100644 --- a/tools/perf/util/jitdump.c +++ b/tools/perf/util/jitdump.c @@ -18,6 +18,7 @@ #include "event.h" #include "debug.h" #include "evlist.h" +#include "namespaces.h" #include "symbol.h" #include <elf.h> @@ -35,6 +36,7 @@ struct jit_buf_desc { struct perf_data *output; struct perf_session *session; struct machine *machine; + struct nsinfo *nsi; union jr_entry *entry; void *buf; uint64_t sample_type; @@ -72,7 +74,8 @@ struct jit_tool { #define get_jit_tool(t) (container_of(tool, struct jit_tool, tool)) static int -jit_emit_elf(char *filename, +jit_emit_elf(struct jit_buf_desc *jd, + char *filename, const char *sym, uint64_t code_addr, const void *code, @@ -83,14 +86,18 @@ jit_emit_elf(char *filename, uint32_t unwinding_header_size, uint32_t unwinding_size) { - int ret, fd; + int ret, fd, saved_errno; + struct nscookie nsc; if (verbose > 0) fprintf(stderr, "write ELF image %s\n", filename); + nsinfo__mountns_enter(jd->nsi, &nsc); fd = open(filename, O_CREAT|O_TRUNC|O_WRONLY, 0644); + saved_errno = errno; + nsinfo__mountns_exit(&nsc); if (fd == -1) { - pr_warning("cannot create jit ELF %s: %s\n", filename, strerror(errno)); + pr_warning("cannot create jit ELF %s: %s\n", filename, strerror(saved_errno)); return -1; } @@ -99,8 +106,11 @@ jit_emit_elf(char *filename, close(fd); - if (ret) - unlink(filename); + if (ret) { + nsinfo__mountns_enter(jd->nsi, &nsc); + unlink(filename); + nsinfo__mountns_exit(&nsc); + } return ret; } @@ -134,12 +144,15 @@ static int jit_open(struct jit_buf_desc *jd, const char *name) { struct jitheader header; + struct nscookie nsc; struct jr_prefix *prefix; ssize_t bs, bsz = 0; void *n, *buf = NULL; int ret, retval = -1; + nsinfo__mountns_enter(jd->nsi, &nsc); jd->in = fopen(name, "r"); + nsinfo__mountns_exit(&nsc); if (!jd->in) return -1; @@ -367,6 +380,20 @@ jit_inject_event(struct jit_buf_desc *jd, union perf_event *event) return 0; } +static pid_t jr_entry_pid(struct jit_buf_desc *jd, union jr_entry *jr) +{ + if (jd->nsi && jd->nsi->in_pidns) + return jd->nsi->tgid; + return jr->load.pid; +} + +static pid_t jr_entry_tid(struct jit_buf_desc *jd, union jr_entry *jr) +{ + if (jd->nsi && jd->nsi->in_pidns) + return jd->nsi->pid; + return jr->load.tid; +} + static uint64_t convert_timestamp(struct jit_buf_desc *jd, uint64_t timestamp) { struct perf_tsc_conversion tc; @@ -402,14 +429,15 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr) const char *sym; uint64_t count; int ret, csize, usize; - pid_t pid, tid; + pid_t nspid, pid, tid; struct { u32 pid, tid; u64 time; } *id; - pid = jr->load.pid; - tid = jr->load.tid; + nspid = jr->load.pid; + pid = jr_entry_pid(jd, jr); + tid = jr_entry_tid(jd, jr); csize = jr->load.code_size; usize = jd->unwinding_mapped_size; addr = jr->load.code_addr; @@ -425,14 +453,14 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr) filename = event->mmap2.filename; size = snprintf(filename, PATH_MAX, "%s/jitted-%d-%" PRIu64 ".so", jd->dir, - pid, + nspid, count); size++; /* for \0 */ size = PERF_ALIGN(size, sizeof(u64)); uaddr = (uintptr_t)code; - ret = jit_emit_elf(filename, sym, addr, (const void *)uaddr, csize, jd->debug_data, jd->nr_debug_entries, + ret = jit_emit_elf(jd, filename, sym, addr, (const void *)uaddr, csize, jd->debug_data, jd->nr_debug_entries, jd->unwinding_data, jd->eh_frame_hdr_size, jd->unwinding_size); if (jd->debug_data && jd->nr_debug_entries) { @@ -451,7 +479,7 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr) free(event); return -1; } - if (stat(filename, &st)) + if (nsinfo__stat(filename, &st, jd->nsi)) memset(&st, 0, sizeof(st)); event->mmap2.header.type = PERF_RECORD_MMAP2; @@ -515,14 +543,15 @@ static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr) int usize; u16 idr_size; int ret; - pid_t pid, tid; + pid_t nspid, pid, tid; struct { u32 pid, tid; u64 time; } *id; - pid = jr->move.pid; - tid = jr->move.tid; + nspid = jr->load.pid; + pid = jr_entry_pid(jd, jr); + tid = jr_entry_tid(jd, jr); usize = jd->unwinding_mapped_size; idr_size = jd->machine->id_hdr_size; @@ -536,12 +565,12 @@ static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr) filename = event->mmap2.filename; size = snprintf(filename, PATH_MAX, "%s/jitted-%d-%" PRIu64 ".so", jd->dir, - pid, + nspid, jr->move.code_index); size++; /* for \0 */ - if (stat(filename, &st)) + if (nsinfo__stat(filename, &st, jd->nsi)) memset(&st, 0, sizeof(st)); size = PERF_ALIGN(size, sizeof(u64)); @@ -700,7 +729,7 @@ jit_inject(struct jit_buf_desc *jd, char *path) * as captured in the RECORD_MMAP record */ static int -jit_detect(char *mmap_name, pid_t pid) +jit_detect(char *mmap_name, pid_t pid, struct nsinfo *nsi) { char *p; char *end = NULL; @@ -740,7 +769,7 @@ jit_detect(char *mmap_name, pid_t pid) * pid does not match mmap pid * pid==0 in system-wide mode (synthesized) */ - if (pid && pid2 != pid) + if (pid && pid2 != nsi->nstgid) return -1; /* * validate suffix @@ -782,16 +811,30 @@ jit_process(struct perf_session *session, struct machine *machine, char *filename, pid_t pid, + pid_t tid, u64 *nbytes) { + struct thread *thread; + struct nsinfo *nsi; struct evsel *first; struct jit_buf_desc jd; int ret; + thread = machine__findnew_thread(machine, pid, tid); + if (thread == NULL) { + pr_err("problem processing JIT mmap event, skipping it.\n"); + return 0; + } + + nsi = nsinfo__get(thread->nsinfo); + thread__put(thread); + /* * first, detect marker mmap (i.e., the jitdump mmap) */ - if (jit_detect(filename, pid)) { + if (jit_detect(filename, pid, nsi)) { + nsinfo__put(nsi); + // Strip //anon* mmaps if we processed a jitdump for this pid if (jit_has_pid(machine, pid) && (strncmp(filename, "//anon", 6) == 0)) return 1; @@ -804,6 +847,7 @@ jit_process(struct perf_session *session, jd.session = session; jd.output = output; jd.machine = machine; + jd.nsi = nsi; /* * track sample_type to compute id_all layout @@ -821,5 +865,7 @@ jit_process(struct perf_session *session, ret = 1; } + nsinfo__put(jd.nsi); + return ret; } diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index f841f3503cae..b5c2d8be4144 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -369,6 +369,15 @@ out: return machine; } +struct machine *machines__find_guest(struct machines *machines, pid_t pid) +{ + struct machine *machine = machines__find(machines, pid); + + if (!machine) + machine = machines__findnew(machines, DEFAULT_GUEST_KERNEL_ID); + return machine; +} + void machines__process_guests(struct machines *machines, machine__process_t process, void *data) { @@ -589,6 +598,24 @@ struct thread *machine__find_thread(struct machine *machine, pid_t pid, return th; } +/* + * Threads are identified by pid and tid, and the idle task has pid == tid == 0. + * So here a single thread is created for that, but actually there is a separate + * idle task per cpu, so there should be one 'struct thread' per cpu, but there + * is only 1. That causes problems for some tools, requiring workarounds. For + * example get_idle_thread() in builtin-sched.c, or thread_stack__per_cpu(). + */ +struct thread *machine__idle_thread(struct machine *machine) +{ + struct thread *thread = machine__findnew_thread(machine, 0, 0); + + if (!thread || thread__set_comm(thread, "swapper", 0) || + thread__set_namespaces(thread, 0, NULL)) + pr_err("problem inserting idle task for machine pid %d\n", machine->pid); + + return thread; +} + struct comm *machine__thread_exec_comm(struct machine *machine, struct thread *thread) { @@ -1599,7 +1626,8 @@ static int machine__process_extra_kernel_map(struct machine *machine, } static int machine__process_kernel_mmap_event(struct machine *machine, - struct extra_kernel_map *xm) + struct extra_kernel_map *xm, + struct build_id *bid) { struct map *map; enum dso_space_type dso_space; @@ -1624,6 +1652,10 @@ static int machine__process_kernel_mmap_event(struct machine *machine, goto out_problem; map->end = map->start + xm->end - xm->start; + + if (build_id__is_defined(bid)) + dso__set_build_id(map->dso, bid); + } else if (is_kernel_mmap) { const char *symbol_name = (xm->name + strlen(machine->mmap_name)); /* @@ -1681,6 +1713,9 @@ static int machine__process_kernel_mmap_event(struct machine *machine, machine__update_kernel_mmap(machine, xm->start, xm->end); + if (build_id__is_defined(bid)) + dso__set_build_id(kernel, bid); + /* * Avoid using a zero address (kptr_restrict) for the ref reloc * symbol. Effectively having zero here means that at record @@ -1718,11 +1753,17 @@ int machine__process_mmap2_event(struct machine *machine, .ino = event->mmap2.ino, .ino_generation = event->mmap2.ino_generation, }; + struct build_id __bid, *bid = NULL; int ret = 0; if (dump_trace) perf_event__fprintf_mmap2(event, stdout); + if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) { + bid = &__bid; + build_id__init(bid, event->mmap2.build_id, event->mmap2.build_id_size); + } + if (sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL || sample->cpumode == PERF_RECORD_MISC_KERNEL) { struct extra_kernel_map xm = { @@ -1732,7 +1773,7 @@ int machine__process_mmap2_event(struct machine *machine, }; strlcpy(xm.name, event->mmap2.filename, KMAP_NAME_LEN); - ret = machine__process_kernel_mmap_event(machine, &xm); + ret = machine__process_kernel_mmap_event(machine, &xm, bid); if (ret < 0) goto out_problem; return 0; @@ -1746,7 +1787,7 @@ int machine__process_mmap2_event(struct machine *machine, map = map__new(machine, event->mmap2.start, event->mmap2.len, event->mmap2.pgoff, &dso_id, event->mmap2.prot, - event->mmap2.flags, + event->mmap2.flags, bid, event->mmap2.filename, thread); if (map == NULL) @@ -1789,7 +1830,7 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event }; strlcpy(xm.name, event->mmap.filename, KMAP_NAME_LEN); - ret = machine__process_kernel_mmap_event(machine, &xm); + ret = machine__process_kernel_mmap_event(machine, &xm, NULL); if (ret < 0) goto out_problem; return 0; @@ -1805,7 +1846,7 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event map = map__new(machine, event->mmap.start, event->mmap.len, event->mmap.pgoff, - NULL, prot, 0, event->mmap.filename, thread); + NULL, prot, 0, NULL, event->mmap.filename, thread); if (map == NULL) goto out_problem_map; @@ -2980,7 +3021,7 @@ int machines__for_each_thread(struct machines *machines, pid_t machine__get_current_tid(struct machine *machine, int cpu) { - int nr_cpus = min(machine->env->nr_cpus_online, MAX_NR_CPUS); + int nr_cpus = min(machine->env->nr_cpus_avail, MAX_NR_CPUS); if (cpu < 0 || cpu >= nr_cpus || !machine->current_tid) return -1; @@ -2992,7 +3033,7 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid, pid_t tid) { struct thread *thread; - int nr_cpus = min(machine->env->nr_cpus_online, MAX_NR_CPUS); + int nr_cpus = min(machine->env->nr_cpus_avail, MAX_NR_CPUS); if (cpu < 0) return -EINVAL; diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 26368d3c1754..7377ed6efdf1 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -106,6 +106,7 @@ u8 machine__addr_cpumode(struct machine *machine, u8 cpumode, u64 addr); struct thread *machine__find_thread(struct machine *machine, pid_t pid, pid_t tid); +struct thread *machine__idle_thread(struct machine *machine); struct comm *machine__thread_exec_comm(struct machine *machine, struct thread *thread); @@ -162,6 +163,7 @@ struct machine *machines__add(struct machines *machines, pid_t pid, struct machine *machines__find_host(struct machines *machines); struct machine *machines__find(struct machines *machines, pid_t pid); struct machine *machines__findnew(struct machines *machines, pid_t pid); +struct machine *machines__find_guest(struct machines *machines, pid_t pid); void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size); void machines__set_comm_exec(struct machines *machines, bool comm_exec); diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index f44ede437dc7..692e56dc832e 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -130,8 +130,8 @@ void map__init(struct map *map, u64 start, u64 end, u64 pgoff, struct dso *dso) struct map *map__new(struct machine *machine, u64 start, u64 len, u64 pgoff, struct dso_id *id, - u32 prot, u32 flags, char *filename, - struct thread *thread) + u32 prot, u32 flags, struct build_id *bid, + char *filename, struct thread *thread) { struct map *map = malloc(sizeof(*map)); struct nsinfo *nsi = NULL; @@ -194,6 +194,10 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, dso__set_loaded(dso); } dso->nsinfo = nsi; + + if (build_id__is_defined(bid)) + dso__set_build_id(dso, bid); + dso__put(dso); } return map; diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index b1c0686db1b7..9f32825c98d8 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -104,10 +104,11 @@ void map__init(struct map *map, u64 start, u64 end, u64 pgoff, struct dso *dso); struct dso_id; +struct build_id; struct map *map__new(struct machine *machine, u64 start, u64 len, u64 pgoff, struct dso_id *id, u32 prot, u32 flags, - char *filename, struct thread *thread); + struct build_id *bid, char *filename, struct thread *thread); struct map *map__new2(u64 start, struct dso *dso); void map__delete(struct map *map); struct map *map__clone(struct map *map); diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index 19007e463b8a..f93a852ad838 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -56,6 +56,11 @@ char * __weak perf_mem_events__name(int i) return (char *)e->name; } +__weak bool is_mem_loads_aux_event(struct evsel *leader __maybe_unused) +{ + return false; +} + int perf_mem_events__parse(const char *str) { char *tok, *saveptr = NULL; @@ -332,6 +337,29 @@ int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info) return l; } +int perf_mem__blk_scnprintf(char *out, size_t sz, struct mem_info *mem_info) +{ + size_t l = 0; + u64 mask = PERF_MEM_BLK_NA; + + sz -= 1; /* -1 for null termination */ + out[0] = '\0'; + + if (mem_info) + mask = mem_info->data_src.mem_blk; + + if (!mask || (mask & PERF_MEM_BLK_NA)) { + l += scnprintf(out + l, sz - l, " N/A"); + return l; + } + if (mask & PERF_MEM_BLK_DATA) + l += scnprintf(out + l, sz - l, " Data"); + if (mask & PERF_MEM_BLK_ADDR) + l += scnprintf(out + l, sz - l, " Addr"); + + return l; +} + int perf_script__meminfo_scnprintf(char *out, size_t sz, struct mem_info *mem_info) { int i = 0; @@ -343,6 +371,8 @@ int perf_script__meminfo_scnprintf(char *out, size_t sz, struct mem_info *mem_in i += perf_mem__tlb_scnprintf(out + i, sz - i, mem_info); i += scnprintf(out + i, sz - i, "|LCK "); i += perf_mem__lck_scnprintf(out + i, sz - i, mem_info); + i += scnprintf(out + i, sz - i, "|BLK "); + i += perf_mem__blk_scnprintf(out + i, sz - i, mem_info); return i; } @@ -355,6 +385,7 @@ int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi) u64 lvl = data_src->mem_lvl; u64 snoop = data_src->mem_snoop; u64 lock = data_src->mem_lock; + u64 blk = data_src->mem_blk; /* * Skylake might report unknown remote level via this * bit, consider it when evaluating remote HITMs. @@ -374,6 +405,9 @@ do { \ if (lock & P(LOCK, LOCKED)) stats->locks++; + if (blk & P(BLK, DATA)) stats->blk_data++; + if (blk & P(BLK, ADDR)) stats->blk_addr++; + if (op & P(OP, LOAD)) { /* load */ stats->load++; @@ -485,6 +519,8 @@ void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add) stats->rmt_hit += add->rmt_hit; stats->lcl_dram += add->lcl_dram; stats->rmt_dram += add->rmt_dram; + stats->blk_data += add->blk_data; + stats->blk_addr += add->blk_addr; stats->nomap += add->nomap; stats->noparse += add->noparse; } diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h index 5ef178278909..755cef7e0625 100644 --- a/tools/perf/util/mem-events.h +++ b/tools/perf/util/mem-events.h @@ -9,6 +9,7 @@ #include <linux/refcount.h> #include <linux/perf_event.h> #include "stat.h" +#include "evsel.h" struct perf_mem_event { bool record; @@ -39,6 +40,7 @@ int perf_mem_events__init(void); char *perf_mem_events__name(int i); struct perf_mem_event *perf_mem_events__ptr(int i); +bool is_mem_loads_aux_event(struct evsel *leader); void perf_mem_events__list(void); @@ -47,6 +49,7 @@ int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info); int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info); int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info); int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info); +int perf_mem__blk_scnprintf(char *out, size_t sz, struct mem_info *mem_info); int perf_script__meminfo_scnprintf(char *bf, size_t size, struct mem_info *mem_info); @@ -76,6 +79,8 @@ struct c2c_stats { u32 rmt_hit; /* count of loads with remote hit clean; */ u32 lcl_dram; /* count of loads miss to local DRAM */ u32 rmt_dram; /* count of loads miss to remote DRAM */ + u32 blk_data; /* count of loads blocked by data */ + u32 blk_addr; /* count of loads blocked by address conflict */ u32 nomap; /* count of load/stores with no phys adrs */ u32 noparse; /* count of unparsable data sources */ }; diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index ee94d3e8dd65..26c990e32378 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -162,6 +162,14 @@ static bool contains_event(struct evsel **metric_events, int num_events, return false; } +static bool evsel_same_pmu(struct evsel *ev1, struct evsel *ev2) +{ + if (!ev1->pmu_name || !ev2->pmu_name) + return false; + + return !strcmp(ev1->pmu_name, ev2->pmu_name); +} + /** * Find a group of events in perf_evlist that correspond to those from a parsed * metric expression. Note, as find_evsel_group is called in the same order as @@ -280,8 +288,7 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist, */ if (!has_constraint && ev->leader != metric_events[i]->leader && - !strcmp(ev->leader->pmu_name, - metric_events[i]->leader->pmu_name)) + evsel_same_pmu(ev->leader, metric_events[i]->leader)) break; if (!strcmp(metric_events[i]->name, ev->name)) { set_bit(ev->idx, evlist_used); @@ -372,7 +379,7 @@ static int metricgroup__setup_events(struct list_head *groups, metric_refs[i].metric_expr = ref->metric_expr; i++; } - }; + } expr->metric_refs = metric_refs; expr->metric_expr = m->metric_expr; @@ -766,7 +773,6 @@ int __weak arch_get_runtimeparam(struct pmu_event *pe __maybe_unused) struct metricgroup_add_iter_data { struct list_head *metric_list; const char *metric; - struct metric **m; struct expr_ids *ids; int *ret; bool *has_match; @@ -1058,12 +1064,13 @@ static int metricgroup__add_metric_sys_event_iter(struct pmu_event *pe, void *data) { struct metricgroup_add_iter_data *d = data; + struct metric *m = NULL; int ret; if (!match_pe_metric(pe, d->metric)) return 0; - ret = add_metric(d->metric_list, pe, d->metric_no_group, d->m, NULL, d->ids); + ret = add_metric(d->metric_list, pe, d->metric_no_group, &m, NULL, d->ids); if (ret) return ret; @@ -1114,7 +1121,6 @@ static int metricgroup__add_metric(const char *metric, bool metric_no_group, .metric_list = &list, .metric = metric, .metric_no_group = metric_no_group, - .m = &m, .ids = &ids, .has_match = &has_match, .ret = &ret, diff --git a/tools/perf/util/namespaces.c b/tools/perf/util/namespaces.c index 285d6f30d912..608b20c72a5c 100644 --- a/tools/perf/util/namespaces.c +++ b/tools/perf/util/namespaces.c @@ -66,6 +66,7 @@ int nsinfo__init(struct nsinfo *nsi) char spath[PATH_MAX]; char *newns = NULL; char *statln = NULL; + char *nspid; struct stat old_stat; struct stat new_stat; FILE *f = NULL; @@ -112,8 +113,12 @@ int nsinfo__init(struct nsinfo *nsi) } if (strstr(statln, "NStgid:") != NULL) { - nsi->nstgid = (pid_t)strtol(strrchr(statln, '\t'), - NULL, 10); + nspid = strrchr(statln, '\t'); + nsi->nstgid = (pid_t)strtol(nspid, NULL, 10); + /* If innermost tgid is not the first, process is in a different + * PID namespace. + */ + nsi->in_pidns = (statln + sizeof("NStgid:") - 1) != nspid; break; } } @@ -140,6 +145,7 @@ struct nsinfo *nsinfo__new(pid_t pid) nsi->tgid = pid; nsi->nstgid = pid; nsi->need_setns = false; + nsi->in_pidns = false; /* Init may fail if the process exits while we're trying to look * at its proc information. In that case, save the pid but * don't try to enter the namespace. @@ -166,6 +172,7 @@ struct nsinfo *nsinfo__copy(struct nsinfo *nsi) nnsi->tgid = nsi->tgid; nnsi->nstgid = nsi->nstgid; nnsi->need_setns = nsi->need_setns; + nnsi->in_pidns = nsi->in_pidns; if (nsi->mntns_path) { nnsi->mntns_path = strdup(nsi->mntns_path); if (!nnsi->mntns_path) { @@ -280,3 +287,15 @@ char *nsinfo__realpath(const char *path, struct nsinfo *nsi) return rpath; } + +int nsinfo__stat(const char *filename, struct stat *st, struct nsinfo *nsi) +{ + int ret; + struct nscookie nsc; + + nsinfo__mountns_enter(nsi, &nsc); + ret = stat(filename, st); + nsinfo__mountns_exit(&nsc); + + return ret; +} diff --git a/tools/perf/util/namespaces.h b/tools/perf/util/namespaces.h index 4b33f684eddd..ad9775db7b9c 100644 --- a/tools/perf/util/namespaces.h +++ b/tools/perf/util/namespaces.h @@ -8,6 +8,7 @@ #define __PERF_NAMESPACES_H #include <sys/types.h> +#include <sys/stat.h> #include <linux/stddef.h> #include <linux/perf_event.h> #include <linux/refcount.h> @@ -33,6 +34,7 @@ struct nsinfo { pid_t tgid; pid_t nstgid; bool need_setns; + bool in_pidns; char *mntns_path; refcount_t refcnt; }; @@ -55,6 +57,7 @@ void nsinfo__mountns_enter(struct nsinfo *nsi, struct nscookie *nc); void nsinfo__mountns_exit(struct nscookie *nc); char *nsinfo__realpath(const char *path, struct nsinfo *nsi); +int nsinfo__stat(const char *filename, struct stat *st, struct nsinfo *nsi); static inline void __nsinfo__zput(struct nsinfo **nsip) { diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 9db5097317f4..0b36285a9435 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -356,6 +356,7 @@ bpf-output { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUT cycles-ct | cycles-t | mem-loads | +mem-loads-aux | mem-stores | topdown-[a-z-]+ | tx-capacity-[a-z-]+ | diff --git a/tools/perf/util/perf_api_probe.c b/tools/perf/util/perf_api_probe.c index 3840d02f0f7b..829af17a0867 100644 --- a/tools/perf/util/perf_api_probe.c +++ b/tools/perf/util/perf_api_probe.c @@ -98,6 +98,11 @@ static void perf_probe_text_poke(struct evsel *evsel) evsel->core.attr.text_poke = 1; } +static void perf_probe_build_id(struct evsel *evsel) +{ + evsel->core.attr.build_id = 1; +} + bool perf_can_sample_identifier(void) { return perf_probe_api(perf_probe_sample_identifier); @@ -172,3 +177,8 @@ bool perf_can_aux_sample(void) return true; } + +bool perf_can_record_build_id(void) +{ + return perf_probe_api(perf_probe_build_id); +} diff --git a/tools/perf/util/perf_api_probe.h b/tools/perf/util/perf_api_probe.h index d5506a983a94..f12ca55f509a 100644 --- a/tools/perf/util/perf_api_probe.h +++ b/tools/perf/util/perf_api_probe.h @@ -11,5 +11,6 @@ bool perf_can_record_cpu_wide(void); bool perf_can_record_switch_events(void); bool perf_can_record_text_poke_events(void); bool perf_can_sample_identifier(void); +bool perf_can_record_build_id(void); #endif // __PERF_API_PROBE_H diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c index fb0bb6684438..30481825515b 100644 --- a/tools/perf/util/perf_event_attr_fprintf.c +++ b/tools/perf/util/perf_event_attr_fprintf.c @@ -35,7 +35,8 @@ static void __p_sample_type(char *buf, size_t size, u64 value) bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER), bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC), bit_name(WEIGHT), bit_name(PHYS_ADDR), bit_name(AUX), - bit_name(CGROUP), bit_name(DATA_PAGE_SIZE), + bit_name(CGROUP), bit_name(DATA_PAGE_SIZE), bit_name(CODE_PAGE_SIZE), + bit_name(WEIGHT_STRUCT), { .name = NULL, } }; #undef bit_name @@ -134,6 +135,8 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, PRINT_ATTRf(bpf_event, p_unsigned); PRINT_ATTRf(aux_output, p_unsigned); PRINT_ATTRf(cgroup, p_unsigned); + PRINT_ATTRf(text_poke, p_unsigned); + PRINT_ATTRf(build_id, p_unsigned); PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned); PRINT_ATTRf(bp_type, p_unsigned); diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h index a45499126184..eeac181ebccf 100644 --- a/tools/perf/util/perf_regs.h +++ b/tools/perf/util/perf_regs.h @@ -33,6 +33,13 @@ extern const struct sample_reg sample_reg_masks[]; int perf_reg_value(u64 *valp, struct regs_dump *regs, int id); +static inline const char *perf_reg_name(int id) +{ + const char *reg_name = __perf_reg_name(id); + + return reg_name ?: "unknown"; +} + #else #define PERF_REGS_MASK 0 #define PERF_REGS_MAX 0 diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 8eae2afff71a..a9cff3a50ddf 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -894,6 +894,16 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev, struct debuginfo *dinfo; int ntevs, ret = 0; + /* Workaround for gcc #98776 issue. + * Perf failed to add kretprobe event with debuginfo of vmlinux which is + * compiled by gcc with -fpatchable-function-entry option enabled. The + * same issue with kernel module. The retprobe doesn`t need debuginfo. + * This workaround solution use map to query the probe function address + * for retprobe event. + */ + if (pev->point.retprobe) + return 0; + dinfo = open_debuginfo(pev->target, pev->nsi, !need_dwarf); if (!dinfo) { if (need_dwarf) @@ -1074,7 +1084,7 @@ static int __show_line_range(struct line_range *lr, const char *module, } intlist__for_each_entry(ln, lr->line_list) { - for (; ln->i > l; l++) { + for (; ln->i > (unsigned long)l; l++) { ret = show_one_line(fp, l - lr->offset); if (ret < 0) goto end; diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index bbecb449ea94..52273542e6ef 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -794,6 +794,8 @@ static char *synthesize_sdt_probe_command(struct sdt_note *note, char *ret = NULL; int i, args_count, err; unsigned long long ref_ctr_offset; + char *arg; + int arg_idx = 0; if (strbuf_init(&buf, 32) < 0) return NULL; @@ -818,11 +820,43 @@ static char *synthesize_sdt_probe_command(struct sdt_note *note, if (args == NULL) goto error; - for (i = 0; i < args_count; ++i) { - if (synthesize_sdt_probe_arg(&buf, i, args[i]) < 0) { + for (i = 0; i < args_count; ) { + /* + * FIXUP: Arm64 ELF section '.note.stapsdt' uses string + * format "-4@[sp, NUM]" if a probe is to access data in + * the stack, e.g. below is an example for the SDT + * Arguments: + * + * Arguments: -4@[sp, 12] -4@[sp, 8] -4@[sp, 4] + * + * Since the string introduces an extra space character + * in the middle of square brackets, the argument is + * divided into two items. Fixup for this case, if an + * item contains sub string "[sp,", need to concatenate + * the two items. + */ + if (strstr(args[i], "[sp,") && (i+1) < args_count) { + err = asprintf(&arg, "%s %s", args[i], args[i+1]); + i += 2; + } else { + err = asprintf(&arg, "%s", args[i]); + i += 1; + } + + /* Failed to allocate memory */ + if (err < 0) { argv_free(args); goto error; } + + if (synthesize_sdt_probe_arg(&buf, arg_idx, arg) < 0) { + free(arg); + argv_free(args); + goto error; + } + + free(arg); + arg_idx++; } argv_free(args); diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 76dd349aa48d..1b118c9c86a6 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -1187,8 +1187,10 @@ static int debuginfo__find_probe_location(struct debuginfo *dbg, while (!dwarf_nextcu(dbg->dbg, off, &noff, &cuhl, NULL, NULL, NULL)) { /* Get the DIE(Debugging Information Entry) of this CU */ diep = dwarf_offdie(dbg->dbg, off + cuhl, &pf->cu_die); - if (!diep) + if (!diep) { + off = noff; continue; + } /* Check if target file is included. */ if (pp->file) @@ -1949,8 +1951,10 @@ int debuginfo__find_line_range(struct debuginfo *dbg, struct line_range *lr) /* Get the DIE(Debugging Information Entry) of this CU */ diep = dwarf_offdie(dbg->dbg, off + cuhl, &lf.cu_die); - if (!diep) + if (!diep) { + off = noff; continue; + } /* Check if target file is included. */ if (lr->file) diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index a9d9c142eb7c..71b753523fac 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -10,6 +10,7 @@ util/python.c util/cap.c util/evlist.c util/evsel.c +util/evsel_fprintf.c util/perf_event_attr_fprintf.c util/cpumap.c util/memswap.c diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index cc5ade85a33f..278abecb5bdf 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -80,6 +80,27 @@ int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp, } /* + * XXX: All these evsel destructors need some better mechanism, like a linked + * list of destructors registered when the relevant code indeed is used instead + * of having more and more calls in perf_evsel__delete(). -- acme + * + * For now, add some more: + * + * Not to drag the BPF bandwagon... + */ +void bpf_counter__destroy(struct evsel *evsel); +int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd); + +void bpf_counter__destroy(struct evsel *evsel __maybe_unused) +{ +} + +int bpf_counter__install_pe(struct evsel *evsel __maybe_unused, int cpu __maybe_unused, int fd __maybe_unused) +{ + return 0; +} + +/* * Support debug printing even though util/debug.c is not linked. That means * implementing 'verbose' and 'eprintf'. */ diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index e70c9dd04567..f99852d54b14 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -15,6 +15,8 @@ #include "record.h" #include "../perf-sys.h" #include "topdown.h" +#include "map_symbol.h" +#include "mem-events.h" /* * evsel__config_leader_sampling() uses special rules for leader sampling. @@ -25,7 +27,8 @@ static struct evsel *evsel__read_sampler(struct evsel *evsel, struct evlist *evl { struct evsel *leader = evsel->leader; - if (evsel__is_aux_event(leader) || arch_topdown_sample_read(leader)) { + if (evsel__is_aux_event(leader) || arch_topdown_sample_read(leader) || + is_mem_loads_aux_event(leader)) { evlist__for_each_entry(evlist, evsel) { if (evsel->leader == leader && evsel != evsel->leader) return evsel; @@ -201,10 +204,10 @@ static int record_opts__config_freq(struct record_opts *opts) * Default frequency is over current maximum. */ if (max_rate < opts->freq) { - pr_warning("Lowering default frequency rate to %u.\n" + pr_warning("Lowering default frequency rate from %u to %u.\n" "Please consider tweaking " "/proc/sys/kernel/perf_event_max_sample_rate.\n", - max_rate); + opts->freq, max_rate); opts->freq = max_rate; } diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h index 694b351dcd27..68f471d9a88b 100644 --- a/tools/perf/util/record.h +++ b/tools/perf/util/record.h @@ -23,6 +23,7 @@ struct record_opts { bool sample_address; bool sample_phys_addr; bool sample_data_page_size; + bool sample_code_page_size; bool sample_weight; bool sample_time; bool sample_time_set; @@ -50,6 +51,7 @@ struct record_opts { bool no_bpf_event; bool kcore; bool text_poke; + bool build_id; unsigned int freq; unsigned int mmap_pages; unsigned int auxtrace_mmap_pages; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 50ff9795a4f1..859832a82496 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -593,10 +593,13 @@ static void perf_event__mmap2_swap(union perf_event *event, event->mmap2.start = bswap_64(event->mmap2.start); event->mmap2.len = bswap_64(event->mmap2.len); event->mmap2.pgoff = bswap_64(event->mmap2.pgoff); - event->mmap2.maj = bswap_32(event->mmap2.maj); - event->mmap2.min = bswap_32(event->mmap2.min); - event->mmap2.ino = bswap_64(event->mmap2.ino); - event->mmap2.ino_generation = bswap_64(event->mmap2.ino_generation); + + if (!(event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID)) { + event->mmap2.maj = bswap_32(event->mmap2.maj); + event->mmap2.min = bswap_32(event->mmap2.min); + event->mmap2.ino = bswap_64(event->mmap2.ino); + event->mmap2.ino_generation = bswap_64(event->mmap2.ino_generation); + } if (sample_id_all) { void *data = &event->mmap2.filename; @@ -1297,8 +1300,12 @@ static void dump_sample(struct evsel *evsel, union perf_event *event, if (sample_type & PERF_SAMPLE_STACK_USER) stack_user__printf(&sample->user_stack); - if (sample_type & PERF_SAMPLE_WEIGHT) - printf("... weight: %" PRIu64 "\n", sample->weight); + if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) { + printf("... weight: %" PRIu64 "", sample->weight); + if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) + printf(",0x%"PRIx16"", sample->ins_lat); + printf("\n"); + } if (sample_type & PERF_SAMPLE_DATA_SRC) printf(" . data_src: 0x%"PRIx64"\n", sample->data_src); @@ -1309,6 +1316,9 @@ static void dump_sample(struct evsel *evsel, union perf_event *event, if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) printf(" .. data page size: %s\n", get_page_size_name(sample->data_page_size, str)); + if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) + printf(" .. code page size: %s\n", get_page_size_name(sample->code_page_size, str)); + if (sample_type & PERF_SAMPLE_TRANSACTION) printf("... transaction: %" PRIx64 "\n", sample->transaction); @@ -1346,8 +1356,6 @@ static struct machine *machines__find_for_cpumode(struct machines *machines, union perf_event *event, struct perf_sample *sample) { - struct machine *machine; - if (perf_guest && ((sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL) || (sample->cpumode == PERF_RECORD_MISC_GUEST_USER))) { @@ -1359,10 +1367,7 @@ static struct machine *machines__find_for_cpumode(struct machines *machines, else pid = sample->pid; - machine = machines__find(machines, pid); - if (!machine) - machine = machines__findnew(machines, DEFAULT_GUEST_KERNEL_ID); - return machine; + return machines__find_guest(machines, pid); } return &machines->host; @@ -1784,32 +1789,13 @@ struct thread *perf_session__findnew(struct perf_session *session, pid_t pid) return machine__findnew_thread(&session->machines.host, -1, pid); } -/* - * Threads are identified by pid and tid, and the idle task has pid == tid == 0. - * So here a single thread is created for that, but actually there is a separate - * idle task per cpu, so there should be one 'struct thread' per cpu, but there - * is only 1. That causes problems for some tools, requiring workarounds. For - * example get_idle_thread() in builtin-sched.c, or thread_stack__per_cpu(). - */ int perf_session__register_idle_thread(struct perf_session *session) { - struct thread *thread; - int err = 0; - - thread = machine__findnew_thread(&session->machines.host, 0, 0); - if (thread == NULL || thread__set_comm(thread, "swapper", 0)) { - pr_err("problem inserting idle task.\n"); - err = -1; - } + struct thread *thread = machine__idle_thread(&session->machines.host); - if (thread == NULL || thread__set_namespaces(thread, 0, NULL)) { - pr_err("problem inserting idle task.\n"); - err = -1; - } - - /* machine__findnew_thread() got the thread, so put it */ + /* machine__idle_thread() got the thread, so put it */ thread__put(thread); - return err; + return thread ? 0 : -1; } static void @@ -2404,7 +2390,7 @@ int perf_session__cpu_bitmap(struct perf_session *session, { int i, err = -1; struct perf_cpu_map *map; - int nr_cpus = min(session->header.env.nr_cpus_online, MAX_NR_CPUS); + int nr_cpus = min(session->header.env.nr_cpus_avail, MAX_NR_CPUS); for (i = 0; i < PERF_TYPE_MAX; ++i) { struct evsel *evsel; diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index c5e3e9a68162..483f05004e68 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -43,7 +43,7 @@ class install_lib(_install_lib): cflags = getenv('CFLAGS', '').split() # switch off several checks (need to be at the end of cflags list) -cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter', '-Wno-redundant-decls' ] +cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter', '-Wno-redundant-decls', '-DPYTHON_PERF' ] if not cc_is_clang: cflags += ['-Wno-cast-function-type' ] diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 80907bc32683..0d5ad42812b9 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -36,7 +36,7 @@ const char default_parent_pattern[] = "^sys_|^do_page_fault"; const char *parent_pattern = default_parent_pattern; const char *default_sort_order = "comm,dso,symbol"; const char default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles"; -const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked"; +const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat"; const char default_top_sort_order[] = "dso,symbol"; const char default_diff_sort_order[] = "dso,symbol"; const char default_tracepoint_sort_order[] = "trace"; @@ -1365,6 +1365,49 @@ struct sort_entry sort_global_weight = { .se_width_idx = HISTC_GLOBAL_WEIGHT, }; +static u64 he_ins_lat(struct hist_entry *he) +{ + return he->stat.nr_events ? he->stat.ins_lat / he->stat.nr_events : 0; +} + +static int64_t +sort__local_ins_lat_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return he_ins_lat(left) - he_ins_lat(right); +} + +static int hist_entry__local_ins_lat_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + return repsep_snprintf(bf, size, "%-*u", width, he_ins_lat(he)); +} + +struct sort_entry sort_local_ins_lat = { + .se_header = "Local INSTR Latency", + .se_cmp = sort__local_ins_lat_cmp, + .se_snprintf = hist_entry__local_ins_lat_snprintf, + .se_width_idx = HISTC_LOCAL_INS_LAT, +}; + +static int64_t +sort__global_ins_lat_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return left->stat.ins_lat - right->stat.ins_lat; +} + +static int hist_entry__global_ins_lat_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + return repsep_snprintf(bf, size, "%-*u", width, he->stat.ins_lat); +} + +struct sort_entry sort_global_ins_lat = { + .se_header = "INSTR Latency", + .se_cmp = sort__global_ins_lat_cmp, + .se_snprintf = hist_entry__global_ins_lat_snprintf, + .se_width_idx = HISTC_GLOBAL_INS_LAT, +}; + struct sort_entry sort_mem_daddr_sym = { .se_header = "Data Symbol", .se_cmp = sort__daddr_cmp, @@ -1422,6 +1465,41 @@ struct sort_entry sort_mem_dcacheline = { }; static int64_t +sort__blocked_cmp(struct hist_entry *left, struct hist_entry *right) +{ + union perf_mem_data_src data_src_l; + union perf_mem_data_src data_src_r; + + if (left->mem_info) + data_src_l = left->mem_info->data_src; + else + data_src_l.mem_blk = PERF_MEM_BLK_NA; + + if (right->mem_info) + data_src_r = right->mem_info->data_src; + else + data_src_r.mem_blk = PERF_MEM_BLK_NA; + + return (int64_t)(data_src_r.mem_blk - data_src_l.mem_blk); +} + +static int hist_entry__blocked_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + char out[16]; + + perf_mem__blk_scnprintf(out, sizeof(out), he->mem_info); + return repsep_snprintf(bf, size, "%.*s", width, out); +} + +struct sort_entry sort_mem_blocked = { + .se_header = "Blocked", + .se_cmp = sort__blocked_cmp, + .se_snprintf = hist_entry__blocked_snprintf, + .se_width_idx = HISTC_MEM_BLOCKED, +}; + +static int64_t sort__phys_daddr_cmp(struct hist_entry *left, struct hist_entry *right) { uint64_t l = 0, r = 0; @@ -1492,6 +1570,31 @@ struct sort_entry sort_mem_data_page_size = { }; static int64_t +sort__code_page_size_cmp(struct hist_entry *left, struct hist_entry *right) +{ + uint64_t l = left->code_page_size; + uint64_t r = right->code_page_size; + + return (int64_t)(r - l); +} + +static int hist_entry__code_page_size_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + char str[PAGE_SIZE_NAME_LEN]; + + return repsep_snprintf(bf, size, "%-*s", width, + get_page_size_name(he->code_page_size, str)); +} + +struct sort_entry sort_code_page_size = { + .se_header = "Code Page Size", + .se_cmp = sort__code_page_size_cmp, + .se_snprintf = hist_entry__code_page_size_snprintf, + .se_width_idx = HISTC_CODE_PAGE_SIZE, +}; + +static int64_t sort__abort_cmp(struct hist_entry *left, struct hist_entry *right) { if (!left->branch_info || !right->branch_info) @@ -1735,6 +1838,9 @@ static struct sort_dimension common_sort_dimensions[] = { DIM(SORT_CGROUP_ID, "cgroup_id", sort_cgroup_id), DIM(SORT_SYM_IPC_NULL, "ipc_null", sort_sym_ipc_null), DIM(SORT_TIME, "time", sort_time), + DIM(SORT_CODE_PAGE_SIZE, "code_page_size", sort_code_page_size), + DIM(SORT_LOCAL_INS_LAT, "local_ins_lat", sort_local_ins_lat), + DIM(SORT_GLOBAL_INS_LAT, "ins_lat", sort_global_ins_lat), }; #undef DIM @@ -1770,6 +1876,7 @@ static struct sort_dimension memory_sort_dimensions[] = { DIM(SORT_MEM_DCACHELINE, "dcacheline", sort_mem_dcacheline), DIM(SORT_MEM_PHYS_DADDR, "phys_daddr", sort_mem_phys_daddr), DIM(SORT_MEM_DATA_PAGE_SIZE, "data_page_size", sort_mem_data_page_size), + DIM(SORT_MEM_BLOCKED, "blocked", sort_mem_blocked), }; #undef DIM diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index e50f2b695bc4..63f67a3f3630 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -50,6 +50,7 @@ struct he_stat { u64 period_guest_sys; u64 period_guest_us; u64 weight; + u64 ins_lat; u32 nr_events; }; @@ -106,6 +107,7 @@ struct hist_entry { u64 transaction; s32 socket; s32 cpu; + u64 code_page_size; u8 cpumode; u8 depth; @@ -229,6 +231,9 @@ enum sort_type { SORT_CGROUP_ID, SORT_SYM_IPC_NULL, SORT_TIME, + SORT_CODE_PAGE_SIZE, + SORT_LOCAL_INS_LAT, + SORT_GLOBAL_INS_LAT, /* branch stack specific sort keys */ __SORT_BRANCH_STACK, @@ -256,6 +261,7 @@ enum sort_type { SORT_MEM_IADDR_SYMBOL, SORT_MEM_PHYS_DADDR, SORT_MEM_DATA_PAGE_SIZE, + SORT_MEM_BLOCKED, }; /* diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 583ae4f09c5d..cce7a76d6473 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -1045,7 +1045,9 @@ static void print_header(struct perf_stat_config *config, if (!config->csv_output) { fprintf(output, "\n"); fprintf(output, " Performance counter stats for "); - if (_target->system_wide) + if (_target->bpf_str) + fprintf(output, "\'BPF program(s) %s", _target->bpf_str); + else if (_target->system_wide) fprintf(output, "\'system wide"); else if (_target->cpu_list) fprintf(output, "\'CPU(s) %s", _target->cpu_list); diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 901265127e36..6ccf21a72f06 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -8,6 +8,7 @@ #include "evlist.h" #include "expr.h" #include "metricgroup.h" +#include "cgroup.h" #include <linux/zalloc.h> /* @@ -28,6 +29,7 @@ struct saved_value { enum stat_type type; int ctx; int cpu; + struct cgroup *cgrp; struct runtime_stat *stat; struct stats stats; u64 metric_total; @@ -57,6 +59,9 @@ static int saved_value_cmp(struct rb_node *rb_node, const void *entry) if (a->ctx != b->ctx) return a->ctx - b->ctx; + if (a->cgrp != b->cgrp) + return (char *)a->cgrp < (char *)b->cgrp ? -1 : +1; + if (a->evsel == NULL && b->evsel == NULL) { if (a->stat == b->stat) return 0; @@ -100,7 +105,8 @@ static struct saved_value *saved_value_lookup(struct evsel *evsel, bool create, enum stat_type type, int ctx, - struct runtime_stat *st) + struct runtime_stat *st, + struct cgroup *cgrp) { struct rblist *rblist; struct rb_node *nd; @@ -110,10 +116,15 @@ static struct saved_value *saved_value_lookup(struct evsel *evsel, .type = type, .ctx = ctx, .stat = st, + .cgrp = cgrp, }; rblist = &st->value_list; + /* don't use context info for clock events */ + if (type == STAT_NSECS) + dm.ctx = 0; + nd = rblist__find(rblist, &dm); if (nd) return container_of(nd, struct saved_value, rb_node); @@ -191,12 +202,18 @@ void perf_stat__reset_shadow_per_stat(struct runtime_stat *st) reset_stat(st); } +struct runtime_stat_data { + int ctx; + struct cgroup *cgrp; +}; + static void update_runtime_stat(struct runtime_stat *st, enum stat_type type, - int ctx, int cpu, u64 count) + int cpu, u64 count, + struct runtime_stat_data *rsd) { - struct saved_value *v = saved_value_lookup(NULL, cpu, true, - type, ctx, st); + struct saved_value *v = saved_value_lookup(NULL, cpu, true, type, + rsd->ctx, st, rsd->cgrp); if (v) update_stats(&v->stats, count); @@ -210,82 +227,98 @@ static void update_runtime_stat(struct runtime_stat *st, void perf_stat__update_shadow_stats(struct evsel *counter, u64 count, int cpu, struct runtime_stat *st) { - int ctx = evsel_context(counter); u64 count_ns = count; struct saved_value *v; + struct runtime_stat_data rsd = { + .ctx = evsel_context(counter), + .cgrp = counter->cgrp, + }; count *= counter->scale; if (evsel__is_clock(counter)) - update_runtime_stat(st, STAT_NSECS, 0, cpu, count_ns); + update_runtime_stat(st, STAT_NSECS, cpu, count_ns, &rsd); else if (evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) - update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count); + update_runtime_stat(st, STAT_CYCLES, cpu, count, &rsd); else if (perf_stat_evsel__is(counter, CYCLES_IN_TX)) - update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count); + update_runtime_stat(st, STAT_CYCLES_IN_TX, cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TRANSACTION_START)) - update_runtime_stat(st, STAT_TRANSACTION, ctx, cpu, count); + update_runtime_stat(st, STAT_TRANSACTION, cpu, count, &rsd); else if (perf_stat_evsel__is(counter, ELISION_START)) - update_runtime_stat(st, STAT_ELISION, ctx, cpu, count); + update_runtime_stat(st, STAT_ELISION, cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS)) update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS, - ctx, cpu, count); + cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED)) update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED, - ctx, cpu, count); + cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED)) update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED, - ctx, cpu, count); + cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES)) update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES, - ctx, cpu, count); + cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES)) update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES, - ctx, cpu, count); + cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_RETIRING)) update_runtime_stat(st, STAT_TOPDOWN_RETIRING, - ctx, cpu, count); + cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_BAD_SPEC)) update_runtime_stat(st, STAT_TOPDOWN_BAD_SPEC, - ctx, cpu, count); + cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_FE_BOUND)) update_runtime_stat(st, STAT_TOPDOWN_FE_BOUND, - ctx, cpu, count); + cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_BE_BOUND)) update_runtime_stat(st, STAT_TOPDOWN_BE_BOUND, - ctx, cpu, count); + cpu, count, &rsd); + else if (perf_stat_evsel__is(counter, TOPDOWN_HEAVY_OPS)) + update_runtime_stat(st, STAT_TOPDOWN_HEAVY_OPS, + cpu, count, &rsd); + else if (perf_stat_evsel__is(counter, TOPDOWN_BR_MISPREDICT)) + update_runtime_stat(st, STAT_TOPDOWN_BR_MISPREDICT, + cpu, count, &rsd); + else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_LAT)) + update_runtime_stat(st, STAT_TOPDOWN_FETCH_LAT, + cpu, count, &rsd); + else if (perf_stat_evsel__is(counter, TOPDOWN_MEM_BOUND)) + update_runtime_stat(st, STAT_TOPDOWN_MEM_BOUND, + cpu, count, &rsd); else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT, - ctx, cpu, count); + cpu, count, &rsd); else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) update_runtime_stat(st, STAT_STALLED_CYCLES_BACK, - ctx, cpu, count); + cpu, count, &rsd); else if (evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) - update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count); + update_runtime_stat(st, STAT_BRANCHES, cpu, count, &rsd); else if (evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) - update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count); + update_runtime_stat(st, STAT_CACHEREFS, cpu, count, &rsd); else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) - update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count); + update_runtime_stat(st, STAT_L1_DCACHE, cpu, count, &rsd); else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) - update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count); + update_runtime_stat(st, STAT_L1_ICACHE, cpu, count, &rsd); else if (evsel__match(counter, HW_CACHE, HW_CACHE_LL)) - update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count); + update_runtime_stat(st, STAT_LL_CACHE, cpu, count, &rsd); else if (evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) - update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count); + update_runtime_stat(st, STAT_DTLB_CACHE, cpu, count, &rsd); else if (evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) - update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count); + update_runtime_stat(st, STAT_ITLB_CACHE, cpu, count, &rsd); else if (perf_stat_evsel__is(counter, SMI_NUM)) - update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count); + update_runtime_stat(st, STAT_SMI_NUM, cpu, count, &rsd); else if (perf_stat_evsel__is(counter, APERF)) - update_runtime_stat(st, STAT_APERF, ctx, cpu, count); + update_runtime_stat(st, STAT_APERF, cpu, count, &rsd); if (counter->collect_stat) { - v = saved_value_lookup(counter, cpu, true, STAT_NONE, 0, st); + v = saved_value_lookup(counter, cpu, true, STAT_NONE, 0, st, + rsd.cgrp); update_stats(&v->stats, count); if (counter->metric_leader) v->metric_total += count; } else if (counter->metric_leader) { v = saved_value_lookup(counter->metric_leader, - cpu, true, STAT_NONE, 0, st); + cpu, true, STAT_NONE, 0, st, rsd.cgrp); v->metric_total += count; v->metric_other++; } @@ -422,11 +455,12 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list) } static double runtime_stat_avg(struct runtime_stat *st, - enum stat_type type, int ctx, int cpu) + enum stat_type type, int cpu, + struct runtime_stat_data *rsd) { struct saved_value *v; - v = saved_value_lookup(NULL, cpu, false, type, ctx, st); + v = saved_value_lookup(NULL, cpu, false, type, rsd->ctx, st, rsd->cgrp); if (!v) return 0.0; @@ -434,11 +468,12 @@ static double runtime_stat_avg(struct runtime_stat *st, } static double runtime_stat_n(struct runtime_stat *st, - enum stat_type type, int ctx, int cpu) + enum stat_type type, int cpu, + struct runtime_stat_data *rsd) { struct saved_value *v; - v = saved_value_lookup(NULL, cpu, false, type, ctx, st); + v = saved_value_lookup(NULL, cpu, false, type, rsd->ctx, st, rsd->cgrp); if (!v) return 0.0; @@ -446,16 +481,15 @@ static double runtime_stat_n(struct runtime_stat *st, } static void print_stalled_cycles_frontend(struct perf_stat_config *config, - int cpu, - struct evsel *evsel, double avg, + int cpu, double avg, struct perf_stat_output_ctx *out, - struct runtime_stat *st) + struct runtime_stat *st, + struct runtime_stat_data *rsd) { double total, ratio = 0.0; const char *color; - int ctx = evsel_context(evsel); - total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); + total = runtime_stat_avg(st, STAT_CYCLES, cpu, rsd); if (total) ratio = avg / total * 100.0; @@ -470,16 +504,15 @@ static void print_stalled_cycles_frontend(struct perf_stat_config *config, } static void print_stalled_cycles_backend(struct perf_stat_config *config, - int cpu, - struct evsel *evsel, double avg, + int cpu, double avg, struct perf_stat_output_ctx *out, - struct runtime_stat *st) + struct runtime_stat *st, + struct runtime_stat_data *rsd) { double total, ratio = 0.0; const char *color; - int ctx = evsel_context(evsel); - total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); + total = runtime_stat_avg(st, STAT_CYCLES, cpu, rsd); if (total) ratio = avg / total * 100.0; @@ -490,17 +523,15 @@ static void print_stalled_cycles_backend(struct perf_stat_config *config, } static void print_branch_misses(struct perf_stat_config *config, - int cpu, - struct evsel *evsel, - double avg, + int cpu, double avg, struct perf_stat_output_ctx *out, - struct runtime_stat *st) + struct runtime_stat *st, + struct runtime_stat_data *rsd) { double total, ratio = 0.0; const char *color; - int ctx = evsel_context(evsel); - total = runtime_stat_avg(st, STAT_BRANCHES, ctx, cpu); + total = runtime_stat_avg(st, STAT_BRANCHES, cpu, rsd); if (total) ratio = avg / total * 100.0; @@ -511,18 +542,15 @@ static void print_branch_misses(struct perf_stat_config *config, } static void print_l1_dcache_misses(struct perf_stat_config *config, - int cpu, - struct evsel *evsel, - double avg, + int cpu, double avg, struct perf_stat_output_ctx *out, - struct runtime_stat *st) - + struct runtime_stat *st, + struct runtime_stat_data *rsd) { double total, ratio = 0.0; const char *color; - int ctx = evsel_context(evsel); - total = runtime_stat_avg(st, STAT_L1_DCACHE, ctx, cpu); + total = runtime_stat_avg(st, STAT_L1_DCACHE, cpu, rsd); if (total) ratio = avg / total * 100.0; @@ -533,18 +561,15 @@ static void print_l1_dcache_misses(struct perf_stat_config *config, } static void print_l1_icache_misses(struct perf_stat_config *config, - int cpu, - struct evsel *evsel, - double avg, + int cpu, double avg, struct perf_stat_output_ctx *out, - struct runtime_stat *st) - + struct runtime_stat *st, + struct runtime_stat_data *rsd) { double total, ratio = 0.0; const char *color; - int ctx = evsel_context(evsel); - total = runtime_stat_avg(st, STAT_L1_ICACHE, ctx, cpu); + total = runtime_stat_avg(st, STAT_L1_ICACHE, cpu, rsd); if (total) ratio = avg / total * 100.0; @@ -554,17 +579,15 @@ static void print_l1_icache_misses(struct perf_stat_config *config, } static void print_dtlb_cache_misses(struct perf_stat_config *config, - int cpu, - struct evsel *evsel, - double avg, + int cpu, double avg, struct perf_stat_output_ctx *out, - struct runtime_stat *st) + struct runtime_stat *st, + struct runtime_stat_data *rsd) { double total, ratio = 0.0; const char *color; - int ctx = evsel_context(evsel); - total = runtime_stat_avg(st, STAT_DTLB_CACHE, ctx, cpu); + total = runtime_stat_avg(st, STAT_DTLB_CACHE, cpu, rsd); if (total) ratio = avg / total * 100.0; @@ -574,17 +597,15 @@ static void print_dtlb_cache_misses(struct perf_stat_config *config, } static void print_itlb_cache_misses(struct perf_stat_config *config, - int cpu, - struct evsel *evsel, - double avg, + int cpu, double avg, struct perf_stat_output_ctx *out, - struct runtime_stat *st) + struct runtime_stat *st, + struct runtime_stat_data *rsd) { double total, ratio = 0.0; const char *color; - int ctx = evsel_context(evsel); - total = runtime_stat_avg(st, STAT_ITLB_CACHE, ctx, cpu); + total = runtime_stat_avg(st, STAT_ITLB_CACHE, cpu, rsd); if (total) ratio = avg / total * 100.0; @@ -594,17 +615,15 @@ static void print_itlb_cache_misses(struct perf_stat_config *config, } static void print_ll_cache_misses(struct perf_stat_config *config, - int cpu, - struct evsel *evsel, - double avg, + int cpu, double avg, struct perf_stat_output_ctx *out, - struct runtime_stat *st) + struct runtime_stat *st, + struct runtime_stat_data *rsd) { double total, ratio = 0.0; const char *color; - int ctx = evsel_context(evsel); - total = runtime_stat_avg(st, STAT_LL_CACHE, ctx, cpu); + total = runtime_stat_avg(st, STAT_LL_CACHE, cpu, rsd); if (total) ratio = avg / total * 100.0; @@ -662,56 +681,61 @@ static double sanitize_val(double x) return x; } -static double td_total_slots(int ctx, int cpu, struct runtime_stat *st) +static double td_total_slots(int cpu, struct runtime_stat *st, + struct runtime_stat_data *rsd) { - return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, ctx, cpu); + return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, cpu, rsd); } -static double td_bad_spec(int ctx, int cpu, struct runtime_stat *st) +static double td_bad_spec(int cpu, struct runtime_stat *st, + struct runtime_stat_data *rsd) { double bad_spec = 0; double total_slots; double total; - total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, ctx, cpu) - - runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, ctx, cpu) + - runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, ctx, cpu); + total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, cpu, rsd) - + runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, cpu, rsd) + + runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, cpu, rsd); - total_slots = td_total_slots(ctx, cpu, st); + total_slots = td_total_slots(cpu, st, rsd); if (total_slots) bad_spec = total / total_slots; return sanitize_val(bad_spec); } -static double td_retiring(int ctx, int cpu, struct runtime_stat *st) +static double td_retiring(int cpu, struct runtime_stat *st, + struct runtime_stat_data *rsd) { double retiring = 0; - double total_slots = td_total_slots(ctx, cpu, st); + double total_slots = td_total_slots(cpu, st, rsd); double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, - ctx, cpu); + cpu, rsd); if (total_slots) retiring = ret_slots / total_slots; return retiring; } -static double td_fe_bound(int ctx, int cpu, struct runtime_stat *st) +static double td_fe_bound(int cpu, struct runtime_stat *st, + struct runtime_stat_data *rsd) { double fe_bound = 0; - double total_slots = td_total_slots(ctx, cpu, st); + double total_slots = td_total_slots(cpu, st, rsd); double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES, - ctx, cpu); + cpu, rsd); if (total_slots) fe_bound = fetch_bub / total_slots; return fe_bound; } -static double td_be_bound(int ctx, int cpu, struct runtime_stat *st) +static double td_be_bound(int cpu, struct runtime_stat *st, + struct runtime_stat_data *rsd) { - double sum = (td_fe_bound(ctx, cpu, st) + - td_bad_spec(ctx, cpu, st) + - td_retiring(ctx, cpu, st)); + double sum = (td_fe_bound(cpu, st, rsd) + + td_bad_spec(cpu, st, rsd) + + td_retiring(cpu, st, rsd)); if (sum == 0) return 0; return sanitize_val(1.0 - sum); @@ -722,15 +746,15 @@ static double td_be_bound(int ctx, int cpu, struct runtime_stat *st) * the ratios we need to recreate the sum. */ -static double td_metric_ratio(int ctx, int cpu, - enum stat_type type, - struct runtime_stat *stat) +static double td_metric_ratio(int cpu, enum stat_type type, + struct runtime_stat *stat, + struct runtime_stat_data *rsd) { - double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, ctx, cpu) + - runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, ctx, cpu) + - runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, ctx, cpu) + - runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, ctx, cpu); - double d = runtime_stat_avg(stat, type, ctx, cpu); + double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu, rsd) + + runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu, rsd) + + runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu, rsd) + + runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu, rsd); + double d = runtime_stat_avg(stat, type, cpu, rsd); if (sum) return d / sum; @@ -742,34 +766,33 @@ static double td_metric_ratio(int ctx, int cpu, * We allow two missing. */ -static bool full_td(int ctx, int cpu, - struct runtime_stat *stat) +static bool full_td(int cpu, struct runtime_stat *stat, + struct runtime_stat_data *rsd) { int c = 0; - if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, ctx, cpu) > 0) + if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu, rsd) > 0) c++; - if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, ctx, cpu) > 0) + if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu, rsd) > 0) c++; - if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, ctx, cpu) > 0) + if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu, rsd) > 0) c++; - if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, ctx, cpu) > 0) + if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu, rsd) > 0) c++; return c >= 2; } -static void print_smi_cost(struct perf_stat_config *config, - int cpu, struct evsel *evsel, +static void print_smi_cost(struct perf_stat_config *config, int cpu, struct perf_stat_output_ctx *out, - struct runtime_stat *st) + struct runtime_stat *st, + struct runtime_stat_data *rsd) { double smi_num, aperf, cycles, cost = 0.0; - int ctx = evsel_context(evsel); const char *color = NULL; - smi_num = runtime_stat_avg(st, STAT_SMI_NUM, ctx, cpu); - aperf = runtime_stat_avg(st, STAT_APERF, ctx, cpu); - cycles = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); + smi_num = runtime_stat_avg(st, STAT_SMI_NUM, cpu, rsd); + aperf = runtime_stat_avg(st, STAT_APERF, cpu, rsd); + cycles = runtime_stat_avg(st, STAT_CYCLES, cpu, rsd); if ((cycles == 0) || (aperf == 0)) return; @@ -804,7 +827,8 @@ static int prepare_metric(struct evsel **metric_events, scale = 1e-9; } else { v = saved_value_lookup(metric_events[i], cpu, false, - STAT_NONE, 0, st); + STAT_NONE, 0, st, + metric_events[i]->cgrp); if (!v) break; stats = &v->stats; @@ -930,12 +954,15 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric_t print_metric = out->print_metric; double total, ratio = 0.0, total2; const char *color = NULL; - int ctx = evsel_context(evsel); + struct runtime_stat_data rsd = { + .ctx = evsel_context(evsel), + .cgrp = evsel->cgrp, + }; struct metric_event *me; int num = 1; if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { - total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); + total = runtime_stat_avg(st, STAT_CYCLES, cpu, &rsd); if (total) { ratio = avg / total; @@ -945,12 +972,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0); } - total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, - ctx, cpu); + total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, cpu, &rsd); total = max(total, runtime_stat_avg(st, STAT_STALLED_CYCLES_BACK, - ctx, cpu)); + cpu, &rsd)); if (total && avg) { out->new_line(config, ctxp); @@ -960,8 +986,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ratio); } } else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) { - if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0) - print_branch_misses(config, cpu, evsel, avg, out, st); + if (runtime_stat_n(st, STAT_BRANCHES, cpu, &rsd) != 0) + print_branch_misses(config, cpu, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all branches", 0); } else if ( @@ -970,8 +996,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0) - print_l1_dcache_misses(config, cpu, evsel, avg, out, st); + if (runtime_stat_n(st, STAT_L1_DCACHE, cpu, &rsd) != 0) + print_l1_dcache_misses(config, cpu, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all L1-dcache accesses", 0); } else if ( @@ -980,8 +1006,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0) - print_l1_icache_misses(config, cpu, evsel, avg, out, st); + if (runtime_stat_n(st, STAT_L1_ICACHE, cpu, &rsd) != 0) + print_l1_icache_misses(config, cpu, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all L1-icache accesses", 0); } else if ( @@ -990,8 +1016,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0) - print_dtlb_cache_misses(config, cpu, evsel, avg, out, st); + if (runtime_stat_n(st, STAT_DTLB_CACHE, cpu, &rsd) != 0) + print_dtlb_cache_misses(config, cpu, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all dTLB cache accesses", 0); } else if ( @@ -1000,8 +1026,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0) - print_itlb_cache_misses(config, cpu, evsel, avg, out, st); + if (runtime_stat_n(st, STAT_ITLB_CACHE, cpu, &rsd) != 0) + print_itlb_cache_misses(config, cpu, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all iTLB cache accesses", 0); } else if ( @@ -1010,27 +1036,27 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0) - print_ll_cache_misses(config, cpu, evsel, avg, out, st); + if (runtime_stat_n(st, STAT_LL_CACHE, cpu, &rsd) != 0) + print_ll_cache_misses(config, cpu, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all LL-cache accesses", 0); } else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) { - total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu); + total = runtime_stat_avg(st, STAT_CACHEREFS, cpu, &rsd); if (total) ratio = avg * 100 / total; - if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu) != 0) + if (runtime_stat_n(st, STAT_CACHEREFS, cpu, &rsd) != 0) print_metric(config, ctxp, NULL, "%8.3f %%", "of all cache refs", ratio); else print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0); } else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { - print_stalled_cycles_frontend(config, cpu, evsel, avg, out, st); + print_stalled_cycles_frontend(config, cpu, avg, out, st, &rsd); } else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { - print_stalled_cycles_backend(config, cpu, evsel, avg, out, st); + print_stalled_cycles_backend(config, cpu, avg, out, st, &rsd); } else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { - total = runtime_stat_avg(st, STAT_NSECS, 0, cpu); + total = runtime_stat_avg(st, STAT_NSECS, cpu, &rsd); if (total) { ratio = avg / total; @@ -1039,7 +1065,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, NULL, NULL, "Ghz", 0); } } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) { - total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); + total = runtime_stat_avg(st, STAT_CYCLES, cpu, &rsd); if (total) print_metric(config, ctxp, NULL, @@ -1049,8 +1075,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, NULL, NULL, "transactional cycles", 0); } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) { - total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); - total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, ctx, cpu); + total = runtime_stat_avg(st, STAT_CYCLES, cpu, &rsd); + total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu, &rsd); if (total2 < avg) total2 = avg; @@ -1060,21 +1086,19 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, else print_metric(config, ctxp, NULL, NULL, "aborted cycles", 0); } else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) { - total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, - ctx, cpu); + total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu, &rsd); if (avg) ratio = total / avg; - if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu) != 0) + if (runtime_stat_n(st, STAT_CYCLES_IN_TX, cpu, &rsd) != 0) print_metric(config, ctxp, NULL, "%8.0f", "cycles / transaction", ratio); else print_metric(config, ctxp, NULL, NULL, "cycles / transaction", 0); } else if (perf_stat_evsel__is(evsel, ELISION_START)) { - total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, - ctx, cpu); + total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu, &rsd); if (avg) ratio = total / avg; @@ -1087,28 +1111,28 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, else print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0); } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) { - double fe_bound = td_fe_bound(ctx, cpu, st); + double fe_bound = td_fe_bound(cpu, st, &rsd); if (fe_bound > 0.2) color = PERF_COLOR_RED; print_metric(config, ctxp, color, "%8.1f%%", "frontend bound", fe_bound * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) { - double retiring = td_retiring(ctx, cpu, st); + double retiring = td_retiring(cpu, st, &rsd); if (retiring > 0.7) color = PERF_COLOR_GREEN; print_metric(config, ctxp, color, "%8.1f%%", "retiring", retiring * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) { - double bad_spec = td_bad_spec(ctx, cpu, st); + double bad_spec = td_bad_spec(cpu, st, &rsd); if (bad_spec > 0.1) color = PERF_COLOR_RED; print_metric(config, ctxp, color, "%8.1f%%", "bad speculation", bad_spec * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) { - double be_bound = td_be_bound(ctx, cpu, st); + double be_bound = td_be_bound(cpu, st, &rsd); const char *name = "backend bound"; static int have_recovery_bubbles = -1; @@ -1121,55 +1145,135 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, if (be_bound > 0.2) color = PERF_COLOR_RED; - if (td_total_slots(ctx, cpu, st) > 0) + if (td_total_slots(cpu, st, &rsd) > 0) print_metric(config, ctxp, color, "%8.1f%%", name, be_bound * 100.); else print_metric(config, ctxp, NULL, NULL, name, 0); } else if (perf_stat_evsel__is(evsel, TOPDOWN_RETIRING) && - full_td(ctx, cpu, st)) { - double retiring = td_metric_ratio(ctx, cpu, - STAT_TOPDOWN_RETIRING, st); - + full_td(cpu, st, &rsd)) { + double retiring = td_metric_ratio(cpu, + STAT_TOPDOWN_RETIRING, st, + &rsd); if (retiring > 0.7) color = PERF_COLOR_GREEN; print_metric(config, ctxp, color, "%8.1f%%", "retiring", retiring * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_FE_BOUND) && - full_td(ctx, cpu, st)) { - double fe_bound = td_metric_ratio(ctx, cpu, - STAT_TOPDOWN_FE_BOUND, st); - + full_td(cpu, st, &rsd)) { + double fe_bound = td_metric_ratio(cpu, + STAT_TOPDOWN_FE_BOUND, st, + &rsd); if (fe_bound > 0.2) color = PERF_COLOR_RED; print_metric(config, ctxp, color, "%8.1f%%", "frontend bound", fe_bound * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_BE_BOUND) && - full_td(ctx, cpu, st)) { - double be_bound = td_metric_ratio(ctx, cpu, - STAT_TOPDOWN_BE_BOUND, st); - + full_td(cpu, st, &rsd)) { + double be_bound = td_metric_ratio(cpu, + STAT_TOPDOWN_BE_BOUND, st, + &rsd); if (be_bound > 0.2) color = PERF_COLOR_RED; print_metric(config, ctxp, color, "%8.1f%%", "backend bound", be_bound * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_BAD_SPEC) && - full_td(ctx, cpu, st)) { - double bad_spec = td_metric_ratio(ctx, cpu, - STAT_TOPDOWN_BAD_SPEC, st); - + full_td(cpu, st, &rsd)) { + double bad_spec = td_metric_ratio(cpu, + STAT_TOPDOWN_BAD_SPEC, st, + &rsd); if (bad_spec > 0.1) color = PERF_COLOR_RED; print_metric(config, ctxp, color, "%8.1f%%", "bad speculation", bad_spec * 100.); + } else if (perf_stat_evsel__is(evsel, TOPDOWN_HEAVY_OPS) && + full_td(cpu, st, &rsd) && (config->topdown_level > 1)) { + double retiring = td_metric_ratio(cpu, + STAT_TOPDOWN_RETIRING, st, + &rsd); + double heavy_ops = td_metric_ratio(cpu, + STAT_TOPDOWN_HEAVY_OPS, st, + &rsd); + double light_ops = retiring - heavy_ops; + + if (retiring > 0.7 && heavy_ops > 0.1) + color = PERF_COLOR_GREEN; + print_metric(config, ctxp, color, "%8.1f%%", "heavy operations", + heavy_ops * 100.); + if (retiring > 0.7 && light_ops > 0.6) + color = PERF_COLOR_GREEN; + else + color = NULL; + print_metric(config, ctxp, color, "%8.1f%%", "light operations", + light_ops * 100.); + } else if (perf_stat_evsel__is(evsel, TOPDOWN_BR_MISPREDICT) && + full_td(cpu, st, &rsd) && (config->topdown_level > 1)) { + double bad_spec = td_metric_ratio(cpu, + STAT_TOPDOWN_BAD_SPEC, st, + &rsd); + double br_mis = td_metric_ratio(cpu, + STAT_TOPDOWN_BR_MISPREDICT, st, + &rsd); + double m_clears = bad_spec - br_mis; + + if (bad_spec > 0.1 && br_mis > 0.05) + color = PERF_COLOR_RED; + print_metric(config, ctxp, color, "%8.1f%%", "branch mispredict", + br_mis * 100.); + if (bad_spec > 0.1 && m_clears > 0.05) + color = PERF_COLOR_RED; + else + color = NULL; + print_metric(config, ctxp, color, "%8.1f%%", "machine clears", + m_clears * 100.); + } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_LAT) && + full_td(cpu, st, &rsd) && (config->topdown_level > 1)) { + double fe_bound = td_metric_ratio(cpu, + STAT_TOPDOWN_FE_BOUND, st, + &rsd); + double fetch_lat = td_metric_ratio(cpu, + STAT_TOPDOWN_FETCH_LAT, st, + &rsd); + double fetch_bw = fe_bound - fetch_lat; + + if (fe_bound > 0.2 && fetch_lat > 0.15) + color = PERF_COLOR_RED; + print_metric(config, ctxp, color, "%8.1f%%", "fetch latency", + fetch_lat * 100.); + if (fe_bound > 0.2 && fetch_bw > 0.1) + color = PERF_COLOR_RED; + else + color = NULL; + print_metric(config, ctxp, color, "%8.1f%%", "fetch bandwidth", + fetch_bw * 100.); + } else if (perf_stat_evsel__is(evsel, TOPDOWN_MEM_BOUND) && + full_td(cpu, st, &rsd) && (config->topdown_level > 1)) { + double be_bound = td_metric_ratio(cpu, + STAT_TOPDOWN_BE_BOUND, st, + &rsd); + double mem_bound = td_metric_ratio(cpu, + STAT_TOPDOWN_MEM_BOUND, st, + &rsd); + double core_bound = be_bound - mem_bound; + + if (be_bound > 0.2 && mem_bound > 0.2) + color = PERF_COLOR_RED; + print_metric(config, ctxp, color, "%8.1f%%", "memory bound", + mem_bound * 100.); + if (be_bound > 0.2 && core_bound > 0.1) + color = PERF_COLOR_RED; + else + color = NULL; + print_metric(config, ctxp, color, "%8.1f%%", "Core bound", + core_bound * 100.); } else if (evsel->metric_expr) { generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL, evsel->name, evsel->metric_name, NULL, 1, cpu, out, st); - } else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) { + } else if (runtime_stat_n(st, STAT_NSECS, cpu, &rsd) != 0) { char unit = 'M'; char unit_buf[10]; - total = runtime_stat_avg(st, STAT_NSECS, 0, cpu); + total = runtime_stat_avg(st, STAT_NSECS, cpu, &rsd); if (total) ratio = 1000.0 * avg / total; @@ -1180,7 +1284,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio); } else if (perf_stat_evsel__is(evsel, SMI_NUM)) { - print_smi_cost(config, cpu, evsel, out, st); + print_smi_cost(config, cpu, out, st, &rsd); } else { num = 0; } diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 8ce1479c98f0..5d8af29447f4 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -99,6 +99,10 @@ static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = { ID(TOPDOWN_BAD_SPEC, topdown-bad-spec), ID(TOPDOWN_FE_BOUND, topdown-fe-bound), ID(TOPDOWN_BE_BOUND, topdown-be-bound), + ID(TOPDOWN_HEAVY_OPS, topdown-heavy-ops), + ID(TOPDOWN_BR_MISPREDICT, topdown-br-mispredict), + ID(TOPDOWN_FETCH_LAT, topdown-fetch-lat), + ID(TOPDOWN_MEM_BOUND, topdown-mem-bound), ID(SMI_NUM, msr/smi/), ID(APERF, msr/aperf/), }; @@ -527,7 +531,7 @@ int create_perf_stat_counter(struct evsel *evsel, if (leader->core.nr_members > 1) attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP; - attr->inherit = !config->no_inherit; + attr->inherit = !config->no_inherit && list_empty(&evsel->bpf_counter_list); /* * Some events get initialized with sample_(period/type) set, diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index b5369730b4a2..d85c292148bb 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -33,6 +33,10 @@ enum perf_stat_evsel_id { PERF_STAT_EVSEL_ID__TOPDOWN_BAD_SPEC, PERF_STAT_EVSEL_ID__TOPDOWN_FE_BOUND, PERF_STAT_EVSEL_ID__TOPDOWN_BE_BOUND, + PERF_STAT_EVSEL_ID__TOPDOWN_HEAVY_OPS, + PERF_STAT_EVSEL_ID__TOPDOWN_BR_MISPREDICT, + PERF_STAT_EVSEL_ID__TOPDOWN_FETCH_LAT, + PERF_STAT_EVSEL_ID__TOPDOWN_MEM_BOUND, PERF_STAT_EVSEL_ID__SMI_NUM, PERF_STAT_EVSEL_ID__APERF, PERF_STAT_EVSEL_ID__MAX, @@ -91,6 +95,10 @@ enum stat_type { STAT_TOPDOWN_BAD_SPEC, STAT_TOPDOWN_FE_BOUND, STAT_TOPDOWN_BE_BOUND, + STAT_TOPDOWN_HEAVY_OPS, + STAT_TOPDOWN_BR_MISPREDICT, + STAT_TOPDOWN_FETCH_LAT, + STAT_TOPDOWN_MEM_BOUND, STAT_SMI_NUM, STAT_APERF, STAT_MAX @@ -148,6 +156,7 @@ struct perf_stat_config { int ctl_fd_ack; bool ctl_fd_close; const char *cgroup_list; + unsigned int topdown_level; }; void perf_stat__set_big_num(int set); diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c index 52603876c548..f6d90cdd9225 100644 --- a/tools/perf/util/string.c +++ b/tools/perf/util/string.c @@ -293,3 +293,12 @@ char *strdup_esc(const char *str) return ret; } + +unsigned int hex(char c) +{ + if (c >= '0' && c <= '9') + return c - '0'; + if (c >= 'a' && c <= 'f') + return c - 'a' + 10; + return c - 'A' + 10; +} diff --git a/tools/perf/util/string2.h b/tools/perf/util/string2.h index 73df616ced43..56c30fef9682 100644 --- a/tools/perf/util/string2.h +++ b/tools/perf/util/string2.h @@ -38,4 +38,6 @@ char *asprintf__tp_filter_pids(size_t npids, pid_t *pids); char *strpbrk_esc(char *str, const char *stopset); char *strdup_esc(const char *str); +unsigned int hex(char c); + #endif /* PERF_STRING_H */ diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index f3577f7d72fe..6dff843fd883 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -12,6 +12,7 @@ #include "maps.h" #include "symbol.h" #include "symsrc.h" +#include "demangle-ocaml.h" #include "demangle-java.h" #include "demangle-rust.h" #include "machine.h" @@ -251,8 +252,12 @@ static char *demangle_sym(struct dso *dso, int kmodule, const char *elf_name) return demangled; demangled = bfd_demangle(NULL, elf_name, demangle_flags); - if (demangled == NULL) - demangled = java_demangle_sym(elf_name, JAVA_DEMANGLE_NORET); + if (demangled == NULL) { + demangled = ocaml_demangle_sym(elf_name); + if (demangled == NULL) { + demangled = java_demangle_sym(elf_name, JAVA_DEMANGLE_NORET); + } + } else if (rust_is_mangled(demangled)) /* * Input to Rust demangling is the BFD-demangled @@ -1226,12 +1231,26 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, if (sym.st_shndx == SHN_ABS) continue; - sec = elf_getscn(runtime_ss->elf, sym.st_shndx); + sec = elf_getscn(syms_ss->elf, sym.st_shndx); if (!sec) goto out_elf_end; gelf_getshdr(sec, &shdr); + /* + * We have to fallback to runtime when syms' section header has + * NOBITS set. NOBITS results in file offset (sh_offset) not + * being incremented. So sh_offset used below has different + * values for syms (invalid) and runtime (valid). + */ + if (shdr.sh_type == SHT_NOBITS) { + sec = elf_getscn(runtime_ss->elf, sym.st_shndx); + if (!sec) + goto out_elf_end; + + gelf_getshdr(sec, &shdr); + } + if (is_label && !elf_sec__filter(&shdr, secstrs)) continue; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 64a039cbba1b..77fc46ca07c0 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1561,15 +1561,14 @@ static int bfd2elf_binding(asymbol *symbol) int dso__load_bfd_symbols(struct dso *dso, const char *debugfile) { int err = -1; - long symbols_size, symbols_count; + long symbols_size, symbols_count, i; asection *section; asymbol **symbols, *sym; struct symbol *symbol; bfd *abfd; - u_int i; u64 start, len; - abfd = bfd_openr(dso->long_name, NULL); + abfd = bfd_openr(debugfile, NULL); if (!abfd) return -1; @@ -1586,21 +1585,6 @@ int dso__load_bfd_symbols(struct dso *dso, const char *debugfile) if (section) dso->text_offset = section->vma - section->filepos; - bfd_close(abfd); - - abfd = bfd_openr(debugfile, NULL); - if (!abfd) - return -1; - - if (!bfd_check_format(abfd, bfd_object)) { - pr_debug2("%s: cannot read %s bfd file.\n", __func__, - debugfile); - goto out_close; - } - - if (bfd_get_flavour(abfd) == bfd_target_elf_flavour) - goto out_close; - symbols_size = bfd_get_symtab_upper_bound(abfd); if (symbols_size == 0) { bfd_close(abfd); @@ -1867,8 +1851,10 @@ int dso__load(struct dso *dso, struct map *map) if (nsexit) nsinfo__mountns_enter(dso->nsinfo, &nsc); - if (bfdrc == 0) + if (bfdrc == 0) { + ret = 0; break; + } if (!is_reg || sirc < 0) continue; @@ -2406,6 +2392,49 @@ int setup_intlist(struct intlist **list, const char *list_str, return 0; } +static int setup_addrlist(struct intlist **addr_list, struct strlist *sym_list) +{ + struct str_node *pos, *tmp; + unsigned long val; + char *sep; + const char *end; + int i = 0, err; + + *addr_list = intlist__new(NULL); + if (!*addr_list) + return -1; + + strlist__for_each_entry_safe(pos, tmp, sym_list) { + errno = 0; + val = strtoul(pos->s, &sep, 16); + if (errno || (sep == pos->s)) + continue; + + if (*sep != '\0') { + end = pos->s + strlen(pos->s) - 1; + while (end >= sep && isspace(*end)) + end--; + + if (end >= sep) + continue; + } + + err = intlist__add(*addr_list, val); + if (err) + break; + + strlist__remove(sym_list, pos); + i++; + } + + if (i == 0) { + intlist__delete(*addr_list); + *addr_list = NULL; + } + + return 0; +} + static bool symbol__read_kptr_restrict(void) { bool value = false; @@ -2489,6 +2518,10 @@ int symbol__init(struct perf_env *env) symbol_conf.sym_list_str, "symbol") < 0) goto out_free_tid_list; + if (symbol_conf.sym_list && + setup_addrlist(&symbol_conf.addr_list, symbol_conf.sym_list) < 0) + goto out_free_sym_list; + if (setup_list(&symbol_conf.bt_stop_list, symbol_conf.bt_stop_list_str, "symbol") < 0) goto out_free_sym_list; @@ -2512,6 +2545,7 @@ int symbol__init(struct perf_env *env) out_free_sym_list: strlist__delete(symbol_conf.sym_list); + intlist__delete(symbol_conf.addr_list); out_free_tid_list: intlist__delete(symbol_conf.tid_list); out_free_pid_list: @@ -2533,6 +2567,7 @@ void symbol__exit(void) strlist__delete(symbol_conf.comm_list); intlist__delete(symbol_conf.tid_list); intlist__delete(symbol_conf.pid_list); + intlist__delete(symbol_conf.addr_list); vmlinux_path__exit(); symbol_conf.sym_list = symbol_conf.dso_list = symbol_conf.comm_list = NULL; symbol_conf.bt_stop_list = NULL; diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h index b916afb95ec5..a70b3ec09dac 100644 --- a/tools/perf/util/symbol_conf.h +++ b/tools/perf/util/symbol_conf.h @@ -42,7 +42,8 @@ struct symbol_conf { report_block, report_individual_block, inline_name, - disable_add2line_warn; + disable_add2line_warn, + buildid_mmap2; const char *vmlinux_name, *kallsyms_name, *source_prefix, @@ -69,11 +70,13 @@ struct symbol_conf { *sym_to_list, *bt_stop_list; struct intlist *pid_list, - *tid_list; + *tid_list, + *addr_list; const char *symfs; int res_sample; int pad_output_len_dso; int group_sort_idx; + int addr_range; }; extern struct symbol_conf symbol_conf; diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 2947e3f3c6d9..b698046ec2db 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -24,7 +24,6 @@ #include <linux/perf_event.h> #include <asm/bug.h> #include <perf/evsel.h> -#include <internal/cpumap.h> #include <perf/cpumap.h> #include <internal/lib.h> // page_size #include <internal/threadmap.h> @@ -69,19 +68,22 @@ int perf_tool__process_synth_event(struct perf_tool *tool, * Assumes that the first 4095 bytes of /proc/pid/stat contains * the comm, tgid and ppid. */ -static int perf_event__get_comm_ids(pid_t pid, char *comm, size_t len, - pid_t *tgid, pid_t *ppid) +static int perf_event__get_comm_ids(pid_t pid, pid_t tid, char *comm, size_t len, + pid_t *tgid, pid_t *ppid, bool *kernel) { char bf[4096]; int fd; size_t size = 0; ssize_t n; - char *name, *tgids, *ppids; + char *name, *tgids, *ppids, *vmpeak, *threads; *tgid = -1; *ppid = -1; - snprintf(bf, sizeof(bf), "/proc/%d/status", pid); + if (pid) + snprintf(bf, sizeof(bf), "/proc/%d/task/%d/status", pid, tid); + else + snprintf(bf, sizeof(bf), "/proc/%d/status", tid); fd = open(bf, O_RDONLY); if (fd < 0) { @@ -93,14 +95,20 @@ static int perf_event__get_comm_ids(pid_t pid, char *comm, size_t len, close(fd); if (n <= 0) { pr_warning("Couldn't get COMM, tigd and ppid for pid %d\n", - pid); + tid); return -1; } bf[n] = '\0'; name = strstr(bf, "Name:"); - tgids = strstr(bf, "Tgid:"); - ppids = strstr(bf, "PPid:"); + tgids = strstr(name ?: bf, "Tgid:"); + ppids = strstr(tgids ?: bf, "PPid:"); + vmpeak = strstr(ppids ?: bf, "VmPeak:"); + + if (vmpeak) + threads = NULL; + else + threads = strstr(ppids ?: bf, "Threads:"); if (name) { char *nl; @@ -116,29 +124,34 @@ static int perf_event__get_comm_ids(pid_t pid, char *comm, size_t len, memcpy(comm, name, size); comm[size] = '\0'; } else { - pr_debug("Name: string not found for pid %d\n", pid); + pr_debug("Name: string not found for pid %d\n", tid); } if (tgids) { tgids += 5; /* strlen("Tgid:") */ *tgid = atoi(tgids); } else { - pr_debug("Tgid: string not found for pid %d\n", pid); + pr_debug("Tgid: string not found for pid %d\n", tid); } if (ppids) { ppids += 5; /* strlen("PPid:") */ *ppid = atoi(ppids); } else { - pr_debug("PPid: string not found for pid %d\n", pid); + pr_debug("PPid: string not found for pid %d\n", tid); } + if (!vmpeak && threads) + *kernel = true; + else + *kernel = false; + return 0; } -static int perf_event__prepare_comm(union perf_event *event, pid_t pid, +static int perf_event__prepare_comm(union perf_event *event, pid_t pid, pid_t tid, struct machine *machine, - pid_t *tgid, pid_t *ppid) + pid_t *tgid, pid_t *ppid, bool *kernel) { size_t size; @@ -147,9 +160,9 @@ static int perf_event__prepare_comm(union perf_event *event, pid_t pid, memset(&event->comm, 0, sizeof(event->comm)); if (machine__is_host(machine)) { - if (perf_event__get_comm_ids(pid, event->comm.comm, + if (perf_event__get_comm_ids(pid, tid, event->comm.comm, sizeof(event->comm.comm), - tgid, ppid) != 0) { + tgid, ppid, kernel) != 0) { return -1; } } else { @@ -168,7 +181,7 @@ static int perf_event__prepare_comm(union perf_event *event, pid_t pid, event->comm.header.size = (sizeof(event->comm) - (sizeof(event->comm.comm) - size) + machine->id_hdr_size); - event->comm.tid = pid; + event->comm.tid = tid; return 0; } @@ -179,8 +192,10 @@ pid_t perf_event__synthesize_comm(struct perf_tool *tool, struct machine *machine) { pid_t tgid, ppid; + bool kernel_thread; - if (perf_event__prepare_comm(event, pid, machine, &tgid, &ppid) != 0) + if (perf_event__prepare_comm(event, 0, pid, machine, &tgid, &ppid, + &kernel_thread) != 0) return -1; if (perf_tool__process_synth_event(tool, event, machine, process) != 0) @@ -347,6 +362,31 @@ static bool read_proc_maps_line(struct io *io, __u64 *start, __u64 *end, } } +static void perf_record_mmap2__read_build_id(struct perf_record_mmap2 *event, + bool is_kernel) +{ + struct build_id bid; + int rc; + + if (is_kernel) + rc = sysfs__read_build_id("/sys/kernel/notes", &bid); + else + rc = filename__read_build_id(event->filename, &bid) > 0 ? 0 : -1; + + if (rc == 0) { + memcpy(event->build_id, bid.data, sizeof(bid.data)); + event->build_id_size = (u8) bid.size; + event->header.misc |= PERF_RECORD_MISC_MMAP_BUILD_ID; + event->__reserved_1 = 0; + event->__reserved_2 = 0; + } else { + if (event->filename[0] == '/') { + pr_debug2("Failed to read build ID for %s\n", + event->filename); + } + } +} + int perf_event__synthesize_mmap_events(struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, @@ -453,6 +493,9 @@ out: event->mmap2.pid = tgid; event->mmap2.tid = pid; + if (symbol_conf.buildid_mmap2) + perf_record_mmap2__read_build_id(&event->mmap2, false); + if (perf_tool__process_synth_event(tool, event, machine, process) != 0) { rc = -1; break; @@ -596,16 +639,17 @@ int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t int rc = 0; struct map *pos; struct maps *maps = machine__kernel_maps(machine); - union perf_event *event = zalloc((sizeof(event->mmap) + - machine->id_hdr_size)); + union perf_event *event; + size_t size = symbol_conf.buildid_mmap2 ? + sizeof(event->mmap2) : sizeof(event->mmap); + + event = zalloc(size + machine->id_hdr_size); if (event == NULL) { pr_debug("Not enough memory synthesizing mmap event " "for kernel modules\n"); return -1; } - event->header.type = PERF_RECORD_MMAP; - /* * kernel uses 0 for user space maps, see kernel/perf_event.c * __perf_event_mmap @@ -616,23 +660,39 @@ int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; maps__for_each_entry(maps, pos) { - size_t size; - if (!__map__is_kmodule(pos)) continue; - size = PERF_ALIGN(pos->dso->long_name_len + 1, sizeof(u64)); - event->mmap.header.type = PERF_RECORD_MMAP; - event->mmap.header.size = (sizeof(event->mmap) - - (sizeof(event->mmap.filename) - size)); - memset(event->mmap.filename + size, 0, machine->id_hdr_size); - event->mmap.header.size += machine->id_hdr_size; - event->mmap.start = pos->start; - event->mmap.len = pos->end - pos->start; - event->mmap.pid = machine->pid; + if (symbol_conf.buildid_mmap2) { + size = PERF_ALIGN(pos->dso->long_name_len + 1, sizeof(u64)); + event->mmap2.header.type = PERF_RECORD_MMAP2; + event->mmap2.header.size = (sizeof(event->mmap2) - + (sizeof(event->mmap2.filename) - size)); + memset(event->mmap2.filename + size, 0, machine->id_hdr_size); + event->mmap2.header.size += machine->id_hdr_size; + event->mmap2.start = pos->start; + event->mmap2.len = pos->end - pos->start; + event->mmap2.pid = machine->pid; + + memcpy(event->mmap2.filename, pos->dso->long_name, + pos->dso->long_name_len + 1); + + perf_record_mmap2__read_build_id(&event->mmap2, false); + } else { + size = PERF_ALIGN(pos->dso->long_name_len + 1, sizeof(u64)); + event->mmap.header.type = PERF_RECORD_MMAP; + event->mmap.header.size = (sizeof(event->mmap) - + (sizeof(event->mmap.filename) - size)); + memset(event->mmap.filename + size, 0, machine->id_hdr_size); + event->mmap.header.size += machine->id_hdr_size; + event->mmap.start = pos->start; + event->mmap.len = pos->end - pos->start; + event->mmap.pid = machine->pid; + + memcpy(event->mmap.filename, pos->dso->long_name, + pos->dso->long_name_len + 1); + } - memcpy(event->mmap.filename, pos->dso->long_name, - pos->dso->long_name_len + 1); if (perf_tool__process_synth_event(tool, event, machine, process) != 0) { rc = -1; break; @@ -643,6 +703,11 @@ int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t return rc; } +static int filter_task(const struct dirent *dirent) +{ + return isdigit(dirent->d_name[0]); +} + static int __event__synthesize_thread(union perf_event *comm_event, union perf_event *mmap_event, union perf_event *fork_event, @@ -651,10 +716,10 @@ static int __event__synthesize_thread(union perf_event *comm_event, struct perf_tool *tool, struct machine *machine, bool mmap_data) { char filename[PATH_MAX]; - DIR *tasks; - struct dirent *dirent; + struct dirent **dirent; pid_t tgid, ppid; int rc = 0; + int i, n; /* special case: only send one comm event using passed in pid */ if (!full) { @@ -686,23 +751,22 @@ static int __event__synthesize_thread(union perf_event *comm_event, snprintf(filename, sizeof(filename), "%s/proc/%d/task", machine->root_dir, pid); - tasks = opendir(filename); - if (tasks == NULL) { - pr_debug("couldn't open %s\n", filename); - return 0; - } + n = scandir(filename, &dirent, filter_task, alphasort); + if (n < 0) + return n; - while ((dirent = readdir(tasks)) != NULL) { + for (i = 0; i < n; i++) { char *end; pid_t _pid; + bool kernel_thread; - _pid = strtol(dirent->d_name, &end, 10); + _pid = strtol(dirent[i]->d_name, &end, 10); if (*end) continue; rc = -1; - if (perf_event__prepare_comm(comm_event, _pid, machine, - &tgid, &ppid) != 0) + if (perf_event__prepare_comm(comm_event, pid, _pid, machine, + &tgid, &ppid, &kernel_thread) != 0) break; if (perf_event__synthesize_fork(tool, fork_event, _pid, tgid, @@ -720,7 +784,7 @@ static int __event__synthesize_thread(union perf_event *comm_event, break; rc = 0; - if (_pid == pid) { + if (_pid == pid && !kernel_thread) { /* process the parent's maps too */ rc = perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid, process, machine, mmap_data); @@ -729,7 +793,10 @@ static int __event__synthesize_thread(union perf_event *comm_event, } } - closedir(tasks); + for (i = 0; i < n; i++) + zfree(&dirent[i]); + free(dirent); + return rc; } @@ -914,7 +981,7 @@ int perf_event__synthesize_threads(struct perf_tool *tool, return 0; snprintf(proc_path, sizeof(proc_path), "%s/proc", machine->root_dir); - n = scandir(proc_path, &dirent, 0, alphasort); + n = scandir(proc_path, &dirent, filter_task, alphasort); if (n < 0) return err; @@ -991,11 +1058,12 @@ static int __perf_event__synthesize_kernel_mmap(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine) { - size_t size; + union perf_event *event; + size_t size = symbol_conf.buildid_mmap2 ? + sizeof(event->mmap2) : sizeof(event->mmap); struct map *map = machine__kernel_map(machine); struct kmap *kmap; int err; - union perf_event *event; if (map == NULL) return -1; @@ -1009,7 +1077,7 @@ static int __perf_event__synthesize_kernel_mmap(struct perf_tool *tool, * available use this, and after it is use this as a fallback for older * kernels. */ - event = zalloc((sizeof(event->mmap) + machine->id_hdr_size)); + event = zalloc(size + machine->id_hdr_size); if (event == NULL) { pr_debug("Not enough memory synthesizing mmap event " "for kernel modules\n"); @@ -1026,16 +1094,31 @@ static int __perf_event__synthesize_kernel_mmap(struct perf_tool *tool, event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; } - size = snprintf(event->mmap.filename, sizeof(event->mmap.filename), - "%s%s", machine->mmap_name, kmap->ref_reloc_sym->name) + 1; - size = PERF_ALIGN(size, sizeof(u64)); - event->mmap.header.type = PERF_RECORD_MMAP; - event->mmap.header.size = (sizeof(event->mmap) - - (sizeof(event->mmap.filename) - size) + machine->id_hdr_size); - event->mmap.pgoff = kmap->ref_reloc_sym->addr; - event->mmap.start = map->start; - event->mmap.len = map->end - event->mmap.start; - event->mmap.pid = machine->pid; + if (symbol_conf.buildid_mmap2) { + size = snprintf(event->mmap2.filename, sizeof(event->mmap2.filename), + "%s%s", machine->mmap_name, kmap->ref_reloc_sym->name) + 1; + size = PERF_ALIGN(size, sizeof(u64)); + event->mmap2.header.type = PERF_RECORD_MMAP2; + event->mmap2.header.size = (sizeof(event->mmap2) - + (sizeof(event->mmap2.filename) - size) + machine->id_hdr_size); + event->mmap2.pgoff = kmap->ref_reloc_sym->addr; + event->mmap2.start = map->start; + event->mmap2.len = map->end - event->mmap.start; + event->mmap2.pid = machine->pid; + + perf_record_mmap2__read_build_id(&event->mmap2, true); + } else { + size = snprintf(event->mmap.filename, sizeof(event->mmap.filename), + "%s%s", machine->mmap_name, kmap->ref_reloc_sym->name) + 1; + size = PERF_ALIGN(size, sizeof(u64)); + event->mmap.header.type = PERF_RECORD_MMAP; + event->mmap.header.size = (sizeof(event->mmap) - + (sizeof(event->mmap.filename) - size) + machine->id_hdr_size); + event->mmap.pgoff = kmap->ref_reloc_sym->addr; + event->mmap.start = map->start; + event->mmap.len = map->end - event->mmap.start; + event->mmap.pid = machine->pid; + } err = perf_tool__process_synth_event(tool, event, machine, process); free(event); @@ -1384,7 +1467,7 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, } } - if (type & PERF_SAMPLE_WEIGHT) + if (type & PERF_SAMPLE_WEIGHT_TYPE) result += sizeof(u64); if (type & PERF_SAMPLE_DATA_SRC) @@ -1412,6 +1495,9 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, if (type & PERF_SAMPLE_DATA_PAGE_SIZE) result += sizeof(u64); + if (type & PERF_SAMPLE_CODE_PAGE_SIZE) + result += sizeof(u64); + if (type & PERF_SAMPLE_AUX) { result += sizeof(u64); result += sample->aux_sample.size; @@ -1420,6 +1506,12 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, return result; } +void __weak arch_perf_synthesize_sample_weight(const struct perf_sample *data, + __u64 *array, u64 type __maybe_unused) +{ + *array = data->weight; +} + int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_format, const struct perf_sample *sample) { @@ -1555,8 +1647,8 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo } } - if (type & PERF_SAMPLE_WEIGHT) { - *array = sample->weight; + if (type & PERF_SAMPLE_WEIGHT_TYPE) { + arch_perf_synthesize_sample_weight(sample, array, type); array++; } @@ -1596,6 +1688,11 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo array++; } + if (type & PERF_SAMPLE_CODE_PAGE_SIZE) { + *array = sample->code_page_size; + array++; + } + if (type & PERF_SAMPLE_AUX) { sz = sample->aux_sample.size; *array++ = sz; diff --git a/tools/perf/util/target.c b/tools/perf/util/target.c index a3db13dea937..0f383418e3df 100644 --- a/tools/perf/util/target.c +++ b/tools/perf/util/target.c @@ -56,6 +56,34 @@ enum target_errno target__validate(struct target *target) ret = TARGET_ERRNO__UID_OVERRIDE_SYSTEM; } + /* BPF and CPU are mutually exclusive */ + if (target->bpf_str && target->cpu_list) { + target->cpu_list = NULL; + if (ret == TARGET_ERRNO__SUCCESS) + ret = TARGET_ERRNO__BPF_OVERRIDE_CPU; + } + + /* BPF and PID/TID are mutually exclusive */ + if (target->bpf_str && target->tid) { + target->tid = NULL; + if (ret == TARGET_ERRNO__SUCCESS) + ret = TARGET_ERRNO__BPF_OVERRIDE_PID; + } + + /* BPF and UID are mutually exclusive */ + if (target->bpf_str && target->uid_str) { + target->uid_str = NULL; + if (ret == TARGET_ERRNO__SUCCESS) + ret = TARGET_ERRNO__BPF_OVERRIDE_UID; + } + + /* BPF and THREADS are mutually exclusive */ + if (target->bpf_str && target->per_thread) { + target->per_thread = false; + if (ret == TARGET_ERRNO__SUCCESS) + ret = TARGET_ERRNO__BPF_OVERRIDE_THREAD; + } + /* THREAD and SYSTEM/CPU are mutually exclusive */ if (target->per_thread && (target->system_wide || target->cpu_list)) { target->per_thread = false; @@ -109,6 +137,10 @@ static const char *target__error_str[] = { "PID/TID switch overriding SYSTEM", "UID switch overriding SYSTEM", "SYSTEM/CPU switch overriding PER-THREAD", + "BPF switch overriding CPU", + "BPF switch overriding PID/TID", + "BPF switch overriding UID", + "BPF switch overriding THREAD", "Invalid User: %s", "Problems obtaining information for user %s", }; @@ -134,7 +166,7 @@ int target__strerror(struct target *target, int errnum, switch (errnum) { case TARGET_ERRNO__PID_OVERRIDE_CPU ... - TARGET_ERRNO__SYSTEM_OVERRIDE_THREAD: + TARGET_ERRNO__BPF_OVERRIDE_THREAD: snprintf(buf, buflen, "%s", msg); break; diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h index 6ef01a83b24e..f132c6c2eef8 100644 --- a/tools/perf/util/target.h +++ b/tools/perf/util/target.h @@ -10,6 +10,7 @@ struct target { const char *tid; const char *cpu_list; const char *uid_str; + const char *bpf_str; uid_t uid; bool system_wide; bool uses_mmap; @@ -36,6 +37,10 @@ enum target_errno { TARGET_ERRNO__PID_OVERRIDE_SYSTEM, TARGET_ERRNO__UID_OVERRIDE_SYSTEM, TARGET_ERRNO__SYSTEM_OVERRIDE_THREAD, + TARGET_ERRNO__BPF_OVERRIDE_CPU, + TARGET_ERRNO__BPF_OVERRIDE_PID, + TARGET_ERRNO__BPF_OVERRIDE_UID, + TARGET_ERRNO__BPF_OVERRIDE_THREAD, /* for target__parse_uid() */ TARGET_ERRNO__INVALID_UID, @@ -59,6 +64,11 @@ static inline bool target__has_cpu(struct target *target) return target->system_wide || target->cpu_list; } +static inline bool target__has_bpf(struct target *target) +{ + return target->bpf_str; +} + static inline bool target__none(struct target *target) { return !target__has_task(target) && !target__has_cpu(target); diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c index 0e5c4786f296..a65f65d0857e 100644 --- a/tools/perf/util/trace-event-info.c +++ b/tools/perf/util/trace-event-info.c @@ -152,7 +152,7 @@ static bool name_in_tp_list(char *sys, struct tracepoint_path *tps) return false; } -#define for_each_event(dir, dent, tps) \ +#define for_each_event_tps(dir, dent, tps) \ while ((dent = readdir(dir))) \ if (dent->d_type == DT_DIR && \ (strcmp(dent->d_name, ".")) && \ @@ -174,7 +174,7 @@ static int copy_event_system(const char *sys, struct tracepoint_path *tps) return -errno; } - for_each_event(dir, dent, tps) { + for_each_event_tps(dir, dent, tps) { if (!name_in_tp_list(dent->d_name, tps)) continue; @@ -196,7 +196,7 @@ static int copy_event_system(const char *sys, struct tracepoint_path *tps) } rewinddir(dir); - for_each_event(dir, dent, tps) { + for_each_event_tps(dir, dent, tps) { if (!name_in_tp_list(dent->d_name, tps)) continue; @@ -274,7 +274,7 @@ static int record_event_files(struct tracepoint_path *tps) goto out; } - for_each_event(dir, dent, tps) { + for_each_event_tps(dir, dent, tps) { if (strcmp(dent->d_name, "ftrace") == 0 || !system_in_tp_list(dent->d_name, tps)) continue; @@ -289,7 +289,7 @@ static int record_event_files(struct tracepoint_path *tps) } rewinddir(dir); - for_each_event(dir, dent, tps) { + for_each_event_tps(dir, dent, tps) { if (strcmp(dent->d_name, "ftrace") == 0 || !system_in_tp_list(dent->d_name, tps)) continue; diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index 0ada907c60d4..a74b517f7497 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -60,10 +60,8 @@ static int __report_module(struct addr_location *al, u64 ip, mod = dwfl_addrmodule(ui->dwfl, ip); if (mod) { Dwarf_Addr s; - void **userdatap; - dwfl_module_info(mod, &userdatap, &s, NULL, NULL, NULL, NULL, NULL); - *userdatap = dso; + dwfl_module_info(mod, NULL, &s, NULL, NULL, NULL, NULL, NULL); if (s != al->map->start - al->map->pgoff) mod = 0; } @@ -79,6 +77,13 @@ static int __report_module(struct addr_location *al, u64 ip, al->map->start - al->map->pgoff, false); } + if (mod) { + void **userdatap; + + dwfl_module_info(mod, &userdatap, NULL, NULL, NULL, NULL, NULL, NULL); + *userdatap = dso; + } + return mod && dwfl_addrmodule(ui->dwfl, ip) == mod ? 0 : -1; } diff --git a/tools/perf/util/xyarray.c b/tools/perf/util/xyarray.c deleted file mode 100644 index 86889ebc3514..000000000000 --- a/tools/perf/util/xyarray.c +++ /dev/null @@ -1,33 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include "xyarray.h" -#include <stdlib.h> -#include <string.h> -#include <linux/zalloc.h> - -struct xyarray *xyarray__new(int xlen, int ylen, size_t entry_size) -{ - size_t row_size = ylen * entry_size; - struct xyarray *xy = zalloc(sizeof(*xy) + xlen * row_size); - - if (xy != NULL) { - xy->entry_size = entry_size; - xy->row_size = row_size; - xy->entries = xlen * ylen; - xy->max_x = xlen; - xy->max_y = ylen; - } - - return xy; -} - -void xyarray__reset(struct xyarray *xy) -{ - size_t n = xy->entries * xy->entry_size; - - memset(xy->contents, 0, n); -} - -void xyarray__delete(struct xyarray *xy) -{ - free(xy); -} diff --git a/tools/power/acpi/common/cmfsize.c b/tools/power/acpi/common/cmfsize.c index d1d18ff5c911..9ea2c0aeb86c 100644 --- a/tools/power/acpi/common/cmfsize.c +++ b/tools/power/acpi/common/cmfsize.c @@ -3,7 +3,7 @@ * * Module Name: cfsize - Common get file size function * - * Copyright (C) 2000 - 2020, Intel Corp. + * Copyright (C) 2000 - 2021, Intel Corp. * *****************************************************************************/ diff --git a/tools/power/acpi/common/getopt.c b/tools/power/acpi/common/getopt.c index c3708f30ab3a..3c265bc917a1 100644 --- a/tools/power/acpi/common/getopt.c +++ b/tools/power/acpi/common/getopt.c @@ -3,7 +3,7 @@ * * Module Name: getopt * - * Copyright (C) 2000 - 2020, Intel Corp. + * Copyright (C) 2000 - 2021, Intel Corp. * *****************************************************************************/ diff --git a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c index 11c5046dce16..ccabdbaae6a4 100644 --- a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c +++ b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c @@ -3,7 +3,7 @@ * * Module Name: oslinuxtbl - Linux OSL for obtaining ACPI tables * - * Copyright (C) 2000 - 2020, Intel Corp. + * Copyright (C) 2000 - 2021, Intel Corp. * *****************************************************************************/ diff --git a/tools/power/acpi/os_specific/service_layers/osunixdir.c b/tools/power/acpi/os_specific/service_layers/osunixdir.c index fd05ddee240f..edd99274cd12 100644 --- a/tools/power/acpi/os_specific/service_layers/osunixdir.c +++ b/tools/power/acpi/os_specific/service_layers/osunixdir.c @@ -3,7 +3,7 @@ * * Module Name: osunixdir - Unix directory access interfaces * - * Copyright (C) 2000 - 2020, Intel Corp. + * Copyright (C) 2000 - 2021, Intel Corp. * *****************************************************************************/ diff --git a/tools/power/acpi/os_specific/service_layers/osunixmap.c b/tools/power/acpi/os_specific/service_layers/osunixmap.c index c565546e85bc..fee0022560d5 100644 --- a/tools/power/acpi/os_specific/service_layers/osunixmap.c +++ b/tools/power/acpi/os_specific/service_layers/osunixmap.c @@ -3,7 +3,7 @@ * * Module Name: osunixmap - Unix OSL for file mappings * - * Copyright (C) 2000 - 2020, Intel Corp. + * Copyright (C) 2000 - 2021, Intel Corp. * *****************************************************************************/ diff --git a/tools/power/acpi/os_specific/service_layers/osunixxf.c b/tools/power/acpi/os_specific/service_layers/osunixxf.c index 5b2fd968535f..0861728da562 100644 --- a/tools/power/acpi/os_specific/service_layers/osunixxf.c +++ b/tools/power/acpi/os_specific/service_layers/osunixxf.c @@ -3,7 +3,7 @@ * * Module Name: osunixxf - UNIX OSL interfaces * - * Copyright (C) 2000 - 2020, Intel Corp. + * Copyright (C) 2000 - 2021, Intel Corp. * *****************************************************************************/ diff --git a/tools/power/acpi/tools/acpidump/acpidump.h b/tools/power/acpi/tools/acpidump/acpidump.h index 26a5eae9f87f..e0ebc1dab1cc 100644 --- a/tools/power/acpi/tools/acpidump/acpidump.h +++ b/tools/power/acpi/tools/acpidump/acpidump.h @@ -3,7 +3,7 @@ * * Module Name: acpidump.h - Include file for acpi_dump utility * - * Copyright (C) 2000 - 2020, Intel Corp. + * Copyright (C) 2000 - 2021, Intel Corp. * *****************************************************************************/ diff --git a/tools/power/acpi/tools/acpidump/apdump.c b/tools/power/acpi/tools/acpidump/apdump.c index 76433296055d..444e3d78bd89 100644 --- a/tools/power/acpi/tools/acpidump/apdump.c +++ b/tools/power/acpi/tools/acpidump/apdump.c @@ -3,7 +3,7 @@ * * Module Name: apdump - Dump routines for ACPI tables (acpidump) * - * Copyright (C) 2000 - 2020, Intel Corp. + * Copyright (C) 2000 - 2021, Intel Corp. * *****************************************************************************/ diff --git a/tools/power/acpi/tools/acpidump/apfiles.c b/tools/power/acpi/tools/acpidump/apfiles.c index a682bae4e6f6..da0c6e13042b 100644 --- a/tools/power/acpi/tools/acpidump/apfiles.c +++ b/tools/power/acpi/tools/acpidump/apfiles.c @@ -3,7 +3,7 @@ * * Module Name: apfiles - File-related functions for acpidump utility * - * Copyright (C) 2000 - 2020, Intel Corp. + * Copyright (C) 2000 - 2021, Intel Corp. * *****************************************************************************/ diff --git a/tools/power/acpi/tools/acpidump/apmain.c b/tools/power/acpi/tools/acpidump/apmain.c index 046e6b8d6baa..a4cf6042fcfd 100644 --- a/tools/power/acpi/tools/acpidump/apmain.c +++ b/tools/power/acpi/tools/acpidump/apmain.c @@ -3,7 +3,7 @@ * * Module Name: apmain - Main module for the acpidump utility * - * Copyright (C) 2000 - 2020, Intel Corp. + * Copyright (C) 2000 - 2021, Intel Corp. * *****************************************************************************/ diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index c7bcddbd486d..3b1594447f29 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -270,14 +270,14 @@ clean: $(MAKE) -C bench O=$(OUTPUT) clean -install-lib: +install-lib: libcpupower $(INSTALL) -d $(DESTDIR)${libdir} $(CP) $(OUTPUT)libcpupower.so* $(DESTDIR)${libdir}/ $(INSTALL) -d $(DESTDIR)${includedir} $(INSTALL_DATA) lib/cpufreq.h $(DESTDIR)${includedir}/cpufreq.h $(INSTALL_DATA) lib/cpuidle.h $(DESTDIR)${includedir}/cpuidle.h -install-tools: +install-tools: $(OUTPUT)cpupower $(INSTALL) -d $(DESTDIR)${bindir} $(INSTALL_PROGRAM) $(OUTPUT)cpupower $(DESTDIR)${bindir} $(INSTALL) -d $(DESTDIR)${bash_completion_dir} @@ -293,14 +293,14 @@ install-man: $(INSTALL_DATA) -D man/cpupower-info.1 $(DESTDIR)${mandir}/man1/cpupower-info.1 $(INSTALL_DATA) -D man/cpupower-monitor.1 $(DESTDIR)${mandir}/man1/cpupower-monitor.1 -install-gmo: +install-gmo: create-gmo $(INSTALL) -d $(DESTDIR)${localedir} for HLANG in $(LANGUAGES); do \ echo '$(INSTALL_DATA) -D $(OUTPUT)po/$$HLANG.gmo $(DESTDIR)${localedir}/$$HLANG/LC_MESSAGES/cpupower.mo'; \ $(INSTALL_DATA) -D $(OUTPUT)po/$$HLANG.gmo $(DESTDIR)${localedir}/$$HLANG/LC_MESSAGES/cpupower.mo; \ done; -install-bench: +install-bench: compile-bench @#DESTDIR must be set from outside to survive @sbindir=$(sbindir) bindir=$(bindir) docdir=$(docdir) confdir=$(confdir) $(MAKE) -C bench O=$(OUTPUT) install diff --git a/tools/power/cpupower/bench/Makefile b/tools/power/cpupower/bench/Makefile index f68b4bc55273..d9d9923af85c 100644 --- a/tools/power/cpupower/bench/Makefile +++ b/tools/power/cpupower/bench/Makefile @@ -27,7 +27,7 @@ $(OUTPUT)cpufreq-bench: $(OBJS) all: $(OUTPUT)cpufreq-bench -install: +install: $(OUTPUT)cpufreq-bench mkdir -p $(DESTDIR)/$(sbindir) mkdir -p $(DESTDIR)/$(bindir) mkdir -p $(DESTDIR)/$(docdir) diff --git a/tools/power/cpupower/utils/cpufreq-info.c b/tools/power/cpupower/utils/cpufreq-info.c index 6efc0f6b1b11..f9895e31ff5a 100644 --- a/tools/power/cpupower/utils/cpufreq-info.c +++ b/tools/power/cpupower/utils/cpufreq-info.c @@ -186,8 +186,7 @@ static int get_boost_mode_x86(unsigned int cpu) if ((cpupower_cpu_info.vendor == X86_VENDOR_AMD && cpupower_cpu_info.family >= 0x10) || cpupower_cpu_info.vendor == X86_VENDOR_HYGON) { - ret = decode_pstates(cpu, cpupower_cpu_info.family, b_states, - pstates, &pstate_no); + ret = decode_pstates(cpu, b_states, pstates, &pstate_no); if (ret) return ret; diff --git a/tools/power/cpupower/utils/helpers/amd.c b/tools/power/cpupower/utils/helpers/amd.c index 7c4f83a8c973..97f2c857048e 100644 --- a/tools/power/cpupower/utils/helpers/amd.c +++ b/tools/power/cpupower/utils/helpers/amd.c @@ -13,7 +13,8 @@ #define MSR_AMD_PSTATE 0xc0010064 #define MSR_AMD_PSTATE_LIMIT 0xc0010061 -union msr_pstate { +union core_pstate { + /* pre fam 17h: */ struct { unsigned fid:6; unsigned did:3; @@ -26,7 +27,8 @@ union msr_pstate { unsigned idddiv:2; unsigned res3:21; unsigned en:1; - } bits; + } pstate; + /* since fam 17h: */ struct { unsigned fid:8; unsigned did:6; @@ -35,37 +37,37 @@ union msr_pstate { unsigned idddiv:2; unsigned res1:31; unsigned en:1; - } fam17h_bits; + } pstatedef; unsigned long long val; }; -static int get_did(int family, union msr_pstate pstate) +static int get_did(union core_pstate pstate) { int t; - if (family == 0x12) + if (cpupower_cpu_info.caps & CPUPOWER_CAP_AMD_PSTATEDEF) + t = pstate.pstatedef.did; + else if (cpupower_cpu_info.family == 0x12) t = pstate.val & 0xf; - else if (family == 0x17 || family == 0x18) - t = pstate.fam17h_bits.did; else - t = pstate.bits.did; + t = pstate.pstate.did; return t; } -static int get_cof(int family, union msr_pstate pstate) +static int get_cof(union core_pstate pstate) { int t; int fid, did, cof; - did = get_did(family, pstate); - if (family == 0x17 || family == 0x18) { - fid = pstate.fam17h_bits.fid; + did = get_did(pstate); + if (cpupower_cpu_info.caps & CPUPOWER_CAP_AMD_PSTATEDEF) { + fid = pstate.pstatedef.fid; cof = 200 * fid / did; } else { t = 0x10; - fid = pstate.bits.fid; - if (family == 0x11) + fid = pstate.pstate.fid; + if (cpupower_cpu_info.family == 0x11) t = 0x8; cof = (100 * (fid + t)) >> did; } @@ -74,8 +76,7 @@ static int get_cof(int family, union msr_pstate pstate) /* Needs: * cpu -> the cpu that gets evaluated - * cpu_family -> The cpu's family (0x10, 0x12,...) - * boots_states -> how much boost states the machines support + * boost_states -> how much boost states the machines support * * Fills up: * pstates -> a pointer to an array of size MAX_HW_PSTATES @@ -85,31 +86,23 @@ static int get_cof(int family, union msr_pstate pstate) * * returns zero on success, -1 on failure */ -int decode_pstates(unsigned int cpu, unsigned int cpu_family, - int boost_states, unsigned long *pstates, int *no) +int decode_pstates(unsigned int cpu, int boost_states, + unsigned long *pstates, int *no) { - int i, psmax, pscur; - union msr_pstate pstate; + int i, psmax; + union core_pstate pstate; unsigned long long val; - /* Only read out frequencies from HW when CPU might be boostable - to keep the code as short and clean as possible. - Otherwise frequencies are exported via ACPI tables. - */ - if (cpu_family < 0x10 || cpu_family == 0x14) + /* Only read out frequencies from HW if HW Pstate is supported, + * otherwise frequencies are exported via ACPI tables. + */ + if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_AMD_HW_PSTATE)) return -1; if (read_msr(cpu, MSR_AMD_PSTATE_LIMIT, &val)) return -1; psmax = (val >> 4) & 0x7; - - if (read_msr(cpu, MSR_AMD_PSTATE_STATUS, &val)) - return -1; - - pscur = val & 0x7; - - pscur += boost_states; psmax += boost_states; for (i = 0; i <= psmax; i++) { if (i >= MAX_HW_PSTATES) { @@ -119,12 +112,12 @@ int decode_pstates(unsigned int cpu, unsigned int cpu_family, } if (read_msr(cpu, MSR_AMD_PSTATE + i, &pstate.val)) return -1; - if ((cpu_family == 0x17) && (!pstate.fam17h_bits.en)) - continue; - else if (!pstate.bits.en) + + /* The enabled bit (bit 63) is common for all families */ + if (!pstate.pstatedef.en) continue; - pstates[i] = get_cof(cpu_family, pstate); + pstates[i] = get_cof(pstate); } *no = i; return 0; diff --git a/tools/power/cpupower/utils/helpers/cpuid.c b/tools/power/cpupower/utils/helpers/cpuid.c index 73bfafc60e9b..72eb43593180 100644 --- a/tools/power/cpupower/utils/helpers/cpuid.c +++ b/tools/power/cpupower/utils/helpers/cpuid.c @@ -128,9 +128,23 @@ out: /* AMD or Hygon Boost state enable/disable register */ if (cpu_info->vendor == X86_VENDOR_AMD || cpu_info->vendor == X86_VENDOR_HYGON) { - if (ext_cpuid_level >= 0x80000007 && - (cpuid_edx(0x80000007) & (1 << 9))) - cpu_info->caps |= CPUPOWER_CAP_AMD_CBP; + if (ext_cpuid_level >= 0x80000007) { + if (cpuid_edx(0x80000007) & (1 << 9)) { + cpu_info->caps |= CPUPOWER_CAP_AMD_CPB; + + if (cpu_info->family >= 0x17) + cpu_info->caps |= CPUPOWER_CAP_AMD_CPB_MSR; + } + + if ((cpuid_edx(0x80000007) & (1 << 7)) && + cpu_info->family != 0x14) { + /* HW pstate was not implemented in family 0x14 */ + cpu_info->caps |= CPUPOWER_CAP_AMD_HW_PSTATE; + + if (cpu_info->family >= 0x17) + cpu_info->caps |= CPUPOWER_CAP_AMD_PSTATEDEF; + } + } if (ext_cpuid_level >= 0x80000008 && cpuid_ebx(0x80000008) & (1 << 4)) diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h index 0642e60a6ce1..33ffacee7fcb 100644 --- a/tools/power/cpupower/utils/helpers/helpers.h +++ b/tools/power/cpupower/utils/helpers/helpers.h @@ -64,12 +64,15 @@ enum cpupower_cpu_vendor {X86_VENDOR_UNKNOWN = 0, X86_VENDOR_INTEL, #define CPUPOWER_CAP_INV_TSC 0x00000001 #define CPUPOWER_CAP_APERF 0x00000002 -#define CPUPOWER_CAP_AMD_CBP 0x00000004 +#define CPUPOWER_CAP_AMD_CPB 0x00000004 #define CPUPOWER_CAP_PERF_BIAS 0x00000008 #define CPUPOWER_CAP_HAS_TURBO_RATIO 0x00000010 #define CPUPOWER_CAP_IS_SNB 0x00000020 #define CPUPOWER_CAP_INTEL_IDA 0x00000040 #define CPUPOWER_CAP_AMD_RDPRU 0x00000080 +#define CPUPOWER_CAP_AMD_HW_PSTATE 0x00000100 +#define CPUPOWER_CAP_AMD_PSTATEDEF 0x00000200 +#define CPUPOWER_CAP_AMD_CPB_MSR 0x00000400 #define CPUPOWER_AMD_CPBDIS 0x02000000 @@ -125,8 +128,8 @@ extern struct pci_dev *pci_slot_func_init(struct pci_access **pacc, /* AMD HW pstate decoding **************************/ -extern int decode_pstates(unsigned int cpu, unsigned int cpu_family, - int boost_states, unsigned long *pstates, int *no); +extern int decode_pstates(unsigned int cpu, int boost_states, + unsigned long *pstates, int *no); /* AMD HW pstate decoding **************************/ @@ -143,9 +146,8 @@ unsigned int cpuid_edx(unsigned int op); /* cpuid and cpuinfo helpers **************************/ /* X86 ONLY ********************************************/ #else -static inline int decode_pstates(unsigned int cpu, unsigned int cpu_family, - int boost_states, unsigned long *pstates, - int *no) +static inline int decode_pstates(unsigned int cpu, int boost_states, + unsigned long *pstates, int *no) { return -1; }; static inline int read_msr(int cpu, unsigned int idx, unsigned long long *val) diff --git a/tools/power/cpupower/utils/helpers/misc.c b/tools/power/cpupower/utils/helpers/misc.c index 650b9a9a6584..fc6e34511721 100644 --- a/tools/power/cpupower/utils/helpers/misc.c +++ b/tools/power/cpupower/utils/helpers/misc.c @@ -16,17 +16,12 @@ int cpufreq_has_boost_support(unsigned int cpu, int *support, int *active, int *states) { - struct cpupower_cpu_info cpu_info; int ret; unsigned long long val; *support = *active = *states = 0; - ret = get_cpu_info(&cpu_info); - if (ret) - return ret; - - if (cpupower_cpu_info.caps & CPUPOWER_CAP_AMD_CBP) { + if (cpupower_cpu_info.caps & CPUPOWER_CAP_AMD_CPB) { *support = 1; /* AMD Family 0x17 does not utilize PCI D18F4 like prior @@ -34,7 +29,7 @@ int cpufreq_has_boost_support(unsigned int cpu, int *support, int *active, * has Hardware determined variable increments instead. */ - if (cpu_info.family == 0x17 || cpu_info.family == 0x18) { + if (cpupower_cpu_info.caps & CPUPOWER_CAP_AMD_CPB_MSR) { if (!read_msr(cpu, MSR_AMD_HWCR, &val)) { if (!(val & CPUPOWER_AMD_CPBDIS)) *active = 1; diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index 5390158cdb40..582feb88eca3 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -15,7 +15,7 @@ struct process_cmd_struct { int arg; }; -static const char *version_str = "v1.7"; +static const char *version_str = "v1.8"; static const int supported_api_ver = 1; static struct isst_if_platform_info isst_platform_info; static char *progname; @@ -1249,6 +1249,8 @@ static void dump_isst_config(int arg) isst_ctdp_display_information_end(outf); } +static void adjust_scaling_max_from_base_freq(int cpu); + static void set_tdp_level_for_cpu(int cpu, void *arg1, void *arg2, void *arg3, void *arg4) { @@ -1267,6 +1269,9 @@ static void set_tdp_level_for_cpu(int cpu, void *arg1, void *arg2, void *arg3, int pkg_id = get_physical_package_id(cpu); int die_id = get_physical_die_id(cpu); + /* Wait for updated base frequencies */ + usleep(2000); + fprintf(stderr, "Option is set to online/offline\n"); ctdp_level.core_cpumask_size = alloc_cpu_set(&ctdp_level.core_cpumask); @@ -1283,6 +1288,7 @@ static void set_tdp_level_for_cpu(int cpu, void *arg1, void *arg2, void *arg3, if (CPU_ISSET_S(i, ctdp_level.core_cpumask_size, ctdp_level.core_cpumask)) { fprintf(stderr, "online cpu %d\n", i); set_cpu_online_offline(i, 1); + adjust_scaling_max_from_base_freq(i); } else { fprintf(stderr, "offline cpu %d\n", i); set_cpu_online_offline(i, 0); @@ -1440,6 +1446,31 @@ static int set_cpufreq_scaling_min_max(int cpu, int max, int freq) return 0; } +static int no_turbo(void) +{ + return parse_int_file(0, "/sys/devices/system/cpu/intel_pstate/no_turbo"); +} + +static void adjust_scaling_max_from_base_freq(int cpu) +{ + int base_freq, scaling_max_freq; + + scaling_max_freq = parse_int_file(0, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_max_freq", cpu); + base_freq = get_cpufreq_base_freq(cpu); + if (scaling_max_freq < base_freq || no_turbo()) + set_cpufreq_scaling_min_max(cpu, 1, base_freq); +} + +static void adjust_scaling_min_from_base_freq(int cpu) +{ + int base_freq, scaling_min_freq; + + scaling_min_freq = parse_int_file(0, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_min_freq", cpu); + base_freq = get_cpufreq_base_freq(cpu); + if (scaling_min_freq < base_freq) + set_cpufreq_scaling_min_max(cpu, 0, base_freq); +} + static int set_clx_pbf_cpufreq_scaling_min_max(int cpu) { struct isst_pkg_ctdp_level_info *ctdp_level; @@ -1537,6 +1568,7 @@ static void set_scaling_min_to_cpuinfo_max(int cpu) continue; set_cpufreq_scaling_min_max_from_cpuinfo(i, 1, 0); + adjust_scaling_min_from_base_freq(i); } } @@ -2272,6 +2304,102 @@ static void get_clos_assoc(int arg) isst_ctdp_display_information_end(outf); } +static void set_turbo_mode_for_cpu(int cpu, int status) +{ + int base_freq; + + if (status) { + base_freq = get_cpufreq_base_freq(cpu); + set_cpufreq_scaling_min_max(cpu, 1, base_freq); + } else { + set_scaling_max_to_cpuinfo_max(cpu); + } + + if (status) { + isst_display_result(cpu, outf, "turbo-mode", "enable", 0); + } else { + isst_display_result(cpu, outf, "turbo-mode", "disable", 0); + } +} + +static void set_turbo_mode(int arg) +{ + int i, enable = arg; + + if (cmd_help) { + if (enable) + fprintf(stderr, "Set turbo mode enable\n"); + else + fprintf(stderr, "Set turbo mode disable\n"); + exit(0); + } + + isst_ctdp_display_information_start(outf); + + for (i = 0; i < topo_max_cpus; ++i) { + int online; + + if (i) + online = parse_int_file( + 1, "/sys/devices/system/cpu/cpu%d/online", i); + else + online = + 1; /* online entry for CPU 0 needs some special configs */ + + if (online) + set_turbo_mode_for_cpu(i, enable); + + } + isst_ctdp_display_information_end(outf); +} + +static void get_set_trl(int cpu, void *arg1, void *arg2, void *arg3, + void *arg4) +{ + unsigned long long trl; + int set = *(int *)arg4; + int ret; + + if (set && !fact_trl) { + isst_display_error_info_message(1, "Invalid TRL. Specify with [-t|--trl]", 0, 0); + exit(0); + } + + if (set) { + ret = isst_set_trl(cpu, fact_trl); + isst_display_result(cpu, outf, "turbo-mode", "set-trl", ret); + return; + } + + ret = isst_get_trl(cpu, &trl); + if (ret) + isst_display_result(cpu, outf, "turbo-mode", "get-trl", ret); + else + isst_trl_display_information(cpu, outf, trl); +} + +static void process_trl(int arg) +{ + if (cmd_help) { + if (arg) { + fprintf(stderr, "Set TRL (turbo ratio limits)\n"); + fprintf(stderr, "\t t|--trl: Specify turbo ratio limit for setting TRL\n"); + } else { + fprintf(stderr, "Get TRL (turbo ratio limits)\n"); + } + exit(0); + } + + isst_ctdp_display_information_start(outf); + if (max_target_cpus) + for_each_online_target_cpu_in_set(get_set_trl, NULL, + NULL, NULL, &arg); + else + for_each_online_package_in_set(get_set_trl, NULL, + NULL, NULL, &arg); + isst_ctdp_display_information_end(outf); +} + static struct process_cmd_struct clx_n_cmds[] = { { "perf-profile", "info", dump_isst_config, 0 }, { "base-freq", "info", dump_pbf_config, 0 }, @@ -2302,6 +2430,10 @@ static struct process_cmd_struct isst_cmds[] = { { "core-power", "get-config", dump_clos_config, 0 }, { "core-power", "assoc", set_clos_assoc, 0 }, { "core-power", "get-assoc", get_clos_assoc, 0 }, + { "turbo-mode", "enable", set_turbo_mode, 0 }, + { "turbo-mode", "disable", set_turbo_mode, 1 }, + { "turbo-mode", "get-trl", process_trl, 0 }, + { "turbo-mode", "set-trl", process_trl, 1 }, { NULL, NULL, NULL } }; @@ -2517,6 +2649,16 @@ static void fact_help(void) printf("\tcommand : disable\n"); } +static void turbo_mode_help(void) +{ + printf("turbo-mode:\tEnables users to enable/disable turbo mode by adjusting frequency settings. Also allows to get and set turbo ratio limits (TRL).\n"); + printf("\tcommand : enable\n"); + printf("\tcommand : disable\n"); + printf("\tcommand : get-trl\n"); + printf("\tcommand : set-trl\n"); +} + + static void core_power_help(void) { printf("core-power:\tInterface that allows user to define per core/tile\n\ @@ -2541,6 +2683,7 @@ static struct process_cmd_help_struct isst_help_cmds[] = { { "base-freq", pbf_help }, { "turbo-freq", fact_help }, { "core-power", core_power_help }, + { "turbo-mode", turbo_mode_help }, { NULL, NULL } }; @@ -2604,7 +2747,7 @@ static void usage(void) if (is_clx_n_platform()) printf("\nFEATURE : [perf-profile|base-freq]\n"); else - printf("\nFEATURE : [perf-profile|base-freq|turbo-freq|core-power]\n"); + printf("\nFEATURE : [perf-profile|base-freq|turbo-freq|core-power|turbo-mode]\n"); printf("\nFor help on each feature, use -h|--help\n"); printf("\tFor example: intel-speed-select perf-profile -h\n"); diff --git a/tools/power/x86/intel-speed-select/isst-core.c b/tools/power/x86/intel-speed-select/isst-core.c index 8afd23407522..6a26d5769984 100644 --- a/tools/power/x86/intel-speed-select/isst-core.c +++ b/tools/power/x86/intel-speed-select/isst-core.c @@ -665,6 +665,17 @@ int isst_get_fact_info(int cpu, int level, int fact_bucket, struct isst_fact_inf return 0; } +int isst_get_trl(int cpu, unsigned long long *trl) +{ + int ret; + + ret = isst_send_msr_command(cpu, 0x1AD, 0, trl); + if (ret) + return ret; + + return 0; +} + int isst_set_trl(int cpu, unsigned long long trl) { int ret; diff --git a/tools/power/x86/intel-speed-select/isst-display.c b/tools/power/x86/intel-speed-select/isst-display.c index e105fece47b6..8e54ce47648e 100644 --- a/tools/power/x86/intel-speed-select/isst-display.c +++ b/tools/power/x86/intel-speed-select/isst-display.c @@ -763,3 +763,21 @@ void isst_display_error_info_message(int error, char *msg, int arg_valid, int ar if (!start) format_and_print(outf, 0, NULL, NULL); } + +void isst_trl_display_information(int cpu, FILE *outf, unsigned long long trl) +{ + char header[256]; + char value[256]; + int level; + + level = print_package_info(cpu, outf); + + snprintf(header, sizeof(header), "get-trl"); + format_and_print(outf, level + 1, header, NULL); + + snprintf(header, sizeof(header), "trl"); + snprintf(value, sizeof(value), "0x%llx", trl); + format_and_print(outf, level + 2, header, value); + + format_and_print(outf, level, NULL, NULL); +} diff --git a/tools/power/x86/intel-speed-select/isst.h b/tools/power/x86/intel-speed-select/isst.h index 60db0bb084d5..0cac6c54be87 100644 --- a/tools/power/x86/intel-speed-select/isst.h +++ b/tools/power/x86/intel-speed-select/isst.h @@ -228,6 +228,7 @@ extern void isst_fact_display_information(int cpu, FILE *outf, int level, int fact_bucket, int fact_avx, struct isst_fact_info *fact_info); extern int isst_set_trl(int cpu, unsigned long long trl); +extern int isst_get_trl(int cpu, unsigned long long *trl); extern int isst_set_trl_from_current_tdp(int cpu, unsigned long long trl); extern int isst_get_config_tdp_lock_status(int cpu); @@ -256,4 +257,5 @@ extern int get_cpufreq_base_freq(int cpu); extern int isst_read_pm_config(int cpu, int *cp_state, int *cp_cap); extern void isst_display_error_info_message(int error, char *msg, int arg_valid, int arg); extern int is_skx_based_platform(void); +extern void isst_trl_display_information(int cpu, FILE *outf, unsigned long long trl); #endif diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 389ea5209a83..a7c4f0772e53 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1834,12 +1834,15 @@ int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp) int get_epb(int cpu) { char path[128 + PATH_BYTES]; + unsigned long long msr; int ret, epb = -1; FILE *fp; sprintf(path, "/sys/devices/system/cpu/cpu%d/power/energy_perf_bias", cpu); - fp = fopen_or_die(path, "r"); + fp = fopen(path, "r"); + if (!fp) + goto msr_fallback; ret = fscanf(fp, "%d", &epb); if (ret != 1) @@ -1848,6 +1851,11 @@ int get_epb(int cpu) fclose(fp); return epb; + +msr_fallback: + get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr); + + return msr & 0xf; } void get_apic_id(struct thread_data *t) diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index 1358e89cdf7d..a402f32a145c 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -69,6 +69,13 @@ HOSTCC ?= gcc HOSTLD ?= ld endif +# Some tools require Clang, LLC and/or LLVM utils +CLANG ?= clang +LLC ?= llc +LLVM_CONFIG ?= llvm-config +LLVM_OBJCOPY ?= llvm-objcopy +LLVM_STRIP ?= llvm-strip + ifeq ($(CC_NO_CLANG), 1) EXTRA_WARNINGS += -Wstrict-aliasing=3 endif @@ -127,6 +134,7 @@ ifneq ($(silent),1) $(MAKE) $(PRINT_DIR) -C $$subdir QUIET_FLEX = @echo ' FLEX '$@; QUIET_BISON = @echo ' BISON '$@; + QUIET_GENSKEL = @echo ' GEN-SKEL '$@; descend = \ +@echo ' DESCEND '$(1); \ diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py index 21516e293d17..d5144fcb03ac 100755 --- a/tools/testing/kunit/kunit.py +++ b/tools/testing/kunit/kunit.py @@ -28,12 +28,12 @@ KunitBuildRequest = namedtuple('KunitBuildRequest', ['jobs', 'build_dir', 'alltests', 'make_options']) KunitExecRequest = namedtuple('KunitExecRequest', - ['timeout', 'build_dir', 'alltests']) + ['timeout', 'build_dir', 'alltests', 'filter_glob']) KunitParseRequest = namedtuple('KunitParseRequest', ['raw_output', 'input_data', 'build_dir', 'json']) KunitRequest = namedtuple('KunitRequest', ['raw_output','timeout', 'jobs', - 'build_dir', 'alltests', 'json', - 'make_options']) + 'build_dir', 'alltests', 'filter_glob', + 'json', 'make_options']) KernelDirectoryPath = sys.argv[0].split('tools/testing/kunit/')[0] @@ -43,9 +43,9 @@ class KunitStatus(Enum): BUILD_FAILURE = auto() TEST_FAILURE = auto() -def get_kernel_root_path(): - parts = sys.argv[0] if not __file__ else __file__ - parts = os.path.realpath(parts).split('tools/testing/kunit') +def get_kernel_root_path() -> str: + path = sys.argv[0] if not __file__ else __file__ + parts = os.path.realpath(path).split('tools/testing/kunit') if len(parts) != 2: sys.exit(1) return parts[0] @@ -93,6 +93,7 @@ def exec_tests(linux: kunit_kernel.LinuxSourceTree, test_start = time.time() result = linux.run_kernel( timeout=None if request.alltests else request.timeout, + filter_glob=request.filter_glob, build_dir=request.build_dir) test_end = time.time() @@ -149,7 +150,7 @@ def run_tests(linux: kunit_kernel.LinuxSourceTree, return build_result exec_request = KunitExecRequest(request.timeout, request.build_dir, - request.alltests) + request.alltests, request.filter_glob) exec_result = exec_tests(linux, exec_request) if exec_result.status != KunitStatus.SUCCESS: return exec_result @@ -171,7 +172,7 @@ def run_tests(linux: kunit_kernel.LinuxSourceTree, exec_result.elapsed_time)) return parse_result -def add_common_opts(parser): +def add_common_opts(parser) -> None: parser.add_argument('--build_dir', help='As in the make command, it specifies the build ' 'directory.', @@ -182,14 +183,17 @@ def add_common_opts(parser): parser.add_argument('--alltests', help='Run all KUnit tests through allyesconfig', action='store_true') + parser.add_argument('--kunitconfig', + help='Path to Kconfig fragment that enables KUnit tests', + metavar='kunitconfig') -def add_build_opts(parser): +def add_build_opts(parser) -> None: parser.add_argument('--jobs', help='As in the make command, "Specifies the number of ' 'jobs (commands) to run simultaneously."', type=int, default=8, metavar='jobs') -def add_exec_opts(parser): +def add_exec_opts(parser) -> None: parser.add_argument('--timeout', help='maximum number of seconds to allow for all tests ' 'to run. This does not include time taken to build the ' @@ -197,8 +201,16 @@ def add_exec_opts(parser): type=int, default=300, metavar='timeout') + parser.add_argument('filter_glob', + help='maximum number of seconds to allow for all tests ' + 'to run. This does not include time taken to build the ' + 'tests.', + type=str, + nargs='?', + default='', + metavar='filter_glob') -def add_parse_opts(parser): +def add_parse_opts(parser) -> None: parser.add_argument('--raw_output', help='don\'t format output from kernel', action='store_true') parser.add_argument('--json', @@ -256,16 +268,14 @@ def main(argv, linux=None): os.mkdir(cli_args.build_dir) if not linux: - linux = kunit_kernel.LinuxSourceTree() - - linux.create_kunitconfig(cli_args.build_dir) - linux.read_kunitconfig(cli_args.build_dir) + linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir, kunitconfig_path=cli_args.kunitconfig) request = KunitRequest(cli_args.raw_output, cli_args.timeout, cli_args.jobs, cli_args.build_dir, cli_args.alltests, + cli_args.filter_glob, cli_args.json, cli_args.make_options) result = run_tests(linux, request) @@ -277,10 +287,7 @@ def main(argv, linux=None): os.mkdir(cli_args.build_dir) if not linux: - linux = kunit_kernel.LinuxSourceTree() - - linux.create_kunitconfig(cli_args.build_dir) - linux.read_kunitconfig(cli_args.build_dir) + linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir, kunitconfig_path=cli_args.kunitconfig) request = KunitConfigRequest(cli_args.build_dir, cli_args.make_options) @@ -292,10 +299,7 @@ def main(argv, linux=None): sys.exit(1) elif cli_args.subcommand == 'build': if not linux: - linux = kunit_kernel.LinuxSourceTree() - - linux.create_kunitconfig(cli_args.build_dir) - linux.read_kunitconfig(cli_args.build_dir) + linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir, kunitconfig_path=cli_args.kunitconfig) request = KunitBuildRequest(cli_args.jobs, cli_args.build_dir, @@ -309,14 +313,12 @@ def main(argv, linux=None): sys.exit(1) elif cli_args.subcommand == 'exec': if not linux: - linux = kunit_kernel.LinuxSourceTree() - - linux.create_kunitconfig(cli_args.build_dir) - linux.read_kunitconfig(cli_args.build_dir) + linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir) exec_request = KunitExecRequest(cli_args.timeout, cli_args.build_dir, - cli_args.alltests) + cli_args.alltests, + cli_args.filter_glob) exec_result = exec_tests(linux, exec_request) parse_request = KunitParseRequest(cli_args.raw_output, exec_result.result, diff --git a/tools/testing/kunit/kunit_config.py b/tools/testing/kunit/kunit_config.py index 02ffc3a3e5dc..0b550cbd667d 100644 --- a/tools/testing/kunit/kunit_config.py +++ b/tools/testing/kunit/kunit_config.py @@ -8,6 +8,7 @@ import collections import re +from typing import List, Set CONFIG_IS_NOT_SET_PATTERN = r'^# CONFIG_(\w+) is not set$' CONFIG_PATTERN = r'^CONFIG_(\w+)=(\S+|".*")$' @@ -30,25 +31,24 @@ class KconfigParseError(Exception): class Kconfig(object): """Represents defconfig or .config specified using the Kconfig language.""" - def __init__(self): - self._entries = [] + def __init__(self) -> None: + self._entries = [] # type: List[KconfigEntry] - def entries(self): + def entries(self) -> Set[KconfigEntry]: return set(self._entries) def add_entry(self, entry: KconfigEntry) -> None: self._entries.append(entry) def is_subset_of(self, other: 'Kconfig') -> bool: + other_dict = {e.name: e.value for e in other.entries()} for a in self.entries(): - found = False - for b in other.entries(): - if a.name != b.name: + b = other_dict.get(a.name) + if b is None: + if a.value == 'n': continue - if a.value != b.value: - return False - found = True - if a.value != 'n' and found == False: + return False + elif a.value != b: return False return True diff --git a/tools/testing/kunit/kunit_json.py b/tools/testing/kunit/kunit_json.py index 624b31b2dbd6..f5cca5c38cac 100644 --- a/tools/testing/kunit/kunit_json.py +++ b/tools/testing/kunit/kunit_json.py @@ -13,7 +13,7 @@ import kunit_parser from kunit_parser import TestStatus -def get_json_result(test_result, def_config, build_dir, json_path): +def get_json_result(test_result, def_config, build_dir, json_path) -> str: sub_groups = [] # Each test suite is mapped to a KernelCI sub_group diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index 57c1724b7e5d..f309a33256cd 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -11,6 +11,7 @@ import subprocess import os import shutil import signal +from typing import Iterator from contextlib import ExitStack @@ -39,7 +40,7 @@ class BuildError(Exception): class LinuxSourceTreeOperations(object): """An abstraction over command line operations performed on a source tree.""" - def make_mrproper(self): + def make_mrproper(self) -> None: try: subprocess.check_output(['make', 'mrproper'], stderr=subprocess.STDOUT) except OSError as e: @@ -47,7 +48,7 @@ class LinuxSourceTreeOperations(object): except subprocess.CalledProcessError as e: raise ConfigError(e.output.decode()) - def make_olddefconfig(self, build_dir, make_options): + def make_olddefconfig(self, build_dir, make_options) -> None: command = ['make', 'ARCH=um', 'olddefconfig'] if make_options: command.extend(make_options) @@ -60,7 +61,7 @@ class LinuxSourceTreeOperations(object): except subprocess.CalledProcessError as e: raise ConfigError(e.output.decode()) - def make_allyesconfig(self, build_dir, make_options): + def make_allyesconfig(self, build_dir, make_options) -> None: kunit_parser.print_with_timestamp( 'Enabling all CONFIGs for UML...') command = ['make', 'ARCH=um', 'allyesconfig'] @@ -82,7 +83,7 @@ class LinuxSourceTreeOperations(object): kunit_parser.print_with_timestamp( 'Starting Kernel with all configs takes a few minutes...') - def make(self, jobs, build_dir, make_options): + def make(self, jobs, build_dir, make_options) -> None: command = ['make', 'ARCH=um', '--jobs=' + str(jobs)] if make_options: command.extend(make_options) @@ -100,7 +101,7 @@ class LinuxSourceTreeOperations(object): if stderr: # likely only due to build warnings print(stderr.decode()) - def linux_bin(self, params, timeout, build_dir): + def linux_bin(self, params, timeout, build_dir) -> None: """Runs the Linux UML binary. Must be named 'linux'.""" linux_bin = get_file_path(build_dir, 'linux') outfile = get_outfile_path(build_dir) @@ -110,23 +111,38 @@ class LinuxSourceTreeOperations(object): stderr=subprocess.STDOUT) process.wait(timeout) -def get_kconfig_path(build_dir): +def get_kconfig_path(build_dir) -> str: return get_file_path(build_dir, KCONFIG_PATH) -def get_kunitconfig_path(build_dir): +def get_kunitconfig_path(build_dir) -> str: return get_file_path(build_dir, KUNITCONFIG_PATH) -def get_outfile_path(build_dir): +def get_outfile_path(build_dir) -> str: return get_file_path(build_dir, OUTFILE_PATH) class LinuxSourceTree(object): """Represents a Linux kernel source tree with KUnit tests.""" - def __init__(self): - self._ops = LinuxSourceTreeOperations() + def __init__(self, build_dir: str, load_config=True, kunitconfig_path='') -> None: signal.signal(signal.SIGINT, self.signal_handler) - def clean(self): + self._ops = LinuxSourceTreeOperations() + + if not load_config: + return + + if kunitconfig_path: + if not os.path.exists(kunitconfig_path): + raise ConfigError(f'Specified kunitconfig ({kunitconfig_path}) does not exist') + else: + kunitconfig_path = get_kunitconfig_path(build_dir) + if not os.path.exists(kunitconfig_path): + shutil.copyfile(DEFAULT_KUNITCONFIG_PATH, kunitconfig_path) + + self._kconfig = kunit_config.Kconfig() + self._kconfig.read_from_file(kunitconfig_path) + + def clean(self) -> bool: try: self._ops.make_mrproper() except ConfigError as e: @@ -134,17 +150,7 @@ class LinuxSourceTree(object): return False return True - def create_kunitconfig(self, build_dir, defconfig=DEFAULT_KUNITCONFIG_PATH): - kunitconfig_path = get_kunitconfig_path(build_dir) - if not os.path.exists(kunitconfig_path): - shutil.copyfile(defconfig, kunitconfig_path) - - def read_kunitconfig(self, build_dir): - kunitconfig_path = get_kunitconfig_path(build_dir) - self._kconfig = kunit_config.Kconfig() - self._kconfig.read_from_file(kunitconfig_path) - - def validate_config(self, build_dir): + def validate_config(self, build_dir) -> bool: kconfig_path = get_kconfig_path(build_dir) validated_kconfig = kunit_config.Kconfig() validated_kconfig.read_from_file(kconfig_path) @@ -158,7 +164,7 @@ class LinuxSourceTree(object): return False return True - def build_config(self, build_dir, make_options): + def build_config(self, build_dir, make_options) -> bool: kconfig_path = get_kconfig_path(build_dir) if build_dir and not os.path.exists(build_dir): os.mkdir(build_dir) @@ -170,7 +176,7 @@ class LinuxSourceTree(object): return False return self.validate_config(build_dir) - def build_reconfig(self, build_dir, make_options): + def build_reconfig(self, build_dir, make_options) -> bool: """Creates a new .config if it is not a subset of the .kunitconfig.""" kconfig_path = get_kconfig_path(build_dir) if os.path.exists(kconfig_path): @@ -186,7 +192,7 @@ class LinuxSourceTree(object): print('Generating .config ...') return self.build_config(build_dir, make_options) - def build_um_kernel(self, alltests, jobs, build_dir, make_options): + def build_um_kernel(self, alltests, jobs, build_dir, make_options) -> bool: try: if alltests: self._ops.make_allyesconfig(build_dir, make_options) @@ -197,8 +203,12 @@ class LinuxSourceTree(object): return False return self.validate_config(build_dir) - def run_kernel(self, args=[], build_dir='', timeout=None): - args.extend(['mem=1G']) + def run_kernel(self, args=None, build_dir='', filter_glob='', timeout=None) -> Iterator[str]: + if not args: + args = [] + args.extend(['mem=1G', 'console=tty']) + if filter_glob: + args.append('kunit.filter_glob='+filter_glob) self._ops.linux_bin(args, timeout, build_dir) outfile = get_outfile_path(build_dir) subprocess.call(['stty', 'sane']) @@ -206,6 +216,6 @@ class LinuxSourceTree(object): for line in file: yield line - def signal_handler(self, sig, frame): + def signal_handler(self, sig, frame) -> None: logging.error('Build interruption occurred. Cleaning console.') subprocess.call(['stty', 'sane']) diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index 6614ec4d0898..e8bcc139702e 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -12,32 +12,32 @@ from collections import namedtuple from datetime import datetime from enum import Enum, auto from functools import reduce -from typing import List, Optional, Tuple +from typing import Iterable, Iterator, List, Optional, Tuple TestResult = namedtuple('TestResult', ['status','suites','log']) class TestSuite(object): - def __init__(self): - self.status = None - self.name = None - self.cases = [] + def __init__(self) -> None: + self.status = TestStatus.SUCCESS + self.name = '' + self.cases = [] # type: List[TestCase] - def __str__(self): - return 'TestSuite(' + self.status + ',' + self.name + ',' + str(self.cases) + ')' + def __str__(self) -> str: + return 'TestSuite(' + str(self.status) + ',' + self.name + ',' + str(self.cases) + ')' - def __repr__(self): + def __repr__(self) -> str: return str(self) class TestCase(object): - def __init__(self): - self.status = None + def __init__(self) -> None: + self.status = TestStatus.SUCCESS self.name = '' - self.log = [] + self.log = [] # type: List[str] - def __str__(self): - return 'TestCase(' + self.status + ',' + self.name + ',' + str(self.log) + ')' + def __str__(self) -> str: + return 'TestCase(' + str(self.status) + ',' + self.name + ',' + str(self.log) + ')' - def __repr__(self): + def __repr__(self) -> str: return str(self) class TestStatus(Enum): @@ -51,7 +51,7 @@ kunit_start_re = re.compile(r'TAP version [0-9]+$') kunit_end_re = re.compile('(List of all partitions:|' 'Kernel panic - not syncing: VFS:)') -def isolate_kunit_output(kernel_output): +def isolate_kunit_output(kernel_output) -> Iterator[str]: started = False for line in kernel_output: line = line.rstrip() # line always has a trailing \n @@ -64,7 +64,7 @@ def isolate_kunit_output(kernel_output): elif started: yield line[prefix_len:] if prefix_len > 0 else line -def raw_output(kernel_output): +def raw_output(kernel_output) -> None: for line in kernel_output: print(line.rstrip()) @@ -72,36 +72,36 @@ DIVIDER = '=' * 60 RESET = '\033[0;0m' -def red(text): +def red(text) -> str: return '\033[1;31m' + text + RESET -def yellow(text): +def yellow(text) -> str: return '\033[1;33m' + text + RESET -def green(text): +def green(text) -> str: return '\033[1;32m' + text + RESET -def print_with_timestamp(message): +def print_with_timestamp(message) -> None: print('[%s] %s' % (datetime.now().strftime('%H:%M:%S'), message)) -def format_suite_divider(message): +def format_suite_divider(message) -> str: return '======== ' + message + ' ========' -def print_suite_divider(message): +def print_suite_divider(message) -> None: print_with_timestamp(DIVIDER) print_with_timestamp(format_suite_divider(message)) -def print_log(log): +def print_log(log) -> None: for m in log: print_with_timestamp(m) TAP_ENTRIES = re.compile(r'^(TAP|[\s]*ok|[\s]*not ok|[\s]*[0-9]+\.\.[0-9]+|[\s]*#).*$') -def consume_non_diagnositic(lines: List[str]) -> None: +def consume_non_diagnostic(lines: List[str]) -> None: while lines and not TAP_ENTRIES.match(lines[0]): lines.pop(0) -def save_non_diagnositic(lines: List[str], test_case: TestCase) -> None: +def save_non_diagnostic(lines: List[str], test_case: TestCase) -> None: while lines and not TAP_ENTRIES.match(lines[0]): test_case.log.append(lines[0]) lines.pop(0) @@ -113,7 +113,7 @@ OK_NOT_OK_SUBTEST = re.compile(r'^[\s]+(ok|not ok) [0-9]+ - (.*)$') OK_NOT_OK_MODULE = re.compile(r'^(ok|not ok) ([0-9]+) - (.*)$') def parse_ok_not_ok_test_case(lines: List[str], test_case: TestCase) -> bool: - save_non_diagnositic(lines, test_case) + save_non_diagnostic(lines, test_case) if not lines: test_case.status = TestStatus.TEST_CRASHED return True @@ -139,7 +139,7 @@ SUBTEST_DIAGNOSTIC = re.compile(r'^[\s]+# (.*)$') DIAGNOSTIC_CRASH_MESSAGE = re.compile(r'^[\s]+# .*?: kunit test case crashed!$') def parse_diagnostic(lines: List[str], test_case: TestCase) -> bool: - save_non_diagnositic(lines, test_case) + save_non_diagnostic(lines, test_case) if not lines: return False line = lines[0] @@ -155,7 +155,7 @@ def parse_diagnostic(lines: List[str], test_case: TestCase) -> bool: def parse_test_case(lines: List[str]) -> Optional[TestCase]: test_case = TestCase() - save_non_diagnositic(lines, test_case) + save_non_diagnostic(lines, test_case) while parse_diagnostic(lines, test_case): pass if parse_ok_not_ok_test_case(lines, test_case): @@ -166,7 +166,7 @@ def parse_test_case(lines: List[str]) -> Optional[TestCase]: SUBTEST_HEADER = re.compile(r'^[\s]+# Subtest: (.*)$') def parse_subtest_header(lines: List[str]) -> Optional[str]: - consume_non_diagnositic(lines) + consume_non_diagnostic(lines) if not lines: return None match = SUBTEST_HEADER.match(lines[0]) @@ -179,7 +179,7 @@ def parse_subtest_header(lines: List[str]) -> Optional[str]: SUBTEST_PLAN = re.compile(r'[\s]+[0-9]+\.\.([0-9]+)') def parse_subtest_plan(lines: List[str]) -> Optional[int]: - consume_non_diagnositic(lines) + consume_non_diagnostic(lines) match = SUBTEST_PLAN.match(lines[0]) if match: lines.pop(0) @@ -202,7 +202,7 @@ def max_status(left: TestStatus, right: TestStatus) -> TestStatus: def parse_ok_not_ok_test_suite(lines: List[str], test_suite: TestSuite, expected_suite_index: int) -> bool: - consume_non_diagnositic(lines) + consume_non_diagnostic(lines) if not lines: test_suite.status = TestStatus.TEST_CRASHED return False @@ -224,18 +224,17 @@ def parse_ok_not_ok_test_suite(lines: List[str], else: return False -def bubble_up_errors(to_status, status_container_list) -> TestStatus: - status_list = map(to_status, status_container_list) - return reduce(max_status, status_list, TestStatus.SUCCESS) +def bubble_up_errors(statuses: Iterable[TestStatus]) -> TestStatus: + return reduce(max_status, statuses, TestStatus.SUCCESS) def bubble_up_test_case_errors(test_suite: TestSuite) -> TestStatus: - max_test_case_status = bubble_up_errors(lambda x: x.status, test_suite.cases) + max_test_case_status = bubble_up_errors(x.status for x in test_suite.cases) return max_status(max_test_case_status, test_suite.status) def parse_test_suite(lines: List[str], expected_suite_index: int) -> Optional[TestSuite]: if not lines: return None - consume_non_diagnositic(lines) + consume_non_diagnostic(lines) test_suite = TestSuite() test_suite.status = TestStatus.SUCCESS name = parse_subtest_header(lines) @@ -264,7 +263,7 @@ def parse_test_suite(lines: List[str], expected_suite_index: int) -> Optional[Te TAP_HEADER = re.compile(r'^TAP version 14$') def parse_tap_header(lines: List[str]) -> bool: - consume_non_diagnositic(lines) + consume_non_diagnostic(lines) if TAP_HEADER.match(lines[0]): lines.pop(0) return True @@ -274,7 +273,7 @@ def parse_tap_header(lines: List[str]) -> bool: TEST_PLAN = re.compile(r'[0-9]+\.\.([0-9]+)') def parse_test_plan(lines: List[str]) -> Optional[int]: - consume_non_diagnositic(lines) + consume_non_diagnostic(lines) match = TEST_PLAN.match(lines[0]) if match: lines.pop(0) @@ -282,11 +281,11 @@ def parse_test_plan(lines: List[str]) -> Optional[int]: else: return None -def bubble_up_suite_errors(test_suite_list: List[TestSuite]) -> TestStatus: - return bubble_up_errors(lambda x: x.status, test_suite_list) +def bubble_up_suite_errors(test_suites: Iterable[TestSuite]) -> TestStatus: + return bubble_up_errors(x.status for x in test_suites) def parse_test_result(lines: List[str]) -> TestResult: - consume_non_diagnositic(lines) + consume_non_diagnostic(lines) if not lines or not parse_tap_header(lines): return TestResult(TestStatus.NO_TESTS, [], lines) expected_test_suite_num = parse_test_plan(lines) diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index b593f4448e83..1ad3049e9069 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -12,6 +12,7 @@ from unittest import mock import tempfile, shutil # Handling test_tmpdir import json +import signal import os import kunit_config @@ -21,16 +22,18 @@ import kunit_json import kunit test_tmpdir = '' +abs_test_data_dir = '' def setUpModule(): - global test_tmpdir + global test_tmpdir, abs_test_data_dir test_tmpdir = tempfile.mkdtemp() + abs_test_data_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), 'test_data')) def tearDownModule(): shutil.rmtree(test_tmpdir) -def get_absolute_path(path): - return os.path.join(os.path.dirname(__file__), path) +def test_data_path(path): + return os.path.join(abs_test_data_dir, path) class KconfigTest(unittest.TestCase): @@ -46,8 +49,7 @@ class KconfigTest(unittest.TestCase): def test_read_from_file(self): kconfig = kunit_config.Kconfig() - kconfig_path = get_absolute_path( - 'test_data/test_read_from_file.kconfig') + kconfig_path = test_data_path('test_read_from_file.kconfig') kconfig.read_from_file(kconfig_path) @@ -98,21 +100,18 @@ class KUnitParserTest(unittest.TestCase): str(needle) + '" not found in "' + str(haystack) + '"!') def test_output_isolated_correctly(self): - log_path = get_absolute_path( - 'test_data/test_output_isolated_correctly.log') - file = open(log_path) - result = kunit_parser.isolate_kunit_output(file.readlines()) + log_path = test_data_path('test_output_isolated_correctly.log') + with open(log_path) as file: + result = kunit_parser.isolate_kunit_output(file.readlines()) self.assertContains('TAP version 14', result) self.assertContains(' # Subtest: example', result) self.assertContains(' 1..2', result) self.assertContains(' ok 1 - example_simple_test', result) self.assertContains(' ok 2 - example_mock_test', result) self.assertContains('ok 1 - example', result) - file.close() def test_output_with_prefix_isolated_correctly(self): - log_path = get_absolute_path( - 'test_data/test_pound_sign.log') + log_path = test_data_path('test_pound_sign.log') with open(log_path) as file: result = kunit_parser.isolate_kunit_output(file.readlines()) self.assertContains('TAP version 14', result) @@ -141,61 +140,51 @@ class KUnitParserTest(unittest.TestCase): self.assertContains('ok 3 - string-stream-test', result) def test_parse_successful_test_log(self): - all_passed_log = get_absolute_path( - 'test_data/test_is_test_passed-all_passed.log') - file = open(all_passed_log) - result = kunit_parser.parse_run_tests(file.readlines()) + all_passed_log = test_data_path('test_is_test_passed-all_passed.log') + with open(all_passed_log) as file: + result = kunit_parser.parse_run_tests(file.readlines()) self.assertEqual( kunit_parser.TestStatus.SUCCESS, result.status) - file.close() def test_parse_failed_test_log(self): - failed_log = get_absolute_path( - 'test_data/test_is_test_passed-failure.log') - file = open(failed_log) - result = kunit_parser.parse_run_tests(file.readlines()) + failed_log = test_data_path('test_is_test_passed-failure.log') + with open(failed_log) as file: + result = kunit_parser.parse_run_tests(file.readlines()) self.assertEqual( kunit_parser.TestStatus.FAILURE, result.status) - file.close() def test_no_tests(self): - empty_log = get_absolute_path( - 'test_data/test_is_test_passed-no_tests_run.log') - file = open(empty_log) - result = kunit_parser.parse_run_tests( - kunit_parser.isolate_kunit_output(file.readlines())) + empty_log = test_data_path('test_is_test_passed-no_tests_run.log') + with open(empty_log) as file: + result = kunit_parser.parse_run_tests( + kunit_parser.isolate_kunit_output(file.readlines())) self.assertEqual(0, len(result.suites)) self.assertEqual( kunit_parser.TestStatus.NO_TESTS, result.status) - file.close() def test_no_kunit_output(self): - crash_log = get_absolute_path( - 'test_data/test_insufficient_memory.log') - file = open(crash_log) + crash_log = test_data_path('test_insufficient_memory.log') print_mock = mock.patch('builtins.print').start() - result = kunit_parser.parse_run_tests( - kunit_parser.isolate_kunit_output(file.readlines())) + with open(crash_log) as file: + result = kunit_parser.parse_run_tests( + kunit_parser.isolate_kunit_output(file.readlines())) print_mock.assert_any_call(StrContains('no tests run!')) print_mock.stop() file.close() def test_crashed_test(self): - crashed_log = get_absolute_path( - 'test_data/test_is_test_passed-crash.log') - file = open(crashed_log) - result = kunit_parser.parse_run_tests(file.readlines()) + crashed_log = test_data_path('test_is_test_passed-crash.log') + with open(crashed_log) as file: + result = kunit_parser.parse_run_tests(file.readlines()) self.assertEqual( kunit_parser.TestStatus.TEST_CRASHED, result.status) - file.close() def test_ignores_prefix_printk_time(self): - prefix_log = get_absolute_path( - 'test_data/test_config_printk_time.log') + prefix_log = test_data_path('test_config_printk_time.log') with open(prefix_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) self.assertEqual( @@ -204,8 +193,7 @@ class KUnitParserTest(unittest.TestCase): self.assertEqual('kunit-resource-test', result.suites[0].name) def test_ignores_multiple_prefixes(self): - prefix_log = get_absolute_path( - 'test_data/test_multiple_prefixes.log') + prefix_log = test_data_path('test_multiple_prefixes.log') with open(prefix_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) self.assertEqual( @@ -214,8 +202,7 @@ class KUnitParserTest(unittest.TestCase): self.assertEqual('kunit-resource-test', result.suites[0].name) def test_prefix_mixed_kernel_output(self): - mixed_prefix_log = get_absolute_path( - 'test_data/test_interrupted_tap_output.log') + mixed_prefix_log = test_data_path('test_interrupted_tap_output.log') with open(mixed_prefix_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) self.assertEqual( @@ -224,7 +211,7 @@ class KUnitParserTest(unittest.TestCase): self.assertEqual('kunit-resource-test', result.suites[0].name) def test_prefix_poundsign(self): - pound_log = get_absolute_path('test_data/test_pound_sign.log') + pound_log = test_data_path('test_pound_sign.log') with open(pound_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) self.assertEqual( @@ -233,7 +220,7 @@ class KUnitParserTest(unittest.TestCase): self.assertEqual('kunit-resource-test', result.suites[0].name) def test_kernel_panic_end(self): - panic_log = get_absolute_path('test_data/test_kernel_panic_interrupt.log') + panic_log = test_data_path('test_kernel_panic_interrupt.log') with open(panic_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) self.assertEqual( @@ -242,7 +229,7 @@ class KUnitParserTest(unittest.TestCase): self.assertEqual('kunit-resource-test', result.suites[0].name) def test_pound_no_prefix(self): - pound_log = get_absolute_path('test_data/test_pound_no_prefix.log') + pound_log = test_data_path('test_pound_no_prefix.log') with open(pound_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) self.assertEqual( @@ -250,10 +237,27 @@ class KUnitParserTest(unittest.TestCase): result.status) self.assertEqual('kunit-resource-test', result.suites[0].name) +class LinuxSourceTreeTest(unittest.TestCase): + + def setUp(self): + mock.patch.object(signal, 'signal').start() + self.addCleanup(mock.patch.stopall) + + def test_invalid_kunitconfig(self): + with self.assertRaisesRegex(kunit_kernel.ConfigError, 'nonexistent.* does not exist'): + kunit_kernel.LinuxSourceTree('', kunitconfig_path='/nonexistent_file') + + def test_valid_kunitconfig(self): + with tempfile.NamedTemporaryFile('wt') as kunitconfig: + tree = kunit_kernel.LinuxSourceTree('', kunitconfig_path=kunitconfig.name) + + # TODO: add more test cases. + + class KUnitJsonTest(unittest.TestCase): def _json_for(self, log_file): - with(open(get_absolute_path(log_file))) as file: + with open(test_data_path(log_file)) as file: test_result = kunit_parser.parse_run_tests(file) json_obj = kunit_json.get_json_result( test_result=test_result, @@ -263,22 +267,19 @@ class KUnitJsonTest(unittest.TestCase): return json.loads(json_obj) def test_failed_test_json(self): - result = self._json_for( - 'test_data/test_is_test_passed-failure.log') + result = self._json_for('test_is_test_passed-failure.log') self.assertEqual( {'name': 'example_simple_test', 'status': 'FAIL'}, result["sub_groups"][1]["test_cases"][0]) def test_crashed_test_json(self): - result = self._json_for( - 'test_data/test_is_test_passed-crash.log') + result = self._json_for('test_is_test_passed-crash.log') self.assertEqual( {'name': 'example_simple_test', 'status': 'ERROR'}, result["sub_groups"][1]["test_cases"][0]) def test_no_tests_json(self): - result = self._json_for( - 'test_data/test_is_test_passed-no_tests_run.log') + result = self._json_for('test_is_test_passed-no_tests_run.log') self.assertEqual(0, len(result['sub_groups'])) class StrContains(str): @@ -287,106 +288,104 @@ class StrContains(str): class KUnitMainTest(unittest.TestCase): def setUp(self): - path = get_absolute_path('test_data/test_is_test_passed-all_passed.log') - file = open(path) - all_passed_log = file.readlines() - self.print_patch = mock.patch('builtins.print') - self.print_mock = self.print_patch.start() + path = test_data_path('test_is_test_passed-all_passed.log') + with open(path) as file: + all_passed_log = file.readlines() + + self.print_mock = mock.patch('builtins.print').start() + self.addCleanup(mock.patch.stopall) + self.linux_source_mock = mock.Mock() self.linux_source_mock.build_reconfig = mock.Mock(return_value=True) self.linux_source_mock.build_um_kernel = mock.Mock(return_value=True) self.linux_source_mock.run_kernel = mock.Mock(return_value=all_passed_log) - def tearDown(self): - self.print_patch.stop() - pass - def test_config_passes_args_pass(self): kunit.main(['config', '--build_dir=.kunit'], self.linux_source_mock) - assert self.linux_source_mock.build_reconfig.call_count == 1 - assert self.linux_source_mock.run_kernel.call_count == 0 + self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1) + self.assertEqual(self.linux_source_mock.run_kernel.call_count, 0) def test_build_passes_args_pass(self): kunit.main(['build'], self.linux_source_mock) - assert self.linux_source_mock.build_reconfig.call_count == 0 + self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 0) self.linux_source_mock.build_um_kernel.assert_called_once_with(False, 8, '.kunit', None) - assert self.linux_source_mock.run_kernel.call_count == 0 + self.assertEqual(self.linux_source_mock.run_kernel.call_count, 0) def test_exec_passes_args_pass(self): kunit.main(['exec'], self.linux_source_mock) - assert self.linux_source_mock.build_reconfig.call_count == 0 - assert self.linux_source_mock.run_kernel.call_count == 1 - self.linux_source_mock.run_kernel.assert_called_once_with(build_dir='.kunit', timeout=300) + self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 0) + self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1) + self.linux_source_mock.run_kernel.assert_called_once_with( + build_dir='.kunit', filter_glob='', timeout=300) self.print_mock.assert_any_call(StrContains('Testing complete.')) def test_run_passes_args_pass(self): kunit.main(['run'], self.linux_source_mock) - assert self.linux_source_mock.build_reconfig.call_count == 1 - assert self.linux_source_mock.run_kernel.call_count == 1 + self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1) + self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1) self.linux_source_mock.run_kernel.assert_called_once_with( - build_dir='.kunit', timeout=300) + build_dir='.kunit', filter_glob='', timeout=300) self.print_mock.assert_any_call(StrContains('Testing complete.')) def test_exec_passes_args_fail(self): self.linux_source_mock.run_kernel = mock.Mock(return_value=[]) with self.assertRaises(SystemExit) as e: kunit.main(['exec'], self.linux_source_mock) - assert type(e.exception) == SystemExit - assert e.exception.code == 1 + self.assertEqual(e.exception.code, 1) def test_run_passes_args_fail(self): self.linux_source_mock.run_kernel = mock.Mock(return_value=[]) with self.assertRaises(SystemExit) as e: kunit.main(['run'], self.linux_source_mock) - assert type(e.exception) == SystemExit - assert e.exception.code == 1 - assert self.linux_source_mock.build_reconfig.call_count == 1 - assert self.linux_source_mock.run_kernel.call_count == 1 + self.assertEqual(e.exception.code, 1) + self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1) + self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1) self.print_mock.assert_any_call(StrContains(' 0 tests run')) def test_exec_raw_output(self): self.linux_source_mock.run_kernel = mock.Mock(return_value=[]) kunit.main(['exec', '--raw_output'], self.linux_source_mock) - assert self.linux_source_mock.run_kernel.call_count == 1 - for kall in self.print_mock.call_args_list: - assert kall != mock.call(StrContains('Testing complete.')) - assert kall != mock.call(StrContains(' 0 tests run')) + self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1) + for call in self.print_mock.call_args_list: + self.assertNotEqual(call, mock.call(StrContains('Testing complete.'))) + self.assertNotEqual(call, mock.call(StrContains(' 0 tests run'))) def test_run_raw_output(self): self.linux_source_mock.run_kernel = mock.Mock(return_value=[]) kunit.main(['run', '--raw_output'], self.linux_source_mock) - assert self.linux_source_mock.build_reconfig.call_count == 1 - assert self.linux_source_mock.run_kernel.call_count == 1 - for kall in self.print_mock.call_args_list: - assert kall != mock.call(StrContains('Testing complete.')) - assert kall != mock.call(StrContains(' 0 tests run')) + self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1) + self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1) + for call in self.print_mock.call_args_list: + self.assertNotEqual(call, mock.call(StrContains('Testing complete.'))) + self.assertNotEqual(call, mock.call(StrContains(' 0 tests run'))) def test_exec_timeout(self): timeout = 3453 kunit.main(['exec', '--timeout', str(timeout)], self.linux_source_mock) - self.linux_source_mock.run_kernel.assert_called_once_with(build_dir='.kunit', timeout=timeout) + self.linux_source_mock.run_kernel.assert_called_once_with( + build_dir='.kunit', filter_glob='', timeout=timeout) self.print_mock.assert_any_call(StrContains('Testing complete.')) def test_run_timeout(self): timeout = 3453 kunit.main(['run', '--timeout', str(timeout)], self.linux_source_mock) - assert self.linux_source_mock.build_reconfig.call_count == 1 + self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1) self.linux_source_mock.run_kernel.assert_called_once_with( - build_dir='.kunit', timeout=timeout) + build_dir='.kunit', filter_glob='', timeout=timeout) self.print_mock.assert_any_call(StrContains('Testing complete.')) def test_run_builddir(self): build_dir = '.kunit' kunit.main(['run', '--build_dir=.kunit'], self.linux_source_mock) - assert self.linux_source_mock.build_reconfig.call_count == 1 + self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1) self.linux_source_mock.run_kernel.assert_called_once_with( - build_dir=build_dir, timeout=300) + build_dir=build_dir, filter_glob='', timeout=300) self.print_mock.assert_any_call(StrContains('Testing complete.')) def test_config_builddir(self): build_dir = '.kunit' kunit.main(['config', '--build_dir', build_dir], self.linux_source_mock) - assert self.linux_source_mock.build_reconfig.call_count == 1 + self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1) def test_build_builddir(self): build_dir = '.kunit' @@ -396,8 +395,23 @@ class KUnitMainTest(unittest.TestCase): def test_exec_builddir(self): build_dir = '.kunit' kunit.main(['exec', '--build_dir', build_dir], self.linux_source_mock) - self.linux_source_mock.run_kernel.assert_called_once_with(build_dir=build_dir, timeout=300) + self.linux_source_mock.run_kernel.assert_called_once_with( + build_dir=build_dir, filter_glob='', timeout=300) self.print_mock.assert_any_call(StrContains('Testing complete.')) + @mock.patch.object(kunit_kernel, 'LinuxSourceTree') + def test_run_kunitconfig(self, mock_linux_init): + mock_linux_init.return_value = self.linux_source_mock + kunit.main(['run', '--kunitconfig=mykunitconfig']) + # Just verify that we parsed and initialized it correctly here. + mock_linux_init.assert_called_once_with('.kunit', kunitconfig_path='mykunitconfig') + + @mock.patch.object(kunit_kernel, 'LinuxSourceTree') + def test_config_kunitconfig(self, mock_linux_init): + mock_linux_init.return_value = self.linux_source_mock + kunit.main(['config', '--kunitconfig=mykunitconfig']) + # Just verify that we parsed and initialized it correctly here. + mock_linux_init.assert_called_once_with('.kunit', kunitconfig_path='mykunitconfig') + if __name__ == '__main__': unittest.main() diff --git a/tools/testing/nvdimm/config_check.c b/tools/testing/nvdimm/config_check.c index cac891028cd1..3e3a5f518864 100644 --- a/tools/testing/nvdimm/config_check.c +++ b/tools/testing/nvdimm/config_check.c @@ -12,7 +12,8 @@ void check(void) BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BTT)); BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_PFN)); BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BLK)); - BUILD_BUG_ON(!IS_MODULE(CONFIG_ACPI_NFIT)); + if (IS_ENABLED(CONFIG_ACPI_NFIT)) + BUILD_BUG_ON(!IS_MODULE(CONFIG_ACPI_NFIT)); BUILD_BUG_ON(!IS_MODULE(CONFIG_DEV_DAX)); BUILD_BUG_ON(!IS_MODULE(CONFIG_DEV_DAX_PMEM)); } diff --git a/tools/testing/nvdimm/test/Kbuild b/tools/testing/nvdimm/test/Kbuild index 75baebf8f4ba..197bcb2b7f35 100644 --- a/tools/testing/nvdimm/test/Kbuild +++ b/tools/testing/nvdimm/test/Kbuild @@ -5,5 +5,9 @@ ccflags-y += -I$(srctree)/drivers/acpi/nfit/ obj-m += nfit_test.o obj-m += nfit_test_iomap.o -nfit_test-y := nfit.o +ifeq ($(CONFIG_ACPI_NFIT),m) + nfit_test-y := nfit.o +else + nfit_test-y := ndtest.o +endif nfit_test_iomap-y := iomap.o diff --git a/tools/testing/nvdimm/test/ndtest.c b/tools/testing/nvdimm/test/ndtest.c new file mode 100644 index 000000000000..6862915f1fb0 --- /dev/null +++ b/tools/testing/nvdimm/test/ndtest.c @@ -0,0 +1,1129 @@ +// SPDX-License-Identifier: GPL-2.0-only +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/platform_device.h> +#include <linux/device.h> +#include <linux/module.h> +#include <linux/genalloc.h> +#include <linux/vmalloc.h> +#include <linux/dma-mapping.h> +#include <linux/list_sort.h> +#include <linux/libnvdimm.h> +#include <linux/ndctl.h> +#include <nd-core.h> +#include <linux/printk.h> +#include <linux/seq_buf.h> + +#include "../watermark.h" +#include "nfit_test.h" +#include "ndtest.h" + +enum { + DIMM_SIZE = SZ_32M, + LABEL_SIZE = SZ_128K, + NUM_INSTANCES = 2, + NUM_DCR = 4, + NDTEST_MAX_MAPPING = 6, +}; + +#define NDTEST_SCM_DIMM_CMD_MASK \ + ((1ul << ND_CMD_GET_CONFIG_SIZE) | \ + (1ul << ND_CMD_GET_CONFIG_DATA) | \ + (1ul << ND_CMD_SET_CONFIG_DATA) | \ + (1ul << ND_CMD_CALL)) + +#define NFIT_DIMM_HANDLE(node, socket, imc, chan, dimm) \ + (((node & 0xfff) << 16) | ((socket & 0xf) << 12) \ + | ((imc & 0xf) << 8) | ((chan & 0xf) << 4) | (dimm & 0xf)) + +static DEFINE_SPINLOCK(ndtest_lock); +static struct ndtest_priv *instances[NUM_INSTANCES]; +static struct class *ndtest_dimm_class; +static struct gen_pool *ndtest_pool; + +static struct ndtest_dimm dimm_group1[] = { + { + .size = DIMM_SIZE, + .handle = NFIT_DIMM_HANDLE(0, 0, 0, 0, 0), + .uuid_str = "1e5c75d2-b618-11ea-9aa3-507b9ddc0f72", + .physical_id = 0, + .num_formats = 2, + }, + { + .size = DIMM_SIZE, + .handle = NFIT_DIMM_HANDLE(0, 0, 0, 0, 1), + .uuid_str = "1c4d43ac-b618-11ea-be80-507b9ddc0f72", + .physical_id = 1, + .num_formats = 2, + }, + { + .size = DIMM_SIZE, + .handle = NFIT_DIMM_HANDLE(0, 0, 1, 0, 0), + .uuid_str = "a9f17ffc-b618-11ea-b36d-507b9ddc0f72", + .physical_id = 2, + .num_formats = 2, + }, + { + .size = DIMM_SIZE, + .handle = NFIT_DIMM_HANDLE(0, 0, 1, 0, 1), + .uuid_str = "b6b83b22-b618-11ea-8aae-507b9ddc0f72", + .physical_id = 3, + .num_formats = 2, + }, + { + .size = DIMM_SIZE, + .handle = NFIT_DIMM_HANDLE(0, 1, 0, 0, 0), + .uuid_str = "bf9baaee-b618-11ea-b181-507b9ddc0f72", + .physical_id = 4, + .num_formats = 2, + }, +}; + +static struct ndtest_dimm dimm_group2[] = { + { + .size = DIMM_SIZE, + .handle = NFIT_DIMM_HANDLE(1, 0, 0, 0, 0), + .uuid_str = "ca0817e2-b618-11ea-9db3-507b9ddc0f72", + .physical_id = 0, + .num_formats = 1, + .flags = PAPR_PMEM_UNARMED | PAPR_PMEM_EMPTY | + PAPR_PMEM_SAVE_FAILED | PAPR_PMEM_SHUTDOWN_DIRTY | + PAPR_PMEM_HEALTH_FATAL, + }, +}; + +static struct ndtest_mapping region0_mapping[] = { + { + .dimm = 0, + .position = 0, + .start = 0, + .size = SZ_16M, + }, + { + .dimm = 1, + .position = 1, + .start = 0, + .size = SZ_16M, + } +}; + +static struct ndtest_mapping region1_mapping[] = { + { + .dimm = 0, + .position = 0, + .start = SZ_16M, + .size = SZ_16M, + }, + { + .dimm = 1, + .position = 1, + .start = SZ_16M, + .size = SZ_16M, + }, + { + .dimm = 2, + .position = 2, + .start = SZ_16M, + .size = SZ_16M, + }, + { + .dimm = 3, + .position = 3, + .start = SZ_16M, + .size = SZ_16M, + }, +}; + +static struct ndtest_mapping region2_mapping[] = { + { + .dimm = 0, + .position = 0, + .start = 0, + .size = DIMM_SIZE, + }, +}; + +static struct ndtest_mapping region3_mapping[] = { + { + .dimm = 1, + .start = 0, + .size = DIMM_SIZE, + } +}; + +static struct ndtest_mapping region4_mapping[] = { + { + .dimm = 2, + .start = 0, + .size = DIMM_SIZE, + } +}; + +static struct ndtest_mapping region5_mapping[] = { + { + .dimm = 3, + .start = 0, + .size = DIMM_SIZE, + } +}; + +static struct ndtest_region bus0_regions[] = { + { + .type = ND_DEVICE_NAMESPACE_PMEM, + .num_mappings = ARRAY_SIZE(region0_mapping), + .mapping = region0_mapping, + .size = DIMM_SIZE, + .range_index = 1, + }, + { + .type = ND_DEVICE_NAMESPACE_PMEM, + .num_mappings = ARRAY_SIZE(region1_mapping), + .mapping = region1_mapping, + .size = DIMM_SIZE * 2, + .range_index = 2, + }, + { + .type = ND_DEVICE_NAMESPACE_BLK, + .num_mappings = ARRAY_SIZE(region2_mapping), + .mapping = region2_mapping, + .size = DIMM_SIZE, + .range_index = 3, + }, + { + .type = ND_DEVICE_NAMESPACE_BLK, + .num_mappings = ARRAY_SIZE(region3_mapping), + .mapping = region3_mapping, + .size = DIMM_SIZE, + .range_index = 4, + }, + { + .type = ND_DEVICE_NAMESPACE_BLK, + .num_mappings = ARRAY_SIZE(region4_mapping), + .mapping = region4_mapping, + .size = DIMM_SIZE, + .range_index = 5, + }, + { + .type = ND_DEVICE_NAMESPACE_BLK, + .num_mappings = ARRAY_SIZE(region5_mapping), + .mapping = region5_mapping, + .size = DIMM_SIZE, + .range_index = 6, + }, +}; + +static struct ndtest_mapping region6_mapping[] = { + { + .dimm = 0, + .position = 0, + .start = 0, + .size = DIMM_SIZE, + }, +}; + +static struct ndtest_region bus1_regions[] = { + { + .type = ND_DEVICE_NAMESPACE_IO, + .num_mappings = ARRAY_SIZE(region6_mapping), + .mapping = region6_mapping, + .size = DIMM_SIZE, + .range_index = 1, + }, +}; + +static struct ndtest_config bus_configs[NUM_INSTANCES] = { + /* bus 1 */ + { + .dimm_start = 0, + .dimm_count = ARRAY_SIZE(dimm_group1), + .dimms = dimm_group1, + .regions = bus0_regions, + .num_regions = ARRAY_SIZE(bus0_regions), + }, + /* bus 2 */ + { + .dimm_start = ARRAY_SIZE(dimm_group1), + .dimm_count = ARRAY_SIZE(dimm_group2), + .dimms = dimm_group2, + .regions = bus1_regions, + .num_regions = ARRAY_SIZE(bus1_regions), + }, +}; + +static inline struct ndtest_priv *to_ndtest_priv(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + + return container_of(pdev, struct ndtest_priv, pdev); +} + +static int ndtest_config_get(struct ndtest_dimm *p, unsigned int buf_len, + struct nd_cmd_get_config_data_hdr *hdr) +{ + unsigned int len; + + if ((hdr->in_offset + hdr->in_length) > LABEL_SIZE) + return -EINVAL; + + hdr->status = 0; + len = min(hdr->in_length, LABEL_SIZE - hdr->in_offset); + memcpy(hdr->out_buf, p->label_area + hdr->in_offset, len); + + return buf_len - len; +} + +static int ndtest_config_set(struct ndtest_dimm *p, unsigned int buf_len, + struct nd_cmd_set_config_hdr *hdr) +{ + unsigned int len; + + if ((hdr->in_offset + hdr->in_length) > LABEL_SIZE) + return -EINVAL; + + len = min(hdr->in_length, LABEL_SIZE - hdr->in_offset); + memcpy(p->label_area + hdr->in_offset, hdr->in_buf, len); + + return buf_len - len; +} + +static int ndtest_get_config_size(struct ndtest_dimm *dimm, unsigned int buf_len, + struct nd_cmd_get_config_size *size) +{ + size->status = 0; + size->max_xfer = 8; + size->config_size = dimm->config_size; + + return 0; +} + +static int ndtest_ctl(struct nvdimm_bus_descriptor *nd_desc, + struct nvdimm *nvdimm, unsigned int cmd, void *buf, + unsigned int buf_len, int *cmd_rc) +{ + struct ndtest_dimm *dimm; + int _cmd_rc; + + if (!cmd_rc) + cmd_rc = &_cmd_rc; + + *cmd_rc = 0; + + if (!nvdimm) + return -EINVAL; + + dimm = nvdimm_provider_data(nvdimm); + if (!dimm) + return -EINVAL; + + switch (cmd) { + case ND_CMD_GET_CONFIG_SIZE: + *cmd_rc = ndtest_get_config_size(dimm, buf_len, buf); + break; + case ND_CMD_GET_CONFIG_DATA: + *cmd_rc = ndtest_config_get(dimm, buf_len, buf); + break; + case ND_CMD_SET_CONFIG_DATA: + *cmd_rc = ndtest_config_set(dimm, buf_len, buf); + break; + default: + return -EINVAL; + } + + /* Failures for a DIMM can be injected using fail_cmd and + * fail_cmd_code, see the device attributes below + */ + if ((1 << cmd) & dimm->fail_cmd) + return dimm->fail_cmd_code ? dimm->fail_cmd_code : -EIO; + + return 0; +} + +static int ndtest_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa, + void *iobuf, u64 len, int rw) +{ + struct ndtest_dimm *dimm = ndbr->blk_provider_data; + struct ndtest_blk_mmio *mmio = dimm->mmio; + struct nd_region *nd_region = &ndbr->nd_region; + unsigned int lane; + + if (!mmio) + return -ENOMEM; + + lane = nd_region_acquire_lane(nd_region); + if (rw) + memcpy(mmio->base + dpa, iobuf, len); + else { + memcpy(iobuf, mmio->base + dpa, len); + arch_invalidate_pmem(mmio->base + dpa, len); + } + + nd_region_release_lane(nd_region, lane); + + return 0; +} + +static int ndtest_blk_region_enable(struct nvdimm_bus *nvdimm_bus, + struct device *dev) +{ + struct nd_blk_region *ndbr = to_nd_blk_region(dev); + struct nvdimm *nvdimm; + struct ndtest_dimm *dimm; + struct ndtest_blk_mmio *mmio; + + nvdimm = nd_blk_region_to_dimm(ndbr); + dimm = nvdimm_provider_data(nvdimm); + + nd_blk_region_set_provider_data(ndbr, dimm); + dimm->blk_region = to_nd_region(dev); + + mmio = devm_kzalloc(dev, sizeof(struct ndtest_blk_mmio), GFP_KERNEL); + if (!mmio) + return -ENOMEM; + + mmio->base = (void __iomem *) devm_nvdimm_memremap( + dev, dimm->address, 12, nd_blk_memremap_flags(ndbr)); + if (!mmio->base) { + dev_err(dev, "%s failed to map blk dimm\n", nvdimm_name(nvdimm)); + return -ENOMEM; + } + mmio->size = dimm->size; + mmio->base_offset = 0; + + dimm->mmio = mmio; + + return 0; +} + +static struct nfit_test_resource *ndtest_resource_lookup(resource_size_t addr) +{ + int i; + + for (i = 0; i < NUM_INSTANCES; i++) { + struct nfit_test_resource *n, *nfit_res = NULL; + struct ndtest_priv *t = instances[i]; + + if (!t) + continue; + spin_lock(&ndtest_lock); + list_for_each_entry(n, &t->resources, list) { + if (addr >= n->res.start && (addr < n->res.start + + resource_size(&n->res))) { + nfit_res = n; + break; + } else if (addr >= (unsigned long) n->buf + && (addr < (unsigned long) n->buf + + resource_size(&n->res))) { + nfit_res = n; + break; + } + } + spin_unlock(&ndtest_lock); + if (nfit_res) + return nfit_res; + } + + pr_warn("Failed to get resource\n"); + + return NULL; +} + +static void ndtest_release_resource(void *data) +{ + struct nfit_test_resource *res = data; + + spin_lock(&ndtest_lock); + list_del(&res->list); + spin_unlock(&ndtest_lock); + + if (resource_size(&res->res) >= DIMM_SIZE) + gen_pool_free(ndtest_pool, res->res.start, + resource_size(&res->res)); + vfree(res->buf); + kfree(res); +} + +static void *ndtest_alloc_resource(struct ndtest_priv *p, size_t size, + dma_addr_t *dma) +{ + dma_addr_t __dma; + void *buf; + struct nfit_test_resource *res; + struct genpool_data_align data = { + .align = SZ_128M, + }; + + res = kzalloc(sizeof(*res), GFP_KERNEL); + if (!res) + return NULL; + + buf = vmalloc(size); + if (size >= DIMM_SIZE) + __dma = gen_pool_alloc_algo(ndtest_pool, size, + gen_pool_first_fit_align, &data); + else + __dma = (unsigned long) buf; + + if (!__dma) + goto buf_err; + + INIT_LIST_HEAD(&res->list); + res->dev = &p->pdev.dev; + res->buf = buf; + res->res.start = __dma; + res->res.end = __dma + size - 1; + res->res.name = "NFIT"; + spin_lock_init(&res->lock); + INIT_LIST_HEAD(&res->requests); + spin_lock(&ndtest_lock); + list_add(&res->list, &p->resources); + spin_unlock(&ndtest_lock); + + if (dma) + *dma = __dma; + + if (!devm_add_action(&p->pdev.dev, ndtest_release_resource, res)) + return res->buf; + +buf_err: + if (__dma && size >= DIMM_SIZE) + gen_pool_free(ndtest_pool, __dma, size); + if (buf) + vfree(buf); + kfree(res); + + return NULL; +} + +static ssize_t range_index_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev); + struct ndtest_region *region = nd_region_provider_data(nd_region); + + return sprintf(buf, "%d\n", region->range_index); +} +static DEVICE_ATTR_RO(range_index); + +static struct attribute *ndtest_region_attributes[] = { + &dev_attr_range_index.attr, + NULL, +}; + +static const struct attribute_group ndtest_region_attribute_group = { + .name = "papr", + .attrs = ndtest_region_attributes, +}; + +static const struct attribute_group *ndtest_region_attribute_groups[] = { + &ndtest_region_attribute_group, + NULL, +}; + +static int ndtest_create_region(struct ndtest_priv *p, + struct ndtest_region *region) +{ + struct nd_mapping_desc mappings[NDTEST_MAX_MAPPING]; + struct nd_blk_region_desc ndbr_desc; + struct nd_interleave_set *nd_set; + struct nd_region_desc *ndr_desc; + struct resource res; + int i, ndimm = region->mapping[0].dimm; + u64 uuid[2]; + + memset(&res, 0, sizeof(res)); + memset(&mappings, 0, sizeof(mappings)); + memset(&ndbr_desc, 0, sizeof(ndbr_desc)); + ndr_desc = &ndbr_desc.ndr_desc; + + if (!ndtest_alloc_resource(p, region->size, &res.start)) + return -ENOMEM; + + res.end = res.start + region->size - 1; + ndr_desc->mapping = mappings; + ndr_desc->res = &res; + ndr_desc->provider_data = region; + ndr_desc->attr_groups = ndtest_region_attribute_groups; + + if (uuid_parse(p->config->dimms[ndimm].uuid_str, (uuid_t *)uuid)) { + pr_err("failed to parse UUID\n"); + return -ENXIO; + } + + nd_set = devm_kzalloc(&p->pdev.dev, sizeof(*nd_set), GFP_KERNEL); + if (!nd_set) + return -ENOMEM; + + nd_set->cookie1 = cpu_to_le64(uuid[0]); + nd_set->cookie2 = cpu_to_le64(uuid[1]); + nd_set->altcookie = nd_set->cookie1; + ndr_desc->nd_set = nd_set; + + if (region->type == ND_DEVICE_NAMESPACE_BLK) { + mappings[0].start = 0; + mappings[0].size = DIMM_SIZE; + mappings[0].nvdimm = p->config->dimms[ndimm].nvdimm; + + ndr_desc->mapping = &mappings[0]; + ndr_desc->num_mappings = 1; + ndr_desc->num_lanes = 1; + ndbr_desc.enable = ndtest_blk_region_enable; + ndbr_desc.do_io = ndtest_blk_do_io; + region->region = nvdimm_blk_region_create(p->bus, ndr_desc); + + goto done; + } + + for (i = 0; i < region->num_mappings; i++) { + ndimm = region->mapping[i].dimm; + mappings[i].start = region->mapping[i].start; + mappings[i].size = region->mapping[i].size; + mappings[i].position = region->mapping[i].position; + mappings[i].nvdimm = p->config->dimms[ndimm].nvdimm; + } + + ndr_desc->num_mappings = region->num_mappings; + region->region = nvdimm_pmem_region_create(p->bus, ndr_desc); + +done: + if (!region->region) { + dev_err(&p->pdev.dev, "Error registering region %pR\n", + ndr_desc->res); + return -ENXIO; + } + + return 0; +} + +static int ndtest_init_regions(struct ndtest_priv *p) +{ + int i, ret = 0; + + for (i = 0; i < p->config->num_regions; i++) { + ret = ndtest_create_region(p, &p->config->regions[i]); + if (ret) + return ret; + } + + return 0; +} + +static void put_dimms(void *data) +{ + struct ndtest_priv *p = data; + int i; + + for (i = 0; i < p->config->dimm_count; i++) + if (p->config->dimms[i].dev) { + device_unregister(p->config->dimms[i].dev); + p->config->dimms[i].dev = NULL; + } +} + +static ssize_t handle_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct ndtest_dimm *dimm = dev_get_drvdata(dev); + + return sprintf(buf, "%#x\n", dimm->handle); +} +static DEVICE_ATTR_RO(handle); + +static ssize_t fail_cmd_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct ndtest_dimm *dimm = dev_get_drvdata(dev); + + return sprintf(buf, "%#x\n", dimm->fail_cmd); +} + +static ssize_t fail_cmd_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t size) +{ + struct ndtest_dimm *dimm = dev_get_drvdata(dev); + unsigned long val; + ssize_t rc; + + rc = kstrtol(buf, 0, &val); + if (rc) + return rc; + + dimm->fail_cmd = val; + + return size; +} +static DEVICE_ATTR_RW(fail_cmd); + +static ssize_t fail_cmd_code_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct ndtest_dimm *dimm = dev_get_drvdata(dev); + + return sprintf(buf, "%d\n", dimm->fail_cmd_code); +} + +static ssize_t fail_cmd_code_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t size) +{ + struct ndtest_dimm *dimm = dev_get_drvdata(dev); + unsigned long val; + ssize_t rc; + + rc = kstrtol(buf, 0, &val); + if (rc) + return rc; + + dimm->fail_cmd_code = val; + return size; +} +static DEVICE_ATTR_RW(fail_cmd_code); + +static struct attribute *dimm_attributes[] = { + &dev_attr_handle.attr, + &dev_attr_fail_cmd.attr, + &dev_attr_fail_cmd_code.attr, + NULL, +}; + +static struct attribute_group dimm_attribute_group = { + .attrs = dimm_attributes, +}; + +static const struct attribute_group *dimm_attribute_groups[] = { + &dimm_attribute_group, + NULL, +}; + +static ssize_t phys_id_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + struct ndtest_dimm *dimm = nvdimm_provider_data(nvdimm); + + return sprintf(buf, "%#x\n", dimm->physical_id); +} +static DEVICE_ATTR_RO(phys_id); + +static ssize_t vendor_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "0x1234567\n"); +} +static DEVICE_ATTR_RO(vendor); + +static ssize_t id_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + struct ndtest_dimm *dimm = nvdimm_provider_data(nvdimm); + + return sprintf(buf, "%04x-%02x-%04x-%08x", 0xabcd, + 0xa, 2016, ~(dimm->handle)); +} +static DEVICE_ATTR_RO(id); + +static ssize_t nvdimm_handle_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + struct ndtest_dimm *dimm = nvdimm_provider_data(nvdimm); + + return sprintf(buf, "%#x\n", dimm->handle); +} + +static struct device_attribute dev_attr_nvdimm_show_handle = { + .attr = { .name = "handle", .mode = 0444 }, + .show = nvdimm_handle_show, +}; + +static ssize_t subsystem_vendor_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "0x%04x\n", 0); +} +static DEVICE_ATTR_RO(subsystem_vendor); + +static ssize_t dirty_shutdown_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", 42); +} +static DEVICE_ATTR_RO(dirty_shutdown); + +static ssize_t formats_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + struct ndtest_dimm *dimm = nvdimm_provider_data(nvdimm); + + return sprintf(buf, "%d\n", dimm->num_formats); +} +static DEVICE_ATTR_RO(formats); + +static ssize_t format_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + struct ndtest_dimm *dimm = nvdimm_provider_data(nvdimm); + + if (dimm->num_formats > 1) + return sprintf(buf, "0x201\n"); + + return sprintf(buf, "0x101\n"); +} +static DEVICE_ATTR_RO(format); + +static ssize_t format1_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "0x301\n"); +} +static DEVICE_ATTR_RO(format1); + +static umode_t ndtest_nvdimm_attr_visible(struct kobject *kobj, + struct attribute *a, int n) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct nvdimm *nvdimm = to_nvdimm(dev); + struct ndtest_dimm *dimm = nvdimm_provider_data(nvdimm); + + if (a == &dev_attr_format1.attr && dimm->num_formats <= 1) + return 0; + + return a->mode; +} + +static ssize_t flags_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + struct ndtest_dimm *dimm = nvdimm_provider_data(nvdimm); + struct seq_buf s; + u64 flags; + + flags = dimm->flags; + + seq_buf_init(&s, buf, PAGE_SIZE); + if (flags & PAPR_PMEM_UNARMED_MASK) + seq_buf_printf(&s, "not_armed "); + + if (flags & PAPR_PMEM_BAD_SHUTDOWN_MASK) + seq_buf_printf(&s, "flush_fail "); + + if (flags & PAPR_PMEM_BAD_RESTORE_MASK) + seq_buf_printf(&s, "restore_fail "); + + if (flags & PAPR_PMEM_SAVE_MASK) + seq_buf_printf(&s, "save_fail "); + + if (flags & PAPR_PMEM_SMART_EVENT_MASK) + seq_buf_printf(&s, "smart_notify "); + + + if (seq_buf_used(&s)) + seq_buf_printf(&s, "\n"); + + return seq_buf_used(&s); +} +static DEVICE_ATTR_RO(flags); + +static struct attribute *ndtest_nvdimm_attributes[] = { + &dev_attr_nvdimm_show_handle.attr, + &dev_attr_vendor.attr, + &dev_attr_id.attr, + &dev_attr_phys_id.attr, + &dev_attr_subsystem_vendor.attr, + &dev_attr_dirty_shutdown.attr, + &dev_attr_formats.attr, + &dev_attr_format.attr, + &dev_attr_format1.attr, + &dev_attr_flags.attr, + NULL, +}; + +static const struct attribute_group ndtest_nvdimm_attribute_group = { + .name = "papr", + .attrs = ndtest_nvdimm_attributes, + .is_visible = ndtest_nvdimm_attr_visible, +}; + +static const struct attribute_group *ndtest_nvdimm_attribute_groups[] = { + &ndtest_nvdimm_attribute_group, + NULL, +}; + +static int ndtest_dimm_register(struct ndtest_priv *priv, + struct ndtest_dimm *dimm, int id) +{ + struct device *dev = &priv->pdev.dev; + unsigned long dimm_flags = dimm->flags; + + if (dimm->num_formats > 1) { + set_bit(NDD_ALIASING, &dimm_flags); + set_bit(NDD_LABELING, &dimm_flags); + } + + if (dimm->flags & PAPR_PMEM_UNARMED_MASK) + set_bit(NDD_UNARMED, &dimm_flags); + + dimm->nvdimm = nvdimm_create(priv->bus, dimm, + ndtest_nvdimm_attribute_groups, dimm_flags, + NDTEST_SCM_DIMM_CMD_MASK, 0, NULL); + if (!dimm->nvdimm) { + dev_err(dev, "Error creating DIMM object for %pOF\n", priv->dn); + return -ENXIO; + } + + dimm->dev = device_create_with_groups(ndtest_dimm_class, + &priv->pdev.dev, + 0, dimm, dimm_attribute_groups, + "test_dimm%d", id); + if (!dimm->dev) { + pr_err("Could not create dimm device attributes\n"); + return -ENOMEM; + } + + return 0; +} + +static int ndtest_nvdimm_init(struct ndtest_priv *p) +{ + struct ndtest_dimm *d; + void *res; + int i, id; + + for (i = 0; i < p->config->dimm_count; i++) { + d = &p->config->dimms[i]; + d->id = id = p->config->dimm_start + i; + res = ndtest_alloc_resource(p, LABEL_SIZE, NULL); + if (!res) + return -ENOMEM; + + d->label_area = res; + sprintf(d->label_area, "label%d", id); + d->config_size = LABEL_SIZE; + + if (!ndtest_alloc_resource(p, d->size, + &p->dimm_dma[id])) + return -ENOMEM; + + if (!ndtest_alloc_resource(p, LABEL_SIZE, + &p->label_dma[id])) + return -ENOMEM; + + if (!ndtest_alloc_resource(p, LABEL_SIZE, + &p->dcr_dma[id])) + return -ENOMEM; + + d->address = p->dimm_dma[id]; + + ndtest_dimm_register(p, d, id); + } + + return 0; +} + +static ssize_t compatible_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "nvdimm_test"); +} +static DEVICE_ATTR_RO(compatible); + +static struct attribute *of_node_attributes[] = { + &dev_attr_compatible.attr, + NULL +}; + +static const struct attribute_group of_node_attribute_group = { + .name = "of_node", + .attrs = of_node_attributes, +}; + +static const struct attribute_group *ndtest_attribute_groups[] = { + &of_node_attribute_group, + NULL, +}; + +static int ndtest_bus_register(struct ndtest_priv *p) +{ + p->config = &bus_configs[p->pdev.id]; + + p->bus_desc.ndctl = ndtest_ctl; + p->bus_desc.module = THIS_MODULE; + p->bus_desc.provider_name = NULL; + p->bus_desc.attr_groups = ndtest_attribute_groups; + + p->bus = nvdimm_bus_register(&p->pdev.dev, &p->bus_desc); + if (!p->bus) { + dev_err(&p->pdev.dev, "Error creating nvdimm bus %pOF\n", p->dn); + return -ENOMEM; + } + + return 0; +} + +static int ndtest_remove(struct platform_device *pdev) +{ + struct ndtest_priv *p = to_ndtest_priv(&pdev->dev); + + nvdimm_bus_unregister(p->bus); + return 0; +} + +static int ndtest_probe(struct platform_device *pdev) +{ + struct ndtest_priv *p; + int rc; + + p = to_ndtest_priv(&pdev->dev); + if (ndtest_bus_register(p)) + return -ENOMEM; + + p->dcr_dma = devm_kcalloc(&p->pdev.dev, NUM_DCR, + sizeof(dma_addr_t), GFP_KERNEL); + p->label_dma = devm_kcalloc(&p->pdev.dev, NUM_DCR, + sizeof(dma_addr_t), GFP_KERNEL); + p->dimm_dma = devm_kcalloc(&p->pdev.dev, NUM_DCR, + sizeof(dma_addr_t), GFP_KERNEL); + + rc = ndtest_nvdimm_init(p); + if (rc) + goto err; + + rc = ndtest_init_regions(p); + if (rc) + goto err; + + rc = devm_add_action_or_reset(&pdev->dev, put_dimms, p); + if (rc) + goto err; + + platform_set_drvdata(pdev, p); + + return 0; + +err: + pr_err("%s:%d Failed nvdimm init\n", __func__, __LINE__); + return rc; +} + +static const struct platform_device_id ndtest_id[] = { + { KBUILD_MODNAME }, + { }, +}; + +static struct platform_driver ndtest_driver = { + .probe = ndtest_probe, + .remove = ndtest_remove, + .driver = { + .name = KBUILD_MODNAME, + }, + .id_table = ndtest_id, +}; + +static void ndtest_release(struct device *dev) +{ + struct ndtest_priv *p = to_ndtest_priv(dev); + + kfree(p); +} + +static void cleanup_devices(void) +{ + int i; + + for (i = 0; i < NUM_INSTANCES; i++) + if (instances[i]) + platform_device_unregister(&instances[i]->pdev); + + nfit_test_teardown(); + + if (ndtest_pool) + gen_pool_destroy(ndtest_pool); + + + if (ndtest_dimm_class) + class_destroy(ndtest_dimm_class); +} + +static __init int ndtest_init(void) +{ + int rc, i; + + pmem_test(); + libnvdimm_test(); + device_dax_test(); + dax_pmem_test(); + dax_pmem_core_test(); +#ifdef CONFIG_DEV_DAX_PMEM_COMPAT + dax_pmem_compat_test(); +#endif + + nfit_test_setup(ndtest_resource_lookup, NULL); + + ndtest_dimm_class = class_create(THIS_MODULE, "nfit_test_dimm"); + if (IS_ERR(ndtest_dimm_class)) { + rc = PTR_ERR(ndtest_dimm_class); + goto err_register; + } + + ndtest_pool = gen_pool_create(ilog2(SZ_4M), NUMA_NO_NODE); + if (!ndtest_pool) { + rc = -ENOMEM; + goto err_register; + } + + if (gen_pool_add(ndtest_pool, SZ_4G, SZ_4G, NUMA_NO_NODE)) { + rc = -ENOMEM; + goto err_register; + } + + /* Each instance can be taken as a bus, which can have multiple dimms */ + for (i = 0; i < NUM_INSTANCES; i++) { + struct ndtest_priv *priv; + struct platform_device *pdev; + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) { + rc = -ENOMEM; + goto err_register; + } + + INIT_LIST_HEAD(&priv->resources); + pdev = &priv->pdev; + pdev->name = KBUILD_MODNAME; + pdev->id = i; + pdev->dev.release = ndtest_release; + rc = platform_device_register(pdev); + if (rc) { + put_device(&pdev->dev); + goto err_register; + } + get_device(&pdev->dev); + + instances[i] = priv; + } + + rc = platform_driver_register(&ndtest_driver); + if (rc) + goto err_register; + + return 0; + +err_register: + pr_err("Error registering platform device\n"); + cleanup_devices(); + + return rc; +} + +static __exit void ndtest_exit(void) +{ + cleanup_devices(); + platform_driver_unregister(&ndtest_driver); +} + +module_init(ndtest_init); +module_exit(ndtest_exit); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("IBM Corporation"); diff --git a/tools/testing/nvdimm/test/ndtest.h b/tools/testing/nvdimm/test/ndtest.h new file mode 100644 index 000000000000..2c54c9cbb90c --- /dev/null +++ b/tools/testing/nvdimm/test/ndtest.h @@ -0,0 +1,109 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef NDTEST_H +#define NDTEST_H + +#include <linux/platform_device.h> +#include <linux/libnvdimm.h> + +/* SCM device is unable to persist memory contents */ +#define PAPR_PMEM_UNARMED (1ULL << (63 - 0)) +/* SCM device failed to persist memory contents */ +#define PAPR_PMEM_SHUTDOWN_DIRTY (1ULL << (63 - 1)) +/* SCM device contents are not persisted from previous IPL */ +#define PAPR_PMEM_EMPTY (1ULL << (63 - 3)) +#define PAPR_PMEM_HEALTH_CRITICAL (1ULL << (63 - 4)) +/* SCM device will be garded off next IPL due to failure */ +#define PAPR_PMEM_HEALTH_FATAL (1ULL << (63 - 5)) +/* SCM contents cannot persist due to current platform health status */ +#define PAPR_PMEM_HEALTH_UNHEALTHY (1ULL << (63 - 6)) + +/* Bits status indicators for health bitmap indicating unarmed dimm */ +#define PAPR_PMEM_UNARMED_MASK (PAPR_PMEM_UNARMED | \ + PAPR_PMEM_HEALTH_UNHEALTHY) + +#define PAPR_PMEM_SAVE_FAILED (1ULL << (63 - 10)) + +/* Bits status indicators for health bitmap indicating unflushed dimm */ +#define PAPR_PMEM_BAD_SHUTDOWN_MASK (PAPR_PMEM_SHUTDOWN_DIRTY) + +/* Bits status indicators for health bitmap indicating unrestored dimm */ +#define PAPR_PMEM_BAD_RESTORE_MASK (PAPR_PMEM_EMPTY) + +/* Bit status indicators for smart event notification */ +#define PAPR_PMEM_SMART_EVENT_MASK (PAPR_PMEM_HEALTH_CRITICAL | \ + PAPR_PMEM_HEALTH_FATAL | \ + PAPR_PMEM_HEALTH_UNHEALTHY) + +#define PAPR_PMEM_SAVE_MASK (PAPR_PMEM_SAVE_FAILED) + +struct ndtest_config; + +struct ndtest_priv { + struct platform_device pdev; + struct device_node *dn; + struct list_head resources; + struct nvdimm_bus_descriptor bus_desc; + struct nvdimm_bus *bus; + struct ndtest_config *config; + + dma_addr_t *dcr_dma; + dma_addr_t *label_dma; + dma_addr_t *dimm_dma; +}; + +struct ndtest_blk_mmio { + void __iomem *base; + u64 size; + u64 base_offset; + u32 line_size; + u32 num_lines; + u32 table_size; +}; + +struct ndtest_dimm { + struct device *dev; + struct nvdimm *nvdimm; + struct ndtest_blk_mmio *mmio; + struct nd_region *blk_region; + + dma_addr_t address; + unsigned long long flags; + unsigned long config_size; + void *label_area; + char *uuid_str; + + unsigned int size; + unsigned int handle; + unsigned int fail_cmd; + unsigned int physical_id; + unsigned int num_formats; + int id; + int fail_cmd_code; + u8 no_alias; +}; + +struct ndtest_mapping { + u64 start; + u64 size; + u8 position; + u8 dimm; +}; + +struct ndtest_region { + struct nd_region *region; + struct ndtest_mapping *mapping; + u64 size; + u8 type; + u8 num_mappings; + u8 range_index; +}; + +struct ndtest_config { + struct ndtest_dimm *dimms; + struct ndtest_region *regions; + unsigned int dimm_count; + unsigned int dimm_start; + u8 num_regions; +}; + +#endif /* NDTEST_H */ diff --git a/tools/testing/scatterlist/main.c b/tools/testing/scatterlist/main.c index 71c960dcd8a4..652254754b4c 100644 --- a/tools/testing/scatterlist/main.c +++ b/tools/testing/scatterlist/main.c @@ -55,7 +55,6 @@ int main(void) struct test *test, tests[] = { { -EINVAL, 1, pfn(0), NULL, PAGE_SIZE, 0, 1 }, { 0, 1, pfn(0), NULL, PAGE_SIZE, PAGE_SIZE + 1, 1 }, - { 0, 1, pfn(0), NULL, PAGE_SIZE, sgmax + 1, 1 }, { 0, 1, pfn(0), NULL, PAGE_SIZE, sgmax, 1 }, { 0, 1, pfn(0), NULL, 1, sgmax, 1 }, { 0, 2, pfn(0, 1), NULL, 2 * PAGE_SIZE, sgmax, 1 }, diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index afbab4aeef3c..6c575cf34a71 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -33,7 +33,9 @@ TARGETS += memfd TARGETS += memory-hotplug TARGETS += mincore TARGETS += mount +TARGETS += mount_setattr TARGETS += mqueue +TARGETS += nci TARGETS += net TARGETS += net/forwarding TARGETS += net/mptcp @@ -77,8 +79,10 @@ TARGETS += zram TARGETS_HOTPLUG = cpu-hotplug TARGETS_HOTPLUG += memory-hotplug -# User can optionally provide a TARGETS skiplist. -SKIP_TARGETS ?= +# User can optionally provide a TARGETS skiplist. By default we skip +# BPF since it has cutting edge build time dependencies which require +# more effort to install. +SKIP_TARGETS ?= bpf ifneq ($(SKIP_TARGETS),) TMP := $(filter-out $(SKIP_TARGETS), $(TARGETS)) override TARGETS := $(TMP) @@ -123,15 +127,6 @@ ARCH ?= $(SUBARCH) export KSFT_KHDR_INSTALL_DONE := 1 export BUILD -# build and run gpio when output directory is the src dir. -# gpio has dependency on tools/gpio and builds tools/gpio -# objects in the src directory in all cases making the src -# repo dirty even when objects are relocated. -ifneq (1,$(DEFAULT_INSTALL_HDR_PATH)) - TMP := $(filter-out gpio, $(TARGETS)) - TARGETS := $(TMP) -endif - # set default goal to all, so make without a target runs all, even when # all isn't the first target in the file. .DEFAULT_GOAL := all diff --git a/tools/testing/selftests/arm64/fp/fpsimd-test.S b/tools/testing/selftests/arm64/fp/fpsimd-test.S index 1c5556bdd11d..0dbd594c2747 100644 --- a/tools/testing/selftests/arm64/fp/fpsimd-test.S +++ b/tools/testing/selftests/arm64/fp/fpsimd-test.S @@ -457,7 +457,7 @@ function barf mov x11, x1 // actual data mov x12, x2 // data size - puts "Mistatch: PID=" + puts "Mismatch: PID=" mov x0, x20 bl putdec puts ", iteration=" diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S index f95074c9b48b..9210691aa998 100644 --- a/tools/testing/selftests/arm64/fp/sve-test.S +++ b/tools/testing/selftests/arm64/fp/sve-test.S @@ -625,7 +625,7 @@ function barf mov x11, x1 // actual data mov x12, x2 // data size - puts "Mistatch: PID=" + puts "Mismatch: PID=" mov x0, x20 bl putdec puts ", iteration=" diff --git a/tools/testing/selftests/arm64/mte/check_buffer_fill.c b/tools/testing/selftests/arm64/mte/check_buffer_fill.c index c9fa141ebdcc..75fc482d63b6 100644 --- a/tools/testing/selftests/arm64/mte/check_buffer_fill.c +++ b/tools/testing/selftests/arm64/mte/check_buffer_fill.c @@ -81,7 +81,7 @@ static int check_buffer_underflow_by_byte(int mem_type, int mode, last_index = 0; /* Set some value in tagged memory and make the buffer underflow */ for (j = sizes[i] - 1; (j >= -underflow_range) && - (cur_mte_cxt.fault_valid == false); j--) { + (!cur_mte_cxt.fault_valid); j--) { ptr[j] = '1'; last_index = j; } diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index f5b7ef93618c..c0c48fdb9ac1 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -17,7 +17,6 @@ test_sockmap test_lirc_mode2_user get_cgroup_id_user test_skb_cgroup_id_user -test_socket_cookie test_cgroup_storage test_flow_dissector flow_dissector_load @@ -26,7 +25,6 @@ test_tcpnotify_user test_libbpf test_tcp_check_syncookie_user test_sysctl -test_current_pid_tgid_new_ns xdping test_cpp *.skel.h diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 8c33e999319a..044bfdcf5b74 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 include ../../../../scripts/Kbuild.include include ../../../scripts/Makefile.arch +include ../../../scripts/Makefile.include CXX ?= $(CROSS_COMPILE)g++ @@ -18,13 +19,11 @@ ifneq ($(wildcard $(GENHDR)),) GENFLAGS := -DHAVE_GENHDR endif -CLANG ?= clang -LLVM_OBJCOPY ?= llvm-objcopy BPF_GCC ?= $(shell command -v bpf-gcc;) SAN_CFLAGS ?= CFLAGS += -g -rdynamic -Wall -O2 $(GENFLAGS) $(SAN_CFLAGS) \ -I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \ - -I$(TOOLSINCDIR) -I$(APIDIR) \ + -I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT) \ -Dbpf_prog_load=bpf_prog_test_load \ -Dbpf_load_program=bpf_test_load_program LDLIBS += -lcap -lelf -lz -lrt -lpthread @@ -32,21 +31,20 @@ LDLIBS += -lcap -lelf -lz -lrt -lpthread # Order correspond to 'make run_tests' order TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ test_verifier_log test_dev_cgroup \ - test_sock test_sockmap get_cgroup_id_user test_socket_cookie \ + test_sock test_sockmap get_cgroup_id_user \ test_cgroup_storage \ test_netcnt test_tcpnotify_user test_sysctl \ - test_progs-no_alu32 \ - test_current_pid_tgid_new_ns + test_progs-no_alu32 # Also test bpf-gcc, if present ifneq ($(BPF_GCC),) TEST_GEN_PROGS += test_progs-bpf_gcc endif -TEST_GEN_FILES = -TEST_FILES = test_lwt_ip_encap.o \ - test_tc_edt.o \ - xsk_prereqs.sh +TEST_GEN_FILES = test_lwt_ip_encap.o \ + test_tc_edt.o +TEST_FILES = xsk_prereqs.sh \ + $(wildcard progs/btf_dump_test_case_*.c) # Order correspond to 'make run_tests' order TEST_PROGS := test_kmod.sh \ @@ -82,7 +80,7 @@ TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \ test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \ xdpxceiver -TEST_CUSTOM_PROGS = urandom_read +TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read # Emit succinct information message describing current building step # $1 - generic step name (e.g., CC, LINK, etc); @@ -113,7 +111,15 @@ SCRATCH_DIR := $(OUTPUT)/tools BUILD_DIR := $(SCRATCH_DIR)/build INCLUDE_DIR := $(SCRATCH_DIR)/include BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a -RESOLVE_BTFIDS := $(BUILD_DIR)/resolve_btfids/resolve_btfids +ifneq ($(CROSS_COMPILE),) +HOST_BUILD_DIR := $(BUILD_DIR)/host +HOST_SCRATCH_DIR := $(OUTPUT)/host-tools +else +HOST_BUILD_DIR := $(BUILD_DIR) +HOST_SCRATCH_DIR := $(SCRATCH_DIR) +endif +HOST_BPFOBJ := $(HOST_BUILD_DIR)/libbpf/libbpf.a +RESOLVE_BTFIDS := $(HOST_BUILD_DIR)/resolve_btfids/resolve_btfids VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \ @@ -121,6 +127,9 @@ VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ /sys/kernel/btf/vmlinux \ /boot/vmlinux-$(shell uname -r) VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) +ifeq ($(VMLINUX_BTF),) +$(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)") +endif # Define simple and short `make test_progs`, `make test_sysctl`, etc targets # to build individual tests. @@ -132,6 +141,14 @@ $(notdir $(TEST_GEN_PROGS) \ $(TEST_GEN_PROGS_EXTENDED) \ $(TEST_CUSTOM_PROGS)): %: $(OUTPUT)/% ; +# sort removes libbpf duplicates when not cross-building +MAKE_DIRS := $(sort $(BUILD_DIR)/libbpf $(HOST_BUILD_DIR)/libbpf \ + $(HOST_BUILD_DIR)/bpftool $(HOST_BUILD_DIR)/resolve_btfids \ + $(INCLUDE_DIR)) +$(MAKE_DIRS): + $(call msg,MKDIR,,$@) + $(Q)mkdir -p $@ + $(OUTPUT)/%.o: %.c $(call msg,CC,,$@) $(Q)$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@ @@ -154,7 +171,7 @@ $(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ) $(call msg,CC,,$@) $(Q)$(CC) -c $(CFLAGS) -o $@ $< -DEFAULT_BPFTOOL := $(SCRATCH_DIR)/sbin/bpftool +DEFAULT_BPFTOOL := $(HOST_SCRATCH_DIR)/sbin/bpftool $(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \ @@ -168,7 +185,6 @@ $(OUTPUT)/test_dev_cgroup: cgroup_helpers.c $(OUTPUT)/test_skb_cgroup_id_user: cgroup_helpers.c $(OUTPUT)/test_sock: cgroup_helpers.c $(OUTPUT)/test_sock_addr: cgroup_helpers.c -$(OUTPUT)/test_socket_cookie: cgroup_helpers.c $(OUTPUT)/test_sockmap: cgroup_helpers.c $(OUTPUT)/test_tcpnotify_user: cgroup_helpers.c trace_helpers.c $(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c @@ -179,10 +195,11 @@ $(OUTPUT)/test_sysctl: cgroup_helpers.c BPFTOOL ?= $(DEFAULT_BPFTOOL) $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ - $(BPFOBJ) | $(BUILD_DIR)/bpftool + $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/bpftool $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \ - OUTPUT=$(BUILD_DIR)/bpftool/ \ - prefix= DESTDIR=$(SCRATCH_DIR)/ install + CC=$(HOSTCC) LD=$(HOSTLD) \ + OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \ + prefix= DESTDIR=$(HOST_SCRATCH_DIR)/ install $(Q)mkdir -p $(BUILD_DIR)/bpftool/Documentation $(Q)RST2MAN_OPTS="--exit-status=1" $(MAKE) $(submake_extras) \ -C $(BPFTOOLDIR)/Documentation \ @@ -195,9 +212,14 @@ $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \ DESTDIR=$(SCRATCH_DIR) prefix= all install_headers -$(BUILD_DIR)/libbpf $(BUILD_DIR)/bpftool $(BUILD_DIR)/resolve_btfids $(INCLUDE_DIR): - $(call msg,MKDIR,,$@) - $(Q)mkdir -p $@ +ifneq ($(BPFOBJ),$(HOST_BPFOBJ)) +$(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ + ../../../include/uapi/linux/bpf.h \ + | $(INCLUDE_DIR) $(HOST_BUILD_DIR)/libbpf + $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) \ + OUTPUT=$(HOST_BUILD_DIR)/libbpf/ CC=$(HOSTCC) LD=$(HOSTLD) \ + DESTDIR=$(HOST_SCRATCH_DIR)/ prefix= all install_headers +endif $(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) | $(BPFTOOL) $(INCLUDE_DIR) ifeq ($(VMLINUX_H),) @@ -208,7 +230,7 @@ else $(Q)cp "$(VMLINUX_H)" $@ endif -$(RESOLVE_BTFIDS): $(BPFOBJ) | $(BUILD_DIR)/resolve_btfids \ +$(RESOLVE_BTFIDS): $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/resolve_btfids \ $(TOOLSDIR)/bpf/resolve_btfids/main.c \ $(TOOLSDIR)/lib/rbtree.c \ $(TOOLSDIR)/lib/zalloc.c \ @@ -216,7 +238,8 @@ $(RESOLVE_BTFIDS): $(BPFOBJ) | $(BUILD_DIR)/resolve_btfids \ $(TOOLSDIR)/lib/ctype.c \ $(TOOLSDIR)/lib/str_error_r.c $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/resolve_btfids \ - OUTPUT=$(BUILD_DIR)/resolve_btfids/ BPFOBJ=$(BPFOBJ) + CC=$(HOSTCC) LD=$(HOSTLD) AR=$(HOSTAR) \ + OUTPUT=$(HOST_BUILD_DIR)/resolve_btfids/ BPFOBJ=$(HOST_BPFOBJ) # Get Clang's default includes on this system, as opposed to those seen by # '-target bpf'. This fixes "missing" files on some architectures/distros, @@ -387,10 +410,12 @@ TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \ $(wildcard progs/btf_dump_test_case_*.c) TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS) +TRUNNER_BPF_CFLAGS += -DENABLE_ATOMICS_TESTS $(eval $(call DEFINE_TEST_RUNNER,test_progs)) # Define test_progs-no_alu32 test runner. TRUNNER_BPF_BUILD_RULE := CLANG_NOALU32_BPF_BUILD_RULE +TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS) $(eval $(call DEFINE_TEST_RUNNER,test_progs,no_alu32)) # Define test_progs BPF-GCC-flavored test runner. @@ -447,7 +472,7 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \ $(call msg,BINARY,,$@) $(Q)$(CC) $(LDFLAGS) -o $@ $(filter %.a %.o,$^) $(LDLIBS) -EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) \ +EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \ prog_tests/tests.h map_tests/tests.h verifier/tests.h \ feature \ $(addprefix $(OUTPUT)/,*.o *.skel.h no_alu32 bpf_gcc bpf_testmod.ko) diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst index ca064180d4d0..fd148b8410fa 100644 --- a/tools/testing/selftests/bpf/README.rst +++ b/tools/testing/selftests/bpf/README.rst @@ -6,6 +6,30 @@ General instructions on running selftests can be found in __ /Documentation/bpf/bpf_devel_QA.rst#q-how-to-run-bpf-selftests +========================= +Running Selftests in a VM +========================= + +It's now possible to run the selftests using ``tools/testing/selftests/bpf/vmtest.sh``. +The script tries to ensure that the tests are run with the same environment as they +would be run post-submit in the CI used by the Maintainers. + +This script downloads a suitable Kconfig and VM userspace image from the system used by +the CI. It builds the kernel (without overwriting your existing Kconfig), recompiles the +bpf selftests, runs them (by default ``tools/testing/selftests/bpf/test_progs``) and +saves the resulting output (by default in ``~/.bpf_selftests``). + +For more information on about using the script, run: + +.. code-block:: console + + $ tools/testing/selftests/bpf/vmtest.sh -h + +.. note:: The script uses pahole and clang based on host environment setting. + If you want to change pahole and llvm, you can change `PATH` environment + variable in the beginning of script. + +.. note:: The script currently only supports x86_64. Additional information about selftest failures are documented here. diff --git a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c index da87c7f31891..bde6c9d4cbd4 100644 --- a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c +++ b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c @@ -319,7 +319,7 @@ static void ringbuf_custom_process_ring(struct ringbuf_custom *r) smp_store_release(r->consumer_pos, cons_pos); else break; - }; + } } static void *ringbuf_custom_consumer(void *input) diff --git a/tools/testing/selftests/bpf/bpf_sockopt_helpers.h b/tools/testing/selftests/bpf/bpf_sockopt_helpers.h new file mode 100644 index 000000000000..11f3a0976174 --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_sockopt_helpers.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include <sys/socket.h> +#include <bpf/bpf_helpers.h> + +int get_set_sk_priority(void *ctx) +{ + int prio; + + /* Verify that context allows calling bpf_getsockopt and + * bpf_setsockopt by reading and writing back socket + * priority. + */ + + if (bpf_getsockopt(ctx, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio))) + return 0; + if (bpf_setsockopt(ctx, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio))) + return 0; + + return 1; +} diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h index 6a9053162cf2..91f0fac632f4 100644 --- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h +++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h @@ -177,6 +177,7 @@ struct tcp_congestion_ops { * after all the ca_state processing. (optional) */ void (*cong_control)(struct sock *sk, const struct rate_sample *rs); + void *owner; }; #define min(a, b) ((a) < (b) ? (a) : (b)) diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h index b83ea448bc79..89c6d58e5dd6 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h @@ -28,6 +28,12 @@ TRACE_EVENT(bpf_testmod_test_read, __entry->pid, __entry->comm, __entry->off, __entry->len) ); +/* A bare tracepoint with no event associated with it */ +DECLARE_TRACE(bpf_testmod_test_write_bare, + TP_PROTO(struct task_struct *task, struct bpf_testmod_test_write_ctx *ctx), + TP_ARGS(task, ctx) +); + #endif /* _BPF_TESTMOD_EVENTS_H */ #undef TRACE_INCLUDE_PATH diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index 2df19d73ca49..141d8da687d2 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -3,6 +3,7 @@ #include <linux/error-injection.h> #include <linux/init.h> #include <linux/module.h> +#include <linux/percpu-defs.h> #include <linux/sysfs.h> #include <linux/tracepoint.h> #include "bpf_testmod.h" @@ -10,6 +11,8 @@ #define CREATE_TRACE_POINTS #include "bpf_testmod-events.h" +DEFINE_PER_CPU(int, bpf_testmod_ksym_percpu) = 123; + noinline ssize_t bpf_testmod_test_read(struct file *file, struct kobject *kobj, struct bin_attribute *bin_attr, @@ -28,9 +31,28 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj, EXPORT_SYMBOL(bpf_testmod_test_read); ALLOW_ERROR_INJECTION(bpf_testmod_test_read, ERRNO); +noinline ssize_t +bpf_testmod_test_write(struct file *file, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t len) +{ + struct bpf_testmod_test_write_ctx ctx = { + .buf = buf, + .off = off, + .len = len, + }; + + trace_bpf_testmod_test_write_bare(current, &ctx); + + return -EIO; /* always fail */ +} +EXPORT_SYMBOL(bpf_testmod_test_write); +ALLOW_ERROR_INJECTION(bpf_testmod_test_write, ERRNO); + static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = { - .attr = { .name = "bpf_testmod", .mode = 0444, }, + .attr = { .name = "bpf_testmod", .mode = 0666, }, .read = bpf_testmod_test_read, + .write = bpf_testmod_test_write, }; static int bpf_testmod_init(void) diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h index b81adfedb4f6..b3892dc40111 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h @@ -11,4 +11,10 @@ struct bpf_testmod_test_read_ctx { size_t len; }; +struct bpf_testmod_test_write_ctx { + char *buf; + loff_t off; + size_t len; +}; + #endif /* _BPF_TESTMOD_H */ diff --git a/tools/testing/selftests/bpf/prog_tests/atomic_bounds.c b/tools/testing/selftests/bpf/prog_tests/atomic_bounds.c new file mode 100644 index 000000000000..69bd7853e8f1 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/atomic_bounds.c @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <test_progs.h> + +#include "atomic_bounds.skel.h" + +void test_atomic_bounds(void) +{ + struct atomic_bounds *skel; + __u32 duration = 0; + + skel = atomic_bounds__open_and_load(); + if (CHECK(!skel, "skel_load", "couldn't load program\n")) + return; + + atomic_bounds__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/atomics.c b/tools/testing/selftests/bpf/prog_tests/atomics.c new file mode 100644 index 000000000000..21efe7bbf10d --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/atomics.c @@ -0,0 +1,246 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <test_progs.h> + +#include "atomics.skel.h" + +static void test_add(struct atomics *skel) +{ + int err, prog_fd; + __u32 duration = 0, retval; + struct bpf_link *link; + + link = bpf_program__attach(skel->progs.add); + if (CHECK(IS_ERR(link), "attach(add)", "err: %ld\n", PTR_ERR(link))) + return; + + prog_fd = bpf_program__fd(skel->progs.add); + err = bpf_prog_test_run(prog_fd, 1, NULL, 0, + NULL, NULL, &retval, &duration); + if (CHECK(err || retval, "test_run add", + "err %d errno %d retval %d duration %d\n", err, errno, retval, duration)) + goto cleanup; + + ASSERT_EQ(skel->data->add64_value, 3, "add64_value"); + ASSERT_EQ(skel->bss->add64_result, 1, "add64_result"); + + ASSERT_EQ(skel->data->add32_value, 3, "add32_value"); + ASSERT_EQ(skel->bss->add32_result, 1, "add32_result"); + + ASSERT_EQ(skel->bss->add_stack_value_copy, 3, "add_stack_value"); + ASSERT_EQ(skel->bss->add_stack_result, 1, "add_stack_result"); + + ASSERT_EQ(skel->data->add_noreturn_value, 3, "add_noreturn_value"); + +cleanup: + bpf_link__destroy(link); +} + +static void test_sub(struct atomics *skel) +{ + int err, prog_fd; + __u32 duration = 0, retval; + struct bpf_link *link; + + link = bpf_program__attach(skel->progs.sub); + if (CHECK(IS_ERR(link), "attach(sub)", "err: %ld\n", PTR_ERR(link))) + return; + + prog_fd = bpf_program__fd(skel->progs.sub); + err = bpf_prog_test_run(prog_fd, 1, NULL, 0, + NULL, NULL, &retval, &duration); + if (CHECK(err || retval, "test_run sub", + "err %d errno %d retval %d duration %d\n", + err, errno, retval, duration)) + goto cleanup; + + ASSERT_EQ(skel->data->sub64_value, -1, "sub64_value"); + ASSERT_EQ(skel->bss->sub64_result, 1, "sub64_result"); + + ASSERT_EQ(skel->data->sub32_value, -1, "sub32_value"); + ASSERT_EQ(skel->bss->sub32_result, 1, "sub32_result"); + + ASSERT_EQ(skel->bss->sub_stack_value_copy, -1, "sub_stack_value"); + ASSERT_EQ(skel->bss->sub_stack_result, 1, "sub_stack_result"); + + ASSERT_EQ(skel->data->sub_noreturn_value, -1, "sub_noreturn_value"); + +cleanup: + bpf_link__destroy(link); +} + +static void test_and(struct atomics *skel) +{ + int err, prog_fd; + __u32 duration = 0, retval; + struct bpf_link *link; + + link = bpf_program__attach(skel->progs.and); + if (CHECK(IS_ERR(link), "attach(and)", "err: %ld\n", PTR_ERR(link))) + return; + + prog_fd = bpf_program__fd(skel->progs.and); + err = bpf_prog_test_run(prog_fd, 1, NULL, 0, + NULL, NULL, &retval, &duration); + if (CHECK(err || retval, "test_run and", + "err %d errno %d retval %d duration %d\n", err, errno, retval, duration)) + goto cleanup; + + ASSERT_EQ(skel->data->and64_value, 0x010ull << 32, "and64_value"); + ASSERT_EQ(skel->bss->and64_result, 0x110ull << 32, "and64_result"); + + ASSERT_EQ(skel->data->and32_value, 0x010, "and32_value"); + ASSERT_EQ(skel->bss->and32_result, 0x110, "and32_result"); + + ASSERT_EQ(skel->data->and_noreturn_value, 0x010ull << 32, "and_noreturn_value"); +cleanup: + bpf_link__destroy(link); +} + +static void test_or(struct atomics *skel) +{ + int err, prog_fd; + __u32 duration = 0, retval; + struct bpf_link *link; + + link = bpf_program__attach(skel->progs.or); + if (CHECK(IS_ERR(link), "attach(or)", "err: %ld\n", PTR_ERR(link))) + return; + + prog_fd = bpf_program__fd(skel->progs.or); + err = bpf_prog_test_run(prog_fd, 1, NULL, 0, + NULL, NULL, &retval, &duration); + if (CHECK(err || retval, "test_run or", + "err %d errno %d retval %d duration %d\n", + err, errno, retval, duration)) + goto cleanup; + + ASSERT_EQ(skel->data->or64_value, 0x111ull << 32, "or64_value"); + ASSERT_EQ(skel->bss->or64_result, 0x110ull << 32, "or64_result"); + + ASSERT_EQ(skel->data->or32_value, 0x111, "or32_value"); + ASSERT_EQ(skel->bss->or32_result, 0x110, "or32_result"); + + ASSERT_EQ(skel->data->or_noreturn_value, 0x111ull << 32, "or_noreturn_value"); +cleanup: + bpf_link__destroy(link); +} + +static void test_xor(struct atomics *skel) +{ + int err, prog_fd; + __u32 duration = 0, retval; + struct bpf_link *link; + + link = bpf_program__attach(skel->progs.xor); + if (CHECK(IS_ERR(link), "attach(xor)", "err: %ld\n", PTR_ERR(link))) + return; + + prog_fd = bpf_program__fd(skel->progs.xor); + err = bpf_prog_test_run(prog_fd, 1, NULL, 0, + NULL, NULL, &retval, &duration); + if (CHECK(err || retval, "test_run xor", + "err %d errno %d retval %d duration %d\n", err, errno, retval, duration)) + goto cleanup; + + ASSERT_EQ(skel->data->xor64_value, 0x101ull << 32, "xor64_value"); + ASSERT_EQ(skel->bss->xor64_result, 0x110ull << 32, "xor64_result"); + + ASSERT_EQ(skel->data->xor32_value, 0x101, "xor32_value"); + ASSERT_EQ(skel->bss->xor32_result, 0x110, "xor32_result"); + + ASSERT_EQ(skel->data->xor_noreturn_value, 0x101ull << 32, "xor_nxoreturn_value"); +cleanup: + bpf_link__destroy(link); +} + +static void test_cmpxchg(struct atomics *skel) +{ + int err, prog_fd; + __u32 duration = 0, retval; + struct bpf_link *link; + + link = bpf_program__attach(skel->progs.cmpxchg); + if (CHECK(IS_ERR(link), "attach(cmpxchg)", "err: %ld\n", PTR_ERR(link))) + return; + + prog_fd = bpf_program__fd(skel->progs.cmpxchg); + err = bpf_prog_test_run(prog_fd, 1, NULL, 0, + NULL, NULL, &retval, &duration); + if (CHECK(err || retval, "test_run add", + "err %d errno %d retval %d duration %d\n", err, errno, retval, duration)) + goto cleanup; + + ASSERT_EQ(skel->data->cmpxchg64_value, 2, "cmpxchg64_value"); + ASSERT_EQ(skel->bss->cmpxchg64_result_fail, 1, "cmpxchg_result_fail"); + ASSERT_EQ(skel->bss->cmpxchg64_result_succeed, 1, "cmpxchg_result_succeed"); + + ASSERT_EQ(skel->data->cmpxchg32_value, 2, "lcmpxchg32_value"); + ASSERT_EQ(skel->bss->cmpxchg32_result_fail, 1, "cmpxchg_result_fail"); + ASSERT_EQ(skel->bss->cmpxchg32_result_succeed, 1, "cmpxchg_result_succeed"); + +cleanup: + bpf_link__destroy(link); +} + +static void test_xchg(struct atomics *skel) +{ + int err, prog_fd; + __u32 duration = 0, retval; + struct bpf_link *link; + + link = bpf_program__attach(skel->progs.xchg); + if (CHECK(IS_ERR(link), "attach(xchg)", "err: %ld\n", PTR_ERR(link))) + return; + + prog_fd = bpf_program__fd(skel->progs.xchg); + err = bpf_prog_test_run(prog_fd, 1, NULL, 0, + NULL, NULL, &retval, &duration); + if (CHECK(err || retval, "test_run add", + "err %d errno %d retval %d duration %d\n", err, errno, retval, duration)) + goto cleanup; + + ASSERT_EQ(skel->data->xchg64_value, 2, "xchg64_value"); + ASSERT_EQ(skel->bss->xchg64_result, 1, "xchg64_result"); + + ASSERT_EQ(skel->data->xchg32_value, 2, "xchg32_value"); + ASSERT_EQ(skel->bss->xchg32_result, 1, "xchg32_result"); + +cleanup: + bpf_link__destroy(link); +} + +void test_atomics(void) +{ + struct atomics *skel; + __u32 duration = 0; + + skel = atomics__open_and_load(); + if (CHECK(!skel, "skel_load", "atomics skeleton failed\n")) + return; + + if (skel->data->skip_tests) { + printf("%s:SKIP:no ENABLE_ATOMICS_TESTS (missing Clang BPF atomics support)", + __func__); + test__skip(); + goto cleanup; + } + + if (test__start_subtest("add")) + test_add(skel); + if (test__start_subtest("sub")) + test_sub(skel); + if (test__start_subtest("and")) + test_and(skel); + if (test__start_subtest("or")) + test_or(skel); + if (test__start_subtest("xor")) + test_xor(skel); + if (test__start_subtest("cmpxchg")) + test_cmpxchg(skel); + if (test__start_subtest("xchg")) + test_xchg(skel); + +cleanup: + atomics__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/bind_perm.c b/tools/testing/selftests/bpf/prog_tests/bind_perm.c new file mode 100644 index 000000000000..d0f06e40c16d --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/bind_perm.c @@ -0,0 +1,109 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include "bind_perm.skel.h" + +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/capability.h> + +static int duration; + +void try_bind(int family, int port, int expected_errno) +{ + struct sockaddr_storage addr = {}; + struct sockaddr_in6 *sin6; + struct sockaddr_in *sin; + int fd = -1; + + fd = socket(family, SOCK_STREAM, 0); + if (CHECK(fd < 0, "fd", "errno %d", errno)) + goto close_socket; + + if (family == AF_INET) { + sin = (struct sockaddr_in *)&addr; + sin->sin_family = family; + sin->sin_port = htons(port); + } else { + sin6 = (struct sockaddr_in6 *)&addr; + sin6->sin6_family = family; + sin6->sin6_port = htons(port); + } + + errno = 0; + bind(fd, (struct sockaddr *)&addr, sizeof(addr)); + ASSERT_EQ(errno, expected_errno, "bind"); + +close_socket: + if (fd >= 0) + close(fd); +} + +bool cap_net_bind_service(cap_flag_value_t flag) +{ + const cap_value_t cap_net_bind_service = CAP_NET_BIND_SERVICE; + cap_flag_value_t original_value; + bool was_effective = false; + cap_t caps; + + caps = cap_get_proc(); + if (CHECK(!caps, "cap_get_proc", "errno %d", errno)) + goto free_caps; + + if (CHECK(cap_get_flag(caps, CAP_NET_BIND_SERVICE, CAP_EFFECTIVE, + &original_value), + "cap_get_flag", "errno %d", errno)) + goto free_caps; + + was_effective = (original_value == CAP_SET); + + if (CHECK(cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_net_bind_service, + flag), + "cap_set_flag", "errno %d", errno)) + goto free_caps; + + if (CHECK(cap_set_proc(caps), "cap_set_proc", "errno %d", errno)) + goto free_caps; + +free_caps: + CHECK(cap_free(caps), "cap_free", "errno %d", errno); + return was_effective; +} + +void test_bind_perm(void) +{ + bool cap_was_effective; + struct bind_perm *skel; + int cgroup_fd; + + cgroup_fd = test__join_cgroup("/bind_perm"); + if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno)) + return; + + skel = bind_perm__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel")) + goto close_cgroup_fd; + + skel->links.bind_v4_prog = bpf_program__attach_cgroup(skel->progs.bind_v4_prog, cgroup_fd); + if (!ASSERT_OK_PTR(skel, "bind_v4_prog")) + goto close_skeleton; + + skel->links.bind_v6_prog = bpf_program__attach_cgroup(skel->progs.bind_v6_prog, cgroup_fd); + if (!ASSERT_OK_PTR(skel, "bind_v6_prog")) + goto close_skeleton; + + cap_was_effective = cap_net_bind_service(CAP_CLEAR); + + try_bind(AF_INET, 110, EACCES); + try_bind(AF_INET6, 110, EACCES); + + try_bind(AF_INET, 111, 0); + try_bind(AF_INET6, 111, 0); + + if (cap_was_effective) + cap_net_bind_service(CAP_SET); + +close_skeleton: + bind_perm__destroy(skel); +close_cgroup_fd: + close(cgroup_fd); +} diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 0e586368948d..74c45d557a2b 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -7,6 +7,7 @@ #include "bpf_iter_task.skel.h" #include "bpf_iter_task_stack.skel.h" #include "bpf_iter_task_file.skel.h" +#include "bpf_iter_task_vma.skel.h" #include "bpf_iter_task_btf.skel.h" #include "bpf_iter_tcp4.skel.h" #include "bpf_iter_tcp6.skel.h" @@ -64,6 +65,22 @@ free_link: bpf_link__destroy(link); } +static int read_fd_into_buffer(int fd, char *buf, int size) +{ + int bufleft = size; + int len; + + do { + len = read(fd, buf, bufleft); + if (len > 0) { + buf += len; + bufleft -= len; + } + } while (len > 0); + + return len < 0 ? len : size - bufleft; +} + static void test_ipv6_route(void) { struct bpf_iter_ipv6_route *skel; @@ -177,7 +194,7 @@ static int do_btf_read(struct bpf_iter_task_btf *skel) { struct bpf_program *prog = skel->progs.dump_task_struct; struct bpf_iter_task_btf__bss *bss = skel->bss; - int iter_fd = -1, len = 0, bufleft = TASKBUFSZ; + int iter_fd = -1, err; struct bpf_link *link; char *buf = taskbuf; int ret = 0; @@ -190,14 +207,7 @@ static int do_btf_read(struct bpf_iter_task_btf *skel) if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) goto free_link; - do { - len = read(iter_fd, buf, bufleft); - if (len > 0) { - buf += len; - bufleft -= len; - } - } while (len > 0); - + err = read_fd_into_buffer(iter_fd, buf, TASKBUFSZ); if (bss->skip) { printf("%s:SKIP:no __builtin_btf_type_id\n", __func__); ret = 1; @@ -205,7 +215,7 @@ static int do_btf_read(struct bpf_iter_task_btf *skel) goto free_link; } - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (CHECK(err < 0, "read", "read failed: %s\n", strerror(errno))) goto free_link; CHECK(strstr(taskbuf, "(struct task_struct)") == NULL, @@ -1133,6 +1143,92 @@ static void test_buf_neg_offset(void) bpf_iter_test_kern6__destroy(skel); } +#define CMP_BUFFER_SIZE 1024 +static char task_vma_output[CMP_BUFFER_SIZE]; +static char proc_maps_output[CMP_BUFFER_SIZE]; + +/* remove \0 and \t from str, and only keep the first line */ +static void str_strip_first_line(char *str) +{ + char *dst = str, *src = str; + + do { + if (*src == ' ' || *src == '\t') + src++; + else + *(dst++) = *(src++); + + } while (*src != '\0' && *src != '\n'); + + *dst = '\0'; +} + +#define min(a, b) ((a) < (b) ? (a) : (b)) + +static void test_task_vma(void) +{ + int err, iter_fd = -1, proc_maps_fd = -1; + struct bpf_iter_task_vma *skel; + int len, read_size = 4; + char maps_path[64]; + + skel = bpf_iter_task_vma__open(); + if (CHECK(!skel, "bpf_iter_task_vma__open", "skeleton open failed\n")) + return; + + skel->bss->pid = getpid(); + + err = bpf_iter_task_vma__load(skel); + if (CHECK(err, "bpf_iter_task_vma__load", "skeleton load failed\n")) + goto out; + + skel->links.proc_maps = bpf_program__attach_iter( + skel->progs.proc_maps, NULL); + + if (CHECK(IS_ERR(skel->links.proc_maps), "bpf_program__attach_iter", + "attach iterator failed\n")) { + skel->links.proc_maps = NULL; + goto out; + } + + iter_fd = bpf_iter_create(bpf_link__fd(skel->links.proc_maps)); + if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + goto out; + + /* Read CMP_BUFFER_SIZE (1kB) from bpf_iter. Read in small chunks + * to trigger seq_file corner cases. The expected output is much + * longer than 1kB, so the while loop will terminate. + */ + len = 0; + while (len < CMP_BUFFER_SIZE) { + err = read_fd_into_buffer(iter_fd, task_vma_output + len, + min(read_size, CMP_BUFFER_SIZE - len)); + if (CHECK(err < 0, "read_iter_fd", "read_iter_fd failed\n")) + goto out; + len += err; + } + + /* read CMP_BUFFER_SIZE (1kB) from /proc/pid/maps */ + snprintf(maps_path, 64, "/proc/%u/maps", skel->bss->pid); + proc_maps_fd = open(maps_path, O_RDONLY); + if (CHECK(proc_maps_fd < 0, "open_proc_maps", "open_proc_maps failed\n")) + goto out; + err = read_fd_into_buffer(proc_maps_fd, proc_maps_output, CMP_BUFFER_SIZE); + if (CHECK(err < 0, "read_prog_maps_fd", "read_prog_maps_fd failed\n")) + goto out; + + /* strip and compare the first line of the two files */ + str_strip_first_line(task_vma_output); + str_strip_first_line(proc_maps_output); + + CHECK(strcmp(task_vma_output, proc_maps_output), "compare_output", + "found mismatch\n"); +out: + close(proc_maps_fd); + close(iter_fd); + bpf_iter_task_vma__destroy(skel); +} + void test_bpf_iter(void) { if (test__start_subtest("btf_id_or_null")) @@ -1149,6 +1245,8 @@ void test_bpf_iter(void) test_task_stack(); if (test__start_subtest("task_file")) test_task_file(); + if (test__start_subtest("task_vma")) + test_task_vma(); if (test__start_subtest("task_btf")) test_task_btf(); if (test__start_subtest("tcp4")) diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index 9a8f47fc0b91..37c5494a0381 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c @@ -2,6 +2,7 @@ /* Copyright (c) 2019 Facebook */ #include <linux/err.h> +#include <netinet/tcp.h> #include <test_progs.h> #include "bpf_dctcp.skel.h" #include "bpf_cubic.skel.h" diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 8ae97e2a4b9d..6a7ee7420701 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -914,7 +914,7 @@ static struct btf_raw_test raw_tests[] = { .err_str = "Member exceeds struct_size", }, -/* Test member exeeds the size of struct +/* Test member exceeds the size of struct * * struct A { * int m; @@ -948,7 +948,7 @@ static struct btf_raw_test raw_tests[] = { .err_str = "Member exceeds struct_size", }, -/* Test member exeeds the size of struct +/* Test member exceeds the size of struct * * struct A { * int m; @@ -3509,6 +3509,27 @@ static struct btf_raw_test raw_tests[] = { .value_type_id = 3 /* arr_t */, .max_entries = 4, }, +/* + * elf .rodata section size 4 and btf .rodata section vlen 0. + */ +{ + .descr = "datasec: vlen == 0", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* .rodata section */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 0), 4), + /* [2] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0.rodata"), + .map_type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 1, +}, }; /* struct btf_raw_test raw_tests[] */ diff --git a/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c index 76ebe4c250f1..eb90a6b8850d 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c @@ -20,39 +20,6 @@ static __u32 bpf_map_id(struct bpf_map *map) return info.id; } -/* - * Trigger synchronize_rcu() in kernel. - * - * ARRAY_OF_MAPS/HASH_OF_MAPS lookup/update operations trigger synchronize_rcu() - * if looking up an existing non-NULL element or updating the map with a valid - * inner map FD. Use this fact to trigger synchronize_rcu(): create map-in-map, - * create a trivial ARRAY map, update map-in-map with ARRAY inner map. Then - * cleanup. At the end, at least one synchronize_rcu() would be called. - */ -static int kern_sync_rcu(void) -{ - int inner_map_fd, outer_map_fd, err, zero = 0; - - inner_map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 4, 1, 0); - if (CHECK(inner_map_fd < 0, "inner_map_create", "failed %d\n", -errno)) - return -1; - - outer_map_fd = bpf_create_map_in_map(BPF_MAP_TYPE_ARRAY_OF_MAPS, NULL, - sizeof(int), inner_map_fd, 1, 0); - if (CHECK(outer_map_fd < 0, "outer_map_create", "failed %d\n", -errno)) { - close(inner_map_fd); - return -1; - } - - err = bpf_map_update_elem(outer_map_fd, &zero, &inner_map_fd, 0); - if (err) - err = -errno; - CHECK(err, "outer_map_update", "failed %d\n", err); - close(inner_map_fd); - close(outer_map_fd); - return err; -} - static void test_lookup_update(void) { int map1_fd, map2_fd, map3_fd, map4_fd, map5_fd, map1_id, map2_id; diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c index b549fcfacc0b..0a1fc9816cef 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c @@ -45,13 +45,13 @@ static int prog_load_cnt(int verdict, int val) BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), BPF_MOV64_IMM(BPF_REG_1, val), /* r1 = 1 */ - BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */ + BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_0, BPF_REG_1, 0), BPF_LD_MAP_FD(BPF_REG_1, cgroup_storage_fd), BPF_MOV64_IMM(BPF_REG_2, 0), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage), BPF_MOV64_IMM(BPF_REG_1, val), - BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_W, BPF_REG_0, BPF_REG_1, 0, 0), + BPF_ATOMIC_OP(BPF_W, BPF_ADD, BPF_REG_0, BPF_REG_1, 0), BPF_LD_MAP_FD(BPF_REG_1, percpu_cgroup_storage_fd), BPF_MOV64_IMM(BPF_REG_2, 0), diff --git a/tools/testing/selftests/bpf/prog_tests/check_mtu.c b/tools/testing/selftests/bpf/prog_tests/check_mtu.c new file mode 100644 index 000000000000..36af1c138faf --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/check_mtu.c @@ -0,0 +1,216 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Jesper Dangaard Brouer */ + +#include <linux/if_link.h> /* before test_progs.h, avoid bpf_util.h redefines */ +#include <test_progs.h> +#include "test_check_mtu.skel.h" +#include "network_helpers.h" + +#include <stdlib.h> +#include <inttypes.h> + +#define IFINDEX_LO 1 + +static __u32 duration; /* Hint: needed for CHECK macro */ + +static int read_mtu_device_lo(void) +{ + const char *filename = "/sys/class/net/lo/mtu"; + char buf[11] = {}; + int value, n, fd; + + fd = open(filename, 0, O_RDONLY); + if (fd == -1) + return -1; + + n = read(fd, buf, sizeof(buf)); + close(fd); + + if (n == -1) + return -2; + + value = strtoimax(buf, NULL, 10); + if (errno == ERANGE) + return -3; + + return value; +} + +static void test_check_mtu_xdp_attach(void) +{ + struct bpf_link_info link_info; + __u32 link_info_len = sizeof(link_info); + struct test_check_mtu *skel; + struct bpf_program *prog; + struct bpf_link *link; + int err = 0; + int fd; + + skel = test_check_mtu__open_and_load(); + if (CHECK(!skel, "open and load skel", "failed")) + return; /* Exit if e.g. helper unknown to kernel */ + + prog = skel->progs.xdp_use_helper_basic; + + link = bpf_program__attach_xdp(prog, IFINDEX_LO); + if (CHECK(IS_ERR(link), "link_attach", "failed: %ld\n", PTR_ERR(link))) + goto out; + skel->links.xdp_use_helper_basic = link; + + memset(&link_info, 0, sizeof(link_info)); + fd = bpf_link__fd(link); + err = bpf_obj_get_info_by_fd(fd, &link_info, &link_info_len); + if (CHECK(err, "link_info", "failed: %d\n", err)) + goto out; + + CHECK(link_info.type != BPF_LINK_TYPE_XDP, "link_type", + "got %u != exp %u\n", link_info.type, BPF_LINK_TYPE_XDP); + CHECK(link_info.xdp.ifindex != IFINDEX_LO, "link_ifindex", + "got %u != exp %u\n", link_info.xdp.ifindex, IFINDEX_LO); + + err = bpf_link__detach(link); + CHECK(err, "link_detach", "failed %d\n", err); + +out: + test_check_mtu__destroy(skel); +} + +static void test_check_mtu_run_xdp(struct test_check_mtu *skel, + struct bpf_program *prog, + __u32 mtu_expect) +{ + const char *prog_name = bpf_program__name(prog); + int retval_expect = XDP_PASS; + __u32 mtu_result = 0; + char buf[256] = {}; + int err; + struct bpf_prog_test_run_attr tattr = { + .repeat = 1, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .data_out = buf, + .data_size_out = sizeof(buf), + .prog_fd = bpf_program__fd(prog), + }; + + err = bpf_prog_test_run_xattr(&tattr); + CHECK_ATTR(err != 0, "bpf_prog_test_run", + "prog_name:%s (err %d errno %d retval %d)\n", + prog_name, err, errno, tattr.retval); + + CHECK(tattr.retval != retval_expect, "retval", + "progname:%s unexpected retval=%d expected=%d\n", + prog_name, tattr.retval, retval_expect); + + /* Extract MTU that BPF-prog got */ + mtu_result = skel->bss->global_bpf_mtu_xdp; + ASSERT_EQ(mtu_result, mtu_expect, "MTU-compare-user"); +} + + +static void test_check_mtu_xdp(__u32 mtu, __u32 ifindex) +{ + struct test_check_mtu *skel; + int err; + + skel = test_check_mtu__open(); + if (CHECK(!skel, "skel_open", "failed")) + return; + + /* Update "constants" in BPF-prog *BEFORE* libbpf load */ + skel->rodata->GLOBAL_USER_MTU = mtu; + skel->rodata->GLOBAL_USER_IFINDEX = ifindex; + + err = test_check_mtu__load(skel); + if (CHECK(err, "skel_load", "failed: %d\n", err)) + goto cleanup; + + test_check_mtu_run_xdp(skel, skel->progs.xdp_use_helper, mtu); + test_check_mtu_run_xdp(skel, skel->progs.xdp_exceed_mtu, mtu); + test_check_mtu_run_xdp(skel, skel->progs.xdp_minus_delta, mtu); + +cleanup: + test_check_mtu__destroy(skel); +} + +static void test_check_mtu_run_tc(struct test_check_mtu *skel, + struct bpf_program *prog, + __u32 mtu_expect) +{ + const char *prog_name = bpf_program__name(prog); + int retval_expect = BPF_OK; + __u32 mtu_result = 0; + char buf[256] = {}; + int err; + struct bpf_prog_test_run_attr tattr = { + .repeat = 1, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .data_out = buf, + .data_size_out = sizeof(buf), + .prog_fd = bpf_program__fd(prog), + }; + + err = bpf_prog_test_run_xattr(&tattr); + CHECK_ATTR(err != 0, "bpf_prog_test_run", + "prog_name:%s (err %d errno %d retval %d)\n", + prog_name, err, errno, tattr.retval); + + CHECK(tattr.retval != retval_expect, "retval", + "progname:%s unexpected retval=%d expected=%d\n", + prog_name, tattr.retval, retval_expect); + + /* Extract MTU that BPF-prog got */ + mtu_result = skel->bss->global_bpf_mtu_tc; + ASSERT_EQ(mtu_result, mtu_expect, "MTU-compare-user"); +} + + +static void test_check_mtu_tc(__u32 mtu, __u32 ifindex) +{ + struct test_check_mtu *skel; + int err; + + skel = test_check_mtu__open(); + if (CHECK(!skel, "skel_open", "failed")) + return; + + /* Update "constants" in BPF-prog *BEFORE* libbpf load */ + skel->rodata->GLOBAL_USER_MTU = mtu; + skel->rodata->GLOBAL_USER_IFINDEX = ifindex; + + err = test_check_mtu__load(skel); + if (CHECK(err, "skel_load", "failed: %d\n", err)) + goto cleanup; + + test_check_mtu_run_tc(skel, skel->progs.tc_use_helper, mtu); + test_check_mtu_run_tc(skel, skel->progs.tc_exceed_mtu, mtu); + test_check_mtu_run_tc(skel, skel->progs.tc_exceed_mtu_da, mtu); + test_check_mtu_run_tc(skel, skel->progs.tc_minus_delta, mtu); +cleanup: + test_check_mtu__destroy(skel); +} + +void test_check_mtu(void) +{ + __u32 mtu_lo; + + if (test__start_subtest("bpf_check_mtu XDP-attach")) + test_check_mtu_xdp_attach(); + + mtu_lo = read_mtu_device_lo(); + if (CHECK(mtu_lo < 0, "reading MTU value", "failed (err:%d)", mtu_lo)) + return; + + if (test__start_subtest("bpf_check_mtu XDP-run")) + test_check_mtu_xdp(mtu_lo, 0); + + if (test__start_subtest("bpf_check_mtu XDP-run ifindex-lookup")) + test_check_mtu_xdp(mtu_lo, IFINDEX_LO); + + if (test__start_subtest("bpf_check_mtu TC-run")) + test_check_mtu_tc(mtu_lo, 0); + + if (test__start_subtest("bpf_check_mtu TC-run ifindex-lookup")) + test_check_mtu_tc(mtu_lo, IFINDEX_LO); +} diff --git a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c index 9781d85cb223..e075d03ab630 100644 --- a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c @@ -7,6 +7,7 @@ #include <string.h> #include <linux/pkt_cls.h> +#include <netinet/tcp.h> #include <test_progs.h> diff --git a/tools/testing/selftests/bpf/prog_tests/core_read_macros.c b/tools/testing/selftests/bpf/prog_tests/core_read_macros.c new file mode 100644 index 000000000000..96f5cf3c6fa2 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/core_read_macros.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include <test_progs.h> + +struct callback_head { + struct callback_head *next; + void (*func)(struct callback_head *head); +}; + +/* ___shuffled flavor is just an illusion for BPF code, it doesn't really + * exist and user-space needs to provide data in the memory layout that + * matches callback_head. We just defined ___shuffled flavor to make it easier + * to work with the skeleton + */ +struct callback_head___shuffled { + struct callback_head___shuffled *next; + void (*func)(struct callback_head *head); +}; + +#include "test_core_read_macros.skel.h" + +void test_core_read_macros(void) +{ + int duration = 0, err; + struct test_core_read_macros* skel; + struct test_core_read_macros__bss *bss; + struct callback_head u_probe_in; + struct callback_head___shuffled u_core_in; + + skel = test_core_read_macros__open_and_load(); + if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + return; + bss = skel->bss; + bss->my_pid = getpid(); + + /* next pointers have to be set from the kernel side */ + bss->k_probe_in.func = (void *)(long)0x1234; + bss->k_core_in.func = (void *)(long)0xabcd; + + u_probe_in.next = &u_probe_in; + u_probe_in.func = (void *)(long)0x5678; + bss->u_probe_in = &u_probe_in; + + u_core_in.next = &u_core_in; + u_core_in.func = (void *)(long)0xdbca; + bss->u_core_in = &u_core_in; + + err = test_core_read_macros__attach(skel); + if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + goto cleanup; + + /* trigger tracepoint */ + usleep(1); + + ASSERT_EQ(bss->k_probe_out, 0x1234, "k_probe_out"); + ASSERT_EQ(bss->k_core_out, 0xabcd, "k_core_out"); + + ASSERT_EQ(bss->u_probe_out, 0x5678, "u_probe_out"); + ASSERT_EQ(bss->u_core_out, 0xdbca, "u_core_out"); + +cleanup: + test_core_read_macros__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c index 3b9dbf7433f0..7c9b62e971f1 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c @@ -2,8 +2,8 @@ /* Copyright (c) 2019 Facebook */ #include <test_progs.h> -/* x86-64 fits 55 JITed and 43 interpreted progs into half page */ -#define CNT 40 +/* that's kernel internal BPF_MAX_TRAMP_PROGS define */ +#define CNT 38 void test_fexit_stress(void) { diff --git a/tools/testing/selftests/bpf/prog_tests/global_func_args.c b/tools/testing/selftests/bpf/prog_tests/global_func_args.c new file mode 100644 index 000000000000..8bcc2869102f --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/global_func_args.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "test_progs.h" +#include "network_helpers.h" + +static __u32 duration; + +static void test_global_func_args0(struct bpf_object *obj) +{ + int err, i, map_fd, actual_value; + const char *map_name = "values"; + + map_fd = bpf_find_map(__func__, obj, map_name); + if (CHECK(map_fd < 0, "bpf_find_map", "cannot find BPF map %s: %s\n", + map_name, strerror(errno))) + return; + + struct { + const char *descr; + int expected_value; + } tests[] = { + {"passing NULL pointer", 0}, + {"returning value", 1}, + {"reading local variable", 100 }, + {"writing local variable", 101 }, + {"reading global variable", 42 }, + {"writing global variable", 43 }, + {"writing to pointer-to-pointer", 1 }, + }; + + for (i = 0; i < ARRAY_SIZE(tests); ++i) { + const int expected_value = tests[i].expected_value; + + err = bpf_map_lookup_elem(map_fd, &i, &actual_value); + + CHECK(err || actual_value != expected_value, tests[i].descr, + "err %d result %d expected %d\n", err, actual_value, expected_value); + } +} + +void test_global_func_args(void) +{ + const char *file = "./test_global_func_args.o"; + __u32 retval; + struct bpf_object *obj; + int err, prog_fd; + + err = bpf_prog_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd); + if (CHECK(err, "load program", "error %d loading %s\n", err, file)) + return; + + err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), + NULL, NULL, &retval, &duration); + CHECK(err || retval, "pass global func args run", + "err %d errno %d retval %d duration %d\n", + err, errno, retval, duration); + + test_global_func_args0(obj); + + bpf_object__close(obj); +} diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_module.c b/tools/testing/selftests/bpf/prog_tests/ksyms_module.c new file mode 100644 index 000000000000..4c232b456479 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/ksyms_module.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include <test_progs.h> +#include <bpf/libbpf.h> +#include <bpf/btf.h> +#include "test_ksyms_module.skel.h" + +static int duration; + +void test_ksyms_module(void) +{ + struct test_ksyms_module* skel; + int err; + + skel = test_ksyms_module__open_and_load(); + if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + return; + + err = test_ksyms_module__attach(skel); + if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + goto cleanup; + + usleep(1); + + ASSERT_EQ(skel->bss->triggered, true, "triggered"); + ASSERT_EQ(skel->bss->out_mod_ksym_global, 123, "global_ksym_val"); + +cleanup: + test_ksyms_module__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/module_attach.c b/tools/testing/selftests/bpf/prog_tests/module_attach.c index 50796b651f72..5bc53d53d86e 100644 --- a/tools/testing/selftests/bpf/prog_tests/module_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/module_attach.c @@ -21,9 +21,34 @@ static int trigger_module_test_read(int read_sz) return 0; } +static int trigger_module_test_write(int write_sz) +{ + int fd, err; + char *buf = malloc(write_sz); + + if (!buf) + return -ENOMEM; + + memset(buf, 'a', write_sz); + buf[write_sz-1] = '\0'; + + fd = open("/sys/kernel/bpf_testmod", O_WRONLY); + err = -errno; + if (CHECK(fd < 0, "testmod_file_open", "failed: %d\n", err)) { + free(buf); + return err; + } + + write(fd, buf, write_sz); + close(fd); + free(buf); + return 0; +} + void test_module_attach(void) { const int READ_SZ = 456; + const int WRITE_SZ = 457; struct test_module_attach* skel; struct test_module_attach__bss *bss; int err; @@ -48,8 +73,10 @@ void test_module_attach(void) /* trigger tracepoint */ ASSERT_OK(trigger_module_test_read(READ_SZ), "trigger_read"); + ASSERT_OK(trigger_module_test_write(WRITE_SZ), "trigger_write"); ASSERT_EQ(bss->raw_tp_read_sz, READ_SZ, "raw_tp"); + ASSERT_EQ(bss->raw_tp_bare_write_sz, WRITE_SZ, "raw_tp_bare"); ASSERT_EQ(bss->tp_btf_read_sz, READ_SZ, "tp_btf"); ASSERT_EQ(bss->fentry_read_sz, READ_SZ, "fentry"); ASSERT_EQ(bss->fentry_manual_read_sz, READ_SZ, "fentry_manual"); diff --git a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c index e74dc501b27f..31a3114906e2 100644 --- a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c +++ b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c @@ -1,85 +1,87 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Carlos Neira cneirabustos@gmail.com */ + +#define _GNU_SOURCE #include <test_progs.h> +#include "test_ns_current_pid_tgid.skel.h" #include <sys/stat.h> #include <sys/types.h> #include <unistd.h> #include <sys/syscall.h> +#include <sched.h> +#include <sys/wait.h> +#include <sys/mount.h> +#include <sys/fcntl.h> -struct bss { - __u64 dev; - __u64 ino; - __u64 pid_tgid; - __u64 user_pid_tgid; -}; +#define STACK_SIZE (1024 * 1024) +static char child_stack[STACK_SIZE]; -void test_ns_current_pid_tgid(void) +static int test_current_pid_tgid(void *args) { - const char *probe_name = "raw_tracepoint/sys_enter"; - const char *file = "test_ns_current_pid_tgid.o"; - int err, key = 0, duration = 0; - struct bpf_link *link = NULL; - struct bpf_program *prog; - struct bpf_map *bss_map; - struct bpf_object *obj; - struct bss bss; + struct test_ns_current_pid_tgid__bss *bss; + struct test_ns_current_pid_tgid *skel; + int err = -1, duration = 0; + pid_t tgid, pid; struct stat st; - __u64 id; - - obj = bpf_object__open_file(file, NULL); - if (CHECK(IS_ERR(obj), "obj_open", "err %ld\n", PTR_ERR(obj))) - return; - err = bpf_object__load(obj); - if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno)) + skel = test_ns_current_pid_tgid__open_and_load(); + if (CHECK(!skel, "skel_open_load", "failed to load skeleton\n")) goto cleanup; - bss_map = bpf_object__find_map_by_name(obj, "test_ns_.bss"); - if (CHECK(!bss_map, "find_bss_map", "failed\n")) + pid = syscall(SYS_gettid); + tgid = getpid(); + + err = stat("/proc/self/ns/pid", &st); + if (CHECK(err, "stat", "failed /proc/self/ns/pid: %d\n", err)) goto cleanup; - prog = bpf_object__find_program_by_title(obj, probe_name); - if (CHECK(!prog, "find_prog", "prog '%s' not found\n", - probe_name)) + bss = skel->bss; + bss->dev = st.st_dev; + bss->ino = st.st_ino; + bss->user_pid = 0; + bss->user_tgid = 0; + + err = test_ns_current_pid_tgid__attach(skel); + if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) goto cleanup; - memset(&bss, 0, sizeof(bss)); - pid_t tid = syscall(SYS_gettid); - pid_t pid = getpid(); + /* trigger tracepoint */ + usleep(1); + ASSERT_EQ(bss->user_pid, pid, "pid"); + ASSERT_EQ(bss->user_tgid, tgid, "tgid"); + err = 0; - id = (__u64) tid << 32 | pid; - bss.user_pid_tgid = id; +cleanup: + test_ns_current_pid_tgid__destroy(skel); - if (CHECK_FAIL(stat("/proc/self/ns/pid", &st))) { - perror("Failed to stat /proc/self/ns/pid"); - goto cleanup; - } + return err; +} - bss.dev = st.st_dev; - bss.ino = st.st_ino; +static void test_ns_current_pid_tgid_new_ns(void) +{ + int wstatus, duration = 0; + pid_t cpid; - err = bpf_map_update_elem(bpf_map__fd(bss_map), &key, &bss, 0); - if (CHECK(err, "setting_bss", "failed to set bss : %d\n", err)) - goto cleanup; + /* Create a process in a new namespace, this process + * will be the init process of this new namespace hence will be pid 1. + */ + cpid = clone(test_current_pid_tgid, child_stack + STACK_SIZE, + CLONE_NEWPID | SIGCHLD, NULL); - link = bpf_program__attach_raw_tracepoint(prog, "sys_enter"); - if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", - PTR_ERR(link))) { - link = NULL; - goto cleanup; - } + if (CHECK(cpid == -1, "clone", strerror(errno))) + return; - /* trigger some syscalls */ - usleep(1); + if (CHECK(waitpid(cpid, &wstatus, 0) == -1, "waitpid", strerror(errno))) + return; - err = bpf_map_lookup_elem(bpf_map__fd(bss_map), &key, &bss); - if (CHECK(err, "set_bss", "failed to get bss : %d\n", err)) - goto cleanup; + if (CHECK(WEXITSTATUS(wstatus) != 0, "newns_pidtgid", "failed")) + return; +} - if (CHECK(id != bss.pid_tgid, "Compare user pid/tgid vs. bpf pid/tgid", - "User pid/tgid %llu BPF pid/tgid %llu\n", id, bss.pid_tgid)) - goto cleanup; -cleanup: - bpf_link__destroy(link); - bpf_object__close(obj); +void test_ns_current_pid_tgid(void) +{ + if (test__start_subtest("ns_current_pid_tgid_root_ns")) + test_current_pid_tgid(NULL); + if (test__start_subtest("ns_current_pid_tgid_new_ns")) + test_ns_current_pid_tgid_new_ns(); } diff --git a/tools/testing/selftests/bpf/prog_tests/recursion.c b/tools/testing/selftests/bpf/prog_tests/recursion.c new file mode 100644 index 000000000000..0e378d63fe18 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/recursion.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ +#include <test_progs.h> +#include "recursion.skel.h" + +void test_recursion(void) +{ + struct bpf_prog_info prog_info = {}; + __u32 prog_info_len = sizeof(prog_info); + struct recursion *skel; + int key = 0; + int err; + + skel = recursion__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + return; + + err = recursion__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto out; + + ASSERT_EQ(skel->bss->pass1, 0, "pass1 == 0"); + bpf_map_lookup_elem(bpf_map__fd(skel->maps.hash1), &key, 0); + ASSERT_EQ(skel->bss->pass1, 1, "pass1 == 1"); + bpf_map_lookup_elem(bpf_map__fd(skel->maps.hash1), &key, 0); + ASSERT_EQ(skel->bss->pass1, 2, "pass1 == 2"); + + ASSERT_EQ(skel->bss->pass2, 0, "pass2 == 0"); + bpf_map_lookup_elem(bpf_map__fd(skel->maps.hash2), &key, 0); + ASSERT_EQ(skel->bss->pass2, 1, "pass2 == 1"); + bpf_map_lookup_elem(bpf_map__fd(skel->maps.hash2), &key, 0); + ASSERT_EQ(skel->bss->pass2, 2, "pass2 == 2"); + + err = bpf_obj_get_info_by_fd(bpf_program__fd(skel->progs.on_lookup), + &prog_info, &prog_info_len); + if (!ASSERT_OK(err, "get_prog_info")) + goto out; + ASSERT_EQ(prog_info.recursion_misses, 2, "recursion_misses"); +out: + recursion__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/socket_cookie.c b/tools/testing/selftests/bpf/prog_tests/socket_cookie.c new file mode 100644 index 000000000000..232db28dde18 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/socket_cookie.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Google LLC. +// Copyright (c) 2018 Facebook + +#include <test_progs.h> +#include "socket_cookie_prog.skel.h" +#include "network_helpers.h" + +static int duration; + +struct socket_cookie { + __u64 cookie_key; + __u32 cookie_value; +}; + +void test_socket_cookie(void) +{ + int server_fd = 0, client_fd = 0, cgroup_fd = 0, err = 0; + socklen_t addr_len = sizeof(struct sockaddr_in6); + struct socket_cookie_prog *skel; + __u32 cookie_expected_value; + struct sockaddr_in6 addr; + struct socket_cookie val; + + skel = socket_cookie_prog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + cgroup_fd = test__join_cgroup("/socket_cookie"); + if (CHECK(cgroup_fd < 0, "join_cgroup", "cgroup creation failed\n")) + goto out; + + skel->links.set_cookie = bpf_program__attach_cgroup( + skel->progs.set_cookie, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.set_cookie, "prog_attach")) + goto close_cgroup_fd; + + skel->links.update_cookie_sockops = bpf_program__attach_cgroup( + skel->progs.update_cookie_sockops, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.update_cookie_sockops, "prog_attach")) + goto close_cgroup_fd; + + skel->links.update_cookie_tracing = bpf_program__attach( + skel->progs.update_cookie_tracing); + if (!ASSERT_OK_PTR(skel->links.update_cookie_tracing, "prog_attach")) + goto close_cgroup_fd; + + server_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0); + if (CHECK(server_fd < 0, "start_server", "errno %d\n", errno)) + goto close_cgroup_fd; + + client_fd = connect_to_fd(server_fd, 0); + if (CHECK(client_fd < 0, "connect_to_fd", "errno %d\n", errno)) + goto close_server_fd; + + err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.socket_cookies), + &client_fd, &val); + if (!ASSERT_OK(err, "map_lookup(socket_cookies)")) + goto close_client_fd; + + err = getsockname(client_fd, (struct sockaddr *)&addr, &addr_len); + if (!ASSERT_OK(err, "getsockname")) + goto close_client_fd; + + cookie_expected_value = (ntohs(addr.sin6_port) << 8) | 0xFF; + ASSERT_EQ(val.cookie_value, cookie_expected_value, "cookie_value"); + +close_client_fd: + close(client_fd); +close_server_fd: + close(server_fd); +close_cgroup_fd: + close(cgroup_fd); +out: + socket_cookie_prog__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index 85f73261fab0..b8b48cac2ac3 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2020 Cloudflare #include <error.h> +#include <netinet/tcp.h> #include "test_progs.h" #include "test_skmsg_load_helpers.skel.h" diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c index b25c9c45c148..d5b44b135c00 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c @@ -2,6 +2,12 @@ #include <test_progs.h> #include "cgroup_helpers.h" +#include <linux/tcp.h> + +#ifndef SOL_TCP +#define SOL_TCP IPPROTO_TCP +#endif + #define SOL_CUSTOM 0xdeadbeef static int getsetsockopt(void) @@ -11,6 +17,7 @@ static int getsetsockopt(void) char u8[4]; __u32 u32; char cc[16]; /* TCP_CA_NAME_MAX */ + struct tcp_zerocopy_receive zc; } buf = {}; socklen_t optlen; char *big_buf = NULL; @@ -154,6 +161,27 @@ static int getsetsockopt(void) goto err; } + /* TCP_ZEROCOPY_RECEIVE triggers */ + memset(&buf, 0, sizeof(buf)); + optlen = sizeof(buf.zc); + err = getsockopt(fd, SOL_TCP, TCP_ZEROCOPY_RECEIVE, &buf, &optlen); + if (err) { + log_err("Unexpected getsockopt(TCP_ZEROCOPY_RECEIVE) err=%d errno=%d", + err, errno); + goto err; + } + + memset(&buf, 0, sizeof(buf)); + buf.zc.address = 12345; /* rejected by BPF */ + optlen = sizeof(buf.zc); + errno = 0; + err = getsockopt(fd, SOL_TCP, TCP_ZEROCOPY_RECEIVE, &buf, &optlen); + if (errno != EPERM) { + log_err("Unexpected getsockopt(TCP_ZEROCOPY_RECEIVE) err=%d errno=%d", + err, errno); + goto err; + } + free(big_buf); close(fd); return 0; diff --git a/tools/testing/selftests/bpf/prog_tests/stack_var_off.c b/tools/testing/selftests/bpf/prog_tests/stack_var_off.c new file mode 100644 index 000000000000..2ce9deefa59c --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/stack_var_off.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include "test_stack_var_off.skel.h" + +/* Test read and writes to the stack performed with offsets that are not + * statically known. + */ +void test_stack_var_off(void) +{ + int duration = 0; + struct test_stack_var_off *skel; + + skel = test_stack_var_off__open_and_load(); + if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + return; + + /* Give pid to bpf prog so it doesn't trigger for anyone else. */ + skel->bss->test_pid = getpid(); + /* Initialize the probe's input. */ + skel->bss->input[0] = 2; + skel->bss->input[1] = 42; /* This will be returned in probe_res. */ + + if (!ASSERT_OK(test_stack_var_off__attach(skel), "skel_attach")) + goto cleanup; + + /* Trigger probe. */ + usleep(1); + + if (CHECK(skel->bss->probe_res != 42, "check_probe_res", + "wrong probe res: %d\n", skel->bss->probe_res)) + goto cleanup; + +cleanup: + test_stack_var_off__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c index 32e4348b714b..7e13129f593a 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c +++ b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c @@ -61,6 +61,14 @@ void test_test_global_funcs(void) { "test_global_func6.o" , "modified ctx ptr R2" }, { "test_global_func7.o" , "foo() doesn't return scalar" }, { "test_global_func8.o" }, + { "test_global_func9.o" }, + { "test_global_func10.o", "invalid indirect read from stack" }, + { "test_global_func11.o", "Caller passes invalid args into func#1" }, + { "test_global_func12.o", "invalid mem access 'mem_or_null'" }, + { "test_global_func13.o", "Caller passes invalid args into func#1" }, + { "test_global_func14.o", "reference type('FWD S') size cannot be determined" }, + { "test_global_func15.o", "At program exit the register R0 has value" }, + { "test_global_func16.o", "invalid indirect read from stack" }, }; libbpf_print_fn_t old_print_fn = NULL; int err, i, duration = 0; diff --git a/tools/testing/selftests/bpf/prog_tests/test_ima.c b/tools/testing/selftests/bpf/prog_tests/test_ima.c index 61fca681d524..b54bc0c351b7 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_ima.c +++ b/tools/testing/selftests/bpf/prog_tests/test_ima.c @@ -9,6 +9,7 @@ #include <unistd.h> #include <sys/wait.h> #include <test_progs.h> +#include <linux/ring_buffer.h> #include "ima.skel.h" @@ -31,9 +32,18 @@ static int run_measured_process(const char *measured_dir, u32 *monitored_pid) return -EINVAL; } +static u64 ima_hash_from_bpf; + +static int process_sample(void *ctx, void *data, size_t len) +{ + ima_hash_from_bpf = *((u64 *)data); + return 0; +} + void test_test_ima(void) { char measured_dir_template[] = "/tmp/ima_measuredXXXXXX"; + struct ring_buffer *ringbuf; const char *measured_dir; char cmd[256]; @@ -44,6 +54,11 @@ void test_test_ima(void) if (CHECK(!skel, "skel_load", "skeleton failed\n")) goto close_prog; + ringbuf = ring_buffer__new(bpf_map__fd(skel->maps.ringbuf), + process_sample, NULL, NULL); + if (!ASSERT_OK_PTR(ringbuf, "ringbuf")) + goto close_prog; + err = ima__attach(skel); if (CHECK(err, "attach", "attach failed: %d\n", err)) goto close_prog; @@ -60,11 +75,9 @@ void test_test_ima(void) if (CHECK(err, "run_measured_process", "err = %d\n", err)) goto close_clean; - CHECK(skel->data->ima_hash_ret < 0, "ima_hash_ret", - "ima_hash_ret = %ld\n", skel->data->ima_hash_ret); - - CHECK(skel->bss->ima_hash == 0, "ima_hash", - "ima_hash = %lu\n", skel->bss->ima_hash); + err = ring_buffer__consume(ringbuf); + ASSERT_EQ(err, 1, "num_samples_or_err"); + ASSERT_NEQ(ima_hash_from_bpf, 0, "ima_hash"); close_clean: snprintf(cmd, sizeof(cmd), "./ima_setup.sh cleanup %s", measured_dir); diff --git a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c index c0fe73a17ed1..d2c16eaae367 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c @@ -34,61 +34,6 @@ struct storage { struct bpf_spin_lock lock; }; -/* Copies an rm binary to a temp file. dest is a mkstemp template */ -static int copy_rm(char *dest) -{ - int fd_in, fd_out = -1, ret = 0; - struct stat stat; - char *buf = NULL; - - fd_in = open("/bin/rm", O_RDONLY); - if (fd_in < 0) - return -errno; - - fd_out = mkstemp(dest); - if (fd_out < 0) { - ret = -errno; - goto out; - } - - ret = fstat(fd_in, &stat); - if (ret == -1) { - ret = -errno; - goto out; - } - - buf = malloc(stat.st_blksize); - if (!buf) { - ret = -errno; - goto out; - } - - while (ret = read(fd_in, buf, stat.st_blksize), ret > 0) { - ret = write(fd_out, buf, ret); - if (ret < 0) { - ret = -errno; - goto out; - - } - } - if (ret < 0) { - ret = -errno; - goto out; - - } - - /* Set executable permission on the copied file */ - ret = chmod(dest, 0100); - if (ret == -1) - ret = -errno; - -out: - free(buf); - close(fd_in); - close(fd_out); - return ret; -} - /* Fork and exec the provided rm binary and return the exit code of the * forked process and its pid. */ @@ -168,9 +113,11 @@ static bool check_syscall_operations(int map_fd, int obj_fd) void test_test_local_storage(void) { - char tmp_exec_path[PATH_MAX] = "/tmp/copy_of_rmXXXXXX"; + char tmp_dir_path[] = "/tmp/local_storageXXXXXX"; int err, serv_sk = -1, task_fd = -1, rm_fd = -1; struct local_storage *skel = NULL; + char tmp_exec_path[64]; + char cmd[256]; skel = local_storage__open_and_load(); if (CHECK(!skel, "skel_load", "lsm skeleton failed\n")) @@ -189,18 +136,24 @@ void test_test_local_storage(void) task_fd)) goto close_prog; - err = copy_rm(tmp_exec_path); - if (CHECK(err < 0, "copy_rm", "err %d errno %d\n", err, errno)) + if (CHECK(!mkdtemp(tmp_dir_path), "mkdtemp", + "unable to create tmpdir: %d\n", errno)) goto close_prog; + snprintf(tmp_exec_path, sizeof(tmp_exec_path), "%s/copy_of_rm", + tmp_dir_path); + snprintf(cmd, sizeof(cmd), "cp /bin/rm %s", tmp_exec_path); + if (CHECK_FAIL(system(cmd))) + goto close_prog_rmdir; + rm_fd = open(tmp_exec_path, O_RDONLY); if (CHECK(rm_fd < 0, "open", "failed to open %s err:%d, errno:%d", tmp_exec_path, rm_fd, errno)) - goto close_prog; + goto close_prog_rmdir; if (!check_syscall_operations(bpf_map__fd(skel->maps.inode_storage_map), rm_fd)) - goto close_prog; + goto close_prog_rmdir; /* Sets skel->bss->monitored_pid to the pid of the forked child * forks a child process that executes tmp_exec_path and tries to @@ -209,33 +162,36 @@ void test_test_local_storage(void) */ err = run_self_unlink(&skel->bss->monitored_pid, tmp_exec_path); if (CHECK(err != EPERM, "run_self_unlink", "err %d want EPERM\n", err)) - goto close_prog_unlink; + goto close_prog_rmdir; /* Set the process being monitored to be the current process */ skel->bss->monitored_pid = getpid(); - /* Remove the temporary created executable */ - err = unlink(tmp_exec_path); - if (CHECK(err != 0, "unlink", "unable to unlink %s: %d", tmp_exec_path, - errno)) - goto close_prog_unlink; + /* Move copy_of_rm to a new location so that it triggers the + * inode_rename LSM hook with a new_dentry that has a NULL inode ptr. + */ + snprintf(cmd, sizeof(cmd), "mv %s/copy_of_rm %s/check_null_ptr", + tmp_dir_path, tmp_dir_path); + if (CHECK_FAIL(system(cmd))) + goto close_prog_rmdir; CHECK(skel->data->inode_storage_result != 0, "inode_storage_result", "inode_local_storage not set\n"); serv_sk = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0); if (CHECK(serv_sk < 0, "start_server", "failed to start server\n")) - goto close_prog; + goto close_prog_rmdir; CHECK(skel->data->sk_storage_result != 0, "sk_storage_result", "sk_local_storage not set\n"); if (!check_syscall_operations(bpf_map__fd(skel->maps.sk_storage_map), serv_sk)) - goto close_prog; + goto close_prog_rmdir; -close_prog_unlink: - unlink(tmp_exec_path); +close_prog_rmdir: + snprintf(cmd, sizeof(cmd), "rm -rf %s", tmp_dir_path); + system(cmd); close_prog: close(serv_sk); close(rm_fd); diff --git a/tools/testing/selftests/bpf/prog_tests/test_lsm.c b/tools/testing/selftests/bpf/prog_tests/test_lsm.c index 6ab29226c99b..2755e4f81499 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_lsm.c +++ b/tools/testing/selftests/bpf/prog_tests/test_lsm.c @@ -10,7 +10,6 @@ #include <unistd.h> #include <malloc.h> #include <stdlib.h> -#include <unistd.h> #include "lsm.skel.h" diff --git a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c index 781c8d11604b..f3022d934e2d 100644 --- a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c +++ b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c @@ -4,7 +4,7 @@ #include <sys/prctl.h> #include <test_progs.h> -#define MAX_TRAMP_PROGS 40 +#define MAX_TRAMP_PROGS 38 struct inst { struct bpf_object *obj; @@ -52,7 +52,7 @@ void test_trampoline_count(void) struct bpf_link *link; char comm[16] = {}; - /* attach 'allowed' 40 trampoline programs */ + /* attach 'allowed' trampoline programs */ for (i = 0; i < MAX_TRAMP_PROGS; i++) { obj = bpf_object__open_file(object, NULL); if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) { diff --git a/tools/testing/selftests/bpf/progs/atomic_bounds.c b/tools/testing/selftests/bpf/progs/atomic_bounds.c new file mode 100644 index 000000000000..e5fff7fc7f8f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/atomic_bounds.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <stdbool.h> + +#ifdef ENABLE_ATOMICS_TESTS +bool skip_tests __attribute((__section__(".data"))) = false; +#else +bool skip_tests = true; +#endif + +SEC("fentry/bpf_fentry_test1") +int BPF_PROG(sub, int x) +{ +#ifdef ENABLE_ATOMICS_TESTS + int a = 0; + int b = __sync_fetch_and_add(&a, 1); + /* b is certainly 0 here. Can the verifier tell? */ + while (b) + continue; +#endif + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/atomics.c b/tools/testing/selftests/bpf/progs/atomics.c new file mode 100644 index 000000000000..c245345e41ca --- /dev/null +++ b/tools/testing/selftests/bpf/progs/atomics.c @@ -0,0 +1,154 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <stdbool.h> + +#ifdef ENABLE_ATOMICS_TESTS +bool skip_tests __attribute((__section__(".data"))) = false; +#else +bool skip_tests = true; +#endif + +__u64 add64_value = 1; +__u64 add64_result = 0; +__u32 add32_value = 1; +__u32 add32_result = 0; +__u64 add_stack_value_copy = 0; +__u64 add_stack_result = 0; +__u64 add_noreturn_value = 1; + +SEC("fentry/bpf_fentry_test1") +int BPF_PROG(add, int a) +{ +#ifdef ENABLE_ATOMICS_TESTS + __u64 add_stack_value = 1; + + add64_result = __sync_fetch_and_add(&add64_value, 2); + add32_result = __sync_fetch_and_add(&add32_value, 2); + add_stack_result = __sync_fetch_and_add(&add_stack_value, 2); + add_stack_value_copy = add_stack_value; + __sync_fetch_and_add(&add_noreturn_value, 2); +#endif + + return 0; +} + +__s64 sub64_value = 1; +__s64 sub64_result = 0; +__s32 sub32_value = 1; +__s32 sub32_result = 0; +__s64 sub_stack_value_copy = 0; +__s64 sub_stack_result = 0; +__s64 sub_noreturn_value = 1; + +SEC("fentry/bpf_fentry_test1") +int BPF_PROG(sub, int a) +{ +#ifdef ENABLE_ATOMICS_TESTS + __u64 sub_stack_value = 1; + + sub64_result = __sync_fetch_and_sub(&sub64_value, 2); + sub32_result = __sync_fetch_and_sub(&sub32_value, 2); + sub_stack_result = __sync_fetch_and_sub(&sub_stack_value, 2); + sub_stack_value_copy = sub_stack_value; + __sync_fetch_and_sub(&sub_noreturn_value, 2); +#endif + + return 0; +} + +__u64 and64_value = (0x110ull << 32); +__u64 and64_result = 0; +__u32 and32_value = 0x110; +__u32 and32_result = 0; +__u64 and_noreturn_value = (0x110ull << 32); + +SEC("fentry/bpf_fentry_test1") +int BPF_PROG(and, int a) +{ +#ifdef ENABLE_ATOMICS_TESTS + + and64_result = __sync_fetch_and_and(&and64_value, 0x011ull << 32); + and32_result = __sync_fetch_and_and(&and32_value, 0x011); + __sync_fetch_and_and(&and_noreturn_value, 0x011ull << 32); +#endif + + return 0; +} + +__u64 or64_value = (0x110ull << 32); +__u64 or64_result = 0; +__u32 or32_value = 0x110; +__u32 or32_result = 0; +__u64 or_noreturn_value = (0x110ull << 32); + +SEC("fentry/bpf_fentry_test1") +int BPF_PROG(or, int a) +{ +#ifdef ENABLE_ATOMICS_TESTS + or64_result = __sync_fetch_and_or(&or64_value, 0x011ull << 32); + or32_result = __sync_fetch_and_or(&or32_value, 0x011); + __sync_fetch_and_or(&or_noreturn_value, 0x011ull << 32); +#endif + + return 0; +} + +__u64 xor64_value = (0x110ull << 32); +__u64 xor64_result = 0; +__u32 xor32_value = 0x110; +__u32 xor32_result = 0; +__u64 xor_noreturn_value = (0x110ull << 32); + +SEC("fentry/bpf_fentry_test1") +int BPF_PROG(xor, int a) +{ +#ifdef ENABLE_ATOMICS_TESTS + xor64_result = __sync_fetch_and_xor(&xor64_value, 0x011ull << 32); + xor32_result = __sync_fetch_and_xor(&xor32_value, 0x011); + __sync_fetch_and_xor(&xor_noreturn_value, 0x011ull << 32); +#endif + + return 0; +} + +__u64 cmpxchg64_value = 1; +__u64 cmpxchg64_result_fail = 0; +__u64 cmpxchg64_result_succeed = 0; +__u32 cmpxchg32_value = 1; +__u32 cmpxchg32_result_fail = 0; +__u32 cmpxchg32_result_succeed = 0; + +SEC("fentry/bpf_fentry_test1") +int BPF_PROG(cmpxchg, int a) +{ +#ifdef ENABLE_ATOMICS_TESTS + cmpxchg64_result_fail = __sync_val_compare_and_swap(&cmpxchg64_value, 0, 3); + cmpxchg64_result_succeed = __sync_val_compare_and_swap(&cmpxchg64_value, 1, 2); + + cmpxchg32_result_fail = __sync_val_compare_and_swap(&cmpxchg32_value, 0, 3); + cmpxchg32_result_succeed = __sync_val_compare_and_swap(&cmpxchg32_value, 1, 2); +#endif + + return 0; +} + +__u64 xchg64_value = 1; +__u64 xchg64_result = 0; +__u32 xchg32_value = 1; +__u32 xchg32_result = 0; + +SEC("fentry/bpf_fentry_test1") +int BPF_PROG(xchg, int a) +{ +#ifdef ENABLE_ATOMICS_TESTS + __u64 val64 = 2; + __u32 val32 = 2; + + xchg64_result = __sync_lock_test_and_set(&xchg64_value, val64); + xchg32_result = __sync_lock_test_and_set(&xchg32_value, val32); +#endif + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bind4_prog.c b/tools/testing/selftests/bpf/progs/bind4_prog.c index c6520f21f5f5..115a3b0ad984 100644 --- a/tools/testing/selftests/bpf/progs/bind4_prog.c +++ b/tools/testing/selftests/bpf/progs/bind4_prog.c @@ -29,18 +29,48 @@ static __inline int bind_to_device(struct bpf_sock_addr *ctx) char veth2[IFNAMSIZ] = "test_sock_addr2"; char missing[IFNAMSIZ] = "nonexistent_dev"; char del_bind[IFNAMSIZ] = ""; + int veth1_idx, veth2_idx; if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE, - &veth1, sizeof(veth1))) + &veth1, sizeof(veth1))) + return 1; + if (bpf_getsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX, + &veth1_idx, sizeof(veth1_idx)) || !veth1_idx) return 1; if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE, - &veth2, sizeof(veth2))) + &veth2, sizeof(veth2))) + return 1; + if (bpf_getsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX, + &veth2_idx, sizeof(veth2_idx)) || !veth2_idx || + veth1_idx == veth2_idx) return 1; if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE, - &missing, sizeof(missing)) != -ENODEV) + &missing, sizeof(missing)) != -ENODEV) + return 1; + if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX, + &veth1_idx, sizeof(veth1_idx))) return 1; if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE, - &del_bind, sizeof(del_bind))) + &del_bind, sizeof(del_bind))) + return 1; + + return 0; +} + +static __inline int misc_opts(struct bpf_sock_addr *ctx, int opt) +{ + int old, tmp, new = 0xeb9f; + + /* Socket in test case has guarantee that old never equals to new. */ + if (bpf_getsockopt(ctx, SOL_SOCKET, opt, &old, sizeof(old)) || + old == new) + return 1; + if (bpf_setsockopt(ctx, SOL_SOCKET, opt, &new, sizeof(new))) + return 1; + if (bpf_getsockopt(ctx, SOL_SOCKET, opt, &tmp, sizeof(tmp)) || + tmp != new) + return 1; + if (bpf_setsockopt(ctx, SOL_SOCKET, opt, &old, sizeof(old))) return 1; return 0; @@ -93,6 +123,10 @@ int bind_v4_prog(struct bpf_sock_addr *ctx) if (bind_to_device(ctx)) return 0; + /* Test for misc socket options. */ + if (misc_opts(ctx, SO_MARK) || misc_opts(ctx, SO_PRIORITY)) + return 0; + ctx->user_ip4 = bpf_htonl(SERV4_REWRITE_IP); ctx->user_port = bpf_htons(SERV4_REWRITE_PORT); diff --git a/tools/testing/selftests/bpf/progs/bind6_prog.c b/tools/testing/selftests/bpf/progs/bind6_prog.c index 4358e44dcf47..4c0d348034b9 100644 --- a/tools/testing/selftests/bpf/progs/bind6_prog.c +++ b/tools/testing/selftests/bpf/progs/bind6_prog.c @@ -35,18 +35,48 @@ static __inline int bind_to_device(struct bpf_sock_addr *ctx) char veth2[IFNAMSIZ] = "test_sock_addr2"; char missing[IFNAMSIZ] = "nonexistent_dev"; char del_bind[IFNAMSIZ] = ""; + int veth1_idx, veth2_idx; if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE, - &veth1, sizeof(veth1))) + &veth1, sizeof(veth1))) + return 1; + if (bpf_getsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX, + &veth1_idx, sizeof(veth1_idx)) || !veth1_idx) return 1; if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE, - &veth2, sizeof(veth2))) + &veth2, sizeof(veth2))) + return 1; + if (bpf_getsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX, + &veth2_idx, sizeof(veth2_idx)) || !veth2_idx || + veth1_idx == veth2_idx) return 1; if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE, - &missing, sizeof(missing)) != -ENODEV) + &missing, sizeof(missing)) != -ENODEV) + return 1; + if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX, + &veth1_idx, sizeof(veth1_idx))) return 1; if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE, - &del_bind, sizeof(del_bind))) + &del_bind, sizeof(del_bind))) + return 1; + + return 0; +} + +static __inline int misc_opts(struct bpf_sock_addr *ctx, int opt) +{ + int old, tmp, new = 0xeb9f; + + /* Socket in test case has guarantee that old never equals to new. */ + if (bpf_getsockopt(ctx, SOL_SOCKET, opt, &old, sizeof(old)) || + old == new) + return 1; + if (bpf_setsockopt(ctx, SOL_SOCKET, opt, &new, sizeof(new))) + return 1; + if (bpf_getsockopt(ctx, SOL_SOCKET, opt, &tmp, sizeof(tmp)) || + tmp != new) + return 1; + if (bpf_setsockopt(ctx, SOL_SOCKET, opt, &old, sizeof(old))) return 1; return 0; @@ -107,6 +137,10 @@ int bind_v6_prog(struct bpf_sock_addr *ctx) if (bind_to_device(ctx)) return 0; + /* Test for misc socket options. */ + if (misc_opts(ctx, SO_MARK) || misc_opts(ctx, SO_PRIORITY)) + return 0; + ctx->user_ip6[0] = bpf_htonl(SERV6_REWRITE_IP_0); ctx->user_ip6[1] = bpf_htonl(SERV6_REWRITE_IP_1); ctx->user_ip6[2] = bpf_htonl(SERV6_REWRITE_IP_2); diff --git a/tools/testing/selftests/bpf/progs/bind_perm.c b/tools/testing/selftests/bpf/progs/bind_perm.c new file mode 100644 index 000000000000..7bd2a027025d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bind_perm.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/stddef.h> +#include <linux/bpf.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +static __always_inline int bind_prog(struct bpf_sock_addr *ctx, int family) +{ + struct bpf_sock *sk; + + sk = ctx->sk; + if (!sk) + return 0; + + if (sk->family != family) + return 0; + + if (ctx->type != SOCK_STREAM) + return 0; + + /* Return 1 OR'ed with the first bit set to indicate + * that CAP_NET_BIND_SERVICE should be bypassed. + */ + if (ctx->user_port == bpf_htons(111)) + return (1 | 2); + + return 1; +} + +SEC("cgroup/bind4") +int bind_v4_prog(struct bpf_sock_addr *ctx) +{ + return bind_prog(ctx, AF_INET); +} + +SEC("cgroup/bind6") +int bind_v6_prog(struct bpf_sock_addr *ctx) +{ + return bind_prog(ctx, AF_INET6); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h index 6a1255465fd6..3d83b185c4bc 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter.h +++ b/tools/testing/selftests/bpf/progs/bpf_iter.h @@ -7,6 +7,7 @@ #define bpf_iter__netlink bpf_iter__netlink___not_used #define bpf_iter__task bpf_iter__task___not_used #define bpf_iter__task_file bpf_iter__task_file___not_used +#define bpf_iter__task_vma bpf_iter__task_vma___not_used #define bpf_iter__tcp bpf_iter__tcp___not_used #define tcp6_sock tcp6_sock___not_used #define bpf_iter__udp bpf_iter__udp___not_used @@ -26,6 +27,7 @@ #undef bpf_iter__netlink #undef bpf_iter__task #undef bpf_iter__task_file +#undef bpf_iter__task_vma #undef bpf_iter__tcp #undef tcp6_sock #undef bpf_iter__udp @@ -67,6 +69,12 @@ struct bpf_iter__task_file { struct file *file; } __attribute__((preserve_access_index)); +struct bpf_iter__task_vma { + struct bpf_iter_meta *meta; + struct task_struct *task; + struct vm_area_struct *vma; +} __attribute__((preserve_access_index)); + struct bpf_iter__bpf_map { struct bpf_iter_meta *meta; struct bpf_map *map; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c new file mode 100644 index 000000000000..11d1aa37cf11 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include "bpf_iter.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +/* Copied from mm.h */ +#define VM_READ 0x00000001 +#define VM_WRITE 0x00000002 +#define VM_EXEC 0x00000004 +#define VM_MAYSHARE 0x00000080 + +/* Copied from kdev_t.h */ +#define MINORBITS 20 +#define MINORMASK ((1U << MINORBITS) - 1) +#define MAJOR(dev) ((unsigned int) ((dev) >> MINORBITS)) +#define MINOR(dev) ((unsigned int) ((dev) & MINORMASK)) + +#define D_PATH_BUF_SIZE 1024 +char d_path_buf[D_PATH_BUF_SIZE] = {}; +__u32 pid = 0; + +SEC("iter/task_vma") int proc_maps(struct bpf_iter__task_vma *ctx) +{ + struct vm_area_struct *vma = ctx->vma; + struct seq_file *seq = ctx->meta->seq; + struct task_struct *task = ctx->task; + struct file *file; + char perm_str[] = "----"; + + if (task == (void *)0 || vma == (void *)0) + return 0; + + file = vma->vm_file; + if (task->tgid != pid) + return 0; + perm_str[0] = (vma->vm_flags & VM_READ) ? 'r' : '-'; + perm_str[1] = (vma->vm_flags & VM_WRITE) ? 'w' : '-'; + perm_str[2] = (vma->vm_flags & VM_EXEC) ? 'x' : '-'; + perm_str[3] = (vma->vm_flags & VM_MAYSHARE) ? 's' : 'p'; + BPF_SEQ_PRINTF(seq, "%08llx-%08llx %s ", vma->vm_start, vma->vm_end, perm_str); + + if (file) { + __u32 dev = file->f_inode->i_sb->s_dev; + + bpf_d_path(&file->f_path, d_path_buf, D_PATH_BUF_SIZE); + + BPF_SEQ_PRINTF(seq, "%08llx ", vma->vm_pgoff << 12); + BPF_SEQ_PRINTF(seq, "%02x:%02x %u", MAJOR(dev), MINOR(dev), + file->f_inode->i_ino); + BPF_SEQ_PRINTF(seq, "\t%s\n", d_path_buf); + } else { + BPF_SEQ_PRINTF(seq, "%08llx 00:00 0\n", 0ULL); + } + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bprm_opts.c b/tools/testing/selftests/bpf/progs/bprm_opts.c index 5bfef2887e70..418d9c6d4952 100644 --- a/tools/testing/selftests/bpf/progs/bprm_opts.c +++ b/tools/testing/selftests/bpf/progs/bprm_opts.c @@ -4,7 +4,7 @@ * Copyright 2020 Google LLC. */ -#include "vmlinux.h" +#include <linux/bpf.h> #include <errno.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> diff --git a/tools/testing/selftests/bpf/progs/connect_force_port4.c b/tools/testing/selftests/bpf/progs/connect_force_port4.c index 7396308677a3..a979aaef2a76 100644 --- a/tools/testing/selftests/bpf/progs/connect_force_port4.c +++ b/tools/testing/selftests/bpf/progs/connect_force_port4.c @@ -10,6 +10,8 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> +#include <bpf_sockopt_helpers.h> + char _license[] SEC("license") = "GPL"; int _version SEC("version") = 1; @@ -58,6 +60,9 @@ int connect4(struct bpf_sock_addr *ctx) SEC("cgroup/getsockname4") int getsockname4(struct bpf_sock_addr *ctx) { + if (!get_set_sk_priority(ctx)) + return 1; + /* Expose local server as 1.2.3.4:60000 to client. */ if (ctx->user_port == bpf_htons(60123)) { ctx->user_ip4 = bpf_htonl(0x01020304); @@ -71,6 +76,9 @@ int getpeername4(struct bpf_sock_addr *ctx) { struct svc_addr *orig; + if (!get_set_sk_priority(ctx)) + return 1; + /* Expose service 1.2.3.4:60000 as peer instead of backend. */ if (ctx->user_port == bpf_htons(60123)) { orig = bpf_sk_storage_get(&service_mapping, ctx->sk, 0, 0); diff --git a/tools/testing/selftests/bpf/progs/connect_force_port6.c b/tools/testing/selftests/bpf/progs/connect_force_port6.c index c1a2b555e9ad..afc8f1c5a9d6 100644 --- a/tools/testing/selftests/bpf/progs/connect_force_port6.c +++ b/tools/testing/selftests/bpf/progs/connect_force_port6.c @@ -9,6 +9,8 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> +#include <bpf_sockopt_helpers.h> + char _license[] SEC("license") = "GPL"; int _version SEC("version") = 1; @@ -63,6 +65,9 @@ int connect6(struct bpf_sock_addr *ctx) SEC("cgroup/getsockname6") int getsockname6(struct bpf_sock_addr *ctx) { + if (!get_set_sk_priority(ctx)) + return 1; + /* Expose local server as [fc00::1]:60000 to client. */ if (ctx->user_port == bpf_htons(60124)) { ctx->user_ip6[0] = bpf_htonl(0xfc000000); @@ -79,6 +84,9 @@ int getpeername6(struct bpf_sock_addr *ctx) { struct svc_addr *orig; + if (!get_set_sk_priority(ctx)) + return 1; + /* Expose service [fc00::1]:60000 as peer instead of backend. */ if (ctx->user_port == bpf_htons(60124)) { orig = bpf_sk_storage_get(&service_mapping, ctx->sk, 0, 0); diff --git a/tools/testing/selftests/bpf/progs/ima.c b/tools/testing/selftests/bpf/progs/ima.c index 86b21aff4bc5..96060ff4ffc6 100644 --- a/tools/testing/selftests/bpf/progs/ima.c +++ b/tools/testing/selftests/bpf/progs/ima.c @@ -9,20 +9,37 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> -long ima_hash_ret = -1; -u64 ima_hash = 0; u32 monitored_pid = 0; +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 1 << 12); +} ringbuf SEC(".maps"); + char _license[] SEC("license") = "GPL"; SEC("lsm.s/bprm_committed_creds") -int BPF_PROG(ima, struct linux_binprm *bprm) +void BPF_PROG(ima, struct linux_binprm *bprm) { - u32 pid = bpf_get_current_pid_tgid() >> 32; + u64 ima_hash = 0; + u64 *sample; + int ret; + u32 pid; + + pid = bpf_get_current_pid_tgid() >> 32; + if (pid == monitored_pid) { + ret = bpf_ima_inode_hash(bprm->file->f_inode, &ima_hash, + sizeof(ima_hash)); + if (ret < 0 || ima_hash == 0) + return; + + sample = bpf_ringbuf_reserve(&ringbuf, sizeof(u64), 0); + if (!sample) + return; - if (pid == monitored_pid) - ima_hash_ret = bpf_ima_inode_hash(bprm->file->f_inode, - &ima_hash, sizeof(ima_hash)); + *sample = ima_hash; + bpf_ringbuf_submit(sample, 0); + } - return 0; + return; } diff --git a/tools/testing/selftests/bpf/progs/local_storage.c b/tools/testing/selftests/bpf/progs/local_storage.c index 3e3de130f28f..95868bc7ada9 100644 --- a/tools/testing/selftests/bpf/progs/local_storage.c +++ b/tools/testing/selftests/bpf/progs/local_storage.c @@ -50,7 +50,6 @@ int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim) __u32 pid = bpf_get_current_pid_tgid() >> 32; struct local_storage *storage; bool is_self_unlink; - int err; if (pid != monitored_pid) return 0; @@ -66,8 +65,27 @@ int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim) return -EPERM; } - storage = bpf_inode_storage_get(&inode_storage_map, victim->d_inode, 0, - BPF_LOCAL_STORAGE_GET_F_CREATE); + return 0; +} + +SEC("lsm/inode_rename") +int BPF_PROG(inode_rename, struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) +{ + __u32 pid = bpf_get_current_pid_tgid() >> 32; + struct local_storage *storage; + int err; + + /* new_dentry->d_inode can be NULL when the inode is renamed to a file + * that did not exist before. The helper should be able to handle this + * NULL pointer. + */ + bpf_inode_storage_get(&inode_storage_map, new_dentry->d_inode, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + + storage = bpf_inode_storage_get(&inode_storage_map, old_dentry->d_inode, + 0, 0); if (!storage) return 0; @@ -76,7 +94,7 @@ int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim) inode_storage_result = -1; bpf_spin_unlock(&storage->lock); - err = bpf_inode_storage_delete(&inode_storage_map, victim->d_inode); + err = bpf_inode_storage_delete(&inode_storage_map, old_dentry->d_inode); if (!err) inode_storage_result = err; @@ -133,37 +151,18 @@ int BPF_PROG(socket_post_create, struct socket *sock, int family, int type, return 0; } -SEC("lsm/file_open") -int BPF_PROG(file_open, struct file *file) -{ - __u32 pid = bpf_get_current_pid_tgid() >> 32; - struct local_storage *storage; - - if (pid != monitored_pid) - return 0; - - if (!file->f_inode) - return 0; - - storage = bpf_inode_storage_get(&inode_storage_map, file->f_inode, 0, - BPF_LOCAL_STORAGE_GET_F_CREATE); - if (!storage) - return 0; - - bpf_spin_lock(&storage->lock); - storage->value = DUMMY_STORAGE_VALUE; - bpf_spin_unlock(&storage->lock); - return 0; -} - /* This uses the local storage to remember the inode of the binary that a * process was originally executing. */ SEC("lsm/bprm_committed_creds") void BPF_PROG(exec, struct linux_binprm *bprm) { + __u32 pid = bpf_get_current_pid_tgid() >> 32; struct local_storage *storage; + if (pid != monitored_pid) + return; + storage = bpf_task_storage_get(&task_storage_map, bpf_get_current_task_btf(), 0, BPF_LOCAL_STORAGE_GET_F_CREATE); @@ -172,4 +171,13 @@ void BPF_PROG(exec, struct linux_binprm *bprm) storage->exec_inode = bprm->file->f_inode; bpf_spin_unlock(&storage->lock); } + + storage = bpf_inode_storage_get(&inode_storage_map, bprm->file->f_inode, + 0, BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!storage) + return; + + bpf_spin_lock(&storage->lock); + storage->value = DUMMY_STORAGE_VALUE; + bpf_spin_unlock(&storage->lock); } diff --git a/tools/testing/selftests/bpf/progs/lsm.c b/tools/testing/selftests/bpf/progs/lsm.c index ff4d343b94b5..33694ef8acfa 100644 --- a/tools/testing/selftests/bpf/progs/lsm.c +++ b/tools/testing/selftests/bpf/progs/lsm.c @@ -30,6 +30,53 @@ struct { __type(value, __u64); } lru_hash SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} percpu_array SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} percpu_hash SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_LRU_PERCPU_HASH); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} lru_percpu_hash SEC(".maps"); + +struct inner_map { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, __u64); +} inner_map SEC(".maps"); + +struct outer_arr { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, 1); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); + __array(values, struct inner_map); +} outer_arr SEC(".maps") = { + .values = { [0] = &inner_map }, +}; + +struct outer_hash { + __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS); + __uint(max_entries, 1); + __uint(key_size, sizeof(int)); + __array(values, struct inner_map); +} outer_hash SEC(".maps") = { + .values = { [0] = &inner_map }, +}; + char _license[] SEC("license") = "GPL"; int monitored_pid = 0; @@ -61,6 +108,7 @@ SEC("lsm.s/bprm_committed_creds") int BPF_PROG(test_void_hook, struct linux_binprm *bprm) { __u32 pid = bpf_get_current_pid_tgid() >> 32; + struct inner_map *inner_map; char args[64]; __u32 key = 0; __u64 *value; @@ -80,6 +128,27 @@ int BPF_PROG(test_void_hook, struct linux_binprm *bprm) value = bpf_map_lookup_elem(&lru_hash, &key); if (value) *value = 0; + value = bpf_map_lookup_elem(&percpu_array, &key); + if (value) + *value = 0; + value = bpf_map_lookup_elem(&percpu_hash, &key); + if (value) + *value = 0; + value = bpf_map_lookup_elem(&lru_percpu_hash, &key); + if (value) + *value = 0; + inner_map = bpf_map_lookup_elem(&outer_arr, &key); + if (inner_map) { + value = bpf_map_lookup_elem(inner_map, &key); + if (value) + *value = 0; + } + inner_map = bpf_map_lookup_elem(&outer_hash, &key); + if (inner_map) { + value = bpf_map_lookup_elem(inner_map, &key); + if (value) + *value = 0; + } return 0; } diff --git a/tools/testing/selftests/bpf/progs/recursion.c b/tools/testing/selftests/bpf/progs/recursion.c new file mode 100644 index 000000000000..49f679375b9d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/recursion.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, int); + __type(value, long); +} hash1 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, int); + __type(value, long); +} hash2 SEC(".maps"); + +int pass1 = 0; +int pass2 = 0; + +SEC("fentry/__htab_map_lookup_elem") +int BPF_PROG(on_lookup, struct bpf_map *map) +{ + int key = 0; + + if (map == (void *)&hash1) { + pass1++; + return 0; + } + if (map == (void *)&hash2) { + pass2++; + /* htab_map_gen_lookup() will inline below call + * into direct call to __htab_map_lookup_elem() + */ + bpf_map_lookup_elem(&hash2, &key); + return 0; + } + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/recvmsg4_prog.c b/tools/testing/selftests/bpf/progs/recvmsg4_prog.c new file mode 100644 index 000000000000..3d1ae8b3402f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/recvmsg4_prog.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/stddef.h> +#include <linux/bpf.h> +#include <linux/in.h> +#include <sys/socket.h> + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +#include <bpf_sockopt_helpers.h> + +#define SERV4_IP 0xc0a801feU /* 192.168.1.254 */ +#define SERV4_PORT 4040 + +SEC("cgroup/recvmsg4") +int recvmsg4_prog(struct bpf_sock_addr *ctx) +{ + struct bpf_sock *sk; + __u32 user_ip4; + __u16 user_port; + + sk = ctx->sk; + if (!sk) + return 1; + + if (sk->family != AF_INET) + return 1; + + if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM) + return 1; + + if (!get_set_sk_priority(ctx)) + return 1; + + ctx->user_ip4 = bpf_htonl(SERV4_IP); + ctx->user_port = bpf_htons(SERV4_PORT); + + return 1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/recvmsg6_prog.c b/tools/testing/selftests/bpf/progs/recvmsg6_prog.c new file mode 100644 index 000000000000..27dfb21b21b4 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/recvmsg6_prog.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/stddef.h> +#include <linux/bpf.h> +#include <linux/in6.h> +#include <sys/socket.h> + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +#include <bpf_sockopt_helpers.h> + +#define SERV6_IP_0 0xfaceb00c /* face:b00c:1234:5678::abcd */ +#define SERV6_IP_1 0x12345678 +#define SERV6_IP_2 0x00000000 +#define SERV6_IP_3 0x0000abcd +#define SERV6_PORT 6060 + +SEC("cgroup/recvmsg6") +int recvmsg6_prog(struct bpf_sock_addr *ctx) +{ + struct bpf_sock *sk; + __u32 user_ip4; + __u16 user_port; + + sk = ctx->sk; + if (!sk) + return 1; + + if (sk->family != AF_INET6) + return 1; + + if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM) + return 1; + + if (!get_set_sk_priority(ctx)) + return 1; + + ctx->user_ip6[0] = bpf_htonl(SERV6_IP_0); + ctx->user_ip6[1] = bpf_htonl(SERV6_IP_1); + ctx->user_ip6[2] = bpf_htonl(SERV6_IP_2); + ctx->user_ip6[3] = bpf_htonl(SERV6_IP_3); + ctx->user_port = bpf_htons(SERV6_PORT); + + return 1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/sendmsg4_prog.c b/tools/testing/selftests/bpf/progs/sendmsg4_prog.c index 092d9da536f3..ac5abc34cde8 100644 --- a/tools/testing/selftests/bpf/progs/sendmsg4_prog.c +++ b/tools/testing/selftests/bpf/progs/sendmsg4_prog.c @@ -8,6 +8,8 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> +#include <bpf_sockopt_helpers.h> + #define SRC1_IP4 0xAC100001U /* 172.16.0.1 */ #define SRC2_IP4 0x00000000U #define SRC_REWRITE_IP4 0x7f000004U @@ -21,9 +23,14 @@ int _version SEC("version") = 1; SEC("cgroup/sendmsg4") int sendmsg_v4_prog(struct bpf_sock_addr *ctx) { + int prio; + if (ctx->type != SOCK_DGRAM) return 0; + if (!get_set_sk_priority(ctx)) + return 0; + /* Rewrite source. */ if (ctx->msg_src_ip4 == bpf_htonl(SRC1_IP4) || ctx->msg_src_ip4 == bpf_htonl(SRC2_IP4)) { diff --git a/tools/testing/selftests/bpf/progs/sendmsg6_prog.c b/tools/testing/selftests/bpf/progs/sendmsg6_prog.c index 255a432bc163..24694b1a8d82 100644 --- a/tools/testing/selftests/bpf/progs/sendmsg6_prog.c +++ b/tools/testing/selftests/bpf/progs/sendmsg6_prog.c @@ -8,6 +8,8 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> +#include <bpf_sockopt_helpers.h> + #define SRC_REWRITE_IP6_0 0 #define SRC_REWRITE_IP6_1 0 #define SRC_REWRITE_IP6_2 0 @@ -28,6 +30,9 @@ int sendmsg_v6_prog(struct bpf_sock_addr *ctx) if (ctx->type != SOCK_DGRAM) return 0; + if (!get_set_sk_priority(ctx)) + return 0; + /* Rewrite source. */ if (ctx->msg_src_ip6[3] == bpf_htonl(1) || ctx->msg_src_ip6[3] == bpf_htonl(0)) { diff --git a/tools/testing/selftests/bpf/progs/socket_cookie_prog.c b/tools/testing/selftests/bpf/progs/socket_cookie_prog.c index 0cb5656a22b0..35630a5aaf5f 100644 --- a/tools/testing/selftests/bpf/progs/socket_cookie_prog.c +++ b/tools/testing/selftests/bpf/progs/socket_cookie_prog.c @@ -1,11 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2018 Facebook -#include <linux/bpf.h> -#include <sys/socket.h> +#include "vmlinux.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> +#include <bpf/bpf_tracing.h> + +#define AF_INET6 10 struct socket_cookie { __u64 cookie_key; @@ -19,6 +21,14 @@ struct { __type(value, struct socket_cookie); } socket_cookies SEC(".maps"); +/* + * These three programs get executed in a row on connect() syscalls. The + * userspace side of the test creates a client socket, issues a connect() on it + * and then checks that the local storage associated with this socket has: + * cookie_value == local_port << 8 | 0xFF + * The different parts of this cookie_value are appended by those hooks if they + * all agree on the output of bpf_get_socket_cookie(). + */ SEC("cgroup/connect6") int set_cookie(struct bpf_sock_addr *ctx) { @@ -32,16 +42,16 @@ int set_cookie(struct bpf_sock_addr *ctx) if (!p) return 1; - p->cookie_value = 0xFF; + p->cookie_value = 0xF; p->cookie_key = bpf_get_socket_cookie(ctx); return 1; } SEC("sockops") -int update_cookie(struct bpf_sock_ops *ctx) +int update_cookie_sockops(struct bpf_sock_ops *ctx) { - struct bpf_sock *sk; + struct bpf_sock *sk = ctx->sk; struct socket_cookie *p; if (ctx->family != AF_INET6) @@ -50,21 +60,40 @@ int update_cookie(struct bpf_sock_ops *ctx) if (ctx->op != BPF_SOCK_OPS_TCP_CONNECT_CB) return 1; - if (!ctx->sk) + if (!sk) return 1; - p = bpf_sk_storage_get(&socket_cookies, ctx->sk, 0, 0); + p = bpf_sk_storage_get(&socket_cookies, sk, 0, 0); if (!p) return 1; if (p->cookie_key != bpf_get_socket_cookie(ctx)) return 1; - p->cookie_value = (ctx->local_port << 8) | p->cookie_value; + p->cookie_value |= (ctx->local_port << 8); return 1; } -int _version SEC("version") = 1; +SEC("fexit/inet_stream_connect") +int BPF_PROG(update_cookie_tracing, struct socket *sock, + struct sockaddr *uaddr, int addr_len, int flags) +{ + struct socket_cookie *p; + + if (uaddr->sa_family != AF_INET6) + return 0; + + p = bpf_sk_storage_get(&socket_cookies, sock->sk, 0, 0); + if (!p) + return 0; + + if (p->cookie_key != bpf_get_socket_cookie(sock->sk)) + return 0; + + p->cookie_value |= 0xF0; + + return 0; +} char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/sockopt_sk.c b/tools/testing/selftests/bpf/progs/sockopt_sk.c index 712df7b49cb1..d3597f81e6e9 100644 --- a/tools/testing/selftests/bpf/progs/sockopt_sk.c +++ b/tools/testing/selftests/bpf/progs/sockopt_sk.c @@ -1,8 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 #include <string.h> -#include <netinet/in.h> -#include <netinet/tcp.h> +#include <linux/tcp.h> #include <linux/bpf.h> +#include <netinet/in.h> #include <bpf/bpf_helpers.h> char _license[] SEC("license") = "GPL"; @@ -12,6 +12,10 @@ __u32 _version SEC("version") = 1; #define PAGE_SIZE 4096 #endif +#ifndef SOL_TCP +#define SOL_TCP IPPROTO_TCP +#endif + #define SOL_CUSTOM 0xdeadbeef struct sockopt_sk { @@ -57,6 +61,21 @@ int _getsockopt(struct bpf_sockopt *ctx) return 1; } + if (ctx->level == SOL_TCP && ctx->optname == TCP_ZEROCOPY_RECEIVE) { + /* Verify that TCP_ZEROCOPY_RECEIVE triggers. + * It has a custom implementation for performance + * reasons. + */ + + if (optval + sizeof(struct tcp_zerocopy_receive) > optval_end) + return 0; /* EPERM, bounds check */ + + if (((struct tcp_zerocopy_receive *)optval)->address != 0) + return 0; /* EPERM, unexpected data */ + + return 1; + } + if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) { if (optval + 1 > optval_end) return 0; /* EPERM, bounds check */ diff --git a/tools/testing/selftests/bpf/progs/test_check_mtu.c b/tools/testing/selftests/bpf/progs/test_check_mtu.c new file mode 100644 index 000000000000..b7787b43f9db --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_check_mtu.c @@ -0,0 +1,198 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Jesper Dangaard Brouer */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <linux/if_ether.h> + +#include <stddef.h> +#include <stdint.h> + +char _license[] SEC("license") = "GPL"; + +/* Userspace will update with MTU it can see on device */ +static volatile const int GLOBAL_USER_MTU; +static volatile const __u32 GLOBAL_USER_IFINDEX; + +/* BPF-prog will update these with MTU values it can see */ +__u32 global_bpf_mtu_xdp = 0; +__u32 global_bpf_mtu_tc = 0; + +SEC("xdp") +int xdp_use_helper_basic(struct xdp_md *ctx) +{ + __u32 mtu_len = 0; + + if (bpf_check_mtu(ctx, 0, &mtu_len, 0, 0)) + return XDP_ABORTED; + + return XDP_PASS; +} + +SEC("xdp") +int xdp_use_helper(struct xdp_md *ctx) +{ + int retval = XDP_PASS; /* Expected retval on successful test */ + __u32 mtu_len = 0; + __u32 ifindex = 0; + int delta = 0; + + /* When ifindex is zero, save net_device lookup and use ctx netdev */ + if (GLOBAL_USER_IFINDEX > 0) + ifindex = GLOBAL_USER_IFINDEX; + + if (bpf_check_mtu(ctx, ifindex, &mtu_len, delta, 0)) { + /* mtu_len is also valid when check fail */ + retval = XDP_ABORTED; + goto out; + } + + if (mtu_len != GLOBAL_USER_MTU) + retval = XDP_DROP; + +out: + global_bpf_mtu_xdp = mtu_len; + return retval; +} + +SEC("xdp") +int xdp_exceed_mtu(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + __u32 ifindex = GLOBAL_USER_IFINDEX; + __u32 data_len = data_end - data; + int retval = XDP_ABORTED; /* Fail */ + __u32 mtu_len = 0; + int delta; + int err; + + /* Exceed MTU with 1 via delta adjust */ + delta = GLOBAL_USER_MTU - (data_len - ETH_HLEN) + 1; + + err = bpf_check_mtu(ctx, ifindex, &mtu_len, delta, 0); + if (err) { + retval = XDP_PASS; /* Success in exceeding MTU check */ + if (err != BPF_MTU_CHK_RET_FRAG_NEEDED) + retval = XDP_DROP; + } + + global_bpf_mtu_xdp = mtu_len; + return retval; +} + +SEC("xdp") +int xdp_minus_delta(struct xdp_md *ctx) +{ + int retval = XDP_PASS; /* Expected retval on successful test */ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + __u32 ifindex = GLOBAL_USER_IFINDEX; + __u32 data_len = data_end - data; + __u32 mtu_len = 0; + int delta; + + /* Borderline test case: Minus delta exceeding packet length allowed */ + delta = -((data_len - ETH_HLEN) + 1); + + /* Minus length (adjusted via delta) still pass MTU check, other helpers + * are responsible for catching this, when doing actual size adjust + */ + if (bpf_check_mtu(ctx, ifindex, &mtu_len, delta, 0)) + retval = XDP_ABORTED; + + global_bpf_mtu_xdp = mtu_len; + return retval; +} + +SEC("classifier") +int tc_use_helper(struct __sk_buff *ctx) +{ + int retval = BPF_OK; /* Expected retval on successful test */ + __u32 mtu_len = 0; + int delta = 0; + + if (bpf_check_mtu(ctx, 0, &mtu_len, delta, 0)) { + retval = BPF_DROP; + goto out; + } + + if (mtu_len != GLOBAL_USER_MTU) + retval = BPF_REDIRECT; +out: + global_bpf_mtu_tc = mtu_len; + return retval; +} + +SEC("classifier") +int tc_exceed_mtu(struct __sk_buff *ctx) +{ + __u32 ifindex = GLOBAL_USER_IFINDEX; + int retval = BPF_DROP; /* Fail */ + __u32 skb_len = ctx->len; + __u32 mtu_len = 0; + int delta; + int err; + + /* Exceed MTU with 1 via delta adjust */ + delta = GLOBAL_USER_MTU - (skb_len - ETH_HLEN) + 1; + + err = bpf_check_mtu(ctx, ifindex, &mtu_len, delta, 0); + if (err) { + retval = BPF_OK; /* Success in exceeding MTU check */ + if (err != BPF_MTU_CHK_RET_FRAG_NEEDED) + retval = BPF_DROP; + } + + global_bpf_mtu_tc = mtu_len; + return retval; +} + +SEC("classifier") +int tc_exceed_mtu_da(struct __sk_buff *ctx) +{ + /* SKB Direct-Access variant */ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + __u32 ifindex = GLOBAL_USER_IFINDEX; + __u32 data_len = data_end - data; + int retval = BPF_DROP; /* Fail */ + __u32 mtu_len = 0; + int delta; + int err; + + /* Exceed MTU with 1 via delta adjust */ + delta = GLOBAL_USER_MTU - (data_len - ETH_HLEN) + 1; + + err = bpf_check_mtu(ctx, ifindex, &mtu_len, delta, 0); + if (err) { + retval = BPF_OK; /* Success in exceeding MTU check */ + if (err != BPF_MTU_CHK_RET_FRAG_NEEDED) + retval = BPF_DROP; + } + + global_bpf_mtu_tc = mtu_len; + return retval; +} + +SEC("classifier") +int tc_minus_delta(struct __sk_buff *ctx) +{ + int retval = BPF_OK; /* Expected retval on successful test */ + __u32 ifindex = GLOBAL_USER_IFINDEX; + __u32 skb_len = ctx->len; + __u32 mtu_len = 0; + int delta; + + /* Borderline test case: Minus delta exceeding packet length allowed */ + delta = -((skb_len - ETH_HLEN) + 1); + + /* Minus length (adjusted via delta) still pass MTU check, other helpers + * are responsible for catching this, when doing actual size adjust + */ + if (bpf_check_mtu(ctx, ifindex, &mtu_len, delta, 0)) + retval = BPF_DROP; + + global_bpf_mtu_xdp = mtu_len; + return retval; +} diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.c b/tools/testing/selftests/bpf/progs/test_cls_redirect.c index c9f8464996ea..3c1e042962e6 100644 --- a/tools/testing/selftests/bpf/progs/test_cls_redirect.c +++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.c @@ -70,6 +70,7 @@ typedef struct { uint64_t errors_total_encap_adjust_failed; uint64_t errors_total_encap_buffer_too_small; uint64_t errors_total_redirect_loop; + uint64_t errors_total_encap_mtu_violate; } metrics_t; typedef enum { @@ -407,6 +408,7 @@ static INLINING ret_t forward_with_gre(struct __sk_buff *skb, encap_headers_t *e payload_off - sizeof(struct ethhdr) - sizeof(struct iphdr); int32_t delta = sizeof(struct gre_base_hdr) - encap_overhead; uint16_t proto = ETH_P_IP; + uint32_t mtu_len = 0; /* Loop protection: the inner packet's TTL is decremented as a safeguard * against any forwarding loop. As the only interesting field is the TTL @@ -479,6 +481,11 @@ static INLINING ret_t forward_with_gre(struct __sk_buff *skb, encap_headers_t *e } } + if (bpf_check_mtu(skb, skb->ifindex, &mtu_len, delta, 0)) { + metrics->errors_total_encap_mtu_violate++; + return TC_ACT_SHOT; + } + if (bpf_skb_adjust_room(skb, delta, BPF_ADJ_ROOM_NET, BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_NO_CSUM_RESET) || diff --git a/tools/testing/selftests/bpf/progs/test_core_read_macros.c b/tools/testing/selftests/bpf/progs/test_core_read_macros.c new file mode 100644 index 000000000000..fd54caa17319 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_read_macros.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> + +char _license[] SEC("license") = "GPL"; + +/* shuffled layout for relocatable (CO-RE) reads */ +struct callback_head___shuffled { + void (*func)(struct callback_head___shuffled *head); + struct callback_head___shuffled *next; +}; + +struct callback_head k_probe_in = {}; +struct callback_head___shuffled k_core_in = {}; + +struct callback_head *u_probe_in = 0; +struct callback_head___shuffled *u_core_in = 0; + +long k_probe_out = 0; +long u_probe_out = 0; + +long k_core_out = 0; +long u_core_out = 0; + +int my_pid = 0; + +SEC("raw_tracepoint/sys_enter") +int handler(void *ctx) +{ + int pid = bpf_get_current_pid_tgid() >> 32; + + if (my_pid != pid) + return 0; + + /* next pointers for kernel address space have to be initialized from + * BPF side, user-space mmaped addresses are stil user-space addresses + */ + k_probe_in.next = &k_probe_in; + __builtin_preserve_access_index(({k_core_in.next = &k_core_in;})); + + k_probe_out = (long)BPF_PROBE_READ(&k_probe_in, next, next, func); + k_core_out = (long)BPF_CORE_READ(&k_core_in, next, next, func); + u_probe_out = (long)BPF_PROBE_READ_USER(u_probe_in, next, next, func); + u_core_out = (long)BPF_CORE_READ_USER(u_core_in, next, next, func); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_global_func10.c b/tools/testing/selftests/bpf/progs/test_global_func10.c new file mode 100644 index 000000000000..61c2ae92ce41 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_global_func10.c @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <stddef.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +struct Small { + int x; +}; + +struct Big { + int x; + int y; +}; + +__noinline int foo(const struct Big *big) +{ + if (big == 0) + return 0; + + return bpf_get_prandom_u32() < big->y; +} + +SEC("cgroup_skb/ingress") +int test_cls(struct __sk_buff *skb) +{ + const struct Small small = {.x = skb->len }; + + return foo((struct Big *)&small) ? 1 : 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_global_func11.c b/tools/testing/selftests/bpf/progs/test_global_func11.c new file mode 100644 index 000000000000..28488047c849 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_global_func11.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <stddef.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +struct S { + int x; +}; + +__noinline int foo(const struct S *s) +{ + return s ? bpf_get_prandom_u32() < s->x : 0; +} + +SEC("cgroup_skb/ingress") +int test_cls(struct __sk_buff *skb) +{ + return foo(skb); +} diff --git a/tools/testing/selftests/bpf/progs/test_global_func12.c b/tools/testing/selftests/bpf/progs/test_global_func12.c new file mode 100644 index 000000000000..62343527cc59 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_global_func12.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <stddef.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +struct S { + int x; +}; + +__noinline int foo(const struct S *s) +{ + return bpf_get_prandom_u32() < s->x; +} + +SEC("cgroup_skb/ingress") +int test_cls(struct __sk_buff *skb) +{ + const struct S s = {.x = skb->len }; + + return foo(&s); +} diff --git a/tools/testing/selftests/bpf/progs/test_global_func13.c b/tools/testing/selftests/bpf/progs/test_global_func13.c new file mode 100644 index 000000000000..ff8897c1ac22 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_global_func13.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <stddef.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +struct S { + int x; +}; + +__noinline int foo(const struct S *s) +{ + if (s) + return bpf_get_prandom_u32() < s->x; + + return 0; +} + +SEC("cgroup_skb/ingress") +int test_cls(struct __sk_buff *skb) +{ + const struct S *s = (const struct S *)(0xbedabeda); + + return foo(s); +} diff --git a/tools/testing/selftests/bpf/progs/test_global_func14.c b/tools/testing/selftests/bpf/progs/test_global_func14.c new file mode 100644 index 000000000000..698c77199ebf --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_global_func14.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <stddef.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +struct S; + +__noinline int foo(const struct S *s) +{ + if (s) + return bpf_get_prandom_u32() < *(const int *) s; + + return 0; +} + +SEC("cgroup_skb/ingress") +int test_cls(struct __sk_buff *skb) +{ + + return foo(NULL); +} diff --git a/tools/testing/selftests/bpf/progs/test_global_func15.c b/tools/testing/selftests/bpf/progs/test_global_func15.c new file mode 100644 index 000000000000..c19c435988d5 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_global_func15.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <stddef.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +__noinline int foo(unsigned int *v) +{ + if (v) + *v = bpf_get_prandom_u32(); + + return 0; +} + +SEC("cgroup_skb/ingress") +int test_cls(struct __sk_buff *skb) +{ + unsigned int v = 1; + + foo(&v); + + return v; +} diff --git a/tools/testing/selftests/bpf/progs/test_global_func16.c b/tools/testing/selftests/bpf/progs/test_global_func16.c new file mode 100644 index 000000000000..0312d1e8d8c0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_global_func16.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <stddef.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +__noinline int foo(int (*arr)[10]) +{ + if (arr) + return (*arr)[9]; + + return 0; +} + +SEC("cgroup_skb/ingress") +int test_cls(struct __sk_buff *skb) +{ + int array[10]; + + const int rv = foo(&array); + + return rv ? 1 : 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_global_func9.c b/tools/testing/selftests/bpf/progs/test_global_func9.c new file mode 100644 index 000000000000..bd233ddede98 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_global_func9.c @@ -0,0 +1,132 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <stddef.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +struct S { + int x; +}; + +struct C { + int x; + int y; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, struct S); +} map SEC(".maps"); + +enum E { + E_ITEM +}; + +static int global_data_x = 100; +static int volatile global_data_y = 500; + +__noinline int foo(const struct S *s) +{ + if (s) + return bpf_get_prandom_u32() < s->x; + + return 0; +} + +__noinline int bar(int *x) +{ + if (x) + *x &= bpf_get_prandom_u32(); + + return 0; +} +__noinline int baz(volatile int *x) +{ + if (x) + *x &= bpf_get_prandom_u32(); + + return 0; +} + +__noinline int qux(enum E *e) +{ + if (e) + return *e; + + return 0; +} + +__noinline int quux(int (*arr)[10]) +{ + if (arr) + return (*arr)[9]; + + return 0; +} + +__noinline int quuz(int **p) +{ + if (p) + *p = NULL; + + return 0; +} + +SEC("cgroup_skb/ingress") +int test_cls(struct __sk_buff *skb) +{ + int result = 0; + + { + const struct S s = {.x = skb->len }; + + result |= foo(&s); + } + + { + const __u32 key = 1; + const struct S *s = bpf_map_lookup_elem(&map, &key); + + result |= foo(s); + } + + { + const struct C c = {.x = skb->len, .y = skb->family }; + + result |= foo((const struct S *)&c); + } + + { + result |= foo(NULL); + } + + { + bar(&result); + bar(&global_data_x); + } + + { + result |= baz(&global_data_y); + } + + { + enum E e = E_ITEM; + + result |= qux(&e); + } + + { + int array[10] = {0}; + + result |= quux(&array); + } + + { + int *p; + + result |= quuz(&p); + } + + return result ? 1 : 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_global_func_args.c b/tools/testing/selftests/bpf/progs/test_global_func_args.c new file mode 100644 index 000000000000..cae309538a9e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_global_func_args.c @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> + +#include <bpf/bpf_helpers.h> + +struct S { + int v; +}; + +static volatile struct S global_variable; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 7); + __type(key, __u32); + __type(value, int); +} values SEC(".maps"); + +static void save_value(__u32 index, int value) +{ + bpf_map_update_elem(&values, &index, &value, 0); +} + +__noinline int foo(__u32 index, struct S *s) +{ + if (s) { + save_value(index, s->v); + return ++s->v; + } + + save_value(index, 0); + + return 1; +} + +__noinline int bar(__u32 index, volatile struct S *s) +{ + if (s) { + save_value(index, s->v); + return ++s->v; + } + + save_value(index, 0); + + return 1; +} + +__noinline int baz(struct S **s) +{ + if (s) + *s = 0; + + return 0; +} + +SEC("cgroup_skb/ingress") +int test_cls(struct __sk_buff *skb) +{ + __u32 index = 0; + + { + const int v = foo(index++, 0); + + save_value(index++, v); + } + + { + struct S s = { .v = 100 }; + + foo(index++, &s); + save_value(index++, s.v); + } + + { + global_variable.v = 42; + bar(index++, &global_variable); + save_value(index++, global_variable.v); + } + + { + struct S v, *p = &v; + + baz(&p); + save_value(index++, !p); + } + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_module.c b/tools/testing/selftests/bpf/progs/test_ksyms_module.c new file mode 100644 index 000000000000..d6a0b3086b90 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_ksyms_module.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include "vmlinux.h" + +#include <bpf/bpf_helpers.h> + +extern const int bpf_testmod_ksym_percpu __ksym; + +int out_mod_ksym_global = 0; +bool triggered = false; + +SEC("raw_tp/sys_enter") +int handler(const void *ctx) +{ + int *val; + __u32 cpu; + + val = (int *)bpf_this_cpu_ptr(&bpf_testmod_ksym_percpu); + out_mod_ksym_global = *val; + triggered = true; + + return 0; +} + +char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_module_attach.c b/tools/testing/selftests/bpf/progs/test_module_attach.c index efd1e287ac17..bd37ceec5587 100644 --- a/tools/testing/selftests/bpf/progs/test_module_attach.c +++ b/tools/testing/selftests/bpf/progs/test_module_attach.c @@ -17,6 +17,16 @@ int BPF_PROG(handle_raw_tp, return 0; } +__u32 raw_tp_bare_write_sz = 0; + +SEC("raw_tp/bpf_testmod_test_write_bare") +int BPF_PROG(handle_raw_tp_bare, + struct task_struct *task, struct bpf_testmod_test_write_ctx *write_ctx) +{ + raw_tp_bare_write_sz = BPF_CORE_READ(write_ctx, len); + return 0; +} + __u32 tp_btf_read_sz = 0; SEC("tp_btf/bpf_testmod_test_read") diff --git a/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c b/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c index 1dca70a6de2f..0763d49f9c42 100644 --- a/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c +++ b/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c @@ -5,31 +5,21 @@ #include <stdint.h> #include <bpf/bpf_helpers.h> -static volatile struct { - __u64 dev; - __u64 ino; - __u64 pid_tgid; - __u64 user_pid_tgid; -} res; +__u64 user_pid = 0; +__u64 user_tgid = 0; +__u64 dev = 0; +__u64 ino = 0; -SEC("raw_tracepoint/sys_enter") -int trace(void *ctx) +SEC("tracepoint/syscalls/sys_enter_nanosleep") +int handler(const void *ctx) { - __u64 ns_pid_tgid, expected_pid; struct bpf_pidns_info nsdata; - __u32 key = 0; - if (bpf_get_ns_current_pid_tgid(res.dev, res.ino, &nsdata, - sizeof(struct bpf_pidns_info))) + if (bpf_get_ns_current_pid_tgid(dev, ino, &nsdata, sizeof(struct bpf_pidns_info))) return 0; - ns_pid_tgid = (__u64)nsdata.tgid << 32 | nsdata.pid; - expected_pid = res.user_pid_tgid; - - if (expected_pid != ns_pid_tgid) - return 0; - - res.pid_tgid = ns_pid_tgid; + user_pid = nsdata.pid; + user_tgid = nsdata.tgid; return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_stack_var_off.c b/tools/testing/selftests/bpf/progs/test_stack_var_off.c new file mode 100644 index 000000000000..665e6ae09d37 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_stack_var_off.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +int probe_res; + +char input[4] = {}; +int test_pid; + +SEC("tracepoint/syscalls/sys_enter_nanosleep") +int probe(void *ctx) +{ + /* This BPF program performs variable-offset reads and writes on a + * stack-allocated buffer. + */ + char stack_buf[16]; + unsigned long len; + unsigned long last; + + if ((bpf_get_current_pid_tgid() >> 32) != test_pid) + return 0; + + /* Copy the input to the stack. */ + __builtin_memcpy(stack_buf, input, 4); + + /* The first byte in the buffer indicates the length. */ + len = stack_buf[0] & 0xf; + last = (len - 1) & 0xf; + + /* Append something to the buffer. The offset where we write is not + * statically known; this is a variable-offset stack write. + */ + stack_buf[len] = 42; + + /* Index into the buffer at an unknown offset. This is a + * variable-offset stack read. + * + * Note that if it wasn't for the preceding variable-offset write, this + * read would be rejected because the stack slot cannot be verified as + * being initialized. With the preceding variable-offset write, the + * stack slot still cannot be verified, but the write inhibits the + * respective check on the reasoning that, if there was a + * variable-offset to a higher-or-equal spot, we're probably reading + * what we just wrote. + */ + probe_res = stack_buf[last]; + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_cgroup_storage.c b/tools/testing/selftests/bpf/test_cgroup_storage.c index d946252a25bb..0cda61da5d39 100644 --- a/tools/testing/selftests/bpf/test_cgroup_storage.c +++ b/tools/testing/selftests/bpf/test_cgroup_storage.c @@ -29,7 +29,7 @@ int main(int argc, char **argv) BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage), BPF_MOV64_IMM(BPF_REG_1, 1), - BPF_STX_XADD(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_0, BPF_REG_1, 0), BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x1), BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), diff --git a/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c b/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c deleted file mode 100644 index ec53b1ef90d2..000000000000 --- a/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c +++ /dev/null @@ -1,160 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2020 Carlos Neira cneirabustos@gmail.com */ -#define _GNU_SOURCE -#include <sys/stat.h> -#include <sys/types.h> -#include <unistd.h> -#include <sys/syscall.h> -#include <sched.h> -#include <sys/wait.h> -#include <sys/mount.h> -#include "test_progs.h" - -#define CHECK_NEWNS(condition, tag, format...) ({ \ - int __ret = !!(condition); \ - if (__ret) { \ - printf("%s:FAIL:%s ", __func__, tag); \ - printf(format); \ - } else { \ - printf("%s:PASS:%s\n", __func__, tag); \ - } \ - __ret; \ -}) - -struct bss { - __u64 dev; - __u64 ino; - __u64 pid_tgid; - __u64 user_pid_tgid; -}; - -int main(int argc, char **argv) -{ - pid_t pid; - int exit_code = 1; - struct stat st; - - printf("Testing bpf_get_ns_current_pid_tgid helper in new ns\n"); - - if (stat("/proc/self/ns/pid", &st)) { - perror("stat failed on /proc/self/ns/pid ns\n"); - printf("%s:FAILED\n", argv[0]); - return exit_code; - } - - if (CHECK_NEWNS(unshare(CLONE_NEWPID | CLONE_NEWNS), - "unshare CLONE_NEWPID | CLONE_NEWNS", "error errno=%d\n", errno)) - return exit_code; - - pid = fork(); - if (pid == -1) { - perror("Fork() failed\n"); - printf("%s:FAILED\n", argv[0]); - return exit_code; - } - - if (pid > 0) { - int status; - - usleep(5); - waitpid(pid, &status, 0); - return 0; - } else { - - pid = fork(); - if (pid == -1) { - perror("Fork() failed\n"); - printf("%s:FAILED\n", argv[0]); - return exit_code; - } - - if (pid > 0) { - int status; - waitpid(pid, &status, 0); - return 0; - } else { - if (CHECK_NEWNS(mount("none", "/proc", NULL, MS_PRIVATE|MS_REC, NULL), - "Unmounting proc", "Cannot umount proc! errno=%d\n", errno)) - return exit_code; - - if (CHECK_NEWNS(mount("proc", "/proc", "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL), - "Mounting proc", "Cannot mount proc! errno=%d\n", errno)) - return exit_code; - - const char *probe_name = "raw_tracepoint/sys_enter"; - const char *file = "test_ns_current_pid_tgid.o"; - struct bpf_link *link = NULL; - struct bpf_program *prog; - struct bpf_map *bss_map; - struct bpf_object *obj; - int exit_code = 1; - int err, key = 0; - struct bss bss; - struct stat st; - __u64 id; - - obj = bpf_object__open_file(file, NULL); - if (CHECK_NEWNS(IS_ERR(obj), "obj_open", "err %ld\n", PTR_ERR(obj))) - return exit_code; - - err = bpf_object__load(obj); - if (CHECK_NEWNS(err, "obj_load", "err %d errno %d\n", err, errno)) - goto cleanup; - - bss_map = bpf_object__find_map_by_name(obj, "test_ns_.bss"); - if (CHECK_NEWNS(!bss_map, "find_bss_map", "failed\n")) - goto cleanup; - - prog = bpf_object__find_program_by_title(obj, probe_name); - if (CHECK_NEWNS(!prog, "find_prog", "prog '%s' not found\n", - probe_name)) - goto cleanup; - - memset(&bss, 0, sizeof(bss)); - pid_t tid = syscall(SYS_gettid); - pid_t pid = getpid(); - - id = (__u64) tid << 32 | pid; - bss.user_pid_tgid = id; - - if (CHECK_NEWNS(stat("/proc/self/ns/pid", &st), - "stat new ns", "Failed to stat /proc/self/ns/pid errno=%d\n", errno)) - goto cleanup; - - bss.dev = st.st_dev; - bss.ino = st.st_ino; - - err = bpf_map_update_elem(bpf_map__fd(bss_map), &key, &bss, 0); - if (CHECK_NEWNS(err, "setting_bss", "failed to set bss : %d\n", err)) - goto cleanup; - - link = bpf_program__attach_raw_tracepoint(prog, "sys_enter"); - if (CHECK_NEWNS(IS_ERR(link), "attach_raw_tp", "err %ld\n", - PTR_ERR(link))) { - link = NULL; - goto cleanup; - } - - /* trigger some syscalls */ - usleep(1); - - err = bpf_map_lookup_elem(bpf_map__fd(bss_map), &key, &bss); - if (CHECK_NEWNS(err, "set_bss", "failed to get bss : %d\n", err)) - goto cleanup; - - if (CHECK_NEWNS(id != bss.pid_tgid, "Compare user pid/tgid vs. bpf pid/tgid", - "User pid/tgid %llu BPF pid/tgid %llu\n", id, bss.pid_tgid)) - goto cleanup; - - exit_code = 0; - printf("%s:PASS\n", argv[0]); -cleanup: - if (!link) { - bpf_link__destroy(link); - link = NULL; - } - bpf_object__close(obj); - } - } - return 0; -} diff --git a/tools/testing/selftests/bpf/test_flow_dissector.c b/tools/testing/selftests/bpf/test_flow_dissector.c index 01f0c634d548..571cc076dd7d 100644 --- a/tools/testing/selftests/bpf/test_flow_dissector.c +++ b/tools/testing/selftests/bpf/test_flow_dissector.c @@ -503,7 +503,7 @@ static int do_rx(int fd) if (rbuf != cfg_payload_char) error(1, 0, "recv: payload mismatch"); num++; - }; + } return num; } diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 0ad3e6305ff0..51adc42b2b40 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -1312,22 +1312,58 @@ static void test_map_stress(void) #define DO_UPDATE 1 #define DO_DELETE 0 +#define MAP_RETRIES 20 + +static int map_update_retriable(int map_fd, const void *key, const void *value, + int flags, int attempts) +{ + while (bpf_map_update_elem(map_fd, key, value, flags)) { + if (!attempts || (errno != EAGAIN && errno != EBUSY)) + return -errno; + + usleep(1); + attempts--; + } + + return 0; +} + +static int map_delete_retriable(int map_fd, const void *key, int attempts) +{ + while (bpf_map_delete_elem(map_fd, key)) { + if (!attempts || (errno != EAGAIN && errno != EBUSY)) + return -errno; + + usleep(1); + attempts--; + } + + return 0; +} + static void test_update_delete(unsigned int fn, void *data) { int do_update = ((int *)data)[1]; int fd = ((int *)data)[0]; - int i, key, value; + int i, key, value, err; for (i = fn; i < MAP_SIZE; i += TASKS) { key = value = i; if (do_update) { - assert(bpf_map_update_elem(fd, &key, &value, - BPF_NOEXIST) == 0); - assert(bpf_map_update_elem(fd, &key, &value, - BPF_EXIST) == 0); + err = map_update_retriable(fd, &key, &value, BPF_NOEXIST, MAP_RETRIES); + if (err) + printf("error %d %d\n", err, errno); + assert(err == 0); + err = map_update_retriable(fd, &key, &value, BPF_EXIST, MAP_RETRIES); + if (err) + printf("error %d %d\n", err, errno); + assert(err == 0); } else { - assert(bpf_map_delete_elem(fd, &key) == 0); + err = map_delete_retriable(fd, &key, MAP_RETRIES); + if (err) + printf("error %d %d\n", err, errno); + assert(err == 0); } } } diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 7d077d48cadd..6396932b97e2 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -11,6 +11,7 @@ #include <signal.h> #include <string.h> #include <execinfo.h> /* backtrace */ +#include <linux/membarrier.h> #define EXIT_NO_TEST 2 #define EXIT_ERR_SETUP_INFRA 3 @@ -370,8 +371,18 @@ static int delete_module(const char *name, int flags) return syscall(__NR_delete_module, name, flags); } +/* + * Trigger synchronize_rcu() in kernel. + */ +int kern_sync_rcu(void) +{ + return syscall(__NR_membarrier, MEMBARRIER_CMD_SHARED, 0, 0); +} + static void unload_bpf_testmod(void) { + if (kern_sync_rcu()) + fprintf(env.stderr, "Failed to trigger kernel-side RCU sync!\n"); if (delete_module("bpf_testmod", 0)) { if (errno == ENOENT) { if (env.verbosity > VERBOSE_NONE) @@ -379,7 +390,7 @@ static void unload_bpf_testmod(void) return; } fprintf(env.stderr, "Failed to unload bpf_testmod.ko from kernel: %d\n", -errno); - exit(1); + return; } if (env.verbosity > VERBOSE_NONE) fprintf(stdout, "Successfully unloaded bpf_testmod.ko.\n"); diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 115953243f62..f7c2fd89d01a 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -16,7 +16,6 @@ typedef __u16 __sum16; #include <linux/if_packet.h> #include <linux/ip.h> #include <linux/ipv6.h> -#include <netinet/tcp.h> #include <linux/filter.h> #include <linux/perf_event.h> #include <linux/socket.h> @@ -219,6 +218,7 @@ int bpf_find_map(const char *test, struct bpf_object *obj, const char *name); int compare_map_keys(int map1_fd, int map2_fd); int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len); int extract_build_id(char *build_id, size_t size); +int kern_sync_rcu(void); #ifdef __x86_64__ #define SYS_NANOSLEEP_KPROBE_NAME "__x64_sys_nanosleep" diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c index dcb83ab02919..aa3f185fcb89 100644 --- a/tools/testing/selftests/bpf/test_sock_addr.c +++ b/tools/testing/selftests/bpf/test_sock_addr.c @@ -31,6 +31,8 @@ #define CONNECT6_PROG_PATH "./connect6_prog.o" #define SENDMSG4_PROG_PATH "./sendmsg4_prog.o" #define SENDMSG6_PROG_PATH "./sendmsg6_prog.o" +#define RECVMSG4_PROG_PATH "./recvmsg4_prog.o" +#define RECVMSG6_PROG_PATH "./recvmsg6_prog.o" #define BIND4_PROG_PATH "./bind4_prog.o" #define BIND6_PROG_PATH "./bind6_prog.o" @@ -94,10 +96,10 @@ static int sendmsg_deny_prog_load(const struct sock_addr_test *test); static int recvmsg_allow_prog_load(const struct sock_addr_test *test); static int recvmsg_deny_prog_load(const struct sock_addr_test *test); static int sendmsg4_rw_asm_prog_load(const struct sock_addr_test *test); -static int recvmsg4_rw_asm_prog_load(const struct sock_addr_test *test); +static int recvmsg4_rw_c_prog_load(const struct sock_addr_test *test); static int sendmsg4_rw_c_prog_load(const struct sock_addr_test *test); static int sendmsg6_rw_asm_prog_load(const struct sock_addr_test *test); -static int recvmsg6_rw_asm_prog_load(const struct sock_addr_test *test); +static int recvmsg6_rw_c_prog_load(const struct sock_addr_test *test); static int sendmsg6_rw_c_prog_load(const struct sock_addr_test *test); static int sendmsg6_rw_v4mapped_prog_load(const struct sock_addr_test *test); static int sendmsg6_rw_wildcard_prog_load(const struct sock_addr_test *test); @@ -573,8 +575,8 @@ static struct sock_addr_test tests[] = { LOAD_REJECT, }, { - "recvmsg4: rewrite IP & port (asm)", - recvmsg4_rw_asm_prog_load, + "recvmsg4: rewrite IP & port (C)", + recvmsg4_rw_c_prog_load, BPF_CGROUP_UDP4_RECVMSG, BPF_CGROUP_UDP4_RECVMSG, AF_INET, @@ -587,8 +589,8 @@ static struct sock_addr_test tests[] = { SUCCESS, }, { - "recvmsg6: rewrite IP & port (asm)", - recvmsg6_rw_asm_prog_load, + "recvmsg6: rewrite IP & port (C)", + recvmsg6_rw_c_prog_load, BPF_CGROUP_UDP6_RECVMSG, BPF_CGROUP_UDP6_RECVMSG, AF_INET6, @@ -786,45 +788,9 @@ static int sendmsg4_rw_asm_prog_load(const struct sock_addr_test *test) return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn)); } -static int recvmsg4_rw_asm_prog_load(const struct sock_addr_test *test) +static int recvmsg4_rw_c_prog_load(const struct sock_addr_test *test) { - struct sockaddr_in src4_rw_addr; - - if (mk_sockaddr(AF_INET, SERV4_IP, SERV4_PORT, - (struct sockaddr *)&src4_rw_addr, - sizeof(src4_rw_addr)) == -1) - return -1; - - struct bpf_insn insns[] = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - - /* if (sk.family == AF_INET && */ - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, - offsetof(struct bpf_sock_addr, family)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET, 6), - - /* sk.type == SOCK_DGRAM) { */ - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, - offsetof(struct bpf_sock_addr, type)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_DGRAM, 4), - - /* user_ip4 = src4_rw_addr.sin_addr */ - BPF_MOV32_IMM(BPF_REG_7, src4_rw_addr.sin_addr.s_addr), - BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7, - offsetof(struct bpf_sock_addr, user_ip4)), - - /* user_port = src4_rw_addr.sin_port */ - BPF_MOV32_IMM(BPF_REG_7, src4_rw_addr.sin_port), - BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7, - offsetof(struct bpf_sock_addr, user_port)), - /* } */ - - /* return 1 */ - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }; - - return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn)); + return load_path(test, RECVMSG4_PROG_PATH); } static int sendmsg4_rw_c_prog_load(const struct sock_addr_test *test) @@ -890,37 +856,9 @@ static int sendmsg6_rw_asm_prog_load(const struct sock_addr_test *test) return sendmsg6_rw_dst_asm_prog_load(test, SERV6_REWRITE_IP); } -static int recvmsg6_rw_asm_prog_load(const struct sock_addr_test *test) +static int recvmsg6_rw_c_prog_load(const struct sock_addr_test *test) { - struct sockaddr_in6 src6_rw_addr; - - if (mk_sockaddr(AF_INET6, SERV6_IP, SERV6_PORT, - (struct sockaddr *)&src6_rw_addr, - sizeof(src6_rw_addr)) == -1) - return -1; - - struct bpf_insn insns[] = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - - /* if (sk.family == AF_INET6) { */ - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, - offsetof(struct bpf_sock_addr, family)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET6, 10), - - STORE_IPV6(user_ip6, src6_rw_addr.sin6_addr.s6_addr32), - - /* user_port = dst6_rw_addr.sin6_port */ - BPF_MOV32_IMM(BPF_REG_7, src6_rw_addr.sin6_port), - BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7, - offsetof(struct bpf_sock_addr, user_port)), - /* } */ - - /* return 1 */ - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }; - - return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn)); + return load_path(test, RECVMSG6_PROG_PATH); } static int sendmsg6_rw_v4mapped_prog_load(const struct sock_addr_test *test) diff --git a/tools/testing/selftests/bpf/test_socket_cookie.c b/tools/testing/selftests/bpf/test_socket_cookie.c deleted file mode 100644 index ca7ca87e91aa..000000000000 --- a/tools/testing/selftests/bpf/test_socket_cookie.c +++ /dev/null @@ -1,208 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -// Copyright (c) 2018 Facebook - -#include <string.h> -#include <unistd.h> - -#include <arpa/inet.h> -#include <netinet/in.h> -#include <sys/types.h> -#include <sys/socket.h> - -#include <bpf/bpf.h> -#include <bpf/libbpf.h> - -#include "bpf_rlimit.h" -#include "cgroup_helpers.h" - -#define CG_PATH "/foo" -#define SOCKET_COOKIE_PROG "./socket_cookie_prog.o" - -struct socket_cookie { - __u64 cookie_key; - __u32 cookie_value; -}; - -static int start_server(void) -{ - struct sockaddr_in6 addr; - int fd; - - fd = socket(AF_INET6, SOCK_STREAM, 0); - if (fd == -1) { - log_err("Failed to create server socket"); - goto out; - } - - memset(&addr, 0, sizeof(addr)); - addr.sin6_family = AF_INET6; - addr.sin6_addr = in6addr_loopback; - addr.sin6_port = 0; - - if (bind(fd, (const struct sockaddr *)&addr, sizeof(addr)) == -1) { - log_err("Failed to bind server socket"); - goto close_out; - } - - if (listen(fd, 128) == -1) { - log_err("Failed to listen on server socket"); - goto close_out; - } - - goto out; - -close_out: - close(fd); - fd = -1; -out: - return fd; -} - -static int connect_to_server(int server_fd) -{ - struct sockaddr_storage addr; - socklen_t len = sizeof(addr); - int fd; - - fd = socket(AF_INET6, SOCK_STREAM, 0); - if (fd == -1) { - log_err("Failed to create client socket"); - goto out; - } - - if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) { - log_err("Failed to get server addr"); - goto close_out; - } - - if (connect(fd, (const struct sockaddr *)&addr, len) == -1) { - log_err("Fail to connect to server"); - goto close_out; - } - - goto out; - -close_out: - close(fd); - fd = -1; -out: - return fd; -} - -static int validate_map(struct bpf_map *map, int client_fd) -{ - __u32 cookie_expected_value; - struct sockaddr_in6 addr; - socklen_t len = sizeof(addr); - struct socket_cookie val; - int err = 0; - int map_fd; - - if (!map) { - log_err("Map not found in BPF object"); - goto err; - } - - map_fd = bpf_map__fd(map); - - err = bpf_map_lookup_elem(map_fd, &client_fd, &val); - - err = getsockname(client_fd, (struct sockaddr *)&addr, &len); - if (err) { - log_err("Can't get client local addr"); - goto out; - } - - cookie_expected_value = (ntohs(addr.sin6_port) << 8) | 0xFF; - if (val.cookie_value != cookie_expected_value) { - log_err("Unexpected value in map: %x != %x", val.cookie_value, - cookie_expected_value); - goto err; - } - - goto out; -err: - err = -1; -out: - return err; -} - -static int run_test(int cgfd) -{ - enum bpf_attach_type attach_type; - struct bpf_prog_load_attr attr; - struct bpf_program *prog; - struct bpf_object *pobj; - const char *prog_name; - int server_fd = -1; - int client_fd = -1; - int prog_fd = -1; - int err = 0; - - memset(&attr, 0, sizeof(attr)); - attr.file = SOCKET_COOKIE_PROG; - attr.prog_type = BPF_PROG_TYPE_UNSPEC; - attr.prog_flags = BPF_F_TEST_RND_HI32; - - err = bpf_prog_load_xattr(&attr, &pobj, &prog_fd); - if (err) { - log_err("Failed to load %s", attr.file); - goto out; - } - - bpf_object__for_each_program(prog, pobj) { - prog_name = bpf_program__section_name(prog); - - if (libbpf_attach_type_by_name(prog_name, &attach_type)) - goto err; - - err = bpf_prog_attach(bpf_program__fd(prog), cgfd, attach_type, - BPF_F_ALLOW_OVERRIDE); - if (err) { - log_err("Failed to attach prog %s", prog_name); - goto out; - } - } - - server_fd = start_server(); - if (server_fd == -1) - goto err; - - client_fd = connect_to_server(server_fd); - if (client_fd == -1) - goto err; - - if (validate_map(bpf_map__next(NULL, pobj), client_fd)) - goto err; - - goto out; -err: - err = -1; -out: - close(client_fd); - close(server_fd); - bpf_object__close(pobj); - printf("%s\n", err ? "FAILED" : "PASSED"); - return err; -} - -int main(int argc, char **argv) -{ - int cgfd = -1; - int err = 0; - - cgfd = cgroup_setup_and_join(CG_PATH); - if (cgfd < 0) - goto err; - - if (run_test(cgfd)) - goto err; - - goto out; -err: - err = -1; -out: - close(cgfd); - cleanup_cgroup_environment(); - return err; -} diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 777a81404fdb..58b5a349d3ba 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -50,7 +50,7 @@ #define MAX_INSNS BPF_MAXINSNS #define MAX_TEST_INSNS 1000000 #define MAX_FIXUPS 8 -#define MAX_NR_MAPS 20 +#define MAX_NR_MAPS 21 #define MAX_TEST_RUNS 8 #define POINTER_VALUE 0xcafe4all #define TEST_DATA_LEN 64 @@ -87,6 +87,11 @@ struct bpf_test { int fixup_sk_storage_map[MAX_FIXUPS]; int fixup_map_event_output[MAX_FIXUPS]; int fixup_map_reuseport_array[MAX_FIXUPS]; + int fixup_map_ringbuf[MAX_FIXUPS]; + /* Expected verifier log output for result REJECT or VERBOSE_ACCEPT. + * Can be a tab-separated sequence of expected strings. An empty string + * means no log verification. + */ const char *errstr; const char *errstr_unpriv; uint32_t insn_processed; @@ -296,6 +301,78 @@ static void bpf_fill_scale(struct bpf_test *self) } } +static int bpf_fill_torturous_jumps_insn_1(struct bpf_insn *insn) +{ + unsigned int len = 259, hlen = 128; + int i; + + insn[0] = BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32); + for (i = 1; i <= hlen; i++) { + insn[i] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, i, hlen); + insn[i + hlen] = BPF_JMP_A(hlen - i); + } + insn[len - 2] = BPF_MOV64_IMM(BPF_REG_0, 1); + insn[len - 1] = BPF_EXIT_INSN(); + + return len; +} + +static int bpf_fill_torturous_jumps_insn_2(struct bpf_insn *insn) +{ + unsigned int len = 4100, jmp_off = 2048; + int i, j; + + insn[0] = BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32); + for (i = 1; i <= jmp_off; i++) { + insn[i] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, i, jmp_off); + } + insn[i++] = BPF_JMP_A(jmp_off); + for (; i <= jmp_off * 2 + 1; i+=16) { + for (j = 0; j < 16; j++) { + insn[i + j] = BPF_JMP_A(16 - j - 1); + } + } + + insn[len - 2] = BPF_MOV64_IMM(BPF_REG_0, 2); + insn[len - 1] = BPF_EXIT_INSN(); + + return len; +} + +static void bpf_fill_torturous_jumps(struct bpf_test *self) +{ + struct bpf_insn *insn = self->fill_insns; + int i = 0; + + switch (self->retval) { + case 1: + self->prog_len = bpf_fill_torturous_jumps_insn_1(insn); + return; + case 2: + self->prog_len = bpf_fill_torturous_jumps_insn_2(insn); + return; + case 3: + /* main */ + insn[i++] = BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 4); + insn[i++] = BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 262); + insn[i++] = BPF_ST_MEM(BPF_B, BPF_REG_10, -32, 0); + insn[i++] = BPF_MOV64_IMM(BPF_REG_0, 3); + insn[i++] = BPF_EXIT_INSN(); + + /* subprog 1 */ + i += bpf_fill_torturous_jumps_insn_1(insn + i); + + /* subprog 2 */ + i += bpf_fill_torturous_jumps_insn_2(insn + i); + + self->prog_len = i; + return; + default: + self->prog_len = 0; + break; + } +} + /* BPF_SK_LOOKUP contains 13 instructions, if you need to fix up maps */ #define BPF_SK_LOOKUP(func) \ /* struct bpf_sock_tuple tuple = {} */ \ @@ -640,6 +717,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, int *fixup_sk_storage_map = test->fixup_sk_storage_map; int *fixup_map_event_output = test->fixup_map_event_output; int *fixup_map_reuseport_array = test->fixup_map_reuseport_array; + int *fixup_map_ringbuf = test->fixup_map_ringbuf; if (test->fill_helper) { test->fill_insns = calloc(MAX_TEST_INSNS, sizeof(struct bpf_insn)); @@ -817,6 +895,14 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, fixup_map_reuseport_array++; } while (*fixup_map_reuseport_array); } + if (*fixup_map_ringbuf) { + map_fds[20] = create_map(BPF_MAP_TYPE_RINGBUF, 0, + 0, 4096); + do { + prog[*fixup_map_ringbuf].imm = map_fds[20]; + fixup_map_ringbuf++; + } while (*fixup_map_ringbuf); + } } struct libcap { @@ -913,13 +999,19 @@ static int do_prog_test_run(int fd_prog, bool unpriv, uint32_t expected_val, return 0; } +/* Returns true if every part of exp (tab-separated) appears in log, in order. + * + * If exp is an empty string, returns true. + */ static bool cmp_str_seq(const char *log, const char *exp) { - char needle[80]; + char needle[200]; const char *p, *q; int len; do { + if (!strlen(exp)) + break; p = strchr(exp, '\t'); if (!p) p = exp + strlen(exp); @@ -933,7 +1025,7 @@ static bool cmp_str_seq(const char *log, const char *exp) needle[len] = 0; q = strstr(log, needle); if (!q) { - printf("FAIL\nUnexpected verifier log in successful load!\n" + printf("FAIL\nUnexpected verifier log!\n" "EXP: %s\nRES:\n", needle); return false; } @@ -1048,7 +1140,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, printf("FAIL\nUnexpected success to load!\n"); goto fail_log; } - if (!expected_err || !strstr(bpf_vlog, expected_err)) { + if (!expected_err || !cmp_str_seq(bpf_vlog, expected_err)) { printf("FAIL\nUnexpected error message!\n\tEXP: %s\n\tRES: %s\n", expected_err, bpf_vlog); goto fail_log; diff --git a/tools/testing/selftests/bpf/test_xdp_redirect.sh b/tools/testing/selftests/bpf/test_xdp_redirect.sh index dd80f0c84afb..c033850886f4 100755 --- a/tools/testing/selftests/bpf/test_xdp_redirect.sh +++ b/tools/testing/selftests/bpf/test_xdp_redirect.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # Create 2 namespaces with two veth peers, and # forward packets in-between using generic XDP # @@ -57,12 +57,8 @@ test_xdp_redirect() ip link set dev veth1 $xdpmode obj test_xdp_redirect.o sec redirect_to_222 &> /dev/null ip link set dev veth2 $xdpmode obj test_xdp_redirect.o sec redirect_to_111 &> /dev/null - ip netns exec ns1 ping -c 1 10.1.1.22 &> /dev/null - local ret1=$? - ip netns exec ns2 ping -c 1 10.1.1.11 &> /dev/null - local ret2=$? - - if [ $ret1 -eq 0 -a $ret2 -eq 0 ]; then + if ip netns exec ns1 ping -c 1 10.1.1.22 &> /dev/null && + ip netns exec ns2 ping -c 1 10.1.1.11 &> /dev/null; then echo "selftests: test_xdp_redirect $xdpmode [PASS]"; else ret=1 diff --git a/tools/testing/selftests/bpf/verifier/atomic_and.c b/tools/testing/selftests/bpf/verifier/atomic_and.c new file mode 100644 index 000000000000..1bdc8e6684f7 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/atomic_and.c @@ -0,0 +1,77 @@ +{ + "BPF_ATOMIC_AND without fetch", + .insns = { + /* val = 0x110; */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0x110), + /* atomic_and(&val, 0x011); */ + BPF_MOV64_IMM(BPF_REG_1, 0x011), + BPF_ATOMIC_OP(BPF_DW, BPF_AND, BPF_REG_10, BPF_REG_1, -8), + /* if (val != 0x010) exit(2); */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0x010, 2), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + /* r1 should not be clobbered, no BPF_FETCH flag */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x011, 1), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, +{ + "BPF_ATOMIC_AND with fetch", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 123), + /* val = 0x110; */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0x110), + /* old = atomic_fetch_and(&val, 0x011); */ + BPF_MOV64_IMM(BPF_REG_1, 0x011), + BPF_ATOMIC_OP(BPF_DW, BPF_AND | BPF_FETCH, BPF_REG_10, BPF_REG_1, -8), + /* if (old != 0x110) exit(3); */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x110, 2), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_EXIT_INSN(), + /* if (val != 0x010) exit(2); */ + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x010, 2), + BPF_MOV64_IMM(BPF_REG_1, 2), + BPF_EXIT_INSN(), + /* Check R0 wasn't clobbered (for fear of x86 JIT bug) */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 123, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + /* exit(0); */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, +{ + "BPF_ATOMIC_AND with fetch 32bit", + .insns = { + /* r0 = (s64) -1 */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 1), + /* val = 0x110; */ + BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0x110), + /* old = atomic_fetch_and(&val, 0x011); */ + BPF_MOV32_IMM(BPF_REG_1, 0x011), + BPF_ATOMIC_OP(BPF_W, BPF_AND | BPF_FETCH, BPF_REG_10, BPF_REG_1, -4), + /* if (old != 0x110) exit(3); */ + BPF_JMP32_IMM(BPF_JEQ, BPF_REG_1, 0x110, 2), + BPF_MOV32_IMM(BPF_REG_0, 3), + BPF_EXIT_INSN(), + /* if (val != 0x010) exit(2); */ + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_10, -4), + BPF_JMP32_IMM(BPF_JEQ, BPF_REG_1, 0x010, 2), + BPF_MOV32_IMM(BPF_REG_1, 2), + BPF_EXIT_INSN(), + /* Check R0 wasn't clobbered (for fear of x86 JIT bug) + * It should be -1 so add 1 to get exit code. + */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, diff --git a/tools/testing/selftests/bpf/verifier/atomic_bounds.c b/tools/testing/selftests/bpf/verifier/atomic_bounds.c new file mode 100644 index 000000000000..e82183e4914f --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/atomic_bounds.c @@ -0,0 +1,27 @@ +{ + "BPF_ATOMIC bounds propagation, mem->reg", + .insns = { + /* a = 0; */ + /* + * Note this is implemented with two separate instructions, + * where you might think one would suffice: + * + * BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + * + * This is because BPF_ST_MEM doesn't seem to set the stack slot + * type to 0 when storing an immediate. + */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + /* b = atomic_fetch_add(&a, 1); */ + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_ATOMIC_OP(BPF_DW, BPF_ADD | BPF_FETCH, BPF_REG_10, BPF_REG_1, -8), + /* Verifier should be able to tell that this infinite loop isn't reachable. */ + /* if (b) while (true) continue; */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, -1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "back-edge", +}, diff --git a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c new file mode 100644 index 000000000000..2efd8bcf57a1 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c @@ -0,0 +1,96 @@ +{ + "atomic compare-and-exchange smoketest - 64bit", + .insns = { + /* val = 3; */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 3), + /* old = atomic_cmpxchg(&val, 2, 4); */ + BPF_MOV64_IMM(BPF_REG_1, 4), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, BPF_REG_10, BPF_REG_1, -8), + /* if (old != 3) exit(2); */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 3, 2), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + /* if (val != 3) exit(3); */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 3, 2), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_EXIT_INSN(), + /* old = atomic_cmpxchg(&val, 3, 4); */ + BPF_MOV64_IMM(BPF_REG_1, 4), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, BPF_REG_10, BPF_REG_1, -8), + /* if (old != 3) exit(4); */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 3, 2), + BPF_MOV64_IMM(BPF_REG_0, 4), + BPF_EXIT_INSN(), + /* if (val != 4) exit(5); */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 4, 2), + BPF_MOV64_IMM(BPF_REG_0, 5), + BPF_EXIT_INSN(), + /* exit(0); */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, +{ + "atomic compare-and-exchange smoketest - 32bit", + .insns = { + /* val = 3; */ + BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 3), + /* old = atomic_cmpxchg(&val, 2, 4); */ + BPF_MOV32_IMM(BPF_REG_1, 4), + BPF_MOV32_IMM(BPF_REG_0, 2), + BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, BPF_REG_10, BPF_REG_1, -4), + /* if (old != 3) exit(2); */ + BPF_JMP32_IMM(BPF_JEQ, BPF_REG_0, 3, 2), + BPF_MOV32_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + /* if (val != 3) exit(3); */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -4), + BPF_JMP32_IMM(BPF_JEQ, BPF_REG_0, 3, 2), + BPF_MOV32_IMM(BPF_REG_0, 3), + BPF_EXIT_INSN(), + /* old = atomic_cmpxchg(&val, 3, 4); */ + BPF_MOV32_IMM(BPF_REG_1, 4), + BPF_MOV32_IMM(BPF_REG_0, 3), + BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, BPF_REG_10, BPF_REG_1, -4), + /* if (old != 3) exit(4); */ + BPF_JMP32_IMM(BPF_JEQ, BPF_REG_0, 3, 2), + BPF_MOV32_IMM(BPF_REG_0, 4), + BPF_EXIT_INSN(), + /* if (val != 4) exit(5); */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -4), + BPF_JMP32_IMM(BPF_JEQ, BPF_REG_0, 4, 2), + BPF_MOV32_IMM(BPF_REG_0, 5), + BPF_EXIT_INSN(), + /* exit(0); */ + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, +{ + "Can't use cmpxchg on uninit src reg", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 3), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, BPF_REG_10, BPF_REG_2, -8), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "!read_ok", +}, +{ + "Can't use cmpxchg on uninit memory", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, BPF_REG_10, BPF_REG_2, -8), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid read from stack", +}, diff --git a/tools/testing/selftests/bpf/verifier/atomic_fetch_add.c b/tools/testing/selftests/bpf/verifier/atomic_fetch_add.c new file mode 100644 index 000000000000..a91de8cd9def --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/atomic_fetch_add.c @@ -0,0 +1,106 @@ +{ + "BPF_ATOMIC_FETCH_ADD smoketest - 64bit", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + /* Write 3 to stack */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 3), + /* Put a 1 in R1, add it to the 3 on the stack, and load the value back into R1 */ + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_ATOMIC_OP(BPF_DW, BPF_ADD | BPF_FETCH, BPF_REG_10, BPF_REG_1, -8), + /* Check the value we loaded back was 3 */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 3, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + /* Load value from stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -8), + /* Check value loaded from stack was 4 */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 4, 1), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, +{ + "BPF_ATOMIC_FETCH_ADD smoketest - 32bit", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + /* Write 3 to stack */ + BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 3), + /* Put a 1 in R1, add it to the 3 on the stack, and load the value back into R1 */ + BPF_MOV32_IMM(BPF_REG_1, 1), + BPF_ATOMIC_OP(BPF_W, BPF_ADD | BPF_FETCH, BPF_REG_10, BPF_REG_1, -4), + /* Check the value we loaded back was 3 */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 3, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + /* Load value from stack */ + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_10, -4), + /* Check value loaded from stack was 4 */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 4, 1), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, +{ + "Can't use ATM_FETCH_ADD on frame pointer", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 3), + BPF_ATOMIC_OP(BPF_DW, BPF_ADD | BPF_FETCH, BPF_REG_10, BPF_REG_10, -8), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr_unpriv = "R10 leaks addr into mem", + .errstr = "frame pointer is read only", +}, +{ + "Can't use ATM_FETCH_ADD on uninit src reg", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 3), + BPF_ATOMIC_OP(BPF_DW, BPF_ADD | BPF_FETCH, BPF_REG_10, BPF_REG_2, -8), + BPF_EXIT_INSN(), + }, + .result = REJECT, + /* It happens that the address leak check is first, but it would also be + * complain about the fact that we're trying to modify R10. + */ + .errstr = "!read_ok", +}, +{ + "Can't use ATM_FETCH_ADD on uninit dst reg", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ATOMIC_OP(BPF_DW, BPF_ADD | BPF_FETCH, BPF_REG_2, BPF_REG_0, -8), + BPF_EXIT_INSN(), + }, + .result = REJECT, + /* It happens that the address leak check is first, but it would also be + * complain about the fact that we're trying to modify R10. + */ + .errstr = "!read_ok", +}, +{ + "Can't use ATM_FETCH_ADD on kernel memory", + .insns = { + /* This is an fentry prog, context is array of the args of the + * kernel function being called. Load first arg into R2. + */ + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 0), + /* First arg of bpf_fentry_test7 is a pointer to a struct. + * Attempt to modify that struct. Verifier shouldn't let us + * because it's kernel memory. + */ + BPF_MOV64_IMM(BPF_REG_3, 1), + BPF_ATOMIC_OP(BPF_DW, BPF_ADD | BPF_FETCH, BPF_REG_2, BPF_REG_3, 0), + /* Done */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACING, + .expected_attach_type = BPF_TRACE_FENTRY, + .kfunc = "bpf_fentry_test7", + .result = REJECT, + .errstr = "only read is supported", +}, diff --git a/tools/testing/selftests/bpf/verifier/atomic_or.c b/tools/testing/selftests/bpf/verifier/atomic_or.c new file mode 100644 index 000000000000..70f982e1f9f0 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/atomic_or.c @@ -0,0 +1,77 @@ +{ + "BPF_ATOMIC OR without fetch", + .insns = { + /* val = 0x110; */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0x110), + /* atomic_or(&val, 0x011); */ + BPF_MOV64_IMM(BPF_REG_1, 0x011), + BPF_ATOMIC_OP(BPF_DW, BPF_OR, BPF_REG_10, BPF_REG_1, -8), + /* if (val != 0x111) exit(2); */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0x111, 2), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + /* r1 should not be clobbered, no BPF_FETCH flag */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x011, 1), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, +{ + "BPF_ATOMIC OR with fetch", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 123), + /* val = 0x110; */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0x110), + /* old = atomic_fetch_or(&val, 0x011); */ + BPF_MOV64_IMM(BPF_REG_1, 0x011), + BPF_ATOMIC_OP(BPF_DW, BPF_OR | BPF_FETCH, BPF_REG_10, BPF_REG_1, -8), + /* if (old != 0x110) exit(3); */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x110, 2), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_EXIT_INSN(), + /* if (val != 0x111) exit(2); */ + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x111, 2), + BPF_MOV64_IMM(BPF_REG_1, 2), + BPF_EXIT_INSN(), + /* Check R0 wasn't clobbered (for fear of x86 JIT bug) */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 123, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + /* exit(0); */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, +{ + "BPF_ATOMIC OR with fetch 32bit", + .insns = { + /* r0 = (s64) -1 */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 1), + /* val = 0x110; */ + BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0x110), + /* old = atomic_fetch_or(&val, 0x011); */ + BPF_MOV32_IMM(BPF_REG_1, 0x011), + BPF_ATOMIC_OP(BPF_W, BPF_OR | BPF_FETCH, BPF_REG_10, BPF_REG_1, -4), + /* if (old != 0x110) exit(3); */ + BPF_JMP32_IMM(BPF_JEQ, BPF_REG_1, 0x110, 2), + BPF_MOV32_IMM(BPF_REG_0, 3), + BPF_EXIT_INSN(), + /* if (val != 0x111) exit(2); */ + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_10, -4), + BPF_JMP32_IMM(BPF_JEQ, BPF_REG_1, 0x111, 2), + BPF_MOV32_IMM(BPF_REG_1, 2), + BPF_EXIT_INSN(), + /* Check R0 wasn't clobbered (for fear of x86 JIT bug) + * It should be -1 so add 1 to get exit code. + */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, diff --git a/tools/testing/selftests/bpf/verifier/atomic_xchg.c b/tools/testing/selftests/bpf/verifier/atomic_xchg.c new file mode 100644 index 000000000000..33e2d6c973ee --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/atomic_xchg.c @@ -0,0 +1,46 @@ +{ + "atomic exchange smoketest - 64bit", + .insns = { + /* val = 3; */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 3), + /* old = atomic_xchg(&val, 4); */ + BPF_MOV64_IMM(BPF_REG_1, 4), + BPF_ATOMIC_OP(BPF_DW, BPF_XCHG, BPF_REG_10, BPF_REG_1, -8), + /* if (old != 3) exit(1); */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 3, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + /* if (val != 4) exit(2); */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 4, 2), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + /* exit(0); */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, +{ + "atomic exchange smoketest - 32bit", + .insns = { + /* val = 3; */ + BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 3), + /* old = atomic_xchg(&val, 4); */ + BPF_MOV32_IMM(BPF_REG_1, 4), + BPF_ATOMIC_OP(BPF_W, BPF_XCHG, BPF_REG_10, BPF_REG_1, -4), + /* if (old != 3) exit(1); */ + BPF_JMP32_IMM(BPF_JEQ, BPF_REG_1, 3, 2), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + /* if (val != 4) exit(2); */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -4), + BPF_JMP32_IMM(BPF_JEQ, BPF_REG_0, 4, 2), + BPF_MOV32_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + /* exit(0); */ + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, diff --git a/tools/testing/selftests/bpf/verifier/atomic_xor.c b/tools/testing/selftests/bpf/verifier/atomic_xor.c new file mode 100644 index 000000000000..74e8fb46694b --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/atomic_xor.c @@ -0,0 +1,77 @@ +{ + "BPF_ATOMIC XOR without fetch", + .insns = { + /* val = 0x110; */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0x110), + /* atomic_xor(&val, 0x011); */ + BPF_MOV64_IMM(BPF_REG_1, 0x011), + BPF_ATOMIC_OP(BPF_DW, BPF_XOR, BPF_REG_10, BPF_REG_1, -8), + /* if (val != 0x101) exit(2); */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0x101, 2), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + /* r1 should not be clobbered, no BPF_FETCH flag */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x011, 1), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, +{ + "BPF_ATOMIC XOR with fetch", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 123), + /* val = 0x110; */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0x110), + /* old = atomic_fetch_xor(&val, 0x011); */ + BPF_MOV64_IMM(BPF_REG_1, 0x011), + BPF_ATOMIC_OP(BPF_DW, BPF_XOR | BPF_FETCH, BPF_REG_10, BPF_REG_1, -8), + /* if (old != 0x110) exit(3); */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x110, 2), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_EXIT_INSN(), + /* if (val != 0x101) exit(2); */ + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x101, 2), + BPF_MOV64_IMM(BPF_REG_1, 2), + BPF_EXIT_INSN(), + /* Check R0 wasn't clobbered (fxor fear of x86 JIT bug) */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 123, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + /* exit(0); */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, +{ + "BPF_ATOMIC XOR with fetch 32bit", + .insns = { + /* r0 = (s64) -1 */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 1), + /* val = 0x110; */ + BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0x110), + /* old = atomic_fetch_xor(&val, 0x011); */ + BPF_MOV32_IMM(BPF_REG_1, 0x011), + BPF_ATOMIC_OP(BPF_W, BPF_XOR | BPF_FETCH, BPF_REG_10, BPF_REG_1, -4), + /* if (old != 0x110) exit(3); */ + BPF_JMP32_IMM(BPF_JEQ, BPF_REG_1, 0x110, 2), + BPF_MOV32_IMM(BPF_REG_0, 3), + BPF_EXIT_INSN(), + /* if (val != 0x101) exit(2); */ + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_10, -4), + BPF_JMP32_IMM(BPF_JEQ, BPF_REG_1, 0x101, 2), + BPF_MOV32_IMM(BPF_REG_1, 2), + BPF_EXIT_INSN(), + /* Check R0 wasn't clobbered (fxor fear of x86 JIT bug) + * It should be -1 so add 1 to get exit code. + */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, diff --git a/tools/testing/selftests/bpf/verifier/basic_stack.c b/tools/testing/selftests/bpf/verifier/basic_stack.c index b56f8117c09d..f995777dddb3 100644 --- a/tools/testing/selftests/bpf/verifier/basic_stack.c +++ b/tools/testing/selftests/bpf/verifier/basic_stack.c @@ -4,7 +4,7 @@ BPF_ST_MEM(BPF_DW, BPF_REG_10, 8, 0), BPF_EXIT_INSN(), }, - .errstr = "invalid stack", + .errstr = "invalid write to stack", .result = REJECT, }, { diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index c4f5d909e58a..eb888c8479c3 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -1228,7 +1228,7 @@ .prog_type = BPF_PROG_TYPE_XDP, .fixup_map_hash_8b = { 23 }, .result = REJECT, - .errstr = "invalid read from stack off -16+0 size 8", + .errstr = "invalid read from stack R7 off=-16 size=8", }, { "calls: two calls that receive map_value via arg=ptr_stack_of_caller. test1", @@ -1958,7 +1958,7 @@ BPF_EXIT_INSN(), }, .fixup_map_hash_48b = { 6 }, - .errstr = "invalid indirect read from stack off -8+0 size 8", + .errstr = "invalid indirect read from stack R2 off -8+0 size 8", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, }, diff --git a/tools/testing/selftests/bpf/verifier/const_or.c b/tools/testing/selftests/bpf/verifier/const_or.c index 6c214c58e8d4..0719b0ddec04 100644 --- a/tools/testing/selftests/bpf/verifier/const_or.c +++ b/tools/testing/selftests/bpf/verifier/const_or.c @@ -23,7 +23,7 @@ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_EXIT_INSN(), }, - .errstr = "invalid stack type R1 off=-48 access_size=58", + .errstr = "invalid indirect access to stack R1 off=-48 size=58", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -54,7 +54,7 @@ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_EXIT_INSN(), }, - .errstr = "invalid stack type R1 off=-48 access_size=58", + .errstr = "invalid indirect access to stack R1 off=-48 size=58", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, diff --git a/tools/testing/selftests/bpf/verifier/ctx.c b/tools/testing/selftests/bpf/verifier/ctx.c index 93d6b1641481..23080862aafd 100644 --- a/tools/testing/selftests/bpf/verifier/ctx.c +++ b/tools/testing/selftests/bpf/verifier/ctx.c @@ -10,14 +10,13 @@ .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { - "context stores via XADD", + "context stores via BPF_ATOMIC", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_W, BPF_REG_1, - BPF_REG_0, offsetof(struct __sk_buff, mark), 0), + BPF_ATOMIC_OP(BPF_W, BPF_ADD, BPF_REG_1, BPF_REG_0, offsetof(struct __sk_buff, mark)), BPF_EXIT_INSN(), }, - .errstr = "BPF_XADD stores into R1 ctx is not allowed", + .errstr = "BPF_ATOMIC stores into R1 ctx is not allowed", .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, diff --git a/tools/testing/selftests/bpf/verifier/direct_packet_access.c b/tools/testing/selftests/bpf/verifier/direct_packet_access.c index ae72536603fe..ac1e19d0f520 100644 --- a/tools/testing/selftests/bpf/verifier/direct_packet_access.c +++ b/tools/testing/selftests/bpf/verifier/direct_packet_access.c @@ -333,7 +333,7 @@ BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), - BPF_STX_XADD(BPF_DW, BPF_REG_4, BPF_REG_5, 0), + BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_4, BPF_REG_5, 0), BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0), BPF_STX_MEM(BPF_W, BPF_REG_2, BPF_REG_5, 0), BPF_MOV64_IMM(BPF_REG_0, 0), @@ -488,7 +488,7 @@ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 11), BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -8), BPF_MOV64_IMM(BPF_REG_4, 0xffffffff), - BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_4, -8), + BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_10, BPF_REG_4, -8), BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 49), BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2), diff --git a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c index 87c4e7900083..0ab7f1dfc97a 100644 --- a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c +++ b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c @@ -39,7 +39,7 @@ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_EXIT_INSN(), }, - .errstr = "invalid indirect read from stack off -64+0 size 64", + .errstr = "invalid indirect read from stack R1 off -64+0 size 64", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -59,7 +59,7 @@ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr = "invalid stack type R1 off=-64 access_size=65", + .errstr = "invalid indirect access to stack R1 off=-64 size=65", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -136,7 +136,7 @@ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr = "invalid stack type R1 off=-64 access_size=65", + .errstr = "invalid indirect access to stack R1 off=-64 size=65", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -156,7 +156,7 @@ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr = "invalid stack type R1 off=-64 access_size=65", + .errstr = "invalid indirect access to stack R1 off=-64 size=65", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -194,7 +194,7 @@ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr = "invalid indirect read from stack off -64+0 size 64", + .errstr = "invalid indirect read from stack R1 off -64+0 size 64", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -584,7 +584,7 @@ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), BPF_EXIT_INSN(), }, - .errstr = "invalid indirect read from stack off -64+32 size 64", + .errstr = "invalid indirect read from stack R1 off -64+32 size 64", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, diff --git a/tools/testing/selftests/bpf/verifier/int_ptr.c b/tools/testing/selftests/bpf/verifier/int_ptr.c index ca3b4729df66..070893fb2900 100644 --- a/tools/testing/selftests/bpf/verifier/int_ptr.c +++ b/tools/testing/selftests/bpf/verifier/int_ptr.c @@ -27,7 +27,7 @@ }, .result = REJECT, .prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL, - .errstr = "invalid indirect read from stack off -16+0 size 8", + .errstr = "invalid indirect read from stack R4 off -16+0 size 8", }, { "ARG_PTR_TO_LONG half-uninitialized", @@ -59,7 +59,7 @@ }, .result = REJECT, .prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL, - .errstr = "invalid indirect read from stack off -16+4 size 8", + .errstr = "invalid indirect read from stack R4 off -16+4 size 8", }, { "ARG_PTR_TO_LONG misaligned", @@ -125,7 +125,7 @@ }, .result = REJECT, .prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL, - .errstr = "invalid stack type R4 off=-4 access_size=8", + .errstr = "invalid indirect access to stack R4 off=-4 size=8", }, { "ARG_PTR_TO_LONG initialized", diff --git a/tools/testing/selftests/bpf/verifier/jit.c b/tools/testing/selftests/bpf/verifier/jit.c index c33adf344fae..df215e004566 100644 --- a/tools/testing/selftests/bpf/verifier/jit.c +++ b/tools/testing/selftests/bpf/verifier/jit.c @@ -105,3 +105,27 @@ .result = ACCEPT, .retval = 2, }, +{ + "jit: torturous jumps, imm8 nop jmp and pure jump padding", + .insns = { }, + .fill_helper = bpf_fill_torturous_jumps, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, +}, +{ + "jit: torturous jumps, imm32 nop jmp and jmp_cond padding", + .insns = { }, + .fill_helper = bpf_fill_torturous_jumps, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 2, +}, +{ + "jit: torturous jumps in subprog", + .insns = { }, + .fill_helper = bpf_fill_torturous_jumps, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 3, +}, diff --git a/tools/testing/selftests/bpf/verifier/leak_ptr.c b/tools/testing/selftests/bpf/verifier/leak_ptr.c index d6eec17f2cd2..73f0dea95546 100644 --- a/tools/testing/selftests/bpf/verifier/leak_ptr.c +++ b/tools/testing/selftests/bpf/verifier/leak_ptr.c @@ -5,7 +5,7 @@ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, offsetof(struct __sk_buff, cb[0])), BPF_LD_MAP_FD(BPF_REG_2, 0), - BPF_STX_XADD(BPF_DW, BPF_REG_1, BPF_REG_2, + BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_1, BPF_REG_2, offsetof(struct __sk_buff, cb[0])), BPF_EXIT_INSN(), }, @@ -13,7 +13,7 @@ .errstr_unpriv = "R2 leaks addr into mem", .result_unpriv = REJECT, .result = REJECT, - .errstr = "BPF_XADD stores into R1 ctx is not allowed", + .errstr = "BPF_ATOMIC stores into R1 ctx is not allowed", }, { "leak pointer into ctx 2", @@ -21,14 +21,14 @@ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, offsetof(struct __sk_buff, cb[0])), - BPF_STX_XADD(BPF_DW, BPF_REG_1, BPF_REG_10, + BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_1, BPF_REG_10, offsetof(struct __sk_buff, cb[0])), BPF_EXIT_INSN(), }, .errstr_unpriv = "R10 leaks addr into mem", .result_unpriv = REJECT, .result = REJECT, - .errstr = "BPF_XADD stores into R1 ctx is not allowed", + .errstr = "BPF_ATOMIC stores into R1 ctx is not allowed", }, { "leak pointer into ctx 3", @@ -56,7 +56,7 @@ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), BPF_MOV64_IMM(BPF_REG_3, 0), BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), - BPF_STX_XADD(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_0, BPF_REG_6, 0), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, diff --git a/tools/testing/selftests/bpf/verifier/meta_access.c b/tools/testing/selftests/bpf/verifier/meta_access.c index 205292b8dd65..b45e8af41420 100644 --- a/tools/testing/selftests/bpf/verifier/meta_access.c +++ b/tools/testing/selftests/bpf/verifier/meta_access.c @@ -171,7 +171,7 @@ BPF_MOV64_IMM(BPF_REG_5, 42), BPF_MOV64_IMM(BPF_REG_6, 24), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_5, -8), - BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_6, -8), + BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_10, BPF_REG_6, -8), BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -8), BPF_JMP_IMM(BPF_JGT, BPF_REG_5, 100, 6), BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_5), @@ -196,7 +196,7 @@ BPF_MOV64_IMM(BPF_REG_5, 42), BPF_MOV64_IMM(BPF_REG_6, 24), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_5, -8), - BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_6, -8), + BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_10, BPF_REG_6, -8), BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -8), BPF_JMP_IMM(BPF_JGT, BPF_REG_5, 100, 6), BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_5), diff --git a/tools/testing/selftests/bpf/verifier/raw_stack.c b/tools/testing/selftests/bpf/verifier/raw_stack.c index 193d9e87d5a9..cc8e8c3cdc03 100644 --- a/tools/testing/selftests/bpf/verifier/raw_stack.c +++ b/tools/testing/selftests/bpf/verifier/raw_stack.c @@ -11,7 +11,7 @@ BPF_EXIT_INSN(), }, .result = REJECT, - .errstr = "invalid read from stack off -8+0 size 8", + .errstr = "invalid read from stack R6 off=-8 size=8", .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { @@ -59,7 +59,7 @@ BPF_EXIT_INSN(), }, .result = REJECT, - .errstr = "invalid stack type R3", + .errstr = "invalid zero-sized read", .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { @@ -205,7 +205,7 @@ BPF_EXIT_INSN(), }, .result = REJECT, - .errstr = "invalid stack type R3 off=-513 access_size=8", + .errstr = "invalid indirect access to stack R3 off=-513 size=8", .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { @@ -221,7 +221,7 @@ BPF_EXIT_INSN(), }, .result = REJECT, - .errstr = "invalid stack type R3 off=-1 access_size=8", + .errstr = "invalid indirect access to stack R3 off=-1 size=8", .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { @@ -285,7 +285,7 @@ BPF_EXIT_INSN(), }, .result = REJECT, - .errstr = "invalid stack type R3 off=-512 access_size=0", + .errstr = "invalid zero-sized read", .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { diff --git a/tools/testing/selftests/bpf/verifier/spill_fill.c b/tools/testing/selftests/bpf/verifier/spill_fill.c index 45d43bf82f26..0b943897aaf6 100644 --- a/tools/testing/selftests/bpf/verifier/spill_fill.c +++ b/tools/testing/selftests/bpf/verifier/spill_fill.c @@ -29,6 +29,36 @@ .result_unpriv = ACCEPT, }, { + "check valid spill/fill, ptr to mem", + .insns = { + /* reserve 8 byte ringbuf memory */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_2, 8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve), + /* store a pointer to the reserved memory in R6 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + /* check whether the reservation was successful */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + /* spill R6(mem) into the stack */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -8), + /* fill it back in R7 */ + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, -8), + /* should be able to access *(R7) = 0 */ + BPF_ST_MEM(BPF_DW, BPF_REG_7, 0, 0), + /* submit the reserved ringbuf memory */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_ringbuf = { 1 }, + .result = ACCEPT, + .result_unpriv = ACCEPT, +}, +{ "check corrupted spill/fill", .insns = { /* spill R1(ctx) into stack */ diff --git a/tools/testing/selftests/bpf/verifier/stack_ptr.c b/tools/testing/selftests/bpf/verifier/stack_ptr.c index 8bfeb77c60bd..07eaa04412ae 100644 --- a/tools/testing/selftests/bpf/verifier/stack_ptr.c +++ b/tools/testing/selftests/bpf/verifier/stack_ptr.c @@ -44,7 +44,7 @@ BPF_EXIT_INSN(), }, .result = REJECT, - .errstr = "invalid stack off=-79992 size=8", + .errstr = "invalid write to stack R1 off=-79992 size=8", .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", }, { @@ -57,7 +57,7 @@ BPF_EXIT_INSN(), }, .result = REJECT, - .errstr = "invalid stack off=0 size=8", + .errstr = "invalid write to stack R1 off=0 size=8", }, { "PTR_TO_STACK check high 1", @@ -106,7 +106,7 @@ BPF_EXIT_INSN(), }, .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", - .errstr = "invalid stack off=0 size=1", + .errstr = "invalid write to stack R1 off=0 size=1", .result = REJECT, }, { @@ -119,7 +119,8 @@ BPF_EXIT_INSN(), }, .result = REJECT, - .errstr = "invalid stack off", + .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", + .errstr = "invalid write to stack R1", }, { "PTR_TO_STACK check high 6", @@ -131,7 +132,8 @@ BPF_EXIT_INSN(), }, .result = REJECT, - .errstr = "invalid stack off", + .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", + .errstr = "invalid write to stack", }, { "PTR_TO_STACK check high 7", @@ -183,7 +185,7 @@ BPF_EXIT_INSN(), }, .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", - .errstr = "invalid stack off=-513 size=1", + .errstr = "invalid write to stack R1 off=-513 size=1", .result = REJECT, }, { @@ -208,7 +210,8 @@ BPF_EXIT_INSN(), }, .result = REJECT, - .errstr = "invalid stack off", + .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", + .errstr = "invalid write to stack", }, { "PTR_TO_STACK check low 6", @@ -220,7 +223,8 @@ BPF_EXIT_INSN(), }, .result = REJECT, - .errstr = "invalid stack off", + .errstr = "invalid write to stack", + .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", }, { "PTR_TO_STACK check low 7", @@ -292,7 +296,7 @@ BPF_EXIT_INSN(), }, .result_unpriv = REJECT, - .errstr_unpriv = "invalid stack off=0 size=1", + .errstr_unpriv = "invalid write to stack R1 off=0 size=1", .result = ACCEPT, .retval = 42, }, diff --git a/tools/testing/selftests/bpf/verifier/unpriv.c b/tools/testing/selftests/bpf/verifier/unpriv.c index a3fe0fbaed41..b018ad71e0a8 100644 --- a/tools/testing/selftests/bpf/verifier/unpriv.c +++ b/tools/testing/selftests/bpf/verifier/unpriv.c @@ -108,7 +108,7 @@ BPF_EXIT_INSN(), }, .fixup_map_hash_8b = { 3 }, - .errstr_unpriv = "invalid indirect read from stack off -8+0 size 8", + .errstr_unpriv = "invalid indirect read from stack R2 off -8+0 size 8", .result_unpriv = REJECT, .result = ACCEPT, }, @@ -207,7 +207,8 @@ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_10, BPF_REG_0, -8, 0), + BPF_RAW_INSN(BPF_STX | BPF_ATOMIC | BPF_DW, + BPF_REG_10, BPF_REG_0, -8, BPF_ADD), BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_hash_recalc), BPF_EXIT_INSN(), diff --git a/tools/testing/selftests/bpf/verifier/value_illegal_alu.c b/tools/testing/selftests/bpf/verifier/value_illegal_alu.c index ed1c2cea1dea..489062867218 100644 --- a/tools/testing/selftests/bpf/verifier/value_illegal_alu.c +++ b/tools/testing/selftests/bpf/verifier/value_illegal_alu.c @@ -82,7 +82,7 @@ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), - BPF_STX_XADD(BPF_DW, BPF_REG_2, BPF_REG_3, 0), + BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_2, BPF_REG_3, 0), BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0), BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22), BPF_EXIT_INSN(), diff --git a/tools/testing/selftests/bpf/verifier/var_off.c b/tools/testing/selftests/bpf/verifier/var_off.c index 8504ac937809..eab1f7f56e2f 100644 --- a/tools/testing/selftests/bpf/verifier/var_off.c +++ b/tools/testing/selftests/bpf/verifier/var_off.c @@ -18,7 +18,7 @@ .prog_type = BPF_PROG_TYPE_LWT_IN, }, { - "variable-offset stack access", + "variable-offset stack read, priv vs unpriv", .insns = { /* Fill the top 8 bytes of the stack */ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), @@ -31,15 +31,110 @@ * we don't know which */ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10), - /* dereference it */ + /* dereference it for a stack read */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R2 variable stack access prohibited for !root", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "variable-offset stack read, uninitialized", + .insns = { + /* Get an unknown value */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), + /* Make it small and 4-byte aligned */ + BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 8), + /* add it to fp. We now have either fp-4 or fp-8, but + * we don't know which + */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10), + /* dereference it for a stack read */ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr = "variable stack access var_off=(0xfffffffffffffff8; 0x4)", .result = REJECT, + .errstr = "invalid variable-offset read from stack R2", .prog_type = BPF_PROG_TYPE_LWT_IN, }, { + "variable-offset stack write, priv vs unpriv", + .insns = { + /* Get an unknown value */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), + /* Make it small and 8-byte aligned */ + BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 8), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16), + /* Add it to fp. We now have either fp-8 or fp-16, but + * we don't know which + */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10), + /* Dereference it for a stack write */ + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + /* Now read from the address we just wrote. This shows + * that, after a variable-offset write, a priviledged + * program can read the slots that were in the range of + * that write (even if the verifier doesn't actually know + * if the slot being read was really written to or not. + */ + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + /* Variable stack access is rejected for unprivileged. + */ + .errstr_unpriv = "R2 variable stack access prohibited for !root", + .result_unpriv = REJECT, + .result = ACCEPT, +}, +{ + "variable-offset stack write clobbers spilled regs", + .insns = { + /* Dummy instruction; needed because we need to patch the next one + * and we can't patch the first instruction. + */ + BPF_MOV64_IMM(BPF_REG_6, 0), + /* Make R0 a map ptr */ + BPF_LD_MAP_FD(BPF_REG_0, 0), + /* Get an unknown value */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), + /* Make it small and 8-byte aligned */ + BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 8), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16), + /* Add it to fp. We now have either fp-8 or fp-16, but + * we don't know which. + */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10), + /* Spill R0(map ptr) into stack */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + /* Dereference the unknown value for a stack write */ + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + /* Fill the register back into R2 */ + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -8), + /* Try to dereference R2 for a memory load */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 8), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 1 }, + /* The unpriviledged case is not too interesting; variable + * stack access is rejected. + */ + .errstr_unpriv = "R2 variable stack access prohibited for !root", + .result_unpriv = REJECT, + /* In the priviledged case, dereferencing a spilled-and-then-filled + * register is rejected because the previous variable offset stack + * write might have overwritten the spilled pointer (i.e. we lose track + * of the spilled register when we analyze the write). + */ + .errstr = "R2 invalid mem access 'inv'", + .result = REJECT, +}, +{ "indirect variable-offset stack access, unbounded", .insns = { BPF_MOV64_IMM(BPF_REG_2, 6), @@ -63,7 +158,7 @@ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr = "R4 unbounded indirect variable offset stack access", + .errstr = "invalid unbounded variable-offset indirect access to stack R4", .result = REJECT, .prog_type = BPF_PROG_TYPE_SOCK_OPS, }, @@ -88,7 +183,7 @@ BPF_EXIT_INSN(), }, .fixup_map_hash_8b = { 5 }, - .errstr = "R2 max value is outside of stack bound", + .errstr = "invalid variable-offset indirect access to stack R2", .result = REJECT, .prog_type = BPF_PROG_TYPE_LWT_IN, }, @@ -113,7 +208,7 @@ BPF_EXIT_INSN(), }, .fixup_map_hash_8b = { 5 }, - .errstr = "R2 min value is outside of stack bound", + .errstr = "invalid variable-offset indirect access to stack R2", .result = REJECT, .prog_type = BPF_PROG_TYPE_LWT_IN, }, @@ -138,7 +233,7 @@ BPF_EXIT_INSN(), }, .fixup_map_hash_8b = { 5 }, - .errstr = "invalid indirect read from stack var_off", + .errstr = "invalid indirect read from stack R2 var_off", .result = REJECT, .prog_type = BPF_PROG_TYPE_LWT_IN, }, @@ -163,7 +258,7 @@ BPF_EXIT_INSN(), }, .fixup_map_hash_8b = { 5 }, - .errstr = "invalid indirect read from stack var_off", + .errstr = "invalid indirect read from stack R2 var_off", .result = REJECT, .prog_type = BPF_PROG_TYPE_LWT_IN, }, @@ -189,7 +284,7 @@ BPF_EXIT_INSN(), }, .fixup_map_hash_8b = { 6 }, - .errstr_unpriv = "R2 stack pointer arithmetic goes out of range, prohibited for !root", + .errstr_unpriv = "R2 variable stack access prohibited for !root", .result_unpriv = REJECT, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_CGROUP_SKB, @@ -217,7 +312,7 @@ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr = "invalid indirect read from stack var_off", + .errstr = "invalid indirect read from stack R4 var_off", .result = REJECT, .prog_type = BPF_PROG_TYPE_SOCK_OPS, }, diff --git a/tools/testing/selftests/bpf/verifier/xadd.c b/tools/testing/selftests/bpf/verifier/xadd.c index c5de2e62cc8b..b96ef3526815 100644 --- a/tools/testing/selftests/bpf/verifier/xadd.c +++ b/tools/testing/selftests/bpf/verifier/xadd.c @@ -3,7 +3,7 @@ .insns = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), - BPF_STX_XADD(BPF_W, BPF_REG_10, BPF_REG_0, -7), + BPF_ATOMIC_OP(BPF_W, BPF_ADD, BPF_REG_10, BPF_REG_0, -7), BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), BPF_EXIT_INSN(), }, @@ -22,7 +22,7 @@ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), BPF_EXIT_INSN(), BPF_MOV64_IMM(BPF_REG_1, 1), - BPF_STX_XADD(BPF_W, BPF_REG_0, BPF_REG_1, 3), + BPF_ATOMIC_OP(BPF_W, BPF_ADD, BPF_REG_0, BPF_REG_1, 3), BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 3), BPF_EXIT_INSN(), }, @@ -45,13 +45,13 @@ BPF_MOV64_IMM(BPF_REG_0, 1), BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), BPF_ST_MEM(BPF_W, BPF_REG_2, 3, 0), - BPF_STX_XADD(BPF_W, BPF_REG_2, BPF_REG_0, 1), - BPF_STX_XADD(BPF_W, BPF_REG_2, BPF_REG_0, 2), + BPF_ATOMIC_OP(BPF_W, BPF_ADD, BPF_REG_2, BPF_REG_0, 1), + BPF_ATOMIC_OP(BPF_W, BPF_ADD, BPF_REG_2, BPF_REG_0, 2), BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 1), BPF_EXIT_INSN(), }, .result = REJECT, - .errstr = "BPF_XADD stores into R2 pkt is not allowed", + .errstr = "BPF_ATOMIC stores into R2 pkt is not allowed", .prog_type = BPF_PROG_TYPE_XDP, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, @@ -62,8 +62,8 @@ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), - BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_0, -8), - BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_10, BPF_REG_0, -8), + BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_10, BPF_REG_0, -8), BPF_JMP_REG(BPF_JNE, BPF_REG_6, BPF_REG_0, 3), BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_10, 2), BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), @@ -82,8 +82,8 @@ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -8), - BPF_STX_XADD(BPF_W, BPF_REG_10, BPF_REG_0, -8), - BPF_STX_XADD(BPF_W, BPF_REG_10, BPF_REG_0, -8), + BPF_ATOMIC_OP(BPF_W, BPF_ADD, BPF_REG_10, BPF_REG_0, -8), + BPF_ATOMIC_OP(BPF_W, BPF_ADD, BPF_REG_10, BPF_REG_0, -8), BPF_JMP_REG(BPF_JNE, BPF_REG_6, BPF_REG_0, 3), BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_10, 2), BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -8), diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh new file mode 100755 index 000000000000..26ae8d0b6ce3 --- /dev/null +++ b/tools/testing/selftests/bpf/vmtest.sh @@ -0,0 +1,368 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +set -u +set -e + +# This script currently only works for x86_64, as +# it is based on the VM image used by the BPF CI which is +# x86_64. +QEMU_BINARY="${QEMU_BINARY:="qemu-system-x86_64"}" +X86_BZIMAGE="arch/x86/boot/bzImage" +DEFAULT_COMMAND="./test_progs" +MOUNT_DIR="mnt" +ROOTFS_IMAGE="root.img" +OUTPUT_DIR="$HOME/.bpf_selftests" +KCONFIG_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/latest.config" +KCONFIG_API_URL="https://api.github.com/repos/libbpf/libbpf/contents/travis-ci/vmtest/configs/latest.config" +INDEX_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/INDEX" +NUM_COMPILE_JOBS="$(nproc)" + +usage() +{ + cat <<EOF +Usage: $0 [-i] [-d <output_dir>] -- [<command>] + +<command> is the command you would normally run when you are in +tools/testing/selftests/bpf. e.g: + + $0 -- ./test_progs -t test_lsm + +If no command is specified, "${DEFAULT_COMMAND}" will be run by +default. + +If you build your kernel using KBUILD_OUTPUT= or O= options, these +can be passed as environment variables to the script: + + O=<kernel_build_path> $0 -- ./test_progs -t test_lsm + +or + + KBUILD_OUTPUT=<kernel_build_path> $0 -- ./test_progs -t test_lsm + +Options: + + -i) Update the rootfs image with a newer version. + -d) Update the output directory (default: ${OUTPUT_DIR}) + -j) Number of jobs for compilation, similar to -j in make + (default: ${NUM_COMPILE_JOBS}) +EOF +} + +unset URLS +populate_url_map() +{ + if ! declare -p URLS &> /dev/null; then + # URLS contain the mapping from file names to URLs where + # those files can be downloaded from. + declare -gA URLS + while IFS=$'\t' read -r name url; do + URLS["$name"]="$url" + done < <(curl -Lsf ${INDEX_URL}) + fi +} + +download() +{ + local file="$1" + + if [[ ! -v URLS[$file] ]]; then + echo "$file not found" >&2 + return 1 + fi + + echo "Downloading $file..." >&2 + curl -Lsf "${URLS[$file]}" "${@:2}" +} + +newest_rootfs_version() +{ + { + for file in "${!URLS[@]}"; do + if [[ $file =~ ^libbpf-vmtest-rootfs-(.*)\.tar\.zst$ ]]; then + echo "${BASH_REMATCH[1]}" + fi + done + } | sort -rV | head -1 +} + +download_rootfs() +{ + local rootfsversion="$1" + local dir="$2" + + if ! which zstd &> /dev/null; then + echo 'Could not find "zstd" on the system, please install zstd' + exit 1 + fi + + download "libbpf-vmtest-rootfs-$rootfsversion.tar.zst" | + zstd -d | sudo tar -C "$dir" -x +} + +recompile_kernel() +{ + local kernel_checkout="$1" + local make_command="$2" + + cd "${kernel_checkout}" + + ${make_command} olddefconfig + ${make_command} +} + +mount_image() +{ + local rootfs_img="${OUTPUT_DIR}/${ROOTFS_IMAGE}" + local mount_dir="${OUTPUT_DIR}/${MOUNT_DIR}" + + sudo mount -o loop "${rootfs_img}" "${mount_dir}" +} + +unmount_image() +{ + local mount_dir="${OUTPUT_DIR}/${MOUNT_DIR}" + + sudo umount "${mount_dir}" &> /dev/null +} + +update_selftests() +{ + local kernel_checkout="$1" + local selftests_dir="${kernel_checkout}/tools/testing/selftests/bpf" + + cd "${selftests_dir}" + ${make_command} + + # Mount the image and copy the selftests to the image. + mount_image + sudo rm -rf "${mount_dir}/root/bpf" + sudo cp -r "${selftests_dir}" "${mount_dir}/root" + unmount_image +} + +update_init_script() +{ + local init_script_dir="${OUTPUT_DIR}/${MOUNT_DIR}/etc/rcS.d" + local init_script="${init_script_dir}/S50-startup" + local command="$1" + local log_file="$2" + + mount_image + + if [[ ! -d "${init_script_dir}" ]]; then + cat <<EOF +Could not find ${init_script_dir} in the mounted image. +This likely indicates a bad rootfs image, Please download +a new image by passing "-i" to the script +EOF + exit 1 + + fi + + sudo bash -c "cat >${init_script}" <<EOF +#!/bin/bash + +{ + cd /root/bpf + echo ${command} + stdbuf -oL -eL ${command} +} 2>&1 | tee /root/${log_file} +poweroff -f +EOF + + sudo chmod a+x "${init_script}" + unmount_image +} + +create_vm_image() +{ + local rootfs_img="${OUTPUT_DIR}/${ROOTFS_IMAGE}" + local mount_dir="${OUTPUT_DIR}/${MOUNT_DIR}" + + rm -rf "${rootfs_img}" + touch "${rootfs_img}" + chattr +C "${rootfs_img}" >/dev/null 2>&1 || true + + truncate -s 2G "${rootfs_img}" + mkfs.ext4 -q "${rootfs_img}" + + mount_image + download_rootfs "$(newest_rootfs_version)" "${mount_dir}" + unmount_image +} + +run_vm() +{ + local kernel_bzimage="$1" + local rootfs_img="${OUTPUT_DIR}/${ROOTFS_IMAGE}" + + if ! which "${QEMU_BINARY}" &> /dev/null; then + cat <<EOF +Could not find ${QEMU_BINARY} +Please install qemu or set the QEMU_BINARY environment variable. +EOF + exit 1 + fi + + ${QEMU_BINARY} \ + -nodefaults \ + -display none \ + -serial mon:stdio \ + -cpu kvm64 \ + -enable-kvm \ + -smp 4 \ + -m 2G \ + -drive file="${rootfs_img}",format=raw,index=1,media=disk,if=virtio,cache=none \ + -kernel "${kernel_bzimage}" \ + -append "root=/dev/vda rw console=ttyS0,115200" +} + +copy_logs() +{ + local mount_dir="${OUTPUT_DIR}/${MOUNT_DIR}" + local log_file="${mount_dir}/root/$1" + + mount_image + sudo cp ${log_file} "${OUTPUT_DIR}" + sudo rm -f ${log_file} + unmount_image +} + +is_rel_path() +{ + local path="$1" + + [[ ${path:0:1} != "/" ]] +} + +update_kconfig() +{ + local kconfig_file="$1" + local update_command="curl -sLf ${KCONFIG_URL} -o ${kconfig_file}" + # Github does not return the "last-modified" header when retrieving the + # raw contents of the file. Use the API call to get the last-modified + # time of the kernel config and only update the config if it has been + # updated after the previously cached config was created. This avoids + # unnecessarily compiling the kernel and selftests. + if [[ -f "${kconfig_file}" ]]; then + local last_modified_date="$(curl -sL -D - "${KCONFIG_API_URL}" -o /dev/null | \ + grep "last-modified" | awk -F ': ' '{print $2}')" + local remote_modified_timestamp="$(date -d "${last_modified_date}" +"%s")" + local local_creation_timestamp="$(stat -c %Y "${kconfig_file}")" + + if [[ "${remote_modified_timestamp}" -gt "${local_creation_timestamp}" ]]; then + ${update_command} + fi + else + ${update_command} + fi +} + +main() +{ + local script_dir="$(cd -P -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)" + local kernel_checkout=$(realpath "${script_dir}"/../../../../) + local log_file="$(date +"bpf_selftests.%Y-%m-%d_%H-%M-%S.log")" + # By default the script searches for the kernel in the checkout directory but + # it also obeys environment variables O= and KBUILD_OUTPUT= + local kernel_bzimage="${kernel_checkout}/${X86_BZIMAGE}" + local command="${DEFAULT_COMMAND}" + local update_image="no" + + while getopts 'hkid:j:' opt; do + case ${opt} in + i) + update_image="yes" + ;; + d) + OUTPUT_DIR="$OPTARG" + ;; + j) + NUM_COMPILE_JOBS="$OPTARG" + ;; + h) + usage + exit 0 + ;; + \? ) + echo "Invalid Option: -$OPTARG" + usage + exit 1 + ;; + : ) + echo "Invalid Option: -$OPTARG requires an argument" + usage + exit 1 + ;; + esac + done + shift $((OPTIND -1)) + + if [[ $# -eq 0 ]]; then + echo "No command specified, will run ${DEFAULT_COMMAND} in the vm" + else + command="$@" + fi + + local kconfig_file="${OUTPUT_DIR}/latest.config" + local make_command="make -j ${NUM_COMPILE_JOBS} KCONFIG_CONFIG=${kconfig_file}" + + # Figure out where the kernel is being built. + # O takes precedence over KBUILD_OUTPUT. + if [[ "${O:=""}" != "" ]]; then + if is_rel_path "${O}"; then + O="$(realpath "${PWD}/${O}")" + fi + kernel_bzimage="${O}/${X86_BZIMAGE}" + make_command="${make_command} O=${O}" + elif [[ "${KBUILD_OUTPUT:=""}" != "" ]]; then + if is_rel_path "${KBUILD_OUTPUT}"; then + KBUILD_OUTPUT="$(realpath "${PWD}/${KBUILD_OUTPUT}")" + fi + kernel_bzimage="${KBUILD_OUTPUT}/${X86_BZIMAGE}" + make_command="${make_command} KBUILD_OUTPUT=${KBUILD_OUTPUT}" + fi + + populate_url_map + + local rootfs_img="${OUTPUT_DIR}/${ROOTFS_IMAGE}" + local mount_dir="${OUTPUT_DIR}/${MOUNT_DIR}" + + echo "Output directory: ${OUTPUT_DIR}" + + mkdir -p "${OUTPUT_DIR}" + mkdir -p "${mount_dir}" + update_kconfig "${kconfig_file}" + + recompile_kernel "${kernel_checkout}" "${make_command}" + + if [[ "${update_image}" == "no" && ! -f "${rootfs_img}" ]]; then + echo "rootfs image not found in ${rootfs_img}" + update_image="yes" + fi + + if [[ "${update_image}" == "yes" ]]; then + create_vm_image + fi + + update_selftests "${kernel_checkout}" "${make_command}" + update_init_script "${command}" "${log_file}" + run_vm "${kernel_bzimage}" + copy_logs "${log_file}" + echo "Logs saved in ${OUTPUT_DIR}/${log_file}" +} + +catch() +{ + local exit_code=$1 + # This is just a cleanup and the directory may + # have already been unmounted. So, don't let this + # clobber the error code we intend to return. + unmount_image || true + exit ${exit_code} +} + +trap 'catch "$?"' EXIT + +main "$@" diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c index 014dedaa4dd2..f4a96d5ff524 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.c +++ b/tools/testing/selftests/bpf/xdpxceiver.c @@ -224,14 +224,14 @@ static inline u16 udp_csum(u32 saddr, u32 daddr, u32 len, u8 proto, u16 *udp_pkt return csum_tcpudp_magic(saddr, daddr, len, proto, csum); } -static void gen_eth_hdr(void *data, struct ethhdr *eth_hdr) +static void gen_eth_hdr(struct ifobject *ifobject, struct ethhdr *eth_hdr) { - memcpy(eth_hdr->h_dest, ((struct ifobject *)data)->dst_mac, ETH_ALEN); - memcpy(eth_hdr->h_source, ((struct ifobject *)data)->src_mac, ETH_ALEN); + memcpy(eth_hdr->h_dest, ifobject->dst_mac, ETH_ALEN); + memcpy(eth_hdr->h_source, ifobject->src_mac, ETH_ALEN); eth_hdr->h_proto = htons(ETH_P_IP); } -static void gen_ip_hdr(void *data, struct iphdr *ip_hdr) +static void gen_ip_hdr(struct ifobject *ifobject, struct iphdr *ip_hdr) { ip_hdr->version = IP_PKT_VER; ip_hdr->ihl = 0x5; @@ -241,18 +241,18 @@ static void gen_ip_hdr(void *data, struct iphdr *ip_hdr) ip_hdr->frag_off = 0; ip_hdr->ttl = IPDEFTTL; ip_hdr->protocol = IPPROTO_UDP; - ip_hdr->saddr = ((struct ifobject *)data)->src_ip; - ip_hdr->daddr = ((struct ifobject *)data)->dst_ip; + ip_hdr->saddr = ifobject->src_ip; + ip_hdr->daddr = ifobject->dst_ip; ip_hdr->check = 0; } -static void gen_udp_hdr(void *data, void *arg, struct udphdr *udp_hdr) +static void gen_udp_hdr(struct generic_data *data, struct ifobject *ifobject, + struct udphdr *udp_hdr) { - udp_hdr->source = htons(((struct ifobject *)arg)->src_port); - udp_hdr->dest = htons(((struct ifobject *)arg)->dst_port); + udp_hdr->source = htons(ifobject->src_port); + udp_hdr->dest = htons(ifobject->dst_port); udp_hdr->len = htons(UDP_PKT_SIZE); - memset32_htonl(pkt_data + PKT_HDR_SIZE, - htonl(((struct generic_data *)data)->seqnum), UDP_PKT_DATA_SIZE); + memset32_htonl(pkt_data + PKT_HDR_SIZE, htonl(data->seqnum), UDP_PKT_DATA_SIZE); } static void gen_udp_csum(struct udphdr *udp_hdr, struct iphdr *ip_hdr) @@ -382,21 +382,19 @@ static bool switch_namespace(int idx) static void *nsswitchthread(void *args) { - if (switch_namespace(((struct targs *)args)->idx)) { - ifdict[((struct targs *)args)->idx]->ifindex = - if_nametoindex(ifdict[((struct targs *)args)->idx]->ifname); - if (!ifdict[((struct targs *)args)->idx]->ifindex) { - ksft_test_result_fail - ("ERROR: [%s] interface \"%s\" does not exist\n", - __func__, ifdict[((struct targs *)args)->idx]->ifname); - ((struct targs *)args)->retptr = false; + struct targs *targs = args; + + targs->retptr = false; + + if (switch_namespace(targs->idx)) { + ifdict[targs->idx]->ifindex = if_nametoindex(ifdict[targs->idx]->ifname); + if (!ifdict[targs->idx]->ifindex) { + ksft_test_result_fail("ERROR: [%s] interface \"%s\" does not exist\n", + __func__, ifdict[targs->idx]->ifname); } else { - ksft_print_msg("Interface found: %s\n", - ifdict[((struct targs *)args)->idx]->ifname); - ((struct targs *)args)->retptr = true; + ksft_print_msg("Interface found: %s\n", ifdict[targs->idx]->ifname); + targs->retptr = true; } - } else { - ((struct targs *)args)->retptr = false; } pthread_exit(NULL); } @@ -413,12 +411,12 @@ static int validate_interfaces(void) if (strcmp(ifdict[i]->nsname, "")) { struct targs *targs; - targs = (struct targs *)malloc(sizeof(struct targs)); + targs = malloc(sizeof(*targs)); if (!targs) exit_with_error(errno); targs->idx = i; - if (pthread_create(&ns_thread, NULL, nsswitchthread, (void *)targs)) + if (pthread_create(&ns_thread, NULL, nsswitchthread, targs)) exit_with_error(errno); pthread_join(ns_thread, NULL); @@ -569,16 +567,18 @@ static void rx_pkt(struct xsk_socket_info *xsk, struct pollfd *fds) } for (i = 0; i < rcvd; i++) { - u64 addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr; - (void)xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++)->len; - u64 orig = xsk_umem__extract_addr(addr); + u64 addr, orig; + + addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr; + xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++); + orig = xsk_umem__extract_addr(addr); addr = xsk_umem__add_offset_to_addr(addr); pkt_node_rx = malloc(sizeof(struct pkt) + PKT_SIZE); if (!pkt_node_rx) exit_with_error(errno); - pkt_node_rx->pkt_frame = (char *)malloc(PKT_SIZE); + pkt_node_rx->pkt_frame = malloc(PKT_SIZE); if (!pkt_node_rx->pkt_frame) exit_with_error(errno); @@ -628,28 +628,27 @@ static inline int get_batch_size(int pkt_cnt) return opt_pkt_count - pkt_cnt; } -static void complete_tx_only_all(void *arg) +static void complete_tx_only_all(struct ifobject *ifobject) { bool pending; do { pending = false; - if (((struct ifobject *)arg)->xsk->outstanding_tx) { - complete_tx_only(((struct ifobject *) - arg)->xsk, BATCH_SIZE); - pending = !!((struct ifobject *)arg)->xsk->outstanding_tx; + if (ifobject->xsk->outstanding_tx) { + complete_tx_only(ifobject->xsk, BATCH_SIZE); + pending = !!ifobject->xsk->outstanding_tx; } } while (pending); } -static void tx_only_all(void *arg) +static void tx_only_all(struct ifobject *ifobject) { struct pollfd fds[MAX_SOCKS] = { }; u32 frame_nb = 0; int pkt_cnt = 0; int ret; - fds[0].fd = xsk_socket__fd(((struct ifobject *)arg)->xsk->xsk); + fds[0].fd = xsk_socket__fd(ifobject->xsk->xsk); fds[0].events = POLLOUT; while ((opt_pkt_count && pkt_cnt < opt_pkt_count) || !opt_pkt_count) { @@ -664,12 +663,12 @@ static void tx_only_all(void *arg) continue; } - tx_only(((struct ifobject *)arg)->xsk, &frame_nb, batch_size); + tx_only(ifobject->xsk, &frame_nb, batch_size); pkt_cnt += batch_size; } if (opt_pkt_count) - complete_tx_only_all(arg); + complete_tx_only_all(ifobject); } static void worker_pkt_dump(void) @@ -715,7 +714,7 @@ static void worker_pkt_dump(void) int payload = *((uint32_t *)(pkt_buf[iter]->payload + PKT_HDR_SIZE)); if (payload == EOT) { - ksft_print_msg("End-of-tranmission frame received\n"); + ksft_print_msg("End-of-transmission frame received\n"); fprintf(stdout, "---------------------------------------\n"); break; } @@ -727,27 +726,27 @@ static void worker_pkt_dump(void) static void worker_pkt_validate(void) { u32 payloadseqnum = -2; + struct iphdr *iphdr; while (1) { - pkt_node_rx_q = malloc(sizeof(struct pkt)); pkt_node_rx_q = TAILQ_LAST(&head, head_s); if (!pkt_node_rx_q) break; + + iphdr = (struct iphdr *)(pkt_node_rx_q->pkt_frame + sizeof(struct ethhdr)); + /*do not increment pktcounter if !(tos=0x9 and ipv4) */ - if ((((struct iphdr *)(pkt_node_rx_q->pkt_frame + - sizeof(struct ethhdr)))->version == IP_PKT_VER) - && (((struct iphdr *)(pkt_node_rx_q->pkt_frame + sizeof(struct ethhdr)))->tos == - IP_PKT_TOS)) { - payloadseqnum = *((uint32_t *) (pkt_node_rx_q->pkt_frame + PKT_HDR_SIZE)); + if (iphdr->version == IP_PKT_VER && iphdr->tos == IP_PKT_TOS) { + payloadseqnum = *((uint32_t *)(pkt_node_rx_q->pkt_frame + PKT_HDR_SIZE)); if (debug_pkt_dump && payloadseqnum != EOT) { - pkt_obj = (struct pkt_frame *)malloc(sizeof(struct pkt_frame)); - pkt_obj->payload = (char *)malloc(PKT_SIZE); + pkt_obj = malloc(sizeof(*pkt_obj)); + pkt_obj->payload = malloc(PKT_SIZE); memcpy(pkt_obj->payload, pkt_node_rx_q->pkt_frame, PKT_SIZE); pkt_buf[payloadseqnum] = pkt_obj; } if (payloadseqnum == EOT) { - ksft_print_msg("End-of-tranmission frame received: PASS\n"); + ksft_print_msg("End-of-transmission frame received: PASS\n"); sigvar = 1; break; } @@ -759,35 +758,29 @@ static void worker_pkt_validate(void) ksft_exit_xfail(); } - TAILQ_REMOVE(&head, pkt_node_rx_q, pkt_nodes); - free(pkt_node_rx_q->pkt_frame); - free(pkt_node_rx_q); - pkt_node_rx_q = NULL; prev_pkt = payloadseqnum; pkt_counter++; } else { ksft_print_msg("Invalid frame received: "); - ksft_print_msg("[IP_PKT_VER: %02X], [IP_PKT_TOS: %02X]\n", - ((struct iphdr *)(pkt_node_rx_q->pkt_frame + - sizeof(struct ethhdr)))->version, - ((struct iphdr *)(pkt_node_rx_q->pkt_frame + - sizeof(struct ethhdr)))->tos); - TAILQ_REMOVE(&head, pkt_node_rx_q, pkt_nodes); - free(pkt_node_rx_q->pkt_frame); - free(pkt_node_rx_q); - pkt_node_rx_q = NULL; + ksft_print_msg("[IP_PKT_VER: %02X], [IP_PKT_TOS: %02X]\n", iphdr->version, + iphdr->tos); } + + TAILQ_REMOVE(&head, pkt_node_rx_q, pkt_nodes); + free(pkt_node_rx_q->pkt_frame); + free(pkt_node_rx_q); + pkt_node_rx_q = NULL; } } -static void thread_common_ops(void *arg, void *bufs, pthread_mutex_t *mutexptr, +static void thread_common_ops(struct ifobject *ifobject, void *bufs, pthread_mutex_t *mutexptr, atomic_int *spinningptr) { int ctr = 0; int ret; - xsk_configure_umem((struct ifobject *)arg, bufs, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE); - ret = xsk_configure_socket((struct ifobject *)arg); + xsk_configure_umem(ifobject, bufs, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE); + ret = xsk_configure_socket(ifobject); /* Retry Create Socket if it fails as xsk_socket__create() * is asynchronous @@ -798,9 +791,8 @@ static void thread_common_ops(void *arg, void *bufs, pthread_mutex_t *mutexptr, pthread_mutex_lock(mutexptr); while (ret && ctr < SOCK_RECONF_CTR) { atomic_store(spinningptr, 1); - xsk_configure_umem((struct ifobject *)arg, - bufs, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE); - ret = xsk_configure_socket((struct ifobject *)arg); + xsk_configure_umem(ifobject, bufs, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE); + ret = xsk_configure_socket(ifobject); usleep(USLEEP_MAX); ctr++; } @@ -815,9 +807,10 @@ static void *worker_testapp_validate(void *arg) { struct udphdr *udp_hdr = (struct udphdr *)(pkt_data + sizeof(struct ethhdr) + sizeof(struct iphdr)); - struct generic_data *data = (struct generic_data *)malloc(sizeof(struct generic_data)); struct iphdr *ip_hdr = (struct iphdr *)(pkt_data + sizeof(struct ethhdr)); struct ethhdr *eth_hdr = (struct ethhdr *)pkt_data; + struct ifobject *ifobject = (struct ifobject *)arg; + struct generic_data data; void *bufs = NULL; pthread_attr_setstacksize(&attr, THREAD_STACK); @@ -828,58 +821,56 @@ static void *worker_testapp_validate(void *arg) if (bufs == MAP_FAILED) exit_with_error(errno); - if (strcmp(((struct ifobject *)arg)->nsname, "")) - switch_namespace(((struct ifobject *)arg)->ifdict_index); + if (strcmp(ifobject->nsname, "")) + switch_namespace(ifobject->ifdict_index); } - if (((struct ifobject *)arg)->fv.vector == tx) { + if (ifobject->fv.vector == tx) { int spinningrxctr = 0; if (!bidi_pass) - thread_common_ops(arg, bufs, &sync_mutex_tx, &spinning_tx); + thread_common_ops(ifobject, bufs, &sync_mutex_tx, &spinning_tx); while (atomic_load(&spinning_rx) && spinningrxctr < SOCK_RECONF_CTR) { spinningrxctr++; usleep(USLEEP_MAX); } - ksft_print_msg("Interface [%s] vector [Tx]\n", ((struct ifobject *)arg)->ifname); + ksft_print_msg("Interface [%s] vector [Tx]\n", ifobject->ifname); for (int i = 0; i < num_frames; i++) { /*send EOT frame */ if (i == (num_frames - 1)) - data->seqnum = -1; + data.seqnum = -1; else - data->seqnum = i; - gen_udp_hdr((void *)data, (void *)arg, udp_hdr); - gen_ip_hdr((void *)arg, ip_hdr); + data.seqnum = i; + gen_udp_hdr(&data, ifobject, udp_hdr); + gen_ip_hdr(ifobject, ip_hdr); gen_udp_csum(udp_hdr, ip_hdr); - gen_eth_hdr((void *)arg, eth_hdr); - gen_eth_frame(((struct ifobject *)arg)->umem, - i * XSK_UMEM__DEFAULT_FRAME_SIZE); + gen_eth_hdr(ifobject, eth_hdr); + gen_eth_frame(ifobject->umem, i * XSK_UMEM__DEFAULT_FRAME_SIZE); } - free(data); ksft_print_msg("Sending %d packets on interface %s\n", - (opt_pkt_count - 1), ((struct ifobject *)arg)->ifname); - tx_only_all(arg); - } else if (((struct ifobject *)arg)->fv.vector == rx) { + (opt_pkt_count - 1), ifobject->ifname); + tx_only_all(ifobject); + } else if (ifobject->fv.vector == rx) { struct pollfd fds[MAX_SOCKS] = { }; int ret; if (!bidi_pass) - thread_common_ops(arg, bufs, &sync_mutex_tx, &spinning_rx); + thread_common_ops(ifobject, bufs, &sync_mutex_tx, &spinning_rx); - ksft_print_msg("Interface [%s] vector [Rx]\n", ((struct ifobject *)arg)->ifname); - xsk_populate_fill_ring(((struct ifobject *)arg)->umem); + ksft_print_msg("Interface [%s] vector [Rx]\n", ifobject->ifname); + xsk_populate_fill_ring(ifobject->umem); TAILQ_INIT(&head); if (debug_pkt_dump) { - pkt_buf = malloc(sizeof(struct pkt_frame **) * num_frames); + pkt_buf = calloc(num_frames, sizeof(*pkt_buf)); if (!pkt_buf) exit_with_error(errno); } - fds[0].fd = xsk_socket__fd(((struct ifobject *)arg)->xsk->xsk); + fds[0].fd = xsk_socket__fd(ifobject->xsk->xsk); fds[0].events = POLLIN; pthread_mutex_lock(&sync_mutex); @@ -892,7 +883,7 @@ static void *worker_testapp_validate(void *arg) if (ret <= 0) continue; } - rx_pkt(((struct ifobject *)arg)->xsk, fds); + rx_pkt(ifobject->xsk, fds); worker_pkt_validate(); if (sigvar) @@ -900,21 +891,23 @@ static void *worker_testapp_validate(void *arg) } ksft_print_msg("Received %d packets on interface %s\n", - pkt_counter, ((struct ifobject *)arg)->ifname); + pkt_counter, ifobject->ifname); if (opt_teardown) ksft_print_msg("Destroying socket\n"); } - if (!opt_bidi || (opt_bidi && bidi_pass)) { - xsk_socket__delete(((struct ifobject *)arg)->xsk->xsk); - (void)xsk_umem__delete(((struct ifobject *)arg)->umem->umem); + if (!opt_bidi || bidi_pass) { + xsk_socket__delete(ifobject->xsk->xsk); + (void)xsk_umem__delete(ifobject->umem->umem); } pthread_exit(NULL); } static void testapp_validate(void) { + struct timespec max_wait = { 0, 0 }; + pthread_attr_init(&attr); pthread_attr_setstacksize(&attr, THREAD_STACK); @@ -929,18 +922,16 @@ static void testapp_validate(void) pthread_mutex_lock(&sync_mutex); /*Spawn RX thread */ - if (!opt_bidi || (opt_bidi && !bidi_pass)) { - if (pthread_create(&t0, &attr, worker_testapp_validate, (void *)ifdict[1])) + if (!opt_bidi || !bidi_pass) { + if (pthread_create(&t0, &attr, worker_testapp_validate, ifdict[1])) exit_with_error(errno); } else if (opt_bidi && bidi_pass) { /*switch Tx/Rx vectors */ ifdict[0]->fv.vector = rx; - if (pthread_create(&t0, &attr, worker_testapp_validate, (void *)ifdict[0])) + if (pthread_create(&t0, &attr, worker_testapp_validate, ifdict[0])) exit_with_error(errno); } - struct timespec max_wait = { 0, 0 }; - if (clock_gettime(CLOCK_REALTIME, &max_wait)) exit_with_error(errno); max_wait.tv_sec += TMOUT_SEC; @@ -951,13 +942,13 @@ static void testapp_validate(void) pthread_mutex_unlock(&sync_mutex); /*Spawn TX thread */ - if (!opt_bidi || (opt_bidi && !bidi_pass)) { - if (pthread_create(&t1, &attr, worker_testapp_validate, (void *)ifdict[0])) + if (!opt_bidi || !bidi_pass) { + if (pthread_create(&t1, &attr, worker_testapp_validate, ifdict[0])) exit_with_error(errno); } else if (opt_bidi && bidi_pass) { /*switch Tx/Rx vectors */ ifdict[1]->fv.vector = tx; - if (pthread_create(&t1, &attr, worker_testapp_validate, (void *)ifdict[1])) + if (pthread_create(&t1, &attr, worker_testapp_validate, ifdict[1])) exit_with_error(errno); } @@ -991,25 +982,25 @@ static void testapp_sockets(void) print_ksft_result(); } -static void init_iface_config(void *ifaceconfig) +static void init_iface_config(struct ifaceconfigobj *ifaceconfig) { /*Init interface0 */ ifdict[0]->fv.vector = tx; - memcpy(ifdict[0]->dst_mac, ((struct ifaceconfigobj *)ifaceconfig)->dst_mac, ETH_ALEN); - memcpy(ifdict[0]->src_mac, ((struct ifaceconfigobj *)ifaceconfig)->src_mac, ETH_ALEN); - ifdict[0]->dst_ip = ((struct ifaceconfigobj *)ifaceconfig)->dst_ip.s_addr; - ifdict[0]->src_ip = ((struct ifaceconfigobj *)ifaceconfig)->src_ip.s_addr; - ifdict[0]->dst_port = ((struct ifaceconfigobj *)ifaceconfig)->dst_port; - ifdict[0]->src_port = ((struct ifaceconfigobj *)ifaceconfig)->src_port; + memcpy(ifdict[0]->dst_mac, ifaceconfig->dst_mac, ETH_ALEN); + memcpy(ifdict[0]->src_mac, ifaceconfig->src_mac, ETH_ALEN); + ifdict[0]->dst_ip = ifaceconfig->dst_ip.s_addr; + ifdict[0]->src_ip = ifaceconfig->src_ip.s_addr; + ifdict[0]->dst_port = ifaceconfig->dst_port; + ifdict[0]->src_port = ifaceconfig->src_port; /*Init interface1 */ ifdict[1]->fv.vector = rx; - memcpy(ifdict[1]->dst_mac, ((struct ifaceconfigobj *)ifaceconfig)->src_mac, ETH_ALEN); - memcpy(ifdict[1]->src_mac, ((struct ifaceconfigobj *)ifaceconfig)->dst_mac, ETH_ALEN); - ifdict[1]->dst_ip = ((struct ifaceconfigobj *)ifaceconfig)->src_ip.s_addr; - ifdict[1]->src_ip = ((struct ifaceconfigobj *)ifaceconfig)->dst_ip.s_addr; - ifdict[1]->dst_port = ((struct ifaceconfigobj *)ifaceconfig)->src_port; - ifdict[1]->src_port = ((struct ifaceconfigobj *)ifaceconfig)->dst_port; + memcpy(ifdict[1]->dst_mac, ifaceconfig->src_mac, ETH_ALEN); + memcpy(ifdict[1]->src_mac, ifaceconfig->dst_mac, ETH_ALEN); + ifdict[1]->dst_ip = ifaceconfig->src_ip.s_addr; + ifdict[1]->src_ip = ifaceconfig->dst_ip.s_addr; + ifdict[1]->dst_port = ifaceconfig->src_port; + ifdict[1]->src_port = ifaceconfig->dst_port; } int main(int argc, char **argv) @@ -1026,7 +1017,7 @@ int main(int argc, char **argv) u16 UDP_DST_PORT = 2020; u16 UDP_SRC_PORT = 2121; - ifaceconfig = (struct ifaceconfigobj *)malloc(sizeof(struct ifaceconfigobj)); + ifaceconfig = malloc(sizeof(struct ifaceconfigobj)); memcpy(ifaceconfig->dst_mac, MAC1, ETH_ALEN); memcpy(ifaceconfig->src_mac, MAC2, ETH_ALEN); inet_aton(IP1, &ifaceconfig->dst_ip); @@ -1035,7 +1026,7 @@ int main(int argc, char **argv) ifaceconfig->src_port = UDP_SRC_PORT; for (int i = 0; i < MAX_INTERFACES; i++) { - ifdict[i] = (struct ifobject *)malloc(sizeof(struct ifobject)); + ifdict[i] = malloc(sizeof(struct ifobject)); if (!ifdict[i]) exit_with_error(errno); @@ -1048,7 +1039,7 @@ int main(int argc, char **argv) num_frames = ++opt_pkt_count; - init_iface_config((void *)ifaceconfig); + init_iface_config(ifaceconfig); pthread_init_mutex(); diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h index 61f595b6f200..0e9f9b7e61c2 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.h +++ b/tools/testing/selftests/bpf/xdpxceiver.h @@ -92,8 +92,6 @@ struct flow_vector { enum fvector { tx, rx, - bidi, - undef, } vector; }; diff --git a/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c b/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c index ad41ea69001b..e7041816085a 100644 --- a/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c +++ b/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c @@ -145,7 +145,7 @@ static bool run_test(int wr_size, int wp_size, int wr, int wp) if (ptrace(PTRACE_CONT, pid, NULL, NULL) < 0) { ksft_print_msg( - "ptrace(PTRACE_SINGLESTEP) failed: %s\n", + "ptrace(PTRACE_CONT) failed: %s\n", strerror(errno)); return false; } @@ -159,7 +159,7 @@ static bool run_test(int wr_size, int wp_size, int wr, int wp) } alarm(0); if (WIFEXITED(status)) { - ksft_print_msg("child did not single-step\n"); + ksft_print_msg("child exited prematurely\n"); return false; } if (!WIFSTOPPED(status)) { diff --git a/tools/testing/selftests/dma/dma_map_benchmark.c b/tools/testing/selftests/dma/dma_map_benchmark.c index 7065163a8388..537d65968c48 100644 --- a/tools/testing/selftests/dma/dma_map_benchmark.c +++ b/tools/testing/selftests/dma/dma_map_benchmark.c @@ -6,6 +6,7 @@ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> +#include <string.h> #include <unistd.h> #include <sys/ioctl.h> #include <sys/mman.h> @@ -35,7 +36,7 @@ struct map_benchmark { __s32 node; /* which numa node this benchmark will run on */ __u32 dma_bits; /* DMA addressing capability */ __u32 dma_dir; /* DMA data direction */ - __u64 expansion[10]; /* For future use */ + __u8 expansion[84]; /* For future use */ }; int main(int argc, char **argv) @@ -102,6 +103,7 @@ int main(int argc, char **argv) exit(1); } + memset(&map, 0, sizeof(map)); map.seconds = seconds; map.threads = threads; map.node = node; diff --git a/tools/testing/selftests/dmabuf-heaps/Makefile b/tools/testing/selftests/dmabuf-heaps/Makefile index 607c2acd2082..604b43ece15f 100644 --- a/tools/testing/selftests/dmabuf-heaps/Makefile +++ b/tools/testing/selftests/dmabuf-heaps/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS += -static -O3 -Wl,-no-as-needed -Wall -I../../../../usr/include +CFLAGS += -static -O3 -Wl,-no-as-needed -Wall TEST_GEN_PROGS = dmabuf-heap diff --git a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c index 909da9cdda97..29af27acd40e 100644 --- a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c +++ b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c @@ -130,16 +130,13 @@ static int dmabuf_heap_alloc(int fd, size_t len, unsigned int flags, dmabuf_fd); } -static void dmabuf_sync(int fd, int start_stop) +static int dmabuf_sync(int fd, int start_stop) { struct dma_buf_sync sync = { .flags = start_stop | DMA_BUF_SYNC_RW, }; - int ret; - ret = ioctl(fd, DMA_BUF_IOCTL_SYNC, &sync); - if (ret) - printf("sync failed %d\n", errno); + return ioctl(fd, DMA_BUF_IOCTL_SYNC, &sync); } #define ONE_MEG (1024 * 1024) @@ -151,16 +148,14 @@ static int test_alloc_and_import(char *heap_name) void *p = NULL; int ret; - printf("Testing heap: %s\n", heap_name); - heap_fd = dmabuf_heap_open(heap_name); if (heap_fd < 0) return -1; - printf("Allocating 1 MEG\n"); + printf(" Testing allocation and importing: "); ret = dmabuf_heap_alloc(heap_fd, ONE_MEG, 0, &dmabuf_fd); if (ret) { - printf("Allocation Failed!\n"); + printf("FAIL (Allocation Failed!)\n"); ret = -1; goto out; } @@ -172,11 +167,10 @@ static int test_alloc_and_import(char *heap_name) dmabuf_fd, 0); if (p == MAP_FAILED) { - printf("mmap() failed: %m\n"); + printf("FAIL (mmap() failed)\n"); ret = -1; goto out; } - printf("mmap passed\n"); dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_START); memset(p, 1, ONE_MEG / 2); @@ -186,25 +180,31 @@ static int test_alloc_and_import(char *heap_name) importer_fd = open_vgem(); if (importer_fd < 0) { ret = importer_fd; - printf("Failed to open vgem\n"); - goto out; + printf("(Could not open vgem - skipping): "); + } else { + ret = import_vgem_fd(importer_fd, dmabuf_fd, &handle); + if (ret < 0) { + printf("FAIL (Failed to import buffer)\n"); + goto out; + } } - ret = import_vgem_fd(importer_fd, dmabuf_fd, &handle); + ret = dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_START); if (ret < 0) { - printf("Failed to import buffer\n"); + printf("FAIL (DMA_BUF_SYNC_START failed!)\n"); goto out; } - printf("import passed\n"); - dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_START); memset(p, 0xff, ONE_MEG); - dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_END); - printf("syncs passed\n"); + ret = dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_END); + if (ret < 0) { + printf("FAIL (DMA_BUF_SYNC_END failed!)\n"); + goto out; + } close_handle(importer_fd, handle); ret = 0; - + printf(" OK\n"); out: if (p) munmap(p, ONE_MEG); @@ -218,6 +218,84 @@ out: return ret; } +static int test_alloc_zeroed(char *heap_name, size_t size) +{ + int heap_fd = -1, dmabuf_fd[32]; + int i, j, ret; + void *p = NULL; + char *c; + + printf(" Testing alloced %ldk buffers are zeroed: ", size / 1024); + heap_fd = dmabuf_heap_open(heap_name); + if (heap_fd < 0) + return -1; + + /* Allocate and fill a bunch of buffers */ + for (i = 0; i < 32; i++) { + ret = dmabuf_heap_alloc(heap_fd, size, 0, &dmabuf_fd[i]); + if (ret < 0) { + printf("FAIL (Allocation (%i) failed)\n", i); + goto out; + } + /* mmap and fill with simple pattern */ + p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, dmabuf_fd[i], 0); + if (p == MAP_FAILED) { + printf("FAIL (mmap() failed!)\n"); + ret = -1; + goto out; + } + dmabuf_sync(dmabuf_fd[i], DMA_BUF_SYNC_START); + memset(p, 0xff, size); + dmabuf_sync(dmabuf_fd[i], DMA_BUF_SYNC_END); + munmap(p, size); + } + /* close them all */ + for (i = 0; i < 32; i++) + close(dmabuf_fd[i]); + + /* Allocate and validate all buffers are zeroed */ + for (i = 0; i < 32; i++) { + ret = dmabuf_heap_alloc(heap_fd, size, 0, &dmabuf_fd[i]); + if (ret < 0) { + printf("FAIL (Allocation (%i) failed)\n", i); + goto out; + } + + /* mmap and validate everything is zero */ + p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, dmabuf_fd[i], 0); + if (p == MAP_FAILED) { + printf("FAIL (mmap() failed!)\n"); + ret = -1; + goto out; + } + dmabuf_sync(dmabuf_fd[i], DMA_BUF_SYNC_START); + c = (char *)p; + for (j = 0; j < size; j++) { + if (c[j] != 0) { + printf("FAIL (Allocated buffer not zeroed @ %i)\n", j); + break; + } + } + dmabuf_sync(dmabuf_fd[i], DMA_BUF_SYNC_END); + munmap(p, size); + } + /* close them all */ + for (i = 0; i < 32; i++) + close(dmabuf_fd[i]); + + close(heap_fd); + printf("OK\n"); + return 0; + +out: + while (i > 0) { + close(dmabuf_fd[i]); + i--; + } + close(heap_fd); + return ret; +} + /* Test the ioctl version compatibility w/ a smaller structure then expected */ static int dmabuf_heap_alloc_older(int fd, size_t len, unsigned int flags, int *dmabuf_fd) @@ -292,23 +370,24 @@ static int test_alloc_compat(char *heap_name) if (heap_fd < 0) return -1; - printf("Testing (theoretical)older alloc compat\n"); + printf(" Testing (theoretical)older alloc compat: "); ret = dmabuf_heap_alloc_older(heap_fd, ONE_MEG, 0, &dmabuf_fd); if (ret) { - printf("Older compat allocation failed!\n"); + printf("FAIL (Older compat allocation failed!)\n"); ret = -1; goto out; } close(dmabuf_fd); + printf("OK\n"); - printf("Testing (theoretical)newer alloc compat\n"); + printf(" Testing (theoretical)newer alloc compat: "); ret = dmabuf_heap_alloc_newer(heap_fd, ONE_MEG, 0, &dmabuf_fd); if (ret) { - printf("Newer compat allocation failed!\n"); + printf("FAIL (Newer compat allocation failed!)\n"); ret = -1; goto out; } - printf("Ioctl compatibility tests passed\n"); + printf("OK\n"); out: if (dmabuf_fd >= 0) close(dmabuf_fd); @@ -327,17 +406,17 @@ static int test_alloc_errors(char *heap_name) if (heap_fd < 0) return -1; - printf("Testing expected error cases\n"); + printf(" Testing expected error cases: "); ret = dmabuf_heap_alloc(0, ONE_MEG, 0x111111, &dmabuf_fd); if (!ret) { - printf("Did not see expected error (invalid fd)!\n"); + printf("FAIL (Did not see expected error (invalid fd)!)\n"); ret = -1; goto out; } ret = dmabuf_heap_alloc(heap_fd, ONE_MEG, 0x111111, &dmabuf_fd); if (!ret) { - printf("Did not see expected error (invalid heap flags)!\n"); + printf("FAIL (Did not see expected error (invalid heap flags)!)\n"); ret = -1; goto out; } @@ -345,12 +424,12 @@ static int test_alloc_errors(char *heap_name) ret = dmabuf_heap_alloc_fdflags(heap_fd, ONE_MEG, ~(O_RDWR | O_CLOEXEC), 0, &dmabuf_fd); if (!ret) { - printf("Did not see expected error (invalid fd flags)!\n"); + printf("FAIL (Did not see expected error (invalid fd flags)!)\n"); ret = -1; goto out; } - printf("Expected error checking passed\n"); + printf("OK\n"); ret = 0; out: if (dmabuf_fd >= 0) @@ -379,10 +458,20 @@ int main(void) if (!strncmp(dir->d_name, "..", 3)) continue; + printf("Testing heap: %s\n", dir->d_name); + printf("=======================================\n"); ret = test_alloc_and_import(dir->d_name); if (ret) break; + ret = test_alloc_zeroed(dir->d_name, 4 * 1024); + if (ret) + break; + + ret = test_alloc_zeroed(dir->d_name, ONE_MEG); + if (ret) + break; + ret = test_alloc_compat(dir->d_name); if (ret) break; diff --git a/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh b/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh new file mode 100755 index 000000000000..91891b9418d7 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh @@ -0,0 +1,187 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + autoneg + autoneg_force_mode +" + +NUM_NETIFS=2 +: ${TIMEOUT:=30000} # ms +source $lib_dir/lib.sh +source $lib_dir/ethtool_lib.sh + +setup_prepare() +{ + swp1=${NETIFS[p1]} + swp2=${NETIFS[p2]} + + ip link set dev $swp1 up + ip link set dev $swp2 up + + busywait "$TIMEOUT" wait_for_port_up ethtool $swp2 + check_err $? "ports did not come up" + + local lanes_exist=$(ethtool $swp1 | grep 'Lanes:') + if [[ -z $lanes_exist ]]; then + log_test "SKIP: driver does not support lanes setting" + exit 1 + fi + + ip link set dev $swp2 down + ip link set dev $swp1 down +} + +check_lanes() +{ + local dev=$1; shift + local lanes=$1; shift + local max_speed=$1; shift + local chosen_lanes + + chosen_lanes=$(ethtool $dev | grep 'Lanes:') + chosen_lanes=${chosen_lanes#*"Lanes: "} + + ((chosen_lanes == lanes)) + check_err $? "swp1 advertise $max_speed and $lanes, devs sync to $chosen_lanes" +} + +check_unsupported_lanes() +{ + local dev=$1; shift + local max_speed=$1; shift + local max_lanes=$1; shift + local autoneg=$1; shift + local autoneg_str="" + + local unsupported_lanes=$((max_lanes *= 2)) + + if [[ $autoneg -eq 0 ]]; then + autoneg_str="autoneg off" + fi + + ethtool -s $swp1 speed $max_speed lanes $unsupported_lanes $autoneg_str &> /dev/null + check_fail $? "Unsuccessful $unsupported_lanes lanes setting was expected" +} + +max_speed_and_lanes_get() +{ + local dev=$1; shift + local arr=("$@") + local max_lanes + local max_speed + local -a lanes_arr + local -a speeds_arr + local -a max_values + + for ((i=0; i<${#arr[@]}; i+=2)); do + speeds_arr+=("${arr[$i]}") + lanes_arr+=("${arr[i+1]}") + done + + max_values+=($(get_max "${speeds_arr[@]}")) + max_values+=($(get_max "${lanes_arr[@]}")) + + echo ${max_values[@]} +} + +search_linkmode() +{ + local speed=$1; shift + local lanes=$1; shift + local arr=("$@") + + for ((i=0; i<${#arr[@]}; i+=2)); do + if [[ $speed -eq ${arr[$i]} && $lanes -eq ${arr[i+1]} ]]; then + return 1 + fi + done + return 0 +} + +autoneg() +{ + RET=0 + + local lanes + local max_speed + local max_lanes + + local -a linkmodes_params=($(dev_linkmodes_params_get $swp1 1)) + local -a max_values=($(max_speed_and_lanes_get $swp1 "${linkmodes_params[@]}")) + max_speed=${max_values[0]} + max_lanes=${max_values[1]} + + lanes=$max_lanes + + while [[ $lanes -ge 1 ]]; do + search_linkmode $max_speed $lanes "${linkmodes_params[@]}" + if [[ $? -eq 1 ]]; then + ethtool_set $swp1 speed $max_speed lanes $lanes + ip link set dev $swp1 up + ip link set dev $swp2 up + busywait "$TIMEOUT" wait_for_port_up ethtool $swp2 + check_err $? "ports did not come up" + + check_lanes $swp1 $lanes $max_speed + log_test "$lanes lanes is autonegotiated" + fi + let $((lanes /= 2)) + done + + check_unsupported_lanes $swp1 $max_speed $max_lanes 1 + log_test "Lanes number larger than max width is not set" + + ip link set dev $swp2 down + ip link set dev $swp1 down +} + +autoneg_force_mode() +{ + RET=0 + + local lanes + local max_speed + local max_lanes + + local -a linkmodes_params=($(dev_linkmodes_params_get $swp1 1)) + local -a max_values=($(max_speed_and_lanes_get $swp1 "${linkmodes_params[@]}")) + max_speed=${max_values[0]} + max_lanes=${max_values[1]} + + lanes=$max_lanes + + while [[ $lanes -ge 1 ]]; do + search_linkmode $max_speed $lanes "${linkmodes_params[@]}" + if [[ $? -eq 1 ]]; then + ethtool_set $swp1 speed $max_speed lanes $lanes autoneg off + ethtool_set $swp2 speed $max_speed lanes $lanes autoneg off + ip link set dev $swp1 up + ip link set dev $swp2 up + busywait "$TIMEOUT" wait_for_port_up ethtool $swp2 + check_err $? "ports did not come up" + + check_lanes $swp1 $lanes $max_speed + log_test "Autoneg off, $lanes lanes detected during force mode" + fi + let $((lanes /= 2)) + done + + check_unsupported_lanes $swp1 $max_speed $max_lanes 0 + log_test "Lanes number larger than max width is not set" + + ip link set dev $swp2 down + ip link set dev $swp1 down + + ethtool -s $swp2 autoneg on + ethtool -s $swp1 autoneg on +} + +check_ethtool_lanes_support +setup_prepare + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/fib.sh b/tools/testing/selftests/drivers/net/mlxsw/fib.sh index eab79b9e58cd..dcbf32b99bb6 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/fib.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/fib.sh @@ -225,6 +225,16 @@ ipv6_local_replace() ip -n $ns link del dev dummy1 } +fib_notify_on_flag_change_set() +{ + local notify=$1; shift + + ip netns exec testns1 sysctl -qw net.ipv4.fib_notify_on_flag_change=$notify + ip netns exec testns1 sysctl -qw net.ipv6.fib_notify_on_flag_change=$notify + + log_info "Set fib_notify_on_flag_change to $notify" +} + setup_prepare() { ip netns add testns1 @@ -251,6 +261,10 @@ trap cleanup EXIT setup_prepare +fib_notify_on_flag_change_set 1 +tests_run + +fib_notify_on_flag_change_set 0 tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh new file mode 100644 index 000000000000..f813ffefc07e --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh @@ -0,0 +1,64 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test for physical ports resource. The test splits each splittable port +# to its width and checks that eventually the number of physical ports equals +# the maximum number of physical ports. + +PORT_NUM_NETIFS=0 + +port_setup_prepare() +{ + : +} + +port_cleanup() +{ + pre_cleanup + + for port in "${unsplit[@]}"; do + devlink port unsplit $port + check_err $? "Did not unsplit $netdev" + done +} + +split_all_ports() +{ + local should_fail=$1; shift + local -a unsplit + + # Loop over the splittable netdevs and create tuples of netdev along + # with its width. For example: + # '$netdev1 $count1 $netdev2 $count2...', when: + # $netdev1-2 are splittable netdevs in the device, and + # $count1-2 are the netdevs width respectively. + while read netdev count <<<$( + devlink -j port show | + jq -r '.[][] | select(.splittable==true) | "\(.netdev) \(.lanes)"' + ) + [[ ! -z $netdev ]] + do + devlink port split $netdev count $count + check_err $? "Did not split $netdev into $count" + unsplit+=( "${netdev}s0" ) + done +} + +port_test() +{ + local max_ports=$1; shift + local should_fail=$1; shift + + split_all_ports $should_fail + + occ=$(devlink -j resource show $DEVLINK_DEV \ + | jq '.[][][] | select(.name=="physical_ports") |.["occ"]') + + [[ $occ -eq $max_ports ]] + if [[ $should_fail -eq 0 ]]; then + check_err $? "Mismatch ports number: Expected $max_ports, got $occ." + else + check_err_fail $should_fail $? "Reached more ports than expected" + fi + +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh index 4d900bc1f76c..5c7700212f75 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh @@ -230,7 +230,7 @@ switch_create() __mlnx_qos -i $swp4 --pfc=0,1,0,0,0,0,0,0 >/dev/null # PG0 will get autoconfigured to Xoff, give PG1 arbitrarily 100K, which # is (-2*MTU) about 80K of delay provision. - __mlnx_qos -i $swp3 --buffer_size=0,$_100KB,0,0,0,0,0,0 >/dev/null + __mlnx_qos -i $swp4 --buffer_size=0,$_100KB,0,0,0,0,0,0 >/dev/null # bridges # ------- diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/port_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/port_scale.sh new file mode 100644 index 000000000000..0b71dfbbb447 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/port_scale.sh @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0 +source ../port_scale.sh + +port_get_target() +{ + local should_fail=$1 + local target + + target=$(devlink_resource_size_get physical_ports) + + if ((! should_fail)); then + echo $target + else + echo $((target + 1)) + fi +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh index d7cf33a3f18d..4a1c9328555f 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh @@ -28,7 +28,7 @@ cleanup() trap cleanup EXIT -ALL_TESTS="router tc_flower mirror_gre tc_police" +ALL_TESTS="router tc_flower mirror_gre tc_police port" for current_test in ${TESTS:-$ALL_TESTS}; do source ${current_test}_scale.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/port_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/port_scale.sh new file mode 100644 index 000000000000..0b71dfbbb447 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/port_scale.sh @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0 +source ../port_scale.sh + +port_get_target() +{ + local should_fail=$1 + local target + + target=$(devlink_resource_size_get physical_ports) + + if ((! should_fail)); then + echo $target + else + echo $((target + 1)) + fi +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh index 43f662401bc3..087a884f66cd 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh @@ -22,7 +22,7 @@ cleanup() devlink_sp_read_kvd_defaults trap cleanup EXIT -ALL_TESTS="router tc_flower mirror_gre tc_police" +ALL_TESTS="router tc_flower mirror_gre tc_police port" for current_test in ${TESTS:-$ALL_TESTS}; do source ${current_test}_scale.sh diff --git a/tools/testing/selftests/drivers/net/netdevsim/fib.sh b/tools/testing/selftests/drivers/net/netdevsim/fib.sh index 2f87c3be76a9..251f228ce63e 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/fib.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/fib.sh @@ -302,6 +302,16 @@ ipv6_error_path() ipv6_error_path_replay } +fib_notify_on_flag_change_set() +{ + local notify=$1; shift + + ip netns exec testns1 sysctl -qw net.ipv4.fib_notify_on_flag_change=$notify + ip netns exec testns1 sysctl -qw net.ipv6.fib_notify_on_flag_change=$notify + + log_info "Set fib_notify_on_flag_change to $notify" +} + setup_prepare() { local netdev @@ -336,6 +346,10 @@ trap cleanup EXIT setup_prepare +fib_notify_on_flag_change_set 1 +tests_run + +fib_notify_on_flag_change_set 0 tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/netdevsim/fib_notifications.sh b/tools/testing/selftests/drivers/net/netdevsim/fib_notifications.sh new file mode 100755 index 000000000000..8d91191a098c --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/fib_notifications.sh @@ -0,0 +1,430 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + ipv4_route_addition_test + ipv4_route_deletion_test + ipv4_route_replacement_test + ipv4_route_offload_failed_test + ipv6_route_addition_test + ipv6_route_deletion_test + ipv6_route_replacement_test + ipv6_route_offload_failed_test +" + +NETDEVSIM_PATH=/sys/bus/netdevsim/ +DEV_ADDR=1337 +DEV=netdevsim${DEV_ADDR} +DEVLINK_DEV=netdevsim/${DEV} +SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/ +DEBUGFS_DIR=/sys/kernel/debug/netdevsim/$DEV/ +NUM_NETIFS=0 +source $lib_dir/lib.sh + +check_rt_offload_failed() +{ + local outfile=$1; shift + local line + + # Make sure that the first notification was emitted without + # RTM_F_OFFLOAD_FAILED flag and the second with RTM_F_OFFLOAD_FAILED + # flag + head -n 1 $outfile | grep -q "rt_offload_failed" + if [[ $? -eq 0 ]]; then + return 1 + fi + + head -n 2 $outfile | tail -n 1 | grep -q "rt_offload_failed" +} + +check_rt_trap() +{ + local outfile=$1; shift + local line + + # Make sure that the first notification was emitted without RTM_F_TRAP + # flag and the second with RTM_F_TRAP flag + head -n 1 $outfile | grep -q "rt_trap" + if [[ $? -eq 0 ]]; then + return 1 + fi + + head -n 2 $outfile | tail -n 1 | grep -q "rt_trap" +} + +route_notify_check() +{ + local outfile=$1; shift + local expected_num_lines=$1; shift + local offload_failed=${1:-0}; shift + + # check the monitor results + lines=`wc -l $outfile | cut "-d " -f1` + test $lines -eq $expected_num_lines + check_err $? "$expected_num_lines notifications were expected but $lines were received" + + if [[ $expected_num_lines -eq 1 ]]; then + return + fi + + if [[ $offload_failed -eq 0 ]]; then + check_rt_trap $outfile + check_err $? "Wrong RTM_F_TRAP flags in notifications" + else + check_rt_offload_failed $outfile + check_err $? "Wrong RTM_F_OFFLOAD_FAILED flags in notifications" + fi +} + +route_addition_check() +{ + local ip=$1; shift + local notify=$1; shift + local route=$1; shift + local expected_num_notifications=$1; shift + local offload_failed=${1:-0}; shift + + ip netns exec testns1 sysctl -qw net.$ip.fib_notify_on_flag_change=$notify + + local outfile=$(mktemp) + + $IP monitor route &> $outfile & + sleep 1 + $IP route add $route dev dummy1 + sleep 1 + kill %% && wait %% &> /dev/null + + route_notify_check $outfile $expected_num_notifications $offload_failed + rm -f $outfile + + $IP route del $route dev dummy1 +} + +ipv4_route_addition_test() +{ + RET=0 + + local ip="ipv4" + local route=192.0.2.0/24 + + # Make sure a single notification will be emitted for the programmed + # route. + local notify=0 + local expected_num_notifications=1 + # route_addition_check will assign value to RET. + route_addition_check $ip $notify $route $expected_num_notifications + + # Make sure two notifications will be emitted for the programmed route. + notify=1 + expected_num_notifications=2 + route_addition_check $ip $notify $route $expected_num_notifications + + # notify=2 means emit notifications only for failed route installation, + # make sure a single notification will be emitted for the programmed + # route. + notify=2 + expected_num_notifications=1 + route_addition_check $ip $notify $route $expected_num_notifications + + log_test "IPv4 route addition" +} + +route_deletion_check() +{ + local ip=$1; shift + local notify=$1; shift + local route=$1; shift + local expected_num_notifications=$1; shift + + ip netns exec testns1 sysctl -qw net.$ip.fib_notify_on_flag_change=$notify + $IP route add $route dev dummy1 + sleep 1 + + local outfile=$(mktemp) + + $IP monitor route &> $outfile & + sleep 1 + $IP route del $route dev dummy1 + sleep 1 + kill %% && wait %% &> /dev/null + + route_notify_check $outfile $expected_num_notifications + rm -f $outfile +} + +ipv4_route_deletion_test() +{ + RET=0 + + local ip="ipv4" + local route=192.0.2.0/24 + local expected_num_notifications=1 + + # Make sure a single notification will be emitted for the deleted route, + # regardless of fib_notify_on_flag_change value. + local notify=0 + # route_deletion_check will assign value to RET. + route_deletion_check $ip $notify $route $expected_num_notifications + + notify=1 + route_deletion_check $ip $notify $route $expected_num_notifications + + log_test "IPv4 route deletion" +} + +route_replacement_check() +{ + local ip=$1; shift + local notify=$1; shift + local route=$1; shift + local expected_num_notifications=$1; shift + + ip netns exec testns1 sysctl -qw net.$ip.fib_notify_on_flag_change=$notify + $IP route add $route dev dummy1 + sleep 1 + + local outfile=$(mktemp) + + $IP monitor route &> $outfile & + sleep 1 + $IP route replace $route dev dummy2 + sleep 1 + kill %% && wait %% &> /dev/null + + route_notify_check $outfile $expected_num_notifications + rm -f $outfile + + $IP route del $route dev dummy2 +} + +ipv4_route_replacement_test() +{ + RET=0 + + local ip="ipv4" + local route=192.0.2.0/24 + + $IP link add name dummy2 type dummy + $IP link set dev dummy2 up + + # Make sure a single notification will be emitted for the new route. + local notify=0 + local expected_num_notifications=1 + # route_replacement_check will assign value to RET. + route_replacement_check $ip $notify $route $expected_num_notifications + + # Make sure two notifications will be emitted for the new route. + notify=1 + expected_num_notifications=2 + route_replacement_check $ip $notify $route $expected_num_notifications + + # notify=2 means emit notifications only for failed route installation, + # make sure a single notification will be emitted for the new route. + notify=2 + expected_num_notifications=1 + route_replacement_check $ip $notify $route $expected_num_notifications + + $IP link del name dummy2 + + log_test "IPv4 route replacement" +} + +ipv4_route_offload_failed_test() +{ + + RET=0 + + local ip="ipv4" + local route=192.0.2.0/24 + local offload_failed=1 + + echo "y"> $DEBUGFS_DIR/fib/fail_route_offload + check_err $? "Failed to setup route offload to fail" + + # Make sure a single notification will be emitted for the programmed + # route. + local notify=0 + local expected_num_notifications=1 + route_addition_check $ip $notify $route $expected_num_notifications \ + $offload_failed + + # Make sure two notifications will be emitted for the new route. + notify=1 + expected_num_notifications=2 + route_addition_check $ip $notify $route $expected_num_notifications \ + $offload_failed + + # notify=2 means emit notifications only for failed route installation, + # make sure two notifications will be emitted for the new route. + notify=2 + expected_num_notifications=2 + route_addition_check $ip $notify $route $expected_num_notifications \ + $offload_failed + + echo "n"> $DEBUGFS_DIR/fib/fail_route_offload + check_err $? "Failed to setup route offload not to fail" + + log_test "IPv4 route offload failed" +} + +ipv6_route_addition_test() +{ + RET=0 + + local ip="ipv6" + local route=2001:db8:1::/64 + + # Make sure a single notification will be emitted for the programmed + # route. + local notify=0 + local expected_num_notifications=1 + route_addition_check $ip $notify $route $expected_num_notifications + + # Make sure two notifications will be emitted for the programmed route. + notify=1 + expected_num_notifications=2 + route_addition_check $ip $notify $route $expected_num_notifications + + # notify=2 means emit notifications only for failed route installation, + # make sure a single notification will be emitted for the programmed + # route. + notify=2 + expected_num_notifications=1 + route_addition_check $ip $notify $route $expected_num_notifications + + log_test "IPv6 route addition" +} + +ipv6_route_deletion_test() +{ + RET=0 + + local ip="ipv6" + local route=2001:db8:1::/64 + local expected_num_notifications=1 + + # Make sure a single notification will be emitted for the deleted route, + # regardless of fib_notify_on_flag_change value. + local notify=0 + route_deletion_check $ip $notify $route $expected_num_notifications + + notify=1 + route_deletion_check $ip $notify $route $expected_num_notifications + + log_test "IPv6 route deletion" +} + +ipv6_route_replacement_test() +{ + RET=0 + + local ip="ipv6" + local route=2001:db8:1::/64 + + $IP link add name dummy2 type dummy + $IP link set dev dummy2 up + + # Make sure a single notification will be emitted for the new route. + local notify=0 + local expected_num_notifications=1 + route_replacement_check $ip $notify $route $expected_num_notifications + + # Make sure two notifications will be emitted for the new route. + notify=1 + expected_num_notifications=2 + route_replacement_check $ip $notify $route $expected_num_notifications + + # notify=2 means emit notifications only for failed route installation, + # make sure a single notification will be emitted for the new route. + notify=2 + expected_num_notifications=1 + route_replacement_check $ip $notify $route $expected_num_notifications + + $IP link del name dummy2 + + log_test "IPv6 route replacement" +} + +ipv6_route_offload_failed_test() +{ + + RET=0 + + local ip="ipv6" + local route=2001:db8:1::/64 + local offload_failed=1 + + echo "y"> $DEBUGFS_DIR/fib/fail_route_offload + check_err $? "Failed to setup route offload to fail" + + # Make sure a single notification will be emitted for the programmed + # route. + local notify=0 + local expected_num_notifications=1 + route_addition_check $ip $notify $route $expected_num_notifications \ + $offload_failed + + # Make sure two notifications will be emitted for the new route. + notify=1 + expected_num_notifications=2 + route_addition_check $ip $notify $route $expected_num_notifications \ + $offload_failed + + # notify=2 means emit notifications only for failed route installation, + # make sure two notifications will be emitted for the new route. + notify=2 + expected_num_notifications=2 + route_addition_check $ip $notify $route $expected_num_notifications \ + $offload_failed + + echo "n"> $DEBUGFS_DIR/fib/fail_route_offload + check_err $? "Failed to setup route offload not to fail" + + log_test "IPv6 route offload failed" +} + +setup_prepare() +{ + modprobe netdevsim &> /dev/null + echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device + while [ ! -d $SYSFS_NET_DIR ] ; do :; done + + ip netns add testns1 + + if [ $? -ne 0 ]; then + echo "Failed to add netns \"testns1\"" + exit 1 + fi + + devlink dev reload $DEVLINK_DEV netns testns1 + + if [ $? -ne 0 ]; then + echo "Failed to reload into netns \"testns1\"" + exit 1 + fi + + IP="ip -n testns1" + + $IP link add name dummy1 type dummy + $IP link set dev dummy1 up +} + +cleanup() +{ + pre_cleanup + + $IP link del name dummy1 + ip netns del testns1 + echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device + modprobe -r netdevsim &> /dev/null +} + +trap cleanup EXIT + +setup_prepare + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc index 59216f3cfb12..2968cdc7df30 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc @@ -32,6 +32,10 @@ grep "myevent[[:space:]]u64 var1" synthetic_events # it is not possible to add same name event ! echo "myevent u64 var2" >> synthetic_events +# make sure !synthetic event doesn't require a field +echo "!myevent" >> synthetic_events +echo "myevent u64 var1" >> synthetic_events + # Non-append open will cleanup all events and add new one echo "myevent u64 var2" > synthetic_events diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc index ada594fe16cb..955e3ceea44b 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc @@ -1,19 +1,38 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test synthetic_events syntax parser errors -# requires: synthetic_events error_log +# requires: synthetic_events error_log "char name[]' >> synthetic_events":README check_error() { # command-with-error-pos-by-^ ftrace_errlog_check 'synthetic_events' "$1" 'synthetic_events' } +check_dyn_error() { # command-with-error-pos-by-^ + ftrace_errlog_check 'synthetic_events' "$1" 'dynamic_events' +} + check_error 'myevent ^chr arg' # INVALID_TYPE -check_error 'myevent ^char str[];; int v' # INVALID_TYPE -check_error 'myevent char ^str]; int v' # INVALID_NAME -check_error 'myevent char ^str;[]' # INVALID_NAME -check_error 'myevent ^char str[; int v' # INVALID_TYPE -check_error '^mye;vent char str[]' # BAD_NAME -check_error 'myevent char str[]; ^int' # INVALID_FIELD -check_error '^myevent' # INCOMPLETE_CMD +check_error 'myevent ^unsigned arg' # INCOMPLETE_TYPE + +check_error 'myevent char ^str]; int v' # BAD_NAME +check_error '^mye-vent char str[]' # BAD_NAME +check_error 'myevent char ^st-r[]' # BAD_NAME + +check_error 'myevent char str;^[]' # INVALID_FIELD +check_error 'myevent char str; ^int' # INVALID_FIELD + +check_error 'myevent char ^str[; int v' # INVALID_ARRAY_SPEC +check_error 'myevent char ^str[kdjdk]' # INVALID_ARRAY_SPEC +check_error 'myevent char ^str[257]' # INVALID_ARRAY_SPEC + +check_error '^mye;vent char str[]' # INVALID_CMD +check_error '^myevent ; char str[]' # INVALID_CMD +check_error '^myevent; char str[]' # INVALID_CMD +check_error '^myevent ;char str[]' # INVALID_CMD +check_error '^; char str[]' # INVALID_CMD +check_error '^;myevent char str[]' # INVALID_CMD +check_error '^myevent' # INVALID_CMD + +check_dyn_error '^s:junk/myevent char str[' # INVALID_DYN_CMD exit 0 diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile index 41582fe485ee..39f2bbe8dd3d 100644 --- a/tools/testing/selftests/gpio/Makefile +++ b/tools/testing/selftests/gpio/Makefile @@ -1,31 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 -VAR_CFLAGS := $(shell pkg-config --cflags mount 2>/dev/null) -VAR_LDLIBS := $(shell pkg-config --libs mount 2>/dev/null) -ifeq ($(VAR_LDLIBS),) -VAR_LDLIBS := -lmount -I/usr/include/libmount -endif - -CFLAGS += -O2 -g -std=gnu99 -Wall -I../../../../usr/include/ $(VAR_CFLAGS) -LDLIBS += $(VAR_LDLIBS) - TEST_PROGS := gpio-mockup.sh TEST_FILES := gpio-mockup-sysfs.sh -TEST_GEN_PROGS_EXTENDED := gpio-mockup-chardev +TEST_GEN_PROGS_EXTENDED := gpio-mockup-cdev -KSFT_KHDR_INSTALL := 1 include ../lib.mk - -GPIODIR := $(realpath ../../../gpio) -GPIOOUT := $(OUTPUT)/tools-gpio/ -GPIOOBJ := $(GPIOOUT)/gpio-utils.o - -CLEAN += ; $(RM) -rf $(GPIOOUT) - -$(TEST_GEN_PROGS_EXTENDED): $(GPIOOBJ) - -$(GPIOOUT): - mkdir -p $@ - -$(GPIOOBJ): $(GPIOOUT) - $(MAKE) OUTPUT=$(GPIOOUT) -C $(GPIODIR) diff --git a/tools/testing/selftests/gpio/config b/tools/testing/selftests/gpio/config index abaa6902b7b6..ce100342c20b 100644 --- a/tools/testing/selftests/gpio/config +++ b/tools/testing/selftests/gpio/config @@ -1,2 +1,3 @@ CONFIG_GPIOLIB=y +CONFIG_GPIO_CDEV=y CONFIG_GPIO_MOCKUP=m diff --git a/tools/testing/selftests/gpio/gpio-mockup-cdev.c b/tools/testing/selftests/gpio/gpio-mockup-cdev.c new file mode 100644 index 000000000000..e83eac71621a --- /dev/null +++ b/tools/testing/selftests/gpio/gpio-mockup-cdev.c @@ -0,0 +1,198 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * GPIO mockup cdev test helper + * + * Copyright (C) 2020 Kent Gibson + */ + +#include <errno.h> +#include <fcntl.h> +#include <signal.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/ioctl.h> +#include <linux/gpio.h> + +#define CONSUMER "gpio-mockup-cdev" + +static int request_line_v2(int cfd, unsigned int offset, + uint64_t flags, unsigned int val) +{ + struct gpio_v2_line_request req; + int ret; + + memset(&req, 0, sizeof(req)); + req.num_lines = 1; + req.offsets[0] = offset; + req.config.flags = flags; + strcpy(req.consumer, CONSUMER); + if (flags & GPIO_V2_LINE_FLAG_OUTPUT) { + req.config.num_attrs = 1; + req.config.attrs[0].mask = 1; + req.config.attrs[0].attr.id = GPIO_V2_LINE_ATTR_ID_OUTPUT_VALUES; + if (val) + req.config.attrs[0].attr.values = 1; + } + ret = ioctl(cfd, GPIO_V2_GET_LINE_IOCTL, &req); + if (ret == -1) + return -errno; + return req.fd; +} + + +static int get_value_v2(int lfd) +{ + struct gpio_v2_line_values vals; + int ret; + + memset(&vals, 0, sizeof(vals)); + vals.mask = 1; + ret = ioctl(lfd, GPIO_V2_LINE_GET_VALUES_IOCTL, &vals); + if (ret == -1) + return -errno; + return vals.bits & 0x1; +} + +static int request_line_v1(int cfd, unsigned int offset, + uint32_t flags, unsigned int val) +{ + struct gpiohandle_request req; + int ret; + + memset(&req, 0, sizeof(req)); + req.lines = 1; + req.lineoffsets[0] = offset; + req.flags = flags; + strcpy(req.consumer_label, CONSUMER); + if (flags & GPIOHANDLE_REQUEST_OUTPUT) + req.default_values[0] = val; + + ret = ioctl(cfd, GPIO_GET_LINEHANDLE_IOCTL, &req); + if (ret == -1) + return -errno; + return req.fd; +} + +static int get_value_v1(int lfd) +{ + struct gpiohandle_data vals; + int ret; + + memset(&vals, 0, sizeof(vals)); + ret = ioctl(lfd, GPIOHANDLE_GET_LINE_VALUES_IOCTL, &vals); + if (ret == -1) + return -errno; + return vals.values[0]; +} + +static void usage(char *prog) +{ + printf("Usage: %s [-l] [-b <bias>] [-s <value>] [-u <uAPI>] <gpiochip> <offset>\n", prog); + printf(" -b: set line bias to one of pull-down, pull-up, disabled\n"); + printf(" (default is to leave bias unchanged):\n"); + printf(" -l: set line active low (default is active high)\n"); + printf(" -s: set line value (default is to get line value)\n"); + printf(" -u: uAPI version to use (default is 2)\n"); + exit(-1); +} + +static int wait_signal(void) +{ + int sig; + sigset_t wset; + + sigemptyset(&wset); + sigaddset(&wset, SIGHUP); + sigaddset(&wset, SIGINT); + sigaddset(&wset, SIGTERM); + sigwait(&wset, &sig); + + return sig; +} + +int main(int argc, char *argv[]) +{ + char *chip; + int opt, ret, cfd, lfd; + unsigned int offset, val, abiv; + uint32_t flags_v1; + uint64_t flags_v2; + + abiv = 2; + ret = 0; + flags_v1 = GPIOHANDLE_REQUEST_INPUT; + flags_v2 = GPIO_V2_LINE_FLAG_INPUT; + + while ((opt = getopt(argc, argv, "lb:s:u:")) != -1) { + switch (opt) { + case 'l': + flags_v1 |= GPIOHANDLE_REQUEST_ACTIVE_LOW; + flags_v2 |= GPIO_V2_LINE_FLAG_ACTIVE_LOW; + break; + case 'b': + if (strcmp("pull-up", optarg) == 0) { + flags_v1 |= GPIOHANDLE_REQUEST_BIAS_PULL_UP; + flags_v2 |= GPIO_V2_LINE_FLAG_BIAS_PULL_UP; + } else if (strcmp("pull-down", optarg) == 0) { + flags_v1 |= GPIOHANDLE_REQUEST_BIAS_PULL_DOWN; + flags_v2 |= GPIO_V2_LINE_FLAG_BIAS_PULL_DOWN; + } else if (strcmp("disabled", optarg) == 0) { + flags_v1 |= GPIOHANDLE_REQUEST_BIAS_DISABLE; + flags_v2 |= GPIO_V2_LINE_FLAG_BIAS_DISABLED; + } + break; + case 's': + val = atoi(optarg); + flags_v1 &= ~GPIOHANDLE_REQUEST_INPUT; + flags_v1 |= GPIOHANDLE_REQUEST_OUTPUT; + flags_v2 &= ~GPIO_V2_LINE_FLAG_INPUT; + flags_v2 |= GPIO_V2_LINE_FLAG_OUTPUT; + break; + case 'u': + abiv = atoi(optarg); + break; + default: + usage(argv[0]); + } + } + + if (argc < optind + 2) + usage(argv[0]); + + chip = argv[optind]; + offset = atoi(argv[optind + 1]); + + cfd = open(chip, 0); + if (cfd == -1) { + fprintf(stderr, "Failed to open %s: %s\n", chip, strerror(errno)); + return -errno; + } + + if (abiv == 1) + lfd = request_line_v1(cfd, offset, flags_v1, val); + else + lfd = request_line_v2(cfd, offset, flags_v2, val); + + close(cfd); + + if (lfd < 0) { + fprintf(stderr, "Failed to request %s:%d: %s\n", chip, offset, strerror(-lfd)); + return lfd; + } + + if (flags_v2 & GPIO_V2_LINE_FLAG_OUTPUT) { + wait_signal(); + } else { + if (abiv == 1) + ret = get_value_v1(lfd); + else + ret = get_value_v2(lfd); + } + + close(lfd); + + return ret; +} diff --git a/tools/testing/selftests/gpio/gpio-mockup-chardev.c b/tools/testing/selftests/gpio/gpio-mockup-chardev.c deleted file mode 100644 index 73ead8828d3a..000000000000 --- a/tools/testing/selftests/gpio/gpio-mockup-chardev.c +++ /dev/null @@ -1,323 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * GPIO chardev test helper - * - * Copyright (C) 2016 Bamvor Jian Zhang - */ - -#define _GNU_SOURCE -#include <unistd.h> -#include <stdio.h> -#include <stdlib.h> -#include <errno.h> -#include <string.h> -#include <fcntl.h> -#include <getopt.h> -#include <sys/ioctl.h> -#include <libmount.h> -#include <err.h> -#include <dirent.h> -#include <linux/gpio.h> -#include "../../../gpio/gpio-utils.h" - -#define CONSUMER "gpio-selftest" -#define GC_NUM 10 -enum direction { - OUT, - IN -}; - -static int get_debugfs(char **path) -{ - struct libmnt_context *cxt; - struct libmnt_table *tb; - struct libmnt_iter *itr = NULL; - struct libmnt_fs *fs; - int found = 0, ret; - - cxt = mnt_new_context(); - if (!cxt) - err(EXIT_FAILURE, "libmount context allocation failed"); - - itr = mnt_new_iter(MNT_ITER_FORWARD); - if (!itr) - err(EXIT_FAILURE, "failed to initialize libmount iterator"); - - if (mnt_context_get_mtab(cxt, &tb)) - err(EXIT_FAILURE, "failed to read mtab"); - - while (mnt_table_next_fs(tb, itr, &fs) == 0) { - const char *type = mnt_fs_get_fstype(fs); - - if (!strcmp(type, "debugfs")) { - found = 1; - break; - } - } - if (found) { - ret = asprintf(path, "%s/gpio", mnt_fs_get_target(fs)); - if (ret < 0) - err(EXIT_FAILURE, "failed to format string"); - } - - mnt_free_iter(itr); - mnt_free_context(cxt); - - if (!found) - return -1; - - return 0; -} - -static int gpio_debugfs_get(const char *consumer, int *dir, int *value) -{ - char *debugfs; - FILE *f; - char *line = NULL; - size_t len = 0; - char *cur; - int found = 0; - - if (get_debugfs(&debugfs) != 0) - err(EXIT_FAILURE, "debugfs is not mounted"); - - f = fopen(debugfs, "r"); - if (!f) - err(EXIT_FAILURE, "read from gpio debugfs failed"); - - /* - * gpio-2 ( |gpio-selftest ) in lo - */ - while (getline(&line, &len, f) != -1) { - cur = strstr(line, consumer); - if (cur == NULL) - continue; - - cur = strchr(line, ')'); - if (!cur) - continue; - - cur += 2; - if (!strncmp(cur, "out", 3)) { - *dir = OUT; - cur += 4; - } else if (!strncmp(cur, "in", 2)) { - *dir = IN; - cur += 4; - } - - if (!strncmp(cur, "hi", 2)) - *value = 1; - else if (!strncmp(cur, "lo", 2)) - *value = 0; - - found = 1; - break; - } - free(debugfs); - fclose(f); - free(line); - - if (!found) - return -1; - - return 0; -} - -static struct gpiochip_info *list_gpiochip(const char *gpiochip_name, int *ret) -{ - struct gpiochip_info *cinfo; - struct gpiochip_info *current; - const struct dirent *ent; - DIR *dp; - char *chrdev_name; - int fd; - int i = 0; - - cinfo = calloc(sizeof(struct gpiochip_info) * 4, GC_NUM + 1); - if (!cinfo) - err(EXIT_FAILURE, "gpiochip_info allocation failed"); - - current = cinfo; - dp = opendir("/dev"); - if (!dp) { - *ret = -errno; - goto error_out; - } else { - *ret = 0; - } - - while (ent = readdir(dp), ent) { - if (check_prefix(ent->d_name, "gpiochip")) { - *ret = asprintf(&chrdev_name, "/dev/%s", ent->d_name); - if (*ret < 0) - goto error_out; - - fd = open(chrdev_name, 0); - if (fd == -1) { - *ret = -errno; - fprintf(stderr, "Failed to open %s\n", - chrdev_name); - goto error_close_dir; - } - *ret = ioctl(fd, GPIO_GET_CHIPINFO_IOCTL, current); - if (*ret == -1) { - perror("Failed to issue CHIPINFO IOCTL\n"); - goto error_close_dir; - } - close(fd); - if (strcmp(current->label, gpiochip_name) == 0 - || check_prefix(current->label, gpiochip_name)) { - *ret = 0; - current++; - i++; - } - } - } - - if ((!*ret && i == 0) || *ret < 0) { - free(cinfo); - cinfo = NULL; - } - if (!*ret && i > 0) { - cinfo = realloc(cinfo, sizeof(struct gpiochip_info) * 4 * i); - *ret = i; - } - -error_close_dir: - closedir(dp); -error_out: - if (*ret < 0) - err(EXIT_FAILURE, "list gpiochip failed: %s", strerror(*ret)); - - return cinfo; -} - -int gpio_pin_test(struct gpiochip_info *cinfo, int line, int flag, int value) -{ - struct gpiohandle_data data; - unsigned int lines[] = {line}; - int fd; - int debugfs_dir = IN; - int debugfs_value = 0; - int ret; - - data.values[0] = value; - ret = gpiotools_request_linehandle(cinfo->name, lines, 1, flag, &data, - CONSUMER); - if (ret < 0) - goto fail_out; - else - fd = ret; - - ret = gpio_debugfs_get(CONSUMER, &debugfs_dir, &debugfs_value); - if (ret) { - ret = -EINVAL; - goto fail_out; - } - if (flag & GPIOHANDLE_REQUEST_INPUT) { - if (debugfs_dir != IN) { - errno = -EINVAL; - ret = -errno; - } - } else if (flag & GPIOHANDLE_REQUEST_OUTPUT) { - if (flag & GPIOHANDLE_REQUEST_ACTIVE_LOW) - debugfs_value = !debugfs_value; - - if (!(debugfs_dir == OUT && value == debugfs_value)) { - errno = -EINVAL; - ret = -errno; - } - } - gpiotools_release_linehandle(fd); - -fail_out: - if (ret) - err(EXIT_FAILURE, "gpio<%s> line<%d> test flag<0x%x> value<%d>", - cinfo->name, line, flag, value); - - return ret; -} - -void gpio_pin_tests(struct gpiochip_info *cinfo, unsigned int line) -{ - printf("line<%d>", line); - gpio_pin_test(cinfo, line, GPIOHANDLE_REQUEST_OUTPUT, 0); - printf("."); - gpio_pin_test(cinfo, line, GPIOHANDLE_REQUEST_OUTPUT, 1); - printf("."); - gpio_pin_test(cinfo, line, - GPIOHANDLE_REQUEST_OUTPUT | GPIOHANDLE_REQUEST_ACTIVE_LOW, - 0); - printf("."); - gpio_pin_test(cinfo, line, - GPIOHANDLE_REQUEST_OUTPUT | GPIOHANDLE_REQUEST_ACTIVE_LOW, - 1); - printf("."); - gpio_pin_test(cinfo, line, GPIOHANDLE_REQUEST_INPUT, 0); - printf("."); -} - -/* - * ./gpio-mockup-chardev gpio_chip_name_prefix is_valid_gpio_chip - * Return 0 if successful or exit with EXIT_FAILURE if test failed. - * gpio_chip_name_prefix: The prefix of gpiochip you want to test. E.g. - * gpio-mockup - * is_valid_gpio_chip: Whether the gpio_chip is valid. 1 means valid, - * 0 means invalid which could not be found by - * list_gpiochip. - */ -int main(int argc, char *argv[]) -{ - char *prefix; - int valid; - struct gpiochip_info *cinfo; - struct gpiochip_info *current; - int i; - int ret; - - if (argc < 3) { - printf("Usage: %s prefix is_valid", argv[0]); - exit(EXIT_FAILURE); - } - - prefix = argv[1]; - valid = strcmp(argv[2], "true") == 0 ? 1 : 0; - - printf("Test gpiochip %s: ", prefix); - cinfo = list_gpiochip(prefix, &ret); - if (!cinfo) { - if (!valid && ret == 0) { - printf("Invalid test successful\n"); - ret = 0; - goto out; - } else { - ret = -EINVAL; - goto out; - } - } else if (cinfo && !valid) { - ret = -EINVAL; - goto out; - } - current = cinfo; - for (i = 0; i < ret; i++) { - gpio_pin_tests(current, 0); - gpio_pin_tests(current, current->lines - 1); - gpio_pin_tests(current, random() % current->lines); - current++; - } - ret = 0; - printf("successful\n"); - -out: - if (ret) - fprintf(stderr, "gpio<%s> test failed\n", prefix); - - if (cinfo) - free(cinfo); - - if (ret) - exit(EXIT_FAILURE); - - return ret; -} diff --git a/tools/testing/selftests/gpio/gpio-mockup-sysfs.sh b/tools/testing/selftests/gpio/gpio-mockup-sysfs.sh index dd269d877562..2d2e5d8763b6 100755 --- a/tools/testing/selftests/gpio/gpio-mockup-sysfs.sh +++ b/tools/testing/selftests/gpio/gpio-mockup-sysfs.sh @@ -1,135 +1,77 @@ # SPDX-License-Identifier: GPL-2.0 -is_consistent() -{ - val= - - active_low_sysfs=`cat $GPIO_SYSFS/gpio$nr/active_low` - val_sysfs=`cat $GPIO_SYSFS/gpio$nr/value` - dir_sysfs=`cat $GPIO_SYSFS/gpio$nr/direction` - gpio_this_debugfs=`cat $GPIO_DEBUGFS |grep "gpio-$nr" | sed "s/(.*)//g"` - dir_debugfs=`echo $gpio_this_debugfs | awk '{print $2}'` - val_debugfs=`echo $gpio_this_debugfs | awk '{print $3}'` - if [ $val_debugfs = "lo" ]; then - val=0 - elif [ $val_debugfs = "hi" ]; then - val=1 - fi +# Overrides functions in gpio-mockup.sh to test using the GPIO SYSFS uAPI - if [ $active_low_sysfs = "1" ]; then - if [ $val = "0" ]; then - val="1" - else - val="0" - fi - fi +SYSFS=`grep -w sysfs /proc/mounts | cut -f2 -d' '` +[ -d "$SYSFS" ] || skip "sysfs is not mounted" - if [ $val_sysfs = $val ] && [ $dir_sysfs = $dir_debugfs ]; then - echo -n "." - else - echo "test fail, exit" - die - fi -} +GPIO_SYSFS="${SYSFS}/class/gpio" +[ -d "$GPIO_SYSFS" ] || skip "CONFIG_GPIO_SYSFS is not selected" -test_pin_logic() -{ - nr=$1 - direction=$2 - active_low=$3 - value=$4 +PLATFORM_SYSFS=$SYSFS/devices/platform - echo $direction > $GPIO_SYSFS/gpio$nr/direction - echo $active_low > $GPIO_SYSFS/gpio$nr/active_low - if [ $direction = "out" ]; then - echo $value > $GPIO_SYSFS/gpio$nr/value - fi - is_consistent $nr -} +sysfs_nr= +sysfs_ldir= -test_one_pin() +# determine the sysfs GPIO number given the $chip and $offset +# e.g. gpiochip1:32 +find_sysfs_nr() { - nr=$1 - - echo -n "test pin<$nr>" - - echo $nr > $GPIO_SYSFS/export 2>/dev/null - - if [ X$? != X0 ]; then - echo "test GPIO pin $nr failed" - die - fi - - #"Checking if the sysfs is consistent with debugfs: " - is_consistent $nr - - #"Checking the logic of active_low: " - test_pin_logic $nr out 1 1 - test_pin_logic $nr out 1 0 - test_pin_logic $nr out 0 1 - test_pin_logic $nr out 0 0 - - #"Checking the logic of direction: " - test_pin_logic $nr in 1 1 - test_pin_logic $nr out 1 0 - test_pin_logic $nr low 0 1 - test_pin_logic $nr high 0 0 - - echo $nr > $GPIO_SYSFS/unexport - - echo "successful" + # e.g. /sys/devices/platform/gpio-mockup.1/gpiochip1 + local platform=$(find $PLATFORM_SYSFS -mindepth 2 -maxdepth 2 -type d -name $chip) + [ "$platform" ] || fail "can't find platform of $chip" + # e.g. /sys/devices/platform/gpio-mockup.1/gpio/gpiochip508/base + local base=$(find ${platform%/*}/gpio/ -mindepth 2 -maxdepth 2 -type f -name base) + [ "$base" ] || fail "can't find base of $chip" + sysfs_nr=$(($(< "$base") + $offset)) + sysfs_ldir="$GPIO_SYSFS/gpio$sysfs_nr" } -test_one_pin_fail() +acquire_line() { - nr=$1 - - echo $nr > $GPIO_SYSFS/export 2>/dev/null - - if [ X$? != X0 ]; then - echo "test invalid pin $nr successful" - else - echo "test invalid pin $nr failed" - echo $nr > $GPIO_SYSFS/unexport 2>/dev/null - die - fi + [ "$sysfs_nr" ] && return + find_sysfs_nr + echo "$sysfs_nr" > "$GPIO_SYSFS/export" } -list_chip() +# The helpers being overridden... +get_line() { - echo `ls -d $GPIO_DRV_SYSFS/gpiochip* 2>/dev/null` + [ -e "$sysfs_ldir/value" ] && echo $(< "$sysfs_ldir/value") } -test_chip() +set_line() { - chip=$1 - name=`basename $chip` - base=`cat $chip/base` - ngpio=`cat $chip/ngpio` - printf "%-10s %-5s %-5s\n" $name $base $ngpio - if [ $ngpio = "0" ]; then - echo "number of gpio is zero is not allowed". - fi - test_one_pin $base - test_one_pin $(($base + $ngpio - 1)) - test_one_pin $((( RANDOM % $ngpio ) + $base )) + acquire_line + + for option in $*; do + case $option in + active-high) + echo 0 > "$sysfs_ldir/active_low" + ;; + active-low) + echo 1 > "$sysfs_ldir/active_low" + ;; + input) + echo "in" > "$sysfs_ldir/direction" + ;; + 0) + echo "out" > "$sysfs_ldir/direction" + echo 0 > "$sysfs_ldir/value" + ;; + 1) + echo "out" > "$sysfs_ldir/direction" + echo 1 > "$sysfs_ldir/value" + ;; + esac + done } -test_chips_sysfs() +release_line() { - gpiochip=`list_chip $module` - if [ X"$gpiochip" = X ]; then - if [ X"$valid" = Xfalse ]; then - echo "successful" - else - echo "fail" - die - fi - else - for chip in $gpiochip; do - test_chip $chip - done - fi + [ "$sysfs_nr" ] || return 0 + echo "$sysfs_nr" > "$GPIO_SYSFS/unexport" + sysfs_nr= + sysfs_ldir= } - diff --git a/tools/testing/selftests/gpio/gpio-mockup.sh b/tools/testing/selftests/gpio/gpio-mockup.sh index 7f35b9880485..0d6c5f7f95d2 100755 --- a/tools/testing/selftests/gpio/gpio-mockup.sh +++ b/tools/testing/selftests/gpio/gpio-mockup.sh @@ -1,72 +1,57 @@ -#!/bin/bash +#!/bin/bash -efu # SPDX-License-Identifier: GPL-2.0 #exit status -#1: Internal error -#2: sysfs/debugfs not mount -#3: insert module fail when gpio-mockup is a module. -#4: Skip test including run as non-root user. -#5: other reason. - -SYSFS= -GPIO_SYSFS= -GPIO_DRV_SYSFS= +#0: success +#1: fail +#4: skip test - including run as non-root user + +BASE=${0%/*} DEBUGFS= GPIO_DEBUGFS= -dev_type= -module= +dev_type="cdev" +module="gpio-mockup" +verbose= +full_test= +random= +uapi_opt= +active_opt= +bias_opt= +line_set_pid= -# Kselftest framework requirement - SKIP code is 4. +# Kselftest return codes +ksft_fail=1 ksft_skip=4 usage() { echo "Usage:" - echo "$0 [-f] [-m name] [-t type]" - echo "-f: full test. It maybe conflict with existence gpio device." - echo "-m: module name, default name is gpio-mockup. It could also test" - echo " other gpio device." - echo "-t: interface type: chardev(char device) and sysfs(being" - echo " deprecated). The first one is default" - echo "" - echo "$0 -h" - echo "This usage" + echo "$0 [-frv] [-t type]" + echo "-f: full test (minimal set run by default)" + echo "-r: test random lines as well as fence posts" + echo "-t: interface type:" + echo " cdev (character device ABI) - default" + echo " cdev_v1 (deprecated character device ABI)" + echo " sysfs (deprecated SYSFS ABI)" + echo "-v: verbose progress reporting" + exit $ksft_fail } -prerequisite() +skip() { - msg="skip all tests:" - if [ $UID != 0 ]; then - echo $msg must be run as root >&2 - exit $ksft_skip - fi - SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'` - if [ ! -d "$SYSFS" ]; then - echo $msg sysfs is not mounted >&2 - exit 2 - fi - GPIO_SYSFS=`echo $SYSFS/class/gpio` - GPIO_DRV_SYSFS=`echo $SYSFS/devices/platform/$module/gpio` - DEBUGFS=`mount -t debugfs | head -1 | awk '{ print $3 }'` - if [ ! -d "$DEBUGFS" ]; then - echo $msg debugfs is not mounted >&2 - exit 2 - fi - GPIO_DEBUGFS=`echo $DEBUGFS/gpio` - source gpio-mockup-sysfs.sh + echo "$*" >&2 + echo "GPIO $module test SKIP" + exit $ksft_skip } -try_insert_module() +prerequisite() { - if [ -d "$GPIO_DRV_SYSFS" ]; then - echo "$GPIO_DRV_SYSFS exist. Skip insert module" - else - modprobe -q $module $1 - if [ X$? != X0 ]; then - echo $msg insmod $module failed >&2 - exit 3 - fi - fi + [ $(id -u) -eq 0 ] || skip "must be run as root" + + DEBUGFS=$(grep -w debugfs /proc/mounts | cut -f2 -d' ') + [ -d "$DEBUGFS" ] || skip "debugfs is not mounted" + + GPIO_DEBUGFS=$DEBUGFS/$module } remove_module() @@ -74,133 +59,345 @@ remove_module() modprobe -r -q $module } -die() +cleanup() { + set +e + release_line remove_module - exit 5 + jobs -p | xargs -r kill > /dev/null 2>&1 } -test_chips() +fail() { - if [ X$dev_type = Xsysfs ]; then - echo "WARNING: sysfs ABI of gpio is going to deprecated." - test_chips_sysfs $* - else - $BASE/gpio-mockup-chardev $* - fi + echo "test failed: $*" >&2 + echo "GPIO $module test FAIL" + exit $ksft_fail +} + +try_insert_module() +{ + modprobe -q $module "$1" || fail "insert $module failed with error $?" +} + +log() +{ + [ -z "$verbose" ] || echo "$*" } -gpio_test() +# The following line helpers, release_Line, get_line and set_line, all +# make use of the global $chip and $offset variables. +# +# This implementation drives the GPIO character device (cdev) uAPI. +# Other implementations may override these to test different uAPIs. + +# Release any resources related to the line +release_line() { - param=$1 - valid=$2 + [ "$line_set_pid" ] && kill $line_set_pid && wait $line_set_pid || true + line_set_pid= +} - if [ X"$param" = X ]; then - die +# Read the current value of the line +get_line() +{ + release_line + + local cdev_opts=${uapi_opt}${active_opt} + $BASE/gpio-mockup-cdev $cdev_opts /dev/$chip $offset + echo $? +} + +# Set the state of the line +# +# Changes to line configuration are provided as parameters. +# The line is assumed to be an output if the line value 0 or 1 is +# specified, else an input. +set_line() +{ + local val= + + release_line + + # parse config options... + for option in $*; do + case $option in + active-low) + active_opt="-l " + ;; + active-high) + active_opt= + ;; + bias-none) + bias_opt= + ;; + pull-down) + bias_opt="-bpull-down " + ;; + pull-up) + bias_opt="-bpull-up " + ;; + 0) + val=0 + ;; + 1) + val=1 + ;; + esac + done + + local cdev_opts=${uapi_opt}${active_opt} + if [ "$val" ]; then + $BASE/gpio-mockup-cdev $cdev_opts -s$val /dev/$chip $offset & + # failure to set is detected by reading mockup and toggling values + line_set_pid=$! + # allow for gpio-mockup-cdev to launch and request line + # (there is limited value in checking if line has been requested) + sleep 0.01 + elif [ "$bias_opt" ]; then + cdev_opts=${cdev_opts}${bias_opt} + $BASE/gpio-mockup-cdev $cdev_opts /dev/$chip $offset || true fi - try_insert_module "gpio_mockup_ranges=$param" - echo -n "GPIO $module test with ranges: <" - echo "$param>: " - printf "%-10s %s\n" $param - test_chips $module $valid - remove_module } -BASE=`dirname $0` +assert_line() +{ + local val + # don't need any retry here as set_mock allows for propagation + val=$(get_line) + [ "$val" = "$1" ] || fail "line value is ${val:-empty} when $1 was expected" +} + +# The following mockup helpers all make use of the $mock_line +assert_mock() +{ + local backoff_wait=10 + local retry=0 + local val + # retry allows for set propagation from uAPI to mockup + while true; do + val=$(< $mock_line) + [ "$val" = "$1" ] && break + retry=$((retry + 1)) + [ $retry -lt 5 ] || fail "mockup $mock_line value ${val:-empty} when $1 expected" + sleep $(printf "%0.2f" $((backoff_wait))e-3) + backoff_wait=$((backoff_wait * 2)) + done +} + +set_mock() +{ + echo "$1" > $mock_line + # allow for set propagation - so we won't be in a race with set_line + assert_mock "$1" +} -dev_type= -TEMP=`getopt -o fhm:t: -n '$0' -- "$@"` +# test the functionality of a line +# +# The line is set from the mockup side and is read from the userspace side +# (input), and is set from the userspace side and is read from the mockup side +# (output). +# +# Setting the mockup pull using the userspace interface bias settings is +# tested where supported by the userspace interface (cdev). +test_line() +{ + chip=$1 + offset=$2 + log "test_line $chip $offset" + mock_line=$GPIO_DEBUGFS/$chip/$offset + [ -e "$mock_line" ] || fail "missing line $chip:$offset" -if [ "$?" != "0" ]; then - echo "Parameter process failed, Terminating..." >&2 - exit 1 -fi + # test input active-high + set_mock 1 + set_line input active-high + assert_line 1 + set_mock 0 + assert_line 0 + set_mock 1 + assert_line 1 + + if [ "$full_test" ]; then + if [ "$dev_type" != "sysfs" ]; then + # test pulls + set_mock 0 + set_line input pull-up + assert_line 1 + set_mock 0 + assert_line 0 + + set_mock 1 + set_line input pull-down + assert_line 0 + set_mock 1 + assert_line 1 + + set_line bias-none + fi + + # test input active-low + set_mock 0 + set_line active-low + assert_line 1 + set_mock 1 + assert_line 0 + set_mock 0 + assert_line 1 + + # test output active-high + set_mock 1 + set_line active-high 0 + assert_mock 0 + set_line 1 + assert_mock 1 + set_line 0 + assert_mock 0 + fi + + # test output active-low + set_mock 0 + set_line active-low 0 + assert_mock 1 + set_line 1 + assert_mock 0 + set_line 0 + assert_mock 1 + + release_line +} + +test_no_line() +{ + log test_no_line "$*" + [ ! -e "$GPIO_DEBUGFS/$1/$2" ] || fail "unexpected line $1:$2" +} -# Note the quotes around `$TEMP': they are essential! -eval set -- "$TEMP" +# Load the module and check that the expected number of gpiochips, with the +# expected number of lines, are created and are functional. +# +# $1 is the gpio_mockup_ranges parameter for the module +# The remaining parameters are the number of lines, n, expected for each of +# the gpiochips expected to be created. +# +# For each gpiochip the fence post lines, 0 and n-1, are tested, and the +# line on the far side of the fence post, n, is tested to not exist. +# +# If the $random flag is set then a random line in the middle of the +# gpiochip is tested as well. +insmod_test() +{ + local ranges= + local gc= + local width= -while true; do - case $1 in - -f) + [ "${1:-}" ] || fail "missing ranges" + ranges=$1 ; shift + try_insert_module "gpio_mockup_ranges=$ranges" + log "GPIO $module test with ranges: <$ranges>:" + # e.g. /sys/kernel/debug/gpio-mockup/gpiochip1 + gpiochip=$(find "$DEBUGFS/$module/" -name gpiochip* -type d | sort) + for chip in $gpiochip; do + gc=${chip##*/} + [ "${1:-}" ] || fail "unexpected chip - $gc" + width=$1 ; shift + test_line $gc 0 + if [ "$random" -a $width -gt 2 ]; then + test_line $gc $((RANDOM % ($width - 2) + 1)) + fi + test_line $gc $(($width - 1)) + test_no_line $gc $width + done + [ "${1:-}" ] && fail "missing expected chip of width $1" + remove_module || fail "failed to remove module with error $?" +} + +while getopts ":frvt:" opt; do + case $opt in + f) full_test=true - shift - ;; - -h) - usage - exit ;; - -m) - module=$2 - shift 2 + r) + random=true ;; - -t) - dev_type=$2 - shift 2 + t) + dev_type=$OPTARG ;; - --) - shift - break + v) + verbose=true ;; *) - echo "Internal error!" - exit 1 + usage ;; esac done +shift $((OPTIND - 1)) -if [ X"$module" = X ]; then - module="gpio-mockup" -fi - -if [ X$dev_type != Xsysfs ]; then - dev_type="chardev" -fi +[ "${1:-}" ] && fail "unknown argument '$1'" prerequisite -echo "1. Test dynamic allocation of gpio successful means insert gpiochip and" -echo " manipulate gpio pin successful" -gpio_test "-1,32" true -gpio_test "-1,32,-1,32" true -gpio_test "-1,32,-1,32,-1,32" true -if [ X$full_test = Xtrue ]; then - gpio_test "-1,32,32,64" true - gpio_test "-1,32,40,64,-1,5" true - gpio_test "-1,32,32,64,-1,32" true - gpio_test "0,32,32,64,-1,32,-1,32" true - gpio_test "-1,32,-1,32,0,32,32,64" true - echo "2. Do basic test: successful means insert gpiochip and" - echo " manipulate gpio pin successful" - gpio_test "0,32" true - gpio_test "0,32,32,64" true - gpio_test "0,32,40,64,64,96" true +trap 'exit $ksft_fail' SIGTERM SIGINT +trap cleanup EXIT + +case "$dev_type" in +sysfs) + source $BASE/gpio-mockup-sysfs.sh + echo "WARNING: gpio sysfs ABI is deprecated." + ;; +cdev_v1) + echo "WARNING: gpio cdev ABI v1 is deprecated." + uapi_opt="-u1 " + ;; +cdev) + ;; +*) + fail "unknown interface type: $dev_type" + ;; +esac + +remove_module || fail "can't remove existing $module module" + +# manual gpio allocation tests fail if a physical chip already exists +[ "$full_test" -a -e "/dev/gpiochip0" ] && skip "full tests conflict with gpiochip0" + +echo "1. Module load tests" +echo "1.1. dynamic allocation of gpio" +insmod_test "-1,32" 32 +insmod_test "-1,23,-1,32" 23 32 +insmod_test "-1,23,-1,26,-1,32" 23 26 32 +if [ "$full_test" ]; then + echo "1.2. manual allocation of gpio" + insmod_test "0,32" 32 + insmod_test "0,32,32,60" 32 28 + insmod_test "0,32,40,64,64,96" 32 24 32 + echo "1.3. dynamic and manual allocation of gpio" + insmod_test "-1,32,32,62" 32 30 + insmod_test "-1,22,-1,23,0,24,32,64" 22 23 24 32 + insmod_test "-1,32,32,60,-1,29" 32 28 29 + insmod_test "-1,32,40,64,-1,5" 32 24 5 + insmod_test "0,32,32,44,-1,22,-1,31" 32 12 22 31 fi -echo "3. Error test: successful means insert gpiochip failed" -echo "3.1 Test number of gpio overflow" -#Currently: The max number of gpio(1024) is defined in arm architecture. -gpio_test "-1,32,-1,1024" false -if [ X$full_test = Xtrue ]; then - echo "3.2 Test zero line of gpio" - gpio_test "0,0" false - echo "3.3 Test range overlap" - echo "3.3.1 Test corner case" - gpio_test "0,32,0,1" false - gpio_test "0,32,32,64,32,40" false - gpio_test "0,32,35,64,35,45" false - gpio_test "0,32,31,32" false - gpio_test "0,32,32,64,36,37" false - gpio_test "0,32,35,64,34,36" false - echo "3.3.2 Test inserting invalid second gpiochip" - gpio_test "0,32,30,35" false - gpio_test "0,32,1,5" false - gpio_test "10,32,9,14" false - gpio_test "10,32,30,35" false - echo "3.3.3 Test others" - gpio_test "0,32,40,56,39,45" false - gpio_test "0,32,40,56,30,33" false - gpio_test "0,32,40,56,30,41" false - gpio_test "0,32,40,56,20,21" false +echo "2. Module load error tests" +echo "2.1 gpio overflow" +# Currently: The max number of gpio(1024) is defined in arm architecture. +insmod_test "-1,1024" +if [ "$full_test" ]; then + echo "2.2 no lines defined" + insmod_test "0,0" + echo "2.3 ignore range overlap" + insmod_test "0,32,0,1" 32 + insmod_test "0,32,1,5" 32 + insmod_test "0,32,30,35" 32 + insmod_test "0,32,31,32" 32 + insmod_test "10,32,30,35" 22 + insmod_test "10,32,9,14" 22 + insmod_test "0,32,20,21,40,56" 32 16 + insmod_test "0,32,32,64,32,40" 32 32 + insmod_test "0,32,32,64,36,37" 32 32 + insmod_test "0,32,35,64,34,36" 32 29 + insmod_test "0,30,35,64,35,45" 30 29 + insmod_test "0,32,40,56,30,33" 32 16 + insmod_test "0,32,40,56,30,41" 32 16 + insmod_test "0,32,40,56,39,45" 32 16 fi -echo GPIO test PASS - +echo "GPIO $module test PASS" diff --git a/tools/testing/selftests/ipc/msgque.c b/tools/testing/selftests/ipc/msgque.c index 5ec4d9e18806..656c43c24044 100644 --- a/tools/testing/selftests/ipc/msgque.c +++ b/tools/testing/selftests/ipc/msgque.c @@ -69,7 +69,7 @@ int restore_queue(struct msgque_data *msgque) printf("msgsnd failed (%m)\n"); ret = -errno; goto destroy; - }; + } } return 0; @@ -180,7 +180,7 @@ int fill_msgque(struct msgque_data *msgque) IPC_NOWAIT) != 0) { printf("First message send failed (%m)\n"); return -errno; - }; + } msgbuf.mtype = ANOTHER_MSG_TYPE; memcpy(msgbuf.mtext, ANOTHER_TEST_STRING, sizeof(ANOTHER_TEST_STRING)); @@ -188,7 +188,7 @@ int fill_msgque(struct msgque_data *msgque) IPC_NOWAIT) != 0) { printf("Second message send failed (%m)\n"); return -errno; - }; + } return 0; } diff --git a/tools/testing/selftests/kselftest_deps.sh b/tools/testing/selftests/kselftest_deps.sh index bbc04646346b..00e60d6eb16b 100755 --- a/tools/testing/selftests/kselftest_deps.sh +++ b/tools/testing/selftests/kselftest_deps.sh @@ -129,13 +129,11 @@ l2_tests=$(grep -r --include=Makefile ": LDLIBS" | \ grep -v "VAR_LDLIBS" | awk -F: '{print $1}') # Level 3 -# gpio, memfd and others use pkg-config to find mount and fuse libs +# memfd and others use pkg-config to find mount and fuse libs # respectively and save it in VAR_LDLIBS. If pkg-config doesn't find # any, VAR_LDLIBS set to default. # Use the default value and filter out pkg-config for dependency check. # e.g: -# gpio/Makefile -# VAR_LDLIBS := $(shell pkg-config --libs mount) 2>/dev/null) # memfd/Makefile # VAR_LDLIBS := $(shell pkg-config fuse --libs 2>/dev/null) diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index edce85420d19..ae0f0f33b2a6 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -79,7 +79,7 @@ #endif /** - * TH_LOG(fmt, ...) + * TH_LOG() * * @fmt: format string * @...: optional arguments @@ -113,12 +113,16 @@ __FILE__, __LINE__, _metadata->name, ##__VA_ARGS__) /** - * SKIP(statement, fmt, ...) + * SKIP() * * @statement: statement to run after reporting SKIP * @fmt: format string * @...: optional arguments * + * .. code-block:: c + * + * SKIP(statement, fmt, ...); + * * This forces a "pass" after reporting why something is being skipped * and runs "statement", which is usually "return" or "goto skip". */ @@ -136,7 +140,7 @@ } while (0) /** - * TEST(test_name) - Defines the test function and creates the registration + * TEST() - Defines the test function and creates the registration * stub * * @test_name: test name @@ -155,7 +159,7 @@ #define TEST(test_name) __TEST_IMPL(test_name, -1) /** - * TEST_SIGNAL(test_name, signal) + * TEST_SIGNAL() * * @test_name: test name * @signal: signal number @@ -195,7 +199,7 @@ struct __test_metadata __attribute__((unused)) *_metadata) /** - * FIXTURE_DATA(datatype_name) - Wraps the struct name so we have one less + * FIXTURE_DATA() - Wraps the struct name so we have one less * argument to pass around * * @datatype_name: datatype name @@ -212,7 +216,7 @@ #define FIXTURE_DATA(datatype_name) struct _test_data_##datatype_name /** - * FIXTURE(fixture_name) - Called once per fixture to setup the data and + * FIXTURE() - Called once per fixture to setup the data and * register * * @fixture_name: fixture name @@ -239,7 +243,7 @@ FIXTURE_DATA(fixture_name) /** - * FIXTURE_SETUP(fixture_name) - Prepares the setup function for the fixture. + * FIXTURE_SETUP() - Prepares the setup function for the fixture. * *_metadata* is included so that EXPECT_* and ASSERT_* work correctly. * * @fixture_name: fixture name @@ -265,7 +269,7 @@ __attribute__((unused)) *variant) /** - * FIXTURE_TEARDOWN(fixture_name) + * FIXTURE_TEARDOWN() * *_metadata* is included so that EXPECT_* and ASSERT_* work correctly. * * @fixture_name: fixture name @@ -286,7 +290,7 @@ FIXTURE_DATA(fixture_name) __attribute__((unused)) *self) /** - * FIXTURE_VARIANT(fixture_name) - Optionally called once per fixture + * FIXTURE_VARIANT() - Optionally called once per fixture * to declare fixture variant * * @fixture_name: fixture name @@ -305,7 +309,7 @@ #define FIXTURE_VARIANT(fixture_name) struct _fixture_variant_##fixture_name /** - * FIXTURE_VARIANT_ADD(fixture_name, variant_name) - Called once per fixture + * FIXTURE_VARIANT_ADD() - Called once per fixture * variant to setup and register the data * * @fixture_name: fixture name @@ -339,7 +343,7 @@ _##fixture_name##_##variant_name##_variant = /** - * TEST_F(fixture_name, test_name) - Emits test registration and helpers for + * TEST_F() - Emits test registration and helpers for * fixture-based test cases * * @fixture_name: fixture name diff --git a/tools/testing/selftests/kselftest_module.h b/tools/testing/selftests/kselftest_module.h index e8eafaf0941a..e2ea41de3f35 100644 --- a/tools/testing/selftests/kselftest_module.h +++ b/tools/testing/selftests/kselftest_module.h @@ -11,7 +11,8 @@ #define KSTM_MODULE_GLOBALS() \ static unsigned int total_tests __initdata; \ -static unsigned int failed_tests __initdata +static unsigned int failed_tests __initdata; \ +static unsigned int skipped_tests __initdata #define KSTM_CHECK_ZERO(x) do { \ total_tests++; \ @@ -21,11 +22,16 @@ static unsigned int failed_tests __initdata } \ } while (0) -static inline int kstm_report(unsigned int total_tests, unsigned int failed_tests) +static inline int kstm_report(unsigned int total_tests, unsigned int failed_tests, + unsigned int skipped_tests) { - if (failed_tests == 0) - pr_info("all %u tests passed\n", total_tests); - else + if (failed_tests == 0) { + if (skipped_tests) { + pr_info("skipped %u tests\n", skipped_tests); + pr_info("remaining %u tests passed\n", total_tests); + } else + pr_info("all %u tests passed\n", total_tests); + } else pr_warn("failed %u out of %u tests\n", failed_tests, total_tests); return failed_tests ? -EINVAL : 0; @@ -36,7 +42,7 @@ static int __init __module##_init(void) \ { \ pr_info("loaded.\n"); \ selftest(); \ - return kstm_report(total_tests, failed_tests); \ + return kstm_report(total_tests, failed_tests, skipped_tests); \ } \ static void __exit __module##_exit(void) \ { \ diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index ce8f4ad39684..3a84394829ea 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -7,6 +7,7 @@ /x86_64/cr4_cpuid_sync_test /x86_64/debug_regs /x86_64/evmcs_test +/x86_64/get_cpuid_test /x86_64/kvm_pv_test /x86_64/hyperv_cpuid /x86_64/mmio_warning_test @@ -24,10 +25,15 @@ /x86_64/vmx_preemption_timer_test /x86_64/vmx_set_nested_state_test /x86_64/vmx_tsc_adjust_test +/x86_64/xapic_ipi_test +/x86_64/xen_shinfo_test +/x86_64/xen_vmcall_test /x86_64/xss_msr_test +/x86_64/vmx_pmu_msrs_test /demand_paging_test /dirty_log_test /dirty_log_perf_test /kvm_create_max_vcpus +/memslot_modification_stress_test /set_memory_region_test /steal_time diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index c7ca4faba272..8c8eda429576 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -33,13 +33,14 @@ ifeq ($(ARCH),s390) UNAME_M := s390x endif -LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c +LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c lib/perf_test_util.c LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test +TEST_GEN_PROGS_x86_64 += x86_64/get_cpuid_test TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test @@ -56,13 +57,18 @@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test +TEST_GEN_PROGS_x86_64 += x86_64/xapic_ipi_test TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test TEST_GEN_PROGS_x86_64 += x86_64/debug_regs TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test +TEST_GEN_PROGS_x86_64 += x86_64/vmx_pmu_msrs_test +TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test +TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test TEST_GEN_PROGS_x86_64 += demand_paging_test TEST_GEN_PROGS_x86_64 += dirty_log_test TEST_GEN_PROGS_x86_64 += dirty_log_perf_test TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus +TEST_GEN_PROGS_x86_64 += memslot_modification_stress_test TEST_GEN_PROGS_x86_64 += set_memory_region_test TEST_GEN_PROGS_x86_64 += steal_time diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 3d96a7bfaff3..5f7a229c3af1 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -7,23 +7,20 @@ * Copyright (C) 2019, Google, Inc. */ -#define _GNU_SOURCE /* for program_invocation_name */ +#define _GNU_SOURCE /* for pipe2 */ #include <stdio.h> #include <stdlib.h> -#include <sys/syscall.h> -#include <unistd.h> -#include <asm/unistd.h> #include <time.h> #include <poll.h> #include <pthread.h> -#include <linux/bitmap.h> -#include <linux/bitops.h> #include <linux/userfaultfd.h> +#include <sys/syscall.h> -#include "perf_test_util.h" -#include "processor.h" +#include "kvm_util.h" #include "test_util.h" +#include "perf_test_util.h" +#include "guest_modes.h" #ifdef __NR_userfaultfd @@ -39,12 +36,14 @@ #define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__) #endif +static int nr_vcpus = 1; +static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; static char *guest_data_prototype; static void *vcpu_worker(void *data) { int ret; - struct vcpu_args *vcpu_args = (struct vcpu_args *)data; + struct perf_test_vcpu_args *vcpu_args = (struct perf_test_vcpu_args *)data; int vcpu_id = vcpu_args->vcpu_id; struct kvm_vm *vm = perf_test_args.vm; struct kvm_run *run; @@ -65,7 +64,7 @@ static void *vcpu_worker(void *data) exit_reason_str(run->exit_reason)); } - ts_diff = timespec_diff_now(start); + ts_diff = timespec_elapsed(start); PER_VCPU_DEBUG("vCPU %d execution time: %ld.%.9lds\n", vcpu_id, ts_diff.tv_sec, ts_diff.tv_nsec); @@ -96,7 +95,7 @@ static int handle_uffd_page_request(int uffd, uint64_t addr) return r; } - ts_diff = timespec_diff_now(start); + ts_diff = timespec_elapsed(start); PER_PAGE_DEBUG("UFFDIO_COPY %d \t%ld ns\n", tid, timespec_to_ns(ts_diff)); @@ -191,7 +190,7 @@ static void *uffd_handler_thread_fn(void *arg) pages++; } - ts_diff = timespec_diff_now(start); + ts_diff = timespec_elapsed(start); PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n", pages, ts_diff.tv_sec, ts_diff.tv_nsec, pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); @@ -248,9 +247,15 @@ static int setup_demand_paging(struct kvm_vm *vm, return 0; } -static void run_test(enum vm_guest_mode mode, bool use_uffd, - useconds_t uffd_delay) +struct test_params { + bool use_uffd; + useconds_t uffd_delay; + bool partition_vcpu_memory_access; +}; + +static void run_test(enum vm_guest_mode mode, void *arg) { + struct test_params *p = arg; pthread_t *vcpu_threads; pthread_t *uffd_handler_threads = NULL; struct uffd_handler_args *uffd_args = NULL; @@ -261,7 +266,8 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, int vcpu_id; int r; - vm = create_vm(mode, nr_vcpus, guest_percpu_mem_size); + vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, + VM_MEM_SRC_ANONYMOUS); perf_test_args.wr_fract = 1; @@ -273,9 +279,10 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads)); TEST_ASSERT(vcpu_threads, "Memory allocation failed"); - add_vcpus(vm, nr_vcpus, guest_percpu_mem_size); + perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size, + p->partition_vcpu_memory_access); - if (use_uffd) { + if (p->use_uffd) { uffd_handler_threads = malloc(nr_vcpus * sizeof(*uffd_handler_threads)); TEST_ASSERT(uffd_handler_threads, "Memory allocation failed"); @@ -289,10 +296,19 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { vm_paddr_t vcpu_gpa; void *vcpu_hva; + uint64_t vcpu_mem_size; - vcpu_gpa = guest_test_phys_mem + (vcpu_id * guest_percpu_mem_size); + + if (p->partition_vcpu_memory_access) { + vcpu_gpa = guest_test_phys_mem + + (vcpu_id * guest_percpu_mem_size); + vcpu_mem_size = guest_percpu_mem_size; + } else { + vcpu_gpa = guest_test_phys_mem; + vcpu_mem_size = guest_percpu_mem_size * nr_vcpus; + } PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n", - vcpu_id, vcpu_gpa, vcpu_gpa + guest_percpu_mem_size); + vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_mem_size); /* Cache the HVA pointer of the region */ vcpu_hva = addr_gpa2hva(vm, vcpu_gpa); @@ -308,8 +324,8 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, r = setup_demand_paging(vm, &uffd_handler_threads[vcpu_id], pipefds[vcpu_id * 2], - uffd_delay, &uffd_args[vcpu_id], - vcpu_hva, guest_percpu_mem_size); + p->uffd_delay, &uffd_args[vcpu_id], + vcpu_hva, vcpu_mem_size); if (r < 0) exit(-r); } @@ -335,11 +351,11 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, PER_VCPU_DEBUG("Joined thread for vCPU %d\n", vcpu_id); } - ts_diff = timespec_diff_now(start); + ts_diff = timespec_elapsed(start); pr_info("All vCPU threads joined\n"); - if (use_uffd) { + if (p->use_uffd) { char c; /* Tell the user fault fd handler threads to quit */ @@ -357,43 +373,23 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, perf_test_args.vcpu_args[0].pages * nr_vcpus / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); - ucall_uninit(vm); - kvm_vm_free(vm); + perf_test_destroy_vm(vm); free(guest_data_prototype); free(vcpu_threads); - if (use_uffd) { + if (p->use_uffd) { free(uffd_handler_threads); free(uffd_args); free(pipefds); } } -struct guest_mode { - bool supported; - bool enabled; -}; -static struct guest_mode guest_modes[NUM_VM_MODES]; - -#define guest_mode_init(mode, supported, enabled) ({ \ - guest_modes[mode] = (struct guest_mode){ supported, enabled }; \ -}) - static void help(char *name) { - int i; - puts(""); printf("usage: %s [-h] [-m mode] [-u] [-d uffd_delay_usec]\n" - " [-b memory] [-v vcpus]\n", name); - printf(" -m: specify the guest mode ID to test\n" - " (default: test all supported modes)\n" - " This option may be used multiple times.\n" - " Guest mode IDs:\n"); - for (i = 0; i < NUM_VM_MODES; ++i) { - printf(" %d: %s%s\n", i, vm_guest_mode_string(i), - guest_modes[i].supported ? " (supported)" : ""); - } + " [-b memory] [-v vcpus] [-o]\n", name); + guest_modes_help(); printf(" -u: use User Fault FD to handle vCPU page\n" " faults.\n"); printf(" -d: add a delay in usec to the User Fault\n" @@ -403,6 +399,8 @@ static void help(char *name) " demand paged by each vCPU. e.g. 10M or 3G.\n" " Default: 1G\n"); printf(" -v: specify the number of vCPUs to run.\n"); + printf(" -o: Overlap guest memory accesses instead of partitioning\n" + " them into a separate region of memory for each vCPU.\n"); puts(""); exit(0); } @@ -410,53 +408,24 @@ static void help(char *name) int main(int argc, char *argv[]) { int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); - bool mode_selected = false; - unsigned int mode; - int opt, i; - bool use_uffd = false; - useconds_t uffd_delay = 0; - -#ifdef __x86_64__ - guest_mode_init(VM_MODE_PXXV48_4K, true, true); -#endif -#ifdef __aarch64__ - guest_mode_init(VM_MODE_P40V48_4K, true, true); - guest_mode_init(VM_MODE_P40V48_64K, true, true); - { - unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE); - - if (limit >= 52) - guest_mode_init(VM_MODE_P52V48_64K, true, true); - if (limit >= 48) { - guest_mode_init(VM_MODE_P48V48_4K, true, true); - guest_mode_init(VM_MODE_P48V48_64K, true, true); - } - } -#endif -#ifdef __s390x__ - guest_mode_init(VM_MODE_P40V48_4K, true, true); -#endif + struct test_params p = { + .partition_vcpu_memory_access = true, + }; + int opt; - while ((opt = getopt(argc, argv, "hm:ud:b:v:")) != -1) { + guest_modes_append_default(); + + while ((opt = getopt(argc, argv, "hm:ud:b:v:o")) != -1) { switch (opt) { case 'm': - if (!mode_selected) { - for (i = 0; i < NUM_VM_MODES; ++i) - guest_modes[i].enabled = false; - mode_selected = true; - } - mode = strtoul(optarg, NULL, 10); - TEST_ASSERT(mode < NUM_VM_MODES, - "Guest mode ID %d too big", mode); - guest_modes[mode].enabled = true; + guest_modes_cmdline(optarg); break; case 'u': - use_uffd = true; + p.use_uffd = true; break; case 'd': - uffd_delay = strtoul(optarg, NULL, 0); - TEST_ASSERT(uffd_delay >= 0, - "A negative UFFD delay is not supported."); + p.uffd_delay = strtoul(optarg, NULL, 0); + TEST_ASSERT(p.uffd_delay >= 0, "A negative UFFD delay is not supported."); break; case 'b': guest_percpu_mem_size = parse_size(optarg); @@ -466,6 +435,9 @@ int main(int argc, char *argv[]) TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus, "Invalid number of vcpus, must be between 1 and %d", max_vcpus); break; + case 'o': + p.partition_vcpu_memory_access = false; + break; case 'h': default: help(argv[0]); @@ -473,14 +445,7 @@ int main(int argc, char *argv[]) } } - for (i = 0; i < NUM_VM_MODES; ++i) { - if (!guest_modes[i].enabled) - continue; - TEST_ASSERT(guest_modes[i].supported, - "Guest mode ID %d (%s) not supported.", - i, vm_guest_mode_string(i)); - run_test(i, use_uffd, uffd_delay); - } + for_each_guest_mode(run_test, &p); return 0; } diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index 9c6a7be31e03..04a2641261be 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -8,29 +8,28 @@ * Copyright (C) 2020, Google, Inc. */ -#define _GNU_SOURCE /* for program_invocation_name */ - #include <stdio.h> #include <stdlib.h> -#include <unistd.h> #include <time.h> #include <pthread.h> #include <linux/bitmap.h> -#include <linux/bitops.h> #include "kvm_util.h" -#include "perf_test_util.h" -#include "processor.h" #include "test_util.h" +#include "perf_test_util.h" +#include "guest_modes.h" /* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop)*/ #define TEST_HOST_LOOP_N 2UL +static int nr_vcpus = 1; +static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; + /* Host variables */ static u64 dirty_log_manual_caps; static bool host_quit; -static uint64_t iteration; -static uint64_t vcpu_last_completed_iteration[MAX_VCPUS]; +static int iteration; +static int vcpu_last_completed_iteration[KVM_MAX_VCPUS]; static void *vcpu_worker(void *data) { @@ -42,18 +41,18 @@ static void *vcpu_worker(void *data) struct timespec ts_diff; struct timespec total = (struct timespec){0}; struct timespec avg; - struct vcpu_args *vcpu_args = (struct vcpu_args *)data; + struct perf_test_vcpu_args *vcpu_args = (struct perf_test_vcpu_args *)data; int vcpu_id = vcpu_args->vcpu_id; vcpu_args_set(vm, vcpu_id, 1, vcpu_id); run = vcpu_state(vm, vcpu_id); while (!READ_ONCE(host_quit)) { - uint64_t current_iteration = READ_ONCE(iteration); + int current_iteration = READ_ONCE(iteration); clock_gettime(CLOCK_MONOTONIC, &start); ret = _vcpu_run(vm, vcpu_id); - ts_diff = timespec_diff_now(start); + ts_diff = timespec_elapsed(start); TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret); TEST_ASSERT(get_ucall(vm, vcpu_id, NULL) == UCALL_SYNC, @@ -62,17 +61,17 @@ static void *vcpu_worker(void *data) pr_debug("Got sync event from vCPU %d\n", vcpu_id); vcpu_last_completed_iteration[vcpu_id] = current_iteration; - pr_debug("vCPU %d updated last completed iteration to %lu\n", + pr_debug("vCPU %d updated last completed iteration to %d\n", vcpu_id, vcpu_last_completed_iteration[vcpu_id]); if (current_iteration) { pages_count += vcpu_args->pages; total = timespec_add(total, ts_diff); - pr_debug("vCPU %d iteration %lu dirty memory time: %ld.%.9lds\n", + pr_debug("vCPU %d iteration %d dirty memory time: %ld.%.9lds\n", vcpu_id, current_iteration, ts_diff.tv_sec, ts_diff.tv_nsec); } else { - pr_debug("vCPU %d iteration %lu populate memory time: %ld.%.9lds\n", + pr_debug("vCPU %d iteration %d populate memory time: %ld.%.9lds\n", vcpu_id, current_iteration, ts_diff.tv_sec, ts_diff.tv_nsec); } @@ -82,16 +81,24 @@ static void *vcpu_worker(void *data) } avg = timespec_div(total, vcpu_last_completed_iteration[vcpu_id]); - pr_debug("\nvCPU %d dirtied 0x%lx pages over %lu iterations in %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n", + pr_debug("\nvCPU %d dirtied 0x%lx pages over %d iterations in %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n", vcpu_id, pages_count, vcpu_last_completed_iteration[vcpu_id], total.tv_sec, total.tv_nsec, avg.tv_sec, avg.tv_nsec); return NULL; } -static void run_test(enum vm_guest_mode mode, unsigned long iterations, - uint64_t phys_offset, int wr_fract) +struct test_params { + unsigned long iterations; + uint64_t phys_offset; + int wr_fract; + bool partition_vcpu_memory_access; + enum vm_mem_backing_src_type backing_src; +}; + +static void run_test(enum vm_guest_mode mode, void *arg) { + struct test_params *p = arg; pthread_t *vcpu_threads; struct kvm_vm *vm; unsigned long *bmap; @@ -106,9 +113,10 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, struct kvm_enable_cap cap = {}; struct timespec clear_dirty_log_total = (struct timespec){0}; - vm = create_vm(mode, nr_vcpus, guest_percpu_mem_size); + vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, + p->backing_src); - perf_test_args.wr_fract = wr_fract; + perf_test_args.wr_fract = p->wr_fract; guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm_get_page_shift(vm); guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); @@ -124,7 +132,8 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads)); TEST_ASSERT(vcpu_threads, "Memory allocation failed"); - add_vcpus(vm, nr_vcpus, guest_percpu_mem_size); + perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size, + p->partition_vcpu_memory_access); sync_global_to_guest(vm, perf_test_args); @@ -134,29 +143,33 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, clock_gettime(CLOCK_MONOTONIC, &start); for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { + vcpu_last_completed_iteration[vcpu_id] = -1; + pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker, &perf_test_args.vcpu_args[vcpu_id]); } - /* Allow the vCPU to populate memory */ - pr_debug("Starting iteration %lu - Populating\n", iteration); - while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) != iteration) - pr_debug("Waiting for vcpu_last_completed_iteration == %lu\n", - iteration); + /* Allow the vCPUs to populate memory */ + pr_debug("Starting iteration %d - Populating\n", iteration); + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { + while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) != + iteration) + ; + } - ts_diff = timespec_diff_now(start); + ts_diff = timespec_elapsed(start); pr_info("Populate memory time: %ld.%.9lds\n", ts_diff.tv_sec, ts_diff.tv_nsec); /* Enable dirty logging */ clock_gettime(CLOCK_MONOTONIC, &start); - vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX, + vm_mem_region_set_flags(vm, PERF_TEST_MEM_SLOT_INDEX, KVM_MEM_LOG_DIRTY_PAGES); - ts_diff = timespec_diff_now(start); + ts_diff = timespec_elapsed(start); pr_info("Enabling dirty logging time: %ld.%.9lds\n\n", ts_diff.tv_sec, ts_diff.tv_nsec); - while (iteration < iterations) { + while (iteration < p->iterations) { /* * Incrementing the iteration number will start the vCPUs * dirtying memory again. @@ -164,100 +177,80 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, clock_gettime(CLOCK_MONOTONIC, &start); iteration++; - pr_debug("Starting iteration %lu\n", iteration); + pr_debug("Starting iteration %d\n", iteration); for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { - while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) != iteration) - pr_debug("Waiting for vCPU %d vcpu_last_completed_iteration == %lu\n", - vcpu_id, iteration); + while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) + != iteration) + ; } - ts_diff = timespec_diff_now(start); + ts_diff = timespec_elapsed(start); vcpu_dirty_total = timespec_add(vcpu_dirty_total, ts_diff); - pr_info("Iteration %lu dirty memory time: %ld.%.9lds\n", + pr_info("Iteration %d dirty memory time: %ld.%.9lds\n", iteration, ts_diff.tv_sec, ts_diff.tv_nsec); clock_gettime(CLOCK_MONOTONIC, &start); - kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); + kvm_vm_get_dirty_log(vm, PERF_TEST_MEM_SLOT_INDEX, bmap); - ts_diff = timespec_diff_now(start); + ts_diff = timespec_elapsed(start); get_dirty_log_total = timespec_add(get_dirty_log_total, ts_diff); - pr_info("Iteration %lu get dirty log time: %ld.%.9lds\n", + pr_info("Iteration %d get dirty log time: %ld.%.9lds\n", iteration, ts_diff.tv_sec, ts_diff.tv_nsec); if (dirty_log_manual_caps) { clock_gettime(CLOCK_MONOTONIC, &start); - kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0, + kvm_vm_clear_dirty_log(vm, PERF_TEST_MEM_SLOT_INDEX, bmap, 0, host_num_pages); - ts_diff = timespec_diff_now(start); + ts_diff = timespec_elapsed(start); clear_dirty_log_total = timespec_add(clear_dirty_log_total, ts_diff); - pr_info("Iteration %lu clear dirty log time: %ld.%.9lds\n", + pr_info("Iteration %d clear dirty log time: %ld.%.9lds\n", iteration, ts_diff.tv_sec, ts_diff.tv_nsec); } } - /* Tell the vcpu thread to quit */ - host_quit = true; - for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) - pthread_join(vcpu_threads[vcpu_id], NULL); - /* Disable dirty logging */ clock_gettime(CLOCK_MONOTONIC, &start); - vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX, 0); - ts_diff = timespec_diff_now(start); + vm_mem_region_set_flags(vm, PERF_TEST_MEM_SLOT_INDEX, 0); + ts_diff = timespec_elapsed(start); pr_info("Disabling dirty logging time: %ld.%.9lds\n", ts_diff.tv_sec, ts_diff.tv_nsec); - avg = timespec_div(get_dirty_log_total, iterations); + /* Tell the vcpu thread to quit */ + host_quit = true; + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) + pthread_join(vcpu_threads[vcpu_id], NULL); + + avg = timespec_div(get_dirty_log_total, p->iterations); pr_info("Get dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n", - iterations, get_dirty_log_total.tv_sec, + p->iterations, get_dirty_log_total.tv_sec, get_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec); if (dirty_log_manual_caps) { - avg = timespec_div(clear_dirty_log_total, iterations); + avg = timespec_div(clear_dirty_log_total, p->iterations); pr_info("Clear dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n", - iterations, clear_dirty_log_total.tv_sec, + p->iterations, clear_dirty_log_total.tv_sec, clear_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec); } free(bmap); free(vcpu_threads); - ucall_uninit(vm); - kvm_vm_free(vm); + perf_test_destroy_vm(vm); } -struct guest_mode { - bool supported; - bool enabled; -}; -static struct guest_mode guest_modes[NUM_VM_MODES]; - -#define guest_mode_init(mode, supported, enabled) ({ \ - guest_modes[mode] = (struct guest_mode){ supported, enabled }; \ -}) - static void help(char *name) { - int i; - puts(""); printf("usage: %s [-h] [-i iterations] [-p offset] " - "[-m mode] [-b vcpu bytes] [-v vcpus]\n", name); + "[-m mode] [-b vcpu bytes] [-v vcpus] [-o] [-s mem type]\n", name); puts(""); printf(" -i: specify iteration counts (default: %"PRIu64")\n", TEST_HOST_LOOP_N); printf(" -p: specify guest physical test memory offset\n" " Warning: a low offset can conflict with the loaded test code.\n"); - printf(" -m: specify the guest mode ID to test " - "(default: test all supported modes)\n" - " This option may be used multiple times.\n" - " Guest mode IDs:\n"); - for (i = 0; i < NUM_VM_MODES; ++i) { - printf(" %d: %s%s\n", i, vm_guest_mode_string(i), - guest_modes[i].supported ? " (supported)" : ""); - } + guest_modes_help(); printf(" -b: specify the size of the memory region which should be\n" " dirtied by each vCPU. e.g. 10M or 3G.\n" " (default: 1G)\n"); @@ -266,80 +259,61 @@ static void help(char *name) " 1/<fraction of pages to write>.\n" " (default: 1 i.e. all pages are written to.)\n"); printf(" -v: specify the number of vCPUs to run.\n"); + printf(" -o: Overlap guest memory accesses instead of partitioning\n" + " them into a separate region of memory for each vCPU.\n"); + printf(" -s: specify the type of memory that should be used to\n" + " back the guest data region.\n\n"); + backing_src_help(); puts(""); exit(0); } int main(int argc, char *argv[]) { - unsigned long iterations = TEST_HOST_LOOP_N; - bool mode_selected = false; - uint64_t phys_offset = 0; - unsigned int mode; - int opt, i; - int wr_fract = 1; + int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); + struct test_params p = { + .iterations = TEST_HOST_LOOP_N, + .wr_fract = 1, + .partition_vcpu_memory_access = true, + .backing_src = VM_MEM_SRC_ANONYMOUS, + }; + int opt; dirty_log_manual_caps = kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | KVM_DIRTY_LOG_INITIALLY_SET); -#ifdef __x86_64__ - guest_mode_init(VM_MODE_PXXV48_4K, true, true); -#endif -#ifdef __aarch64__ - guest_mode_init(VM_MODE_P40V48_4K, true, true); - guest_mode_init(VM_MODE_P40V48_64K, true, true); - - { - unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE); - - if (limit >= 52) - guest_mode_init(VM_MODE_P52V48_64K, true, true); - if (limit >= 48) { - guest_mode_init(VM_MODE_P48V48_4K, true, true); - guest_mode_init(VM_MODE_P48V48_64K, true, true); - } - } -#endif -#ifdef __s390x__ - guest_mode_init(VM_MODE_P40V48_4K, true, true); -#endif + guest_modes_append_default(); - while ((opt = getopt(argc, argv, "hi:p:m:b:f:v:")) != -1) { + while ((opt = getopt(argc, argv, "hi:p:m:b:f:v:os:")) != -1) { switch (opt) { case 'i': - iterations = strtol(optarg, NULL, 10); + p.iterations = atoi(optarg); break; case 'p': - phys_offset = strtoull(optarg, NULL, 0); + p.phys_offset = strtoull(optarg, NULL, 0); break; case 'm': - if (!mode_selected) { - for (i = 0; i < NUM_VM_MODES; ++i) - guest_modes[i].enabled = false; - mode_selected = true; - } - mode = strtoul(optarg, NULL, 10); - TEST_ASSERT(mode < NUM_VM_MODES, - "Guest mode ID %d too big", mode); - guest_modes[mode].enabled = true; + guest_modes_cmdline(optarg); break; case 'b': guest_percpu_mem_size = parse_size(optarg); break; case 'f': - wr_fract = atoi(optarg); - TEST_ASSERT(wr_fract >= 1, + p.wr_fract = atoi(optarg); + TEST_ASSERT(p.wr_fract >= 1, "Write fraction cannot be less than one"); break; case 'v': nr_vcpus = atoi(optarg); - TEST_ASSERT(nr_vcpus > 0, - "Must have a positive number of vCPUs"); - TEST_ASSERT(nr_vcpus <= MAX_VCPUS, - "This test does not currently support\n" - "more than %d vCPUs.", MAX_VCPUS); + TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus, + "Invalid number of vcpus, must be between 1 and %d", max_vcpus); + break; + case 'o': + p.partition_vcpu_memory_access = false; + case 's': + p.backing_src = parse_backing_src_type(optarg); break; case 'h': default: @@ -348,18 +322,11 @@ int main(int argc, char *argv[]) } } - TEST_ASSERT(iterations >= 2, "The test should have at least two iterations"); + TEST_ASSERT(p.iterations >= 2, "The test should have at least two iterations"); - pr_info("Test iterations: %"PRIu64"\n", iterations); + pr_info("Test iterations: %"PRIu64"\n", p.iterations); - for (i = 0; i < NUM_VM_MODES; ++i) { - if (!guest_modes[i].enabled) - continue; - TEST_ASSERT(guest_modes[i].supported, - "Guest mode ID %d (%s) not supported.", - i, vm_guest_mode_string(i)); - run_test(i, iterations, phys_offset, wr_fract); - } + for_each_guest_mode(run_test, &p); return 0; } diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 471baecb7772..bb2752d78fe3 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -9,8 +9,6 @@ #include <stdio.h> #include <stdlib.h> -#include <unistd.h> -#include <time.h> #include <pthread.h> #include <semaphore.h> #include <sys/types.h> @@ -20,8 +18,9 @@ #include <linux/bitops.h> #include <asm/barrier.h> -#include "test_util.h" #include "kvm_util.h" +#include "test_util.h" +#include "guest_modes.h" #include "processor.h" #define VCPU_ID 1 @@ -673,9 +672,15 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid, #define DIRTY_MEM_BITS 30 /* 1G */ #define PAGE_SHIFT_4K 12 -static void run_test(enum vm_guest_mode mode, unsigned long iterations, - unsigned long interval, uint64_t phys_offset) +struct test_params { + unsigned long iterations; + unsigned long interval; + uint64_t phys_offset; +}; + +static void run_test(enum vm_guest_mode mode, void *arg) { + struct test_params *p = arg; struct kvm_vm *vm; unsigned long *bmap; @@ -709,12 +714,12 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, host_page_size = getpagesize(); host_num_pages = vm_num_host_pages(mode, guest_num_pages); - if (!phys_offset) { + if (!p->phys_offset) { guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) * guest_page_size; guest_test_phys_mem &= ~(host_page_size - 1); } else { - guest_test_phys_mem = phys_offset; + guest_test_phys_mem = p->phys_offset; } #ifdef __s390x__ @@ -758,9 +763,9 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, pthread_create(&vcpu_thread, NULL, vcpu_worker, vm); - while (iteration < iterations) { + while (iteration < p->iterations) { /* Give the vcpu thread some time to dirty some pages */ - usleep(interval * 1000); + usleep(p->interval * 1000); log_mode_collect_dirty_pages(vm, TEST_MEM_SLOT_INDEX, bmap, host_num_pages); vm_dirty_log_verify(mode, bmap); @@ -783,20 +788,8 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, kvm_vm_free(vm); } -struct guest_mode { - bool supported; - bool enabled; -}; -static struct guest_mode guest_modes[NUM_VM_MODES]; - -#define guest_mode_init(mode, supported, enabled) ({ \ - guest_modes[mode] = (struct guest_mode){ supported, enabled }; \ -}) - static void help(char *name) { - int i; - puts(""); printf("usage: %s [-h] [-i iterations] [-I interval] " "[-p offset] [-m mode]\n", name); @@ -813,51 +806,23 @@ static void help(char *name) printf(" -M: specify the host logging mode " "(default: run all log modes). Supported modes: \n\t"); log_modes_dump(); - printf(" -m: specify the guest mode ID to test " - "(default: test all supported modes)\n" - " This option may be used multiple times.\n" - " Guest mode IDs:\n"); - for (i = 0; i < NUM_VM_MODES; ++i) { - printf(" %d: %s%s\n", i, vm_guest_mode_string(i), - guest_modes[i].supported ? " (supported)" : ""); - } + guest_modes_help(); puts(""); exit(0); } int main(int argc, char *argv[]) { - unsigned long iterations = TEST_HOST_LOOP_N; - unsigned long interval = TEST_HOST_LOOP_INTERVAL; - bool mode_selected = false; - uint64_t phys_offset = 0; - unsigned int mode; - int opt, i, j; + struct test_params p = { + .iterations = TEST_HOST_LOOP_N, + .interval = TEST_HOST_LOOP_INTERVAL, + }; + int opt, i; sem_init(&dirty_ring_vcpu_stop, 0, 0); sem_init(&dirty_ring_vcpu_cont, 0, 0); -#ifdef __x86_64__ - guest_mode_init(VM_MODE_PXXV48_4K, true, true); -#endif -#ifdef __aarch64__ - guest_mode_init(VM_MODE_P40V48_4K, true, true); - guest_mode_init(VM_MODE_P40V48_64K, true, true); - - { - unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE); - - if (limit >= 52) - guest_mode_init(VM_MODE_P52V48_64K, true, true); - if (limit >= 48) { - guest_mode_init(VM_MODE_P48V48_4K, true, true); - guest_mode_init(VM_MODE_P48V48_64K, true, true); - } - } -#endif -#ifdef __s390x__ - guest_mode_init(VM_MODE_P40V48_4K, true, true); -#endif + guest_modes_append_default(); while ((opt = getopt(argc, argv, "c:hi:I:p:m:M:")) != -1) { switch (opt) { @@ -865,24 +830,16 @@ int main(int argc, char *argv[]) test_dirty_ring_count = strtol(optarg, NULL, 10); break; case 'i': - iterations = strtol(optarg, NULL, 10); + p.iterations = strtol(optarg, NULL, 10); break; case 'I': - interval = strtol(optarg, NULL, 10); + p.interval = strtol(optarg, NULL, 10); break; case 'p': - phys_offset = strtoull(optarg, NULL, 0); + p.phys_offset = strtoull(optarg, NULL, 0); break; case 'm': - if (!mode_selected) { - for (i = 0; i < NUM_VM_MODES; ++i) - guest_modes[i].enabled = false; - mode_selected = true; - } - mode = strtoul(optarg, NULL, 10); - TEST_ASSERT(mode < NUM_VM_MODES, - "Guest mode ID %d too big", mode); - guest_modes[mode].enabled = true; + guest_modes_cmdline(optarg); break; case 'M': if (!strcmp(optarg, "all")) { @@ -911,32 +868,24 @@ int main(int argc, char *argv[]) } } - TEST_ASSERT(iterations > 2, "Iterations must be greater than two"); - TEST_ASSERT(interval > 0, "Interval must be greater than zero"); + TEST_ASSERT(p.iterations > 2, "Iterations must be greater than two"); + TEST_ASSERT(p.interval > 0, "Interval must be greater than zero"); pr_info("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n", - iterations, interval); + p.iterations, p.interval); srandom(time(0)); - for (i = 0; i < NUM_VM_MODES; ++i) { - if (!guest_modes[i].enabled) - continue; - TEST_ASSERT(guest_modes[i].supported, - "Guest mode ID %d (%s) not supported.", - i, vm_guest_mode_string(i)); - if (host_log_mode_option == LOG_MODE_ALL) { - /* Run each log mode */ - for (j = 0; j < LOG_MODE_NUM; j++) { - pr_info("Testing Log Mode '%s'\n", - log_modes[j].name); - host_log_mode = j; - run_test(i, iterations, interval, phys_offset); - } - } else { - host_log_mode = host_log_mode_option; - run_test(i, iterations, interval, phys_offset); + if (host_log_mode_option == LOG_MODE_ALL) { + /* Run each log mode */ + for (i = 0; i < LOG_MODE_NUM; i++) { + pr_info("Testing Log Mode '%s'\n", log_modes[i].name); + host_log_mode = i; + for_each_guest_mode(run_test, &p); } + } else { + host_log_mode = host_log_mode_option; + for_each_guest_mode(run_test, &p); } return 0; diff --git a/tools/testing/selftests/kvm/include/guest_modes.h b/tools/testing/selftests/kvm/include/guest_modes.h new file mode 100644 index 000000000000..b691df33e64e --- /dev/null +++ b/tools/testing/selftests/kvm/include/guest_modes.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020, Red Hat, Inc. + */ +#include "kvm_util.h" + +struct guest_mode { + bool supported; + bool enabled; +}; + +extern struct guest_mode guest_modes[NUM_VM_MODES]; + +#define guest_mode_append(mode, supported, enabled) ({ \ + guest_modes[mode] = (struct guest_mode){ supported, enabled }; \ +}) + +void guest_modes_append_default(void); +void for_each_guest_mode(void (*func)(enum vm_guest_mode, void *), void *arg); +void guest_modes_help(void); +void guest_modes_cmdline(const char *arg); diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index dfa9d369e8fc..2d7eb6989e83 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -16,6 +16,7 @@ #include "sparsebit.h" +#define KVM_MAX_VCPUS 512 /* * Callers of kvm_util only have an incomplete/opaque description of the @@ -70,11 +71,13 @@ enum vm_guest_mode { #define vm_guest_mode_string(m) vm_guest_mode_string[m] extern const char * const vm_guest_mode_string[]; -enum vm_mem_backing_src_type { - VM_MEM_SRC_ANONYMOUS, - VM_MEM_SRC_ANONYMOUS_THP, - VM_MEM_SRC_ANONYMOUS_HUGETLB, +struct vm_guest_mode_params { + unsigned int pa_bits; + unsigned int va_bits; + unsigned int page_size; + unsigned int page_shift; }; +extern const struct vm_guest_mode_params vm_guest_mode_params[]; int kvm_check_cap(long cap); int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap); diff --git a/tools/testing/selftests/kvm/include/numaif.h b/tools/testing/selftests/kvm/include/numaif.h new file mode 100644 index 000000000000..b020547403fd --- /dev/null +++ b/tools/testing/selftests/kvm/include/numaif.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * tools/testing/selftests/kvm/include/numaif.h + * + * Copyright (C) 2020, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + * Header file that provides access to NUMA API functions not explicitly + * exported to user space. + */ + +#ifndef SELFTEST_KVM_NUMAIF_H +#define SELFTEST_KVM_NUMAIF_H + +#define __NR_get_mempolicy 239 +#define __NR_migrate_pages 256 + +/* System calls */ +long get_mempolicy(int *policy, const unsigned long *nmask, + unsigned long maxnode, void *addr, int flags) +{ + return syscall(__NR_get_mempolicy, policy, nmask, + maxnode, addr, flags); +} + +long migrate_pages(int pid, unsigned long maxnode, + const unsigned long *frommask, + const unsigned long *tomask) +{ + return syscall(__NR_migrate_pages, pid, maxnode, frommask, tomask); +} + +/* Policies */ +#define MPOL_DEFAULT 0 +#define MPOL_PREFERRED 1 +#define MPOL_BIND 2 +#define MPOL_INTERLEAVE 3 + +#define MPOL_MAX MPOL_INTERLEAVE + +/* Flags for get_mem_policy */ +#define MPOL_F_NODE (1<<0) /* return next il node or node of address */ + /* Warning: MPOL_F_NODE is unsupported and + * subject to change. Don't use. + */ +#define MPOL_F_ADDR (1<<1) /* look up vma using address */ +#define MPOL_F_MEMS_ALLOWED (1<<2) /* query nodes allowed in cpuset */ + +/* Flags for mbind */ +#define MPOL_MF_STRICT (1<<0) /* Verify existing pages in the mapping */ +#define MPOL_MF_MOVE (1<<1) /* Move pages owned by this process to conform to mapping */ +#define MPOL_MF_MOVE_ALL (1<<2) /* Move every page to conform to mapping */ + +#endif /* SELFTEST_KVM_NUMAIF_H */ diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h index 239421e4f6b8..005f2143adeb 100644 --- a/tools/testing/selftests/kvm/include/perf_test_util.h +++ b/tools/testing/selftests/kvm/include/perf_test_util.h @@ -9,38 +9,15 @@ #define SELFTEST_KVM_PERF_TEST_UTIL_H #include "kvm_util.h" -#include "processor.h" - -#define MAX_VCPUS 512 - -#define PAGE_SHIFT_4K 12 -#define PTES_PER_4K_PT 512 - -#define TEST_MEM_SLOT_INDEX 1 /* Default guest test virtual memory offset */ #define DEFAULT_GUEST_TEST_MEM 0xc0000000 #define DEFAULT_PER_VCPU_MEM_SIZE (1 << 30) /* 1G */ -/* - * Guest physical memory offset of the testing memory slot. - * This will be set to the topmost valid physical address minus - * the test memory size. - */ -static uint64_t guest_test_phys_mem; - -/* - * Guest virtual memory offset of the testing memory slot. - * Must not conflict with identity mapped test code. - */ -static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; -static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; - -/* Number of VCPUs for the test */ -static int nr_vcpus = 1; +#define PERF_TEST_MEM_SLOT_INDEX 1 -struct vcpu_args { +struct perf_test_vcpu_args { uint64_t gva; uint64_t pages; @@ -54,141 +31,24 @@ struct perf_test_args { uint64_t guest_page_size; int wr_fract; - struct vcpu_args vcpu_args[MAX_VCPUS]; + struct perf_test_vcpu_args vcpu_args[KVM_MAX_VCPUS]; }; -static struct perf_test_args perf_test_args; +extern struct perf_test_args perf_test_args; /* - * Continuously write to the first 8 bytes of each page in the - * specified region. + * Guest physical memory offset of the testing memory slot. + * This will be set to the topmost valid physical address minus + * the test memory size. */ -static void guest_code(uint32_t vcpu_id) -{ - struct vcpu_args *vcpu_args = &perf_test_args.vcpu_args[vcpu_id]; - uint64_t gva; - uint64_t pages; - int i; - - /* Make sure vCPU args data structure is not corrupt. */ - GUEST_ASSERT(vcpu_args->vcpu_id == vcpu_id); - - gva = vcpu_args->gva; - pages = vcpu_args->pages; - - while (true) { - for (i = 0; i < pages; i++) { - uint64_t addr = gva + (i * perf_test_args.guest_page_size); - - if (i % perf_test_args.wr_fract == 0) - *(uint64_t *)addr = 0x0123456789ABCDEF; - else - READ_ONCE(*(uint64_t *)addr); - } - - GUEST_SYNC(1); - } -} - -static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus, - uint64_t vcpu_memory_bytes) -{ - struct kvm_vm *vm; - uint64_t pages = DEFAULT_GUEST_PHY_PAGES; - uint64_t guest_num_pages; - - /* Account for a few pages per-vCPU for stacks */ - pages += DEFAULT_STACK_PGS * vcpus; - - /* - * Reserve twice the ammount of memory needed to map the test region and - * the page table / stacks region, at 4k, for page tables. Do the - * calculation with 4K page size: the smallest of all archs. (e.g., 64K - * page size guest will need even less memory for page tables). - */ - pages += (2 * pages) / PTES_PER_4K_PT; - pages += ((2 * vcpus * vcpu_memory_bytes) >> PAGE_SHIFT_4K) / - PTES_PER_4K_PT; - pages = vm_adjust_num_guest_pages(mode, pages); - - pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); - - vm = vm_create(mode, pages, O_RDWR); - kvm_vm_elf_load(vm, program_invocation_name, 0, 0); -#ifdef __x86_64__ - vm_create_irqchip(vm); -#endif - - perf_test_args.vm = vm; - perf_test_args.guest_page_size = vm_get_page_size(vm); - perf_test_args.host_page_size = getpagesize(); - - TEST_ASSERT(vcpu_memory_bytes % perf_test_args.guest_page_size == 0, - "Guest memory size is not guest page size aligned."); - - guest_num_pages = (vcpus * vcpu_memory_bytes) / - perf_test_args.guest_page_size; - guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); - - /* - * If there should be more memory in the guest test region than there - * can be pages in the guest, it will definitely cause problems. - */ - TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm), - "Requested more guest memory than address space allows.\n" - " guest pages: %lx max gfn: %x vcpus: %d wss: %lx]\n", - guest_num_pages, vm_get_max_gfn(vm), vcpus, - vcpu_memory_bytes); - - TEST_ASSERT(vcpu_memory_bytes % perf_test_args.host_page_size == 0, - "Guest memory size is not host page size aligned."); - - guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) * - perf_test_args.guest_page_size; - guest_test_phys_mem &= ~(perf_test_args.host_page_size - 1); - -#ifdef __s390x__ - /* Align to 1M (segment size) */ - guest_test_phys_mem &= ~((1 << 20) - 1); -#endif - - pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem); - - /* Add an extra memory slot for testing */ - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, - guest_test_phys_mem, - TEST_MEM_SLOT_INDEX, - guest_num_pages, 0); - - /* Do mapping for the demand paging memory slot */ - virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0); - - ucall_init(vm, NULL); - - return vm; -} - -static void add_vcpus(struct kvm_vm *vm, int vcpus, uint64_t vcpu_memory_bytes) -{ - vm_paddr_t vcpu_gpa; - struct vcpu_args *vcpu_args; - int vcpu_id; - - for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { - vcpu_args = &perf_test_args.vcpu_args[vcpu_id]; - - vm_vcpu_add_default(vm, vcpu_id, guest_code); - - vcpu_args->vcpu_id = vcpu_id; - vcpu_args->gva = guest_test_virt_mem + - (vcpu_id * vcpu_memory_bytes); - vcpu_args->pages = vcpu_memory_bytes / - perf_test_args.guest_page_size; - - vcpu_gpa = guest_test_phys_mem + (vcpu_id * vcpu_memory_bytes); - pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n", - vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_memory_bytes); - } -} +extern uint64_t guest_test_phys_mem; + +struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus, + uint64_t vcpu_memory_bytes, + enum vm_mem_backing_src_type backing_src); +void perf_test_destroy_vm(struct kvm_vm *vm); +void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus, + uint64_t vcpu_memory_bytes, + bool partition_vcpu_memory_access); #endif /* SELFTEST_KVM_PERF_TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index ffffa560436b..b7f41399f22c 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -64,7 +64,21 @@ int64_t timespec_to_ns(struct timespec ts); struct timespec timespec_add_ns(struct timespec ts, int64_t ns); struct timespec timespec_add(struct timespec ts1, struct timespec ts2); struct timespec timespec_sub(struct timespec ts1, struct timespec ts2); -struct timespec timespec_diff_now(struct timespec start); +struct timespec timespec_elapsed(struct timespec start); struct timespec timespec_div(struct timespec ts, int divisor); +enum vm_mem_backing_src_type { + VM_MEM_SRC_ANONYMOUS, + VM_MEM_SRC_ANONYMOUS_THP, + VM_MEM_SRC_ANONYMOUS_HUGETLB, +}; + +struct vm_mem_backing_src_alias { + const char *name; + enum vm_mem_backing_src_type type; +}; + +void backing_src_help(void); +enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name); + #endif /* SELFTEST_KVM_TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 90cd5984751b..0b30b4e15c38 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -263,6 +263,19 @@ static inline void outl(uint16_t port, uint32_t value) __asm__ __volatile__("outl %%eax, %%dx" : : "d"(port), "a"(value)); } +static inline void cpuid(uint32_t *eax, uint32_t *ebx, + uint32_t *ecx, uint32_t *edx) +{ + /* ecx is often an input as well as an output. */ + asm volatile("cpuid" + : "=a" (*eax), + "=b" (*ebx), + "=c" (*ecx), + "=d" (*edx) + : "0" (*eax), "2" (*ecx) + : "memory"); +} + #define SET_XMM(__var, __xmm) \ asm volatile("movq %0, %%"#__xmm : : "r"(__var) : #__xmm) @@ -338,8 +351,10 @@ void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *state); struct kvm_msr_list *kvm_get_msr_index_list(void); - +uint64_t kvm_get_feature_msr(uint64_t msr_index); struct kvm_cpuid2 *kvm_get_supported_cpuid(void); + +struct kvm_cpuid2 *vcpu_get_cpuid(struct kvm_vm *vm, uint32_t vcpuid); void vcpu_set_cpuid(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_cpuid2 *cpuid); @@ -391,6 +406,10 @@ bool set_cpuid(struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 *ent); uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, uint64_t a3); +struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void); +void vcpu_set_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid); +struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid); + /* * Basic CPU control in CR0 */ @@ -406,8 +425,27 @@ uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, #define X86_CR0_CD (1UL<<30) /* Cache Disable */ #define X86_CR0_PG (1UL<<31) /* Paging */ +#define APIC_DEFAULT_GPA 0xfee00000ULL + +/* APIC base address MSR and fields */ +#define MSR_IA32_APICBASE 0x0000001b +#define MSR_IA32_APICBASE_BSP (1<<8) +#define MSR_IA32_APICBASE_EXTD (1<<10) +#define MSR_IA32_APICBASE_ENABLE (1<<11) +#define MSR_IA32_APICBASE_BASE (0xfffff<<12) +#define GET_APIC_BASE(x) (((x) >> 12) << 12) + #define APIC_BASE_MSR 0x800 #define X2APIC_ENABLE (1UL << 10) +#define APIC_ID 0x20 +#define APIC_LVR 0x30 +#define GET_APIC_ID_FIELD(x) (((x) >> 24) & 0xFF) +#define APIC_TASKPRI 0x80 +#define APIC_PROCPRI 0xA0 +#define APIC_EOI 0xB0 +#define APIC_SPIV 0xF0 +#define APIC_SPIV_FOCUS_DISABLED (1 << 9) +#define APIC_SPIV_APIC_ENABLED (1 << 8) #define APIC_ICR 0x300 #define APIC_DEST_SELF 0x40000 #define APIC_DEST_ALLINC 0x80000 @@ -432,6 +470,7 @@ uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, #define APIC_DM_EXTINT 0x00700 #define APIC_VECTOR_MASK 0x000FF #define APIC_ICR2 0x310 +#define SET_APIC_DEST_FIELD(x) ((x) << 24) /* VMX_EPT_VPID_CAP bits */ #define VMX_EPT_VPID_CAP_AD_BITS (1ULL << 21) diff --git a/tools/testing/selftests/kvm/lib/guest_modes.c b/tools/testing/selftests/kvm/lib/guest_modes.c new file mode 100644 index 000000000000..25bff307c71f --- /dev/null +++ b/tools/testing/selftests/kvm/lib/guest_modes.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020, Red Hat, Inc. + */ +#include "guest_modes.h" + +struct guest_mode guest_modes[NUM_VM_MODES]; + +void guest_modes_append_default(void) +{ + guest_mode_append(VM_MODE_DEFAULT, true, true); + +#ifdef __aarch64__ + guest_mode_append(VM_MODE_P40V48_64K, true, true); + { + unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE); + if (limit >= 52) + guest_mode_append(VM_MODE_P52V48_64K, true, true); + if (limit >= 48) { + guest_mode_append(VM_MODE_P48V48_4K, true, true); + guest_mode_append(VM_MODE_P48V48_64K, true, true); + } + } +#endif +} + +void for_each_guest_mode(void (*func)(enum vm_guest_mode, void *), void *arg) +{ + int i; + + for (i = 0; i < NUM_VM_MODES; ++i) { + if (!guest_modes[i].enabled) + continue; + TEST_ASSERT(guest_modes[i].supported, + "Guest mode ID %d (%s) not supported.", + i, vm_guest_mode_string(i)); + func(i, arg); + } +} + +void guest_modes_help(void) +{ + int i; + + printf(" -m: specify the guest mode ID to test\n" + " (default: test all supported modes)\n" + " This option may be used multiple times.\n" + " Guest mode IDs:\n"); + for (i = 0; i < NUM_VM_MODES; ++i) { + printf(" %d: %s%s\n", i, vm_guest_mode_string(i), + guest_modes[i].supported ? " (supported)" : ""); + } +} + +void guest_modes_cmdline(const char *arg) +{ + static bool mode_selected; + unsigned int mode; + int i; + + if (!mode_selected) { + for (i = 0; i < NUM_VM_MODES; ++i) + guest_modes[i].enabled = false; + mode_selected = true; + } + + mode = strtoul(optarg, NULL, 10); + TEST_ASSERT(mode < NUM_VM_MODES, "Guest mode ID %d too big", mode); + guest_modes[mode].enabled = true; +} diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 88ef7067f1e6..d787cb802b4a 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -153,14 +153,7 @@ const char * const vm_guest_mode_string[] = { _Static_assert(sizeof(vm_guest_mode_string)/sizeof(char *) == NUM_VM_MODES, "Missing new mode strings?"); -struct vm_guest_mode_params { - unsigned int pa_bits; - unsigned int va_bits; - unsigned int page_size; - unsigned int page_shift; -}; - -static const struct vm_guest_mode_params vm_guest_mode_params[] = { +const struct vm_guest_mode_params vm_guest_mode_params[] = { { 52, 48, 0x1000, 12 }, { 52, 48, 0x10000, 16 }, { 48, 48, 0x1000, 12 }, @@ -1808,6 +1801,7 @@ static struct exit_reason { {KVM_EXIT_DIRTY_RING_FULL, "DIRTY_RING_FULL"}, {KVM_EXIT_X86_RDMSR, "RDMSR"}, {KVM_EXIT_X86_WRMSR, "WRMSR"}, + {KVM_EXIT_XEN, "XEN"}, #ifdef KVM_EXIT_MEMORY_NOT_PRESENT {KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"}, #endif diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c new file mode 100644 index 000000000000..81490b9b4e32 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/perf_test_util.c @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020, Google LLC. + */ + +#include "kvm_util.h" +#include "perf_test_util.h" +#include "processor.h" + +struct perf_test_args perf_test_args; + +uint64_t guest_test_phys_mem; + +/* + * Guest virtual memory offset of the testing memory slot. + * Must not conflict with identity mapped test code. + */ +static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; + +/* + * Continuously write to the first 8 bytes of each page in the + * specified region. + */ +static void guest_code(uint32_t vcpu_id) +{ + struct perf_test_vcpu_args *vcpu_args = &perf_test_args.vcpu_args[vcpu_id]; + uint64_t gva; + uint64_t pages; + int i; + + /* Make sure vCPU args data structure is not corrupt. */ + GUEST_ASSERT(vcpu_args->vcpu_id == vcpu_id); + + gva = vcpu_args->gva; + pages = vcpu_args->pages; + + while (true) { + for (i = 0; i < pages; i++) { + uint64_t addr = gva + (i * perf_test_args.guest_page_size); + + if (i % perf_test_args.wr_fract == 0) + *(uint64_t *)addr = 0x0123456789ABCDEF; + else + READ_ONCE(*(uint64_t *)addr); + } + + GUEST_SYNC(1); + } +} + +struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus, + uint64_t vcpu_memory_bytes, + enum vm_mem_backing_src_type backing_src) +{ + struct kvm_vm *vm; + uint64_t guest_num_pages; + + pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); + + perf_test_args.host_page_size = getpagesize(); + perf_test_args.guest_page_size = vm_guest_mode_params[mode].page_size; + + guest_num_pages = vm_adjust_num_guest_pages(mode, + (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size); + + TEST_ASSERT(vcpu_memory_bytes % perf_test_args.host_page_size == 0, + "Guest memory size is not host page size aligned."); + TEST_ASSERT(vcpu_memory_bytes % perf_test_args.guest_page_size == 0, + "Guest memory size is not guest page size aligned."); + + vm = vm_create_with_vcpus(mode, vcpus, + (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size, + 0, guest_code, NULL); + + perf_test_args.vm = vm; + + /* + * If there should be more memory in the guest test region than there + * can be pages in the guest, it will definitely cause problems. + */ + TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm), + "Requested more guest memory than address space allows.\n" + " guest pages: %lx max gfn: %x vcpus: %d wss: %lx]\n", + guest_num_pages, vm_get_max_gfn(vm), vcpus, + vcpu_memory_bytes); + + guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) * + perf_test_args.guest_page_size; + guest_test_phys_mem &= ~(perf_test_args.host_page_size - 1); +#ifdef __s390x__ + /* Align to 1M (segment size) */ + guest_test_phys_mem &= ~((1 << 20) - 1); +#endif + pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem); + + /* Add an extra memory slot for testing */ + vm_userspace_mem_region_add(vm, backing_src, guest_test_phys_mem, + PERF_TEST_MEM_SLOT_INDEX, + guest_num_pages, 0); + + /* Do mapping for the demand paging memory slot */ + virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0); + + ucall_init(vm, NULL); + + return vm; +} + +void perf_test_destroy_vm(struct kvm_vm *vm) +{ + ucall_uninit(vm); + kvm_vm_free(vm); +} + +void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus, + uint64_t vcpu_memory_bytes, + bool partition_vcpu_memory_access) +{ + vm_paddr_t vcpu_gpa; + struct perf_test_vcpu_args *vcpu_args; + int vcpu_id; + + for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { + vcpu_args = &perf_test_args.vcpu_args[vcpu_id]; + + vcpu_args->vcpu_id = vcpu_id; + if (partition_vcpu_memory_access) { + vcpu_args->gva = guest_test_virt_mem + + (vcpu_id * vcpu_memory_bytes); + vcpu_args->pages = vcpu_memory_bytes / + perf_test_args.guest_page_size; + vcpu_gpa = guest_test_phys_mem + + (vcpu_id * vcpu_memory_bytes); + } else { + vcpu_args->gva = guest_test_virt_mem; + vcpu_args->pages = (vcpus * vcpu_memory_bytes) / + perf_test_args.guest_page_size; + vcpu_gpa = guest_test_phys_mem; + } + + pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n", + vcpu_id, vcpu_gpa, vcpu_gpa + + (vcpu_args->pages * perf_test_args.guest_page_size)); + } +} diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index 8e04c0b1608e..906c955384e2 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -10,6 +10,7 @@ #include <limits.h> #include <stdlib.h> #include <time.h> +#include "linux/kernel.h" #include "test_util.h" @@ -84,7 +85,7 @@ struct timespec timespec_sub(struct timespec ts1, struct timespec ts2) return timespec_add_ns((struct timespec){0}, ns1 - ns2); } -struct timespec timespec_diff_now(struct timespec start) +struct timespec timespec_elapsed(struct timespec start) { struct timespec end; @@ -109,3 +110,31 @@ void print_skip(const char *fmt, ...) va_end(ap); puts(", skipping test"); } + +const struct vm_mem_backing_src_alias backing_src_aliases[] = { + {"anonymous", VM_MEM_SRC_ANONYMOUS,}, + {"anonymous_thp", VM_MEM_SRC_ANONYMOUS_THP,}, + {"anonymous_hugetlb", VM_MEM_SRC_ANONYMOUS_HUGETLB,}, +}; + +void backing_src_help(void) +{ + int i; + + printf("Available backing src types:\n"); + for (i = 0; i < ARRAY_SIZE(backing_src_aliases); i++) + printf("\t%s\n", backing_src_aliases[i].name); +} + +enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(backing_src_aliases); i++) + if (!strcmp(type_name, backing_src_aliases[i].name)) + return backing_src_aliases[i].type; + + backing_src_help(); + TEST_FAIL("Unknown backing src type: %s", type_name); + return -1; +} diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 95e1a757c629..de0c76177d02 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -670,6 +670,82 @@ struct kvm_cpuid2 *kvm_get_supported_cpuid(void) } /* + * KVM Get MSR + * + * Input Args: + * msr_index - Index of MSR + * + * Output Args: None + * + * Return: On success, value of the MSR. On failure a TEST_ASSERT is produced. + * + * Get value of MSR for VCPU. + */ +uint64_t kvm_get_feature_msr(uint64_t msr_index) +{ + struct { + struct kvm_msrs header; + struct kvm_msr_entry entry; + } buffer = {}; + int r, kvm_fd; + + buffer.header.nmsrs = 1; + buffer.entry.index = msr_index; + kvm_fd = open(KVM_DEV_PATH, O_RDONLY); + if (kvm_fd < 0) + exit(KSFT_SKIP); + + r = ioctl(kvm_fd, KVM_GET_MSRS, &buffer.header); + TEST_ASSERT(r == 1, "KVM_GET_MSRS IOCTL failed,\n" + " rc: %i errno: %i", r, errno); + + close(kvm_fd); + return buffer.entry.data; +} + +/* + * VM VCPU CPUID Set + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU id + * + * Output Args: None + * + * Return: KVM CPUID (KVM_GET_CPUID2) + * + * Set the VCPU's CPUID. + */ +struct kvm_cpuid2 *vcpu_get_cpuid(struct kvm_vm *vm, uint32_t vcpuid) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + struct kvm_cpuid2 *cpuid; + int rc, max_ent; + + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); + + cpuid = allocate_kvm_cpuid2(); + max_ent = cpuid->nent; + + for (cpuid->nent = 1; cpuid->nent <= max_ent; cpuid->nent++) { + rc = ioctl(vcpu->fd, KVM_GET_CPUID2, cpuid); + if (!rc) + break; + + TEST_ASSERT(rc == -1 && errno == E2BIG, + "KVM_GET_CPUID2 should either succeed or give E2BIG: %d %d", + rc, errno); + } + + TEST_ASSERT(rc == 0, "KVM_GET_CPUID2 failed, rc: %i errno: %i", + rc, errno); + + return cpuid; +} + + + +/* * Locate a cpuid entry. * * Input Args: @@ -1224,3 +1300,71 @@ uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, : "b"(a0), "c"(a1), "d"(a2), "S"(a3)); return r; } + +struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void) +{ + static struct kvm_cpuid2 *cpuid; + int ret; + int kvm_fd; + + if (cpuid) + return cpuid; + + cpuid = allocate_kvm_cpuid2(); + kvm_fd = open(KVM_DEV_PATH, O_RDONLY); + if (kvm_fd < 0) + exit(KSFT_SKIP); + + ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, cpuid); + TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_HV_CPUID failed %d %d\n", + ret, errno); + + close(kvm_fd); + return cpuid; +} + +void vcpu_set_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid) +{ + static struct kvm_cpuid2 *cpuid_full; + struct kvm_cpuid2 *cpuid_sys, *cpuid_hv; + int i, nent = 0; + + if (!cpuid_full) { + cpuid_sys = kvm_get_supported_cpuid(); + cpuid_hv = kvm_get_supported_hv_cpuid(); + + cpuid_full = malloc(sizeof(*cpuid_full) + + (cpuid_sys->nent + cpuid_hv->nent) * + sizeof(struct kvm_cpuid_entry2)); + if (!cpuid_full) { + perror("malloc"); + abort(); + } + + /* Need to skip KVM CPUID leaves 0x400000xx */ + for (i = 0; i < cpuid_sys->nent; i++) { + if (cpuid_sys->entries[i].function >= 0x40000000 && + cpuid_sys->entries[i].function < 0x40000100) + continue; + cpuid_full->entries[nent] = cpuid_sys->entries[i]; + nent++; + } + + memcpy(&cpuid_full->entries[nent], cpuid_hv->entries, + cpuid_hv->nent * sizeof(struct kvm_cpuid_entry2)); + cpuid_full->nent = nent + cpuid_hv->nent; + } + + vcpu_set_cpuid(vm, vcpuid, cpuid_full); +} + +struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid) +{ + static struct kvm_cpuid2 *cpuid; + + cpuid = allocate_kvm_cpuid2(); + + vcpu_ioctl(vm, vcpuid, KVM_GET_SUPPORTED_HV_CPUID, cpuid); + + return cpuid; +} diff --git a/tools/testing/selftests/kvm/lib/x86_64/svm.c b/tools/testing/selftests/kvm/lib/x86_64/svm.c index 3a5c72ed2b79..827fe6028dd4 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/svm.c +++ b/tools/testing/selftests/kvm/lib/x86_64/svm.c @@ -74,7 +74,7 @@ void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_r wrmsr(MSR_VM_HSAVE_PA, svm->save_area_gpa); memset(vmcb, 0, sizeof(*vmcb)); - asm volatile ("vmsave\n\t" : : "a" (vmcb_gpa) : "memory"); + asm volatile ("vmsave %0\n\t" : : "a" (vmcb_gpa) : "memory"); vmcb_set_seg(&save->es, get_es(), 0, -1U, data_seg_attr); vmcb_set_seg(&save->cs, get_cs(), 0, -1U, code_seg_attr); vmcb_set_seg(&save->ss, get_ss(), 0, -1U, data_seg_attr); @@ -131,19 +131,19 @@ void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_r void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa) { asm volatile ( - "vmload\n\t" + "vmload %[vmcb_gpa]\n\t" "mov rflags, %%r15\n\t" // rflags "mov %%r15, 0x170(%[vmcb])\n\t" "mov guest_regs, %%r15\n\t" // rax "mov %%r15, 0x1f8(%[vmcb])\n\t" LOAD_GPR_C - "vmrun\n\t" + "vmrun %[vmcb_gpa]\n\t" SAVE_GPR_C "mov 0x170(%[vmcb]), %%r15\n\t" // rflags "mov %%r15, rflags\n\t" "mov 0x1f8(%[vmcb]), %%r15\n\t" // rax "mov %%r15, guest_regs\n\t" - "vmsave\n\t" + "vmsave %[vmcb_gpa]\n\t" : : [vmcb] "r" (vmcb), [vmcb_gpa] "a" (vmcb_gpa) : "r15", "memory"); } diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c new file mode 100644 index 000000000000..6096bf0a5b34 --- /dev/null +++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c @@ -0,0 +1,212 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * KVM memslot modification stress test + * Adapted from demand_paging_test.c + * + * Copyright (C) 2018, Red Hat, Inc. + * Copyright (C) 2020, Google, Inc. + */ + +#define _GNU_SOURCE /* for program_invocation_name */ + +#include <stdio.h> +#include <stdlib.h> +#include <sys/syscall.h> +#include <unistd.h> +#include <asm/unistd.h> +#include <time.h> +#include <poll.h> +#include <pthread.h> +#include <linux/bitmap.h> +#include <linux/bitops.h> +#include <linux/userfaultfd.h> + +#include "perf_test_util.h" +#include "processor.h" +#include "test_util.h" +#include "guest_modes.h" + +#define DUMMY_MEMSLOT_INDEX 7 + +#define DEFAULT_MEMSLOT_MODIFICATION_ITERATIONS 10 + + +static int nr_vcpus = 1; +static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; + +static bool run_vcpus = true; + +static void *vcpu_worker(void *data) +{ + int ret; + struct perf_test_vcpu_args *vcpu_args = + (struct perf_test_vcpu_args *)data; + int vcpu_id = vcpu_args->vcpu_id; + struct kvm_vm *vm = perf_test_args.vm; + struct kvm_run *run; + + vcpu_args_set(vm, vcpu_id, 1, vcpu_id); + run = vcpu_state(vm, vcpu_id); + + /* Let the guest access its memory until a stop signal is received */ + while (READ_ONCE(run_vcpus)) { + ret = _vcpu_run(vm, vcpu_id); + TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret); + + if (get_ucall(vm, vcpu_id, NULL) == UCALL_SYNC) + continue; + + TEST_ASSERT(false, + "Invalid guest sync status: exit_reason=%s\n", + exit_reason_str(run->exit_reason)); + } + + return NULL; +} + +struct memslot_antagonist_args { + struct kvm_vm *vm; + useconds_t delay; + uint64_t nr_modifications; +}; + +static void add_remove_memslot(struct kvm_vm *vm, useconds_t delay, + uint64_t nr_modifications, uint64_t gpa) +{ + int i; + + for (i = 0; i < nr_modifications; i++) { + usleep(delay); + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, gpa, + DUMMY_MEMSLOT_INDEX, 1, 0); + + vm_mem_region_delete(vm, DUMMY_MEMSLOT_INDEX); + } +} + +struct test_params { + useconds_t memslot_modification_delay; + uint64_t nr_memslot_modifications; + bool partition_vcpu_memory_access; +}; + +static void run_test(enum vm_guest_mode mode, void *arg) +{ + struct test_params *p = arg; + pthread_t *vcpu_threads; + struct kvm_vm *vm; + int vcpu_id; + + vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, + VM_MEM_SRC_ANONYMOUS); + + perf_test_args.wr_fract = 1; + + vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads)); + TEST_ASSERT(vcpu_threads, "Memory allocation failed"); + + perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size, + p->partition_vcpu_memory_access); + + /* Export the shared variables to the guest */ + sync_global_to_guest(vm, perf_test_args); + + pr_info("Finished creating vCPUs\n"); + + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) + pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker, + &perf_test_args.vcpu_args[vcpu_id]); + + pr_info("Started all vCPUs\n"); + + add_remove_memslot(vm, p->memslot_modification_delay, + p->nr_memslot_modifications, + guest_test_phys_mem + + (guest_percpu_mem_size * nr_vcpus) + + perf_test_args.host_page_size + + perf_test_args.guest_page_size); + + run_vcpus = false; + + /* Wait for the vcpu threads to quit */ + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) + pthread_join(vcpu_threads[vcpu_id], NULL); + + pr_info("All vCPU threads joined\n"); + + ucall_uninit(vm); + kvm_vm_free(vm); + + free(vcpu_threads); +} + +static void help(char *name) +{ + puts(""); + printf("usage: %s [-h] [-m mode] [-d delay_usec]\n" + " [-b memory] [-v vcpus] [-o] [-i iterations]\n", name); + guest_modes_help(); + printf(" -d: add a delay between each iteration of adding and\n" + " deleting a memslot in usec.\n"); + printf(" -b: specify the size of the memory region which should be\n" + " accessed by each vCPU. e.g. 10M or 3G.\n" + " Default: 1G\n"); + printf(" -v: specify the number of vCPUs to run.\n"); + printf(" -o: Overlap guest memory accesses instead of partitioning\n" + " them into a separate region of memory for each vCPU.\n"); + printf(" -i: specify the number of iterations of adding and removing\n" + " a memslot.\n" + " Default: %d\n", DEFAULT_MEMSLOT_MODIFICATION_ITERATIONS); + puts(""); + exit(0); +} + +int main(int argc, char *argv[]) +{ + int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); + int opt; + struct test_params p = { + .memslot_modification_delay = 0, + .nr_memslot_modifications = + DEFAULT_MEMSLOT_MODIFICATION_ITERATIONS, + .partition_vcpu_memory_access = true + }; + + guest_modes_append_default(); + + while ((opt = getopt(argc, argv, "hm:d:b:v:oi:")) != -1) { + switch (opt) { + case 'm': + guest_modes_cmdline(optarg); + break; + case 'd': + p.memslot_modification_delay = strtoul(optarg, NULL, 0); + TEST_ASSERT(p.memslot_modification_delay >= 0, + "A negative delay is not supported."); + break; + case 'b': + guest_percpu_mem_size = parse_size(optarg); + break; + case 'v': + nr_vcpus = atoi(optarg); + TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus, + "Invalid number of vcpus, must be between 1 and %d", + max_vcpus); + break; + case 'o': + p.partition_vcpu_memory_access = false; + break; + case 'i': + p.nr_memslot_modifications = atoi(optarg); + break; + case 'h': + default: + help(argv[0]); + break; + } + } + + for_each_guest_mode(run_test, &p); + + return 0; +} diff --git a/tools/testing/selftests/kvm/settings b/tools/testing/selftests/kvm/settings new file mode 100644 index 000000000000..6091b45d226b --- /dev/null +++ b/tools/testing/selftests/kvm/settings @@ -0,0 +1 @@ +timeout=120 diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c index 37b8a78f6b74..ca22ee6d19cb 100644 --- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c +++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c @@ -99,6 +99,7 @@ int main(int argc, char *argv[]) exit(KSFT_SKIP); } + vcpu_set_hv_cpuid(vm, VCPU_ID); vcpu_enable_evmcs(vm, VCPU_ID); run = vcpu_state(vm, VCPU_ID); @@ -142,7 +143,7 @@ int main(int argc, char *argv[]) /* Restore state in a new VM. */ kvm_vm_restart(vm, O_RDWR); vm_vcpu_add(vm, VCPU_ID); - vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + vcpu_set_hv_cpuid(vm, VCPU_ID); vcpu_enable_evmcs(vm, VCPU_ID); vcpu_load_state(vm, VCPU_ID, state); run = vcpu_state(vm, VCPU_ID); diff --git a/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c b/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c new file mode 100644 index 000000000000..9b78e8889638 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c @@ -0,0 +1,175 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2021, Red Hat Inc. + * + * Generic tests for KVM CPUID set/get ioctls + */ +#include <asm/kvm_para.h> +#include <linux/kvm_para.h> +#include <stdint.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +#define VCPU_ID 0 + +/* CPUIDs known to differ */ +struct { + u32 function; + u32 index; +} mangled_cpuids[] = { + {.function = 0xd, .index = 0}, +}; + +static void test_guest_cpuids(struct kvm_cpuid2 *guest_cpuid) +{ + int i; + u32 eax, ebx, ecx, edx; + + for (i = 0; i < guest_cpuid->nent; i++) { + eax = guest_cpuid->entries[i].function; + ecx = guest_cpuid->entries[i].index; + + cpuid(&eax, &ebx, &ecx, &edx); + + GUEST_ASSERT(eax == guest_cpuid->entries[i].eax && + ebx == guest_cpuid->entries[i].ebx && + ecx == guest_cpuid->entries[i].ecx && + edx == guest_cpuid->entries[i].edx); + } + +} + +static void test_cpuid_40000000(struct kvm_cpuid2 *guest_cpuid) +{ + u32 eax = 0x40000000, ebx, ecx = 0, edx; + + cpuid(&eax, &ebx, &ecx, &edx); + + GUEST_ASSERT(eax == 0x40000001); +} + +static void guest_main(struct kvm_cpuid2 *guest_cpuid) +{ + GUEST_SYNC(1); + + test_guest_cpuids(guest_cpuid); + + GUEST_SYNC(2); + + test_cpuid_40000000(guest_cpuid); + + GUEST_DONE(); +} + +static bool is_cpuid_mangled(struct kvm_cpuid_entry2 *entrie) +{ + int i; + + for (i = 0; i < sizeof(mangled_cpuids); i++) { + if (mangled_cpuids[i].function == entrie->function && + mangled_cpuids[i].index == entrie->index) + return true; + } + + return false; +} + +static void check_cpuid(struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 *entrie) +{ + int i; + + for (i = 0; i < cpuid->nent; i++) { + if (cpuid->entries[i].function == entrie->function && + cpuid->entries[i].index == entrie->index) { + if (is_cpuid_mangled(entrie)) + return; + + TEST_ASSERT(cpuid->entries[i].eax == entrie->eax && + cpuid->entries[i].ebx == entrie->ebx && + cpuid->entries[i].ecx == entrie->ecx && + cpuid->entries[i].edx == entrie->edx, + "CPUID 0x%x.%x differ: 0x%x:0x%x:0x%x:0x%x vs 0x%x:0x%x:0x%x:0x%x", + entrie->function, entrie->index, + cpuid->entries[i].eax, cpuid->entries[i].ebx, + cpuid->entries[i].ecx, cpuid->entries[i].edx, + entrie->eax, entrie->ebx, entrie->ecx, entrie->edx); + return; + } + } + + TEST_ASSERT(false, "CPUID 0x%x.%x not found", entrie->function, entrie->index); +} + +static void compare_cpuids(struct kvm_cpuid2 *cpuid1, struct kvm_cpuid2 *cpuid2) +{ + int i; + + for (i = 0; i < cpuid1->nent; i++) + check_cpuid(cpuid2, &cpuid1->entries[i]); + + for (i = 0; i < cpuid2->nent; i++) + check_cpuid(cpuid1, &cpuid2->entries[i]); +} + +static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid, int stage) +{ + struct ucall uc; + + _vcpu_run(vm, vcpuid); + + switch (get_ucall(vm, vcpuid, &uc)) { + case UCALL_SYNC: + TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") && + uc.args[1] == stage + 1, + "Stage %d: Unexpected register values vmexit, got %lx", + stage + 1, (ulong)uc.args[1]); + return; + case UCALL_DONE: + return; + case UCALL_ABORT: + TEST_ASSERT(false, "%s at %s:%ld\n\tvalues: %#lx, %#lx", (const char *)uc.args[0], + __FILE__, uc.args[1], uc.args[2], uc.args[3]); + default: + TEST_ASSERT(false, "Unexpected exit: %s", + exit_reason_str(vcpu_state(vm, vcpuid)->exit_reason)); + } +} + +struct kvm_cpuid2 *vcpu_alloc_cpuid(struct kvm_vm *vm, vm_vaddr_t *p_gva, struct kvm_cpuid2 *cpuid) +{ + int size = sizeof(*cpuid) + cpuid->nent * sizeof(cpuid->entries[0]); + vm_vaddr_t gva = vm_vaddr_alloc(vm, size, + getpagesize(), 0, 0); + struct kvm_cpuid2 *guest_cpuids = addr_gva2hva(vm, gva); + + memcpy(guest_cpuids, cpuid, size); + + *p_gva = gva; + return guest_cpuids; +} + +int main(void) +{ + struct kvm_cpuid2 *supp_cpuid, *cpuid2; + vm_vaddr_t cpuid_gva; + struct kvm_vm *vm; + int stage; + + vm = vm_create_default(VCPU_ID, 0, guest_main); + + supp_cpuid = kvm_get_supported_cpuid(); + cpuid2 = vcpu_get_cpuid(vm, VCPU_ID); + + compare_cpuids(supp_cpuid, cpuid2); + + vcpu_alloc_cpuid(vm, &cpuid_gva, cpuid2); + + vcpu_args_set(vm, VCPU_ID, 1, cpuid_gva); + + for (stage = 0; stage < 3; stage++) + run_vcpu(vm, VCPU_ID, stage); + + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c index 88a595b7fbdd..7e2d2d17d2ed 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c @@ -125,30 +125,6 @@ void test_hv_cpuid_e2big(struct kvm_vm *vm, bool system) " it should have: %d %d", system ? "KVM" : "vCPU", ret, errno); } - -struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(struct kvm_vm *vm, bool system) -{ - int nent = 20; /* should be enough */ - static struct kvm_cpuid2 *cpuid; - - cpuid = malloc(sizeof(*cpuid) + nent * sizeof(struct kvm_cpuid_entry2)); - - if (!cpuid) { - perror("malloc"); - abort(); - } - - cpuid->nent = nent; - - if (!system) - vcpu_ioctl(vm, VCPU_ID, KVM_GET_SUPPORTED_HV_CPUID, cpuid); - else - kvm_ioctl(vm, KVM_GET_SUPPORTED_HV_CPUID, cpuid); - - return cpuid; -} - - int main(int argc, char *argv[]) { struct kvm_vm *vm; @@ -167,7 +143,7 @@ int main(int argc, char *argv[]) /* Test vCPU ioctl version */ test_hv_cpuid_e2big(vm, false); - hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm, false); + hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vm, VCPU_ID); test_hv_cpuid(hv_cpuid_entries, false); free(hv_cpuid_entries); @@ -177,7 +153,7 @@ int main(int argc, char *argv[]) goto do_sys; } vcpu_enable_evmcs(vm, VCPU_ID); - hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm, false); + hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vm, VCPU_ID); test_hv_cpuid(hv_cpuid_entries, true); free(hv_cpuid_entries); @@ -190,9 +166,8 @@ do_sys: test_hv_cpuid_e2big(vm, true); - hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm, true); + hv_cpuid_entries = kvm_get_supported_hv_cpuid(); test_hv_cpuid(hv_cpuid_entries, nested_vmx_supported()); - free(hv_cpuid_entries); out: kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_msrs_test.c b/tools/testing/selftests/kvm/x86_64/vmx_pmu_msrs_test.c new file mode 100644 index 000000000000..23051d84b907 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/vmx_pmu_msrs_test.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * VMX-pmu related msrs test + * + * Copyright (C) 2021 Intel Corporation + * + * Test to check the effect of various CPUID settings + * on the MSR_IA32_PERF_CAPABILITIES MSR, and check that + * whatever we write with KVM_SET_MSR is _not_ modified + * in the guest and test it can be retrieved with KVM_GET_MSR. + * + * Test to check that invalid LBR formats are rejected. + */ + +#define _GNU_SOURCE /* for program_invocation_short_name */ +#include <sys/ioctl.h> + +#include "kvm_util.h" +#include "vmx.h" + +#define VCPU_ID 0 + +#define X86_FEATURE_PDCM (1<<15) +#define PMU_CAP_FW_WRITES (1ULL << 13) +#define PMU_CAP_LBR_FMT 0x3f + +union cpuid10_eax { + struct { + unsigned int version_id:8; + unsigned int num_counters:8; + unsigned int bit_width:8; + unsigned int mask_length:8; + } split; + unsigned int full; +}; + +union perf_capabilities { + struct { + u64 lbr_format:6; + u64 pebs_trap:1; + u64 pebs_arch_reg:1; + u64 pebs_format:4; + u64 smm_freeze:1; + u64 full_width_write:1; + u64 pebs_baseline:1; + u64 perf_metrics:1; + u64 pebs_output_pt_available:1; + u64 anythread_deprecated:1; + }; + u64 capabilities; +}; + +static void guest_code(void) +{ + wrmsr(MSR_IA32_PERF_CAPABILITIES, PMU_CAP_LBR_FMT); +} + +int main(int argc, char *argv[]) +{ + struct kvm_cpuid2 *cpuid; + struct kvm_cpuid_entry2 *entry_1_0; + struct kvm_cpuid_entry2 *entry_a_0; + bool pdcm_supported = false; + struct kvm_vm *vm; + int ret; + union cpuid10_eax eax; + union perf_capabilities host_cap; + + host_cap.capabilities = kvm_get_feature_msr(MSR_IA32_PERF_CAPABILITIES); + host_cap.capabilities &= (PMU_CAP_FW_WRITES | PMU_CAP_LBR_FMT); + + /* Create VM */ + vm = vm_create_default(VCPU_ID, 0, guest_code); + cpuid = kvm_get_supported_cpuid(); + + if (kvm_get_cpuid_max_basic() >= 0xa) { + entry_1_0 = kvm_get_supported_cpuid_index(1, 0); + entry_a_0 = kvm_get_supported_cpuid_index(0xa, 0); + pdcm_supported = entry_1_0 && !!(entry_1_0->ecx & X86_FEATURE_PDCM); + eax.full = entry_a_0->eax; + } + if (!pdcm_supported) { + print_skip("MSR_IA32_PERF_CAPABILITIES is not supported by the vCPU"); + exit(KSFT_SKIP); + } + if (!eax.split.version_id) { + print_skip("PMU is not supported by the vCPU"); + exit(KSFT_SKIP); + } + + /* testcase 1, set capabilities when we have PDCM bit */ + vcpu_set_cpuid(vm, VCPU_ID, cpuid); + vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, PMU_CAP_FW_WRITES); + + /* check capabilities can be retrieved with KVM_GET_MSR */ + ASSERT_EQ(vcpu_get_msr(vm, VCPU_ID, MSR_IA32_PERF_CAPABILITIES), PMU_CAP_FW_WRITES); + + /* check whatever we write with KVM_SET_MSR is _not_ modified */ + vcpu_run(vm, VCPU_ID); + ASSERT_EQ(vcpu_get_msr(vm, VCPU_ID, MSR_IA32_PERF_CAPABILITIES), PMU_CAP_FW_WRITES); + + /* testcase 2, check valid LBR formats are accepted */ + vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, 0); + ASSERT_EQ(vcpu_get_msr(vm, VCPU_ID, MSR_IA32_PERF_CAPABILITIES), 0); + + vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, host_cap.lbr_format); + ASSERT_EQ(vcpu_get_msr(vm, VCPU_ID, MSR_IA32_PERF_CAPABILITIES), (u64)host_cap.lbr_format); + + /* testcase 3, check invalid LBR format is rejected */ + ret = _vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, PMU_CAP_LBR_FMT); + TEST_ASSERT(ret == 0, "Bad PERF_CAPABILITIES didn't fail."); + + /* testcase 4, set capabilities when we don't have PDCM bit */ + entry_1_0->ecx &= ~X86_FEATURE_PDCM; + vcpu_set_cpuid(vm, VCPU_ID, cpuid); + ret = _vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities); + TEST_ASSERT(ret == 0, "Bad PERF_CAPABILITIES didn't fail."); + + /* testcase 5, set capabilities when we don't have PMU version bits */ + entry_1_0->ecx |= X86_FEATURE_PDCM; + eax.split.version_id = 0; + entry_1_0->ecx = eax.full; + vcpu_set_cpuid(vm, VCPU_ID, cpuid); + ret = _vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, PMU_CAP_FW_WRITES); + TEST_ASSERT(ret == 0, "Bad PERF_CAPABILITIES didn't fail."); + + vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, 0); + ASSERT_EQ(vcpu_get_msr(vm, VCPU_ID, MSR_IA32_PERF_CAPABILITIES), 0); + + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c new file mode 100644 index 000000000000..2f964cdc273c --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c @@ -0,0 +1,544 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * xapic_ipi_test + * + * Copyright (C) 2020, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + * Test that when the APIC is in xAPIC mode, a vCPU can send an IPI to wake + * another vCPU that is halted when KVM's backing page for the APIC access + * address has been moved by mm. + * + * The test starts two vCPUs: one that sends IPIs and one that continually + * executes HLT. The sender checks that the halter has woken from the HLT and + * has reentered HLT before sending the next IPI. While the vCPUs are running, + * the host continually calls migrate_pages to move all of the process' pages + * amongst the available numa nodes on the machine. + * + * Migration is a command line option. When used on non-numa machines will + * exit with error. Test is still usefull on non-numa for testing IPIs. + */ + +#define _GNU_SOURCE /* for program_invocation_short_name */ +#include <getopt.h> +#include <pthread.h> +#include <inttypes.h> +#include <string.h> +#include <time.h> + +#include "kvm_util.h" +#include "numaif.h" +#include "processor.h" +#include "test_util.h" +#include "vmx.h" + +/* Default running time for the test */ +#define DEFAULT_RUN_SECS 3 + +/* Default delay between migrate_pages calls (microseconds) */ +#define DEFAULT_DELAY_USECS 500000 + +#define HALTER_VCPU_ID 0 +#define SENDER_VCPU_ID 1 + +volatile uint32_t *apic_base = (volatile uint32_t *)APIC_DEFAULT_GPA; + +/* + * Vector for IPI from sender vCPU to halting vCPU. + * Value is arbitrary and was chosen for the alternating bit pattern. Any + * value should work. + */ +#define IPI_VECTOR 0xa5 + +/* + * Incremented in the IPI handler. Provides evidence to the sender that the IPI + * arrived at the destination + */ +static volatile uint64_t ipis_rcvd; + +/* Data struct shared between host main thread and vCPUs */ +struct test_data_page { + uint32_t halter_apic_id; + volatile uint64_t hlt_count; + volatile uint64_t wake_count; + uint64_t ipis_sent; + uint64_t migrations_attempted; + uint64_t migrations_completed; + uint32_t icr; + uint32_t icr2; + uint32_t halter_tpr; + uint32_t halter_ppr; + + /* + * Record local version register as a cross-check that APIC access + * worked. Value should match what KVM reports (APIC_VERSION in + * arch/x86/kvm/lapic.c). If test is failing, check that values match + * to determine whether APIC access exits are working. + */ + uint32_t halter_lvr; +}; + +struct thread_params { + struct test_data_page *data; + struct kvm_vm *vm; + uint32_t vcpu_id; + uint64_t *pipis_rcvd; /* host address of ipis_rcvd global */ +}; + +uint32_t read_apic_reg(uint reg) +{ + return apic_base[reg >> 2]; +} + +void write_apic_reg(uint reg, uint32_t val) +{ + apic_base[reg >> 2] = val; +} + +void disable_apic(void) +{ + wrmsr(MSR_IA32_APICBASE, + rdmsr(MSR_IA32_APICBASE) & + ~(MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD)); +} + +void enable_xapic(void) +{ + uint64_t val = rdmsr(MSR_IA32_APICBASE); + + /* Per SDM: to enable xAPIC when in x2APIC must first disable APIC */ + if (val & MSR_IA32_APICBASE_EXTD) { + disable_apic(); + wrmsr(MSR_IA32_APICBASE, + rdmsr(MSR_IA32_APICBASE) | MSR_IA32_APICBASE_ENABLE); + } else if (!(val & MSR_IA32_APICBASE_ENABLE)) { + wrmsr(MSR_IA32_APICBASE, val | MSR_IA32_APICBASE_ENABLE); + } + + /* + * Per SDM: reset value of spurious interrupt vector register has the + * APIC software enabled bit=0. It must be enabled in addition to the + * enable bit in the MSR. + */ + val = read_apic_reg(APIC_SPIV) | APIC_SPIV_APIC_ENABLED; + write_apic_reg(APIC_SPIV, val); +} + +void verify_apic_base_addr(void) +{ + uint64_t msr = rdmsr(MSR_IA32_APICBASE); + uint64_t base = GET_APIC_BASE(msr); + + GUEST_ASSERT(base == APIC_DEFAULT_GPA); +} + +static void halter_guest_code(struct test_data_page *data) +{ + verify_apic_base_addr(); + enable_xapic(); + + data->halter_apic_id = GET_APIC_ID_FIELD(read_apic_reg(APIC_ID)); + data->halter_lvr = read_apic_reg(APIC_LVR); + + /* + * Loop forever HLTing and recording halts & wakes. Disable interrupts + * each time around to minimize window between signaling the pending + * halt to the sender vCPU and executing the halt. No need to disable on + * first run as this vCPU executes first and the host waits for it to + * signal going into first halt before starting the sender vCPU. Record + * TPR and PPR for diagnostic purposes in case the test fails. + */ + for (;;) { + data->halter_tpr = read_apic_reg(APIC_TASKPRI); + data->halter_ppr = read_apic_reg(APIC_PROCPRI); + data->hlt_count++; + asm volatile("sti; hlt; cli"); + data->wake_count++; + } +} + +/* + * Runs on halter vCPU when IPI arrives. Write an arbitrary non-zero value to + * enable diagnosing errant writes to the APIC access address backing page in + * case of test failure. + */ +static void guest_ipi_handler(struct ex_regs *regs) +{ + ipis_rcvd++; + write_apic_reg(APIC_EOI, 77); +} + +static void sender_guest_code(struct test_data_page *data) +{ + uint64_t last_wake_count; + uint64_t last_hlt_count; + uint64_t last_ipis_rcvd_count; + uint32_t icr_val; + uint32_t icr2_val; + uint64_t tsc_start; + + verify_apic_base_addr(); + enable_xapic(); + + /* + * Init interrupt command register for sending IPIs + * + * Delivery mode=fixed, per SDM: + * "Delivers the interrupt specified in the vector field to the target + * processor." + * + * Destination mode=physical i.e. specify target by its local APIC + * ID. This vCPU assumes that the halter vCPU has already started and + * set data->halter_apic_id. + */ + icr_val = (APIC_DEST_PHYSICAL | APIC_DM_FIXED | IPI_VECTOR); + icr2_val = SET_APIC_DEST_FIELD(data->halter_apic_id); + data->icr = icr_val; + data->icr2 = icr2_val; + + last_wake_count = data->wake_count; + last_hlt_count = data->hlt_count; + last_ipis_rcvd_count = ipis_rcvd; + for (;;) { + /* + * Send IPI to halter vCPU. + * First IPI can be sent unconditionally because halter vCPU + * starts earlier. + */ + write_apic_reg(APIC_ICR2, icr2_val); + write_apic_reg(APIC_ICR, icr_val); + data->ipis_sent++; + + /* + * Wait up to ~1 sec for halter to indicate that it has: + * 1. Received the IPI + * 2. Woken up from the halt + * 3. Gone back into halt + * Current CPUs typically run at 2.x Ghz which is ~2 + * billion ticks per second. + */ + tsc_start = rdtsc(); + while (rdtsc() - tsc_start < 2000000000) { + if ((ipis_rcvd != last_ipis_rcvd_count) && + (data->wake_count != last_wake_count) && + (data->hlt_count != last_hlt_count)) + break; + } + + GUEST_ASSERT((ipis_rcvd != last_ipis_rcvd_count) && + (data->wake_count != last_wake_count) && + (data->hlt_count != last_hlt_count)); + + last_wake_count = data->wake_count; + last_hlt_count = data->hlt_count; + last_ipis_rcvd_count = ipis_rcvd; + } +} + +static void *vcpu_thread(void *arg) +{ + struct thread_params *params = (struct thread_params *)arg; + struct ucall uc; + int old; + int r; + unsigned int exit_reason; + + r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old); + TEST_ASSERT(r == 0, + "pthread_setcanceltype failed on vcpu_id=%u with errno=%d", + params->vcpu_id, r); + + fprintf(stderr, "vCPU thread running vCPU %u\n", params->vcpu_id); + vcpu_run(params->vm, params->vcpu_id); + exit_reason = vcpu_state(params->vm, params->vcpu_id)->exit_reason; + + TEST_ASSERT(exit_reason == KVM_EXIT_IO, + "vCPU %u exited with unexpected exit reason %u-%s, expected KVM_EXIT_IO", + params->vcpu_id, exit_reason, exit_reason_str(exit_reason)); + + if (get_ucall(params->vm, params->vcpu_id, &uc) == UCALL_ABORT) { + TEST_ASSERT(false, + "vCPU %u exited with error: %s.\n" + "Sending vCPU sent %lu IPIs to halting vCPU\n" + "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n" + "Halter TPR=%#x PPR=%#x LVR=%#x\n" + "Migrations attempted: %lu\n" + "Migrations completed: %lu\n", + params->vcpu_id, (const char *)uc.args[0], + params->data->ipis_sent, params->data->hlt_count, + params->data->wake_count, + *params->pipis_rcvd, params->data->halter_tpr, + params->data->halter_ppr, params->data->halter_lvr, + params->data->migrations_attempted, + params->data->migrations_completed); + } + + return NULL; +} + +static void cancel_join_vcpu_thread(pthread_t thread, uint32_t vcpu_id) +{ + void *retval; + int r; + + r = pthread_cancel(thread); + TEST_ASSERT(r == 0, + "pthread_cancel on vcpu_id=%d failed with errno=%d", + vcpu_id, r); + + r = pthread_join(thread, &retval); + TEST_ASSERT(r == 0, + "pthread_join on vcpu_id=%d failed with errno=%d", + vcpu_id, r); + TEST_ASSERT(retval == PTHREAD_CANCELED, + "expected retval=%p, got %p", PTHREAD_CANCELED, + retval); +} + +void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs, + uint64_t *pipis_rcvd) +{ + long pages_not_moved; + unsigned long nodemask = 0; + unsigned long nodemasks[sizeof(nodemask) * 8]; + int nodes = 0; + time_t start_time, last_update, now; + time_t interval_secs = 1; + int i, r; + int from, to; + unsigned long bit; + uint64_t hlt_count; + uint64_t wake_count; + uint64_t ipis_sent; + + fprintf(stderr, "Calling migrate_pages every %d microseconds\n", + delay_usecs); + + /* Get set of first 64 numa nodes available */ + r = get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8, + 0, MPOL_F_MEMS_ALLOWED); + TEST_ASSERT(r == 0, "get_mempolicy failed errno=%d", errno); + + fprintf(stderr, "Numa nodes found amongst first %lu possible nodes " + "(each 1-bit indicates node is present): %#lx\n", + sizeof(nodemask) * 8, nodemask); + + /* Init array of masks containing a single-bit in each, one for each + * available node. migrate_pages called below requires specifying nodes + * as bit masks. + */ + for (i = 0, bit = 1; i < sizeof(nodemask) * 8; i++, bit <<= 1) { + if (nodemask & bit) { + nodemasks[nodes] = nodemask & bit; + nodes++; + } + } + + TEST_ASSERT(nodes > 1, + "Did not find at least 2 numa nodes. Can't do migration\n"); + + fprintf(stderr, "Migrating amongst %d nodes found\n", nodes); + + from = 0; + to = 1; + start_time = time(NULL); + last_update = start_time; + + ipis_sent = data->ipis_sent; + hlt_count = data->hlt_count; + wake_count = data->wake_count; + + while ((int)(time(NULL) - start_time) < run_secs) { + data->migrations_attempted++; + + /* + * migrate_pages with PID=0 will migrate all pages of this + * process between the nodes specified as bitmasks. The page + * backing the APIC access address belongs to this process + * because it is allocated by KVM in the context of the + * KVM_CREATE_VCPU ioctl. If that assumption ever changes this + * test may break or give a false positive signal. + */ + pages_not_moved = migrate_pages(0, sizeof(nodemasks[from]), + &nodemasks[from], + &nodemasks[to]); + if (pages_not_moved < 0) + fprintf(stderr, + "migrate_pages failed, errno=%d\n", errno); + else if (pages_not_moved > 0) + fprintf(stderr, + "migrate_pages could not move %ld pages\n", + pages_not_moved); + else + data->migrations_completed++; + + from = to; + to++; + if (to == nodes) + to = 0; + + now = time(NULL); + if (((now - start_time) % interval_secs == 0) && + (now != last_update)) { + last_update = now; + fprintf(stderr, + "%lu seconds: Migrations attempted=%lu completed=%lu, " + "IPIs sent=%lu received=%lu, HLTs=%lu wakes=%lu\n", + now - start_time, data->migrations_attempted, + data->migrations_completed, + data->ipis_sent, *pipis_rcvd, + data->hlt_count, data->wake_count); + + TEST_ASSERT(ipis_sent != data->ipis_sent && + hlt_count != data->hlt_count && + wake_count != data->wake_count, + "IPI, HLT and wake count have not increased " + "in the last %lu seconds. " + "HLTer is likely hung.\n", interval_secs); + + ipis_sent = data->ipis_sent; + hlt_count = data->hlt_count; + wake_count = data->wake_count; + } + usleep(delay_usecs); + } +} + +void get_cmdline_args(int argc, char *argv[], int *run_secs, + bool *migrate, int *delay_usecs) +{ + for (;;) { + int opt = getopt(argc, argv, "s:d:m"); + + if (opt == -1) + break; + switch (opt) { + case 's': + *run_secs = parse_size(optarg); + break; + case 'm': + *migrate = true; + break; + case 'd': + *delay_usecs = parse_size(optarg); + break; + default: + TEST_ASSERT(false, + "Usage: -s <runtime seconds>. Default is %d seconds.\n" + "-m adds calls to migrate_pages while vCPUs are running." + " Default is no migrations.\n" + "-d <delay microseconds> - delay between migrate_pages() calls." + " Default is %d microseconds.\n", + DEFAULT_RUN_SECS, DEFAULT_DELAY_USECS); + } + } +} + +int main(int argc, char *argv[]) +{ + int r; + int wait_secs; + const int max_halter_wait = 10; + int run_secs = 0; + int delay_usecs = 0; + struct test_data_page *data; + vm_vaddr_t test_data_page_vaddr; + bool migrate = false; + pthread_t threads[2]; + struct thread_params params[2]; + struct kvm_vm *vm; + uint64_t *pipis_rcvd; + + get_cmdline_args(argc, argv, &run_secs, &migrate, &delay_usecs); + if (run_secs <= 0) + run_secs = DEFAULT_RUN_SECS; + if (delay_usecs <= 0) + delay_usecs = DEFAULT_DELAY_USECS; + + vm = vm_create_default(HALTER_VCPU_ID, 0, halter_guest_code); + params[0].vm = vm; + params[1].vm = vm; + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vm, HALTER_VCPU_ID); + vm_handle_exception(vm, IPI_VECTOR, guest_ipi_handler); + + virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA, 0); + + vm_vcpu_add_default(vm, SENDER_VCPU_ID, sender_guest_code); + + test_data_page_vaddr = vm_vaddr_alloc(vm, 0x1000, 0x1000, 0, 0); + data = + (struct test_data_page *)addr_gva2hva(vm, test_data_page_vaddr); + memset(data, 0, sizeof(*data)); + params[0].data = data; + params[1].data = data; + + vcpu_args_set(vm, HALTER_VCPU_ID, 1, test_data_page_vaddr); + vcpu_args_set(vm, SENDER_VCPU_ID, 1, test_data_page_vaddr); + + pipis_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ipis_rcvd); + params[0].pipis_rcvd = pipis_rcvd; + params[1].pipis_rcvd = pipis_rcvd; + + /* Start halter vCPU thread and wait for it to execute first HLT. */ + params[0].vcpu_id = HALTER_VCPU_ID; + r = pthread_create(&threads[0], NULL, vcpu_thread, ¶ms[0]); + TEST_ASSERT(r == 0, + "pthread_create halter failed errno=%d", errno); + fprintf(stderr, "Halter vCPU thread started\n"); + + wait_secs = 0; + while ((wait_secs < max_halter_wait) && !data->hlt_count) { + sleep(1); + wait_secs++; + } + + TEST_ASSERT(data->hlt_count, + "Halter vCPU did not execute first HLT within %d seconds", + max_halter_wait); + + fprintf(stderr, + "Halter vCPU thread reported its APIC ID: %u after %d seconds.\n", + data->halter_apic_id, wait_secs); + + params[1].vcpu_id = SENDER_VCPU_ID; + r = pthread_create(&threads[1], NULL, vcpu_thread, ¶ms[1]); + TEST_ASSERT(r == 0, "pthread_create sender failed errno=%d", errno); + + fprintf(stderr, + "IPI sender vCPU thread started. Letting vCPUs run for %d seconds.\n", + run_secs); + + if (!migrate) + sleep(run_secs); + else + do_migrations(data, run_secs, delay_usecs, pipis_rcvd); + + /* + * Cancel threads and wait for them to stop. + */ + cancel_join_vcpu_thread(threads[0], HALTER_VCPU_ID); + cancel_join_vcpu_thread(threads[1], SENDER_VCPU_ID); + + fprintf(stderr, + "Test successful after running for %d seconds.\n" + "Sending vCPU sent %lu IPIs to halting vCPU\n" + "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n" + "Halter APIC ID=%#x\n" + "Sender ICR value=%#x ICR2 value=%#x\n" + "Halter TPR=%#x PPR=%#x LVR=%#x\n" + "Migrations attempted: %lu\n" + "Migrations completed: %lu\n", + run_secs, data->ipis_sent, + data->hlt_count, data->wake_count, *pipis_rcvd, + data->halter_apic_id, + data->icr, data->icr2, + data->halter_tpr, data->halter_ppr, data->halter_lvr, + data->migrations_attempted, data->migrations_completed); + + kvm_vm_free(vm); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c new file mode 100644 index 000000000000..9246ea310587 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c @@ -0,0 +1,167 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * svm_vmcall_test + * + * Copyright © 2021 Amazon.com, Inc. or its affiliates. + * + * Xen shared_info / pvclock testing + */ + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +#include <stdint.h> +#include <time.h> + +#define VCPU_ID 5 + +#define SHINFO_REGION_GPA 0xc0000000ULL +#define SHINFO_REGION_SLOT 10 +#define PAGE_SIZE 4096 + +#define PVTIME_ADDR (SHINFO_REGION_GPA + PAGE_SIZE) + +static struct kvm_vm *vm; + +#define XEN_HYPERCALL_MSR 0x40000000 + +struct pvclock_vcpu_time_info { + u32 version; + u32 pad0; + u64 tsc_timestamp; + u64 system_time; + u32 tsc_to_system_mul; + s8 tsc_shift; + u8 flags; + u8 pad[2]; +} __attribute__((__packed__)); /* 32 bytes */ + +struct pvclock_wall_clock { + u32 version; + u32 sec; + u32 nsec; +} __attribute__((__packed__)); + +static void guest_code(void) +{ + GUEST_DONE(); +} + +static int cmp_timespec(struct timespec *a, struct timespec *b) +{ + if (a->tv_sec > b->tv_sec) + return 1; + else if (a->tv_sec < b->tv_sec) + return -1; + else if (a->tv_nsec > b->tv_nsec) + return 1; + else if (a->tv_nsec < b->tv_nsec) + return -1; + else + return 0; +} + +int main(int argc, char *argv[]) +{ + struct timespec min_ts, max_ts, vm_ts; + + if (!(kvm_check_cap(KVM_CAP_XEN_HVM) & + KVM_XEN_HVM_CONFIG_SHARED_INFO) ) { + print_skip("KVM_XEN_HVM_CONFIG_SHARED_INFO not available"); + exit(KSFT_SKIP); + } + + clock_gettime(CLOCK_REALTIME, &min_ts); + + vm = vm_create_default(VCPU_ID, 0, (void *) guest_code); + vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + + /* Map a region for the shared_info page */ + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, + SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 2, 0); + + struct kvm_xen_hvm_config hvmc = { + .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL, + .msr = XEN_HYPERCALL_MSR, + }; + vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc); + + struct kvm_xen_hvm_attr lm = { + .type = KVM_XEN_ATTR_TYPE_LONG_MODE, + .u.long_mode = 1, + }; + vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm); + + struct kvm_xen_hvm_attr ha = { + .type = KVM_XEN_ATTR_TYPE_SHARED_INFO, + .u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE, + }; + vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ha); + + struct kvm_xen_vcpu_attr vi = { + .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO, + .u.gpa = SHINFO_REGION_GPA + 0x40, + }; + vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &vi); + + struct kvm_xen_vcpu_attr pvclock = { + .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO, + .u.gpa = PVTIME_ADDR, + }; + vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &pvclock); + + for (;;) { + volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID); + struct ucall uc; + + vcpu_run(vm, VCPU_ID); + + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_ABORT: + TEST_FAIL("%s", (const char *)uc.args[0]); + /* NOT REACHED */ + case UCALL_SYNC: + break; + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd); + } + } + + done: + clock_gettime(CLOCK_REALTIME, &max_ts); + + /* + * Just a *really* basic check that things are being put in the + * right place. The actual calculations are much the same for + * Xen as they are for the KVM variants, so no need to check. + */ + struct pvclock_wall_clock *wc; + struct pvclock_vcpu_time_info *ti, *ti2; + + wc = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0xc00); + ti = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0x40 + 0x20); + ti2 = addr_gpa2hva(vm, PVTIME_ADDR); + + vm_ts.tv_sec = wc->sec; + vm_ts.tv_nsec = wc->nsec; + TEST_ASSERT(wc->version && !(wc->version & 1), + "Bad wallclock version %x", wc->version); + TEST_ASSERT(cmp_timespec(&min_ts, &vm_ts) <= 0, "VM time too old"); + TEST_ASSERT(cmp_timespec(&max_ts, &vm_ts) >= 0, "VM time too new"); + + TEST_ASSERT(ti->version && !(ti->version & 1), + "Bad time_info version %x", ti->version); + TEST_ASSERT(ti2->version && !(ti2->version & 1), + "Bad time_info version %x", ti->version); + + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c new file mode 100644 index 000000000000..8389e0bfd711 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c @@ -0,0 +1,149 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * xen_vmcall_test + * + * Copyright © 2020 Amazon.com, Inc. or its affiliates. + * + * Userspace hypercall testing + */ + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +#define VCPU_ID 5 + +#define HCALL_REGION_GPA 0xc0000000ULL +#define HCALL_REGION_SLOT 10 +#define PAGE_SIZE 4096 + +static struct kvm_vm *vm; + +#define INPUTVALUE 17 +#define ARGVALUE(x) (0xdeadbeef5a5a0000UL + x) +#define RETVALUE 0xcafef00dfbfbffffUL + +#define XEN_HYPERCALL_MSR 0x40000200 +#define HV_GUEST_OS_ID_MSR 0x40000000 +#define HV_HYPERCALL_MSR 0x40000001 + +#define HVCALL_SIGNAL_EVENT 0x005d +#define HV_STATUS_INVALID_ALIGNMENT 4 + +static void guest_code(void) +{ + unsigned long rax = INPUTVALUE; + unsigned long rdi = ARGVALUE(1); + unsigned long rsi = ARGVALUE(2); + unsigned long rdx = ARGVALUE(3); + unsigned long rcx; + register unsigned long r10 __asm__("r10") = ARGVALUE(4); + register unsigned long r8 __asm__("r8") = ARGVALUE(5); + register unsigned long r9 __asm__("r9") = ARGVALUE(6); + + /* First a direct invocation of 'vmcall' */ + __asm__ __volatile__("vmcall" : + "=a"(rax) : + "a"(rax), "D"(rdi), "S"(rsi), "d"(rdx), + "r"(r10), "r"(r8), "r"(r9)); + GUEST_ASSERT(rax == RETVALUE); + + /* Fill in the Xen hypercall page */ + __asm__ __volatile__("wrmsr" : : "c" (XEN_HYPERCALL_MSR), + "a" (HCALL_REGION_GPA & 0xffffffff), + "d" (HCALL_REGION_GPA >> 32)); + + /* Set Hyper-V Guest OS ID */ + __asm__ __volatile__("wrmsr" : : "c" (HV_GUEST_OS_ID_MSR), + "a" (0x5a), "d" (0)); + + /* Hyper-V hypercall page */ + u64 msrval = HCALL_REGION_GPA + PAGE_SIZE + 1; + __asm__ __volatile__("wrmsr" : : "c" (HV_HYPERCALL_MSR), + "a" (msrval & 0xffffffff), + "d" (msrval >> 32)); + + /* Invoke a Xen hypercall */ + __asm__ __volatile__("call *%1" : "=a"(rax) : + "r"(HCALL_REGION_GPA + INPUTVALUE * 32), + "a"(rax), "D"(rdi), "S"(rsi), "d"(rdx), + "r"(r10), "r"(r8), "r"(r9)); + GUEST_ASSERT(rax == RETVALUE); + + /* Invoke a Hyper-V hypercall */ + rax = 0; + rcx = HVCALL_SIGNAL_EVENT; /* code */ + rdx = 0x5a5a5a5a; /* ingpa (badly aligned) */ + __asm__ __volatile__("call *%1" : "=a"(rax) : + "r"(HCALL_REGION_GPA + PAGE_SIZE), + "a"(rax), "c"(rcx), "d"(rdx), + "r"(r8)); + GUEST_ASSERT(rax == HV_STATUS_INVALID_ALIGNMENT); + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + if (!(kvm_check_cap(KVM_CAP_XEN_HVM) & + KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL) ) { + print_skip("KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL not available"); + exit(KSFT_SKIP); + } + + vm = vm_create_default(VCPU_ID, 0, (void *) guest_code); + vcpu_set_hv_cpuid(vm, VCPU_ID); + + struct kvm_xen_hvm_config hvmc = { + .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL, + .msr = XEN_HYPERCALL_MSR, + }; + vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc); + + /* Map a region for the hypercall pages */ + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, + HCALL_REGION_GPA, HCALL_REGION_SLOT, 2, 0); + virt_map(vm, HCALL_REGION_GPA, HCALL_REGION_GPA, 2, 0); + + for (;;) { + volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID); + struct ucall uc; + + vcpu_run(vm, VCPU_ID); + + if (run->exit_reason == KVM_EXIT_XEN) { + ASSERT_EQ(run->xen.type, KVM_EXIT_XEN_HCALL); + ASSERT_EQ(run->xen.u.hcall.cpl, 0); + ASSERT_EQ(run->xen.u.hcall.longmode, 1); + ASSERT_EQ(run->xen.u.hcall.input, INPUTVALUE); + ASSERT_EQ(run->xen.u.hcall.params[0], ARGVALUE(1)); + ASSERT_EQ(run->xen.u.hcall.params[1], ARGVALUE(2)); + ASSERT_EQ(run->xen.u.hcall.params[2], ARGVALUE(3)); + ASSERT_EQ(run->xen.u.hcall.params[3], ARGVALUE(4)); + ASSERT_EQ(run->xen.u.hcall.params[4], ARGVALUE(5)); + ASSERT_EQ(run->xen.u.hcall.params[5], ARGVALUE(6)); + run->xen.u.hcall.result = RETVALUE; + continue; + } + + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_ABORT: + TEST_FAIL("%s", (const char *)uc.args[0]); + /* NOT REACHED */ + case UCALL_SYNC: + break; + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd); + } + } +done: + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/mount_setattr/.gitignore b/tools/testing/selftests/mount_setattr/.gitignore new file mode 100644 index 000000000000..5f74d8488472 --- /dev/null +++ b/tools/testing/selftests/mount_setattr/.gitignore @@ -0,0 +1 @@ +mount_setattr_test diff --git a/tools/testing/selftests/mount_setattr/Makefile b/tools/testing/selftests/mount_setattr/Makefile new file mode 100644 index 000000000000..2250f7dcb81e --- /dev/null +++ b/tools/testing/selftests/mount_setattr/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 +# Makefile for mount selftests. +CFLAGS = -g -I../../../../usr/include/ -Wall -O2 -pthread + +TEST_GEN_FILES += mount_setattr_test + +include ../lib.mk diff --git a/tools/testing/selftests/mount_setattr/config b/tools/testing/selftests/mount_setattr/config new file mode 100644 index 000000000000..416bd53ce982 --- /dev/null +++ b/tools/testing/selftests/mount_setattr/config @@ -0,0 +1 @@ +CONFIG_USER_NS=y diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c new file mode 100644 index 000000000000..4e94e566e040 --- /dev/null +++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c @@ -0,0 +1,1424 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <sched.h> +#include <stdio.h> +#include <errno.h> +#include <pthread.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/mount.h> +#include <sys/wait.h> +#include <sys/vfs.h> +#include <sys/statvfs.h> +#include <sys/sysinfo.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <grp.h> +#include <stdbool.h> +#include <stdarg.h> + +#include "../kselftest_harness.h" + +#ifndef CLONE_NEWNS +#define CLONE_NEWNS 0x00020000 +#endif + +#ifndef CLONE_NEWUSER +#define CLONE_NEWUSER 0x10000000 +#endif + +#ifndef MS_REC +#define MS_REC 16384 +#endif + +#ifndef MS_RELATIME +#define MS_RELATIME (1 << 21) +#endif + +#ifndef MS_STRICTATIME +#define MS_STRICTATIME (1 << 24) +#endif + +#ifndef MOUNT_ATTR_RDONLY +#define MOUNT_ATTR_RDONLY 0x00000001 +#endif + +#ifndef MOUNT_ATTR_NOSUID +#define MOUNT_ATTR_NOSUID 0x00000002 +#endif + +#ifndef MOUNT_ATTR_NOEXEC +#define MOUNT_ATTR_NOEXEC 0x00000008 +#endif + +#ifndef MOUNT_ATTR_NODIRATIME +#define MOUNT_ATTR_NODIRATIME 0x00000080 +#endif + +#ifndef MOUNT_ATTR__ATIME +#define MOUNT_ATTR__ATIME 0x00000070 +#endif + +#ifndef MOUNT_ATTR_RELATIME +#define MOUNT_ATTR_RELATIME 0x00000000 +#endif + +#ifndef MOUNT_ATTR_NOATIME +#define MOUNT_ATTR_NOATIME 0x00000010 +#endif + +#ifndef MOUNT_ATTR_STRICTATIME +#define MOUNT_ATTR_STRICTATIME 0x00000020 +#endif + +#ifndef AT_RECURSIVE +#define AT_RECURSIVE 0x8000 +#endif + +#ifndef MS_SHARED +#define MS_SHARED (1 << 20) +#endif + +#define DEFAULT_THREADS 4 +#define ptr_to_int(p) ((int)((intptr_t)(p))) +#define int_to_ptr(u) ((void *)((intptr_t)(u))) + +#ifndef __NR_mount_setattr + #if defined __alpha__ + #define __NR_mount_setattr 552 + #elif defined _MIPS_SIM + #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ + #define __NR_mount_setattr (442 + 4000) + #endif + #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ + #define __NR_mount_setattr (442 + 6000) + #endif + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_mount_setattr (442 + 5000) + #endif + #elif defined __ia64__ + #define __NR_mount_setattr (442 + 1024) + #else + #define __NR_mount_setattr 442 + #endif + +struct mount_attr { + __u64 attr_set; + __u64 attr_clr; + __u64 propagation; + __u64 userns_fd; +}; +#endif + +#ifndef __NR_open_tree + #if defined __alpha__ + #define __NR_open_tree 538 + #elif defined _MIPS_SIM + #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ + #define __NR_open_tree 4428 + #endif + #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ + #define __NR_open_tree 6428 + #endif + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_open_tree 5428 + #endif + #elif defined __ia64__ + #define __NR_open_tree (428 + 1024) + #else + #define __NR_open_tree 428 + #endif +#endif + +#ifndef MOUNT_ATTR_IDMAP +#define MOUNT_ATTR_IDMAP 0x00100000 +#endif + +static inline int sys_mount_setattr(int dfd, const char *path, unsigned int flags, + struct mount_attr *attr, size_t size) +{ + return syscall(__NR_mount_setattr, dfd, path, flags, attr, size); +} + +#ifndef OPEN_TREE_CLONE +#define OPEN_TREE_CLONE 1 +#endif + +#ifndef OPEN_TREE_CLOEXEC +#define OPEN_TREE_CLOEXEC O_CLOEXEC +#endif + +#ifndef AT_RECURSIVE +#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */ +#endif + +static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags) +{ + return syscall(__NR_open_tree, dfd, filename, flags); +} + +static ssize_t write_nointr(int fd, const void *buf, size_t count) +{ + ssize_t ret; + + do { + ret = write(fd, buf, count); + } while (ret < 0 && errno == EINTR); + + return ret; +} + +static int write_file(const char *path, const void *buf, size_t count) +{ + int fd; + ssize_t ret; + + fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW); + if (fd < 0) + return -1; + + ret = write_nointr(fd, buf, count); + close(fd); + if (ret < 0 || (size_t)ret != count) + return -1; + + return 0; +} + +static int create_and_enter_userns(void) +{ + uid_t uid; + gid_t gid; + char map[100]; + + uid = getuid(); + gid = getgid(); + + if (unshare(CLONE_NEWUSER)) + return -1; + + if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) && + errno != ENOENT) + return -1; + + snprintf(map, sizeof(map), "0 %d 1", uid); + if (write_file("/proc/self/uid_map", map, strlen(map))) + return -1; + + + snprintf(map, sizeof(map), "0 %d 1", gid); + if (write_file("/proc/self/gid_map", map, strlen(map))) + return -1; + + if (setgid(0)) + return -1; + + if (setuid(0)) + return -1; + + return 0; +} + +static int prepare_unpriv_mountns(void) +{ + if (create_and_enter_userns()) + return -1; + + if (unshare(CLONE_NEWNS)) + return -1; + + if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0)) + return -1; + + return 0; +} + +static int read_mnt_flags(const char *path) +{ + int ret; + struct statvfs stat; + unsigned int mnt_flags; + + ret = statvfs(path, &stat); + if (ret != 0) + return -EINVAL; + + if (stat.f_flag & + ~(ST_RDONLY | ST_NOSUID | ST_NODEV | ST_NOEXEC | ST_NOATIME | + ST_NODIRATIME | ST_RELATIME | ST_SYNCHRONOUS | ST_MANDLOCK)) + return -EINVAL; + + mnt_flags = 0; + if (stat.f_flag & ST_RDONLY) + mnt_flags |= MS_RDONLY; + if (stat.f_flag & ST_NOSUID) + mnt_flags |= MS_NOSUID; + if (stat.f_flag & ST_NODEV) + mnt_flags |= MS_NODEV; + if (stat.f_flag & ST_NOEXEC) + mnt_flags |= MS_NOEXEC; + if (stat.f_flag & ST_NOATIME) + mnt_flags |= MS_NOATIME; + if (stat.f_flag & ST_NODIRATIME) + mnt_flags |= MS_NODIRATIME; + if (stat.f_flag & ST_RELATIME) + mnt_flags |= MS_RELATIME; + if (stat.f_flag & ST_SYNCHRONOUS) + mnt_flags |= MS_SYNCHRONOUS; + if (stat.f_flag & ST_MANDLOCK) + mnt_flags |= ST_MANDLOCK; + + return mnt_flags; +} + +static char *get_field(char *src, int nfields) +{ + int i; + char *p = src; + + for (i = 0; i < nfields; i++) { + while (*p && *p != ' ' && *p != '\t') + p++; + + if (!*p) + break; + + p++; + } + + return p; +} + +static void null_endofword(char *word) +{ + while (*word && *word != ' ' && *word != '\t') + word++; + *word = '\0'; +} + +static bool is_shared_mount(const char *path) +{ + size_t len = 0; + char *line = NULL; + FILE *f = NULL; + + f = fopen("/proc/self/mountinfo", "re"); + if (!f) + return false; + + while (getline(&line, &len, f) != -1) { + char *opts, *target; + + target = get_field(line, 4); + if (!target) + continue; + + opts = get_field(target, 2); + if (!opts) + continue; + + null_endofword(target); + + if (strcmp(target, path) != 0) + continue; + + null_endofword(opts); + if (strstr(opts, "shared:")) + return true; + } + + free(line); + fclose(f); + + return false; +} + +static void *mount_setattr_thread(void *data) +{ + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID, + .attr_clr = 0, + .propagation = MS_SHARED, + }; + + if (sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr))) + pthread_exit(int_to_ptr(-1)); + + pthread_exit(int_to_ptr(0)); +} + +/* Attempt to de-conflict with the selftests tree. */ +#ifndef SKIP +#define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__) +#endif + +static bool mount_setattr_supported(void) +{ + int ret; + + ret = sys_mount_setattr(-EBADF, "", AT_EMPTY_PATH, NULL, 0); + if (ret < 0 && errno == ENOSYS) + return false; + + return true; +} + +FIXTURE(mount_setattr) { +}; + +FIXTURE_SETUP(mount_setattr) +{ + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + ASSERT_EQ(prepare_unpriv_mountns(), 0); + + (void)umount2("/mnt", MNT_DETACH); + (void)umount2("/tmp", MNT_DETACH); + + ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV, + "size=100000,mode=700"), 0); + + ASSERT_EQ(mkdir("/tmp/B", 0777), 0); + + ASSERT_EQ(mount("testing", "/tmp/B", "tmpfs", MS_NOATIME | MS_NODEV, + "size=100000,mode=700"), 0); + + ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0); + + ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", MS_NOATIME | MS_NODEV, + "size=100000,mode=700"), 0); + + ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV, + "size=100000,mode=700"), 0); + + ASSERT_EQ(mkdir("/mnt/A", 0777), 0); + + ASSERT_EQ(mount("testing", "/mnt/A", "tmpfs", MS_NOATIME | MS_NODEV, + "size=100000,mode=700"), 0); + + ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0); + + ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0); + + ASSERT_EQ(mkdir("/mnt/B", 0777), 0); + + ASSERT_EQ(mount("testing", "/mnt/B", "ramfs", + MS_NOATIME | MS_NODEV | MS_NOSUID, 0), 0); + + ASSERT_EQ(mkdir("/mnt/B/BB", 0777), 0); + + ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts", + MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0); +} + +FIXTURE_TEARDOWN(mount_setattr) +{ + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + (void)umount2("/mnt/A", MNT_DETACH); + (void)umount2("/tmp", MNT_DETACH); +} + +TEST_F(mount_setattr, invalid_attributes) +{ + struct mount_attr invalid_attr = { + .attr_set = (1U << 31), + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, + sizeof(invalid_attr)), 0); + + invalid_attr.attr_set = 0; + invalid_attr.attr_clr = (1U << 31); + ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, + sizeof(invalid_attr)), 0); + + invalid_attr.attr_clr = 0; + invalid_attr.propagation = (1U << 31); + ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, + sizeof(invalid_attr)), 0); + + invalid_attr.attr_set = (1U << 31); + invalid_attr.attr_clr = (1U << 31); + invalid_attr.propagation = (1U << 31); + ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, + sizeof(invalid_attr)), 0); + + ASSERT_NE(sys_mount_setattr(-1, "mnt/A", AT_RECURSIVE, &invalid_attr, + sizeof(invalid_attr)), 0); +} + +TEST_F(mount_setattr, extensibility) +{ + unsigned int old_flags = 0, new_flags = 0, expected_flags = 0; + char *s = "dummy"; + struct mount_attr invalid_attr = {}; + struct mount_attr_large { + struct mount_attr attr1; + struct mount_attr attr2; + struct mount_attr attr3; + } large_attr = {}; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + old_flags = read_mnt_flags("/mnt/A"); + ASSERT_GT(old_flags, 0); + + ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, NULL, + sizeof(invalid_attr)), 0); + ASSERT_EQ(errno, EFAULT); + + ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, (void *)s, + sizeof(invalid_attr)), 0); + ASSERT_EQ(errno, EINVAL); + + ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, 0), 0); + ASSERT_EQ(errno, EINVAL); + + ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, + sizeof(invalid_attr) / 2), 0); + ASSERT_EQ(errno, EINVAL); + + ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, + sizeof(invalid_attr) / 2), 0); + ASSERT_EQ(errno, EINVAL); + + ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, + (void *)&large_attr, sizeof(large_attr)), 0); + + large_attr.attr3.attr_set = MOUNT_ATTR_RDONLY; + ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, + (void *)&large_attr, sizeof(large_attr)), 0); + + large_attr.attr3.attr_set = 0; + large_attr.attr1.attr_set = MOUNT_ATTR_RDONLY; + ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, + (void *)&large_attr, sizeof(large_attr)), 0); + + expected_flags = old_flags; + expected_flags |= MS_RDONLY; + + new_flags = read_mnt_flags("/mnt/A"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); + ASSERT_EQ(new_flags, expected_flags); +} + +TEST_F(mount_setattr, basic) +{ + unsigned int old_flags = 0, new_flags = 0, expected_flags = 0; + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME, + .attr_clr = MOUNT_ATTR__ATIME, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + old_flags = read_mnt_flags("/mnt/A"); + ASSERT_GT(old_flags, 0); + + ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", 0, &attr, sizeof(attr)), 0); + + expected_flags = old_flags; + expected_flags |= MS_RDONLY; + expected_flags |= MS_NOEXEC; + expected_flags &= ~MS_NOATIME; + expected_flags |= MS_RELATIME; + + new_flags = read_mnt_flags("/mnt/A"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA"); + ASSERT_EQ(new_flags, old_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B"); + ASSERT_EQ(new_flags, old_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); + ASSERT_EQ(new_flags, old_flags); +} + +TEST_F(mount_setattr, basic_recursive) +{ + int fd; + unsigned int old_flags = 0, new_flags = 0, expected_flags = 0; + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME, + .attr_clr = MOUNT_ATTR__ATIME, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + old_flags = read_mnt_flags("/mnt/A"); + ASSERT_GT(old_flags, 0); + + ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + expected_flags = old_flags; + expected_flags |= MS_RDONLY; + expected_flags |= MS_NOEXEC; + expected_flags &= ~MS_NOATIME; + expected_flags |= MS_RELATIME; + + new_flags = read_mnt_flags("/mnt/A"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); + ASSERT_EQ(new_flags, expected_flags); + + memset(&attr, 0, sizeof(attr)); + attr.attr_clr = MOUNT_ATTR_RDONLY; + attr.propagation = MS_SHARED; + ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + expected_flags &= ~MS_RDONLY; + new_flags = read_mnt_flags("/mnt/A"); + ASSERT_EQ(new_flags, expected_flags); + + ASSERT_EQ(is_shared_mount("/mnt/A"), true); + + new_flags = read_mnt_flags("/mnt/A/AA"); + ASSERT_EQ(new_flags, expected_flags); + + ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true); + + new_flags = read_mnt_flags("/mnt/A/AA/B"); + ASSERT_EQ(new_flags, expected_flags); + + ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true); + + new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); + ASSERT_EQ(new_flags, expected_flags); + + ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true); + + fd = open("/mnt/A/AA/B/b", O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0777); + ASSERT_GE(fd, 0); + + /* + * We're holding a fd open for writing so this needs to fail somewhere + * in the middle and the mount options need to be unchanged. + */ + attr.attr_set = MOUNT_ATTR_RDONLY; + ASSERT_LT(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + new_flags = read_mnt_flags("/mnt/A"); + ASSERT_EQ(new_flags, expected_flags); + + ASSERT_EQ(is_shared_mount("/mnt/A"), true); + + new_flags = read_mnt_flags("/mnt/A/AA"); + ASSERT_EQ(new_flags, expected_flags); + + ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true); + + new_flags = read_mnt_flags("/mnt/A/AA/B"); + ASSERT_EQ(new_flags, expected_flags); + + ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true); + + new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); + ASSERT_EQ(new_flags, expected_flags); + + ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true); + + EXPECT_EQ(close(fd), 0); +} + +TEST_F(mount_setattr, mount_has_writers) +{ + int fd, dfd; + unsigned int old_flags = 0, new_flags = 0; + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME, + .attr_clr = MOUNT_ATTR__ATIME, + .propagation = MS_SHARED, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + old_flags = read_mnt_flags("/mnt/A"); + ASSERT_GT(old_flags, 0); + + fd = open("/mnt/A/AA/B/b", O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0777); + ASSERT_GE(fd, 0); + + /* + * We're holding a fd open to a mount somwhere in the middle so this + * needs to fail somewhere in the middle. After this the mount options + * need to be unchanged. + */ + ASSERT_LT(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + new_flags = read_mnt_flags("/mnt/A"); + ASSERT_EQ(new_flags, old_flags); + + ASSERT_EQ(is_shared_mount("/mnt/A"), false); + + new_flags = read_mnt_flags("/mnt/A/AA"); + ASSERT_EQ(new_flags, old_flags); + + ASSERT_EQ(is_shared_mount("/mnt/A/AA"), false); + + new_flags = read_mnt_flags("/mnt/A/AA/B"); + ASSERT_EQ(new_flags, old_flags); + + ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), false); + + new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); + ASSERT_EQ(new_flags, old_flags); + + ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), false); + + dfd = open("/mnt/A/AA/B", O_DIRECTORY | O_CLOEXEC); + ASSERT_GE(dfd, 0); + EXPECT_EQ(fsync(dfd), 0); + EXPECT_EQ(close(dfd), 0); + + EXPECT_EQ(fsync(fd), 0); + EXPECT_EQ(close(fd), 0); + + /* All writers are gone so this should succeed. */ + ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); +} + +TEST_F(mount_setattr, mixed_mount_options) +{ + unsigned int old_flags1 = 0, old_flags2 = 0, new_flags = 0, expected_flags = 0; + struct mount_attr attr = { + .attr_clr = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID | MOUNT_ATTR_NOEXEC | MOUNT_ATTR__ATIME, + .attr_set = MOUNT_ATTR_RELATIME, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + old_flags1 = read_mnt_flags("/mnt/B"); + ASSERT_GT(old_flags1, 0); + + old_flags2 = read_mnt_flags("/mnt/B/BB"); + ASSERT_GT(old_flags2, 0); + + ASSERT_EQ(sys_mount_setattr(-1, "/mnt/B", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + expected_flags = old_flags2; + expected_flags &= ~(MS_RDONLY | MS_NOEXEC | MS_NOATIME | MS_NOSUID); + expected_flags |= MS_RELATIME; + + new_flags = read_mnt_flags("/mnt/B"); + ASSERT_EQ(new_flags, expected_flags); + + expected_flags = old_flags2; + expected_flags &= ~(MS_RDONLY | MS_NOEXEC | MS_NOATIME | MS_NOSUID); + expected_flags |= MS_RELATIME; + + new_flags = read_mnt_flags("/mnt/B/BB"); + ASSERT_EQ(new_flags, expected_flags); +} + +TEST_F(mount_setattr, time_changes) +{ + unsigned int old_flags = 0, new_flags = 0, expected_flags = 0; + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_NODIRATIME | MOUNT_ATTR_NOATIME, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + attr.attr_set = MOUNT_ATTR_STRICTATIME; + ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + attr.attr_set = MOUNT_ATTR_STRICTATIME | MOUNT_ATTR_NOATIME; + ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + attr.attr_set = MOUNT_ATTR_STRICTATIME | MOUNT_ATTR_NOATIME; + attr.attr_clr = MOUNT_ATTR__ATIME; + ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + attr.attr_set = 0; + attr.attr_clr = MOUNT_ATTR_STRICTATIME; + ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + attr.attr_clr = MOUNT_ATTR_NOATIME; + ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + old_flags = read_mnt_flags("/mnt/A"); + ASSERT_GT(old_flags, 0); + + attr.attr_set = MOUNT_ATTR_NODIRATIME | MOUNT_ATTR_NOATIME; + attr.attr_clr = MOUNT_ATTR__ATIME; + ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + expected_flags = old_flags; + expected_flags |= MS_NOATIME; + expected_flags |= MS_NODIRATIME; + + new_flags = read_mnt_flags("/mnt/A"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); + ASSERT_EQ(new_flags, expected_flags); + + memset(&attr, 0, sizeof(attr)); + attr.attr_set &= ~MOUNT_ATTR_NOATIME; + attr.attr_set |= MOUNT_ATTR_RELATIME; + attr.attr_clr |= MOUNT_ATTR__ATIME; + ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + expected_flags &= ~MS_NOATIME; + expected_flags |= MS_RELATIME; + + new_flags = read_mnt_flags("/mnt/A"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); + ASSERT_EQ(new_flags, expected_flags); + + memset(&attr, 0, sizeof(attr)); + attr.attr_set &= ~MOUNT_ATTR_RELATIME; + attr.attr_set |= MOUNT_ATTR_STRICTATIME; + attr.attr_clr |= MOUNT_ATTR__ATIME; + ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + expected_flags &= ~MS_RELATIME; + + new_flags = read_mnt_flags("/mnt/A"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); + ASSERT_EQ(new_flags, expected_flags); + + memset(&attr, 0, sizeof(attr)); + attr.attr_set &= ~MOUNT_ATTR_STRICTATIME; + attr.attr_set |= MOUNT_ATTR_NOATIME; + attr.attr_clr |= MOUNT_ATTR__ATIME; + ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + expected_flags |= MS_NOATIME; + new_flags = read_mnt_flags("/mnt/A"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); + ASSERT_EQ(new_flags, expected_flags); + + memset(&attr, 0, sizeof(attr)); + ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + new_flags = read_mnt_flags("/mnt/A"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); + ASSERT_EQ(new_flags, expected_flags); + + memset(&attr, 0, sizeof(attr)); + attr.attr_clr = MOUNT_ATTR_NODIRATIME; + ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + expected_flags &= ~MS_NODIRATIME; + + new_flags = read_mnt_flags("/mnt/A"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); + ASSERT_EQ(new_flags, expected_flags); +} + +TEST_F(mount_setattr, multi_threaded) +{ + int i, j, nthreads, ret = 0; + unsigned int old_flags = 0, new_flags = 0, expected_flags = 0; + pthread_attr_t pattr; + pthread_t threads[DEFAULT_THREADS]; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + old_flags = read_mnt_flags("/mnt/A"); + ASSERT_GT(old_flags, 0); + + /* Try to change mount options from multiple threads. */ + nthreads = get_nprocs_conf(); + if (nthreads > DEFAULT_THREADS) + nthreads = DEFAULT_THREADS; + + pthread_attr_init(&pattr); + for (i = 0; i < nthreads; i++) + ASSERT_EQ(pthread_create(&threads[i], &pattr, mount_setattr_thread, NULL), 0); + + for (j = 0; j < i; j++) { + void *retptr = NULL; + + EXPECT_EQ(pthread_join(threads[j], &retptr), 0); + + ret += ptr_to_int(retptr); + EXPECT_EQ(ret, 0); + } + pthread_attr_destroy(&pattr); + + ASSERT_EQ(ret, 0); + + expected_flags = old_flags; + expected_flags |= MS_RDONLY; + expected_flags |= MS_NOSUID; + new_flags = read_mnt_flags("/mnt/A"); + ASSERT_EQ(new_flags, expected_flags); + + ASSERT_EQ(is_shared_mount("/mnt/A"), true); + + new_flags = read_mnt_flags("/mnt/A/AA"); + ASSERT_EQ(new_flags, expected_flags); + + ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true); + + new_flags = read_mnt_flags("/mnt/A/AA/B"); + ASSERT_EQ(new_flags, expected_flags); + + ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true); + + new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); + ASSERT_EQ(new_flags, expected_flags); + + ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true); +} + +TEST_F(mount_setattr, wrong_user_namespace) +{ + int ret; + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_RDONLY, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + EXPECT_EQ(create_and_enter_userns(), 0); + ret = sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)); + ASSERT_LT(ret, 0); + ASSERT_EQ(errno, EPERM); +} + +TEST_F(mount_setattr, wrong_mount_namespace) +{ + int fd, ret; + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_RDONLY, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + fd = open("/mnt/A", O_DIRECTORY | O_CLOEXEC); + ASSERT_GE(fd, 0); + + ASSERT_EQ(unshare(CLONE_NEWNS), 0); + + ret = sys_mount_setattr(fd, "", AT_EMPTY_PATH | AT_RECURSIVE, &attr, sizeof(attr)); + ASSERT_LT(ret, 0); + ASSERT_EQ(errno, EINVAL); +} + +FIXTURE(mount_setattr_idmapped) { +}; + +FIXTURE_SETUP(mount_setattr_idmapped) +{ + int img_fd = -EBADF; + + ASSERT_EQ(unshare(CLONE_NEWNS), 0); + + ASSERT_EQ(mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0), 0); + + (void)umount2("/mnt", MNT_DETACH); + (void)umount2("/tmp", MNT_DETACH); + + ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV, + "size=100000,mode=700"), 0); + + ASSERT_EQ(mkdir("/tmp/B", 0777), 0); + ASSERT_EQ(mknodat(-EBADF, "/tmp/B/b", S_IFREG | 0644, 0), 0); + ASSERT_EQ(chown("/tmp/B/b", 0, 0), 0); + + ASSERT_EQ(mount("testing", "/tmp/B", "tmpfs", MS_NOATIME | MS_NODEV, + "size=100000,mode=700"), 0); + + ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0); + ASSERT_EQ(mknodat(-EBADF, "/tmp/B/BB/b", S_IFREG | 0644, 0), 0); + ASSERT_EQ(chown("/tmp/B/BB/b", 0, 0), 0); + + ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", MS_NOATIME | MS_NODEV, + "size=100000,mode=700"), 0); + + ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV, + "size=100000,mode=700"), 0); + + ASSERT_EQ(mkdir("/mnt/A", 0777), 0); + + ASSERT_EQ(mount("testing", "/mnt/A", "tmpfs", MS_NOATIME | MS_NODEV, + "size=100000,mode=700"), 0); + + ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0); + + ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0); + + ASSERT_EQ(mkdir("/mnt/B", 0777), 0); + + ASSERT_EQ(mount("testing", "/mnt/B", "ramfs", + MS_NOATIME | MS_NODEV | MS_NOSUID, 0), 0); + + ASSERT_EQ(mkdir("/mnt/B/BB", 0777), 0); + + ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts", + MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0); + + ASSERT_EQ(mkdir("/mnt/C", 0777), 0); + ASSERT_EQ(mkdir("/mnt/D", 0777), 0); + img_fd = openat(-EBADF, "/mnt/C/ext4.img", O_CREAT | O_WRONLY, 0600); + ASSERT_GE(img_fd, 0); + ASSERT_EQ(ftruncate(img_fd, 1024 * 2048), 0); + ASSERT_EQ(system("mkfs.ext4 -q /mnt/C/ext4.img"), 0); + ASSERT_EQ(system("mount -o loop -t ext4 /mnt/C/ext4.img /mnt/D/"), 0); + ASSERT_EQ(close(img_fd), 0); +} + +FIXTURE_TEARDOWN(mount_setattr_idmapped) +{ + (void)umount2("/mnt/A", MNT_DETACH); + (void)umount2("/tmp", MNT_DETACH); +} + +/** + * Validate that negative fd values are rejected. + */ +TEST_F(mount_setattr_idmapped, invalid_fd_negative) +{ + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_IDMAP, + .userns_fd = -EBADF, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) { + TH_LOG("failure: created idmapped mount with negative fd"); + } +} + +/** + * Validate that excessively large fd values are rejected. + */ +TEST_F(mount_setattr_idmapped, invalid_fd_large) +{ + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_IDMAP, + .userns_fd = INT64_MAX, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) { + TH_LOG("failure: created idmapped mount with too large fd value"); + } +} + +/** + * Validate that closed fd values are rejected. + */ +TEST_F(mount_setattr_idmapped, invalid_fd_closed) +{ + int fd; + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_IDMAP, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + fd = open("/dev/null", O_RDONLY | O_CLOEXEC); + ASSERT_GE(fd, 0); + ASSERT_GE(close(fd), 0); + + attr.userns_fd = fd; + ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) { + TH_LOG("failure: created idmapped mount with closed fd"); + } +} + +/** + * Validate that the initial user namespace is rejected. + */ +TEST_F(mount_setattr_idmapped, invalid_fd_initial_userns) +{ + int open_tree_fd = -EBADF; + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_IDMAP, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + open_tree_fd = sys_open_tree(-EBADF, "/mnt/D", + AT_NO_AUTOMOUNT | + AT_SYMLINK_NOFOLLOW | + OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE); + ASSERT_GE(open_tree_fd, 0); + + attr.userns_fd = open("/proc/1/ns/user", O_RDONLY | O_CLOEXEC); + ASSERT_GE(attr.userns_fd, 0); + ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + ASSERT_EQ(errno, EPERM); + ASSERT_EQ(close(attr.userns_fd), 0); + ASSERT_EQ(close(open_tree_fd), 0); +} + +static int map_ids(pid_t pid, unsigned long nsid, unsigned long hostid, + unsigned long range) +{ + char map[100], procfile[256]; + + snprintf(procfile, sizeof(procfile), "/proc/%d/uid_map", pid); + snprintf(map, sizeof(map), "%lu %lu %lu", nsid, hostid, range); + if (write_file(procfile, map, strlen(map))) + return -1; + + + snprintf(procfile, sizeof(procfile), "/proc/%d/gid_map", pid); + snprintf(map, sizeof(map), "%lu %lu %lu", nsid, hostid, range); + if (write_file(procfile, map, strlen(map))) + return -1; + + return 0; +} + +#define __STACK_SIZE (8 * 1024 * 1024) +static pid_t do_clone(int (*fn)(void *), void *arg, int flags) +{ + void *stack; + + stack = malloc(__STACK_SIZE); + if (!stack) + return -ENOMEM; + +#ifdef __ia64__ + return __clone2(fn, stack, __STACK_SIZE, flags | SIGCHLD, arg, NULL); +#else + return clone(fn, stack + __STACK_SIZE, flags | SIGCHLD, arg, NULL); +#endif +} + +static int get_userns_fd_cb(void *data) +{ + return kill(getpid(), SIGSTOP); +} + +static int wait_for_pid(pid_t pid) +{ + int status, ret; + +again: + ret = waitpid(pid, &status, 0); + if (ret == -1) { + if (errno == EINTR) + goto again; + + return -1; + } + + if (!WIFEXITED(status)) + return -1; + + return WEXITSTATUS(status); +} + +static int get_userns_fd(unsigned long nsid, unsigned long hostid, unsigned long range) +{ + int ret; + pid_t pid; + char path[256]; + + pid = do_clone(get_userns_fd_cb, NULL, CLONE_NEWUSER); + if (pid < 0) + return -errno; + + ret = map_ids(pid, nsid, hostid, range); + if (ret < 0) + return ret; + + snprintf(path, sizeof(path), "/proc/%d/ns/user", pid); + ret = open(path, O_RDONLY | O_CLOEXEC); + kill(pid, SIGKILL); + wait_for_pid(pid); + return ret; +} + +/** + * Validate that an attached mount in our mount namespace can be idmapped. + * (The kernel enforces that the mount's mount namespace and the caller's mount + * namespace match.) + */ +TEST_F(mount_setattr_idmapped, attached_mount_inside_current_mount_namespace) +{ + int open_tree_fd = -EBADF; + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_IDMAP, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + open_tree_fd = sys_open_tree(-EBADF, "/mnt/D", + AT_EMPTY_PATH | + AT_NO_AUTOMOUNT | + AT_SYMLINK_NOFOLLOW | + OPEN_TREE_CLOEXEC); + ASSERT_GE(open_tree_fd, 0); + + attr.userns_fd = get_userns_fd(0, 10000, 10000); + ASSERT_GE(attr.userns_fd, 0); + ASSERT_EQ(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + ASSERT_EQ(close(attr.userns_fd), 0); + ASSERT_EQ(close(open_tree_fd), 0); +} + +/** + * Validate that idmapping a mount is rejected if the mount's mount namespace + * and our mount namespace don't match. + * (The kernel enforces that the mount's mount namespace and the caller's mount + * namespace match.) + */ +TEST_F(mount_setattr_idmapped, attached_mount_outside_current_mount_namespace) +{ + int open_tree_fd = -EBADF; + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_IDMAP, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + open_tree_fd = sys_open_tree(-EBADF, "/mnt/D", + AT_EMPTY_PATH | + AT_NO_AUTOMOUNT | + AT_SYMLINK_NOFOLLOW | + OPEN_TREE_CLOEXEC); + ASSERT_GE(open_tree_fd, 0); + + ASSERT_EQ(unshare(CLONE_NEWNS), 0); + + attr.userns_fd = get_userns_fd(0, 10000, 10000); + ASSERT_GE(attr.userns_fd, 0); + ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, + sizeof(attr)), 0); + ASSERT_EQ(close(attr.userns_fd), 0); + ASSERT_EQ(close(open_tree_fd), 0); +} + +/** + * Validate that an attached mount in our mount namespace can be idmapped. + */ +TEST_F(mount_setattr_idmapped, detached_mount_inside_current_mount_namespace) +{ + int open_tree_fd = -EBADF; + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_IDMAP, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + open_tree_fd = sys_open_tree(-EBADF, "/mnt/D", + AT_EMPTY_PATH | + AT_NO_AUTOMOUNT | + AT_SYMLINK_NOFOLLOW | + OPEN_TREE_CLOEXEC | + OPEN_TREE_CLONE); + ASSERT_GE(open_tree_fd, 0); + + /* Changing mount properties on a detached mount. */ + attr.userns_fd = get_userns_fd(0, 10000, 10000); + ASSERT_GE(attr.userns_fd, 0); + ASSERT_EQ(sys_mount_setattr(open_tree_fd, "", + AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + ASSERT_EQ(close(attr.userns_fd), 0); + ASSERT_EQ(close(open_tree_fd), 0); +} + +/** + * Validate that a detached mount not in our mount namespace can be idmapped. + */ +TEST_F(mount_setattr_idmapped, detached_mount_outside_current_mount_namespace) +{ + int open_tree_fd = -EBADF; + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_IDMAP, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + open_tree_fd = sys_open_tree(-EBADF, "/mnt/D", + AT_EMPTY_PATH | + AT_NO_AUTOMOUNT | + AT_SYMLINK_NOFOLLOW | + OPEN_TREE_CLOEXEC | + OPEN_TREE_CLONE); + ASSERT_GE(open_tree_fd, 0); + + ASSERT_EQ(unshare(CLONE_NEWNS), 0); + + /* Changing mount properties on a detached mount. */ + attr.userns_fd = get_userns_fd(0, 10000, 10000); + ASSERT_GE(attr.userns_fd, 0); + ASSERT_EQ(sys_mount_setattr(open_tree_fd, "", + AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + ASSERT_EQ(close(attr.userns_fd), 0); + ASSERT_EQ(close(open_tree_fd), 0); +} + +/** + * Validate that currently changing the idmapping of an idmapped mount fails. + */ +TEST_F(mount_setattr_idmapped, change_idmapping) +{ + int open_tree_fd = -EBADF; + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_IDMAP, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + open_tree_fd = sys_open_tree(-EBADF, "/mnt/D", + AT_EMPTY_PATH | + AT_NO_AUTOMOUNT | + AT_SYMLINK_NOFOLLOW | + OPEN_TREE_CLOEXEC | + OPEN_TREE_CLONE); + ASSERT_GE(open_tree_fd, 0); + + attr.userns_fd = get_userns_fd(0, 10000, 10000); + ASSERT_GE(attr.userns_fd, 0); + ASSERT_EQ(sys_mount_setattr(open_tree_fd, "", + AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + ASSERT_EQ(close(attr.userns_fd), 0); + + /* Change idmapping on a detached mount that is already idmapped. */ + attr.userns_fd = get_userns_fd(0, 20000, 10000); + ASSERT_GE(attr.userns_fd, 0); + ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + ASSERT_EQ(close(attr.userns_fd), 0); + ASSERT_EQ(close(open_tree_fd), 0); +} + +static bool expected_uid_gid(int dfd, const char *path, int flags, + uid_t expected_uid, gid_t expected_gid) +{ + int ret; + struct stat st; + + ret = fstatat(dfd, path, &st, flags); + if (ret < 0) + return false; + + return st.st_uid == expected_uid && st.st_gid == expected_gid; +} + +TEST_F(mount_setattr_idmapped, idmap_mount_tree_invalid) +{ + int open_tree_fd = -EBADF; + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_IDMAP, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0); + ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0); + + open_tree_fd = sys_open_tree(-EBADF, "/mnt/A", + AT_RECURSIVE | + AT_EMPTY_PATH | + AT_NO_AUTOMOUNT | + AT_SYMLINK_NOFOLLOW | + OPEN_TREE_CLOEXEC | + OPEN_TREE_CLONE); + ASSERT_GE(open_tree_fd, 0); + + attr.userns_fd = get_userns_fd(0, 10000, 10000); + ASSERT_GE(attr.userns_fd, 0); + ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + ASSERT_EQ(close(attr.userns_fd), 0); + ASSERT_EQ(close(open_tree_fd), 0); + + ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0); + ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0); + ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/b", 0, 0, 0), 0); + ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/BB/b", 0, 0, 0), 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/nci/Makefile b/tools/testing/selftests/nci/Makefile new file mode 100644 index 000000000000..47669a1d6a59 --- /dev/null +++ b/tools/testing/selftests/nci/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +CFLAGS += -Wl,-no-as-needed -Wall +LDFLAGS += -lpthread + +TEST_GEN_PROGS := nci_dev +include ../lib.mk diff --git a/tools/testing/selftests/nci/config b/tools/testing/selftests/nci/config new file mode 100644 index 000000000000..b084e78276be --- /dev/null +++ b/tools/testing/selftests/nci/config @@ -0,0 +1,3 @@ +CONFIG_NFC=y +CONFIG_NFC_NCI=y +CONFIG_NFC_VIRTUAL_NCI=y diff --git a/tools/testing/selftests/nci/nci_dev.c b/tools/testing/selftests/nci/nci_dev.c new file mode 100644 index 000000000000..57b505cb1561 --- /dev/null +++ b/tools/testing/selftests/nci/nci_dev.c @@ -0,0 +1,599 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2021 Samsung Electrnoics + * Bongsu Jeon <bongsu.jeon@samsung.com> + * + * Test code for nci + */ + +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#include <sys/ioctl.h> +#include <fcntl.h> +#include <pthread.h> +#include <linux/genetlink.h> +#include <sys/socket.h> +#include <linux/nfc.h> + +#include "../kselftest_harness.h" + +#define GENLMSG_DATA(glh) ((void *)(NLMSG_DATA(glh) + GENL_HDRLEN)) +#define GENLMSG_PAYLOAD(glh) (NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN) +#define NLA_DATA(na) ((void *)((char *)(na) + NLA_HDRLEN)) +#define NLA_PAYLOAD(len) ((len) - NLA_HDRLEN) + +#define MAX_MSG_SIZE 1024 + +#define IOCTL_GET_NCIDEV_IDX 0 +#define VIRTUAL_NFC_PROTOCOLS (NFC_PROTO_JEWEL_MASK | \ + NFC_PROTO_MIFARE_MASK | \ + NFC_PROTO_FELICA_MASK | \ + NFC_PROTO_ISO14443_MASK | \ + NFC_PROTO_ISO14443_B_MASK | \ + NFC_PROTO_ISO15693_MASK) + +const __u8 nci_reset_cmd[] = {0x20, 0x00, 0x01, 0x01}; +const __u8 nci_init_cmd[] = {0x20, 0x01, 0x00}; +const __u8 nci_rf_discovery_cmd[] = {0x21, 0x03, 0x09, 0x04, 0x00, 0x01, + 0x01, 0x01, 0x02, 0x01, 0x06, 0x01}; +const __u8 nci_init_cmd_v2[] = {0x20, 0x01, 0x02, 0x00, 0x00}; +const __u8 nci_rf_disc_map_cmd[] = {0x21, 0x00, 0x07, 0x02, 0x04, 0x03, + 0x02, 0x05, 0x03, 0x03}; +const __u8 nci_rf_deact_cmd[] = {0x21, 0x06, 0x01, 0x00}; +const __u8 nci_reset_rsp[] = {0x40, 0x00, 0x03, 0x00, 0x10, 0x01}; +const __u8 nci_reset_rsp_v2[] = {0x40, 0x00, 0x01, 0x00}; +const __u8 nci_reset_ntf[] = {0x60, 0x00, 0x09, 0x02, 0x01, 0x20, 0x0e, + 0x04, 0x61, 0x00, 0x04, 0x02}; +const __u8 nci_init_rsp[] = {0x40, 0x01, 0x14, 0x00, 0x02, 0x0e, 0x02, + 0x00, 0x03, 0x01, 0x02, 0x03, 0x02, 0xc8, + 0x00, 0xff, 0x10, 0x00, 0x0e, 0x12, 0x00, + 0x00, 0x04}; +const __u8 nci_init_rsp_v2[] = {0x40, 0x01, 0x1c, 0x00, 0x1a, 0x7e, 0x06, + 0x00, 0x02, 0x92, 0x04, 0xff, 0xff, 0x01, + 0x00, 0x40, 0x06, 0x00, 0x00, 0x01, 0x01, + 0x00, 0x02, 0x00, 0x03, 0x01, 0x01, 0x06, + 0x00, 0x80, 0x00}; +const __u8 nci_rf_disc_map_rsp[] = {0x41, 0x00, 0x01, 0x00}; +const __u8 nci_rf_disc_rsp[] = {0x41, 0x03, 0x01, 0x00}; +const __u8 nci_rf_deact_rsp[] = {0x41, 0x06, 0x01, 0x00}; + +struct msgtemplate { + struct nlmsghdr n; + struct genlmsghdr g; + char buf[MAX_MSG_SIZE]; +}; + +static int create_nl_socket(void) +{ + int fd; + struct sockaddr_nl local; + + fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); + if (fd < 0) + return -1; + + memset(&local, 0, sizeof(local)); + local.nl_family = AF_NETLINK; + + if (bind(fd, (struct sockaddr *)&local, sizeof(local)) < 0) + goto error; + + return fd; +error: + close(fd); + return -1; +} + +static int send_cmd_mt_nla(int sd, __u16 nlmsg_type, __u32 nlmsg_pid, + __u8 genl_cmd, int nla_num, __u16 nla_type[], + void *nla_data[], int nla_len[]) +{ + struct sockaddr_nl nladdr; + struct msgtemplate msg; + struct nlattr *na; + int cnt, prv_len; + int r, buflen; + char *buf; + + msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN); + msg.n.nlmsg_type = nlmsg_type; + msg.n.nlmsg_flags = NLM_F_REQUEST; + msg.n.nlmsg_seq = 0; + msg.n.nlmsg_pid = nlmsg_pid; + msg.g.cmd = genl_cmd; + msg.g.version = 0x1; + + prv_len = 0; + for (cnt = 0; cnt < nla_num; cnt++) { + na = (struct nlattr *)(GENLMSG_DATA(&msg) + prv_len); + na->nla_type = nla_type[cnt]; + na->nla_len = nla_len[cnt] + NLA_HDRLEN; + + if (nla_len > 0) + memcpy(NLA_DATA(na), nla_data[cnt], nla_len[cnt]); + + msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len); + prv_len = na->nla_len; + } + + buf = (char *)&msg; + buflen = msg.n.nlmsg_len; + memset(&nladdr, 0, sizeof(nladdr)); + nladdr.nl_family = AF_NETLINK; + + while ((r = sendto(sd, buf, buflen, 0, (struct sockaddr *)&nladdr, + sizeof(nladdr))) < buflen) { + if (r > 0) { + buf += r; + buflen -= r; + } else if (errno != EAGAIN) { + return -1; + } + } + return 0; +} + +static int send_get_nfc_family(int sd, __u32 pid) +{ + __u16 nla_get_family_type = CTRL_ATTR_FAMILY_NAME; + void *nla_get_family_data; + int nla_get_family_len; + char family_name[100]; + + nla_get_family_len = strlen(NFC_GENL_NAME) + 1; + strcpy(family_name, NFC_GENL_NAME); + nla_get_family_data = family_name; + + return send_cmd_mt_nla(sd, GENL_ID_CTRL, pid, CTRL_CMD_GETFAMILY, + 1, &nla_get_family_type, + &nla_get_family_data, &nla_get_family_len); +} + +static int get_family_id(int sd, __u32 pid) +{ + struct { + struct nlmsghdr n; + struct genlmsghdr g; + char buf[512]; + } ans; + struct nlattr *na; + int rep_len; + __u16 id; + int rc; + + rc = send_get_nfc_family(sd, pid); + + if (rc < 0) + return 0; + + rep_len = recv(sd, &ans, sizeof(ans), 0); + + if (ans.n.nlmsg_type == NLMSG_ERROR || rep_len < 0 || + !NLMSG_OK(&ans.n, rep_len)) + return 0; + + na = (struct nlattr *)GENLMSG_DATA(&ans); + na = (struct nlattr *)((char *)na + NLA_ALIGN(na->nla_len)); + if (na->nla_type == CTRL_ATTR_FAMILY_ID) + id = *(__u16 *)NLA_DATA(na); + + return id; +} + +static int send_cmd_with_idx(int sd, __u16 nlmsg_type, __u32 nlmsg_pid, + __u8 genl_cmd, int dev_id) +{ + __u16 nla_type = NFC_ATTR_DEVICE_INDEX; + void *nla_data = &dev_id; + int nla_len = 4; + + return send_cmd_mt_nla(sd, nlmsg_type, nlmsg_pid, genl_cmd, 1, + &nla_type, &nla_data, &nla_len); +} + +static int get_nci_devid(int sd, __u16 fid, __u32 pid, int dev_id, struct msgtemplate *msg) +{ + int rc, rep_len; + + rc = send_cmd_with_idx(sd, fid, pid, NFC_CMD_GET_DEVICE, dev_id); + if (rc < 0) { + rc = -1; + goto error; + } + + rep_len = recv(sd, msg, sizeof(*msg), 0); + if (rep_len < 0) { + rc = -2; + goto error; + } + + if (msg->n.nlmsg_type == NLMSG_ERROR || + !NLMSG_OK(&msg->n, rep_len)) { + rc = -3; + goto error; + } + + return 0; +error: + return rc; +} + +static __u8 get_dev_enable_state(struct msgtemplate *msg) +{ + struct nlattr *na; + int rep_len; + int len; + + rep_len = GENLMSG_PAYLOAD(&msg->n); + na = (struct nlattr *)GENLMSG_DATA(msg); + len = 0; + + while (len < rep_len) { + len += NLA_ALIGN(na->nla_len); + if (na->nla_type == NFC_ATTR_DEVICE_POWERED) + return *(char *)NLA_DATA(na); + na = (struct nlattr *)(GENLMSG_DATA(msg) + len); + } + + return rep_len; +} + +FIXTURE(NCI) { + int virtual_nci_fd; + bool open_state; + int dev_idex; + bool isNCI2; + int proto; + __u32 pid; + __u16 fid; + int sd; +}; + +FIXTURE_VARIANT(NCI) { + bool isNCI2; +}; + +FIXTURE_VARIANT_ADD(NCI, NCI1_0) { + .isNCI2 = false, +}; + +FIXTURE_VARIANT_ADD(NCI, NCI2_0) { + .isNCI2 = true, +}; + +static void *virtual_dev_open(void *data) +{ + char buf[258]; + int dev_fd; + int len; + + dev_fd = *(int *)data; + + while ((len = read(dev_fd, buf, 258)) == 0) + ; + if (len <= 0) + goto error; + if (len != sizeof(nci_reset_cmd)) + goto error; + if (memcmp(nci_reset_cmd, buf, len)) + goto error; + write(dev_fd, nci_reset_rsp, sizeof(nci_reset_rsp)); + + while ((len = read(dev_fd, buf, 258)) == 0) + ; + if (len <= 0) + goto error; + if (len != sizeof(nci_init_cmd)) + goto error; + if (memcmp(nci_init_cmd, buf, len)) + goto error; + write(dev_fd, nci_init_rsp, sizeof(nci_init_rsp)); + + while ((len = read(dev_fd, buf, 258)) == 0) + ; + if (len <= 0) + goto error; + if (len != sizeof(nci_rf_disc_map_cmd)) + goto error; + if (memcmp(nci_rf_disc_map_cmd, buf, len)) + goto error; + write(dev_fd, nci_rf_disc_map_rsp, sizeof(nci_rf_disc_map_rsp)); + + return (void *)0; +error: + return (void *)-1; +} + +static void *virtual_dev_open_v2(void *data) +{ + char buf[258]; + int dev_fd; + int len; + + dev_fd = *(int *)data; + + while ((len = read(dev_fd, buf, 258)) == 0) + ; + if (len <= 0) + goto error; + if (len != sizeof(nci_reset_cmd)) + goto error; + if (memcmp(nci_reset_cmd, buf, len)) + goto error; + write(dev_fd, nci_reset_rsp_v2, sizeof(nci_reset_rsp_v2)); + write(dev_fd, nci_reset_ntf, sizeof(nci_reset_ntf)); + + while ((len = read(dev_fd, buf, 258)) == 0) + ; + if (len <= 0) + goto error; + if (len != sizeof(nci_init_cmd_v2)) + goto error; + if (memcmp(nci_init_cmd_v2, buf, len)) + goto error; + write(dev_fd, nci_init_rsp_v2, sizeof(nci_init_rsp_v2)); + + while ((len = read(dev_fd, buf, 258)) == 0) + ; + if (len <= 0) + goto error; + if (len != sizeof(nci_rf_disc_map_cmd)) + goto error; + if (memcmp(nci_rf_disc_map_cmd, buf, len)) + goto error; + write(dev_fd, nci_rf_disc_map_rsp, sizeof(nci_rf_disc_map_rsp)); + + return (void *)0; +error: + return (void *)-1; +} + +FIXTURE_SETUP(NCI) +{ + struct msgtemplate msg; + pthread_t thread_t; + int status; + int rc; + + self->open_state = false; + self->proto = VIRTUAL_NFC_PROTOCOLS; + self->isNCI2 = variant->isNCI2; + + self->sd = create_nl_socket(); + ASSERT_NE(self->sd, -1); + + self->pid = getpid(); + self->fid = get_family_id(self->sd, self->pid); + ASSERT_NE(self->fid, -1); + + self->virtual_nci_fd = open("/dev/virtual_nci", O_RDWR); + ASSERT_GT(self->virtual_nci_fd, -1); + + rc = ioctl(self->virtual_nci_fd, IOCTL_GET_NCIDEV_IDX, &self->dev_idex); + ASSERT_EQ(rc, 0); + + rc = get_nci_devid(self->sd, self->fid, self->pid, self->dev_idex, &msg); + ASSERT_EQ(rc, 0); + EXPECT_EQ(get_dev_enable_state(&msg), 0); + + if (self->isNCI2) + rc = pthread_create(&thread_t, NULL, virtual_dev_open_v2, + (void *)&self->virtual_nci_fd); + else + rc = pthread_create(&thread_t, NULL, virtual_dev_open, + (void *)&self->virtual_nci_fd); + ASSERT_GT(rc, -1); + + rc = send_cmd_with_idx(self->sd, self->fid, self->pid, + NFC_CMD_DEV_UP, self->dev_idex); + EXPECT_EQ(rc, 0); + + pthread_join(thread_t, (void **)&status); + ASSERT_EQ(status, 0); + self->open_state = true; +} + +static void *virtual_deinit(void *data) +{ + char buf[258]; + int dev_fd; + int len; + + dev_fd = *(int *)data; + + while ((len = read(dev_fd, buf, 258)) == 0) + ; + if (len <= 0) + goto error; + if (len != sizeof(nci_reset_cmd)) + goto error; + if (memcmp(nci_reset_cmd, buf, len)) + goto error; + write(dev_fd, nci_reset_rsp, sizeof(nci_reset_rsp)); + + return (void *)0; +error: + return (void *)-1; +} + +static void *virtual_deinit_v2(void *data) +{ + char buf[258]; + int dev_fd; + int len; + + dev_fd = *(int *)data; + + while ((len = read(dev_fd, buf, 258)) == 0) + ; + if (len <= 0) + goto error; + if (len != sizeof(nci_reset_cmd)) + goto error; + if (memcmp(nci_reset_cmd, buf, len)) + goto error; + write(dev_fd, nci_reset_rsp_v2, sizeof(nci_reset_rsp_v2)); + write(dev_fd, nci_reset_ntf, sizeof(nci_reset_ntf)); + + return (void *)0; +error: + return (void *)-1; +} + +FIXTURE_TEARDOWN(NCI) +{ + pthread_t thread_t; + int status; + int rc; + + if (self->open_state) { + if (self->isNCI2) + rc = pthread_create(&thread_t, NULL, + virtual_deinit_v2, + (void *)&self->virtual_nci_fd); + else + rc = pthread_create(&thread_t, NULL, virtual_deinit, + (void *)&self->virtual_nci_fd); + + ASSERT_GT(rc, -1); + rc = send_cmd_with_idx(self->sd, self->fid, self->pid, + NFC_CMD_DEV_DOWN, self->dev_idex); + EXPECT_EQ(rc, 0); + + pthread_join(thread_t, (void **)&status); + ASSERT_EQ(status, 0); + } + + close(self->sd); + close(self->virtual_nci_fd); + self->open_state = false; +} + +TEST_F(NCI, init) +{ + struct msgtemplate msg; + int rc; + + rc = get_nci_devid(self->sd, self->fid, self->pid, self->dev_idex, + &msg); + ASSERT_EQ(rc, 0); + EXPECT_EQ(get_dev_enable_state(&msg), 1); +} + +static void *virtual_poll_start(void *data) +{ + char buf[258]; + int dev_fd; + int len; + + dev_fd = *(int *)data; + + while ((len = read(dev_fd, buf, 258)) == 0) + ; + if (len <= 0) + goto error; + if (len != sizeof(nci_rf_discovery_cmd)) + goto error; + if (memcmp(nci_rf_discovery_cmd, buf, len)) + goto error; + write(dev_fd, nci_rf_disc_rsp, sizeof(nci_rf_disc_rsp)) + ; + + return (void *)0; +error: + return (void *)-1; +} + +static void *virtual_poll_stop(void *data) +{ + char buf[258]; + int dev_fd; + int len; + + dev_fd = *(int *)data; + + while ((len = read(dev_fd, buf, 258)) == 0) + ; + if (len <= 0) + goto error; + if (len != sizeof(nci_rf_deact_cmd)) + goto error; + if (memcmp(nci_rf_deact_cmd, buf, len)) + goto error; + write(dev_fd, nci_rf_deact_rsp, sizeof(nci_rf_deact_rsp)); + + return (void *)0; +error: + return (void *)-1; +} + +TEST_F(NCI, start_poll) +{ + __u16 nla_start_poll_type[2] = {NFC_ATTR_DEVICE_INDEX, + NFC_ATTR_PROTOCOLS}; + void *nla_start_poll_data[2] = {&self->dev_idex, &self->proto}; + int nla_start_poll_len[2] = {4, 4}; + pthread_t thread_t; + int status; + int rc; + + rc = pthread_create(&thread_t, NULL, virtual_poll_start, + (void *)&self->virtual_nci_fd); + ASSERT_GT(rc, -1); + + rc = send_cmd_mt_nla(self->sd, self->fid, self->pid, + NFC_CMD_START_POLL, 2, nla_start_poll_type, + nla_start_poll_data, nla_start_poll_len); + EXPECT_EQ(rc, 0); + + pthread_join(thread_t, (void **)&status); + ASSERT_EQ(status, 0); + + rc = pthread_create(&thread_t, NULL, virtual_poll_stop, + (void *)&self->virtual_nci_fd); + ASSERT_GT(rc, -1); + + rc = send_cmd_with_idx(self->sd, self->fid, self->pid, + NFC_CMD_STOP_POLL, self->dev_idex); + EXPECT_EQ(rc, 0); + + pthread_join(thread_t, (void **)&status); + ASSERT_EQ(status, 0); +} + +TEST_F(NCI, deinit) +{ + struct msgtemplate msg; + pthread_t thread_t; + int status; + int rc; + + rc = get_nci_devid(self->sd, self->fid, self->pid, self->dev_idex, + &msg); + ASSERT_EQ(rc, 0); + EXPECT_EQ(get_dev_enable_state(&msg), 1); + + if (self->isNCI2) + rc = pthread_create(&thread_t, NULL, virtual_deinit_v2, + (void *)&self->virtual_nci_fd); + else + rc = pthread_create(&thread_t, NULL, virtual_deinit, + (void *)&self->virtual_nci_fd); + ASSERT_GT(rc, -1); + + rc = send_cmd_with_idx(self->sd, self->fid, self->pid, + NFC_CMD_DEV_DOWN, self->dev_idex); + EXPECT_EQ(rc, 0); + + pthread_join(thread_t, (void **)&status); + self->open_state = 0; + ASSERT_EQ(status, 0); + + rc = get_nci_devid(self->sd, self->fid, self->pid, self->dev_idex, + &msg); + ASSERT_EQ(rc, 0); + EXPECT_EQ(get_dev_enable_state(&msg), 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index fa5fa425d148..25f198bec0b2 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -22,6 +22,7 @@ TEST_PROGS += devlink_port_split.py TEST_PROGS += drop_monitor_tests.sh TEST_PROGS += vrf_route_leaking.sh TEST_PROGS += bareudp.sh +TEST_PROGS += unicast_extensions.sh TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 02b0b9ead40b..a8ad92850e63 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -801,9 +801,9 @@ ipv4_tcp_md5_novrf() # basic use case log_start - run_cmd nettest -s -M ${MD5_PW} -r ${NSB_IP} & + run_cmd nettest -s -M ${MD5_PW} -m ${NSB_IP} & sleep 1 - run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW} + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: Single address config" # client sends MD5, server not configured @@ -811,23 +811,23 @@ ipv4_tcp_md5_novrf() show_hint "Should timeout due to MD5 mismatch" run_cmd nettest -s & sleep 1 - run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW} + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: Server no config, client uses password" # wrong password log_start show_hint "Should timeout since client uses wrong password" - run_cmd nettest -s -M ${MD5_PW} -r ${NSB_IP} & + run_cmd nettest -s -M ${MD5_PW} -m ${NSB_IP} & sleep 1 - run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW} + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: Client uses wrong password" # client from different address log_start show_hint "Should timeout due to MD5 mismatch" - run_cmd nettest -s -M ${MD5_PW} -r ${NSB_LO_IP} & + run_cmd nettest -s -M ${MD5_PW} -m ${NSB_LO_IP} & sleep 1 - run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW} + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: Client address does not match address configured with password" # @@ -838,7 +838,7 @@ ipv4_tcp_md5_novrf() log_start run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} & sleep 1 - run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW} + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: Prefix config" # client in prefix, wrong password @@ -846,7 +846,7 @@ ipv4_tcp_md5_novrf() show_hint "Should timeout since client uses wrong password" run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} & sleep 1 - run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW} + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: Prefix config, client uses wrong password" # client outside of prefix @@ -854,7 +854,7 @@ ipv4_tcp_md5_novrf() show_hint "Should timeout due to MD5 mismatch" run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} & sleep 1 - run_cmd_nsb nettest -l ${NSB_LO_IP} -r ${NSA_IP} -M ${MD5_PW} + run_cmd_nsb nettest -c ${NSB_LO_IP} -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: Prefix config, client address not in configured prefix" } @@ -869,33 +869,33 @@ ipv4_tcp_md5() # basic use case log_start - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP} & + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} & sleep 1 - run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW} + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Single address config" # client sends MD5, server not configured log_start show_hint "Should timeout since server does not have MD5 auth" - run_cmd nettest -s -d ${VRF} & + run_cmd nettest -s -I ${VRF} & sleep 1 - run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW} + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Server no config, client uses password" # wrong password log_start show_hint "Should timeout since client uses wrong password" - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP} & + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} & sleep 1 - run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW} + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Client uses wrong password" # client from different address log_start show_hint "Should timeout since server config differs from client" - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_LO_IP} & + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_LO_IP} & sleep 1 - run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW} + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Client address does not match address configured with password" # @@ -904,25 +904,25 @@ ipv4_tcp_md5() # client in prefix log_start - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} & + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & sleep 1 - run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW} + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Prefix config" # client in prefix, wrong password log_start show_hint "Should timeout since client uses wrong password" - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} & + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & sleep 1 - run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW} + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Prefix config, client uses wrong password" # client outside of prefix log_start show_hint "Should timeout since client address is outside of prefix" - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} & + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & sleep 1 - run_cmd_nsb nettest -l ${NSB_LO_IP} -r ${NSA_IP} -M ${MD5_PW} + run_cmd_nsb nettest -c ${NSB_LO_IP} -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Prefix config, client address not in configured prefix" # @@ -930,74 +930,74 @@ ipv4_tcp_md5() # log_start - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP} & - run_cmd nettest -s -M ${MD5_WRONG_PW} -r ${NSB_IP} & + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} & + run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} & sleep 1 - run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW} + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF" log_start - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP} & - run_cmd nettest -s -M ${MD5_WRONG_PW} -r ${NSB_IP} & + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} & + run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} & sleep 1 - run_cmd_nsc nettest -r ${NSA_IP} -M ${MD5_WRONG_PW} + run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF" log_start show_hint "Should timeout since client in default VRF uses VRF password" - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP} & - run_cmd nettest -s -M ${MD5_WRONG_PW} -r ${NSB_IP} & + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} & + run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} & sleep 1 - run_cmd_nsc nettest -r ${NSA_IP} -M ${MD5_PW} + run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF with VRF pw" log_start show_hint "Should timeout since client in VRF uses default VRF password" - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP} & - run_cmd nettest -s -M ${MD5_WRONG_PW} -r ${NSB_IP} & + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} & + run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} & sleep 1 - run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW} + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF with default VRF pw" log_start - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} & + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} & sleep 1 - run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW} + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF" log_start - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} & + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} & sleep 1 - run_cmd_nsc nettest -r ${NSA_IP} -M ${MD5_WRONG_PW} + run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF" log_start show_hint "Should timeout since client in default VRF uses VRF password" - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} & + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} & sleep 1 - run_cmd_nsc nettest -r ${NSA_IP} -M ${MD5_PW} + run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF with VRF pw" log_start show_hint "Should timeout since client in VRF uses default VRF password" - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} & + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} & sleep 1 - run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW} + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF with default VRF pw" # # negative tests # log_start - run_cmd nettest -s -d ${NSA_DEV} -M ${MD5_PW} -r ${NSB_IP} + run_cmd nettest -s -I ${NSA_DEV} -M ${MD5_PW} -m ${NSB_IP} log_test $? 1 "MD5: VRF: Device must be a VRF - single address" log_start - run_cmd nettest -s -d ${NSA_DEV} -M ${MD5_PW} -m ${NS_NET} + run_cmd nettest -s -I ${NSA_DEV} -M ${MD5_PW} -m ${NS_NET} log_test $? 1 "MD5: VRF: Device must be a VRF - prefix" } @@ -1020,7 +1020,7 @@ ipv4_tcp_novrf() a=${NSA_IP} log_start - run_cmd nettest -s -d ${NSA_DEV} & + run_cmd nettest -s -I ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "Device server" @@ -1076,7 +1076,7 @@ ipv4_tcp_novrf() a=${NSA_IP} log_start - run_cmd nettest -s -d ${NSA_DEV} & + run_cmd nettest -s -I ${NSA_DEV} & sleep 1 run_cmd nettest -r ${a} -0 ${a} log_test_addr ${a} $? 0 "Device server, unbound client, local connection" @@ -1085,7 +1085,7 @@ ipv4_tcp_novrf() do log_start show_hint "Should fail 'Connection refused' since addresses on loopback are out of device scope" - run_cmd nettest -s -d ${NSA_DEV} & + run_cmd nettest -s -I ${NSA_DEV} & sleep 1 run_cmd nettest -r ${a} log_test_addr ${a} $? 1 "Device server, unbound client, local connection" @@ -1110,7 +1110,7 @@ ipv4_tcp_novrf() a=${NSA_IP} log_start - run_cmd nettest -s -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -d ${NSA_DEV} -r ${a} -0 ${a} log_test_addr ${a} $? 0 "Device server, device client, local connection" @@ -1145,13 +1145,13 @@ ipv4_tcp_vrf() log_test_addr ${a} $? 1 "Global server" log_start - run_cmd nettest -s -d ${VRF} -2 ${VRF} & + run_cmd nettest -s -I ${VRF} -3 ${VRF} & sleep 1 run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "VRF server" log_start - run_cmd nettest -s -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "Device server" @@ -1186,14 +1186,14 @@ ipv4_tcp_vrf() do log_start show_hint "client socket should be bound to VRF" - run_cmd nettest -s -2 ${VRF} & + run_cmd nettest -s -3 ${VRF} & sleep 1 run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "Global server" log_start show_hint "client socket should be bound to VRF" - run_cmd nettest -s -d ${VRF} -2 ${VRF} & + run_cmd nettest -s -I ${VRF} -3 ${VRF} & sleep 1 run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "VRF server" @@ -1208,7 +1208,7 @@ ipv4_tcp_vrf() a=${NSA_IP} log_start show_hint "client socket should be bound to device" - run_cmd nettest -s -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "Device server" @@ -1218,7 +1218,7 @@ ipv4_tcp_vrf() do log_start show_hint "Should fail 'Connection refused' since client is not bound to VRF" - run_cmd nettest -s -d ${VRF} & + run_cmd nettest -s -I ${VRF} & sleep 1 run_cmd nettest -r ${a} log_test_addr ${a} $? 1 "Global server, local connection" @@ -1255,7 +1255,7 @@ ipv4_tcp_vrf() for a in ${NSA_IP} ${VRF_IP} 127.0.0.1 do log_start - run_cmd nettest -s -d ${VRF} -2 ${VRF} & + run_cmd nettest -s -I ${VRF} -3 ${VRF} & sleep 1 run_cmd nettest -r ${a} -d ${VRF} -0 ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local connection" @@ -1263,26 +1263,26 @@ ipv4_tcp_vrf() a=${NSA_IP} log_start - run_cmd nettest -s -d ${VRF} -2 ${VRF} & + run_cmd nettest -s -I ${VRF} -3 ${VRF} & sleep 1 run_cmd nettest -r ${a} -d ${NSA_DEV} -0 ${a} log_test_addr ${a} $? 0 "VRF server, device client, local connection" log_start show_hint "Should fail 'No route to host' since client is out of VRF scope" - run_cmd nettest -s -d ${VRF} & + run_cmd nettest -s -I ${VRF} & sleep 1 run_cmd nettest -r ${a} log_test_addr ${a} $? 1 "VRF server, unbound client, local connection" log_start - run_cmd nettest -s -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -r ${a} -d ${VRF} -0 ${a} log_test_addr ${a} $? 0 "Device server, VRF client, local connection" log_start - run_cmd nettest -s -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -r ${a} -d ${NSA_DEV} -0 ${a} log_test_addr ${a} $? 0 "Device server, device client, local connection" @@ -1321,7 +1321,7 @@ ipv4_udp_novrf() for a in ${NSA_IP} ${NSA_LO_IP} do log_start - run_cmd nettest -D -s -2 ${NSA_DEV} & + run_cmd nettest -D -s -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "Global server" @@ -1334,7 +1334,7 @@ ipv4_udp_novrf() a=${NSA_IP} log_start - run_cmd nettest -D -d ${NSA_DEV} -s -2 ${NSA_DEV} & + run_cmd nettest -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "Device server" @@ -1393,7 +1393,7 @@ ipv4_udp_novrf() a=${NSA_IP} log_start - run_cmd nettest -s -D -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -D -r ${a} log_test_addr ${a} $? 0 "Device server, unbound client, local connection" @@ -1402,7 +1402,7 @@ ipv4_udp_novrf() do log_start show_hint "Should fail 'Connection refused' since address is out of device scope" - run_cmd nettest -s -D -d ${NSA_DEV} & + run_cmd nettest -s -D -I ${NSA_DEV} & sleep 1 run_cmd nettest -D -r ${a} log_test_addr ${a} $? 1 "Device server, unbound client, local connection" @@ -1456,7 +1456,7 @@ ipv4_udp_novrf() a=${NSA_IP} log_start - run_cmd nettest -D -s -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -D -s -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -D -d ${NSA_DEV} -r ${a} -0 ${a} log_test_addr ${a} $? 0 "Device server, device client, local conn" @@ -1487,13 +1487,13 @@ ipv4_udp_vrf() log_test_addr ${a} $? 1 "Global server" log_start - run_cmd nettest -D -d ${VRF} -s -2 ${NSA_DEV} & + run_cmd nettest -D -I ${VRF} -s -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "VRF server" log_start - run_cmd nettest -D -d ${NSA_DEV} -s -2 ${NSA_DEV} & + run_cmd nettest -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "Enslaved device server" @@ -1513,26 +1513,26 @@ ipv4_udp_vrf() a=${NSA_IP} log_start - run_cmd nettest -s -D -d ${VRF} -2 ${NSA_DEV} & + run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" log_start - run_cmd nettest -s -D -d ${VRF} -2 ${NSA_DEV} & + run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "VRF server, enslaved device client, local connection" a=${NSA_IP} log_start - run_cmd nettest -s -D -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Enslaved device server, VRF client, local conn" log_start - run_cmd nettest -s -D -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Enslaved device server, device client, local conn" @@ -1547,19 +1547,19 @@ ipv4_udp_vrf() for a in ${NSA_IP} ${VRF_IP} do log_start - run_cmd nettest -D -s -2 ${NSA_DEV} & + run_cmd nettest -D -s -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "Global server" log_start - run_cmd nettest -D -d ${VRF} -s -2 ${NSA_DEV} & + run_cmd nettest -D -I ${VRF} -s -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "VRF server" log_start - run_cmd nettest -D -d ${NSA_DEV} -s -2 ${NSA_DEV} & + run_cmd nettest -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "Enslaved device server" @@ -1601,31 +1601,31 @@ ipv4_udp_vrf() # a=${NSA_IP} log_start - run_cmd nettest -D -s -2 ${NSA_DEV} & + run_cmd nettest -D -s -3 ${NSA_DEV} & sleep 1 run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Global server, VRF client, local conn" log_start - run_cmd nettest -s -D -d ${VRF} -2 ${NSA_DEV} & + run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" log_start - run_cmd nettest -s -D -d ${VRF} -2 ${NSA_DEV} & + run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "VRF server, device client, local conn" log_start - run_cmd nettest -s -D -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Enslaved device server, VRF client, local conn" log_start - run_cmd nettest -s -D -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Enslaved device server, device client, local conn" @@ -1633,7 +1633,7 @@ ipv4_udp_vrf() for a in ${VRF_IP} 127.0.0.1 do log_start - run_cmd nettest -D -s -2 ${VRF} & + run_cmd nettest -D -s -3 ${VRF} & sleep 1 run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Global server, VRF client, local conn" @@ -1642,7 +1642,7 @@ ipv4_udp_vrf() for a in ${VRF_IP} 127.0.0.1 do log_start - run_cmd nettest -s -D -d ${VRF} -2 ${VRF} & + run_cmd nettest -s -D -I ${VRF} -3 ${VRF} & sleep 1 run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" @@ -1697,7 +1697,7 @@ ipv4_addr_bind_novrf() log_test_addr ${a} $? 0 "Raw socket bind to local address" log_start - run_cmd nettest -s -R -P icmp -l ${a} -d ${NSA_DEV} -b + run_cmd nettest -s -R -P icmp -l ${a} -I ${NSA_DEV} -b log_test_addr ${a} $? 0 "Raw socket bind to local address after device bind" done @@ -1706,11 +1706,11 @@ ipv4_addr_bind_novrf() # a=${NSA_IP} log_start - run_cmd nettest -l ${a} -r ${NSB_IP} -t1 -b + run_cmd nettest -c ${a} -r ${NSB_IP} -t1 -b log_test_addr ${a} $? 0 "TCP socket bind to local address" log_start - run_cmd nettest -l ${a} -r ${NSB_IP} -d ${NSA_DEV} -t1 -b + run_cmd nettest -c ${a} -r ${NSB_IP} -d ${NSA_DEV} -t1 -b log_test_addr ${a} $? 0 "TCP socket bind to local address after device bind" # Sadly, the kernel allows binding a socket to a device and then @@ -1720,7 +1720,7 @@ ipv4_addr_bind_novrf() #a=${NSA_LO_IP} #log_start #show_hint "Should fail with 'Cannot assign requested address'" - #run_cmd nettest -s -l ${a} -d ${NSA_DEV} -t1 -b + #run_cmd nettest -s -l ${a} -I ${NSA_DEV} -t1 -b #log_test_addr ${a} $? 1 "TCP socket bind to out of scope local address" } @@ -1736,17 +1736,17 @@ ipv4_addr_bind_vrf() log_test_addr ${a} $? 0 "Raw socket bind to local address" log_start - run_cmd nettest -s -R -P icmp -l ${a} -d ${NSA_DEV} -b + run_cmd nettest -s -R -P icmp -l ${a} -I ${NSA_DEV} -b log_test_addr ${a} $? 0 "Raw socket bind to local address after device bind" log_start - run_cmd nettest -s -R -P icmp -l ${a} -d ${VRF} -b + run_cmd nettest -s -R -P icmp -l ${a} -I ${VRF} -b log_test_addr ${a} $? 0 "Raw socket bind to local address after VRF bind" done a=${NSA_LO_IP} log_start show_hint "Address on loopback is out of VRF scope" - run_cmd nettest -s -R -P icmp -l ${a} -d ${VRF} -b + run_cmd nettest -s -R -P icmp -l ${a} -I ${VRF} -b log_test_addr ${a} $? 1 "Raw socket bind to out of scope address after VRF bind" # @@ -1755,23 +1755,23 @@ ipv4_addr_bind_vrf() for a in ${NSA_IP} ${VRF_IP} do log_start - run_cmd nettest -s -l ${a} -d ${VRF} -t1 -b + run_cmd nettest -s -l ${a} -I ${VRF} -t1 -b log_test_addr ${a} $? 0 "TCP socket bind to local address" log_start - run_cmd nettest -s -l ${a} -d ${NSA_DEV} -t1 -b + run_cmd nettest -s -l ${a} -I ${NSA_DEV} -t1 -b log_test_addr ${a} $? 0 "TCP socket bind to local address after device bind" done a=${NSA_LO_IP} log_start show_hint "Address on loopback out of scope for VRF" - run_cmd nettest -s -l ${a} -d ${VRF} -t1 -b + run_cmd nettest -s -l ${a} -I ${VRF} -t1 -b log_test_addr ${a} $? 1 "TCP socket bind to invalid local address for VRF" log_start show_hint "Address on loopback out of scope for device in VRF" - run_cmd nettest -s -l ${a} -d ${NSA_DEV} -t1 -b + run_cmd nettest -s -l ${a} -I ${NSA_DEV} -t1 -b log_test_addr ${a} $? 1 "TCP socket bind to invalid local address for device bind" } @@ -1818,7 +1818,7 @@ ipv4_rt() for a in ${NSA_IP} ${VRF_IP} do log_start - run_cmd nettest ${varg} -s -d ${VRF} & + run_cmd nettest ${varg} -s -I ${VRF} & sleep 1 run_cmd_nsb nettest ${varg} -r ${a} & sleep 3 @@ -1831,7 +1831,7 @@ ipv4_rt() a=${NSA_IP} log_start - run_cmd nettest ${varg} -s -d ${NSA_DEV} & + run_cmd nettest ${varg} -s -I ${NSA_DEV} & sleep 1 run_cmd_nsb nettest ${varg} -r ${a} & sleep 3 @@ -1886,7 +1886,7 @@ ipv4_rt() for a in ${NSA_IP} ${VRF_IP} do log_start - run_cmd nettest ${varg} -d ${VRF} -s & + run_cmd nettest ${varg} -I ${VRF} -s & sleep 1 run_cmd nettest ${varg} -d ${VRF} -r ${a} & sleep 3 @@ -1910,7 +1910,7 @@ ipv4_rt() setup ${with_vrf} log_start - run_cmd nettest ${varg} -d ${VRF} -s & + run_cmd nettest ${varg} -I ${VRF} -s & sleep 1 run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & sleep 3 @@ -1921,7 +1921,7 @@ ipv4_rt() setup ${with_vrf} log_start - run_cmd nettest ${varg} -d ${NSA_DEV} -s & + run_cmd nettest ${varg} -I ${NSA_DEV} -s & sleep 1 run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & sleep 3 @@ -2265,9 +2265,9 @@ ipv6_tcp_md5_novrf() # basic use case log_start - run_cmd nettest -6 -s -M ${MD5_PW} -r ${NSB_IP6} & + run_cmd nettest -6 -s -M ${MD5_PW} -m ${NSB_IP6} & sleep 1 - run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW} + run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: Single address config" # client sends MD5, server not configured @@ -2275,23 +2275,23 @@ ipv6_tcp_md5_novrf() show_hint "Should timeout due to MD5 mismatch" run_cmd nettest -6 -s & sleep 1 - run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW} + run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: Server no config, client uses password" # wrong password log_start show_hint "Should timeout since client uses wrong password" - run_cmd nettest -6 -s -M ${MD5_PW} -r ${NSB_IP6} & + run_cmd nettest -6 -s -M ${MD5_PW} -m ${NSB_IP6} & sleep 1 - run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW} + run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: Client uses wrong password" # client from different address log_start show_hint "Should timeout due to MD5 mismatch" - run_cmd nettest -6 -s -M ${MD5_PW} -r ${NSB_LO_IP6} & + run_cmd nettest -6 -s -M ${MD5_PW} -m ${NSB_LO_IP6} & sleep 1 - run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW} + run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: Client address does not match address configured with password" # @@ -2302,7 +2302,7 @@ ipv6_tcp_md5_novrf() log_start run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} & sleep 1 - run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW} + run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: Prefix config" # client in prefix, wrong password @@ -2310,7 +2310,7 @@ ipv6_tcp_md5_novrf() show_hint "Should timeout since client uses wrong password" run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} & sleep 1 - run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW} + run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: Prefix config, client uses wrong password" # client outside of prefix @@ -2318,7 +2318,7 @@ ipv6_tcp_md5_novrf() show_hint "Should timeout due to MD5 mismatch" run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} & sleep 1 - run_cmd_nsb nettest -6 -l ${NSB_LO_IP6} -r ${NSA_IP6} -M ${MD5_PW} + run_cmd_nsb nettest -6 -c ${NSB_LO_IP6} -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: Prefix config, client address not in configured prefix" } @@ -2333,33 +2333,33 @@ ipv6_tcp_md5() # basic use case log_start - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP6} & + run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & sleep 1 - run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW} + run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Single address config" # client sends MD5, server not configured log_start show_hint "Should timeout since server does not have MD5 auth" - run_cmd nettest -6 -s -d ${VRF} & + run_cmd nettest -6 -s -I ${VRF} & sleep 1 - run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW} + run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Server no config, client uses password" # wrong password log_start show_hint "Should timeout since client uses wrong password" - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP6} & + run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & sleep 1 - run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW} + run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Client uses wrong password" # client from different address log_start show_hint "Should timeout since server config differs from client" - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_LO_IP6} & + run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_LO_IP6} & sleep 1 - run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW} + run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Client address does not match address configured with password" # @@ -2368,25 +2368,25 @@ ipv6_tcp_md5() # client in prefix log_start - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} & + run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & sleep 1 - run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW} + run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Prefix config" # client in prefix, wrong password log_start show_hint "Should timeout since client uses wrong password" - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} & + run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & sleep 1 - run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW} + run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Prefix config, client uses wrong password" # client outside of prefix log_start show_hint "Should timeout since client address is outside of prefix" - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} & + run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & sleep 1 - run_cmd_nsb nettest -6 -l ${NSB_LO_IP6} -r ${NSA_IP6} -M ${MD5_PW} + run_cmd_nsb nettest -6 -c ${NSB_LO_IP6} -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Prefix config, client address not in configured prefix" # @@ -2394,74 +2394,74 @@ ipv6_tcp_md5() # log_start - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP6} & - run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -r ${NSB_IP6} & + run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & + run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} & sleep 1 - run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW} + run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF" log_start - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP6} & - run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -r ${NSB_IP6} & + run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & + run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} & sleep 1 - run_cmd_nsc nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW} + run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF" log_start show_hint "Should timeout since client in default VRF uses VRF password" - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP6} & - run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -r ${NSB_IP6} & + run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & + run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} & sleep 1 - run_cmd_nsc nettest -6 -r ${NSA_IP6} -M ${MD5_PW} + run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF with VRF pw" log_start show_hint "Should timeout since client in VRF uses default VRF password" - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP6} & - run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -r ${NSB_IP6} & + run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & + run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} & sleep 1 - run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW} + run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF with default VRF pw" log_start - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} & + run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} & sleep 1 - run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW} + run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF" log_start - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} & + run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} & sleep 1 - run_cmd_nsc nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW} + run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF" log_start show_hint "Should timeout since client in default VRF uses VRF password" - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} & + run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} & sleep 1 - run_cmd_nsc nettest -6 -r ${NSA_IP6} -M ${MD5_PW} + run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF with VRF pw" log_start show_hint "Should timeout since client in VRF uses default VRF password" - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} & + run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} & sleep 1 - run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW} + run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF with default VRF pw" # # negative tests # log_start - run_cmd nettest -6 -s -d ${NSA_DEV} -M ${MD5_PW} -r ${NSB_IP6} + run_cmd nettest -6 -s -I ${NSA_DEV} -M ${MD5_PW} -m ${NSB_IP6} log_test $? 1 "MD5: VRF: Device must be a VRF - single address" log_start - run_cmd nettest -6 -s -d ${NSA_DEV} -M ${MD5_PW} -m ${NS_NET6} + run_cmd nettest -6 -s -I ${NSA_DEV} -M ${MD5_PW} -m ${NS_NET6} log_test $? 1 "MD5: VRF: Device must be a VRF - prefix" } @@ -2534,7 +2534,7 @@ ipv6_tcp_novrf() a=${NSA_IP6} log_start - run_cmd nettest -6 -s -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -6 -r ${a} -0 ${a} log_test_addr ${a} $? 0 "Device server, unbound client, local connection" @@ -2543,7 +2543,7 @@ ipv6_tcp_novrf() do log_start show_hint "Should fail 'Connection refused' since addresses on loopback are out of device scope" - run_cmd nettest -6 -s -d ${NSA_DEV} & + run_cmd nettest -6 -s -I ${NSA_DEV} & sleep 1 run_cmd nettest -6 -r ${a} log_test_addr ${a} $? 1 "Device server, unbound client, local connection" @@ -2569,7 +2569,7 @@ ipv6_tcp_novrf() for a in ${NSA_IP6} ${NSA_LINKIP6} do log_start - run_cmd nettest -6 -s -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -6 -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Device server, device client, local conn" @@ -2611,7 +2611,7 @@ ipv6_tcp_vrf() for a in ${NSA_IP6} ${VRF_IP6} do log_start - run_cmd nettest -6 -s -d ${VRF} -2 ${VRF} & + run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} & sleep 1 run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "VRF server" @@ -2620,7 +2620,7 @@ ipv6_tcp_vrf() # link local is always bound to ingress device a=${NSA_LINKIP6}%${NSB_DEV} log_start - run_cmd nettest -6 -s -d ${VRF} -2 ${NSA_DEV} & + run_cmd nettest -6 -s -I ${VRF} -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "VRF server" @@ -2628,7 +2628,7 @@ ipv6_tcp_vrf() for a in ${NSA_IP6} ${VRF_IP6} ${NSA_LINKIP6}%${NSB_DEV} do log_start - run_cmd nettest -6 -s -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "Device server" @@ -2664,7 +2664,7 @@ ipv6_tcp_vrf() for a in ${NSA_IP6} ${VRF_IP6} do log_start - run_cmd nettest -6 -s -2 ${VRF} & + run_cmd nettest -6 -s -3 ${VRF} & sleep 1 run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "Global server" @@ -2673,7 +2673,7 @@ ipv6_tcp_vrf() for a in ${NSA_IP6} ${VRF_IP6} do log_start - run_cmd nettest -6 -s -d ${VRF} -2 ${VRF} & + run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} & sleep 1 run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "VRF server" @@ -2682,13 +2682,13 @@ ipv6_tcp_vrf() # For LLA, child socket is bound to device a=${NSA_LINKIP6}%${NSB_DEV} log_start - run_cmd nettest -6 -s -2 ${NSA_DEV} & + run_cmd nettest -6 -s -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "Global server" log_start - run_cmd nettest -6 -s -d ${VRF} -2 ${NSA_DEV} & + run_cmd nettest -6 -s -I ${VRF} -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "VRF server" @@ -2696,7 +2696,7 @@ ipv6_tcp_vrf() for a in ${NSA_IP6} ${NSA_LINKIP6}%${NSB_DEV} do log_start - run_cmd nettest -6 -s -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "Device server" @@ -2716,7 +2716,7 @@ ipv6_tcp_vrf() do log_start show_hint "Fails 'Connection refused' since client is not in VRF" - run_cmd nettest -6 -s -d ${VRF} & + run_cmd nettest -6 -s -I ${VRF} & sleep 1 run_cmd nettest -6 -r ${a} log_test_addr ${a} $? 1 "Global server, local connection" @@ -2771,7 +2771,7 @@ ipv6_tcp_vrf() for a in ${NSA_IP6} ${VRF_IP6} ::1 do log_start - run_cmd nettest -6 -s -d ${VRF} -2 ${VRF} & + run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} & sleep 1 run_cmd nettest -6 -r ${a} -d ${VRF} -0 ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local connection" @@ -2779,7 +2779,7 @@ ipv6_tcp_vrf() a=${NSA_IP6} log_start - run_cmd nettest -6 -s -d ${VRF} -2 ${VRF} & + run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} & sleep 1 run_cmd nettest -6 -r ${a} -d ${NSA_DEV} -0 ${a} log_test_addr ${a} $? 0 "VRF server, device client, local connection" @@ -2787,13 +2787,13 @@ ipv6_tcp_vrf() a=${NSA_IP6} log_start show_hint "Should fail since unbound client is out of VRF scope" - run_cmd nettest -6 -s -d ${VRF} & + run_cmd nettest -6 -s -I ${VRF} & sleep 1 run_cmd nettest -6 -r ${a} log_test_addr ${a} $? 1 "VRF server, unbound client, local connection" log_start - run_cmd nettest -6 -s -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -6 -r ${a} -d ${VRF} -0 ${a} log_test_addr ${a} $? 0 "Device server, VRF client, local connection" @@ -2801,7 +2801,7 @@ ipv6_tcp_vrf() for a in ${NSA_IP6} ${NSA_LINKIP6} do log_start - run_cmd nettest -6 -s -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -6 -r ${a} -d ${NSA_DEV} -0 ${a} log_test_addr ${a} $? 0 "Device server, device client, local connection" @@ -2841,13 +2841,13 @@ ipv6_udp_novrf() for a in ${NSA_IP6} ${NSA_LINKIP6}%${NSB_DEV} do log_start - run_cmd nettest -6 -D -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -s -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Global server" log_start - run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Device server" @@ -2855,7 +2855,7 @@ ipv6_udp_novrf() a=${NSA_LO_IP6} log_start - run_cmd nettest -6 -D -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -s -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Global server" @@ -2865,7 +2865,7 @@ ipv6_udp_novrf() # behavior. #log_start #show_hint "Should fail since loopback address is out of scope" - #run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} & + #run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & #sleep 1 #run_cmd_nsb nettest -6 -D -r ${a} #log_test_addr ${a} $? 1 "Device server" @@ -2933,7 +2933,7 @@ ipv6_udp_novrf() a=${NSA_IP6} log_start - run_cmd nettest -6 -s -D -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -6 -s -D -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Device server, unbound client, local connection" @@ -2942,7 +2942,7 @@ ipv6_udp_novrf() do log_start show_hint "Should fail 'Connection refused' since address is out of device scope" - run_cmd nettest -6 -s -D -d ${NSA_DEV} & + run_cmd nettest -6 -s -D -I ${NSA_DEV} & sleep 1 run_cmd nettest -6 -D -r ${a} log_test_addr ${a} $? 1 "Device server, local connection" @@ -2993,7 +2993,7 @@ ipv6_udp_novrf() a=${NSA_IP6} log_start - run_cmd nettest -6 -D -s -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -6 -D -s -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} -0 ${a} log_test_addr ${a} $? 0 "Device server, device client, local conn" @@ -3040,7 +3040,7 @@ ipv6_udp_vrf() for a in ${NSA_IP6} ${VRF_IP6} do log_start - run_cmd nettest -6 -D -d ${VRF} -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "VRF server" @@ -3049,7 +3049,7 @@ ipv6_udp_vrf() for a in ${NSA_IP6} ${VRF_IP6} do log_start - run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Enslaved device server" @@ -3080,7 +3080,7 @@ ipv6_udp_vrf() for a in ${NSA_IP6} ${VRF_IP6} do log_start - run_cmd nettest -6 -D -d ${VRF} -s & + run_cmd nettest -6 -D -I ${VRF} -s & sleep 1 run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" @@ -3095,19 +3095,19 @@ ipv6_udp_vrf() log_test_addr ${a} $? 1 "Global server, device client, local conn" log_start - run_cmd nettest -6 -D -d ${VRF} -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} & sleep 1 run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "VRF server, device client, local conn" log_start - run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & sleep 1 run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Enslaved device server, VRF client, local conn" log_start - run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & sleep 1 run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Enslaved device server, device client, local conn" @@ -3122,7 +3122,7 @@ ipv6_udp_vrf() for a in ${NSA_IP6} ${VRF_IP6} do log_start - run_cmd nettest -6 -D -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -s -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Global server" @@ -3131,7 +3131,7 @@ ipv6_udp_vrf() for a in ${NSA_IP6} ${VRF_IP6} do log_start - run_cmd nettest -6 -D -d ${VRF} -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "VRF server" @@ -3140,7 +3140,7 @@ ipv6_udp_vrf() for a in ${NSA_IP6} ${VRF_IP6} do log_start - run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Enslaved device server" @@ -3184,13 +3184,13 @@ ipv6_udp_vrf() # a=${NSA_IP6} log_start - run_cmd nettest -6 -D -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -s -3 ${NSA_DEV} & sleep 1 run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Global server, VRF client, local conn" #log_start - run_cmd nettest -6 -D -d ${VRF} -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} & sleep 1 run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" @@ -3198,13 +3198,13 @@ ipv6_udp_vrf() a=${VRF_IP6} log_start - run_cmd nettest -6 -D -s -2 ${VRF} & + run_cmd nettest -6 -D -s -3 ${VRF} & sleep 1 run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Global server, VRF client, local conn" log_start - run_cmd nettest -6 -D -d ${VRF} -s -2 ${VRF} & + run_cmd nettest -6 -D -I ${VRF} -s -3 ${VRF} & sleep 1 run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" @@ -3220,25 +3220,25 @@ ipv6_udp_vrf() # device to global IP a=${NSA_IP6} log_start - run_cmd nettest -6 -D -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -s -3 ${NSA_DEV} & sleep 1 run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Global server, device client, local conn" log_start - run_cmd nettest -6 -D -d ${VRF} -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} & sleep 1 run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "VRF server, device client, local conn" log_start - run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & sleep 1 run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Device server, VRF client, local conn" log_start - run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & sleep 1 run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Device server, device client, local conn" @@ -3332,7 +3332,7 @@ ipv6_addr_bind_novrf() log_test_addr ${a} $? 0 "Raw socket bind to local address" log_start - run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -d ${NSA_DEV} -b + run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -I ${NSA_DEV} -b log_test_addr ${a} $? 0 "Raw socket bind to local address after device bind" done @@ -3345,13 +3345,13 @@ ipv6_addr_bind_novrf() log_test_addr ${a} $? 0 "TCP socket bind to local address" log_start - run_cmd nettest -6 -s -l ${a} -d ${NSA_DEV} -t1 -b + run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b log_test_addr ${a} $? 0 "TCP socket bind to local address after device bind" a=${NSA_LO_IP6} log_start show_hint "Should fail with 'Cannot assign requested address'" - run_cmd nettest -6 -s -l ${a} -d ${NSA_DEV} -t1 -b + run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b log_test_addr ${a} $? 1 "TCP socket bind to out of scope local address" } @@ -3363,18 +3363,18 @@ ipv6_addr_bind_vrf() for a in ${NSA_IP6} ${VRF_IP6} do log_start - run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -d ${VRF} -b + run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -I ${VRF} -b log_test_addr ${a} $? 0 "Raw socket bind to local address after vrf bind" log_start - run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -d ${NSA_DEV} -b + run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -I ${NSA_DEV} -b log_test_addr ${a} $? 0 "Raw socket bind to local address after device bind" done a=${NSA_LO_IP6} log_start show_hint "Address on loopback is out of VRF scope" - run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -d ${VRF} -b + run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -I ${VRF} -b log_test_addr ${a} $? 1 "Raw socket bind to invalid local address after vrf bind" # @@ -3384,29 +3384,29 @@ ipv6_addr_bind_vrf() for a in ${NSA_IP6} ${VRF_IP6} do log_start - run_cmd nettest -6 -s -l ${a} -d ${VRF} -t1 -b + run_cmd nettest -6 -s -l ${a} -I ${VRF} -t1 -b log_test_addr ${a} $? 0 "TCP socket bind to local address with VRF bind" done a=${NSA_IP6} log_start - run_cmd nettest -6 -s -l ${a} -d ${NSA_DEV} -t1 -b + run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b log_test_addr ${a} $? 0 "TCP socket bind to local address with device bind" a=${VRF_IP6} log_start - run_cmd nettest -6 -s -l ${a} -d ${NSA_DEV} -t1 -b + run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b log_test_addr ${a} $? 1 "TCP socket bind to VRF address with device bind" a=${NSA_LO_IP6} log_start show_hint "Address on loopback out of scope for VRF" - run_cmd nettest -6 -s -l ${a} -d ${VRF} -t1 -b + run_cmd nettest -6 -s -l ${a} -I ${VRF} -t1 -b log_test_addr ${a} $? 1 "TCP socket bind to invalid local address for VRF" log_start show_hint "Address on loopback out of scope for device in VRF" - run_cmd nettest -6 -s -l ${a} -d ${NSA_DEV} -t1 -b + run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b log_test_addr ${a} $? 1 "TCP socket bind to invalid local address for device bind" } @@ -3454,7 +3454,7 @@ ipv6_rt() for a in ${NSA_IP6} ${VRF_IP6} do log_start - run_cmd nettest ${varg} -d ${VRF} -s & + run_cmd nettest ${varg} -I ${VRF} -s & sleep 1 run_cmd_nsb nettest ${varg} -r ${a} & sleep 3 @@ -3468,7 +3468,7 @@ ipv6_rt() for a in ${NSA_IP6} ${VRF_IP6} do log_start - run_cmd nettest ${varg} -d ${NSA_DEV} -s & + run_cmd nettest ${varg} -I ${NSA_DEV} -s & sleep 1 run_cmd_nsb nettest ${varg} -r ${a} & sleep 3 @@ -3525,7 +3525,7 @@ ipv6_rt() for a in ${NSA_IP6} ${VRF_IP6} do log_start - run_cmd nettest ${varg} -d ${VRF} -s & + run_cmd nettest ${varg} -I ${VRF} -s & sleep 1 run_cmd nettest ${varg} -d ${VRF} -r ${a} & sleep 3 @@ -3549,7 +3549,7 @@ ipv6_rt() setup ${with_vrf} log_start - run_cmd nettest ${varg} -d ${VRF} -s & + run_cmd nettest ${varg} -I ${VRF} -s & sleep 1 run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & sleep 3 @@ -3560,7 +3560,7 @@ ipv6_rt() setup ${with_vrf} log_start - run_cmd nettest ${varg} -d ${NSA_DEV} -s & + run_cmd nettest ${varg} -I ${NSA_DEV} -s & sleep 1 run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & sleep 3 diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index eb693a3b7b4a..4c7d33618437 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -869,7 +869,7 @@ ipv6_torture() pid3=$! ip netns exec me ping -f 2001:db8:101::2 >/dev/null 2>&1 & pid4=$! - ip netns exec me mausezahn veth1 -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 & + ip netns exec me mausezahn -6 veth1 -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 & pid5=$! sleep 300 diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 84205c3a55eb..2b5707738609 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -1055,7 +1055,6 @@ ipv6_addr_metric_test() check_route6 "2001:db8:104::1 dev dummy2 proto kernel metric 260" log_test $? 0 "Set metric with peer route on local side" - log_test $? 0 "User specified metric on local address" check_route6 "2001:db8:104::2 dev dummy2 proto kernel metric 260" log_test $? 0 "Set metric with peer route on peer side" diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config index 10e9a3321ae1..a4bd1b087303 100644 --- a/tools/testing/selftests/net/forwarding/config +++ b/tools/testing/selftests/net/forwarding/config @@ -10,6 +10,7 @@ CONFIG_NET_ACT_MIRRED=m CONFIG_NET_ACT_MPLS=m CONFIG_NET_ACT_VLAN=m CONFIG_NET_CLS_FLOWER=m +CONFIG_NET_CLS_MATCHALL=m CONFIG_NET_SCH_INGRESS=m CONFIG_NET_ACT_GACT=m CONFIG_VETH=m diff --git a/tools/testing/selftests/net/forwarding/ethtool_lib.sh b/tools/testing/selftests/net/forwarding/ethtool_lib.sh index 9188e624dec0..b9bfb45085af 100644 --- a/tools/testing/selftests/net/forwarding/ethtool_lib.sh +++ b/tools/testing/selftests/net/forwarding/ethtool_lib.sh @@ -22,6 +22,40 @@ ethtool_set() check_err $out "error in configuration. $cmd" } +dev_linkmodes_params_get() +{ + local dev=$1; shift + local adver=$1; shift + local -a linkmodes_params + local param_count + local arr + + if (($adver)); then + mode="Advertised link modes" + else + mode="Supported link modes" + fi + + local -a dev_linkmodes=($(dev_speeds_get $dev 1 $adver)) + for ((i=0; i<${#dev_linkmodes[@]}; i++)); do + linkmodes_params[$i]=$(echo -e "${dev_linkmodes[$i]}" | \ + # Replaces all non numbers with spaces + sed -e 's/[^0-9]/ /g' | \ + # Squeeze spaces in sequence to 1 space + tr -s ' ') + # Count how many numbers were found in the linkmode + param_count=$(echo "${linkmodes_params[$i]}" | wc -w) + if [[ $param_count -eq 1 ]]; then + linkmodes_params[$i]="${linkmodes_params[$i]} 1" + elif [[ $param_count -ge 3 ]]; then + arr=(${linkmodes_params[$i]}) + # Take only first two params + linkmodes_params[$i]=$(echo "${arr[@]:0:2}") + fi + done + echo ${linkmodes_params[@]} +} + dev_speeds_get() { local dev=$1; shift diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 98ea37d26c44..be71012b8fc5 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -42,6 +42,47 @@ check_tc_version() fi } +# Old versions of tc don't understand "mpls_uc" +check_tc_mpls_support() +{ + local dev=$1; shift + + tc filter add dev $dev ingress protocol mpls_uc pref 1 handle 1 \ + matchall action pipe &> /dev/null + if [[ $? -ne 0 ]]; then + echo "SKIP: iproute2 too old; tc is missing MPLS support" + return 1 + fi + tc filter del dev $dev ingress protocol mpls_uc pref 1 handle 1 \ + matchall +} + +# Old versions of tc produce invalid json output for mpls lse statistics +check_tc_mpls_lse_stats() +{ + local dev=$1; shift + local ret; + + tc filter add dev $dev ingress protocol mpls_uc pref 1 handle 1 \ + flower mpls lse depth 2 \ + action continue &> /dev/null + + if [[ $? -ne 0 ]]; then + echo "SKIP: iproute2 too old; tc-flower is missing extended MPLS support" + return 1 + fi + + tc -j filter show dev $dev ingress protocol mpls_uc | jq . &> /dev/null + ret=$? + tc filter del dev $dev ingress protocol mpls_uc pref 1 handle 1 \ + flower + + if [[ $ret -ne 0 ]]; then + echo "SKIP: iproute2 too old; tc-flower produces invalid json output for extended MPLS filters" + return 1 + fi +} + check_tc_shblock_support() { tc filter help 2>&1 | grep block &> /dev/null @@ -69,6 +110,15 @@ check_tc_action_hw_stats_support() fi } +check_ethtool_lanes_support() +{ + ethtool --help 2>&1| grep lanes &> /dev/null + if [[ $? -ne 0 ]]; then + echo "SKIP: ethtool too old; it is missing lanes support" + exit 1 + fi +} + if [[ "$(id -u)" -ne 0 ]]; then echo "SKIP: need root privileges" exit 0 @@ -263,6 +313,20 @@ not() [[ $? != 0 ]] } +get_max() +{ + local arr=("$@") + + max=${arr[0]} + for cur in ${arr[@]}; do + if [[ $cur -gt $max ]]; then + max=$cur + fi + done + + echo $max +} + grep_bridge_fdb() { local addr=$1; shift @@ -279,6 +343,11 @@ grep_bridge_fdb() $@ | grep $addr | grep $flag "$word" } +wait_for_port_up() +{ + "$@" | grep -q "Link detected: yes" +} + wait_for_offload() { "$@" | grep -q offload diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh index 388e4492b81b..76efb1f8375e 100755 --- a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh +++ b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh @@ -203,7 +203,7 @@ multipath4_test() t0_rp12=$(link_stats_tx_packets_get $rp12) t0_rp13=$(link_stats_tx_packets_get $rp13) - ip vrf exec vrf-h1 $MZ -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \ + ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \ -d 1msec -t udp "sp=1024,dp=0-32768" t1_rp12=$(link_stats_tx_packets_get $rp12) diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh index 79a209927962..464821c587a5 100755 --- a/tools/testing/selftests/net/forwarding/router_multipath.sh +++ b/tools/testing/selftests/net/forwarding/router_multipath.sh @@ -178,7 +178,7 @@ multipath4_test() t0_rp12=$(link_stats_tx_packets_get $rp12) t0_rp13=$(link_stats_tx_packets_get $rp13) - ip vrf exec vrf-h1 $MZ -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \ + ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \ -d 1msec -t udp "sp=1024,dp=0-32768" t1_rp12=$(link_stats_tx_packets_get $rp12) diff --git a/tools/testing/selftests/net/forwarding/tc_chains.sh b/tools/testing/selftests/net/forwarding/tc_chains.sh index 2934fb5ed2a2..b95de0463ebd 100755 --- a/tools/testing/selftests/net/forwarding/tc_chains.sh +++ b/tools/testing/selftests/net/forwarding/tc_chains.sh @@ -136,7 +136,7 @@ template_filter_fits() tc filter add dev $h2 ingress protocol ip pref 1 handle 1102 \ flower src_mac $h2mac action drop &> /dev/null - check_fail $? "Incorrectly succeded to insert filter which does not template" + check_fail $? "Incorrectly succeeded to insert filter which does not template" tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \ flower src_mac $h2mac action drop @@ -144,7 +144,7 @@ template_filter_fits() tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1102 \ flower dst_mac $h2mac action drop &> /dev/null - check_fail $? "Incorrectly succeded to insert filter which does not template" + check_fail $? "Incorrectly succeeded to insert filter which does not template" tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1102 \ flower &> /dev/null diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh index 058c746ee300..683711f41aa9 100755 --- a/tools/testing/selftests/net/forwarding/tc_flower.sh +++ b/tools/testing/selftests/net/forwarding/tc_flower.sh @@ -3,7 +3,10 @@ ALL_TESTS="match_dst_mac_test match_src_mac_test match_dst_ip_test \ match_src_ip_test match_ip_flags_test match_pcp_test match_vlan_test \ - match_ip_tos_test match_indev_test" + match_ip_tos_test match_indev_test match_ip_ttl_test + match_mpls_label_test \ + match_mpls_tc_test match_mpls_bos_test match_mpls_ttl_test \ + match_mpls_lse_test" NUM_NETIFS=2 source tc_common.sh source lib.sh @@ -310,6 +313,42 @@ match_ip_tos_test() log_test "ip_tos match ($tcflags)" } +match_ip_ttl_test() +{ + RET=0 + + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + $tcflags dst_ip 192.0.2.2 ip_ttl 63 action drop + tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \ + $tcflags dst_ip 192.0.2.2 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip "ttl=63" -q + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip "ttl=63,mf,frag=256" -q + + tc_check_packets "dev $h2 ingress" 102 1 + check_fail $? "Matched on the wrong filter (no check on ttl)" + + tc_check_packets "dev $h2 ingress" 101 2 + check_err $? "Did not match on correct filter (ttl=63)" + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip "ttl=255" -q + + tc_check_packets "dev $h2 ingress" 101 3 + check_fail $? "Matched on a wrong filter (ttl=63)" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match on correct filter (no check on ttl)" + + tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower + + log_test "ip_ttl match ($tcflags)" +} + match_indev_test() { RET=0 @@ -334,6 +373,309 @@ match_indev_test() log_test "indev match ($tcflags)" } +# Unfortunately, mausezahn can't build MPLS headers when used in L2 +# mode, so we have this function to build Label Stack Entries. +mpls_lse() +{ + local label=$1 + local tc=$2 + local bos=$3 + local ttl=$4 + + printf "%02x %02x %02x %02x" \ + $((label >> 12)) \ + $((label >> 4 & 0xff)) \ + $((((label & 0xf) << 4) + (tc << 1) + bos)) \ + $ttl +} + +match_mpls_label_test() +{ + local ethtype="88 47"; readonly ethtype + local pkt + + RET=0 + + check_tc_mpls_support $h2 || return 0 + + tc filter add dev $h2 ingress protocol mpls_uc pref 1 handle 101 \ + flower $tcflags mpls_label 0 action drop + tc filter add dev $h2 ingress protocol mpls_uc pref 2 handle 102 \ + flower $tcflags mpls_label 1048575 action drop + + pkt="$ethtype $(mpls_lse 1048575 0 1 255)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_fail $? "Matched on a wrong filter (1048575)" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match on correct filter (1048575)" + + pkt="$ethtype $(mpls_lse 0 0 1 255)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + + tc_check_packets "dev $h2 ingress" 102 2 + check_fail $? "Matched on a wrong filter (0)" + + tc_check_packets "dev $h2 ingress" 101 1 + check_err $? "Did not match on correct filter (0)" + + tc filter del dev $h2 ingress protocol mpls_uc pref 2 handle 102 flower + tc filter del dev $h2 ingress protocol mpls_uc pref 1 handle 101 flower + + log_test "mpls_label match ($tcflags)" +} + +match_mpls_tc_test() +{ + local ethtype="88 47"; readonly ethtype + local pkt + + RET=0 + + check_tc_mpls_support $h2 || return 0 + + tc filter add dev $h2 ingress protocol mpls_uc pref 1 handle 101 \ + flower $tcflags mpls_tc 0 action drop + tc filter add dev $h2 ingress protocol mpls_uc pref 2 handle 102 \ + flower $tcflags mpls_tc 7 action drop + + pkt="$ethtype $(mpls_lse 0 7 1 255)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_fail $? "Matched on a wrong filter (7)" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match on correct filter (7)" + + pkt="$ethtype $(mpls_lse 0 0 1 255)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + + tc_check_packets "dev $h2 ingress" 102 2 + check_fail $? "Matched on a wrong filter (0)" + + tc_check_packets "dev $h2 ingress" 101 1 + check_err $? "Did not match on correct filter (0)" + + tc filter del dev $h2 ingress protocol mpls_uc pref 2 handle 102 flower + tc filter del dev $h2 ingress protocol mpls_uc pref 1 handle 101 flower + + log_test "mpls_tc match ($tcflags)" +} + +match_mpls_bos_test() +{ + local ethtype="88 47"; readonly ethtype + local pkt + + RET=0 + + check_tc_mpls_support $h2 || return 0 + + tc filter add dev $h2 ingress protocol mpls_uc pref 1 handle 101 \ + flower $tcflags mpls_bos 0 action drop + tc filter add dev $h2 ingress protocol mpls_uc pref 2 handle 102 \ + flower $tcflags mpls_bos 1 action drop + + pkt="$ethtype $(mpls_lse 0 0 1 255)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_fail $? "Matched on a wrong filter (1)" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match on correct filter (1)" + + # Need to add a second label to properly mark the Bottom of Stack + pkt="$ethtype $(mpls_lse 0 0 0 255) $(mpls_lse 0 0 1 255)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + + tc_check_packets "dev $h2 ingress" 102 2 + check_fail $? "Matched on a wrong filter (0)" + + tc_check_packets "dev $h2 ingress" 101 1 + check_err $? "Did not match on correct filter (0)" + + tc filter del dev $h2 ingress protocol mpls_uc pref 2 handle 102 flower + tc filter del dev $h2 ingress protocol mpls_uc pref 1 handle 101 flower + + log_test "mpls_bos match ($tcflags)" +} + +match_mpls_ttl_test() +{ + local ethtype="88 47"; readonly ethtype + local pkt + + RET=0 + + check_tc_mpls_support $h2 || return 0 + + tc filter add dev $h2 ingress protocol mpls_uc pref 1 handle 101 \ + flower $tcflags mpls_ttl 0 action drop + tc filter add dev $h2 ingress protocol mpls_uc pref 2 handle 102 \ + flower $tcflags mpls_ttl 255 action drop + + pkt="$ethtype $(mpls_lse 0 0 1 255)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_fail $? "Matched on a wrong filter (255)" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match on correct filter (255)" + + pkt="$ethtype $(mpls_lse 0 0 1 0)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + + tc_check_packets "dev $h2 ingress" 102 2 + check_fail $? "Matched on a wrong filter (0)" + + tc_check_packets "dev $h2 ingress" 101 1 + check_err $? "Did not match on correct filter (0)" + + tc filter del dev $h2 ingress protocol mpls_uc pref 2 handle 102 flower + tc filter del dev $h2 ingress protocol mpls_uc pref 1 handle 101 flower + + log_test "mpls_ttl match ($tcflags)" +} + +match_mpls_lse_test() +{ + local ethtype="88 47"; readonly ethtype + local pkt + + RET=0 + + check_tc_mpls_lse_stats $h2 || return 0 + + # Match on first LSE (minimal values for each field) + tc filter add dev $h2 ingress protocol mpls_uc pref 1 handle 101 \ + flower $tcflags mpls lse depth 1 label 0 action continue + tc filter add dev $h2 ingress protocol mpls_uc pref 2 handle 102 \ + flower $tcflags mpls lse depth 1 tc 0 action continue + tc filter add dev $h2 ingress protocol mpls_uc pref 3 handle 103 \ + flower $tcflags mpls lse depth 1 bos 0 action continue + tc filter add dev $h2 ingress protocol mpls_uc pref 4 handle 104 \ + flower $tcflags mpls lse depth 1 ttl 0 action continue + + # Match on second LSE (maximal values for each field) + tc filter add dev $h2 ingress protocol mpls_uc pref 5 handle 105 \ + flower $tcflags mpls lse depth 2 label 1048575 action continue + tc filter add dev $h2 ingress protocol mpls_uc pref 6 handle 106 \ + flower $tcflags mpls lse depth 2 tc 7 action continue + tc filter add dev $h2 ingress protocol mpls_uc pref 7 handle 107 \ + flower $tcflags mpls lse depth 2 bos 1 action continue + tc filter add dev $h2 ingress protocol mpls_uc pref 8 handle 108 \ + flower $tcflags mpls lse depth 2 ttl 255 action continue + + # Match on LSE depth + tc filter add dev $h2 ingress protocol mpls_uc pref 9 handle 109 \ + flower $tcflags mpls lse depth 1 action continue + tc filter add dev $h2 ingress protocol mpls_uc pref 10 handle 110 \ + flower $tcflags mpls lse depth 2 action continue + tc filter add dev $h2 ingress protocol mpls_uc pref 11 handle 111 \ + flower $tcflags mpls lse depth 3 action continue + + # Base packet, matched by all filters (except for stack depth 3) + pkt="$ethtype $(mpls_lse 0 0 0 0) $(mpls_lse 1048575 7 1 255)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + + # Make a variant of the above packet, with a non-matching value + # for each LSE field + + # Wrong label at depth 1 + pkt="$ethtype $(mpls_lse 1 0 0 0) $(mpls_lse 1048575 7 1 255)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + + # Wrong TC at depth 1 + pkt="$ethtype $(mpls_lse 0 1 0 0) $(mpls_lse 1048575 7 1 255)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + + # Wrong BOS at depth 1 (not adding a second LSE here since BOS is set + # in the first label, so anything that'd follow wouldn't be considered) + pkt="$ethtype $(mpls_lse 0 0 1 0)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + + # Wrong TTL at depth 1 + pkt="$ethtype $(mpls_lse 0 0 0 1) $(mpls_lse 1048575 7 1 255)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + + # Wrong label at depth 2 + pkt="$ethtype $(mpls_lse 0 0 0 0) $(mpls_lse 1048574 7 1 255)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + + # Wrong TC at depth 2 + pkt="$ethtype $(mpls_lse 0 0 0 0) $(mpls_lse 1048575 6 1 255)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + + # Wrong BOS at depth 2 (adding a third LSE here since BOS isn't set in + # the second label) + pkt="$ethtype $(mpls_lse 0 0 0 0) $(mpls_lse 1048575 7 0 255)" + pkt="$pkt $(mpls_lse 0 0 1 255)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + + # Wrong TTL at depth 2 + pkt="$ethtype $(mpls_lse 0 0 0 0) $(mpls_lse 1048575 7 1 254)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + + # Filters working at depth 1 should match all packets but one + + tc_check_packets "dev $h2 ingress" 101 8 + check_err $? "Did not match on correct filter" + + tc_check_packets "dev $h2 ingress" 102 8 + check_err $? "Did not match on correct filter" + + tc_check_packets "dev $h2 ingress" 103 8 + check_err $? "Did not match on correct filter" + + tc_check_packets "dev $h2 ingress" 104 8 + check_err $? "Did not match on correct filter" + + # Filters working at depth 2 should match all packets but two (because + # of the test packet where the label stack depth is just one) + + tc_check_packets "dev $h2 ingress" 105 7 + check_err $? "Did not match on correct filter" + + tc_check_packets "dev $h2 ingress" 106 7 + check_err $? "Did not match on correct filter" + + tc_check_packets "dev $h2 ingress" 107 7 + check_err $? "Did not match on correct filter" + + tc_check_packets "dev $h2 ingress" 108 7 + check_err $? "Did not match on correct filter" + + # Finally, verify the filters that only match on LSE depth + + tc_check_packets "dev $h2 ingress" 109 9 + check_err $? "Did not match on correct filter" + + tc_check_packets "dev $h2 ingress" 110 8 + check_err $? "Did not match on correct filter" + + tc_check_packets "dev $h2 ingress" 111 1 + check_err $? "Did not match on correct filter" + + tc filter del dev $h2 ingress protocol mpls_uc pref 11 handle 111 flower + tc filter del dev $h2 ingress protocol mpls_uc pref 10 handle 110 flower + tc filter del dev $h2 ingress protocol mpls_uc pref 9 handle 109 flower + tc filter del dev $h2 ingress protocol mpls_uc pref 8 handle 108 flower + tc filter del dev $h2 ingress protocol mpls_uc pref 7 handle 107 flower + tc filter del dev $h2 ingress protocol mpls_uc pref 6 handle 106 flower + tc filter del dev $h2 ingress protocol mpls_uc pref 5 handle 105 flower + tc filter del dev $h2 ingress protocol mpls_uc pref 4 handle 104 flower + tc filter del dev $h2 ingress protocol mpls_uc pref 3 handle 103 flower + tc filter del dev $h2 ingress protocol mpls_uc pref 2 handle 102 flower + tc filter del dev $h2 ingress protocol mpls_uc pref 1 handle 101 flower + + log_test "mpls lse match ($tcflags)" +} + setup_prepare() { h1=${NETIFS[p1]} diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 2cfd87d94db8..10a030b53b23 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -128,6 +128,7 @@ cleanup() local netns for netns in "$ns1" "$ns2" "$ns3" "$ns4";do ip netns del $netns + rm -f /tmp/$netns.{nstat,out} done } @@ -333,6 +334,21 @@ do_ping() return 0 } +# $1: ns, $2: MIB counter +get_mib_counter() +{ + local listener_ns="${1}" + local mib="${2}" + + # strip the header + ip netns exec "${listener_ns}" \ + nstat -z -a "${mib}" | \ + tail -n+2 | \ + while read a count c rest; do + echo $count + done +} + # $1: ns, $2: port wait_local_port_listen() { @@ -409,10 +425,10 @@ do_transfer() sleep 1 fi - local stat_synrx_last_l=$(ip netns exec ${listener_ns} nstat -z -a MPTcpExtMPCapableSYNRX | while read a count c rest ;do echo $count;done) - local stat_ackrx_last_l=$(ip netns exec ${listener_ns} nstat -z -a MPTcpExtMPCapableACKRX | while read a count c rest ;do echo $count;done) - local stat_cookietx_last=$(ip netns exec ${listener_ns} nstat -z -a TcpExtSyncookiesSent | while read a count c rest ;do echo $count;done) - local stat_cookierx_last=$(ip netns exec ${listener_ns} nstat -z -a TcpExtSyncookiesRecv | while read a count c rest ;do echo $count;done) + local stat_synrx_last_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") + local stat_ackrx_last_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") + local stat_cookietx_last=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesSent") + local stat_cookierx_last=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesRecv") ip netns exec ${listener_ns} ./mptcp_connect -t $timeout -l -p $port -s ${srv_proto} $extra_args $local_addr < "$sin" > "$sout" & local spid=$! @@ -438,16 +454,26 @@ do_transfer() kill ${cappid_connector} fi + NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ + nstat | grep Tcp > /tmp/${listener_ns}.out + if [ ${listener_ns} != ${connector_ns} ]; then + NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ + nstat | grep Tcp > /tmp/${connector_ns}.out + fi + local duration duration=$((stop-start)) - duration=$(printf "(duration %05sms)" $duration) + printf "(duration %05sms) " "${duration}" if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then - echo "$duration [ FAIL ] client exit code $retc, server $rets" 1>&2 + echo "[ FAIL ] client exit code $retc, server $rets" 1>&2 echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2 - ip netns exec ${listener_ns} ss -nita 1>&2 -o "sport = :$port" + ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port" + cat /tmp/${listener_ns}.out echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2 - ip netns exec ${connector_ns} ss -nita 1>&2 -o "dport = :$port" + ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port" + [ ${listener_ns} != ${connector_ns} ] && cat /tmp/${connector_ns}.out + echo cat "$capout" return 1 fi @@ -457,11 +483,10 @@ do_transfer() check_transfer $cin $sout "file received by server" rets=$? - local stat_synrx_now_l=$(ip netns exec ${listener_ns} nstat -z -a MPTcpExtMPCapableSYNRX | while read a count c rest ;do echo $count;done) - local stat_ackrx_now_l=$(ip netns exec ${listener_ns} nstat -z -a MPTcpExtMPCapableACKRX | while read a count c rest ;do echo $count;done) - - local stat_cookietx_now=$(ip netns exec ${listener_ns} nstat -z -a TcpExtSyncookiesSent | while read a count c rest ;do echo $count;done) - local stat_cookierx_now=$(ip netns exec ${listener_ns} nstat -z -a TcpExtSyncookiesRecv | while read a count c rest ;do echo $count;done) + local stat_synrx_now_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") + local stat_ackrx_now_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") + local stat_cookietx_now=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesSent") + local stat_cookierx_now=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesRecv") expect_synrx=$((stat_synrx_last_l)) expect_ackrx=$((stat_ackrx_last_l)) @@ -473,37 +498,50 @@ do_transfer() expect_synrx=$((stat_synrx_last_l+1)) expect_ackrx=$((stat_ackrx_last_l+1)) fi + + if [ ${stat_synrx_now_l} -lt ${expect_synrx} ]; then + printf "[ FAIL ] lower MPC SYN rx (%d) than expected (%d)\n" \ + "${stat_synrx_now_l}" "${expect_synrx}" 1>&2 + retc=1 + fi + if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ]; then + printf "[ FAIL ] lower MPC ACK rx (%d) than expected (%d)\n" \ + "${stat_ackrx_now_l}" "${expect_ackrx}" 1>&2 + rets=1 + fi + + if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then + printf "[ OK ]" + fi + if [ $cookies -eq 2 ];then if [ $stat_cookietx_last -ge $stat_cookietx_now ] ;then - echo "${listener_ns} CookieSent: ${cl_proto} -> ${srv_proto}: did not advance" + printf " WARN: CookieSent: did not advance" fi if [ $stat_cookierx_last -ge $stat_cookierx_now ] ;then - echo "${listener_ns} CookieRecv: ${cl_proto} -> ${srv_proto}: did not advance" + printf " WARN: CookieRecv: did not advance" fi else if [ $stat_cookietx_last -ne $stat_cookietx_now ] ;then - echo "${listener_ns} CookieSent: ${cl_proto} -> ${srv_proto}: changed" + printf " WARN: CookieSent: changed" fi if [ $stat_cookierx_last -ne $stat_cookierx_now ] ;then - echo "${listener_ns} CookieRecv: ${cl_proto} -> ${srv_proto}: changed" + printf " WARN: CookieRecv: changed" fi fi - if [ $expect_synrx -ne $stat_synrx_now_l ] ;then - echo "${listener_ns} SYNRX: ${cl_proto} -> ${srv_proto}: expect ${expect_synrx}, got ${stat_synrx_now_l}" + if [ ${stat_synrx_now_l} -gt ${expect_synrx} ]; then + printf " WARN: SYNRX: expect %d, got %d (probably retransmissions)" \ + "${expect_synrx}" "${stat_synrx_now_l}" fi - if [ $expect_ackrx -ne $stat_ackrx_now_l ] ;then - echo "${listener_ns} ACKRX: ${cl_proto} -> ${srv_proto}: expect ${expect_synrx}, got ${stat_synrx_now_l}" - fi - - if [ $retc -eq 0 ] && [ $rets -eq 0 ];then - echo "$duration [ OK ]" - cat "$capout" - return 0 + if [ ${stat_ackrx_now_l} -gt ${expect_ackrx} ]; then + printf " WARN: ACKRX: expect %d, got %d (probably retransmissions)" \ + "${expect_ackrx}" "${stat_ackrx_now_l}" fi + echo cat "$capout" - return 1 + [ $retc -eq 0 ] && [ $rets -eq 0 ] } make_file() diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 9aa9624cff97..964db9ed544f 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -209,9 +209,10 @@ do_transfer() srv_proto="$4" connect_addr="$5" test_link_fail="$6" - rm_nr_ns1="$7" - rm_nr_ns2="$8" + addr_nr_ns1="$7" + addr_nr_ns2="$8" speed="$9" + bkup="${10}" port=$((10000+$TEST_COUNT)) TEST_COUNT=$((TEST_COUNT+1)) @@ -263,7 +264,24 @@ do_transfer() fi cpid=$! - if [ $rm_nr_ns1 -gt 0 ]; then + if [ $addr_nr_ns1 -gt 0 ]; then + let add_nr_ns1=addr_nr_ns1 + counter=2 + sleep 1 + while [ $add_nr_ns1 -gt 0 ]; do + local addr + if is_v6 "${connect_addr}"; then + addr="dead:beef:$counter::1" + else + addr="10.0.$counter.1" + fi + ip netns exec $ns1 ./pm_nl_ctl add $addr flags signal + let counter+=1 + let add_nr_ns1-=1 + done + sleep 1 + elif [ $addr_nr_ns1 -lt 0 ]; then + let rm_nr_ns1=-addr_nr_ns1 if [ $rm_nr_ns1 -lt 8 ]; then counter=1 sleep 1 @@ -280,7 +298,24 @@ do_transfer() fi fi - if [ $rm_nr_ns2 -gt 0 ]; then + if [ $addr_nr_ns2 -gt 0 ]; then + let add_nr_ns2=addr_nr_ns2 + counter=3 + sleep 1 + while [ $add_nr_ns2 -gt 0 ]; do + local addr + if is_v6 "${connect_addr}"; then + addr="dead:beef:$counter::2" + else + addr="10.0.$counter.2" + fi + ip netns exec $ns2 ./pm_nl_ctl add $addr flags subflow + let counter+=1 + let add_nr_ns2-=1 + done + sleep 1 + elif [ $addr_nr_ns2 -lt 0 ]; then + let rm_nr_ns2=-addr_nr_ns2 if [ $rm_nr_ns2 -lt 8 ]; then counter=1 sleep 1 @@ -297,6 +332,18 @@ do_transfer() fi fi + if [ ! -z $bkup ]; then + sleep 1 + for netns in "$ns1" "$ns2"; do + dump=(`ip netns exec $netns ./pm_nl_ctl dump`) + if [ ${#dump[@]} -gt 0 ]; then + addr=${dump[${#dump[@]} - 1]} + backup="ip netns exec $netns ./pm_nl_ctl set $addr flags $bkup" + $backup + fi + done + fi + wait $cpid retc=$? wait $spid @@ -355,9 +402,10 @@ run_tests() connector_ns="$2" connect_addr="$3" test_linkfail="${4:-0}" - rm_nr_ns1="${5:-0}" - rm_nr_ns2="${6:-0}" + addr_nr_ns1="${5:-0}" + addr_nr_ns2="${6:-0}" speed="${7:-fast}" + bkup="${8:-""}" lret=0 oldin="" @@ -372,7 +420,7 @@ run_tests() fi do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} \ - ${test_linkfail} ${rm_nr_ns1} ${rm_nr_ns2} ${speed} + ${test_linkfail} ${addr_nr_ns1} ${addr_nr_ns2} ${speed} ${bkup} lret=$? if [ "$test_linkfail" -eq 1 ];then @@ -439,6 +487,12 @@ chk_add_nr() { local add_nr=$1 local echo_nr=$2 + local port_nr=${3:-0} + local syn_nr=${4:-$port_nr} + local syn_ack_nr=${5:-$port_nr} + local ack_nr=${6:-$port_nr} + local mis_syn_nr=${7:-0} + local mis_ack_nr=${8:-0} local count local dump_stats @@ -461,7 +515,87 @@ chk_add_nr() ret=1 dump_stats=1 else - echo "[ ok ]" + echo -n "[ ok ]" + fi + + if [ $port_nr -gt 0 ]; then + echo -n " - pt " + count=`ip netns exec $ns2 nstat -as | grep MPTcpExtPortAdd | awk '{print $2}'` + [ -z "$count" ] && count=0 + if [ "$count" != "$port_nr" ]; then + echo "[fail] got $count ADD_ADDR[s] with a port-number expected $port_nr" + ret=1 + dump_stats=1 + else + echo "[ ok ]" + fi + + printf "%-39s %s" " " "syn" + count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinPortSynRx | + awk '{print $2}'` + [ -z "$count" ] && count=0 + if [ "$count" != "$syn_nr" ]; then + echo "[fail] got $count JOIN[s] syn with a different \ + port-number expected $syn_nr" + ret=1 + dump_stats=1 + else + echo -n "[ ok ]" + fi + + echo -n " - synack" + count=`ip netns exec $ns2 nstat -as | grep MPTcpExtMPJoinPortSynAckRx | + awk '{print $2}'` + [ -z "$count" ] && count=0 + if [ "$count" != "$syn_ack_nr" ]; then + echo "[fail] got $count JOIN[s] synack with a different \ + port-number expected $syn_ack_nr" + ret=1 + dump_stats=1 + else + echo -n "[ ok ]" + fi + + echo -n " - ack" + count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinPortAckRx | + awk '{print $2}'` + [ -z "$count" ] && count=0 + if [ "$count" != "$ack_nr" ]; then + echo "[fail] got $count JOIN[s] ack with a different \ + port-number expected $ack_nr" + ret=1 + dump_stats=1 + else + echo "[ ok ]" + fi + + printf "%-39s %s" " " "syn" + count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMismatchPortSynRx | + awk '{print $2}'` + [ -z "$count" ] && count=0 + if [ "$count" != "$mis_syn_nr" ]; then + echo "[fail] got $count JOIN[s] syn with a mismatched \ + port-number expected $mis_syn_nr" + ret=1 + dump_stats=1 + else + echo -n "[ ok ]" + fi + + echo -n " - ack " + count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMismatchPortAckRx | + awk '{print $2}'` + [ -z "$count" ] && count=0 + if [ "$count" != "$mis_ack_nr" ]; then + echo "[fail] got $count JOIN[s] ack with a mismatched \ + port-number expected $mis_ack_nr" + ret=1 + dump_stats=1 + else + echo "[ ok ]" + fi + else + echo "" fi if [ "${dump_stats}" = 1 ]; then @@ -509,6 +643,588 @@ chk_rm_nr() fi } +chk_prio_nr() +{ + local mp_prio_nr_tx=$1 + local mp_prio_nr_rx=$2 + local count + local dump_stats + + printf "%-39s %s" " " "ptx" + count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPPrioTx | awk '{print $2}'` + [ -z "$count" ] && count=0 + if [ "$count" != "$mp_prio_nr_tx" ]; then + echo "[fail] got $count MP_PRIO[s] TX expected $mp_prio_nr_tx" + ret=1 + dump_stats=1 + else + echo -n "[ ok ]" + fi + + echo -n " - prx " + count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPPrioRx | awk '{print $2}'` + [ -z "$count" ] && count=0 + if [ "$count" != "$mp_prio_nr_rx" ]; then + echo "[fail] got $count MP_PRIO[s] RX expected $mp_prio_nr_rx" + ret=1 + dump_stats=1 + else + echo "[ ok ]" + fi + + if [ "${dump_stats}" = 1 ]; then + echo Server ns stats + ip netns exec $ns1 nstat -as | grep MPTcp + echo Client ns stats + ip netns exec $ns2 nstat -as | grep MPTcp + fi +} + +subflows_tests() +{ + reset + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "no JOIN" "0" "0" "0" + + # subflow limited by client + reset + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "single subflow, limited by client" 0 0 0 + + # subflow limited by server + reset + ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "single subflow, limited by server" 1 1 0 + + # subflow + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "single subflow" 1 1 1 + + # multiple subflows + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 2 + ip netns exec $ns2 ./pm_nl_ctl limits 0 2 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "multiple subflows" 2 2 2 + + # multiple subflows limited by serverf + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 2 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "multiple subflows, limited by server" 2 2 1 +} + +signal_address_tests() +{ + # add_address, unused + reset + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "unused signal address" 0 0 0 + chk_add_nr 1 1 + + # accept and use add_addr + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "signal address" 1 1 1 + chk_add_nr 1 1 + + # accept and use add_addr with an additional subflow + # note: signal address in server ns and local addresses in client ns must + # belong to different subnets or one of the listed local address could be + # used for 'add_addr' subflow + reset + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + ip netns exec $ns1 ./pm_nl_ctl limits 0 2 + ip netns exec $ns2 ./pm_nl_ctl limits 1 2 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "subflow and signal" 2 2 2 + chk_add_nr 1 1 + + # accept and use add_addr with additional subflows + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 3 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + ip netns exec $ns2 ./pm_nl_ctl limits 1 3 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "multiple subflows and signal" 3 3 3 + chk_add_nr 1 1 +} + +link_failure_tests() +{ + # accept and use add_addr with additional subflows and link loss + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 3 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + ip netns exec $ns2 ./pm_nl_ctl limits 1 3 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 1 + chk_join_nr "multiple flows, signal, link failure" 3 3 3 + chk_add_nr 1 1 +} + +add_addr_timeout_tests() +{ + # add_addr timeout + reset_with_add_addr_timeout + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow + chk_join_nr "signal address, ADD_ADDR timeout" 1 1 1 + chk_add_nr 4 0 + + # add_addr timeout IPv6 + reset_with_add_addr_timeout 6 + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal + run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow + chk_join_nr "signal address, ADD_ADDR6 timeout" 1 1 1 + chk_add_nr 4 0 +} + +remove_tests() +{ + # single subflow, remove + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 0 0 -1 slow + chk_join_nr "remove single subflow" 1 1 1 + chk_rm_nr 1 1 + + # multiple subflows, remove + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 2 + ip netns exec $ns2 ./pm_nl_ctl limits 0 2 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 0 0 -2 slow + chk_join_nr "remove multiple subflows" 2 2 2 + chk_rm_nr 2 2 + + # single address, remove + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + run_tests $ns1 $ns2 10.0.1.1 0 -1 0 slow + chk_join_nr "remove single address" 1 1 1 + chk_add_nr 1 1 + chk_rm_nr 0 0 + + # subflow and signal, remove + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 2 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + ip netns exec $ns2 ./pm_nl_ctl limits 1 2 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 0 -1 -1 slow + chk_join_nr "remove subflow and signal" 2 2 2 + chk_add_nr 1 1 + chk_rm_nr 1 1 + + # subflows and signal, remove + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 3 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + ip netns exec $ns2 ./pm_nl_ctl limits 1 3 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 0 -1 -2 slow + chk_join_nr "remove subflows and signal" 3 3 3 + chk_add_nr 1 1 + chk_rm_nr 2 2 + + # subflows and signal, flush + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 3 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + ip netns exec $ns2 ./pm_nl_ctl limits 1 3 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow + chk_join_nr "flush subflows and signal" 3 3 3 + chk_add_nr 1 1 + chk_rm_nr 2 2 +} + +add_tests() +{ + # add single subflow + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow + chk_join_nr "add single subflow" 1 1 1 + + # add signal address + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + run_tests $ns1 $ns2 10.0.1.1 0 1 0 slow + chk_join_nr "add signal address" 1 1 1 + chk_add_nr 1 1 + + # add multiple subflows + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 2 + ip netns exec $ns2 ./pm_nl_ctl limits 0 2 + run_tests $ns1 $ns2 10.0.1.1 0 0 2 slow + chk_join_nr "add multiple subflows" 2 2 2 + + # add multiple subflows IPv6 + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 2 + ip netns exec $ns2 ./pm_nl_ctl limits 0 2 + run_tests $ns1 $ns2 dead:beef:1::1 0 0 2 slow + chk_join_nr "add multiple subflows IPv6" 2 2 2 + + # add multiple addresses IPv6 + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 2 + ip netns exec $ns2 ./pm_nl_ctl limits 2 2 + run_tests $ns1 $ns2 dead:beef:1::1 0 2 0 slow + chk_join_nr "add multiple addresses IPv6" 2 2 2 + chk_add_nr 2 2 +} + +ipv6_tests() +{ + # subflow IPv6 + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 flags subflow + run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow + chk_join_nr "single subflow IPv6" 1 1 1 + + # add_address, unused IPv6 + reset + ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal + run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow + chk_join_nr "unused signal address IPv6" 0 0 0 + chk_add_nr 1 1 + + # signal address IPv6 + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal + ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow + chk_join_nr "single address IPv6" 1 1 1 + chk_add_nr 1 1 + + # single address IPv6, remove + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal + ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + run_tests $ns1 $ns2 dead:beef:1::1 0 -1 0 slow + chk_join_nr "remove single address IPv6" 1 1 1 + chk_add_nr 1 1 + chk_rm_nr 0 0 + + # subflow and signal IPv6, remove + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 2 + ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal + ip netns exec $ns2 ./pm_nl_ctl limits 1 2 + ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 flags subflow + run_tests $ns1 $ns2 dead:beef:1::1 0 -1 -1 slow + chk_join_nr "remove subflow and signal IPv6" 2 2 2 + chk_add_nr 1 1 + chk_rm_nr 1 1 +} + +v4mapped_tests() +{ + # subflow IPv4-mapped to IPv4-mapped + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl add "::ffff:10.0.3.2" flags subflow + run_tests $ns1 $ns2 "::ffff:10.0.1.1" + chk_join_nr "single subflow IPv4-mapped" 1 1 1 + + # signal address IPv4-mapped with IPv4-mapped sk + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + ip netns exec $ns1 ./pm_nl_ctl add "::ffff:10.0.2.1" flags signal + run_tests $ns1 $ns2 "::ffff:10.0.1.1" + chk_join_nr "signal address IPv4-mapped" 1 1 1 + chk_add_nr 1 1 + + # subflow v4-map-v6 + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + run_tests $ns1 $ns2 "::ffff:10.0.1.1" + chk_join_nr "single subflow v4-map-v6" 1 1 1 + + # signal address v4-map-v6 + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + run_tests $ns1 $ns2 "::ffff:10.0.1.1" + chk_join_nr "signal address v4-map-v6" 1 1 1 + chk_add_nr 1 1 + + # subflow v6-map-v4 + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl add "::ffff:10.0.3.2" flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "single subflow v6-map-v4" 1 1 1 + + # signal address v6-map-v4 + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + ip netns exec $ns1 ./pm_nl_ctl add "::ffff:10.0.2.1" flags signal + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "signal address v6-map-v4" 1 1 1 + chk_add_nr 1 1 + + # no subflow IPv6 to v4 address + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl add dead:beef:2::2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "no JOIN with diff families v4-v6" 0 0 0 + + # no subflow IPv6 to v4 address even if v6 has a valid v4 at the end + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl add dead:beef:2::10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "no JOIN with diff families v4-v6-2" 0 0 0 + + # no subflow IPv4 to v6 address, no need to slow down too then + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + run_tests $ns1 $ns2 dead:beef:1::1 + chk_join_nr "no JOIN with diff families v6-v4" 0 0 0 +} + +backup_tests() +{ + # single subflow, backup + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow,backup + run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow nobackup + chk_join_nr "single subflow, backup" 1 1 1 + chk_prio_nr 0 1 + + # single address, backup + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup + chk_join_nr "single address, backup" 1 1 1 + chk_add_nr 1 1 + chk_prio_nr 1 0 +} + +add_addr_ports_tests() +{ + # signal address with port + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100 + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "signal address with port" 1 1 1 + chk_add_nr 1 1 1 + + # subflow and signal with port + reset + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100 + ip netns exec $ns1 ./pm_nl_ctl limits 0 2 + ip netns exec $ns2 ./pm_nl_ctl limits 1 2 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "subflow and signal with port" 2 2 2 + chk_add_nr 1 1 1 + + # single address with port, remove + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100 + ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + run_tests $ns1 $ns2 10.0.1.1 0 -1 0 slow + chk_join_nr "remove single address with port" 1 1 1 + chk_add_nr 1 1 1 + chk_rm_nr 0 0 + + # subflow and signal with port, remove + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 2 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100 + ip netns exec $ns2 ./pm_nl_ctl limits 1 2 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 0 -1 -1 slow + chk_join_nr "remove subflow and signal with port" 2 2 2 + chk_add_nr 1 1 1 + chk_rm_nr 1 1 + + # subflows and signal with port, flush + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 3 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100 + ip netns exec $ns2 ./pm_nl_ctl limits 1 3 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow + chk_join_nr "flush subflows and signal with port" 3 3 3 + chk_add_nr 1 1 + chk_rm_nr 2 2 + + # multiple addresses with port + reset + ip netns exec $ns1 ./pm_nl_ctl limits 2 2 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal port 10100 + ip netns exec $ns2 ./pm_nl_ctl limits 2 2 + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "multiple addresses with port" 2 2 2 + chk_add_nr 2 2 2 + + # multiple addresses with ports + reset + ip netns exec $ns1 ./pm_nl_ctl limits 2 2 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal port 10101 + ip netns exec $ns2 ./pm_nl_ctl limits 2 2 + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "multiple addresses with ports" 2 2 2 + chk_add_nr 2 2 2 +} + +syncookies_tests() +{ + # single subflow, syncookies + reset_with_cookies + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "single subflow with syn cookies" 1 1 1 + + # multiple subflows with syn cookies + reset_with_cookies + ip netns exec $ns1 ./pm_nl_ctl limits 0 2 + ip netns exec $ns2 ./pm_nl_ctl limits 0 2 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "multiple subflows with syn cookies" 2 2 2 + + # multiple subflows limited by server + reset_with_cookies + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 2 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "subflows limited by server w cookies" 2 2 1 + + # test signal address with cookies + reset_with_cookies + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "signal address with syn cookies" 1 1 1 + chk_add_nr 1 1 + + # test cookie with subflow and signal + reset_with_cookies + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + ip netns exec $ns1 ./pm_nl_ctl limits 0 2 + ip netns exec $ns2 ./pm_nl_ctl limits 1 2 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "subflow and signal w cookies" 2 2 2 + chk_add_nr 1 1 + + # accept and use add_addr with additional subflows + reset_with_cookies + ip netns exec $ns1 ./pm_nl_ctl limits 0 3 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + ip netns exec $ns2 ./pm_nl_ctl limits 1 3 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "subflows and signal w. cookies" 3 3 3 + chk_add_nr 1 1 +} + +all_tests() +{ + subflows_tests + signal_address_tests + link_failure_tests + add_addr_timeout_tests + remove_tests + add_tests + ipv6_tests + v4mapped_tests + backup_tests + add_addr_ports_tests + syncookies_tests +} + +usage() +{ + echo "mptcp_join usage:" + echo " -f subflows_tests" + echo " -s signal_address_tests" + echo " -l link_failure_tests" + echo " -t add_addr_timeout_tests" + echo " -r remove_tests" + echo " -a add_tests" + echo " -6 ipv6_tests" + echo " -4 v4mapped_tests" + echo " -b backup_tests" + echo " -p add_addr_ports_tests" + echo " -c syncookies_tests" + echo " -h help" +} + sin=$(mktemp) sout=$(mktemp) cin=$(mktemp) @@ -519,280 +1235,50 @@ make_file "$cin" "client" 1 make_file "$sin" "server" 1 trap cleanup EXIT -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "no JOIN" "0" "0" "0" - -# subflow limted by client -reset -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "single subflow, limited by client" 0 0 0 - -# subflow limted by server -reset -ip netns exec $ns2 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "single subflow, limited by server" 1 1 0 - -# subflow -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "single subflow" 1 1 1 - -# multiple subflows -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 2 -ip netns exec $ns2 ./pm_nl_ctl limits 0 2 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "multiple subflows" 2 2 2 - -# multiple subflows limited by serverf -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl limits 0 2 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "multiple subflows, limited by server" 2 2 1 - -# add_address, unused -reset -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "unused signal address" 0 0 0 -chk_add_nr 1 1 - -# accept and use add_addr -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl limits 1 1 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "signal address" 1 1 1 -chk_add_nr 1 1 - -# accept and use add_addr with an additional subflow -# note: signal address in server ns and local addresses in client ns must -# belong to different subnets or one of the listed local address could be -# used for 'add_addr' subflow -reset -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal -ip netns exec $ns1 ./pm_nl_ctl limits 0 2 -ip netns exec $ns2 ./pm_nl_ctl limits 1 2 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "subflow and signal" 2 2 2 -chk_add_nr 1 1 - -# accept and use add_addr with additional subflows -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 3 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal -ip netns exec $ns2 ./pm_nl_ctl limits 1 3 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "multiple subflows and signal" 3 3 3 -chk_add_nr 1 1 - -# accept and use add_addr with additional subflows and link loss -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 3 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal -ip netns exec $ns2 ./pm_nl_ctl limits 1 3 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 1 -chk_join_nr "multiple flows, signal, link failure" 3 3 3 -chk_add_nr 1 1 - -# add_addr timeout -reset_with_add_addr_timeout -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl limits 1 1 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal -run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow -chk_join_nr "signal address, ADD_ADDR timeout" 1 1 1 -chk_add_nr 4 0 - -# single subflow, remove -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow -chk_join_nr "remove single subflow" 1 1 1 -chk_rm_nr 1 1 - -# multiple subflows, remove -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 2 -ip netns exec $ns2 ./pm_nl_ctl limits 0 2 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 0 0 2 slow -chk_join_nr "remove multiple subflows" 2 2 2 -chk_rm_nr 2 2 - -# single address, remove -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal -ip netns exec $ns2 ./pm_nl_ctl limits 1 1 -run_tests $ns1 $ns2 10.0.1.1 0 1 0 slow -chk_join_nr "remove single address" 1 1 1 -chk_add_nr 1 1 -chk_rm_nr 0 0 - -# subflow and signal, remove -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 2 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal -ip netns exec $ns2 ./pm_nl_ctl limits 1 2 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 0 1 1 slow -chk_join_nr "remove subflow and signal" 2 2 2 -chk_add_nr 1 1 -chk_rm_nr 1 1 - -# subflows and signal, remove -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 3 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal -ip netns exec $ns2 ./pm_nl_ctl limits 1 3 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 0 1 2 slow -chk_join_nr "remove subflows and signal" 3 3 3 -chk_add_nr 1 1 -chk_rm_nr 2 2 - -# subflows and signal, flush -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 3 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal -ip netns exec $ns2 ./pm_nl_ctl limits 1 3 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 0 8 8 slow -chk_join_nr "flush subflows and signal" 3 3 3 -chk_add_nr 1 1 -chk_rm_nr 2 2 - -# subflow IPv6 -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 flags subflow -run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow -chk_join_nr "single subflow IPv6" 1 1 1 - -# add_address, unused IPv6 -reset -ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal -run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow -chk_join_nr "unused signal address IPv6" 0 0 0 -chk_add_nr 1 1 - -# signal address IPv6 -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal -ip netns exec $ns2 ./pm_nl_ctl limits 1 1 -run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow -chk_join_nr "single address IPv6" 1 1 1 -chk_add_nr 1 1 - -# add_addr timeout IPv6 -reset_with_add_addr_timeout 6 -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl limits 1 1 -ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal -run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow -chk_join_nr "signal address, ADD_ADDR6 timeout" 1 1 1 -chk_add_nr 4 0 - -# single address IPv6, remove -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal -ip netns exec $ns2 ./pm_nl_ctl limits 1 1 -run_tests $ns1 $ns2 dead:beef:1::1 0 1 0 slow -chk_join_nr "remove single address IPv6" 1 1 1 -chk_add_nr 1 1 -chk_rm_nr 0 0 - -# subflow and signal IPv6, remove -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 2 -ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal -ip netns exec $ns2 ./pm_nl_ctl limits 1 2 -ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 flags subflow -run_tests $ns1 $ns2 dead:beef:1::1 0 1 1 slow -chk_join_nr "remove subflow and signal IPv6" 2 2 2 -chk_add_nr 1 1 -chk_rm_nr 1 1 - -# single subflow, syncookies -reset_with_cookies -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "single subflow with syn cookies" 1 1 1 - -# multiple subflows with syn cookies -reset_with_cookies -ip netns exec $ns1 ./pm_nl_ctl limits 0 2 -ip netns exec $ns2 ./pm_nl_ctl limits 0 2 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "multiple subflows with syn cookies" 2 2 2 - -# multiple subflows limited by server -reset_with_cookies -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl limits 0 2 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "subflows limited by server w cookies" 2 2 1 - -# test signal address with cookies -reset_with_cookies -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl limits 1 1 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "signal address with syn cookies" 1 1 1 -chk_add_nr 1 1 - -# test cookie with subflow and signal -reset_with_cookies -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal -ip netns exec $ns1 ./pm_nl_ctl limits 0 2 -ip netns exec $ns2 ./pm_nl_ctl limits 1 2 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "subflow and signal w cookies" 2 2 2 -chk_add_nr 1 1 - -# accept and use add_addr with additional subflows -reset_with_cookies -ip netns exec $ns1 ./pm_nl_ctl limits 0 3 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal -ip netns exec $ns2 ./pm_nl_ctl limits 1 3 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "subflows and signal w. cookies" 3 3 3 -chk_add_nr 1 1 +if [ -z $1 ]; then + all_tests + exit $ret +fi + +while getopts 'fsltra64bpch' opt; do + case $opt in + f) + subflows_tests + ;; + s) + signal_address_tests + ;; + l) + link_failure_tests + ;; + t) + add_addr_timeout_tests + ;; + r) + remove_tests + ;; + a) + add_tests + ;; + 6) + ipv6_tests + ;; + 4) + v4mapped_tests + ;; + b) + backup_tests + ;; + p) + add_addr_ports_tests + ;; + c) + syncookies_tests + ;; + h | *) + usage + ;; + esac +done exit $ret diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index 15f4f46ca3a9..a617e293734c 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -91,7 +91,7 @@ id 3 flags signal,backup 10.0.1.3" "dump addrs after del" ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.3 check "ip netns exec $ns1 ./pm_nl_ctl get 4" "" "duplicate addr" -ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.4 id 10 flags signal +ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.4 flags signal check "ip netns exec $ns1 ./pm_nl_ctl get 4" "id 4 flags signal 10.0.1.4" "id addr increment" for i in `seq 5 9`; do @@ -102,9 +102,10 @@ check "ip netns exec $ns1 ./pm_nl_ctl get 10" "" "above hard addr limit" for i in `seq 9 256`; do ip netns exec $ns1 ./pm_nl_ctl del $i - ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.9 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.9 id $((i+1)) done check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags 10.0.1.1 +id 2 flags 10.0.0.9 id 3 flags signal,backup 10.0.1.3 id 4 flags signal 10.0.1.4 id 5 flags signal 10.0.1.5 @@ -127,4 +128,40 @@ ip netns exec $ns1 ./pm_nl_ctl limits 8 8 check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 8 subflows 8" "set limits" +ip netns exec $ns1 ./pm_nl_ctl flush +ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.2 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.3 id 100 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.4 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.5 id 254 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.6 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.7 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.8 +check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags 10.0.1.1 +id 2 flags 10.0.1.2 +id 3 flags 10.0.1.7 +id 4 flags 10.0.1.8 +id 100 flags 10.0.1.3 +id 101 flags 10.0.1.4 +id 254 flags 10.0.1.5 +id 255 flags 10.0.1.6" "set ids" + +ip netns exec $ns1 ./pm_nl_ctl flush +ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.1 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.2 id 254 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.3 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.4 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.5 id 253 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.6 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.7 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.8 +check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags 10.0.0.1 +id 2 flags 10.0.0.4 +id 3 flags 10.0.0.6 +id 4 flags 10.0.0.7 +id 5 flags 10.0.0.8 +id 253 flags 10.0.0.5 +id 254 flags 10.0.0.2 +id 255 flags 10.0.0.3" "wrap-around ids" + exit $ret diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c index b24a2f17d415..7b4167f3f9a2 100644 --- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c +++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c @@ -24,10 +24,11 @@ static void syntax(char *argv[]) { - fprintf(stderr, "%s add|get|del|flush|dump|accept [<args>]\n", argv[0]); + fprintf(stderr, "%s add|get|set|del|flush|dump|accept [<args>]\n", argv[0]); fprintf(stderr, "\tadd [flags signal|subflow|backup] [id <nr>] [dev <name>] <ip>\n"); fprintf(stderr, "\tdel <id>\n"); fprintf(stderr, "\tget <id>\n"); + fprintf(stderr, "\tset <ip> [flags backup|nobackup]\n"); fprintf(stderr, "\tflush\n"); fprintf(stderr, "\tdump\n"); fprintf(stderr, "\tlimits [<rcv addr max> <subflow max>]\n"); @@ -176,8 +177,8 @@ int add_addr(int fd, int pm_family, int argc, char *argv[]) 1024]; struct rtattr *rta, *nest; struct nlmsghdr *nh; + u_int32_t flags = 0; u_int16_t family; - u_int32_t flags; int nest_start; u_int8_t id; int off = 0; @@ -223,7 +224,6 @@ int add_addr(int fd, int pm_family, int argc, char *argv[]) char *tok, *str; /* flags */ - flags = 0; if (++arg >= argc) error(1, 0, " missing flags value"); @@ -271,6 +271,20 @@ int add_addr(int fd, int pm_family, int argc, char *argv[]) rta->rta_len = RTA_LENGTH(4); memcpy(RTA_DATA(rta), &ifindex, 4); off += NLMSG_ALIGN(rta->rta_len); + } else if (!strcmp(argv[arg], "port")) { + u_int16_t port; + + if (++arg >= argc) + error(1, 0, " missing port value"); + if (!(flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) + error(1, 0, " flags must be signal when using port"); + + port = atoi(argv[arg]); + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_PORT; + rta->rta_len = RTA_LENGTH(2); + memcpy(RTA_DATA(rta), &port, 2); + off += NLMSG_ALIGN(rta->rta_len); } else error(1, 0, "unknown keyword %s", argv[arg]); } @@ -323,6 +337,7 @@ int del_addr(int fd, int pm_family, int argc, char *argv[]) static void print_addr(struct rtattr *attrs, int len) { uint16_t family = 0; + uint16_t port = 0; char str[1024]; uint32_t flags; uint8_t id; @@ -330,12 +345,16 @@ static void print_addr(struct rtattr *attrs, int len) while (RTA_OK(attrs, len)) { if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_FAMILY) memcpy(&family, RTA_DATA(attrs), 2); + if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_PORT) + memcpy(&port, RTA_DATA(attrs), 2); if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_ADDR4) { if (family != AF_INET) error(1, errno, "wrong IP (v4) for family %d", family); inet_ntop(AF_INET, RTA_DATA(attrs), str, sizeof(str)); printf("%s", str); + if (port) + printf(" %d", port); } if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_ADDR6) { if (family != AF_INET6) @@ -343,6 +362,8 @@ static void print_addr(struct rtattr *attrs, int len) family); inet_ntop(AF_INET6, RTA_DATA(attrs), str, sizeof(str)); printf("%s", str); + if (port) + printf(" %d", port); } if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_ID) { memcpy(&id, RTA_DATA(attrs), 1); @@ -584,6 +605,88 @@ int get_set_limits(int fd, int pm_family, int argc, char *argv[]) return 0; } +int set_flags(int fd, int pm_family, int argc, char *argv[]) +{ + char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + + NLMSG_ALIGN(sizeof(struct genlmsghdr)) + + 1024]; + struct rtattr *rta, *nest; + struct nlmsghdr *nh; + u_int32_t flags = 0; + u_int16_t family; + int nest_start; + int off = 0; + int arg; + + memset(data, 0, sizeof(data)); + nh = (void *)data; + off = init_genl_req(data, pm_family, MPTCP_PM_CMD_SET_FLAGS, + MPTCP_PM_VER); + + if (argc < 3) + syntax(argv); + + nest_start = off; + nest = (void *)(data + off); + nest->rta_type = NLA_F_NESTED | MPTCP_PM_ATTR_ADDR; + nest->rta_len = RTA_LENGTH(0); + off += NLMSG_ALIGN(nest->rta_len); + + /* addr data */ + rta = (void *)(data + off); + if (inet_pton(AF_INET, argv[2], RTA_DATA(rta))) { + family = AF_INET; + rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4; + rta->rta_len = RTA_LENGTH(4); + } else if (inet_pton(AF_INET6, argv[2], RTA_DATA(rta))) { + family = AF_INET6; + rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6; + rta->rta_len = RTA_LENGTH(16); + } else { + error(1, errno, "can't parse ip %s", argv[2]); + } + off += NLMSG_ALIGN(rta->rta_len); + + /* family */ + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY; + rta->rta_len = RTA_LENGTH(2); + memcpy(RTA_DATA(rta), &family, 2); + off += NLMSG_ALIGN(rta->rta_len); + + for (arg = 3; arg < argc; arg++) { + if (!strcmp(argv[arg], "flags")) { + char *tok, *str; + + /* flags */ + if (++arg >= argc) + error(1, 0, " missing flags value"); + + /* do not support flag list yet */ + for (str = argv[arg]; (tok = strtok(str, ",")); + str = NULL) { + if (!strcmp(tok, "backup")) + flags |= MPTCP_PM_ADDR_FLAG_BACKUP; + else if (strcmp(tok, "nobackup")) + error(1, errno, + "unknown flag %s", argv[arg]); + } + + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_FLAGS; + rta->rta_len = RTA_LENGTH(4); + memcpy(RTA_DATA(rta), &flags, 4); + off += NLMSG_ALIGN(rta->rta_len); + } else { + error(1, 0, "unknown keyword %s", argv[arg]); + } + } + nest->rta_len = off - nest_start; + + do_nl_req(fd, nh, off, 0); + return 0; +} + int main(int argc, char *argv[]) { int fd, pm_family; @@ -609,6 +712,8 @@ int main(int argc, char *argv[]) return dump_addrs(fd, pm_family, argc, argv); else if (!strcmp(argv[1], "limits")) return get_set_limits(fd, pm_family, argc, argv); + else if (!strcmp(argv[1], "set")) + return set_flags(fd, pm_family, argc, argv); fprintf(stderr, "unknown sub-command: %s", argv[1]); syntax(argv); diff --git a/tools/testing/selftests/net/mptcp/settings b/tools/testing/selftests/net/mptcp/settings index 026384c189c9..a62d2fa1275c 100644 --- a/tools/testing/selftests/net/mptcp/settings +++ b/tools/testing/selftests/net/mptcp/settings @@ -1 +1 @@ -timeout=450 +timeout=600 diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c index f75c53ce0a2d..6365c7fd1262 100644 --- a/tools/testing/selftests/net/nettest.c +++ b/tools/testing/selftests/net/nettest.c @@ -9,6 +9,7 @@ #include <sys/types.h> #include <sys/ioctl.h> #include <sys/socket.h> +#include <sys/wait.h> #include <linux/tcp.h> #include <arpa/inet.h> #include <net/if.h> @@ -17,6 +18,7 @@ #include <fcntl.h> #include <libgen.h> #include <limits.h> +#include <sched.h> #include <stdarg.h> #include <stdio.h> #include <stdlib.h> @@ -34,6 +36,8 @@ #define DEFAULT_PORT 12345 +#define NS_PREFIX "/run/netns/" + #ifndef MAX #define MAX(a, b) ((a) > (b) ? (a) : (b)) #endif @@ -43,12 +47,15 @@ struct sock_args { /* local address */ + const char *local_addr_str; + const char *client_local_addr_str; union { struct in_addr in; struct in6_addr in6; } local_addr; /* remote address */ + const char *remote_addr_str; union { struct in_addr in; struct in6_addr in6; @@ -73,10 +80,16 @@ struct sock_args { int use_setsockopt; int use_cmsg; const char *dev; + const char *server_dev; int ifindex; + const char *clientns; + const char *serverns; + const char *password; + const char *client_pw; /* prefix for MD5 password */ + const char *md5_prefix_str; union { struct sockaddr_in v4; struct sockaddr_in6 v6; @@ -84,15 +97,19 @@ struct sock_args { unsigned int prefix_len; /* expected addresses and device index for connection */ + const char *expected_dev; + const char *expected_server_dev; int expected_ifindex; /* local address */ + const char *expected_laddr_str; union { struct in_addr in; struct in6_addr in6; } expected_laddr; /* remote address */ + const char *expected_raddr_str; union { struct in_addr in; struct in6_addr in6; @@ -186,7 +203,7 @@ static void log_address(const char *desc, struct sockaddr *sa) if (sa->sa_family == AF_INET) { struct sockaddr_in *s = (struct sockaddr_in *) sa; - log_msg("%s %s:%d", + log_msg("%s %s:%d\n", desc, inet_ntop(AF_INET, &s->sin_addr, addrstr, sizeof(addrstr)), @@ -195,18 +212,37 @@ static void log_address(const char *desc, struct sockaddr *sa) } else if (sa->sa_family == AF_INET6) { struct sockaddr_in6 *s6 = (struct sockaddr_in6 *) sa; - log_msg("%s [%s]:%d", + log_msg("%s [%s]:%d\n", desc, inet_ntop(AF_INET6, &s6->sin6_addr, addrstr, sizeof(addrstr)), ntohs(s6->sin6_port)); } - printf("\n"); - fflush(stdout); } +static int switch_ns(const char *ns) +{ + char path[PATH_MAX]; + int fd, ret; + + if (geteuid()) + log_error("warning: likely need root to set netns %s!\n", ns); + + snprintf(path, sizeof(path), "%s%s", NS_PREFIX, ns); + fd = open(path, 0); + if (fd < 0) { + log_err_errno("Failed to open netns path; can not switch netns"); + return 1; + } + + ret = setns(fd, CLONE_NEWNET); + close(fd); + + return ret; +} + static int tcp_md5sig(int sd, void *addr, socklen_t alen, struct sock_args *args) { int keylen = strlen(args->password); @@ -259,13 +295,13 @@ static int tcp_md5_remote(int sd, struct sock_args *args) switch (args->version) { case AF_INET: sin.sin_port = htons(args->port); - sin.sin_addr = args->remote_addr.in; + sin.sin_addr = args->md5_prefix.v4.sin_addr; addr = &sin; alen = sizeof(sin); break; case AF_INET6: sin6.sin6_port = htons(args->port); - sin6.sin6_addr = args->remote_addr.in6; + sin6.sin6_addr = args->md5_prefix.v6.sin6_addr; addr = &sin6; alen = sizeof(sin6); break; @@ -522,6 +558,33 @@ static int str_to_uint(const char *str, int min, int max, unsigned int *value) return -1; } +static int resolve_devices(struct sock_args *args) +{ + if (args->dev) { + args->ifindex = get_ifidx(args->dev); + if (args->ifindex < 0) { + log_error("Invalid device name\n"); + return 1; + } + } + + if (args->expected_dev) { + unsigned int tmp; + + if (str_to_uint(args->expected_dev, 0, INT_MAX, &tmp) == 0) { + args->expected_ifindex = (int)tmp; + } else { + args->expected_ifindex = get_ifidx(args->expected_dev); + if (args->expected_ifindex < 0) { + fprintf(stderr, "Invalid expected device\n"); + return 1; + } + } + } + + return 0; +} + static int expected_addr_match(struct sockaddr *sa, void *expected, const char *desc) { @@ -533,7 +596,7 @@ static int expected_addr_match(struct sockaddr *sa, void *expected, struct in_addr *exp_in = (struct in_addr *) expected; if (s->sin_addr.s_addr != exp_in->s_addr) { - log_error("%s address does not match expected %s", + log_error("%s address does not match expected %s\n", desc, inet_ntop(AF_INET, exp_in, addrstr, sizeof(addrstr))); @@ -544,14 +607,14 @@ static int expected_addr_match(struct sockaddr *sa, void *expected, struct in6_addr *exp_in = (struct in6_addr *) expected; if (memcmp(&s6->sin6_addr, exp_in, sizeof(*exp_in))) { - log_error("%s address does not match expected %s", + log_error("%s address does not match expected %s\n", desc, inet_ntop(AF_INET6, exp_in, addrstr, sizeof(addrstr))); rc = 1; } } else { - log_error("%s address does not match expected - unknown family", + log_error("%s address does not match expected - unknown family\n", desc); rc = 1; } @@ -599,6 +662,160 @@ static int show_sockstat(int sd, struct sock_args *args) return rc; } +enum addr_type { + ADDR_TYPE_LOCAL, + ADDR_TYPE_REMOTE, + ADDR_TYPE_MCAST, + ADDR_TYPE_EXPECTED_LOCAL, + ADDR_TYPE_EXPECTED_REMOTE, + ADDR_TYPE_MD5_PREFIX, +}; + +static int convert_addr(struct sock_args *args, const char *_str, + enum addr_type atype) +{ + int pfx_len_max = args->version == AF_INET6 ? 128 : 32; + int family = args->version; + char *str, *dev, *sep; + struct in6_addr *in6; + struct in_addr *in; + const char *desc; + void *addr; + int rc = 0; + + str = strdup(_str); + if (!str) + return -ENOMEM; + + switch (atype) { + case ADDR_TYPE_LOCAL: + desc = "local"; + addr = &args->local_addr; + break; + case ADDR_TYPE_REMOTE: + desc = "remote"; + addr = &args->remote_addr; + break; + case ADDR_TYPE_MCAST: + desc = "mcast grp"; + addr = &args->grp; + break; + case ADDR_TYPE_EXPECTED_LOCAL: + desc = "expected local"; + addr = &args->expected_laddr; + break; + case ADDR_TYPE_EXPECTED_REMOTE: + desc = "expected remote"; + addr = &args->expected_raddr; + break; + case ADDR_TYPE_MD5_PREFIX: + desc = "md5 prefix"; + if (family == AF_INET) { + args->md5_prefix.v4.sin_family = AF_INET; + addr = &args->md5_prefix.v4.sin_addr; + } else if (family == AF_INET6) { + args->md5_prefix.v6.sin6_family = AF_INET6; + addr = &args->md5_prefix.v6.sin6_addr; + } else + return 1; + + sep = strchr(str, '/'); + if (sep) { + *sep = '\0'; + sep++; + if (str_to_uint(sep, 1, pfx_len_max, + &args->prefix_len) != 0) { + fprintf(stderr, "Invalid port\n"); + return 1; + } + } else { + args->prefix_len = 0; + } + break; + default: + log_error("unknown address type\n"); + exit(1); + } + + switch (family) { + case AF_INET: + in = (struct in_addr *) addr; + if (str) { + if (inet_pton(AF_INET, str, in) == 0) { + log_error("Invalid %s IP address\n", desc); + rc = -1; + goto out; + } + } else { + in->s_addr = htonl(INADDR_ANY); + } + break; + + case AF_INET6: + dev = strchr(str, '%'); + if (dev) { + *dev = '\0'; + dev++; + } + + in6 = (struct in6_addr *) addr; + if (str) { + if (inet_pton(AF_INET6, str, in6) == 0) { + log_error("Invalid %s IPv6 address\n", desc); + rc = -1; + goto out; + } + } else { + *in6 = in6addr_any; + } + if (dev) { + args->scope_id = get_ifidx(dev); + if (args->scope_id < 0) { + log_error("Invalid scope on %s IPv6 address\n", + desc); + rc = -1; + goto out; + } + } + break; + + default: + log_error("Invalid address family\n"); + } + +out: + free(str); + return rc; +} + +static int validate_addresses(struct sock_args *args) +{ + if (args->local_addr_str && + convert_addr(args, args->local_addr_str, ADDR_TYPE_LOCAL) < 0) + return 1; + + if (args->remote_addr_str && + convert_addr(args, args->remote_addr_str, ADDR_TYPE_REMOTE) < 0) + return 1; + + if (args->md5_prefix_str && + convert_addr(args, args->md5_prefix_str, + ADDR_TYPE_MD5_PREFIX) < 0) + return 1; + + if (args->expected_laddr_str && + convert_addr(args, args->expected_laddr_str, + ADDR_TYPE_EXPECTED_LOCAL)) + return 1; + + if (args->expected_raddr_str && + convert_addr(args, args->expected_raddr_str, + ADDR_TYPE_EXPECTED_REMOTE)) + return 1; + + return 0; +} + static int get_index_from_cmsg(struct msghdr *m) { struct cmsghdr *cm; @@ -1180,8 +1397,19 @@ err: return -1; } -static int do_server(struct sock_args *args) +static void ipc_write(int fd, int message) +{ + /* Not in both_mode, so there's no process to signal */ + if (fd < 0) + return; + + if (write(fd, &message, sizeof(message)) < 0) + log_err_errno("Failed to send client status"); +} + +static int do_server(struct sock_args *args, int ipc_fd) { + /* ipc_fd = -1 if no parent process to signal */ struct timeval timeout = { .tv_sec = prog_timeout }, *ptval = NULL; unsigned char addr[sizeof(struct sockaddr_in6)] = {}; socklen_t alen = sizeof(addr); @@ -1190,6 +1418,20 @@ static int do_server(struct sock_args *args) fd_set rfds; int rc; + if (args->serverns) { + if (switch_ns(args->serverns)) { + log_error("Could not set server netns to %s\n", + args->serverns); + goto err_exit; + } + log_msg("Switched server netns\n"); + } + + args->dev = args->server_dev; + args->expected_dev = args->expected_server_dev; + if (resolve_devices(args) || validate_addresses(args)) + goto err_exit; + if (prog_timeout) ptval = &timeout; @@ -1199,14 +1441,16 @@ static int do_server(struct sock_args *args) lsd = lsock_init(args); if (lsd < 0) - return 1; + goto err_exit; if (args->bind_test_only) { close(lsd); + ipc_write(ipc_fd, 1); return 0; } if (args->type != SOCK_STREAM) { + ipc_write(ipc_fd, 1); rc = msg_loop(0, lsd, (void *) addr, alen, args); close(lsd); return rc; @@ -1214,11 +1458,11 @@ static int do_server(struct sock_args *args) if (args->password && tcp_md5_remote(lsd, args)) { close(lsd); - return 1; + goto err_exit; } + ipc_write(ipc_fd, 1); while (1) { - log_msg("\n"); log_msg("waiting for client connection.\n"); FD_ZERO(&rfds); FD_SET(lsd, &rfds); @@ -1264,6 +1508,9 @@ static int do_server(struct sock_args *args) close(lsd); return rc; +err_exit: + ipc_write(ipc_fd, 0); + return 1; } static int wait_for_connect(int sd) @@ -1375,6 +1622,26 @@ static int do_client(struct sock_args *args) return 1; } + if (args->clientns) { + if (switch_ns(args->clientns)) { + log_error("Could not set client netns to %s\n", + args->clientns); + return 1; + } + log_msg("Switched client netns\n"); + } + + args->local_addr_str = args->client_local_addr_str; + if (resolve_devices(args) || validate_addresses(args)) + return 1; + + if ((args->use_setsockopt || args->use_cmsg) && !args->ifindex) { + fprintf(stderr, "Device binding not specified\n"); + return 1; + } + if (args->use_setsockopt || args->use_cmsg) + args->dev = NULL; + switch (args->version) { case AF_INET: sin.sin_port = htons(args->port); @@ -1394,6 +1661,8 @@ static int do_client(struct sock_args *args) break; } + args->password = args->client_pw; + if (args->has_grp) sd = msock_client(args); else @@ -1419,132 +1688,6 @@ out: return rc; } -enum addr_type { - ADDR_TYPE_LOCAL, - ADDR_TYPE_REMOTE, - ADDR_TYPE_MCAST, - ADDR_TYPE_EXPECTED_LOCAL, - ADDR_TYPE_EXPECTED_REMOTE, - ADDR_TYPE_MD5_PREFIX, -}; - -static int convert_addr(struct sock_args *args, const char *_str, - enum addr_type atype) -{ - int pfx_len_max = args->version == AF_INET6 ? 128 : 32; - int family = args->version; - char *str, *dev, *sep; - struct in6_addr *in6; - struct in_addr *in; - const char *desc; - void *addr; - int rc = 0; - - str = strdup(_str); - if (!str) - return -ENOMEM; - - switch (atype) { - case ADDR_TYPE_LOCAL: - desc = "local"; - addr = &args->local_addr; - break; - case ADDR_TYPE_REMOTE: - desc = "remote"; - addr = &args->remote_addr; - break; - case ADDR_TYPE_MCAST: - desc = "mcast grp"; - addr = &args->grp; - break; - case ADDR_TYPE_EXPECTED_LOCAL: - desc = "expected local"; - addr = &args->expected_laddr; - break; - case ADDR_TYPE_EXPECTED_REMOTE: - desc = "expected remote"; - addr = &args->expected_raddr; - break; - case ADDR_TYPE_MD5_PREFIX: - desc = "md5 prefix"; - if (family == AF_INET) { - args->md5_prefix.v4.sin_family = AF_INET; - addr = &args->md5_prefix.v4.sin_addr; - } else if (family == AF_INET6) { - args->md5_prefix.v6.sin6_family = AF_INET6; - addr = &args->md5_prefix.v6.sin6_addr; - } else - return 1; - - sep = strchr(str, '/'); - if (sep) { - *sep = '\0'; - sep++; - if (str_to_uint(sep, 1, pfx_len_max, - &args->prefix_len) != 0) { - fprintf(stderr, "Invalid port\n"); - return 1; - } - } else { - args->prefix_len = pfx_len_max; - } - break; - default: - log_error("unknown address type"); - exit(1); - } - - switch (family) { - case AF_INET: - in = (struct in_addr *) addr; - if (str) { - if (inet_pton(AF_INET, str, in) == 0) { - log_error("Invalid %s IP address\n", desc); - rc = -1; - goto out; - } - } else { - in->s_addr = htonl(INADDR_ANY); - } - break; - - case AF_INET6: - dev = strchr(str, '%'); - if (dev) { - *dev = '\0'; - dev++; - } - - in6 = (struct in6_addr *) addr; - if (str) { - if (inet_pton(AF_INET6, str, in6) == 0) { - log_error("Invalid %s IPv6 address\n", desc); - rc = -1; - goto out; - } - } else { - *in6 = in6addr_any; - } - if (dev) { - args->scope_id = get_ifidx(dev); - if (args->scope_id < 0) { - log_error("Invalid scope on %s IPv6 address\n", - desc); - rc = -1; - goto out; - } - } - break; - - default: - log_error("Invalid address family\n"); - } - -out: - free(str); - return rc; -} - static char *random_msg(int len) { int i, n = 0, olen = len + 1; @@ -1568,7 +1711,68 @@ static char *random_msg(int len) return m; } -#define GETOPT_STR "sr:l:p:t:g:P:DRn:M:m:d:SCi6L:0:1:2:Fbq" +static int ipc_child(int fd, struct sock_args *args) +{ + char *outbuf, *errbuf; + int rc = 1; + + outbuf = malloc(4096); + errbuf = malloc(4096); + if (!outbuf || !errbuf) { + fprintf(stderr, "server: Failed to allocate buffers for stdout and stderr\n"); + goto out; + } + + setbuffer(stdout, outbuf, 4096); + setbuffer(stderr, errbuf, 4096); + + server_mode = 1; /* to tell log_msg in case we are in both_mode */ + + /* when running in both mode, address validation applies + * solely to client side + */ + args->has_expected_laddr = 0; + args->has_expected_raddr = 0; + + rc = do_server(args, fd); + +out: + free(outbuf); + free(errbuf); + + return rc; +} + +static int ipc_parent(int cpid, int fd, struct sock_args *args) +{ + int client_status; + int status; + int buf; + + /* do the client-side function here in the parent process, + * waiting to be told when to continue + */ + if (read(fd, &buf, sizeof(buf)) <= 0) { + log_err_errno("Failed to read IPC status from status"); + return 1; + } + if (!buf) { + log_error("Server failed; can not continue\n"); + return 1; + } + log_msg("Server is ready\n"); + + client_status = do_client(args); + log_msg("parent is done!\n"); + + if (kill(cpid, 0) == 0) + kill(cpid, SIGKILL); + + wait(&status); + return client_status; +} + +#define GETOPT_STR "sr:l:c:p:t:g:P:DRn:M:X:m:d:I:BN:O:SCi6L:0:1:2:3:Fbq" static void print_usage(char *prog) { @@ -1582,13 +1786,18 @@ static void print_usage(char *prog) " -t timeout seconds (default: none)\n" "\n" "Optional:\n" + " -B do both client and server via fork and IPC\n" + " -N ns set client to network namespace ns (requires root)\n" + " -O ns set server to network namespace ns (requires root)\n" " -F Restart server loop\n" " -6 IPv6 (default is IPv4)\n" " -P proto protocol for socket: icmp, ospf (default: none)\n" " -D|R datagram (D) / raw (R) socket (default stream)\n" - " -l addr local address to bind to\n" + " -l addr local address to bind to in server mode\n" + " -c addr local address to bind to in client mode\n" "\n" " -d dev bind socket to given device name\n" + " -I dev bind socket to given device name - server mode\n" " -S use setsockopt (IP_UNICAST_IF or IP_MULTICAST_IF)\n" " to set device binding\n" " -C use cmsg and IP_PKTINFO to specify device binding\n" @@ -1597,6 +1806,7 @@ static void print_usage(char *prog) " -n num number of times to send message\n" "\n" " -M password use MD5 sum protection\n" + " -X password MD5 password for client mode\n" " -m prefix/len prefix and length to use for MD5 key\n" " -g grp multicast group (e.g., 239.1.1.1)\n" " -i interactive mode (default is echo and terminate)\n" @@ -1604,6 +1814,7 @@ static void print_usage(char *prog) " -0 addr Expected local address\n" " -1 addr Expected remote address\n" " -2 dev Expected device name (or index) to receive packet\n" + " -3 dev Expected device name (or index) to receive packets - server mode\n" "\n" " -b Bind test only.\n" " -q Be quiet. Run test without printing anything.\n" @@ -1618,8 +1829,11 @@ int main(int argc, char *argv[]) .port = DEFAULT_PORT, }; struct protoent *pe; + int both_mode = 0; unsigned int tmp; int forever = 0; + int fd[2]; + int cpid; /* process inputs */ extern char *optarg; @@ -1631,6 +1845,9 @@ int main(int argc, char *argv[]) while ((rc = getopt(argc, argv, GETOPT_STR)) != -1) { switch (rc) { + case 'B': + both_mode = 1; + break; case 's': server_mode = 1; break; @@ -1639,13 +1856,15 @@ int main(int argc, char *argv[]) break; case 'l': args.has_local_ip = 1; - if (convert_addr(&args, optarg, ADDR_TYPE_LOCAL) < 0) - return 1; + args.local_addr_str = optarg; break; case 'r': args.has_remote_ip = 1; - if (convert_addr(&args, optarg, ADDR_TYPE_REMOTE) < 0) - return 1; + args.remote_addr_str = optarg; + break; + case 'c': + args.has_local_ip = 1; + args.client_local_addr_str = optarg; break; case 'p': if (str_to_uint(optarg, 1, 65535, &tmp) != 0) { @@ -1685,15 +1904,23 @@ int main(int argc, char *argv[]) case 'n': iter = atoi(optarg); break; + case 'N': + args.clientns = optarg; + break; + case 'O': + args.serverns = optarg; + break; case 'L': msg = random_msg(atoi(optarg)); break; case 'M': args.password = optarg; break; + case 'X': + args.client_pw = optarg; + break; case 'm': - if (convert_addr(&args, optarg, ADDR_TYPE_MD5_PREFIX) < 0) - return 1; + args.md5_prefix_str = optarg; break; case 'S': args.use_setsockopt = 1; @@ -1703,11 +1930,9 @@ int main(int argc, char *argv[]) break; case 'd': args.dev = optarg; - args.ifindex = get_ifidx(optarg); - if (args.ifindex < 0) { - fprintf(stderr, "Invalid device name\n"); - return 1; - } + break; + case 'I': + args.server_dev = optarg; break; case 'i': interactive = 1; @@ -1726,28 +1951,17 @@ int main(int argc, char *argv[]) break; case '0': args.has_expected_laddr = 1; - if (convert_addr(&args, optarg, - ADDR_TYPE_EXPECTED_LOCAL)) - return 1; + args.expected_laddr_str = optarg; break; case '1': args.has_expected_raddr = 1; - if (convert_addr(&args, optarg, - ADDR_TYPE_EXPECTED_REMOTE)) - return 1; - + args.expected_raddr_str = optarg; break; case '2': - if (str_to_uint(optarg, 0, INT_MAX, &tmp) == 0) { - args.expected_ifindex = (int)tmp; - } else { - args.expected_ifindex = get_ifidx(optarg); - if (args.expected_ifindex < 0) { - fprintf(stderr, - "Invalid expected device\n"); - return 1; - } - } + args.expected_dev = optarg; + break; + case '3': + args.expected_server_dev = optarg; break; case 'q': quiet = 1; @@ -1759,23 +1973,17 @@ int main(int argc, char *argv[]) } if (args.password && - ((!args.has_remote_ip && !args.prefix_len) || args.type != SOCK_STREAM)) { + ((!args.has_remote_ip && !args.md5_prefix_str) || + args.type != SOCK_STREAM)) { log_error("MD5 passwords apply to TCP only and require a remote ip for the password\n"); return 1; } - if (args.prefix_len && !args.password) { + if (args.md5_prefix_str && !args.password) { log_error("Prefix range for MD5 protection specified without a password\n"); return 1; } - if ((args.use_setsockopt || args.use_cmsg) && !args.ifindex) { - fprintf(stderr, "Device binding not specified\n"); - return 1; - } - if (args.use_setsockopt || args.use_cmsg) - args.dev = NULL; - if (iter == 0) { fprintf(stderr, "Invalid number of messages to send\n"); return 1; @@ -1792,7 +2000,7 @@ int main(int argc, char *argv[]) return 1; } - if (!server_mode && !args.has_grp && + if ((both_mode || !server_mode) && !args.has_grp && !args.has_remote_ip && !args.has_local_ip) { fprintf(stderr, "Local (server mode) or remote IP (client IP) required\n"); @@ -1804,9 +2012,26 @@ int main(int argc, char *argv[]) msg = NULL; } + if (both_mode) { + if (pipe(fd) < 0) { + perror("pipe"); + exit(1); + } + + cpid = fork(); + if (cpid < 0) { + perror("fork"); + exit(1); + } + if (cpid) + return ipc_parent(cpid, fd[0], &args); + + return ipc_child(fd[1], &args); + } + if (server_mode) { do { - rc = do_server(&args); + rc = do_server(&args, -1); } while (forever); return rc; diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 464e31eabc73..64cd2e23c568 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -162,7 +162,15 @@ # - list_flush_ipv6_exception # Using the same topology as in pmtu_ipv6, create exceptions, and check # they are shown when listing exception caches, gone after flushing them - +# +# - pmtu_ipv4_route_change +# Use the same topology as in pmtu_ipv4, but issue a route replacement +# command and delete the corresponding device afterward. This tests for +# proper cleanup of the PMTU exceptions by the route replacement path. +# Device unregistration should complete successfully +# +# - pmtu_ipv6_route_change +# Same as above but with IPv6 # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 @@ -224,7 +232,9 @@ tests=" cleanup_ipv4_exception ipv4: cleanup of cached exceptions 1 cleanup_ipv6_exception ipv6: cleanup of cached exceptions 1 list_flush_ipv4_exception ipv4: list and flush cached exceptions 1 - list_flush_ipv6_exception ipv6: list and flush cached exceptions 1" + list_flush_ipv6_exception ipv6: list and flush cached exceptions 1 + pmtu_ipv4_route_change ipv4: PMTU exception w/route replace 1 + pmtu_ipv6_route_change ipv6: PMTU exception w/route replace 1" NS_A="ns-A" NS_B="ns-B" @@ -1782,6 +1792,63 @@ test_list_flush_ipv6_exception() { return ${fail} } +test_pmtu_ipvX_route_change() { + family=${1} + + setup namespaces routing || return 2 + trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ + "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \ + "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \ + "${ns_r2}" veth_R2-B "${ns_b}" veth_B-R2 + + if [ ${family} -eq 4 ]; then + ping=ping + dst1="${prefix4}.${b_r1}.1" + dst2="${prefix4}.${b_r2}.1" + gw="${prefix4}.${a_r1}.2" + else + ping=${ping6} + dst1="${prefix6}:${b_r1}::1" + dst2="${prefix6}:${b_r2}::1" + gw="${prefix6}:${a_r1}::2" + fi + + # Set up initial MTU values + mtu "${ns_a}" veth_A-R1 2000 + mtu "${ns_r1}" veth_R1-A 2000 + mtu "${ns_r1}" veth_R1-B 1400 + mtu "${ns_b}" veth_B-R1 1400 + + mtu "${ns_a}" veth_A-R2 2000 + mtu "${ns_r2}" veth_R2-A 2000 + mtu "${ns_r2}" veth_R2-B 1500 + mtu "${ns_b}" veth_B-R2 1500 + + # Create route exceptions + run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst1} + run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst2} + + # Check that exceptions have been created with the correct PMTU + pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})" + check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1 + pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})" + check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1 + + # Replace the route from A to R1 + run_cmd ${ns_a} ip route change default via ${gw} + + # Delete the device in A + run_cmd ${ns_a} ip link del "veth_A-R1" +} + +test_pmtu_ipv4_route_change() { + test_pmtu_ipvX_route_change 4 +} + +test_pmtu_ipv6_route_change() { + test_pmtu_ipvX_route_change 6 +} + usage() { echo echo "$0 [OPTIONS] [TEST]..." diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c index 3155fbbf644b..b4cca382d125 100644 --- a/tools/testing/selftests/net/so_txtime.c +++ b/tools/testing/selftests/net/so_txtime.c @@ -196,7 +196,7 @@ static void do_recv_errqueue_timeout(int fdt) default: error(1, 0, "errqueue: errno %u code %u\n", err->ee_errno, err->ee_code); - }; + } tstamp = ((int64_t) err->ee_data) << 32 | err->ee_info; tstamp -= (int64_t) glob_tstart; diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index cb0d1890a860..426d07875a48 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -103,8 +103,8 @@ FIXTURE(tls) FIXTURE_VARIANT(tls) { - u16 tls_version; - u16 cipher_type; + uint16_t tls_version; + uint16_t cipher_type; }; FIXTURE_VARIANT_ADD(tls, 12_gcm) @@ -133,7 +133,10 @@ FIXTURE_VARIANT_ADD(tls, 13_chacha) FIXTURE_SETUP(tls) { - union tls_crypto_context tls12; + union { + struct tls12_crypto_info_aes_gcm_128 aes128; + struct tls12_crypto_info_chacha20_poly1305 chacha20; + } tls12; struct sockaddr_in addr; socklen_t len; int sfd, ret; @@ -143,14 +146,16 @@ FIXTURE_SETUP(tls) len = sizeof(addr); memset(&tls12, 0, sizeof(tls12)); - tls12.info.version = variant->tls_version; - tls12.info.cipher_type = variant->cipher_type; switch (variant->cipher_type) { case TLS_CIPHER_CHACHA20_POLY1305: - tls12_sz = sizeof(tls12_crypto_info_chacha20_poly1305); + tls12_sz = sizeof(struct tls12_crypto_info_chacha20_poly1305); + tls12.chacha20.info.version = variant->tls_version; + tls12.chacha20.info.cipher_type = variant->cipher_type; break; case TLS_CIPHER_AES_GCM_128: - tls12_sz = sizeof(tls12_crypto_info_aes_gcm_128); + tls12_sz = sizeof(struct tls12_crypto_info_aes_gcm_128); + tls12.aes128.info.version = variant->tls_version; + tls12.aes128.info.cipher_type = variant->cipher_type; break; default: tls12_sz = 0; diff --git a/tools/testing/selftests/net/txtimestamp.c b/tools/testing/selftests/net/txtimestamp.c index 490a8cca708a..fabb1d555ee5 100644 --- a/tools/testing/selftests/net/txtimestamp.c +++ b/tools/testing/selftests/net/txtimestamp.c @@ -26,6 +26,7 @@ #include <inttypes.h> #include <linux/errqueue.h> #include <linux/if_ether.h> +#include <linux/if_packet.h> #include <linux/ipv6.h> #include <linux/net_tstamp.h> #include <netdb.h> @@ -34,7 +35,6 @@ #include <netinet/ip.h> #include <netinet/udp.h> #include <netinet/tcp.h> -#include <netpacket/packet.h> #include <poll.h> #include <stdarg.h> #include <stdbool.h> @@ -495,12 +495,12 @@ static void do_test(int family, unsigned int report_opt) total_len = cfg_payload_len; if (cfg_use_pf_packet || cfg_proto == SOCK_RAW) { total_len += sizeof(struct udphdr); - if (cfg_use_pf_packet || cfg_ipproto == IPPROTO_RAW) + if (cfg_use_pf_packet || cfg_ipproto == IPPROTO_RAW) { if (family == PF_INET) total_len += sizeof(struct iphdr); else total_len += sizeof(struct ipv6hdr); - + } /* special case, only rawv6_sendmsg: * pass proto in sin6_port if not connected * also see ANK comment in net/ipv4/raw.c diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh index ac2a30be9b32..f8a19f548ae9 100755 --- a/tools/testing/selftests/net/udpgro.sh +++ b/tools/testing/selftests/net/udpgro.sh @@ -5,6 +5,14 @@ readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" +# set global exit status, but never reset nonzero one. +check_err() +{ + if [ $ret -eq 0 ]; then + ret=$1 + fi +} + cleanup() { local -r jobs="$(jobs -p)" local -r ns="$(ip netns list|grep $PEER_NS)" @@ -44,7 +52,9 @@ run_one() { # Hack: let bg programs complete the startup sleep 0.1 ./udpgso_bench_tx ${tx_args} + ret=$? wait $(jobs -p) + return $ret } run_test() { @@ -87,8 +97,10 @@ run_one_nat() { sleep 0.1 ./udpgso_bench_tx ${tx_args} + ret=$? kill -INT $pid wait $(jobs -p) + return $ret } run_one_2sock() { @@ -110,7 +122,9 @@ run_one_2sock() { sleep 0.1 # first UDP GSO socket should be closed at this point ./udpgso_bench_tx ${tx_args} + ret=$? wait $(jobs -p) + return $ret } run_nat_test() { @@ -131,36 +145,54 @@ run_all() { local -r core_args="-l 4" local -r ipv4_args="${core_args} -4 -D 192.168.1.1" local -r ipv6_args="${core_args} -6 -D 2001:db8::1" + ret=0 echo "ipv4" run_test "no GRO" "${ipv4_args} -M 10 -s 1400" "-4 -n 10 -l 1400" + check_err $? # explicitly check we are not receiving UDP_SEGMENT cmsg (-S -1) # when GRO does not take place run_test "no GRO chk cmsg" "${ipv4_args} -M 10 -s 1400" "-4 -n 10 -l 1400 -S -1" + check_err $? # the GSO packets are aggregated because: # * veth schedule napi after each xmit # * segmentation happens in BH context, veth napi poll is delayed after # the transmission of the last segment run_test "GRO" "${ipv4_args} -M 1 -s 14720 -S 0 " "-4 -n 1 -l 14720" + check_err $? run_test "GRO chk cmsg" "${ipv4_args} -M 1 -s 14720 -S 0 " "-4 -n 1 -l 14720 -S 1472" + check_err $? run_test "GRO with custom segment size" "${ipv4_args} -M 1 -s 14720 -S 500 " "-4 -n 1 -l 14720" + check_err $? run_test "GRO with custom segment size cmsg" "${ipv4_args} -M 1 -s 14720 -S 500 " "-4 -n 1 -l 14720 -S 500" + check_err $? run_nat_test "bad GRO lookup" "${ipv4_args} -M 1 -s 14720 -S 0" "-n 10 -l 1472" + check_err $? run_2sock_test "multiple GRO socks" "${ipv4_args} -M 1 -s 14720 -S 0 " "-4 -n 1 -l 14720 -S 1472" + check_err $? echo "ipv6" run_test "no GRO" "${ipv6_args} -M 10 -s 1400" "-n 10 -l 1400" + check_err $? run_test "no GRO chk cmsg" "${ipv6_args} -M 10 -s 1400" "-n 10 -l 1400 -S -1" + check_err $? run_test "GRO" "${ipv6_args} -M 1 -s 14520 -S 0" "-n 1 -l 14520" + check_err $? run_test "GRO chk cmsg" "${ipv6_args} -M 1 -s 14520 -S 0" "-n 1 -l 14520 -S 1452" + check_err $? run_test "GRO with custom segment size" "${ipv6_args} -M 1 -s 14520 -S 500" "-n 1 -l 14520" + check_err $? run_test "GRO with custom segment size cmsg" "${ipv6_args} -M 1 -s 14520 -S 500" "-n 1 -l 14520 -S 500" + check_err $? run_nat_test "bad GRO lookup" "${ipv6_args} -M 1 -s 14520 -S 0" "-n 10 -l 1452" + check_err $? run_2sock_test "multiple GRO socks" "${ipv6_args} -M 1 -s 14520 -S 0 " "-n 1 -l 14520 -S 1452" + check_err $? + return $ret } if [ ! -f ../bpf/xdp_dummy.o ]; then @@ -180,3 +212,5 @@ elif [[ $1 == "__subprocess_2sock" ]]; then shift run_one_2sock $@ fi + +exit $? diff --git a/tools/testing/selftests/net/unicast_extensions.sh b/tools/testing/selftests/net/unicast_extensions.sh new file mode 100755 index 000000000000..dbf0421986df --- /dev/null +++ b/tools/testing/selftests/net/unicast_extensions.sh @@ -0,0 +1,228 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# By Seth Schoen (c) 2021, for the IPv4 Unicast Extensions Project +# Thanks to David Ahern for help and advice on nettest modifications. +# +# Self-tests for IPv4 address extensions: the kernel's ability to accept +# certain traditionally unused or unallocated IPv4 addresses. For each kind +# of address, we test for interface assignment, ping, TCP, and forwarding. +# Must be run as root (to manipulate network namespaces and virtual +# interfaces). +# +# Things we test for here: +# +# * Currently the kernel accepts addresses in 0/8 and 240/4 as valid. +# +# * Notwithstanding that, 0.0.0.0 and 255.255.255.255 cannot be assigned. +# +# * Currently the kernel DOES NOT accept unicast use of the lowest +# address in an IPv4 subnet (e.g. 192.168.100.0/32 in 192.168.100.0/24). +# This is treated as a second broadcast address, for compatibility +# with 4.2BSD (!). +# +# * Currently the kernel DOES NOT accept unicast use of any of 127/8. +# +# * Currently the kernel DOES NOT accept unicast use of any of 224/4. +# +# These tests provide an easy way to flip the expected result of any +# of these behaviors for testing kernel patches that change them. + +# nettest can be run from PATH or from same directory as this selftest +if ! which nettest >/dev/null; then + PATH=$PWD:$PATH + if ! which nettest >/dev/null; then + echo "'nettest' command not found; skipping tests" + exit 0 + fi +fi + +result=0 + +hide_output(){ exec 3>&1 4>&2 >/dev/null 2>/dev/null; } +show_output(){ exec >&3 2>&4; } + +show_result(){ + if [ $1 -eq 0 ]; then + printf "TEST: %-60s [ OK ]\n" "${2}" + else + printf "TEST: %-60s [FAIL]\n" "${2}" + result=1 + fi +} + +_do_segmenttest(){ + # Perform a simple set of link tests between a pair of + # IP addresses on a shared (virtual) segment, using + # ping and nettest. + # foo --- bar + # Arguments: ip_a ip_b prefix_length test_description + # + # Caller must set up foo-ns and bar-ns namespaces + # containing linked veth devices foo and bar, + # respectively. + + ip -n foo-ns address add $1/$3 dev foo || return 1 + ip -n foo-ns link set foo up || return 1 + ip -n bar-ns address add $2/$3 dev bar || return 1 + ip -n bar-ns link set bar up || return 1 + + ip netns exec foo-ns timeout 2 ping -c 1 $2 || return 1 + ip netns exec bar-ns timeout 2 ping -c 1 $1 || return 1 + + nettest -B -N bar-ns -O foo-ns -r $1 || return 1 + nettest -B -N foo-ns -O bar-ns -r $2 || return 1 + + return 0 +} + +_do_route_test(){ + # Perform a simple set of gateway tests. + # + # [foo] <---> [foo1]-[bar1] <---> [bar] /prefix + # host gateway host + # + # Arguments: foo_ip foo1_ip bar1_ip bar_ip prefix_len test_description + # Displays test result and returns success or failure. + + # Caller must set up foo-ns, bar-ns, and router-ns + # containing linked veth devices foo-foo1, bar1-bar + # (foo in foo-ns, foo1 and bar1 in router-ns, and + # bar in bar-ns). + + ip -n foo-ns address add $1/$5 dev foo || return 1 + ip -n foo-ns link set foo up || return 1 + ip -n foo-ns route add default via $2 || return 1 + ip -n bar-ns address add $4/$5 dev bar || return 1 + ip -n bar-ns link set bar up || return 1 + ip -n bar-ns route add default via $3 || return 1 + ip -n router-ns address add $2/$5 dev foo1 || return 1 + ip -n router-ns link set foo1 up || return 1 + ip -n router-ns address add $3/$5 dev bar1 || return 1 + ip -n router-ns link set bar1 up || return 1 + + echo 1 | ip netns exec router-ns tee /proc/sys/net/ipv4/ip_forward + + ip netns exec foo-ns timeout 2 ping -c 1 $2 || return 1 + ip netns exec foo-ns timeout 2 ping -c 1 $4 || return 1 + ip netns exec bar-ns timeout 2 ping -c 1 $3 || return 1 + ip netns exec bar-ns timeout 2 ping -c 1 $1 || return 1 + + nettest -B -N bar-ns -O foo-ns -r $1 || return 1 + nettest -B -N foo-ns -O bar-ns -r $4 || return 1 + + return 0 +} + +segmenttest(){ + # Sets up veth link and tries to connect over it. + # Arguments: ip_a ip_b prefix_len test_description + hide_output + ip netns add foo-ns + ip netns add bar-ns + ip link add foo netns foo-ns type veth peer name bar netns bar-ns + + test_result=0 + _do_segmenttest "$@" || test_result=1 + + ip netns pids foo-ns | xargs -r kill -9 + ip netns pids bar-ns | xargs -r kill -9 + ip netns del foo-ns + ip netns del bar-ns + show_output + + # inverted tests will expect failure instead of success + [ -n "$expect_failure" ] && test_result=`expr 1 - $test_result` + + show_result $test_result "$4" +} + +route_test(){ + # Sets up a simple gateway and tries to connect through it. + # [foo] <---> [foo1]-[bar1] <---> [bar] /prefix + # Arguments: foo_ip foo1_ip bar1_ip bar_ip prefix_len test_description + # Returns success or failure. + + hide_output + ip netns add foo-ns + ip netns add bar-ns + ip netns add router-ns + ip link add foo netns foo-ns type veth peer name foo1 netns router-ns + ip link add bar netns bar-ns type veth peer name bar1 netns router-ns + + test_result=0 + _do_route_test "$@" || test_result=1 + + ip netns pids foo-ns | xargs -r kill -9 + ip netns pids bar-ns | xargs -r kill -9 + ip netns pids router-ns | xargs -r kill -9 + ip netns del foo-ns + ip netns del bar-ns + ip netns del router-ns + + show_output + + # inverted tests will expect failure instead of success + [ -n "$expect_failure" ] && test_result=`expr 1 - $test_result` + show_result $test_result "$6" +} + +echo "###########################################################################" +echo "Unicast address extensions tests (behavior of reserved IPv4 addresses)" +echo "###########################################################################" +# +# Test support for 240/4 +segmenttest 240.1.2.1 240.1.2.4 24 "assign and ping within 240/4 (1 of 2) (is allowed)" +segmenttest 250.100.2.1 250.100.30.4 16 "assign and ping within 240/4 (2 of 2) (is allowed)" +# +# Test support for 0/8 +segmenttest 0.1.2.17 0.1.2.23 24 "assign and ping within 0/8 (1 of 2) (is allowed)" +segmenttest 0.77.240.17 0.77.2.23 16 "assign and ping within 0/8 (2 of 2) (is allowed)" +# +# Even 255.255/16 is OK! +segmenttest 255.255.3.1 255.255.50.77 16 "assign and ping inside 255.255/16 (is allowed)" +# +# Or 255.255.255/24 +segmenttest 255.255.255.1 255.255.255.254 24 "assign and ping inside 255.255.255/24 (is allowed)" +# +# Routing between different networks +route_test 240.5.6.7 240.5.6.1 255.1.2.1 255.1.2.3 24 "route between 240.5.6/24 and 255.1.2/24 (is allowed)" +route_test 0.200.6.7 0.200.38.1 245.99.101.1 245.99.200.111 16 "route between 0.200/16 and 245.99/16 (is allowed)" +# +# ============================================== +# ==== TESTS THAT CURRENTLY EXPECT FAILURE ===== +# ============================================== +expect_failure=true +# It should still not be possible to use 0.0.0.0 or 255.255.255.255 +# as a unicast address. Thus, these tests expect failure. +segmenttest 0.0.1.5 0.0.0.0 16 "assigning 0.0.0.0 (is forbidden)" +segmenttest 255.255.255.1 255.255.255.255 16 "assigning 255.255.255.255 (is forbidden)" +# +# Test support for not having all of 127 be loopback +# Currently Linux does not allow this, so this should fail too +segmenttest 127.99.4.5 127.99.4.6 16 "assign and ping inside 127/8 (is forbidden)" +# +# Test support for lowest address +# Currently Linux does not allow this, so this should fail too +segmenttest 5.10.15.20 5.10.15.0 24 "assign and ping lowest address (is forbidden)" +# +# Routing using lowest address as a gateway/endpoint +# Currently Linux does not allow this, so this should fail too +route_test 192.168.42.1 192.168.42.0 9.8.7.6 9.8.7.0 24 "routing using lowest address (is forbidden)" +# +# Test support for unicast use of class D +# Currently Linux does not allow this, so this should fail too +segmenttest 225.1.2.3 225.1.2.200 24 "assign and ping class D address (is forbidden)" +# +# Routing using class D as a gateway +route_test 225.1.42.1 225.1.42.2 9.8.7.6 9.8.7.1 24 "routing using class D (is forbidden)" +# +# Routing using 127/8 +# Currently Linux does not allow this, so this should fail too +route_test 127.99.2.3 127.99.2.4 200.1.2.3 200.1.2.4 24 "routing using 127/8 (is forbidden)" +# +unset expect_failure +# ===================================================== +# ==== END OF TESTS THAT CURRENTLY EXPECT FAILURE ===== +# ===================================================== +exit ${result} diff --git a/tools/testing/selftests/net/xfrm_policy.sh b/tools/testing/selftests/net/xfrm_policy.sh index 7a1bf94c5bd3..bdf450eaf60c 100755 --- a/tools/testing/selftests/net/xfrm_policy.sh +++ b/tools/testing/selftests/net/xfrm_policy.sh @@ -202,7 +202,7 @@ check_xfrm() { # 1: iptables -m policy rule count != 0 rval=$1 ip=$2 - lret=0 + local lret=0 ip netns exec ns1 ping -q -c 1 10.0.2.$ip > /dev/null @@ -287,6 +287,47 @@ check_hthresh_repeat() return 0 } +# insert non-overlapping policies in a random order and check that +# all of them can be fetched using the traffic selectors. +check_random_order() +{ + local ns=$1 + local log=$2 + + for i in $(seq 100); do + ip -net $ns xfrm policy flush + for j in $(seq 0 16 255 | sort -R); do + ip -net $ns xfrm policy add dst $j.0.0.0/24 dir out priority 10 action allow + done + for j in $(seq 0 16 255); do + if ! ip -net $ns xfrm policy get dst $j.0.0.0/24 dir out > /dev/null; then + echo "FAIL: $log" 1>&2 + return 1 + fi + done + done + + for i in $(seq 100); do + ip -net $ns xfrm policy flush + for j in $(seq 0 16 255 | sort -R); do + local addr=$(printf "e000:0000:%02x00::/56" $j) + ip -net $ns xfrm policy add dst $addr dir out priority 10 action allow + done + for j in $(seq 0 16 255); do + local addr=$(printf "e000:0000:%02x00::/56" $j) + if ! ip -net $ns xfrm policy get dst $addr dir out > /dev/null; then + echo "FAIL: $log" 1>&2 + return 1 + fi + done + done + + ip -net $ns xfrm policy flush + + echo "PASS: $log" + return 0 +} + #check for needed privileges if [ "$(id -u)" -ne 0 ];then echo "SKIP: Need root privileges" @@ -438,6 +479,8 @@ check_exceptions "exceptions and block policies after htresh change to normal" check_hthresh_repeat "policies with repeated htresh change" +check_random_order ns3 "policies inserted in random order" + for i in 1 2 3 4;do ip netns del ns$i;done exit $ret diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index a374e10ef506..3006a8e5b41a 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -4,7 +4,8 @@ TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \ conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \ nft_concat_range.sh nft_conntrack_helper.sh \ - nft_queue.sh nft_meta.sh + nft_queue.sh nft_meta.sh \ + ipip-conntrack-mtu.sh LDLIBS = -lmnl TEST_GEN_FILES = nf-queue diff --git a/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh b/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh new file mode 100755 index 000000000000..4a6f5c3b3215 --- /dev/null +++ b/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh @@ -0,0 +1,206 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +# Conntrack needs to reassemble fragments in order to have complete +# packets for rule matching. Reassembly can lead to packet loss. + +# Consider the following setup: +# +--------+ +---------+ +--------+ +# |Router A|-------|Wanrouter|-------|Router B| +# | |.IPIP..| |..IPIP.| | +# +--------+ +---------+ +--------+ +# / mtu 1400 \ +# / \ +#+--------+ +--------+ +#|Client A| |Client B| +#| | | | +#+--------+ +--------+ + +# Router A and Router B use IPIP tunnel interfaces to tunnel traffic +# between Client A and Client B over WAN. Wanrouter has MTU 1400 set +# on its interfaces. + +rnd=$(mktemp -u XXXXXXXX) +rx=$(mktemp) + +r_a="ns-ra-$rnd" +r_b="ns-rb-$rnd" +r_w="ns-rw-$rnd" +c_a="ns-ca-$rnd" +c_b="ns-cb-$rnd" + +checktool (){ + if ! $1 > /dev/null 2>&1; then + echo "SKIP: Could not $2" + exit $ksft_skip + fi +} + +checktool "iptables --version" "run test without iptables" +checktool "ip -Version" "run test without ip tool" +checktool "which nc" "run test without nc (netcat)" +checktool "ip netns add ${r_a}" "create net namespace" + +for n in ${r_b} ${r_w} ${c_a} ${c_b};do + ip netns add ${n} +done + +cleanup() { + for n in ${r_a} ${r_b} ${r_w} ${c_a} ${c_b};do + ip netns del ${n} + done + rm -f ${rx} +} + +trap cleanup EXIT + +test_path() { + msg="$1" + + ip netns exec ${c_b} nc -n -w 3 -q 3 -u -l -p 5000 > ${rx} < /dev/null & + + sleep 1 + for i in 1 2 3; do + head -c1400 /dev/zero | tr "\000" "a" | ip netns exec ${c_a} nc -n -w 1 -u 192.168.20.2 5000 + done + + wait + + bytes=$(wc -c < ${rx}) + + if [ $bytes -eq 1400 ];then + echo "OK: PMTU $msg connection tracking" + else + echo "FAIL: PMTU $msg connection tracking: got $bytes, expected 1400" + exit 1 + fi +} + +# Detailed setup for Router A +# --------------------------- +# Interfaces: +# eth0: 10.2.2.1/24 +# eth1: 192.168.10.1/24 +# ipip0: No IP address, local 10.2.2.1 remote 10.4.4.1 +# Routes: +# 192.168.20.0/24 dev ipip0 (192.168.20.0/24 is subnet of Client B) +# 10.4.4.1 via 10.2.2.254 (Router B via Wanrouter) +# No iptables rules at all. + +ip link add veth0 netns ${r_a} type veth peer name veth0 netns ${r_w} +ip link add veth1 netns ${r_a} type veth peer name veth0 netns ${c_a} + +l_addr="10.2.2.1" +r_addr="10.4.4.1" +ip netns exec ${r_a} ip link add ipip0 type ipip local ${l_addr} remote ${r_addr} mode ipip || exit $ksft_skip + +for dev in lo veth0 veth1 ipip0; do + ip -net ${r_a} link set $dev up +done + +ip -net ${r_a} addr add 10.2.2.1/24 dev veth0 +ip -net ${r_a} addr add 192.168.10.1/24 dev veth1 + +ip -net ${r_a} route add 192.168.20.0/24 dev ipip0 +ip -net ${r_a} route add 10.4.4.0/24 via 10.2.2.254 + +ip netns exec ${r_a} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null + +# Detailed setup for Router B +# --------------------------- +# Interfaces: +# eth0: 10.4.4.1/24 +# eth1: 192.168.20.1/24 +# ipip0: No IP address, local 10.4.4.1 remote 10.2.2.1 +# Routes: +# 192.168.10.0/24 dev ipip0 (192.168.10.0/24 is subnet of Client A) +# 10.2.2.1 via 10.4.4.254 (Router A via Wanrouter) +# No iptables rules at all. + +ip link add veth0 netns ${r_b} type veth peer name veth1 netns ${r_w} +ip link add veth1 netns ${r_b} type veth peer name veth0 netns ${c_b} + +l_addr="10.4.4.1" +r_addr="10.2.2.1" + +ip netns exec ${r_b} ip link add ipip0 type ipip local ${l_addr} remote ${r_addr} mode ipip || exit $ksft_skip + +for dev in lo veth0 veth1 ipip0; do + ip -net ${r_b} link set $dev up +done + +ip -net ${r_b} addr add 10.4.4.1/24 dev veth0 +ip -net ${r_b} addr add 192.168.20.1/24 dev veth1 + +ip -net ${r_b} route add 192.168.10.0/24 dev ipip0 +ip -net ${r_b} route add 10.2.2.0/24 via 10.4.4.254 +ip netns exec ${r_b} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null + +# Client A +ip -net ${c_a} addr add 192.168.10.2/24 dev veth0 +ip -net ${c_a} link set dev lo up +ip -net ${c_a} link set dev veth0 up +ip -net ${c_a} route add default via 192.168.10.1 + +# Client A +ip -net ${c_b} addr add 192.168.20.2/24 dev veth0 +ip -net ${c_b} link set dev veth0 up +ip -net ${c_b} link set dev lo up +ip -net ${c_b} route add default via 192.168.20.1 + +# Wan +ip -net ${r_w} addr add 10.2.2.254/24 dev veth0 +ip -net ${r_w} addr add 10.4.4.254/24 dev veth1 + +ip -net ${r_w} link set dev lo up +ip -net ${r_w} link set dev veth0 up mtu 1400 +ip -net ${r_w} link set dev veth1 up mtu 1400 + +ip -net ${r_a} link set dev veth0 mtu 1400 +ip -net ${r_b} link set dev veth0 mtu 1400 + +ip netns exec ${r_w} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null + +# Path MTU discovery +# ------------------ +# Running tracepath from Client A to Client B shows PMTU discovery is working +# as expected: +# +# clienta:~# tracepath 192.168.20.2 +# 1?: [LOCALHOST] pmtu 1500 +# 1: 192.168.10.1 0.867ms +# 1: 192.168.10.1 0.302ms +# 2: 192.168.10.1 0.312ms pmtu 1480 +# 2: no reply +# 3: 192.168.10.1 0.510ms pmtu 1380 +# 3: 192.168.20.2 2.320ms reached +# Resume: pmtu 1380 hops 3 back 3 + +# ip netns exec ${c_a} traceroute --mtu 192.168.20.2 + +# Router A has learned PMTU (1400) to Router B from Wanrouter. +# Client A has learned PMTU (1400 - IPIP overhead = 1380) to Client B +# from Router A. + +#Send large UDP packet +#--------------------- +#Now we send a 1400 bytes UDP packet from Client A to Client B: + +# clienta:~# head -c1400 /dev/zero | tr "\000" "a" | nc -u 192.168.20.2 5000 +test_path "without" + +# The IPv4 stack on Client A already knows the PMTU to Client B, so the +# UDP packet is sent as two fragments (1380 + 20). Router A forwards the +# fragments between eth1 and ipip0. The fragments fit into the tunnel and +# reach their destination. + +#When sending the large UDP packet again, Router A now reassembles the +#fragments before routing the packet over ipip0. The resulting IPIP +#packet is too big (1400) for the tunnel PMTU (1380) to Router B, it is +#dropped on Router A before sending. + +ip netns exec ${r_a} iptables -A FORWARD -m conntrack --ctstate NEW +test_path "with" diff --git a/tools/testing/selftests/netfilter/nft_conntrack_helper.sh b/tools/testing/selftests/netfilter/nft_conntrack_helper.sh index edf0a48da6bf..bf6b9626c7dd 100755 --- a/tools/testing/selftests/netfilter/nft_conntrack_helper.sh +++ b/tools/testing/selftests/netfilter/nft_conntrack_helper.sh @@ -94,7 +94,13 @@ check_for_helper() local message=$2 local port=$3 - ip netns exec ${netns} conntrack -L -p tcp --dport $port 2> /dev/null |grep -q 'helper=ftp' + if echo $message |grep -q 'ipv6';then + local family="ipv6" + else + local family="ipv4" + fi + + ip netns exec ${netns} conntrack -L -f $family -p tcp --dport $port 2> /dev/null |grep -q 'helper=ftp' if [ $? -ne 0 ] ; then echo "FAIL: ${netns} did not show attached helper $message" 1>&2 ret=1 @@ -111,8 +117,8 @@ test_helper() sleep 3 | ip netns exec ${ns2} nc -w 2 -l -p $port > /dev/null & - sleep 1 sleep 1 | ip netns exec ${ns1} nc -w 2 10.0.1.2 $port > /dev/null & + sleep 1 check_for_helper "$ns1" "ip $msg" $port check_for_helper "$ns2" "ip $msg" $port @@ -128,8 +134,8 @@ test_helper() sleep 3 | ip netns exec ${ns2} nc -w 2 -6 -l -p $port > /dev/null & - sleep 1 sleep 1 | ip netns exec ${ns1} nc -w 2 -6 dead:1::2 $port > /dev/null & + sleep 1 check_for_helper "$ns1" "ipv6 $msg" $port check_for_helper "$ns2" "ipv6 $msg" $port diff --git a/tools/testing/selftests/netfilter/nft_meta.sh b/tools/testing/selftests/netfilter/nft_meta.sh index 087f0e6e71ce..f33154c04d34 100755 --- a/tools/testing/selftests/netfilter/nft_meta.sh +++ b/tools/testing/selftests/netfilter/nft_meta.sh @@ -23,7 +23,7 @@ ip -net "$ns0" addr add 127.0.0.1 dev lo trap cleanup EXIT -currentyear=$(date +%G) +currentyear=$(date +%Y) lastyear=$((currentyear-1)) ip netns exec "$ns0" nft -f /dev/stdin <<EOF table inet filter { diff --git a/tools/testing/selftests/powerpc/alignment/alignment_handler.c b/tools/testing/selftests/powerpc/alignment/alignment_handler.c index cb53a8b777e6..c25cf7cd45e9 100644 --- a/tools/testing/selftests/powerpc/alignment/alignment_handler.c +++ b/tools/testing/selftests/powerpc/alignment/alignment_handler.c @@ -443,7 +443,6 @@ int test_alignment_handler_integer(void) LOAD_DFORM_TEST(ldu); LOAD_XFORM_TEST(ldx); LOAD_XFORM_TEST(ldux); - LOAD_DFORM_TEST(lmw); STORE_DFORM_TEST(stb); STORE_XFORM_TEST(stbx); STORE_DFORM_TEST(stbu); @@ -462,7 +461,11 @@ int test_alignment_handler_integer(void) STORE_XFORM_TEST(stdx); STORE_DFORM_TEST(stdu); STORE_XFORM_TEST(stdux); + +#ifdef __BIG_ENDIAN__ + LOAD_DFORM_TEST(lmw); STORE_DFORM_TEST(stmw); +#endif return rc; } diff --git a/tools/testing/selftests/powerpc/eeh/eeh-basic.sh b/tools/testing/selftests/powerpc/eeh/eeh-basic.sh index 0d783e1065c8..442b666ccdb5 100755 --- a/tools/testing/selftests/powerpc/eeh/eeh-basic.sh +++ b/tools/testing/selftests/powerpc/eeh/eeh-basic.sh @@ -1,28 +1,13 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0-only -KSELFTESTS_SKIP=4 - . ./eeh-functions.sh -if ! eeh_supported ; then - echo "EEH not supported on this system, skipping" - exit $KSELFTESTS_SKIP; -fi - -if [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_check" ] && \ - [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_break" ] ; then - echo "debugfs EEH testing files are missing. Is debugfs mounted?" - exit $KSELFTESTS_SKIP; -fi +eeh_test_prep # NB: may exit pre_lspci=`mktemp` lspci > $pre_lspci -# Bump the max freeze count to something absurd so we don't -# trip over it while breaking things. -echo 5000 > /sys/kernel/debug/powerpc/eeh_max_freezes - # record the devices that we break in here. Assuming everything # goes to plan we should get them back once the recover process # is finished. @@ -30,34 +15,16 @@ devices="" # Build up a list of candidate devices. for dev in `ls -1 /sys/bus/pci/devices/ | grep '\.0$'` ; do - # skip bridges since we can't recover them (yet...) - if [ -e "/sys/bus/pci/devices/$dev/pci_bus" ] ; then - echo "$dev, Skipped: bridge" + if ! eeh_can_break $dev ; then continue; fi - # Skip VFs for now since we don't have a reliable way - # to break them. + # Skip VFs for now since we don't have a reliable way to break them. if [ -e "/sys/bus/pci/devices/$dev/physfn" ] ; then echo "$dev, Skipped: virtfn" continue; fi - if [ "ahci" = "$(basename $(realpath /sys/bus/pci/devices/$dev/driver))" ] ; then - echo "$dev, Skipped: ahci doesn't support recovery" - continue - fi - - # Don't inject errosr into an already-frozen PE. This happens with - # PEs that contain multiple PCI devices (e.g. multi-function cards) - # and injecting new errors during the recovery process will probably - # result in the recovery failing and the device being marked as - # failed. - if ! pe_ok $dev ; then - echo "$dev, Skipped: Bad initial PE state" - continue; - fi - echo "$dev, Added" # Add to this list of device to check @@ -86,5 +53,5 @@ echo "$failed devices failed to recover ($dev_count tested)" lspci | diff -u $pre_lspci - rm -f $pre_lspci -test "$failed" == 0 +test "$failed" -eq 0 exit $? diff --git a/tools/testing/selftests/powerpc/eeh/eeh-functions.sh b/tools/testing/selftests/powerpc/eeh/eeh-functions.sh index 00dc32c0ed75..70daa3925dcb 100755..100644 --- a/tools/testing/selftests/powerpc/eeh/eeh-functions.sh +++ b/tools/testing/selftests/powerpc/eeh/eeh-functions.sh @@ -1,6 +1,12 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0-only +export KSELFTESTS_SKIP=4 + +log() { + echo >/dev/stderr $* +} + pe_ok() { local dev="$1" local path="/sys/bus/pci/devices/$dev/eeh_pe_state" @@ -39,6 +45,52 @@ eeh_supported() { grep -q 'EEH Subsystem is enabled' /proc/powerpc/eeh } +eeh_test_prep() { + if ! eeh_supported ; then + echo "EEH not supported on this system, skipping" + exit $KSELFTESTS_SKIP; + fi + + if [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_check" ] && \ + [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_break" ] ; then + log "debugfs EEH testing files are missing. Is debugfs mounted?" + exit $KSELFTESTS_SKIP; + fi + + # Bump the max freeze count to something absurd so we don't + # trip over it while breaking things. + echo 5000 > /sys/kernel/debug/powerpc/eeh_max_freezes +} + +eeh_can_break() { + # skip bridges since we can't recover them (yet...) + if [ -e "/sys/bus/pci/devices/$dev/pci_bus" ] ; then + log "$dev, Skipped: bridge" + return 1; + fi + + # The ahci driver doesn't support error recovery. If the ahci device + # happens to be hosting the root filesystem, and then we go and break + # it the system will generally go down. We should probably fix that + # at some point + if [ "ahci" = "$(basename $(realpath /sys/bus/pci/devices/$dev/driver))" ] ; then + log "$dev, Skipped: ahci doesn't support recovery" + return 1; + fi + + # Don't inject errosr into an already-frozen PE. This happens with + # PEs that contain multiple PCI devices (e.g. multi-function cards) + # and injecting new errors during the recovery process will probably + # result in the recovery failing and the device being marked as + # failed. + if ! pe_ok $dev ; then + log "$dev, Skipped: Bad initial PE state" + return 1; + fi + + return 0 +} + eeh_one_dev() { local dev="$1" @@ -46,7 +98,7 @@ eeh_one_dev() { # testing so check that the argument is a well-formed sysfs device # name. if ! test -e /sys/bus/pci/devices/$dev/ ; then - echo "Error: '$dev' must be a sysfs device name (DDDD:BB:DD.F)" + log "Error: '$dev' must be a sysfs device name (DDDD:BB:DD.F)" return 1; fi @@ -70,16 +122,124 @@ eeh_one_dev() { if pe_ok $dev ; then break; fi - echo "$dev, waited $i/${max_wait}" + log "$dev, waited $i/${max_wait}" sleep 1 done if ! pe_ok $dev ; then - echo "$dev, Failed to recover!" + log "$dev, Failed to recover!" return 1; fi - echo "$dev, Recovered after $i seconds" + log "$dev, Recovered after $i seconds" return 0; } +eeh_has_driver() { + test -e /sys/bus/pci/devices/$1/driver; + return $? +} + +eeh_can_recover() { + # we'll get an IO error if the device's current driver doesn't support + # error recovery + echo $1 > '/sys/kernel/debug/powerpc/eeh_dev_can_recover' 2>/dev/null + + return $? +} + +eeh_find_all_pfs() { + devices="" + + # SR-IOV on pseries requires hypervisor support, so check for that + is_pseries="" + if grep -q pSeries /proc/cpuinfo ; then + if [ ! -f /proc/device-tree/rtas/ibm,open-sriov-allow-unfreeze ] || + [ ! -f /proc/device-tree/rtas/ibm,open-sriov-map-pe-number ] ; then + return 1; + fi + + is_pseries="true" + fi + + for dev in `ls -1 /sys/bus/pci/devices/` ; do + sysfs="/sys/bus/pci/devices/$dev" + if [ ! -e "$sysfs/sriov_numvfs" ] ; then + continue + fi + + # skip unsupported PFs on pseries + if [ -z "$is_pseries" ] && + [ ! -f "$sysfs/of_node/ibm,is-open-sriov-pf" ] && + [ ! -f "$sysfs/of_node/ibm,open-sriov-vf-bar-info" ] ; then + continue; + fi + + # no driver, no vfs + if ! eeh_has_driver $dev ; then + continue + fi + + devices="$devices $dev" + done + + if [ -z "$devices" ] ; then + return 1; + fi + + echo $devices + return 0; +} + +# attempts to enable one VF on each PF so we can do VF specific tests. +# stdout: list of enabled VFs, one per line +# return code: 0 if vfs are found, 1 otherwise +eeh_enable_vfs() { + pf_list="$(eeh_find_all_pfs)" + + vfs=0 + for dev in $pf_list ; do + pf_sysfs="/sys/bus/pci/devices/$dev" + + # make sure we have a single VF + echo 0 > "$pf_sysfs/sriov_numvfs" + echo 1 > "$pf_sysfs/sriov_numvfs" + if [ "$?" != 0 ] ; then + log "Unable to enable VFs on $pf, skipping" + continue; + fi + + vf="$(basename $(realpath "$pf_sysfs/virtfn0"))" + if [ $? != 0 ] ; then + log "unable to find enabled vf on $pf" + echo 0 > "$pf_sysfs/sriov_numvfs" + continue; + fi + + if ! eeh_can_break $vf ; then + log "skipping " + + echo 0 > "$pf_sysfs/sriov_numvfs" + continue; + fi + + vfs="$((vfs + 1))" + echo $vf + done + + test "$vfs" != 0 + return $? +} + +eeh_disable_vfs() { + pf_list="$(eeh_find_all_pfs)" + if [ -z "$pf_list" ] ; then + return 1; + fi + + for dev in $pf_list ; do + echo 0 > "/sys/bus/pci/devices/$dev/sriov_numvfs" + done + + return 0; +} diff --git a/tools/testing/selftests/powerpc/eeh/eeh-vf-aware.sh b/tools/testing/selftests/powerpc/eeh/eeh-vf-aware.sh new file mode 100755 index 000000000000..874c11953bb6 --- /dev/null +++ b/tools/testing/selftests/powerpc/eeh/eeh-vf-aware.sh @@ -0,0 +1,45 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0-only + +. ./eeh-functions.sh + +eeh_test_prep # NB: may exit + +vf_list="$(eeh_enable_vfs)"; +if $? != 0 ; then + log "No usable VFs found. Skipping EEH unaware VF test" + exit $KSELFTESTS_SKIP; +fi + +log "Enabled VFs: $vf_list" + +tested=0 +passed=0 +for vf in $vf_list ; do + log "Testing $vf" + + if ! eeh_can_recover $vf ; then + log "Driver for $vf doesn't support error recovery, skipping" + continue; + fi + + tested="$((tested + 1))" + + log "Breaking $vf..." + if ! eeh_one_dev $vf ; then + log "$vf failed to recover" + continue; + fi + + passed="$((passed + 1))" +done + +eeh_disable_vfs + +if [ "$tested" == 0 ] ; then + echo "No VFs with EEH aware drivers found, skipping" + exit $KSELFTESTS_SKIP +fi + +test "$failed" != 0 +exit $?; diff --git a/tools/testing/selftests/powerpc/eeh/eeh-vf-unaware.sh b/tools/testing/selftests/powerpc/eeh/eeh-vf-unaware.sh new file mode 100755 index 000000000000..8a4c147b9d43 --- /dev/null +++ b/tools/testing/selftests/powerpc/eeh/eeh-vf-unaware.sh @@ -0,0 +1,35 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0-only + +. ./eeh-functions.sh + +eeh_test_prep # NB: may exit + +vf_list="$(eeh_enable_vfs)"; +if $? != 0 ; then + log "No usable VFs found. Skipping EEH unaware VF test" + exit $KSELFTESTS_SKIP; +fi + +log "Enabled VFs: $vf_list" + +failed=0 +for vf in $vf_list ; do + log "Testing $vf" + + if eeh_can_recover $vf ; then + log "Driver for $vf supports error recovery. Unbinding..." + echo "$vf" > /sys/bus/pci/devices/$vf/driver/unbind + fi + + log "Breaking $vf..." + if ! eeh_one_dev $vf ; then + log "$vf failed to recover" + failed="$((failed + 1))" + fi +done + +eeh_disable_vfs + +test "$failed" != 0 +exit $?; diff --git a/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c b/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c index 9e5c7f3f498a..0af4f02669a1 100644 --- a/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c +++ b/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c @@ -290,5 +290,5 @@ static int test(void) int main(void) { - test_harness(test, "pkey_exec_prot"); + return test_harness(test, "pkey_exec_prot"); } diff --git a/tools/testing/selftests/powerpc/mm/pkey_siginfo.c b/tools/testing/selftests/powerpc/mm/pkey_siginfo.c index 4f815d7c1214..2db76e56d4cb 100644 --- a/tools/testing/selftests/powerpc/mm/pkey_siginfo.c +++ b/tools/testing/selftests/powerpc/mm/pkey_siginfo.c @@ -329,5 +329,5 @@ static int test(void) int main(void) { - test_harness(test, "pkey_siginfo"); + return test_harness(test, "pkey_siginfo"); } diff --git a/tools/testing/selftests/rcutorture/bin/config2csv.sh b/tools/testing/selftests/rcutorture/bin/config2csv.sh new file mode 100755 index 000000000000..d5a16631b16e --- /dev/null +++ b/tools/testing/selftests/rcutorture/bin/config2csv.sh @@ -0,0 +1,67 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0+ +# +# Create a spreadsheet from torture-test Kconfig options and kernel boot +# parameters. Run this in the directory containing the scenario files. +# +# Usage: config2csv path.csv [ "scenario1 scenario2 ..." ] +# +# By default, this script will take the list of scenarios from the CFLIST +# file in that directory, otherwise it will consider only the scenarios +# specified on the command line. It will examine each scenario's file +# and also its .boot file, if present, and create a column in the .csv +# output file. Note that "CFLIST" is a synonym for all the scenarios in the +# CFLIST file, which allows easy comparison of those scenarios with selected +# scenarios such as BUSTED that are normally omitted from CFLIST files. + +csvout=${1} +if test -z "$csvout" +then + echo "Need .csv output file as first argument." + exit 1 +fi +shift +defaultconfigs="`tr '\012' ' ' < CFLIST`" +if test "$#" -eq 0 +then + scenariosarg=$defaultconfigs +else + scenariosarg=$* +fi +scenarios="`echo $scenariosarg | sed -e "s/\<CFLIST\>/$defaultconfigs/g"`" + +T=/tmp/config2latex.sh.$$ +trap 'rm -rf $T' 0 +mkdir $T + +cat << '---EOF---' >> $T/p.awk +END { +---EOF--- +for i in $scenarios +do + echo ' s["'$i'"] = 1;' >> $T/p.awk + grep -v '^#' < $i | grep -v '^ *$' > $T/p + if test -r $i.boot + then + tr -s ' ' '\012' < $i.boot | grep -v '^#' >> $T/p + fi + sed -e 's/^[^=]*$/&=?/' < $T/p | + sed -e 's/^\([^=]*\)=\(.*\)$/\tp["\1:'"$i"'"] = "\2";\n\tc["\1"] = 1;/' >> $T/p.awk +done +cat << '---EOF---' >> $T/p.awk + ns = asorti(s, ss); + nc = asorti(c, cs); + for (j = 1; j <= ns; j++) + printf ",\"%s\"", ss[j]; + printf "\n"; + for (i = 1; i <= nc; i++) { + printf "\"%s\"", cs[i]; + for (j = 1; j <= ns; j++) { + printf ",\"%s\"", p[cs[i] ":" ss[j]]; + } + printf "\n"; + } +} +---EOF--- +awk -f $T/p.awk < /dev/null > $T/p.csv +cp $T/p.csv $csvout diff --git a/tools/testing/selftests/rcutorture/bin/console-badness.sh b/tools/testing/selftests/rcutorture/bin/console-badness.sh index 80ae7f08b363..e6a132df6172 100755 --- a/tools/testing/selftests/rcutorture/bin/console-badness.sh +++ b/tools/testing/selftests/rcutorture/bin/console-badness.sh @@ -14,4 +14,5 @@ egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls o grep -v 'ODEBUG: ' | grep -v 'This means that this is a DEBUG kernel and it is' | grep -v 'Warning: unable to open an initial console' | +grep -v 'Warning: Failed to add ttynull console. No stdin, stdout, and stderr.*the init process!' | grep -v 'NOHZ tick-stop error: Non-RCU local softirq work is pending, handler' diff --git a/tools/testing/selftests/rcutorture/bin/functions.sh b/tools/testing/selftests/rcutorture/bin/functions.sh index 82663495fb38..c35ba24f994c 100644 --- a/tools/testing/selftests/rcutorture/bin/functions.sh +++ b/tools/testing/selftests/rcutorture/bin/functions.sh @@ -108,6 +108,39 @@ configfrag_hotplug_cpu () { grep -q '^CONFIG_HOTPLUG_CPU=y$' "$1" } +# get_starttime +# +# Returns a cookie identifying the current time. +get_starttime () { + awk 'BEGIN { print systime() }' < /dev/null +} + +# get_starttime_duration starttime +# +# Given the return value from get_starttime, compute a human-readable +# string denoting the time since get_starttime. +get_starttime_duration () { + awk -v starttime=$1 ' + BEGIN { + ts = systime() - starttime; + tm = int(ts / 60); + th = int(ts / 3600); + td = int(ts / 86400); + d = td; + h = th - td * 24; + m = tm - th * 60; + s = ts - tm * 60; + if (d >= 1) + printf "%dd %d:%02d:%02d\n", d, h, m, s + else if (h >= 1) + printf "%d:%02d:%02d\n", h, m, s + else if (m >= 1) + printf "%d:%02d.0\n", m, s + else + print s " seconds" + }' < /dev/null +} + # identify_boot_image qemu-cmd # # Returns the relative path to the kernel build image. This will be @@ -170,6 +203,7 @@ identify_qemu () { # and the TORTURE_QEMU_INTERACTIVE environment variable. identify_qemu_append () { echo debug_boot_weak_hash + echo panic=-1 local console=ttyS0 case "$1" in qemu-system-x86_64|qemu-system-i386) @@ -232,7 +266,7 @@ identify_qemu_args () { # Returns the number of virtual CPUs available to the aggregate of the # guest OSes. identify_qemu_vcpus () { - lscpu | grep '^CPU(s):' | sed -e 's/CPU(s)://' -e 's/[ ]*//g' + getconf _NPROCESSORS_ONLN } # print_bug diff --git a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh index 6f50722f251f..0670841122d8 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh @@ -39,12 +39,14 @@ done if test -n "$files" then $editor $files + editorret=1 else echo No build errors. fi if grep -q -e "--buildonly" < ${rundir}/log then echo Build-only run, no console logs to check. + exit $editorret fi # Find console logs with errors @@ -62,5 +64,10 @@ then exit 1 else echo No errors in console logs. - exit 0 + if test -n "$editorret" + then + exit $editorret + else + exit 0 + fi fi diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh index 840a4679a0d7..47cf4db10896 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh @@ -87,15 +87,16 @@ do fi done EDITOR=echo kvm-find-errors.sh "${@: -1}" > $T 2>&1 -ret=$? builderrors="`tr ' ' '\012' < $T | grep -c '/Make.out.diags'`" if test "$builderrors" -gt 0 then echo $builderrors runs with build errors. + ret=1 fi runerrors="`tr ' ' '\012' < $T | grep -c '/console.log.diags'`" if test "$runerrors" -gt 0 then echo $runerrors runs with runtime errors. + ret=2 fi exit $ret diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh index 3cd03d01857c..536d103ef166 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh @@ -125,7 +125,6 @@ seconds=$4 qemu_args=$5 boot_args=$6 -kstarttime=`gawk 'BEGIN { print systime() }' < /dev/null` if test -z "$TORTURE_BUILDONLY" then echo ' ---' `date`: Starting kernel @@ -158,6 +157,8 @@ then boot_args="$boot_args $TORTURE_BOOT_GDB_ARG" fi echo $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append \"$qemu_append $boot_args\" $TORTURE_QEMU_GDB_ARG > $resdir/qemu-cmd +echo "# TORTURE_SHUTDOWN_GRACE=$TORTURE_SHUTDOWN_GRACE" >> $resdir/qemu-cmd +echo "# seconds=$seconds" >> $resdir/qemu-cmd if test -n "$TORTURE_BUILDONLY" then @@ -174,6 +175,7 @@ echo 'echo $! > $resdir/qemu_pid' >> $T/qemu-cmd echo "NOTE: $QEMU either did not run or was interactive" > $resdir/console.log # Attempt to run qemu +kstarttime=`gawk 'BEGIN { print systime() }' < /dev/null` ( . $T/qemu-cmd; wait `cat $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) & commandcompleted=0 if test -z "$TORTURE_KCONFIG_GDB_ARG" @@ -209,7 +211,7 @@ do if test -n "$TORTURE_KCONFIG_GDB_ARG" then : - elif test $kruntime -ge $seconds || test -f "$TORTURE_STOPFILE" + elif test $kruntime -ge $seconds || test -f "$resdir/../STOP.1" then break; fi @@ -252,16 +254,16 @@ then fi if test $commandcompleted -eq 0 -a -n "$qemu_pid" then - if ! test -f "$TORTURE_STOPFILE" + if ! test -f "$resdir/../STOP.1" then echo Grace period for qemu job at pid $qemu_pid fi oldline="`tail $resdir/console.log`" while : do - if test -f "$TORTURE_STOPFILE" + if test -f "$resdir/../STOP.1" then - echo "PID $qemu_pid killed due to run STOP request" >> $resdir/Warnings 2>&1 + echo "PID $qemu_pid killed due to run STOP.1 request" >> $resdir/Warnings 2>&1 kill -KILL $qemu_pid break fi diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 45d07b7b69f5..8d3c99b35e06 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -47,6 +47,9 @@ cpus=0 ds=`date +%Y.%m.%d-%H.%M.%S` jitter="-1" +startdate="`date`" +starttime="`get_starttime`" + usage () { echo "Usage: $scriptname optional arguments:" echo " --allcpus" @@ -57,7 +60,7 @@ usage () { echo " --cpus N" echo " --datestamp string" echo " --defconfig string" - echo " --dryrun sched|script" + echo " --dryrun batches|sched|script" echo " --duration minutes | <seconds>s | <hours>h | <days>d" echo " --gdb" echo " --help" @@ -85,7 +88,7 @@ do ;; --bootargs|--bootarg) checkarg --bootargs "(list of kernel boot arguments)" "$#" "$2" '.*' '^--' - TORTURE_BOOTARGS="$2" + TORTURE_BOOTARGS="$TORTURE_BOOTARGS $2" shift ;; --bootimage) @@ -97,8 +100,8 @@ do TORTURE_BUILDONLY=1 ;; --configs|--config) - checkarg --configs "(list of config files)" "$#" "$2" '^[^/]*$' '^--' - configs="$2" + checkarg --configs "(list of config files)" "$#" "$2" '^[^/]\+$' '^--' + configs="$configs $2" shift ;; --cpus) @@ -113,7 +116,7 @@ do shift ;; --datestamp) - checkarg --datestamp "(relative pathname)" "$#" "$2" '^[^/]*$' '^--' + checkarg --datestamp "(relative pathname)" "$#" "$2" '^[a-zA-Z0-9._-/]*$' '^--' ds=$2 shift ;; @@ -123,7 +126,7 @@ do shift ;; --dryrun) - checkarg --dryrun "sched|script" $# "$2" 'sched\|script' '^--' + checkarg --dryrun "batches|sched|script" $# "$2" 'batches\|sched\|script' '^--' dryrun=$2 shift ;; @@ -162,18 +165,18 @@ do ;; --kconfig|--kconfigs) checkarg --kconfig "(Kconfig options)" $# "$2" '^CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\( CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\)*$' '^error$' - TORTURE_KCONFIG_ARG="$2" + TORTURE_KCONFIG_ARG="`echo "$TORTURE_KCONFIG_ARG $2" | sed -e 's/^ *//' -e 's/ *$//'`" shift ;; --kasan) TORTURE_KCONFIG_KASAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KASAN=y"; export TORTURE_KCONFIG_KASAN_ARG ;; --kcsan) - TORTURE_KCONFIG_KCSAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KCSAN=y CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY=n CONFIG_KCSAN_REPORT_ONCE_IN_MS=100000 CONFIG_KCSAN_VERBOSE=y CONFIG_KCSAN_INTERRUPT_WATCHER=y"; export TORTURE_KCONFIG_KCSAN_ARG + TORTURE_KCONFIG_KCSAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KCSAN=y CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY=n CONFIG_KCSAN_REPORT_ONCE_IN_MS=100000 CONFIG_KCSAN_INTERRUPT_WATCHER=y CONFIG_KCSAN_VERBOSE=y CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y"; export TORTURE_KCONFIG_KCSAN_ARG ;; --kmake-arg|--kmake-args) checkarg --kmake-arg "(kernel make arguments)" $# "$2" '.*' '^error$' - TORTURE_KMAKE_ARG="$2" + TORTURE_KMAKE_ARG="`echo "$TORTURE_KMAKE_ARG $2" | sed -e 's/^ *//' -e 's/ *$//'`" shift ;; --mac) @@ -191,7 +194,7 @@ do ;; --qemu-args|--qemu-arg) checkarg --qemu-args "(qemu arguments)" $# "$2" '^-' '^error' - TORTURE_QEMU_ARG="$2" + TORTURE_QEMU_ARG="`echo "$TORTURE_QEMU_ARG $2" | sed -e 's/^ *//' -e 's/ *$//'`" shift ;; --qemu-cmd) @@ -232,7 +235,7 @@ do shift done -if test -z "$TORTURE_INITRD" || tools/testing/selftests/rcutorture/bin/mkinitrd.sh +if test -n "$dryrun" || test -z "$TORTURE_INITRD" || tools/testing/selftests/rcutorture/bin/mkinitrd.sh then : else @@ -283,19 +286,34 @@ then exit 1 fi fi -for CF1 in $configs_derep +echo 'BEGIN {' > $T/cfgcpu.awk +for CF1 in `echo $configs_derep | tr -s ' ' '\012' | sort -u` do if test -f "$CONFIGFRAG/$CF1" then - cpu_count=`configNR_CPUS.sh $CONFIGFRAG/$CF1` + if echo "$TORTURE_KCONFIG_ARG" | grep -q '\<CONFIG_NR_CPUS=' + then + echo "$TORTURE_KCONFIG_ARG" | tr -s ' ' | tr ' ' '\012' > $T/KCONFIG_ARG + cpu_count=`configNR_CPUS.sh $T/KCONFIG_ARG` + else + cpu_count=`configNR_CPUS.sh $CONFIGFRAG/$CF1` + fi cpu_count=`configfrag_boot_cpus "$TORTURE_BOOTARGS" "$CONFIGFRAG/$CF1" "$cpu_count"` cpu_count=`configfrag_boot_maxcpus "$TORTURE_BOOTARGS" "$CONFIGFRAG/$CF1" "$cpu_count"` - echo $CF1 $cpu_count >> $T/cfgcpu + echo 'scenariocpu["'"$CF1"'"] = '"$cpu_count"';' >> $T/cfgcpu.awk else echo "The --configs file $CF1 does not exist, terminating." exit 1 fi done +cat << '___EOF___' >> $T/cfgcpu.awk +} +{ + for (i = 1; i <= NF; i++) + print $i, scenariocpu[$i]; +} +___EOF___ +echo $configs_derep | awk -f $T/cfgcpu.awk > $T/cfgcpu sort -k2nr $T/cfgcpu -T="$T" > $T/cfgcpu.sort # Use a greedy bin-packing algorithm, sorting the list accordingly. @@ -315,11 +333,10 @@ END { batch = 0; nc = -1; - # Each pass through the following loop creates on test batch - # that can be executed concurrently given ncpus. Note that a - # given test that requires more than the available CPUs will run in - # their own batch. Such tests just have to make do with what - # is available. + # Each pass through the following loop creates on test batch that + # can be executed concurrently given ncpus. Note that a given test + # that requires more than the available CPUs will run in its own + # batch. Such tests just have to make do with what is available. while (nc != ncpus) { batch++; nc = ncpus; @@ -375,9 +392,9 @@ if ! test -e $resdir then mkdir -p "$resdir" || : fi -mkdir $resdir/$ds +mkdir -p $resdir/$ds TORTURE_RESDIR="$resdir/$ds"; export TORTURE_RESDIR -TORTURE_STOPFILE="$resdir/$ds/STOP"; export TORTURE_STOPFILE +TORTURE_STOPFILE="$resdir/$ds/STOP.1"; export TORTURE_STOPFILE echo Results directory: $resdir/$ds echo $scriptname $args touch $resdir/$ds/log @@ -517,14 +534,19 @@ END { dump(first, i, batchnum); }' >> $T/script +cat << '___EOF___' >> $T/script +echo | tee -a $TORTURE_RESDIR/log +echo | tee -a $TORTURE_RESDIR/log +echo " --- `date` Test summary:" | tee -a $TORTURE_RESDIR/log +___EOF___ cat << ___EOF___ >> $T/script -echo -echo -echo " --- `date` Test summary:" -echo Results directory: $resdir/$ds -kcsan-collapse.sh $resdir/$ds -kvm-recheck.sh $resdir/$ds +echo Results directory: $resdir/$ds | tee -a $resdir/$ds/log +kcsan-collapse.sh $resdir/$ds | tee -a $resdir/$ds/log +kvm-recheck.sh $resdir/$ds > $T/kvm-recheck.sh.out 2>&1 ___EOF___ +echo 'ret=$?' >> $T/script +echo "cat $T/kvm-recheck.sh.out | tee -a $resdir/$ds/log" >> $T/script +echo 'exit $ret' >> $T/script if test "$dryrun" = script then @@ -533,13 +555,34 @@ then elif test "$dryrun" = sched then # Extract the test run schedule from the script. - egrep 'Start batch|Starting build\.' $T/script | - grep -v ">>" | + egrep 'Start batch|Starting build\.' $T/script | grep -v ">>" | sed -e 's/:.*$//' -e 's/^echo //' + nbuilds="`grep 'Starting build\.' $T/script | + grep -v ">>" | sed -e 's/:.*$//' -e 's/^echo //' | + awk '{ print $1 }' | grep -v '\.' | wc -l`" + echo Total number of builds: $nbuilds + nbatches="`grep 'Start batch' $T/script | grep -v ">>" | wc -l`" + echo Total number of batches: $nbatches exit 0 +elif test "$dryrun" = batches +then + # Extract the tests and their batches from the script. + egrep 'Start batch|Starting build\.' $T/script | grep -v ">>" | + sed -e 's/:.*$//' -e 's/^echo //' -e 's/-ovf//' | + awk ' + /^----Start/ { + batchno = $3; + next; + } + { + print batchno, $1, $2 + }' else # Not a dryrun, so run the script. - sh $T/script + bash $T/script + ret=$? + echo " --- Done at `date` (`get_starttime_duration $starttime`) exitcode $ret" | tee -a $resdir/$ds/log + exit $ret fi # Tracing: trace_event=rcu:rcu_grace_period,rcu:rcu_future_grace_period,rcu:rcu_grace_period_init,rcu:rcu_nocb_wake,rcu:rcu_preempt_task,rcu:rcu_unlock_preempted_task,rcu:rcu_quiescent_state_report,rcu:rcu_fqs,rcu:rcu_callback,rcu:rcu_kfree_callback,rcu:rcu_batch_start,rcu:rcu_invoke_callback,rcu:rcu_invoke_kfree_callback,rcu:rcu_batch_end,rcu:rcu_torture_read,rcu:rcu_barrier diff --git a/tools/testing/selftests/rcutorture/bin/mkinitrd.sh b/tools/testing/selftests/rcutorture/bin/mkinitrd.sh index 38e424d2392c..70d62fd0d31d 100755 --- a/tools/testing/selftests/rcutorture/bin/mkinitrd.sh +++ b/tools/testing/selftests/rcutorture/bin/mkinitrd.sh @@ -70,7 +70,7 @@ if echo -e "#if __x86_64__||__i386__||__i486__||__i586__||__i686__" \ # architecture supported by nolibc ${CROSS_COMPILE}gcc -fno-asynchronous-unwind-tables -fno-ident \ -nostdlib -include ../../../../include/nolibc/nolibc.h \ - -lgcc -s -static -Os -o init init.c + -s -static -Os -o init init.c -lgcc else ${CROSS_COMPILE}gcc -s -static -Os -o init init.c fi diff --git a/tools/testing/selftests/rcutorture/bin/parse-build.sh b/tools/testing/selftests/rcutorture/bin/parse-build.sh index 09155c15ea65..9313e5065ae9 100755 --- a/tools/testing/selftests/rcutorture/bin/parse-build.sh +++ b/tools/testing/selftests/rcutorture/bin/parse-build.sh @@ -21,7 +21,7 @@ mkdir $T . functions.sh -if grep -q CC < $F || test -n "$TORTURE_TRUST_MAKE" +if grep -q CC < $F || test -n "$TORTURE_TRUST_MAKE" || grep -qe --trust-make < `dirname $F`/../log then : else diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh index 263b1be50008..9f624bd53c27 100755 --- a/tools/testing/selftests/rcutorture/bin/parse-console.sh +++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh @@ -128,7 +128,7 @@ then then summary="$summary Badness: $n_badness" fi - n_warn=`grep -v 'Warning: unable to open an initial console' $file | egrep -c 'WARNING:|Warn'` + n_warn=`grep -v 'Warning: unable to open an initial console' $file | grep -v 'Warning: Failed to add ttynull console. No stdin, stdout, and stderr for the init process' | egrep -c 'WARNING:|Warn'` if test "$n_warn" -ne 0 then summary="$summary Warnings: $n_warn" diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh new file mode 100755 index 000000000000..ad7525b7ac29 --- /dev/null +++ b/tools/testing/selftests/rcutorture/bin/torture.sh @@ -0,0 +1,442 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ +# +# Run a series of torture tests, intended for overnight or +# longer timeframes, and also for large systems. +# +# Usage: torture.sh [ options ] +# +# Copyright (C) 2020 Facebook, Inc. +# +# Authors: Paul E. McKenney <paulmck@kernel.org> + +scriptname=$0 +args="$*" + +KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM +PATH=${KVM}/bin:$PATH; export PATH +. functions.sh + +TORTURE_ALLOTED_CPUS="`identify_qemu_vcpus`" +MAKE_ALLOTED_CPUS=$((TORTURE_ALLOTED_CPUS*2)) +HALF_ALLOTED_CPUS=$((TORTURE_ALLOTED_CPUS/2)) +if test "$HALF_ALLOTED_CPUS" -lt 1 +then + HALF_ALLOTED_CPUS=1 +fi +VERBOSE_BATCH_CPUS=$((TORTURE_ALLOTED_CPUS/16)) +if test "$VERBOSE_BATCH_CPUS" -lt 2 +then + VERBOSE_BATCH_CPUS=0 +fi + +# Configurations/scenarios. +configs_rcutorture= +configs_locktorture= +configs_scftorture= +kcsan_kmake_args= + +# Default compression, duration, and apportionment. +compress_kasan_vmlinux="`identify_qemu_vcpus`" +duration_base=10 +duration_rcutorture_frac=7 +duration_locktorture_frac=1 +duration_scftorture_frac=2 + +# "yes" or "no" parameters +do_allmodconfig=yes +do_rcutorture=yes +do_locktorture=yes +do_scftorture=yes +do_rcuscale=yes +do_refscale=yes +do_kvfree=yes +do_kasan=yes +do_kcsan=no + +# doyesno - Helper function for yes/no arguments +function doyesno () { + if test "$1" = "$2" + then + echo yes + else + echo no + fi +} + +usage () { + echo "Usage: $scriptname optional arguments:" + echo " --compress-kasan-vmlinux concurrency" + echo " --configs-rcutorture \"config-file list w/ repeat factor (3*TINY01)\"" + echo " --configs-locktorture \"config-file list w/ repeat factor (10*LOCK01)\"" + echo " --configs-scftorture \"config-file list w/ repeat factor (2*CFLIST)\"" + echo " --doall" + echo " --doallmodconfig / --do-no-allmodconfig" + echo " --do-kasan / --do-no-kasan" + echo " --do-kcsan / --do-no-kcsan" + echo " --do-kvfree / --do-no-kvfree" + echo " --do-locktorture / --do-no-locktorture" + echo " --do-none" + echo " --do-rcuscale / --do-no-rcuscale" + echo " --do-rcutorture / --do-no-rcutorture" + echo " --do-refscale / --do-no-refscale" + echo " --do-scftorture / --do-no-scftorture" + echo " --duration [ <minutes> | <hours>h | <days>d ]" + echo " --kcsan-kmake-arg kernel-make-arguments" + exit 1 +} + +while test $# -gt 0 +do + case "$1" in + --compress-kasan-vmlinux) + checkarg --compress-kasan-vmlinux "(concurrency level)" $# "$2" '^[0-9][0-9]*$' '^error' + compress_kasan_vmlinux=$2 + shift + ;; + --config-rcutorture|--configs-rcutorture) + checkarg --configs-rcutorture "(list of config files)" "$#" "$2" '^[^/]\+$' '^--' + configs_rcutorture="$configs_rcutorture $2" + shift + ;; + --config-locktorture|--configs-locktorture) + checkarg --configs-locktorture "(list of config files)" "$#" "$2" '^[^/]\+$' '^--' + configs_locktorture="$configs_locktorture $2" + shift + ;; + --config-scftorture|--configs-scftorture) + checkarg --configs-scftorture "(list of config files)" "$#" "$2" '^[^/]\+$' '^--' + configs_scftorture="$configs_scftorture $2" + shift + ;; + --doall) + do_allmodconfig=yes + do_rcutorture=yes + do_locktorture=yes + do_scftorture=yes + do_rcuscale=yes + do_refscale=yes + do_kvfree=yes + do_kasan=yes + do_kcsan=yes + ;; + --do-allmodconfig|--do-no-allmodconfig) + do_allmodconfig=`doyesno "$1" --do-allmodconfig` + ;; + --do-kasan|--do-no-kasan) + do_kasan=`doyesno "$1" --do-kasan` + ;; + --do-kcsan|--do-no-kcsan) + do_kcsan=`doyesno "$1" --do-kcsan` + ;; + --do-kvfree|--do-no-kvfree) + do_kvfree=`doyesno "$1" --do-kvfree` + ;; + --do-locktorture|--do-no-locktorture) + do_locktorture=`doyesno "$1" --do-locktorture` + ;; + --do-none) + do_allmodconfig=no + do_rcutorture=no + do_locktorture=no + do_scftorture=no + do_rcuscale=no + do_refscale=no + do_kvfree=no + do_kasan=no + do_kcsan=no + ;; + --do-rcuscale|--do-no-rcuscale) + do_rcuscale=`doyesno "$1" --do-rcuscale` + ;; + --do-rcutorture|--do-no-rcutorture) + do_rcutorture=`doyesno "$1" --do-rcutorture` + ;; + --do-refscale|--do-no-refscale) + do_refscale=`doyesno "$1" --do-refscale` + ;; + --do-scftorture|--do-no-scftorture) + do_scftorture=`doyesno "$1" --do-scftorture` + ;; + --duration) + checkarg --duration "(minutes)" $# "$2" '^[0-9][0-9]*\(m\|h\|d\|\)$' '^error' + mult=1 + if echo "$2" | grep -q 'm$' + then + mult=1 + elif echo "$2" | grep -q 'h$' + then + mult=60 + elif echo "$2" | grep -q 'd$' + then + mult=1440 + fi + ts=`echo $2 | sed -e 's/[smhd]$//'` + duration_base=$(($ts*mult)) + shift + ;; + --kcsan-kmake-arg|--kcsan-kmake-args) + checkarg --kcsan-kmake-arg "(kernel make arguments)" $# "$2" '.*' '^error$' + kcsan_kmake_args="`echo "$kcsan_kmake_args $2" | sed -e 's/^ *//' -e 's/ *$//'`" + shift + ;; + *) + echo Unknown argument $1 + usage + ;; + esac + shift +done + +ds="`date +%Y.%m.%d-%H.%M.%S`-torture" +startdate="`date`" +starttime="`get_starttime`" + +T=/tmp/torture.sh.$$ +trap 'rm -rf $T' 0 2 +mkdir $T + +echo " --- " $scriptname $args | tee -a $T/log +echo " --- Results directory: " $ds | tee -a $T/log + +# Calculate rcutorture defaults and apportion time +if test -z "$configs_rcutorture" +then + configs_rcutorture=CFLIST +fi +duration_rcutorture=$((duration_base*duration_rcutorture_frac/10)) +if test "$duration_rcutorture" -eq 0 +then + echo " --- Zero time for rcutorture, disabling" | tee -a $T/log + do_rcutorture=no +fi + +# Calculate locktorture defaults and apportion time +if test -z "$configs_locktorture" +then + configs_locktorture=CFLIST +fi +duration_locktorture=$((duration_base*duration_locktorture_frac/10)) +if test "$duration_locktorture" -eq 0 +then + echo " --- Zero time for locktorture, disabling" | tee -a $T/log + do_locktorture=no +fi + +# Calculate scftorture defaults and apportion time +if test -z "$configs_scftorture" +then + configs_scftorture=CFLIST +fi +duration_scftorture=$((duration_base*duration_scftorture_frac/10)) +if test "$duration_scftorture" -eq 0 +then + echo " --- Zero time for scftorture, disabling" | tee -a $T/log + do_scftorture=no +fi + +touch $T/failures +touch $T/successes + +# torture_one - Does a single kvm.sh run. +# +# Usage: +# torture_bootargs="[ kernel boot arguments ]" +# torture_one flavor [ kvm.sh arguments ] +# +# Note that "flavor" is an arbitrary string. Supply --torture if needed. +# Note that quoting is problematic. So on the command line, pass multiple +# values with multiple kvm.sh argument instances. +function torture_one { + local cur_bootargs= + local boottag= + + echo " --- $curflavor:" Start `date` | tee -a $T/log + if test -n "$torture_bootargs" + then + boottag="--bootargs" + cur_bootargs="$torture_bootargs" + fi + "$@" $boottag "$cur_bootargs" --datestamp "$ds/results-$curflavor" > $T/$curflavor.out 2>&1 + retcode=$? + resdir="`grep '^Results directory: ' $T/$curflavor.out | tail -1 | sed -e 's/^Results directory: //'`" + if test -z "$resdir" + then + cat $T/$curflavor.out | tee -a $T/log + echo retcode=$retcode | tee -a $T/log + fi + if test "$retcode" == 0 + then + echo "$curflavor($retcode)" $resdir >> $T/successes + else + echo "$curflavor($retcode)" $resdir >> $T/failures + fi +} + +# torture_set - Does a set of tortures with and without KASAN and KCSAN. +# +# Usage: +# torture_bootargs="[ kernel boot arguments ]" +# torture_set flavor [ kvm.sh arguments ] +# +# Note that "flavor" is an arbitrary string. Supply --torture if needed. +# Note that quoting is problematic. So on the command line, pass multiple +# values with multiple kvm.sh argument instances. +function torture_set { + local cur_kcsan_kmake_args= + local kcsan_kmake_tag= + local flavor=$1 + shift + curflavor=$flavor + torture_one "$@" + if test "$do_kasan" = "yes" + then + curflavor=${flavor}-kasan + torture_one "$@" --kasan + fi + if test "$do_kcsan" = "yes" + then + curflavor=${flavor}-kcsan + if test -n "$kcsan_kmake_args" + then + kcsan_kmake_tag="--kmake-args" + cur_kcsan_kmake_args="$kcsan_kmake_args" + fi + torture_one $* --kconfig "CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y" $kcsan_kmake_tag $cur_kcsan_kmake_args --kcsan + fi +} + +# make allmodconfig +if test "$do_allmodconfig" = "yes" +then + echo " --- allmodconfig:" Start `date` | tee -a $T/log + amcdir="tools/testing/selftests/rcutorture/res/$ds/allmodconfig" + mkdir -p "$amcdir" + echo " --- make clean" > "$amcdir/Make.out" 2>&1 + make -j$MAKE_ALLOTED_CPUS clean >> "$amcdir/Make.out" 2>&1 + echo " --- make allmodconfig" >> "$amcdir/Make.out" 2>&1 + make -j$MAKE_ALLOTED_CPUS allmodconfig >> "$amcdir/Make.out" 2>&1 + echo " --- make " >> "$amcdir/Make.out" 2>&1 + make -j$MAKE_ALLOTED_CPUS >> "$amcdir/Make.out" 2>&1 + retcode="$?" + echo $retcode > "$amcdir/Make.exitcode" + if test "$retcode" == 0 + then + echo "allmodconfig($retcode)" $amcdir >> $T/successes + else + echo "allmodconfig($retcode)" $amcdir >> $T/failures + fi +fi + +# --torture rcu +if test "$do_rcutorture" = "yes" +then + torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000" + torture_set "rcutorture" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration "$duration_rcutorture" --configs "$configs_rcutorture" --trust-make +fi + +if test "$do_locktorture" = "yes" +then + torture_bootargs="torture.disable_onoff_at_boot" + torture_set "locktorture" tools/testing/selftests/rcutorture/bin/kvm.sh --torture lock --allcpus --duration "$duration_locktorture" --configs "$configs_locktorture" --trust-make +fi + +if test "$do_scftorture" = "yes" +then + torture_bootargs="scftorture.nthreads=$HALF_ALLOTED_CPUS torture.disable_onoff_at_boot" + torture_set "scftorture" tools/testing/selftests/rcutorture/bin/kvm.sh --torture scf --allcpus --duration "$duration_scftorture" --configs "$configs_scftorture" --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --trust-make +fi + +if test "$do_refscale" = yes +then + primlist="`grep '\.name[ ]*=' kernel/rcu/refscale.c | sed -e 's/^[^"]*"//' -e 's/".*$//'`" +else + primlist= +fi +for prim in $primlist +do + torture_bootargs="refscale.scale_type="$prim" refscale.nreaders=$HALF_ALLOTED_CPUS refscale.loops=10000 refscale.holdoff=20 torture.disable_onoff_at_boot" + torture_set "refscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture refscale --allcpus --duration 5 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --bootargs "verbose_batched=$VERBOSE_BATCH_CPUS torture.verbose_sleep_frequency=8 torture.verbose_sleep_duration=$VERBOSE_BATCH_CPUS" --trust-make +done + +if test "$do_rcuscale" = yes +then + primlist="`grep '\.name[ ]*=' kernel/rcu/rcuscale.c | sed -e 's/^[^"]*"//' -e 's/".*$//'`" +else + primlist= +fi +for prim in $primlist +do + torture_bootargs="rcuscale.scale_type="$prim" rcuscale.nwriters=$HALF_ALLOTED_CPUS rcuscale.holdoff=20 torture.disable_onoff_at_boot" + torture_set "rcuscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 5 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --trust-make +done + +if test "$do_kvfree" = "yes" +then + torture_bootargs="rcuscale.kfree_rcu_test=1 rcuscale.kfree_nthreads=16 rcuscale.holdoff=20 rcuscale.kfree_loops=10000 torture.disable_onoff_at_boot" + torture_set "rcuscale-kvfree" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 10 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --trust-make +fi + +echo " --- " $scriptname $args +echo " --- " Done `date` | tee -a $T/log +ret=0 +nsuccesses=0 +echo SUCCESSES: | tee -a $T/log +if test -s "$T/successes" +then + cat "$T/successes" | tee -a $T/log + nsuccesses="`wc -l "$T/successes" | awk '{ print $1 }'`" +fi +nfailures=0 +echo FAILURES: | tee -a $T/log +if test -s "$T/failures" +then + cat "$T/failures" | tee -a $T/log + nfailures="`wc -l "$T/failures" | awk '{ print $1 }'`" + ret=2 +fi +echo Started at $startdate, ended at `date`, duration `get_starttime_duration $starttime`. | tee -a $T/log +echo Summary: Successes: $nsuccesses Failures: $nfailures. | tee -a $T/log +tdir="`cat $T/successes $T/failures | head -1 | awk '{ print $NF }' | sed -e 's,/[^/]\+/*$,,'`" +if test -n "$tdir" && test $compress_kasan_vmlinux -gt 0 +then + # KASAN vmlinux files can approach 1GB in size, so compress them. + echo Looking for KASAN files to compress: `date` > "$tdir/log-xz" 2>&1 + find "$tdir" -type d -name '*-kasan' -print > $T/xz-todo + ncompresses=0 + batchno=1 + if test -s $T/xz-todo + then + echo Size before compressing: `du -sh $tdir | awk '{ print $1 }'` `date` 2>&1 | tee -a "$tdir/log-xz" | tee -a $T/log + for i in `cat $T/xz-todo` + do + echo Compressing vmlinux files in ${i}: `date` >> "$tdir/log-xz" 2>&1 + for j in $i/*/vmlinux + do + xz "$j" >> "$tdir/log-xz" 2>&1 & + ncompresses=$((ncompresses+1)) + if test $ncompresses -ge $compress_kasan_vmlinux + then + echo Waiting for batch $batchno of $ncompresses compressions `date` | tee -a "$tdir/log-xz" | tee -a $T/log + wait + ncompresses=0 + batchno=$((batchno+1)) + fi + done + done + if test $ncompresses -gt 0 + then + echo Waiting for final batch $batchno of $ncompresses compressions `date` | tee -a "$tdir/log-xz" | tee -a $T/log + fi + wait + echo Size after compressing: `du -sh $tdir | awk '{ print $1 }'` `date` 2>&1 | tee -a "$tdir/log-xz" | tee -a $T/log + echo Total duration `get_starttime_duration $starttime`. | tee -a $T/log + else + echo No compression needed: `date` >> "$tdir/log-xz" 2>&1 + fi +fi +if test -n "$tdir" +then + cp $T/log "$tdir" +fi +exit $ret diff --git a/tools/testing/selftests/rcutorture/configs/rcu/RUDE01.boot b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01.boot index 9363708c9075..932a0799eb08 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/RUDE01.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01.boot @@ -1 +1,2 @@ rcutorture.torture_type=tasks-rude +rcutree.use_softirq=0 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS01.boot b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01.boot index cd2a188eeb6d..22cdeced98ea 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS01.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01.boot @@ -1 +1,2 @@ rcutorture.torture_type=tasks +rcutree.use_softirq=0 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot index d6da9a61d44a..40af3df0f397 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot @@ -2,5 +2,7 @@ maxcpus=8 nr_cpus=43 rcutree.gp_preinit_delay=3 rcutree.gp_init_delay=3 rcutree.gp_cleanup_delay=3 -rcu_nocbs=0 +rcu_nocbs=0-1,3-7 +rcutorture.nocbs_nthreads=8 +rcutorture.nocbs_toggle=1000 rcutorture.fwd_progress=0 diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 26c72f2b61b1..98c3b647f54d 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -315,7 +315,7 @@ TEST(kcmp) ret = __filecmp(getpid(), getpid(), 1, 1); EXPECT_EQ(ret, 0); if (ret != 0 && errno == ENOSYS) - SKIP(return, "Kernel does not support kcmp() (missing CONFIG_CHECKPOINT_RESTORE?)"); + SKIP(return, "Kernel does not support kcmp() (missing CONFIG_KCMP?)"); } TEST(mode_strict_support) @@ -4019,18 +4019,14 @@ TEST(user_notification_addfd) /* Verify we can set an arbitrary remote fd */ fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd); - /* - * The child has fds 0(stdin), 1(stdout), 2(stderr), 3(memfd), - * 4(listener), so the newly allocated fd should be 5. - */ - EXPECT_EQ(fd, 5); + EXPECT_GE(fd, 0); EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0); /* Verify we can set an arbitrary remote fd with large size */ memset(&big, 0x0, sizeof(big)); big.addfd = addfd; fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big); - EXPECT_EQ(fd, 6); + EXPECT_GE(fd, 0); /* Verify we can set a specific remote fd */ addfd.newfd = 42; diff --git a/tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c b/tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c index 6689f1183dbf..073a03702ff5 100644 --- a/tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c +++ b/tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c @@ -22,6 +22,8 @@ # define PR_SET_SYSCALL_USER_DISPATCH 59 # define PR_SYS_DISPATCH_OFF 0 # define PR_SYS_DISPATCH_ON 1 +# define SYSCALL_DISPATCH_FILTER_ALLOW 0 +# define SYSCALL_DISPATCH_FILTER_BLOCK 1 #endif #ifdef __NR_syscalls @@ -55,8 +57,8 @@ unsigned long trapped_call_count = 0; unsigned long native_call_count = 0; char selector; -#define SYSCALL_BLOCK (selector = PR_SYS_DISPATCH_ON) -#define SYSCALL_UNBLOCK (selector = PR_SYS_DISPATCH_OFF) +#define SYSCALL_BLOCK (selector = SYSCALL_DISPATCH_FILTER_BLOCK) +#define SYSCALL_UNBLOCK (selector = SYSCALL_DISPATCH_FILTER_ALLOW) #define CALIBRATION_STEP 100000 #define CALIBRATE_TO_SECS 5 @@ -170,7 +172,7 @@ int main(void) syscall(MAGIC_SYSCALL_1); #ifdef TEST_BLOCKED_RETURN - if (selector == PR_SYS_DISPATCH_OFF) { + if (selector == SYSCALL_DISPATCH_FILTER_ALLOW) { fprintf(stderr, "Failed to return with selector blocked.\n"); exit(-1); } diff --git a/tools/testing/selftests/syscall_user_dispatch/sud_test.c b/tools/testing/selftests/syscall_user_dispatch/sud_test.c index 6498b050ef89..b5d592d4099e 100644 --- a/tools/testing/selftests/syscall_user_dispatch/sud_test.c +++ b/tools/testing/selftests/syscall_user_dispatch/sud_test.c @@ -18,6 +18,8 @@ # define PR_SET_SYSCALL_USER_DISPATCH 59 # define PR_SYS_DISPATCH_OFF 0 # define PR_SYS_DISPATCH_ON 1 +# define SYSCALL_DISPATCH_FILTER_ALLOW 0 +# define SYSCALL_DISPATCH_FILTER_BLOCK 1 #endif #ifndef SYS_USER_DISPATCH @@ -30,8 +32,8 @@ # define MAGIC_SYSCALL_1 (0xff00) /* Bad Linux syscall number */ #endif -#define SYSCALL_DISPATCH_ON(x) ((x) = 1) -#define SYSCALL_DISPATCH_OFF(x) ((x) = 0) +#define SYSCALL_DISPATCH_ON(x) ((x) = SYSCALL_DISPATCH_FILTER_BLOCK) +#define SYSCALL_DISPATCH_OFF(x) ((x) = SYSCALL_DISPATCH_FILTER_ALLOW) /* Test Summary: * @@ -56,7 +58,7 @@ TEST_SIGNAL(dispatch_trigger_sigsys, SIGSYS) { - char sel = 0; + char sel = SYSCALL_DISPATCH_FILTER_ALLOW; struct sysinfo info; int ret; @@ -79,7 +81,7 @@ TEST_SIGNAL(dispatch_trigger_sigsys, SIGSYS) TEST(bad_prctl_param) { - char sel = 0; + char sel = SYSCALL_DISPATCH_FILTER_ALLOW; int op; /* Invalid op */ @@ -220,7 +222,7 @@ TEST_SIGNAL(bad_selector, SIGSYS) sigset_t mask; struct sysinfo info; - glob_sel = 0; + glob_sel = SYSCALL_DISPATCH_FILTER_ALLOW; nr_syscalls_emulated = 0; si_code = 0; si_errno = 0; @@ -288,7 +290,7 @@ TEST(direct_dispatch_range) { int ret = 0; struct sysinfo info; - char sel = 0; + char sel = SYSCALL_DISPATCH_FILTER_ALLOW; /* * Instead of calculating libc addresses; allow the entire diff --git a/tools/testing/selftests/tc-testing/Makefile b/tools/testing/selftests/tc-testing/Makefile index 91fee5c43274..4d639279f41e 100644 --- a/tools/testing/selftests/tc-testing/Makefile +++ b/tools/testing/selftests/tc-testing/Makefile @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 +include ../../../scripts/Makefile.include top_srcdir = $(abspath ../../../..) APIDIR := $(top_scrdir)/include/uapi @@ -7,8 +8,6 @@ TEST_GEN_FILES = action.o KSFT_KHDR_INSTALL := 1 include ../lib.mk -CLANG ?= clang -LLC ?= llc PROBE := $(shell $(LLC) -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1) ifeq ($(PROBE),) diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json b/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json index e09d3c0e307f..bd64a4bf11ab 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json @@ -201,5 +201,51 @@ "teardown": [ "$TC qdisc del dev $DEV1 ingress" ] + }, + { + "id": "0692", + "name": "Test u32 sample option, divisor 256", + "category": [ + "filter", + "u32" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress", + "$TC filter add dev $DEV1 ingress prio 99 handle 1: u32 divisor 256" + ], + "cmdUnderTest": "bash -c \"for mask in ff ffff ffffff ffffffff ff00ff ff0000ff ffff00ff; do $TC filter add dev $DEV1 ingress prio 99 u32 ht 1: sample u32 0x10203040 \\$mask match u8 0 0 classid 1:1; done\"", + "expExitCode": "0", + "verifyCmd": "$TC filter show dev $DEV1 ingress", + "matchPattern": "filter protocol all pref 99 u32( (chain|fh|order) [0-9:]+){3} key ht 1 bkt 40 flowid 1:1", + "matchCount": "7", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "2478", + "name": "Test u32 sample option, divisor 16", + "category": [ + "filter", + "u32" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress", + "$TC filter add dev $DEV1 ingress prio 99 handle 1: u32 divisor 256" + ], + "cmdUnderTest": "bash -c \"for mask in 70 f0 ff0 fff0 ff00f0; do $TC filter add dev $DEV1 ingress prio 99 u32 ht 1: sample u32 0x10203040 \\$mask match u8 0 0 classid 1:1; done\"", + "expExitCode": "0", + "verifyCmd": "$TC filter show dev $DEV1 ingress", + "matchPattern": "filter protocol all pref 99 u32( (chain|fh|order) [0-9:]+){3} key ht 1 bkt 4 flowid 1:1", + "matchCount": "5", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] } ] diff --git a/tools/testing/selftests/timens/.gitignore b/tools/testing/selftests/timens/.gitignore index 2e43851b47c1..fe1eb8271b35 100644 --- a/tools/testing/selftests/timens/.gitignore +++ b/tools/testing/selftests/timens/.gitignore @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only clock_nanosleep exec +futex gettime_perf gettime_perf_cold procfs diff --git a/tools/testing/selftests/vDSO/.gitignore b/tools/testing/selftests/vDSO/.gitignore index 5eb64d41e541..a8dc51af5a9c 100644 --- a/tools/testing/selftests/vDSO/.gitignore +++ b/tools/testing/selftests/vDSO/.gitignore @@ -1,5 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-only vdso_test +vdso_test_abi +vdso_test_clock_getres +vdso_test_correctness vdso_test_gettimeofday vdso_test_getcpu vdso_standalone_test_x86 diff --git a/tools/testing/selftests/vDSO/vdso_config.h b/tools/testing/selftests/vDSO/vdso_config.h index 6a6fe8d4ff55..6188b16827d1 100644 --- a/tools/testing/selftests/vDSO/vdso_config.h +++ b/tools/testing/selftests/vDSO/vdso_config.h @@ -47,10 +47,12 @@ #elif defined(__x86_64__) #define VDSO_VERSION 0 #define VDSO_NAMES 1 -#elif defined(__riscv__) +#elif defined(__riscv__) || defined(__riscv) #define VDSO_VERSION 5 #define VDSO_NAMES 1 +#if __riscv_xlen == 32 #define VDSO_32BIT 1 +#endif #else /* nds32 */ #define VDSO_VERSION 4 #define VDSO_NAMES 1 diff --git a/tools/testing/selftests/vDSO/vdso_test_correctness.c b/tools/testing/selftests/vDSO/vdso_test_correctness.c index 5029ef9b228c..c4aea794725a 100644 --- a/tools/testing/selftests/vDSO/vdso_test_correctness.c +++ b/tools/testing/selftests/vDSO/vdso_test_correctness.c @@ -349,7 +349,7 @@ static void test_one_clock_gettime64(int clock, const char *name) return; } - printf("\t%llu.%09ld %llu.%09ld %llu.%09ld\n", + printf("\t%llu.%09lld %llu.%09lld %llu.%09lld\n", (unsigned long long)start.tv_sec, start.tv_nsec, (unsigned long long)vdso.tv_sec, vdso.tv_nsec, (unsigned long long)end.tv_sec, end.tv_nsec); diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests.sh index e953f3cd9664..e953f3cd9664 100755 --- a/tools/testing/selftests/vm/run_vmtests +++ b/tools/testing/selftests/vm/run_vmtests.sh diff --git a/tools/testing/selftests/wireguard/qemu/debug.config b/tools/testing/selftests/wireguard/qemu/debug.config index b50c2085c1ac..fe07d97df9fa 100644 --- a/tools/testing/selftests/wireguard/qemu/debug.config +++ b/tools/testing/selftests/wireguard/qemu/debug.config @@ -1,5 +1,4 @@ CONFIG_LOCALVERSION="-debug" -CONFIG_ENABLE_MUST_CHECK=y CONFIG_FRAME_POINTER=y CONFIG_STACK_VALIDATION=y CONFIG_DEBUG_KERNEL=y diff --git a/tools/testing/selftests/x86/helpers.h b/tools/testing/selftests/x86/helpers.h index f5ff2a2615df..4ef42c4559a9 100644 --- a/tools/testing/selftests/x86/helpers.h +++ b/tools/testing/selftests/x86/helpers.h @@ -6,36 +6,20 @@ static inline unsigned long get_eflags(void) { - unsigned long eflags; - - asm volatile ( #ifdef __x86_64__ - "subq $128, %%rsp\n\t" - "pushfq\n\t" - "popq %0\n\t" - "addq $128, %%rsp" + return __builtin_ia32_readeflags_u64(); #else - "pushfl\n\t" - "popl %0" + return __builtin_ia32_readeflags_u32(); #endif - : "=r" (eflags) :: "memory"); - - return eflags; } static inline void set_eflags(unsigned long eflags) { - asm volatile ( #ifdef __x86_64__ - "subq $128, %%rsp\n\t" - "pushq %0\n\t" - "popfq\n\t" - "addq $128, %%rsp" + __builtin_ia32_writeeflags_u64(eflags); #else - "pushl %0\n\t" - "popfl" + __builtin_ia32_writeeflags_u32(eflags); #endif - :: "r" (eflags) : "flags", "memory"); } #endif /* __SELFTESTS_X86_HELPERS_H */ diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c index 1aef72df20a1..3a29346e1452 100644 --- a/tools/testing/selftests/x86/ldt_gdt.c +++ b/tools/testing/selftests/x86/ldt_gdt.c @@ -607,7 +607,7 @@ static void do_multicpu_tests(void) failures++; asm volatile ("mov %0, %%ss" : : "rm" (orig_ss)); - }; + } ftx = 100; /* Kill the thread. */ syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0); diff --git a/tools/tracing/Makefile b/tools/tracing/Makefile new file mode 100644 index 000000000000..87e0ec48e2e7 --- /dev/null +++ b/tools/tracing/Makefile @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: GPL-2.0 +include ../scripts/Makefile.include + +all: latency + +clean: latency_clean + +install: latency_install + +latency: + $(call descend,latency) + +latency_install: + $(call descend,latency,install) + +latency_clean: + $(call descend,latency,clean) + +.PHONY: all install clean latency latency_install latency_clean diff --git a/tools/tracing/latency/.gitignore b/tools/tracing/latency/.gitignore new file mode 100644 index 000000000000..0863960761e7 --- /dev/null +++ b/tools/tracing/latency/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +latency-collector diff --git a/tools/tracing/latency/Makefile b/tools/tracing/latency/Makefile new file mode 100644 index 000000000000..40c4ddaf8be1 --- /dev/null +++ b/tools/tracing/latency/Makefile @@ -0,0 +1,24 @@ +# SPDX-License-Identifier: GPL-2.0 +# Makefile for vm tools +# +VAR_CFLAGS := $(shell pkg-config --cflags libtracefs 2>/dev/null) +VAR_LDLIBS := $(shell pkg-config --libs libtracefs 2>/dev/null) + +TARGETS = latency-collector +CFLAGS = -Wall -Wextra -g -O2 $(VAR_CFLAGS) +LDFLAGS = -lpthread $(VAR_LDLIBS) + +all: $(TARGETS) + +%: %.c + $(CC) $(CFLAGS) -o $@ $< $(LDFLAGS) + +clean: + $(RM) latency-collector + +prefix ?= /usr/local +sbindir ?= ${prefix}/sbin + +install: all + install -d $(DESTDIR)$(sbindir) + install -m 755 -p $(TARGETS) $(DESTDIR)$(sbindir) diff --git a/tools/tracing/latency/latency-collector.c b/tools/tracing/latency/latency-collector.c new file mode 100644 index 000000000000..57b20802e71b --- /dev/null +++ b/tools/tracing/latency/latency-collector.c @@ -0,0 +1,2108 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2017, 2018, 2019, 2021 BMW Car IT GmbH + * Author: Viktor Rosendahl (viktor.rosendahl@bmw.de) + */ + +#define _GNU_SOURCE +#define _POSIX_C_SOURCE 200809L + +#include <ctype.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <err.h> +#include <errno.h> +#include <fcntl.h> +#include <getopt.h> +#include <sched.h> +#include <linux/unistd.h> +#include <signal.h> +#include <sys/inotify.h> +#include <unistd.h> +#include <pthread.h> +#include <tracefs.h> + +static const char *prg_name; +static const char *prg_unknown = "unknown program name"; + +static int fd_stdout; + +static int sched_policy; +static bool sched_policy_set; + +static int sched_pri; +static bool sched_pri_set; + +static bool trace_enable = true; +static bool setup_ftrace = true; +static bool use_random_sleep; + +#define TRACE_OPTS \ + C(FUNC_TR, "function-trace"), \ + C(DISP_GR, "display-graph"), \ + C(NR, NULL) + +#undef C +#define C(a, b) OPTIDX_##a + +enum traceopt { + TRACE_OPTS +}; + +#undef C +#define C(a, b) b + +static const char *const optstr[] = { + TRACE_OPTS +}; + +enum errhandling { + ERR_EXIT = 0, + ERR_WARN, + ERR_CLEANUP, +}; + +static bool use_options[OPTIDX_NR]; + +static char inotify_buffer[655360]; + +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) +#define bool2str(x) (x ? "true":"false") + +#define DEFAULT_NR_PRINTER_THREADS (3) +static unsigned int nr_threads = DEFAULT_NR_PRINTER_THREADS; + +#define DEFAULT_TABLE_SIZE (2) +static unsigned int table_startsize = DEFAULT_TABLE_SIZE; + +static int verbosity; + +#define verbose_sizechange() (verbosity >= 1) +#define verbose_lostevent() (verbosity >= 2) +#define verbose_ftrace() (verbosity >= 1) + +#define was_changed(ORIG, CUR) (strcmp(ORIG, CUR) != 0) +#define needs_change(CUR, WANTED) (strcmp(CUR, WANTED) != 0) + +static const char *debug_tracefile; +static const char *debug_tracefile_dflt; +static const char *debug_maxlat; +static const char *debug_maxlat_dflt; +static const char * const DEBUG_NOFILE = "[file not found]"; + +static const char * const TR_MAXLAT = "tracing_max_latency"; +static const char * const TR_THRESH = "tracing_thresh"; +static const char * const TR_CURRENT = "current_tracer"; +static const char * const TR_OPTIONS = "trace_options"; + +static const char * const NOP_TRACER = "nop"; + +static const char * const OPT_NO_PREFIX = "no"; + +#define DFLT_THRESHOLD_US "0" +static const char *threshold = DFLT_THRESHOLD_US; + +#define DEV_URANDOM "/dev/urandom" +#define RT_DEFAULT_PRI (99) +#define DEFAULT_PRI (0) + +#define USEC_PER_MSEC (1000L) +#define NSEC_PER_USEC (1000L) +#define NSEC_PER_MSEC (USEC_PER_MSEC * NSEC_PER_USEC) + +#define MSEC_PER_SEC (1000L) +#define USEC_PER_SEC (USEC_PER_MSEC * MSEC_PER_SEC) +#define NSEC_PER_SEC (NSEC_PER_MSEC * MSEC_PER_SEC) + +#define SLEEP_TIME_MS_DEFAULT (1000L) +#define TRY_PRINTMUTEX_MS (1000) + +static long sleep_time = (USEC_PER_MSEC * SLEEP_TIME_MS_DEFAULT); + +static const char * const queue_full_warning = +"Could not queue trace for printing. It is likely that events happen faster\n" +"than what they can be printed. Probably partly because of random sleeping\n"; + +static const char * const no_tracer_msg = +"Could not find any tracers! Running this program as root may help!\n"; + +static const char * const no_latency_tr_msg = +"No latency tracers are supported by your kernel!\n"; + +struct policy { + const char *name; + int policy; + int default_pri; +}; + +static const struct policy policies[] = { + { "other", SCHED_OTHER, DEFAULT_PRI }, + { "batch", SCHED_BATCH, DEFAULT_PRI }, + { "idle", SCHED_IDLE, DEFAULT_PRI }, + { "rr", SCHED_RR, RT_DEFAULT_PRI }, + { "fifo", SCHED_FIFO, RT_DEFAULT_PRI }, + { NULL, 0, DEFAULT_PRI } +}; + +/* + * The default tracer will be the first on this list that is supported by the + * currently running Linux kernel. + */ +static const char * const relevant_tracers[] = { + "preemptirqsoff", + "preemptoff", + "irqsoff", + "wakeup", + "wakeup_rt", + "wakeup_dl", + NULL +}; + +/* This is the list of tracers for which random sleep makes sense */ +static const char * const random_tracers[] = { + "preemptirqsoff", + "preemptoff", + "irqsoff", + NULL +}; + +static const char *current_tracer; +static bool force_tracer; + +struct ftrace_state { + char *tracer; + char *thresh; + bool opt[OPTIDX_NR]; + bool opt_valid[OPTIDX_NR]; + pthread_mutex_t mutex; +}; + +struct entry { + int ticket; + int ticket_completed_ref; +}; + +struct print_state { + int ticket_counter; + int ticket_completed; + pthread_mutex_t mutex; + pthread_cond_t cond; + int cnt; + pthread_mutex_t cnt_mutex; +}; + +struct short_msg { + char buf[160]; + int len; +}; + +static struct print_state printstate; +static struct ftrace_state save_state; +volatile sig_atomic_t signal_flag; + +#define PROB_TABLE_MAX_SIZE (1000) + +int probabilities[PROB_TABLE_MAX_SIZE]; + +struct sleep_table { + int *table; + int size; + pthread_mutex_t mutex; +}; + +static struct sleep_table sleeptable; + +#define QUEUE_SIZE (10) + +struct queue { + struct entry entries[QUEUE_SIZE]; + int next_prod_idx; + int next_cons_idx; + pthread_mutex_t mutex; + pthread_cond_t cond; +}; + +#define MAX_THREADS (40) + +struct queue printqueue; +pthread_t printthread[MAX_THREADS]; +pthread_mutex_t print_mtx; +#define PRINT_BUFFER_SIZE (16 * 1024 * 1024) + +static void cleanup_exit(int status); +static int set_trace_opt(const char *opt, bool value); + +static __always_inline void *malloc_or_die(size_t size) +{ + void *ptr = malloc(size); + + if (unlikely(ptr == NULL)) { + warn("malloc() failed"); + cleanup_exit(EXIT_FAILURE); + } + return ptr; +} + +static __always_inline void *malloc_or_die_nocleanup(size_t size) +{ + void *ptr = malloc(size); + + if (unlikely(ptr == NULL)) + err(0, "malloc() failed"); + return ptr; +} + +static __always_inline void write_or_die(int fd, const char *buf, size_t count) +{ + ssize_t r; + + do { + r = write(fd, buf, count); + if (unlikely(r < 0)) { + if (errno == EINTR) + continue; + warn("write() failed"); + cleanup_exit(EXIT_FAILURE); + } + count -= r; + buf += r; + } while (count > 0); +} + +static __always_inline void clock_gettime_or_die(clockid_t clk_id, + struct timespec *tp) +{ + int r = clock_gettime(clk_id, tp); + + if (unlikely(r != 0)) + err(EXIT_FAILURE, "clock_gettime() failed"); +} + +static __always_inline void sigemptyset_or_die(sigset_t *s) +{ + if (unlikely(sigemptyset(s) != 0)) { + warn("sigemptyset() failed"); + cleanup_exit(EXIT_FAILURE); + } +} + +static __always_inline void sigaddset_or_die(sigset_t *s, int signum) +{ + if (unlikely(sigaddset(s, signum) != 0)) { + warn("sigemptyset() failed"); + cleanup_exit(EXIT_FAILURE); + } +} + +static __always_inline void sigaction_or_die(int signum, + const struct sigaction *act, + struct sigaction *oldact) +{ + if (unlikely(sigaction(signum, act, oldact) != 0)) { + warn("sigaction() failed"); + cleanup_exit(EXIT_FAILURE); + } +} + +static void open_stdout(void) +{ + if (setvbuf(stdout, NULL, _IONBF, 0) != 0) + err(EXIT_FAILURE, "setvbuf() failed"); + fd_stdout = fileno(stdout); + if (fd_stdout < 0) + err(EXIT_FAILURE, "fileno() failed"); +} + +/* + * It's not worth it to call cleanup_exit() from mutex functions because + * cleanup_exit() uses mutexes. + */ +static __always_inline void mutex_lock(pthread_mutex_t *mtx) +{ + errno = pthread_mutex_lock(mtx); + if (unlikely(errno)) + err(EXIT_FAILURE, "pthread_mutex_lock() failed"); +} + + +static __always_inline void mutex_unlock(pthread_mutex_t *mtx) +{ + errno = pthread_mutex_unlock(mtx); + if (unlikely(errno)) + err(EXIT_FAILURE, "pthread_mutex_unlock() failed"); +} + +static __always_inline void cond_signal(pthread_cond_t *cond) +{ + errno = pthread_cond_signal(cond); + if (unlikely(errno)) + err(EXIT_FAILURE, "pthread_cond_signal() failed"); +} + +static __always_inline void cond_wait(pthread_cond_t *restrict cond, + pthread_mutex_t *restrict mutex) +{ + errno = pthread_cond_wait(cond, mutex); + if (unlikely(errno)) + err(EXIT_FAILURE, "pthread_cond_wait() failed"); +} + +static __always_inline void cond_broadcast(pthread_cond_t *cond) +{ + errno = pthread_cond_broadcast(cond); + if (unlikely(errno)) + err(EXIT_FAILURE, "pthread_cond_broadcast() failed"); +} + +static __always_inline void +mutex_init(pthread_mutex_t *mutex, + const pthread_mutexattr_t *attr) +{ + errno = pthread_mutex_init(mutex, attr); + if (errno) + err(EXIT_FAILURE, "pthread_mutex_init() failed"); +} + +static __always_inline void mutexattr_init(pthread_mutexattr_t *attr) +{ + errno = pthread_mutexattr_init(attr); + if (errno) + err(EXIT_FAILURE, "pthread_mutexattr_init() failed"); +} + +static __always_inline void mutexattr_destroy(pthread_mutexattr_t *attr) +{ + errno = pthread_mutexattr_destroy(attr); + if (errno) + err(EXIT_FAILURE, "pthread_mutexattr_destroy() failed"); +} + +static __always_inline void mutexattr_settype(pthread_mutexattr_t *attr, + int type) +{ + errno = pthread_mutexattr_settype(attr, type); + if (errno) + err(EXIT_FAILURE, "pthread_mutexattr_settype() failed"); +} + +static __always_inline void condattr_init(pthread_condattr_t *attr) +{ + errno = pthread_condattr_init(attr); + if (errno) + err(EXIT_FAILURE, "pthread_condattr_init() failed"); +} + +static __always_inline void condattr_destroy(pthread_condattr_t *attr) +{ + errno = pthread_condattr_destroy(attr); + if (errno) + err(EXIT_FAILURE, "pthread_condattr_destroy() failed"); +} + +static __always_inline void condattr_setclock(pthread_condattr_t *attr, + clockid_t clock_id) +{ + errno = pthread_condattr_setclock(attr, clock_id); + if (unlikely(errno)) + err(EXIT_FAILURE, "pthread_condattr_setclock() failed"); +} + +static __always_inline void cond_init(pthread_cond_t *cond, + const pthread_condattr_t *attr) +{ + errno = pthread_cond_init(cond, attr); + if (errno) + err(EXIT_FAILURE, "pthread_cond_init() failed"); +} + +static __always_inline int +cond_timedwait(pthread_cond_t *restrict cond, + pthread_mutex_t *restrict mutex, + const struct timespec *restrict abstime) +{ + errno = pthread_cond_timedwait(cond, mutex, abstime); + if (errno && errno != ETIMEDOUT) + err(EXIT_FAILURE, "pthread_cond_timedwait() failed"); + return errno; +} + +static void init_printstate(void) +{ + pthread_condattr_t cattr; + + printstate.ticket_counter = 0; + printstate.ticket_completed = 0; + printstate.cnt = 0; + + mutex_init(&printstate.mutex, NULL); + + condattr_init(&cattr); + condattr_setclock(&cattr, CLOCK_MONOTONIC); + cond_init(&printstate.cond, &cattr); + condattr_destroy(&cattr); +} + +static void init_print_mtx(void) +{ + pthread_mutexattr_t mattr; + + mutexattr_init(&mattr); + mutexattr_settype(&mattr, PTHREAD_MUTEX_RECURSIVE); + mutex_init(&print_mtx, &mattr); + mutexattr_destroy(&mattr); + +} + +static void signal_blocking(int how) +{ + sigset_t s; + + sigemptyset_or_die(&s); + sigaddset_or_die(&s, SIGHUP); + sigaddset_or_die(&s, SIGTERM); + sigaddset_or_die(&s, SIGINT); + + errno = pthread_sigmask(how, &s, NULL); + if (unlikely(errno)) { + warn("pthread_sigmask() failed"); + cleanup_exit(EXIT_FAILURE); + } +} + +static void signal_handler(int num) +{ + signal_flag = num; +} + +static void setup_sig_handler(void) +{ + struct sigaction sa; + + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = signal_handler; + + sigaction_or_die(SIGHUP, &sa, NULL); + sigaction_or_die(SIGTERM, &sa, NULL); + sigaction_or_die(SIGINT, &sa, NULL); +} + +static void process_signal(int signal) +{ + char *name; + + name = strsignal(signal); + if (name == NULL) + printf("Received signal %d\n", signal); + else + printf("Received signal %d (%s)\n", signal, name); + cleanup_exit(EXIT_SUCCESS); +} + +static __always_inline void check_signals(void) +{ + int signal = signal_flag; + + if (unlikely(signal)) + process_signal(signal); +} + +static __always_inline void get_time_in_future(struct timespec *future, + long time_us) +{ + long nsec; + + clock_gettime_or_die(CLOCK_MONOTONIC, future); + future->tv_sec += time_us / USEC_PER_SEC; + nsec = future->tv_nsec + (time_us * NSEC_PER_USEC) % NSEC_PER_SEC; + if (nsec >= NSEC_PER_SEC) { + future->tv_nsec = nsec % NSEC_PER_SEC; + future->tv_sec += 1; + } +} + +static __always_inline bool time_has_passed(const struct timespec *time) +{ + struct timespec now; + + clock_gettime_or_die(CLOCK_MONOTONIC, &now); + if (now.tv_sec > time->tv_sec) + return true; + if (now.tv_sec < time->tv_sec) + return false; + return (now.tv_nsec >= time->tv_nsec); +} + +static bool mutex_trylock_limit(pthread_mutex_t *mutex, int time_ms) +{ + long time_us = time_ms * USEC_PER_MSEC; + struct timespec limit; + + get_time_in_future(&limit, time_us); + do { + errno = pthread_mutex_trylock(mutex); + if (errno && errno != EBUSY) + err(EXIT_FAILURE, "pthread_mutex_trylock() failed"); + } while (errno && !time_has_passed(&limit)); + return errno == 0; +} + +static void restore_trace_opts(const struct ftrace_state *state, + const bool *cur) +{ + int i; + int r; + + for (i = 0; i < OPTIDX_NR; i++) + if (state->opt_valid[i] && state->opt[i] != cur[i]) { + r = set_trace_opt(optstr[i], state->opt[i]); + if (r < 0) + warnx("Failed to restore the %s option to %s", + optstr[i], bool2str(state->opt[i])); + else if (verbose_ftrace()) + printf("Restored the %s option in %s to %s\n", + optstr[i], TR_OPTIONS, + bool2str(state->opt[i])); + } +} + +static char *read_file(const char *file, enum errhandling h) +{ + int psize; + char *r; + static const char *emsg = "Failed to read the %s file"; + + r = tracefs_instance_file_read(NULL, file, &psize); + if (!r) { + if (h) { + warn(emsg, file); + if (h == ERR_CLEANUP) + cleanup_exit(EXIT_FAILURE); + } else + errx(EXIT_FAILURE, emsg, file); + } + + if (r && r[psize - 1] == '\n') + r[psize - 1] = '\0'; + return r; +} + +static void restore_file(const char *file, char **saved, const char *cur) +{ + if (*saved && was_changed(*saved, cur)) { + if (tracefs_instance_file_write(NULL, file, *saved) < 0) + warnx("Failed to restore %s to %s!", file, *saved); + else if (verbose_ftrace()) + printf("Restored %s to %s\n", file, *saved); + free(*saved); + *saved = NULL; + } +} + +static void restore_ftrace(void) +{ + mutex_lock(&save_state.mutex); + + restore_file(TR_CURRENT, &save_state.tracer, current_tracer); + restore_file(TR_THRESH, &save_state.thresh, threshold); + restore_trace_opts(&save_state, use_options); + + mutex_unlock(&save_state.mutex); +} + +static void cleanup_exit(int status) +{ + char *maxlat; + + if (!setup_ftrace) + exit(status); + + /* + * We try the print_mtx for 1 sec in order to avoid garbled + * output if possible, but if it cannot be obtained we proceed anyway. + */ + mutex_trylock_limit(&print_mtx, TRY_PRINTMUTEX_MS); + + maxlat = read_file(TR_MAXLAT, ERR_WARN); + if (maxlat) { + printf("The maximum detected latency was: %sus\n", maxlat); + free(maxlat); + } + + restore_ftrace(); + /* + * We do not need to unlock the print_mtx here because we will exit at + * the end of this function. Unlocking print_mtx causes problems if a + * print thread happens to be waiting for the mutex because we have + * just changed the ftrace settings to the original and thus the + * print thread would output incorrect data from ftrace. + */ + exit(status); +} + +static void init_save_state(void) +{ + pthread_mutexattr_t mattr; + + mutexattr_init(&mattr); + mutexattr_settype(&mattr, PTHREAD_MUTEX_RECURSIVE); + mutex_init(&save_state.mutex, &mattr); + mutexattr_destroy(&mattr); + + save_state.tracer = NULL; + save_state.thresh = NULL; + save_state.opt_valid[OPTIDX_FUNC_TR] = false; + save_state.opt_valid[OPTIDX_DISP_GR] = false; +} + +static int printstate_next_ticket(struct entry *req) +{ + int r; + + r = ++(printstate.ticket_counter); + req->ticket = r; + req->ticket_completed_ref = printstate.ticket_completed; + cond_broadcast(&printstate.cond); + return r; +} + +static __always_inline +void printstate_mark_req_completed(const struct entry *req) +{ + if (req->ticket > printstate.ticket_completed) + printstate.ticket_completed = req->ticket; +} + +static __always_inline +bool printstate_has_new_req_arrived(const struct entry *req) +{ + return (printstate.ticket_counter != req->ticket); +} + +static __always_inline int printstate_cnt_inc(void) +{ + int value; + + mutex_lock(&printstate.cnt_mutex); + value = ++printstate.cnt; + mutex_unlock(&printstate.cnt_mutex); + return value; +} + +static __always_inline int printstate_cnt_dec(void) +{ + int value; + + mutex_lock(&printstate.cnt_mutex); + value = --printstate.cnt; + mutex_unlock(&printstate.cnt_mutex); + return value; +} + +static __always_inline int printstate_cnt_read(void) +{ + int value; + + mutex_lock(&printstate.cnt_mutex); + value = printstate.cnt; + mutex_unlock(&printstate.cnt_mutex); + return value; +} + +static __always_inline +bool prev_req_won_race(const struct entry *req) +{ + return (printstate.ticket_completed != req->ticket_completed_ref); +} + +static void sleeptable_resize(int size, bool printout, struct short_msg *msg) +{ + int bytes; + + if (printout) { + msg->len = 0; + if (unlikely(size > PROB_TABLE_MAX_SIZE)) + bytes = snprintf(msg->buf, sizeof(msg->buf), +"Cannot increase probability table to %d (maximum size reached)\n", size); + else + bytes = snprintf(msg->buf, sizeof(msg->buf), +"Increasing probability table to %d\n", size); + if (bytes < 0) + warn("snprintf() failed"); + else + msg->len = bytes; + } + + if (unlikely(size < 0)) { + /* Should never happen */ + warnx("Bad program state at %s:%d", __FILE__, __LINE__); + cleanup_exit(EXIT_FAILURE); + return; + } + sleeptable.size = size; + sleeptable.table = &probabilities[PROB_TABLE_MAX_SIZE - size]; +} + +static void init_probabilities(void) +{ + int i; + int j = 1000; + + for (i = 0; i < PROB_TABLE_MAX_SIZE; i++) { + probabilities[i] = 1000 / j; + j--; + } + mutex_init(&sleeptable.mutex, NULL); +} + +static int table_get_probability(const struct entry *req, + struct short_msg *msg) +{ + int diff = req->ticket - req->ticket_completed_ref; + int rval = 0; + + msg->len = 0; + diff--; + /* Should never happen...*/ + if (unlikely(diff < 0)) { + warnx("Programmer assumption error at %s:%d\n", __FILE__, + __LINE__); + cleanup_exit(EXIT_FAILURE); + } + mutex_lock(&sleeptable.mutex); + if (diff >= (sleeptable.size - 1)) { + rval = sleeptable.table[sleeptable.size - 1]; + sleeptable_resize(sleeptable.size + 1, verbose_sizechange(), + msg); + } else { + rval = sleeptable.table[diff]; + } + mutex_unlock(&sleeptable.mutex); + return rval; +} + +static void init_queue(struct queue *q) +{ + q->next_prod_idx = 0; + q->next_cons_idx = 0; + mutex_init(&q->mutex, NULL); + errno = pthread_cond_init(&q->cond, NULL); + if (errno) + err(EXIT_FAILURE, "pthread_cond_init() failed"); +} + +static __always_inline int queue_len(const struct queue *q) +{ + if (q->next_prod_idx >= q->next_cons_idx) + return q->next_prod_idx - q->next_cons_idx; + else + return QUEUE_SIZE - q->next_cons_idx + q->next_prod_idx; +} + +static __always_inline int queue_nr_free(const struct queue *q) +{ + int nr_free = QUEUE_SIZE - queue_len(q); + + /* + * If there is only one slot left we will anyway lie and claim that the + * queue is full because adding an element will make it appear empty + */ + if (nr_free == 1) + nr_free = 0; + return nr_free; +} + +static __always_inline void queue_idx_inc(int *idx) +{ + *idx = (*idx + 1) % QUEUE_SIZE; +} + +static __always_inline void queue_push_to_back(struct queue *q, + const struct entry *e) +{ + q->entries[q->next_prod_idx] = *e; + queue_idx_inc(&q->next_prod_idx); +} + +static __always_inline struct entry queue_pop_from_front(struct queue *q) +{ + struct entry e = q->entries[q->next_cons_idx]; + + queue_idx_inc(&q->next_cons_idx); + return e; +} + +static __always_inline void queue_cond_signal(struct queue *q) +{ + cond_signal(&q->cond); +} + +static __always_inline void queue_cond_wait(struct queue *q) +{ + cond_wait(&q->cond, &q->mutex); +} + +static __always_inline int queue_try_to_add_entry(struct queue *q, + const struct entry *e) +{ + int r = 0; + + mutex_lock(&q->mutex); + if (queue_nr_free(q) > 0) { + queue_push_to_back(q, e); + cond_signal(&q->cond); + } else + r = -1; + mutex_unlock(&q->mutex); + return r; +} + +static struct entry queue_wait_for_entry(struct queue *q) +{ + struct entry e; + + mutex_lock(&q->mutex); + while (true) { + if (queue_len(&printqueue) > 0) { + e = queue_pop_from_front(q); + break; + } + queue_cond_wait(q); + } + mutex_unlock(&q->mutex); + + return e; +} + +static const struct policy *policy_from_name(const char *name) +{ + const struct policy *p = &policies[0]; + + while (p->name != NULL) { + if (!strcmp(name, p->name)) + return p; + p++; + } + return NULL; +} + +static const char *policy_name(int policy) +{ + const struct policy *p = &policies[0]; + static const char *rval = "unknown"; + + while (p->name != NULL) { + if (p->policy == policy) + return p->name; + p++; + } + return rval; +} + +static bool is_relevant_tracer(const char *name) +{ + unsigned int i; + + for (i = 0; relevant_tracers[i]; i++) + if (!strcmp(name, relevant_tracers[i])) + return true; + return false; +} + +static bool random_makes_sense(const char *name) +{ + unsigned int i; + + for (i = 0; random_tracers[i]; i++) + if (!strcmp(name, random_tracers[i])) + return true; + return false; +} + +static void show_available(void) +{ + char **tracers; + int found = 0; + int i; + + tracers = tracefs_tracers(NULL); + for (i = 0; tracers && tracers[i]; i++) { + if (is_relevant_tracer(tracers[i])) + found++; + } + + if (!tracers) { + warnx(no_tracer_msg); + return; + } + + if (!found) { + warnx(no_latency_tr_msg); + tracefs_list_free(tracers); + return; + } + + printf("The following latency tracers are available on your system:\n"); + for (i = 0; tracers[i]; i++) { + if (is_relevant_tracer(tracers[i])) + printf("%s\n", tracers[i]); + } + tracefs_list_free(tracers); +} + +static bool tracer_valid(const char *name, bool *notracer) +{ + char **tracers; + int i; + bool rval = false; + + *notracer = false; + tracers = tracefs_tracers(NULL); + if (!tracers) { + *notracer = true; + return false; + } + for (i = 0; tracers[i]; i++) + if (!strcmp(tracers[i], name)) { + rval = true; + break; + } + tracefs_list_free(tracers); + return rval; +} + +static const char *find_default_tracer(void) +{ + int i; + bool notracer; + bool valid; + + for (i = 0; relevant_tracers[i]; i++) { + valid = tracer_valid(relevant_tracers[i], ¬racer); + if (notracer) + errx(EXIT_FAILURE, no_tracer_msg); + if (valid) + return relevant_tracers[i]; + } + return NULL; +} + +static bool toss_coin(struct drand48_data *buffer, unsigned int prob) +{ + long r; + + if (unlikely(lrand48_r(buffer, &r))) { + warnx("lrand48_r() failed"); + cleanup_exit(EXIT_FAILURE); + } + r = r % 1000L; + if (r < prob) + return true; + else + return false; +} + + +static long go_to_sleep(const struct entry *req) +{ + struct timespec future; + long delay = sleep_time; + + get_time_in_future(&future, delay); + + mutex_lock(&printstate.mutex); + while (!printstate_has_new_req_arrived(req)) { + cond_timedwait(&printstate.cond, &printstate.mutex, &future); + if (time_has_passed(&future)) + break; + }; + + if (printstate_has_new_req_arrived(req)) + delay = -1; + mutex_unlock(&printstate.mutex); + + return delay; +} + + +static void set_priority(void) +{ + int r; + pid_t pid; + struct sched_param param; + + memset(¶m, 0, sizeof(param)); + param.sched_priority = sched_pri; + + pid = getpid(); + r = sched_setscheduler(pid, sched_policy, ¶m); + + if (r != 0) + err(EXIT_FAILURE, "sched_setscheduler() failed"); +} + +pid_t latency_collector_gettid(void) +{ + return (pid_t) syscall(__NR_gettid); +} + +static void print_priority(void) +{ + pid_t tid; + int policy; + int r; + struct sched_param param; + + tid = latency_collector_gettid(); + r = pthread_getschedparam(pthread_self(), &policy, ¶m); + if (r != 0) { + warn("pthread_getschedparam() failed"); + cleanup_exit(EXIT_FAILURE); + } + mutex_lock(&print_mtx); + printf("Thread %d runs with scheduling policy %s and priority %d\n", + tid, policy_name(policy), param.sched_priority); + mutex_unlock(&print_mtx); +} + +static __always_inline +void __print_skipmessage(const struct short_msg *resize_msg, + const struct timespec *timestamp, char *buffer, + size_t bufspace, const struct entry *req, bool excuse, + const char *str) +{ + ssize_t bytes = 0; + char *p = &buffer[0]; + long us, sec; + int r; + + sec = timestamp->tv_sec; + us = timestamp->tv_nsec / 1000; + + if (resize_msg != NULL && resize_msg->len > 0) { + strncpy(p, resize_msg->buf, resize_msg->len); + bytes += resize_msg->len; + p += resize_msg->len; + bufspace -= resize_msg->len; + } + + if (excuse) + r = snprintf(p, bufspace, +"%ld.%06ld Latency %d printout skipped due to %s\n", + sec, us, req->ticket, str); + else + r = snprintf(p, bufspace, "%ld.%06ld Latency %d detected\n", + sec, us, req->ticket); + + if (r < 0) + warn("snprintf() failed"); + else + bytes += r; + + /* These prints could happen concurrently */ + mutex_lock(&print_mtx); + write_or_die(fd_stdout, buffer, bytes); + mutex_unlock(&print_mtx); +} + +static void print_skipmessage(const struct short_msg *resize_msg, + const struct timespec *timestamp, char *buffer, + size_t bufspace, const struct entry *req, + bool excuse) +{ + __print_skipmessage(resize_msg, timestamp, buffer, bufspace, req, + excuse, "random delay"); +} + +static void print_lostmessage(const struct timespec *timestamp, char *buffer, + size_t bufspace, const struct entry *req, + const char *reason) +{ + __print_skipmessage(NULL, timestamp, buffer, bufspace, req, true, + reason); +} + +static void print_tracefile(const struct short_msg *resize_msg, + const struct timespec *timestamp, char *buffer, + size_t bufspace, long slept, + const struct entry *req) +{ + static const int reserve = 256; + char *p = &buffer[0]; + ssize_t bytes = 0; + ssize_t bytes_tot = 0; + long us, sec; + long slept_ms; + int trace_fd; + + /* Save some space for the final string and final null char */ + bufspace = bufspace - reserve - 1; + + if (resize_msg != NULL && resize_msg->len > 0) { + bytes = resize_msg->len; + strncpy(p, resize_msg->buf, bytes); + bytes_tot += bytes; + p += bytes; + bufspace -= bytes; + } + + trace_fd = open(debug_tracefile, O_RDONLY); + + if (trace_fd < 0) { + warn("open() failed on %s", debug_tracefile); + return; + } + + sec = timestamp->tv_sec; + us = timestamp->tv_nsec / 1000; + + if (slept != 0) { + slept_ms = slept / 1000; + bytes = snprintf(p, bufspace, +"%ld.%06ld Latency %d randomly sleep for %ld ms before print\n", + sec, us, req->ticket, slept_ms); + } else { + bytes = snprintf(p, bufspace, + "%ld.%06ld Latency %d immediate print\n", sec, + us, req->ticket); + } + + if (bytes < 0) { + warn("snprintf() failed"); + return; + } + p += bytes; + bufspace -= bytes; + bytes_tot += bytes; + + bytes = snprintf(p, bufspace, +">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> BEGIN <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n" + ); + + if (bytes < 0) { + warn("snprintf() failed"); + return; + } + + p += bytes; + bufspace -= bytes; + bytes_tot += bytes; + + do { + bytes = read(trace_fd, p, bufspace); + if (bytes < 0) { + if (errno == EINTR) + continue; + warn("read() failed on %s", debug_tracefile); + if (unlikely(close(trace_fd) != 0)) + warn("close() failed on %s", debug_tracefile); + return; + } + if (bytes == 0) + break; + p += bytes; + bufspace -= bytes; + bytes_tot += bytes; + } while (true); + + if (unlikely(close(trace_fd) != 0)) + warn("close() failed on %s", debug_tracefile); + + printstate_cnt_dec(); + /* Add the reserve space back to the budget for the final string */ + bufspace += reserve; + + bytes = snprintf(p, bufspace, + ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> END <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n\n"); + + if (bytes < 0) { + warn("snprintf() failed"); + return; + } + + bytes_tot += bytes; + + /* These prints could happen concurrently */ + mutex_lock(&print_mtx); + write_or_die(fd_stdout, buffer, bytes_tot); + mutex_unlock(&print_mtx); +} + +static char *get_no_opt(const char *opt) +{ + char *no_opt; + int s; + + s = strlen(opt) + strlen(OPT_NO_PREFIX) + 1; + /* We may be called from cleanup_exit() via set_trace_opt() */ + no_opt = malloc_or_die_nocleanup(s); + strcpy(no_opt, OPT_NO_PREFIX); + strcat(no_opt, opt); + return no_opt; +} + +static char *find_next_optstr(const char *allopt, const char **next) +{ + const char *begin; + const char *end; + char *r; + int s = 0; + + if (allopt == NULL) + return NULL; + + for (begin = allopt; *begin != '\0'; begin++) { + if (isgraph(*begin)) + break; + } + + if (*begin == '\0') + return NULL; + + for (end = begin; *end != '\0' && isgraph(*end); end++) + s++; + + r = malloc_or_die_nocleanup(s + 1); + strncpy(r, begin, s); + r[s] = '\0'; + *next = begin + s; + return r; +} + +static bool get_trace_opt(const char *allopt, const char *opt, bool *found) +{ + *found = false; + char *no_opt; + char *str; + const char *next = allopt; + bool rval = false; + + no_opt = get_no_opt(opt); + + do { + str = find_next_optstr(next, &next); + if (str == NULL) + break; + if (!strcmp(str, opt)) { + *found = true; + rval = true; + free(str); + break; + } + if (!strcmp(str, no_opt)) { + *found = true; + rval = false; + free(str); + break; + } + free(str); + } while (true); + free(no_opt); + + return rval; +} + +static int set_trace_opt(const char *opt, bool value) +{ + char *str; + int r; + + if (value) + str = strdup(opt); + else + str = get_no_opt(opt); + + r = tracefs_instance_file_write(NULL, TR_OPTIONS, str); + free(str); + return r; +} + +void save_trace_opts(struct ftrace_state *state) +{ + char *allopt; + int psize; + int i; + + allopt = tracefs_instance_file_read(NULL, TR_OPTIONS, &psize); + if (!allopt) + errx(EXIT_FAILURE, "Failed to read the %s file\n", TR_OPTIONS); + + for (i = 0; i < OPTIDX_NR; i++) + state->opt[i] = get_trace_opt(allopt, optstr[i], + &state->opt_valid[i]); + + free(allopt); +} + +static void write_file(const char *file, const char *cur, const char *new, + enum errhandling h) +{ + int r; + static const char *emsg = "Failed to write to the %s file!"; + + /* Do nothing if we now that the current and new value are equal */ + if (cur && !needs_change(cur, new)) + return; + + r = tracefs_instance_file_write(NULL, file, new); + if (r < 0) { + if (h) { + warnx(emsg, file); + if (h == ERR_CLEANUP) + cleanup_exit(EXIT_FAILURE); + } else + errx(EXIT_FAILURE, emsg, file); + } + if (verbose_ftrace()) { + mutex_lock(&print_mtx); + printf("%s was set to %s\n", file, new); + mutex_unlock(&print_mtx); + } +} + +static void reset_max_latency(void) +{ + write_file(TR_MAXLAT, NULL, "0", ERR_CLEANUP); +} + +static void save_and_disable_tracer(void) +{ + char *orig_th; + char *tracer; + bool need_nop = false; + + mutex_lock(&save_state.mutex); + + save_trace_opts(&save_state); + tracer = read_file(TR_CURRENT, ERR_EXIT); + orig_th = read_file(TR_THRESH, ERR_EXIT); + + if (needs_change(tracer, NOP_TRACER)) { + mutex_lock(&print_mtx); + if (force_tracer) { + printf( + "The %s tracer is already in use but proceeding anyway!\n", + tracer); + } else { + printf( + "The %s tracer is already in use, cowardly bailing out!\n" + "This could indicate that another program or instance is tracing.\n" + "Use the -F [--force] option to disregard the current tracer.\n", tracer); + exit(0); + } + mutex_unlock(&print_mtx); + need_nop = true; + } + + save_state.tracer = tracer; + save_state.thresh = orig_th; + + if (need_nop) + write_file(TR_CURRENT, NULL, NOP_TRACER, ERR_EXIT); + + mutex_unlock(&save_state.mutex); +} + +void set_trace_opts(struct ftrace_state *state, bool *new) +{ + int i; + int r; + + /* + * We only set options if we earlier detected that the option exists in + * the trace_options file and that the wanted setting is different from + * the one we saw in save_and_disable_tracer() + */ + for (i = 0; i < OPTIDX_NR; i++) + if (state->opt_valid[i] && + state->opt[i] != new[i]) { + r = set_trace_opt(optstr[i], new[i]); + if (r < 0) { + warnx("Failed to set the %s option to %s", + optstr[i], bool2str(new[i])); + cleanup_exit(EXIT_FAILURE); + } + if (verbose_ftrace()) { + mutex_lock(&print_mtx); + printf("%s in %s was set to %s\n", optstr[i], + TR_OPTIONS, bool2str(new[i])); + mutex_unlock(&print_mtx); + } + } +} + +static void enable_tracer(void) +{ + mutex_lock(&save_state.mutex); + set_trace_opts(&save_state, use_options); + + write_file(TR_THRESH, save_state.thresh, threshold, ERR_CLEANUP); + write_file(TR_CURRENT, NOP_TRACER, current_tracer, ERR_CLEANUP); + + mutex_unlock(&save_state.mutex); +} + +static void tracing_loop(void) +{ + int ifd = inotify_init(); + int wd; + const ssize_t bufsize = sizeof(inotify_buffer); + const ssize_t istructsize = sizeof(struct inotify_event); + char *buf = &inotify_buffer[0]; + ssize_t nr_read; + char *p; + int modified; + struct inotify_event *event; + struct entry req; + char *buffer; + const size_t bufspace = PRINT_BUFFER_SIZE; + struct timespec timestamp; + + print_priority(); + + buffer = malloc_or_die(bufspace); + + if (ifd < 0) + err(EXIT_FAILURE, "inotify_init() failed!"); + + + if (setup_ftrace) { + /* + * We must disable the tracer before resetting the max_latency + */ + save_and_disable_tracer(); + /* + * We must reset the max_latency before the inotify_add_watch() + * call. + */ + reset_max_latency(); + } + + wd = inotify_add_watch(ifd, debug_maxlat, IN_MODIFY); + if (wd < 0) + err(EXIT_FAILURE, "inotify_add_watch() failed!"); + + if (setup_ftrace) + enable_tracer(); + + signal_blocking(SIG_UNBLOCK); + + while (true) { + modified = 0; + check_signals(); + nr_read = read(ifd, buf, bufsize); + check_signals(); + if (nr_read < 0) { + if (errno == EINTR) + continue; + warn("read() failed on inotify fd!"); + cleanup_exit(EXIT_FAILURE); + } + if (nr_read == bufsize) + warnx("inotify() buffer filled, skipping events"); + if (nr_read < istructsize) { + warnx("read() returned too few bytes on inotify fd"); + cleanup_exit(EXIT_FAILURE); + } + + for (p = buf; p < buf + nr_read;) { + event = (struct inotify_event *) p; + if ((event->mask & IN_MODIFY) != 0) + modified++; + p += istructsize + event->len; + } + while (modified > 0) { + check_signals(); + mutex_lock(&printstate.mutex); + check_signals(); + printstate_next_ticket(&req); + if (printstate_cnt_read() > 0) { + printstate_mark_req_completed(&req); + mutex_unlock(&printstate.mutex); + if (verbose_lostevent()) { + clock_gettime_or_die(CLOCK_MONOTONIC, + ×tamp); + print_lostmessage(×tamp, buffer, + bufspace, &req, + "inotify loop"); + } + break; + } + mutex_unlock(&printstate.mutex); + if (queue_try_to_add_entry(&printqueue, &req) != 0) { + /* These prints could happen concurrently */ + check_signals(); + mutex_lock(&print_mtx); + check_signals(); + write_or_die(fd_stdout, queue_full_warning, + sizeof(queue_full_warning)); + mutex_unlock(&print_mtx); + } + modified--; + } + } +} + +static void *do_printloop(void *arg) +{ + const size_t bufspace = PRINT_BUFFER_SIZE; + char *buffer; + long *rseed = (long *) arg; + struct drand48_data drandbuf; + long slept = 0; + struct entry req; + int prob = 0; + struct timespec timestamp; + struct short_msg resize_msg; + + print_priority(); + + if (srand48_r(*rseed, &drandbuf) != 0) { + warn("srand48_r() failed!\n"); + cleanup_exit(EXIT_FAILURE); + } + + buffer = malloc_or_die(bufspace); + + while (true) { + req = queue_wait_for_entry(&printqueue); + clock_gettime_or_die(CLOCK_MONOTONIC, ×tamp); + mutex_lock(&printstate.mutex); + if (prev_req_won_race(&req)) { + printstate_mark_req_completed(&req); + mutex_unlock(&printstate.mutex); + if (verbose_lostevent()) + print_lostmessage(×tamp, buffer, bufspace, + &req, "print loop"); + continue; + } + mutex_unlock(&printstate.mutex); + + /* + * Toss a coin to decide if we want to sleep before printing + * out the backtrace. The reason for this is that opening + * /sys/kernel/debug/tracing/trace will cause a blackout of + * hundreds of ms, where no latencies will be noted by the + * latency tracer. Thus by randomly sleeping we try to avoid + * missing traces systematically due to this. With this option + * we will sometimes get the first latency, some other times + * some of the later ones, in case of closely spaced traces. + */ + if (trace_enable && use_random_sleep) { + slept = 0; + prob = table_get_probability(&req, &resize_msg); + if (!toss_coin(&drandbuf, prob)) + slept = go_to_sleep(&req); + if (slept >= 0) { + /* A print is ongoing */ + printstate_cnt_inc(); + /* + * We will do the printout below so we have to + * mark it as completed while we still have the + * mutex. + */ + mutex_lock(&printstate.mutex); + printstate_mark_req_completed(&req); + mutex_unlock(&printstate.mutex); + } + } + if (trace_enable) { + /* + * slept < 0 means that we detected another + * notification in go_to_sleep() above + */ + if (slept >= 0) + /* + * N.B. printstate_cnt_dec(); will be called + * inside print_tracefile() + */ + print_tracefile(&resize_msg, ×tamp, buffer, + bufspace, slept, &req); + else + print_skipmessage(&resize_msg, ×tamp, + buffer, bufspace, &req, true); + } else { + print_skipmessage(&resize_msg, ×tamp, buffer, + bufspace, &req, false); + } + } + return NULL; +} + +static void start_printthread(void) +{ + unsigned int i; + long *seed; + int ufd; + + ufd = open(DEV_URANDOM, O_RDONLY); + if (nr_threads > MAX_THREADS) { + warnx( +"Number of requested print threads was %d, max number is %d\n", + nr_threads, MAX_THREADS); + nr_threads = MAX_THREADS; + } + for (i = 0; i < nr_threads; i++) { + seed = malloc_or_die(sizeof(*seed)); + if (ufd < 0 || + read(ufd, seed, sizeof(*seed)) != sizeof(*seed)) { + printf( +"Warning! Using trivial random nummer seed, since %s not available\n", + DEV_URANDOM); + fflush(stdout); + *seed = i; + } + errno = pthread_create(&printthread[i], NULL, do_printloop, + seed); + if (errno) + err(EXIT_FAILURE, "pthread_create()"); + } + if (ufd > 0 && close(ufd) != 0) + warn("close() failed"); +} + +static void show_usage(void) +{ + printf( +"Usage: %s [OPTION]...\n\n" +"Collect closely occurring latencies from %s\n" +"with any of the following tracers: preemptirqsoff, preemptoff, irqsoff, " +"wakeup,\nwakeup_dl, or wakeup_rt.\n\n" + +"The occurrence of a latency is detected by monitoring the file\n" +"%s with inotify.\n\n" + +"The following options are supported:\n\n" + +"-l, --list\t\tList the latency tracers that are supported by the\n" +"\t\t\tcurrently running Linux kernel. If you don't see the\n" +"\t\t\ttracer that you want, you will probably need to\n" +"\t\t\tchange your kernel config and build a new kernel.\n\n" + +"-t, --tracer TR\t\tUse the tracer TR. The default is to use the first\n" +"\t\t\ttracer that is supported by the kernel in the following\n" +"\t\t\torder of precedence:\n\n" +"\t\t\tpreemptirqsoff\n" +"\t\t\tpreemptoff\n" +"\t\t\tirqsoff\n" +"\t\t\twakeup\n" +"\t\t\twakeup_rt\n" +"\t\t\twakeup_dl\n" +"\n" +"\t\t\tIf TR is not on the list above, then a warning will be\n" +"\t\t\tprinted.\n\n" + +"-F, --force\t\tProceed even if another ftrace tracer is active. Without\n" +"\t\t\tthis option, the program will refuse to start tracing if\n" +"\t\t\tany other tracer than the nop tracer is active.\n\n" + +"-s, --threshold TH\tConfigure ftrace to use a threshold of TH microseconds\n" +"\t\t\tfor the tracer. The default is 0, which means that\n" +"\t\t\ttracing_max_latency will be used. tracing_max_latency is\n" +"\t\t\tset to 0 when the program is started and contains the\n" +"\t\t\tmaximum of the latencies that have been encountered.\n\n" + +"-f, --function\t\tEnable the function-trace option in trace_options. With\n" +"\t\t\tthis option, ftrace will trace the functions that are\n" +"\t\t\texecuted during a latency, without it we only get the\n" +"\t\t\tbeginning, end, and backtrace.\n\n" + +"-g, --graph\t\tEnable the display-graph option in trace_option. This\n" +"\t\t\toption causes ftrace to show the functionph of how\n" +"\t\t\tfunctions are calling other functions.\n\n" + +"-c, --policy POL\tRun the program with scheduling policy POL. POL can be\n" +"\t\t\tother, batch, idle, rr or fifo. The default is rr. When\n" +"\t\t\tusing rr or fifo, remember that these policies may cause\n" +"\t\t\tother tasks to experience latencies.\n\n" + +"-p, --priority PRI\tRun the program with priority PRI. The acceptable range\n" +"\t\t\tof PRI depends on the scheduling policy.\n\n" + +"-n, --notrace\t\tIf latency is detected, do not print out the content of\n" +"\t\t\tthe trace file to standard output\n\n" + +"-t, --threads NRTHR\tRun NRTHR threads for printing. Default is %d.\n\n" + +"-r, --random\t\tArbitrarily sleep a certain amount of time, default\n" +"\t\t\t%ld ms, before reading the trace file. The\n" +"\t\t\tprobabilities for sleep are chosen so that the\n" +"\t\t\tprobability of obtaining any of a cluster of closely\n" +"\t\t\toccurring latencies are equal, i.e. we will randomly\n" +"\t\t\tchoose which one we collect from the trace file.\n\n" +"\t\t\tThis option is probably only useful with the irqsoff,\n" +"\t\t\tpreemptoff, and preemptirqsoff tracers.\n\n" + +"-a, --nrlat NRLAT\tFor the purpose of arbitrary delay, assume that there\n" +"\t\t\tare no more than NRLAT clustered latencies. If NRLAT\n" +"\t\t\tlatencies are detected during a run, this value will\n" +"\t\t\tautomatically be increased to NRLAT + 1 and then to\n" +"\t\t\tNRLAT + 2 and so on. The default is %d. This option\n" +"\t\t\timplies -r. We need to know this number in order to\n" +"\t\t\tbe able to calculate the probabilities of sleeping.\n" +"\t\t\tSpecifically, the probabilities of not sleeping, i.e. to\n" +"\t\t\tdo an immediate printout will be:\n\n" +"\t\t\t1/NRLAT 1/(NRLAT - 1) ... 1/3 1/2 1\n\n" +"\t\t\tThe probability of sleeping will be:\n\n" +"\t\t\t1 - P, where P is from the series above\n\n" +"\t\t\tThis descending probability will cause us to choose\n" +"\t\t\tan occurrence at random. Observe that the final\n" +"\t\t\tprobability is 0, it is when we reach this probability\n" +"\t\t\tthat we increase NRLAT automatically. As an example,\n" +"\t\t\twith the default value of 2, the probabilities will be:\n\n" +"\t\t\t1/2 0\n\n" +"\t\t\tThis means, when a latency is detected we will sleep\n" +"\t\t\twith 50%% probability. If we ever detect another latency\n" +"\t\t\tduring the sleep period, then the probability of sleep\n" +"\t\t\twill be 0%% and the table will be expanded to:\n\n" +"\t\t\t1/3 1/2 0\n\n" + +"-v, --verbose\t\tIncrease the verbosity. If this option is given once,\n" +"\t\t\tthen print a message every time that the NRLAT value\n" +"\t\t\tis automatically increased. It also causes a message to\n" +"\t\t\tbe printed when the ftrace settings are changed. If this\n" +"\t\t\toption is given at least twice, then also print a\n" +"\t\t\twarning for lost events.\n\n" + +"-u, --time TIME\t\tArbitrarily sleep for a specified time TIME ms before\n" +"\t\t\tprinting out the trace from the trace file. The default\n" +"\t\t\tis %ld ms. This option implies -r.\n\n" + +"-x, --no-ftrace\t\tDo not configure ftrace. This assume that the user\n" +"\t\t\tconfigures the ftrace files in sysfs such as\n" +"\t\t\t/sys/kernel/tracing/current_tracer or equivalent.\n\n" + +"-i, --tracefile FILE\tUse FILE as trace file. The default is\n" +"\t\t\t%s.\n" +"\t\t\tThis options implies -x\n\n" + +"-m, --max-lat FILE\tUse FILE as tracing_max_latency file. The default is\n" +"\t\t\t%s.\n" +"\t\t\tThis options implies -x\n\n" +, +prg_name, debug_tracefile_dflt, debug_maxlat_dflt, DEFAULT_NR_PRINTER_THREADS, +SLEEP_TIME_MS_DEFAULT, DEFAULT_TABLE_SIZE, SLEEP_TIME_MS_DEFAULT, +debug_tracefile_dflt, debug_maxlat_dflt); +} + +static void find_tracefiles(void) +{ + debug_tracefile_dflt = tracefs_get_tracing_file("trace"); + if (debug_tracefile_dflt == NULL) { + /* This is needed in show_usage() */ + debug_tracefile_dflt = DEBUG_NOFILE; + } + + debug_maxlat_dflt = tracefs_get_tracing_file("tracing_max_latency"); + if (debug_maxlat_dflt == NULL) { + /* This is needed in show_usage() */ + debug_maxlat_dflt = DEBUG_NOFILE; + } + + debug_tracefile = debug_tracefile_dflt; + debug_maxlat = debug_maxlat_dflt; +} + +bool alldigits(const char *s) +{ + for (; *s != '\0'; s++) + if (!isdigit(*s)) + return false; + return true; +} + +void check_alldigits(const char *optarg, const char *argname) +{ + if (!alldigits(optarg)) + errx(EXIT_FAILURE, + "The %s parameter expects a decimal argument\n", argname); +} + +static void scan_arguments(int argc, char *argv[]) +{ + int c; + int i; + int option_idx = 0; + + static struct option long_options[] = { + { "list", no_argument, 0, 'l' }, + { "tracer", required_argument, 0, 't' }, + { "force", no_argument, 0, 'F' }, + { "threshold", required_argument, 0, 's' }, + { "function", no_argument, 0, 'f' }, + { "graph", no_argument, 0, 'g' }, + { "policy", required_argument, 0, 'c' }, + { "priority", required_argument, 0, 'p' }, + { "help", no_argument, 0, 'h' }, + { "notrace", no_argument, 0, 'n' }, + { "random", no_argument, 0, 'r' }, + { "nrlat", required_argument, 0, 'a' }, + { "threads", required_argument, 0, 'e' }, + { "time", required_argument, 0, 'u' }, + { "verbose", no_argument, 0, 'v' }, + { "no-ftrace", no_argument, 0, 'x' }, + { "tracefile", required_argument, 0, 'i' }, + { "max-lat", required_argument, 0, 'm' }, + { 0, 0, 0, 0 } + }; + const struct policy *p; + int max, min; + int value; + bool notracer, valid; + + /* + * We must do this before parsing the arguments because show_usage() + * needs to display these. + */ + find_tracefiles(); + + while (true) { + c = getopt_long(argc, argv, "lt:Fs:fgc:p:hnra:e:u:vxi:m:", + long_options, &option_idx); + if (c == -1) + break; + + switch (c) { + case 'l': + show_available(); + exit(0); + break; + case 't': + current_tracer = strdup(optarg); + if (!is_relevant_tracer(current_tracer)) { + warnx("%s is not a known latency tracer!\n", + current_tracer); + } + valid = tracer_valid(current_tracer, ¬racer); + if (notracer) + errx(EXIT_FAILURE, no_tracer_msg); + if (!valid) + errx(EXIT_FAILURE, +"The tracer %s is not supported by your kernel!\n", current_tracer); + break; + case 'F': + force_tracer = true; + break; + case 's': + check_alldigits(optarg, "-s [--threshold]"); + threshold = strdup(optarg); + break; + case 'f': + use_options[OPTIDX_FUNC_TR] = true; + break; + case 'g': + use_options[OPTIDX_DISP_GR] = true; + break; + case 'c': + p = policy_from_name(optarg); + if (p != NULL) { + sched_policy = p->policy; + sched_policy_set = true; + if (!sched_pri_set) { + sched_pri = p->default_pri; + sched_pri_set = true; + } + } else { + warnx("Unknown scheduling %s\n", optarg); + show_usage(); + exit(0); + } + break; + case 'p': + check_alldigits(optarg, "-p [--priority]"); + sched_pri = atoi(optarg); + sched_pri_set = true; + break; + case 'h': + show_usage(); + exit(0); + break; + case 'n': + trace_enable = false; + use_random_sleep = false; + break; + case 'e': + check_alldigits(optarg, "-e [--threads]"); + value = atoi(optarg); + if (value > 0) + nr_threads = value; + else { + warnx("NRTHR must be > 0\n"); + show_usage(); + exit(0); + } + break; + case 'u': + check_alldigits(optarg, "-u [--time]"); + value = atoi(optarg); + if (value < 0) { + warnx("TIME must be >= 0\n"); + show_usage(); + ; + } + trace_enable = true; + use_random_sleep = true; + sleep_time = value * USEC_PER_MSEC; + break; + case 'v': + verbosity++; + break; + case 'r': + trace_enable = true; + use_random_sleep = true; + break; + case 'a': + check_alldigits(optarg, "-a [--nrlat]"); + value = atoi(optarg); + if (value <= 0) { + warnx("NRLAT must be > 0\n"); + show_usage(); + exit(0); + } + trace_enable = true; + use_random_sleep = true; + table_startsize = value; + break; + case 'x': + setup_ftrace = false; + break; + case 'i': + setup_ftrace = false; + debug_tracefile = strdup(optarg); + break; + case 'm': + setup_ftrace = false; + debug_maxlat = strdup(optarg); + break; + default: + show_usage(); + exit(0); + break; + } + } + + if (setup_ftrace) { + if (!current_tracer) { + current_tracer = find_default_tracer(); + if (!current_tracer) + errx(EXIT_FAILURE, +"No default tracer found and tracer not specified\n"); + } + + if (use_random_sleep && !random_makes_sense(current_tracer)) { + warnx("WARNING: The tracer is %s and random sleep has", + current_tracer); + fprintf(stderr, +"been enabled. Random sleep is intended for the following tracers:\n"); + for (i = 0; random_tracers[i]; i++) + fprintf(stderr, "%s\n", random_tracers[i]); + fprintf(stderr, "\n"); + } + } + + if (debug_tracefile == DEBUG_NOFILE || + debug_maxlat == DEBUG_NOFILE) + errx(EXIT_FAILURE, +"Could not find tracing directory e.g. /sys/kernel/tracing\n"); + + if (!sched_policy_set) { + sched_policy = SCHED_RR; + sched_policy_set = true; + if (!sched_pri_set) { + sched_pri = RT_DEFAULT_PRI; + sched_pri_set = true; + } + } + + max = sched_get_priority_max(sched_policy); + min = sched_get_priority_min(sched_policy); + + if (sched_pri < min) { + printf( +"ATTENTION: Increasing priority to minimum, which is %d\n", min); + sched_pri = min; + } + if (sched_pri > max) { + printf( +"ATTENTION: Reducing priority to maximum, which is %d\n", max); + sched_pri = max; + } +} + +static void show_params(void) +{ + printf( +"\n" +"Running with scheduling policy %s and priority %d. Using %d print threads.\n", + policy_name(sched_policy), sched_pri, nr_threads); + if (trace_enable) { + if (use_random_sleep) { + printf( +"%s will be printed with random delay\n" +"Start size of the probability table:\t\t\t%d\n" +"Print a message when the prob. table changes size:\t%s\n" +"Print a warning when an event has been lost:\t\t%s\n" +"Sleep time is:\t\t\t\t\t\t%ld ms\n", +debug_tracefile, +table_startsize, +bool2str(verbose_sizechange()), +bool2str(verbose_lostevent()), +sleep_time / USEC_PER_MSEC); + } else { + printf("%s will be printed immediately\n", + debug_tracefile); + } + } else { + printf("%s will not be printed\n", + debug_tracefile); + } + if (setup_ftrace) { + printf("Tracer:\t\t\t\t\t\t\t%s\n" + "%s option:\t\t\t\t\t%s\n" + "%s option:\t\t\t\t\t%s\n", + current_tracer, + optstr[OPTIDX_FUNC_TR], + bool2str(use_options[OPTIDX_FUNC_TR]), + optstr[OPTIDX_DISP_GR], + bool2str(use_options[OPTIDX_DISP_GR])); + if (!strcmp(threshold, "0")) + printf("Threshold:\t\t\t\t\t\ttracing_max_latency\n"); + else + printf("Threshold:\t\t\t\t\t\t%s\n", threshold); + } + printf("\n"); +} + +int main(int argc, char *argv[]) +{ + init_save_state(); + signal_blocking(SIG_BLOCK); + setup_sig_handler(); + open_stdout(); + + if (argc >= 1) + prg_name = argv[0]; + else + prg_name = prg_unknown; + + scan_arguments(argc, argv); + show_params(); + + init_printstate(); + init_print_mtx(); + if (use_random_sleep) { + init_probabilities(); + if (verbose_sizechange()) + printf("Initializing probability table to %d\n", + table_startsize); + sleeptable_resize(table_startsize, false, NULL); + } + set_priority(); + init_queue(&printqueue); + start_printthread(); + tracing_loop(); + return 0; +} |