diff options
Diffstat (limited to 'tools')
82 files changed, 2650 insertions, 533 deletions
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 3ef0d9051e10..7031a4bf87a0 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -74,6 +74,7 @@ static const char * const prog_type_name[] = { [BPF_PROG_TYPE_SK_REUSEPORT] = "sk_reuseport", [BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector", [BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl", + [BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable", [BPF_PROG_TYPE_CGROUP_SOCKOPT] = "cgroup_sockopt", }; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index f506c68b2612..4e455018da65 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -806,7 +806,7 @@ union bpf_attr { * based on a user-provided identifier for all traffic coming from * the tasks belonging to the related cgroup. See also the related * kernel documentation, available from the Linux sources in file - * *Documentation/cgroup-v1/net_cls.rst*. + * *Documentation/admin-guide/cgroup-v1/net_cls.rst*. * * The Linux kernel has two versions for cgroups: there are * cgroups v1 and cgroups v2. Both are available to users, who can @@ -3245,7 +3245,7 @@ struct bpf_sock_addr { __u32 user_ip4; /* Allows 1,2,4-byte read and 4-byte write. * Stored in network byte order. */ - __u32 user_ip6[4]; /* Allows 1,2,4-byte read and 4,8-byte write. + __u32 user_ip6[4]; /* Allows 1,2,4,8-byte read and 4,8-byte write. * Stored in network byte order. */ __u32 user_port; /* Allows 4-byte read and write. @@ -3257,7 +3257,7 @@ struct bpf_sock_addr { __u32 msg_src_ip4; /* Allows 1,2,4-byte read and 4-byte write. * Stored in network byte order. */ - __u32 msg_src_ip6[4]; /* Allows 1,2,4-byte read and 4,8-byte write. + __u32 msg_src_ip6[4]; /* Allows 1,2,4,8-byte read and 4,8-byte write. * Stored in network byte order. */ __bpf_md_ptr(struct bpf_sock *, sk); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index ed07789b3e62..794dd5064ae8 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -4126,8 +4126,8 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, } attr.size = sizeof(attr); attr.type = type; - attr.config1 = (uint64_t)(void *)name; /* kprobe_func or uprobe_path */ - attr.config2 = offset; /* kprobe_addr or probe_offset */ + attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */ + attr.config2 = offset; /* kprobe_addr or probe_offset */ /* pid filter is meaningful only for uprobes */ pfd = syscall(__NR_perf_event_open, &attr, diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index b33740221b7e..5007b5d4fd2c 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -517,7 +517,8 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, err = -errno; goto out_socket; } - strncpy(xsk->ifname, ifname, IFNAMSIZ); + strncpy(xsk->ifname, ifname, IFNAMSIZ - 1); + xsk->ifname[IFNAMSIZ - 1] = '\0'; err = xsk_set_xdp_socket_config(&xsk->config, usr_config); if (err) diff --git a/tools/objtool/arch.h b/tools/objtool/arch.h index 580e344db3dd..ced3765c4f44 100644 --- a/tools/objtool/arch.h +++ b/tools/objtool/arch.h @@ -11,22 +11,24 @@ #include "elf.h" #include "cfi.h" -#define INSN_JUMP_CONDITIONAL 1 -#define INSN_JUMP_UNCONDITIONAL 2 -#define INSN_JUMP_DYNAMIC 3 -#define INSN_CALL 4 -#define INSN_CALL_DYNAMIC 5 -#define INSN_RETURN 6 -#define INSN_CONTEXT_SWITCH 7 -#define INSN_STACK 8 -#define INSN_BUG 9 -#define INSN_NOP 10 -#define INSN_STAC 11 -#define INSN_CLAC 12 -#define INSN_STD 13 -#define INSN_CLD 14 -#define INSN_OTHER 15 -#define INSN_LAST INSN_OTHER +enum insn_type { + INSN_JUMP_CONDITIONAL, + INSN_JUMP_UNCONDITIONAL, + INSN_JUMP_DYNAMIC, + INSN_JUMP_DYNAMIC_CONDITIONAL, + INSN_CALL, + INSN_CALL_DYNAMIC, + INSN_RETURN, + INSN_CONTEXT_SWITCH, + INSN_STACK, + INSN_BUG, + INSN_NOP, + INSN_STAC, + INSN_CLAC, + INSN_STD, + INSN_CLD, + INSN_OTHER, +}; enum op_dest_type { OP_DEST_REG, @@ -68,7 +70,7 @@ void arch_initial_func_cfi_state(struct cfi_state *state); int arch_decode_instruction(struct elf *elf, struct section *sec, unsigned long offset, unsigned int maxlen, - unsigned int *len, unsigned char *type, + unsigned int *len, enum insn_type *type, unsigned long *immediate, struct stack_op *op); bool arch_callee_saved_reg(unsigned char reg); diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 584568f27a83..0567c47a91b1 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -68,7 +68,7 @@ bool arch_callee_saved_reg(unsigned char reg) int arch_decode_instruction(struct elf *elf, struct section *sec, unsigned long offset, unsigned int maxlen, - unsigned int *len, unsigned char *type, + unsigned int *len, enum insn_type *type, unsigned long *immediate, struct stack_op *op) { struct insn insn; diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 172f99195726..5f26620f13f5 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -18,6 +18,8 @@ #define FAKE_JUMP_OFFSET -1 +#define C_JUMP_TABLE_SECTION ".rodata..c_jump_table" + struct alternative { struct list_head list; struct instruction *insn; @@ -95,6 +97,20 @@ static struct instruction *next_insn_same_func(struct objtool_file *file, for (insn = next_insn_same_sec(file, insn); insn; \ insn = next_insn_same_sec(file, insn)) +static bool is_sibling_call(struct instruction *insn) +{ + /* An indirect jump is either a sibling call or a jump to a table. */ + if (insn->type == INSN_JUMP_DYNAMIC) + return list_empty(&insn->alts); + + if (insn->type != INSN_JUMP_CONDITIONAL && + insn->type != INSN_JUMP_UNCONDITIONAL) + return false; + + /* add_jump_destinations() sets insn->call_dest for sibling calls. */ + return !!insn->call_dest; +} + /* * This checks to see if the given function is a "noreturn" function. * @@ -103,14 +119,9 @@ static struct instruction *next_insn_same_func(struct objtool_file *file, * * For local functions, we have to detect them manually by simply looking for * the lack of a return instruction. - * - * Returns: - * -1: error - * 0: no dead end - * 1: dead end */ -static int __dead_end_function(struct objtool_file *file, struct symbol *func, - int recursion) +static bool __dead_end_function(struct objtool_file *file, struct symbol *func, + int recursion) { int i; struct instruction *insn; @@ -136,30 +147,33 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func, "rewind_stack_do_exit", }; + if (!func) + return false; + if (func->bind == STB_WEAK) - return 0; + return false; if (func->bind == STB_GLOBAL) for (i = 0; i < ARRAY_SIZE(global_noreturns); i++) if (!strcmp(func->name, global_noreturns[i])) - return 1; + return true; if (!func->len) - return 0; + return false; insn = find_insn(file, func->sec, func->offset); if (!insn->func) - return 0; + return false; func_for_each_insn_all(file, func, insn) { empty = false; if (insn->type == INSN_RETURN) - return 0; + return false; } if (empty) - return 0; + return false; /* * A function can have a sibling call instead of a return. In that @@ -167,40 +181,31 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func, * of the sibling call returns. */ func_for_each_insn_all(file, func, insn) { - if (insn->type == INSN_JUMP_UNCONDITIONAL) { + if (is_sibling_call(insn)) { struct instruction *dest = insn->jump_dest; if (!dest) /* sibling call to another file */ - return 0; - - if (dest->func && dest->func->pfunc != insn->func->pfunc) { + return false; - /* local sibling call */ - if (recursion == 5) { - /* - * Infinite recursion: two functions - * have sibling calls to each other. - * This is a very rare case. It means - * they aren't dead ends. - */ - return 0; - } - - return __dead_end_function(file, dest->func, - recursion + 1); + /* local sibling call */ + if (recursion == 5) { + /* + * Infinite recursion: two functions have + * sibling calls to each other. This is a very + * rare case. It means they aren't dead ends. + */ + return false; } - } - if (insn->type == INSN_JUMP_DYNAMIC && list_empty(&insn->alts)) - /* sibling call */ - return 0; + return __dead_end_function(file, dest->func, recursion+1); + } } - return 1; + return true; } -static int dead_end_function(struct objtool_file *file, struct symbol *func) +static bool dead_end_function(struct objtool_file *file, struct symbol *func) { return __dead_end_function(file, func, 0); } @@ -262,19 +267,12 @@ static int decode_instructions(struct objtool_file *file) if (ret) goto err; - if (!insn->type || insn->type > INSN_LAST) { - WARN_FUNC("invalid instruction type %d", - insn->sec, insn->offset, insn->type); - ret = -1; - goto err; - } - hash_add(file->insn_hash, &insn->hash, insn->offset); list_add_tail(&insn->list, &file->insn_list); } list_for_each_entry(func, &sec->symbol_list, list) { - if (func->type != STT_FUNC) + if (func->type != STT_FUNC || func->alias != func) continue; if (!find_insn(file, sec, func->offset)) { @@ -284,8 +282,7 @@ static int decode_instructions(struct objtool_file *file) } func_for_each_insn(file, func, insn) - if (!insn->func) - insn->func = func; + insn->func = func; } } @@ -488,6 +485,7 @@ static const char *uaccess_safe_builtin[] = { /* misc */ "csum_partial_copy_generic", "__memcpy_mcsafe", + "mcsafe_handle_tail", "ftrace_likely_update", /* CONFIG_TRACE_BRANCH_PROFILING */ NULL }; @@ -505,7 +503,7 @@ static void add_uaccess_safe(struct objtool_file *file) if (!func) continue; - func->alias->uaccess_safe = true; + func->uaccess_safe = true; } } @@ -577,13 +575,16 @@ static int add_jump_destinations(struct objtool_file *file) * Retpoline jumps are really dynamic jumps in * disguise, so convert them accordingly. */ - insn->type = INSN_JUMP_DYNAMIC; + if (insn->type == INSN_JUMP_UNCONDITIONAL) + insn->type = INSN_JUMP_DYNAMIC; + else + insn->type = INSN_JUMP_DYNAMIC_CONDITIONAL; + insn->retpoline_safe = true; continue; } else { - /* sibling call */ + /* external sibling call */ insn->call_dest = rela->sym; - insn->jump_dest = NULL; continue; } @@ -623,7 +624,7 @@ static int add_jump_destinations(struct objtool_file *file) * However this code can't completely replace the * read_symbols() code because this doesn't detect the * case where the parent function's only reference to a - * subfunction is through a switch table. + * subfunction is through a jump table. */ if (!strstr(insn->func->name, ".cold.") && strstr(insn->jump_dest->func->name, ".cold.")) { @@ -633,9 +634,8 @@ static int add_jump_destinations(struct objtool_file *file) } else if (insn->jump_dest->func->pfunc != insn->func->pfunc && insn->jump_dest->offset == insn->jump_dest->func->offset) { - /* sibling class */ + /* internal sibling call */ insn->call_dest = insn->jump_dest->func; - insn->jump_dest = NULL; } } } @@ -896,20 +896,26 @@ out: return ret; } -static int add_switch_table(struct objtool_file *file, struct instruction *insn, - struct rela *table, struct rela *next_table) +static int add_jump_table(struct objtool_file *file, struct instruction *insn, + struct rela *table) { struct rela *rela = table; - struct instruction *alt_insn; + struct instruction *dest_insn; struct alternative *alt; struct symbol *pfunc = insn->func->pfunc; unsigned int prev_offset = 0; - list_for_each_entry_from(rela, &table->rela_sec->rela_list, list) { - if (rela == next_table) + /* + * Each @rela is a switch table relocation which points to the target + * instruction. + */ + list_for_each_entry_from(rela, &table->sec->rela_list, list) { + + /* Check for the end of the table: */ + if (rela != table && rela->jump_table_start) break; - /* Make sure the switch table entries are consecutive: */ + /* Make sure the table entries are consecutive: */ if (prev_offset && rela->offset != prev_offset + 8) break; @@ -918,12 +924,12 @@ static int add_switch_table(struct objtool_file *file, struct instruction *insn, rela->addend == pfunc->offset) break; - alt_insn = find_insn(file, rela->sym->sec, rela->addend); - if (!alt_insn) + dest_insn = find_insn(file, rela->sym->sec, rela->addend); + if (!dest_insn) break; - /* Make sure the jmp dest is in the function or subfunction: */ - if (alt_insn->func->pfunc != pfunc) + /* Make sure the destination is in the same function: */ + if (!dest_insn->func || dest_insn->func->pfunc != pfunc) break; alt = malloc(sizeof(*alt)); @@ -932,7 +938,7 @@ static int add_switch_table(struct objtool_file *file, struct instruction *insn, return -1; } - alt->insn = alt_insn; + alt->insn = dest_insn; list_add_tail(&alt->list, &insn->alts); prev_offset = rela->offset; } @@ -947,7 +953,7 @@ static int add_switch_table(struct objtool_file *file, struct instruction *insn, } /* - * find_switch_table() - Given a dynamic jump, find the switch jump table in + * find_jump_table() - Given a dynamic jump, find the switch jump table in * .rodata associated with it. * * There are 3 basic patterns: @@ -989,13 +995,13 @@ static int add_switch_table(struct objtool_file *file, struct instruction *insn, * * NOTE: RETPOLINE made it harder still to decode dynamic jumps. */ -static struct rela *find_switch_table(struct objtool_file *file, +static struct rela *find_jump_table(struct objtool_file *file, struct symbol *func, struct instruction *insn) { - struct rela *text_rela, *rodata_rela; + struct rela *text_rela, *table_rela; struct instruction *orig_insn = insn; - struct section *rodata_sec; + struct section *table_sec; unsigned long table_offset; /* @@ -1028,42 +1034,52 @@ static struct rela *find_switch_table(struct objtool_file *file, continue; table_offset = text_rela->addend; - rodata_sec = text_rela->sym->sec; + table_sec = text_rela->sym->sec; if (text_rela->type == R_X86_64_PC32) table_offset += 4; /* * Make sure the .rodata address isn't associated with a - * symbol. gcc jump tables are anonymous data. + * symbol. GCC jump tables are anonymous data. + * + * Also support C jump tables which are in the same format as + * switch jump tables. For objtool to recognize them, they + * need to be placed in the C_JUMP_TABLE_SECTION section. They + * have symbols associated with them. */ - if (find_symbol_containing(rodata_sec, table_offset)) + if (find_symbol_containing(table_sec, table_offset) && + strcmp(table_sec->name, C_JUMP_TABLE_SECTION)) continue; - rodata_rela = find_rela_by_dest(rodata_sec, table_offset); - if (rodata_rela) { - /* - * Use of RIP-relative switch jumps is quite rare, and - * indicates a rare GCC quirk/bug which can leave dead - * code behind. - */ - if (text_rela->type == R_X86_64_PC32) - file->ignore_unreachables = true; + /* Each table entry has a rela associated with it. */ + table_rela = find_rela_by_dest(table_sec, table_offset); + if (!table_rela) + continue; - return rodata_rela; - } + /* + * Use of RIP-relative switch jumps is quite rare, and + * indicates a rare GCC quirk/bug which can leave dead code + * behind. + */ + if (text_rela->type == R_X86_64_PC32) + file->ignore_unreachables = true; + + return table_rela; } return NULL; } - -static int add_func_switch_tables(struct objtool_file *file, - struct symbol *func) +/* + * First pass: Mark the head of each jump table so that in the next pass, + * we know when a given jump table ends and the next one starts. + */ +static void mark_func_jump_tables(struct objtool_file *file, + struct symbol *func) { - struct instruction *insn, *last = NULL, *prev_jump = NULL; - struct rela *rela, *prev_rela = NULL; - int ret; + struct instruction *insn, *last = NULL; + struct rela *rela; func_for_each_insn_all(file, func, insn) { if (!last) @@ -1071,7 +1087,7 @@ static int add_func_switch_tables(struct objtool_file *file, /* * Store back-pointers for unconditional forward jumps such - * that find_switch_table() can back-track using those and + * that find_jump_table() can back-track using those and * avoid some potentially confusing code. */ if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest && @@ -1086,27 +1102,25 @@ static int add_func_switch_tables(struct objtool_file *file, if (insn->type != INSN_JUMP_DYNAMIC) continue; - rela = find_switch_table(file, func, insn); - if (!rela) - continue; - - /* - * We found a switch table, but we don't know yet how big it - * is. Don't add it until we reach the end of the function or - * the beginning of another switch table in the same function. - */ - if (prev_jump) { - ret = add_switch_table(file, prev_jump, prev_rela, rela); - if (ret) - return ret; + rela = find_jump_table(file, func, insn); + if (rela) { + rela->jump_table_start = true; + insn->jump_table = rela; } - - prev_jump = insn; - prev_rela = rela; } +} + +static int add_func_jump_tables(struct objtool_file *file, + struct symbol *func) +{ + struct instruction *insn; + int ret; - if (prev_jump) { - ret = add_switch_table(file, prev_jump, prev_rela, NULL); + func_for_each_insn_all(file, func, insn) { + if (!insn->jump_table) + continue; + + ret = add_jump_table(file, insn, insn->jump_table); if (ret) return ret; } @@ -1119,7 +1133,7 @@ static int add_func_switch_tables(struct objtool_file *file, * section which contains a list of addresses within the function to jump to. * This finds these jump tables and adds them to the insn->alts lists. */ -static int add_switch_table_alts(struct objtool_file *file) +static int add_jump_table_alts(struct objtool_file *file) { struct section *sec; struct symbol *func; @@ -1133,7 +1147,8 @@ static int add_switch_table_alts(struct objtool_file *file) if (func->type != STT_FUNC) continue; - ret = add_func_switch_tables(file, func); + mark_func_jump_tables(file, func); + ret = add_func_jump_tables(file, func); if (ret) return ret; } @@ -1277,13 +1292,18 @@ static void mark_rodata(struct objtool_file *file) bool found = false; /* - * This searches for the .rodata section or multiple .rodata.func_name - * sections if -fdata-sections is being used. The .str.1.1 and .str.1.8 - * rodata sections are ignored as they don't contain jump tables. + * Search for the following rodata sections, each of which can + * potentially contain jump tables: + * + * - .rodata: can contain GCC switch tables + * - .rodata.<func>: same, if -fdata-sections is being used + * - .rodata..c_jump_table: contains C annotated jump tables + * + * .rodata.str1.* sections are ignored; they don't contain jump tables. */ for_each_sec(file, sec) { - if (!strncmp(sec->name, ".rodata", 7) && - !strstr(sec->name, ".str1.")) { + if ((!strncmp(sec->name, ".rodata", 7) && !strstr(sec->name, ".str1.")) || + !strcmp(sec->name, C_JUMP_TABLE_SECTION)) { sec->rodata = true; found = true; } @@ -1325,7 +1345,7 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; - ret = add_switch_table_alts(file); + ret = add_jump_table_alts(file); if (ret) return ret; @@ -1873,12 +1893,12 @@ static bool insn_state_match(struct instruction *insn, struct insn_state *state) static inline bool func_uaccess_safe(struct symbol *func) { if (func) - return func->alias->uaccess_safe; + return func->uaccess_safe; return false; } -static inline const char *insn_dest_name(struct instruction *insn) +static inline const char *call_dest_name(struct instruction *insn) { if (insn->call_dest) return insn->call_dest->name; @@ -1890,13 +1910,13 @@ static int validate_call(struct instruction *insn, struct insn_state *state) { if (state->uaccess && !func_uaccess_safe(insn->call_dest)) { WARN_FUNC("call to %s() with UACCESS enabled", - insn->sec, insn->offset, insn_dest_name(insn)); + insn->sec, insn->offset, call_dest_name(insn)); return 1; } if (state->df) { WARN_FUNC("call to %s() with DF set", - insn->sec, insn->offset, insn_dest_name(insn)); + insn->sec, insn->offset, call_dest_name(insn)); return 1; } @@ -1920,13 +1940,12 @@ static int validate_sibling_call(struct instruction *insn, struct insn_state *st * each instruction and validate all the rules described in * tools/objtool/Documentation/stack-validation.txt. */ -static int validate_branch(struct objtool_file *file, struct instruction *first, - struct insn_state state) +static int validate_branch(struct objtool_file *file, struct symbol *func, + struct instruction *first, struct insn_state state) { struct alternative *alt; struct instruction *insn, *next_insn; struct section *sec; - struct symbol *func = NULL; int ret; insn = first; @@ -1947,9 +1966,6 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, return 1; } - if (insn->func) - func = insn->func->pfunc; - if (func && insn->ignore) { WARN_FUNC("BUG: why am I validating an ignored function?", sec, insn->offset); @@ -1971,7 +1987,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, i = insn; save_insn = NULL; - func_for_each_insn_continue_reverse(file, insn->func, i) { + func_for_each_insn_continue_reverse(file, func, i) { if (i->save) { save_insn = i; break; @@ -2017,7 +2033,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, if (alt->skip_orig) skip_orig = true; - ret = validate_branch(file, alt->insn, state); + ret = validate_branch(file, func, alt->insn, state); if (ret) { if (backtrace) BT_FUNC("(alt)", insn); @@ -2055,7 +2071,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, if (state.bp_scratch) { WARN("%s uses BP as a scratch register", - insn->func->name); + func->name); return 1; } @@ -2067,36 +2083,28 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, if (ret) return ret; - if (insn->type == INSN_CALL) { - if (is_fentry_call(insn)) - break; - - ret = dead_end_function(file, insn->call_dest); - if (ret == 1) - return 0; - if (ret == -1) - return 1; - } - - if (!no_fp && func && !has_valid_stack_frame(&state)) { + if (!no_fp && func && !is_fentry_call(insn) && + !has_valid_stack_frame(&state)) { WARN_FUNC("call without frame pointer save/setup", sec, insn->offset); return 1; } + + if (dead_end_function(file, insn->call_dest)) + return 0; + break; case INSN_JUMP_CONDITIONAL: case INSN_JUMP_UNCONDITIONAL: - if (func && !insn->jump_dest) { + if (func && is_sibling_call(insn)) { ret = validate_sibling_call(insn, &state); if (ret) return ret; - } else if (insn->jump_dest && - (!func || !insn->jump_dest->func || - insn->jump_dest->func->pfunc == func)) { - ret = validate_branch(file, insn->jump_dest, - state); + } else if (insn->jump_dest) { + ret = validate_branch(file, func, + insn->jump_dest, state); if (ret) { if (backtrace) BT_FUNC("(branch)", insn); @@ -2110,13 +2118,17 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, break; case INSN_JUMP_DYNAMIC: - if (func && list_empty(&insn->alts)) { + case INSN_JUMP_DYNAMIC_CONDITIONAL: + if (func && is_sibling_call(insn)) { ret = validate_sibling_call(insn, &state); if (ret) return ret; } - return 0; + if (insn->type == INSN_JUMP_DYNAMIC) + return 0; + + break; case INSN_CONTEXT_SWITCH: if (func && (!next_insn || !next_insn->hint)) { @@ -2162,7 +2174,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, break; case INSN_CLAC: - if (!state.uaccess && insn->func) { + if (!state.uaccess && func) { WARN_FUNC("redundant UACCESS disable", sec, insn->offset); return 1; } @@ -2183,7 +2195,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, break; case INSN_CLD: - if (!state.df && insn->func) + if (!state.df && func) WARN_FUNC("redundant CLD", sec, insn->offset); state.df = false; @@ -2222,7 +2234,7 @@ static int validate_unwind_hints(struct objtool_file *file) for_each_insn(file, insn) { if (insn->hint && !insn->visited) { - ret = validate_branch(file, insn, state); + ret = validate_branch(file, insn->func, insn, state); if (ret && backtrace) BT_FUNC("<=== (hint)", insn); warnings += ret; @@ -2345,16 +2357,25 @@ static int validate_functions(struct objtool_file *file) for_each_sec(file, sec) { list_for_each_entry(func, &sec->symbol_list, list) { - if (func->type != STT_FUNC || func->pfunc != func) + if (func->type != STT_FUNC) + continue; + + if (!func->len) { + WARN("%s() is missing an ELF size annotation", + func->name); + warnings++; + } + + if (func->pfunc != func || func->alias != func) continue; insn = find_insn(file, sec, func->offset); - if (!insn || insn->ignore) + if (!insn || insn->ignore || insn->visited) continue; - state.uaccess = func->alias->uaccess_safe; + state.uaccess = func->uaccess_safe; - ret = validate_branch(file, insn, state); + ret = validate_branch(file, func, insn, state); if (ret && backtrace) BT_FUNC("<=== (func)", insn); warnings += ret; @@ -2407,7 +2428,7 @@ int check(const char *_objname, bool orc) objname = _objname; - file.elf = elf_open(objname, orc ? O_RDWR : O_RDONLY); + file.elf = elf_read(objname, orc ? O_RDWR : O_RDONLY); if (!file.elf) return 1; diff --git a/tools/objtool/check.h b/tools/objtool/check.h index cb60b9acf5cf..b881fafcf55d 100644 --- a/tools/objtool/check.h +++ b/tools/objtool/check.h @@ -31,13 +31,14 @@ struct instruction { struct section *sec; unsigned long offset; unsigned int len; - unsigned char type; + enum insn_type type; unsigned long immediate; bool alt_group, visited, dead_end, ignore, hint, save, restore, ignore_alts; bool retpoline_safe; struct symbol *call_dest; struct instruction *jump_dest; struct instruction *first_jump_src; + struct rela *jump_table; struct list_head alts; struct symbol *func; struct stack_op stack_op; diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index e99e1be19ad9..edba4745f25a 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -278,7 +278,7 @@ static int read_symbols(struct elf *elf) } if (sym->offset == s->offset) { - if (sym->len == s->len && alias == sym) + if (sym->len && sym->len == s->len && alias == sym) alias = s; if (sym->len >= s->len) { @@ -385,7 +385,7 @@ static int read_relas(struct elf *elf) rela->offset = rela->rela.r_offset; symndx = GELF_R_SYM(rela->rela.r_info); rela->sym = find_symbol_by_index(elf, symndx); - rela->rela_sec = sec; + rela->sec = sec; if (!rela->sym) { WARN("can't find rela entry symbol %d for %s", symndx, sec->name); @@ -401,7 +401,7 @@ static int read_relas(struct elf *elf) return 0; } -struct elf *elf_open(const char *name, int flags) +struct elf *elf_read(const char *name, int flags) { struct elf *elf; Elf_Cmd cmd; @@ -463,7 +463,7 @@ struct section *elf_create_section(struct elf *elf, const char *name, { struct section *sec, *shstrtab; size_t size = entsize * nr; - struct Elf_Scn *s; + Elf_Scn *s; Elf_Data *data; sec = malloc(sizeof(*sec)); diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h index e44ca5d51871..44150204db4d 100644 --- a/tools/objtool/elf.h +++ b/tools/objtool/elf.h @@ -57,11 +57,12 @@ struct rela { struct list_head list; struct hlist_node hash; GElf_Rela rela; - struct section *rela_sec; + struct section *sec; struct symbol *sym; unsigned int type; unsigned long offset; int addend; + bool jump_table_start; }; struct elf { @@ -74,7 +75,7 @@ struct elf { }; -struct elf *elf_open(const char *name, int flags); +struct elf *elf_read(const char *name, int flags); struct section *find_section_by_name(struct elf *elf, const char *name); struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset); struct symbol *find_symbol_by_name(struct elf *elf, const char *name); diff --git a/tools/pci/Makefile b/tools/pci/Makefile index 6876ee4bd78c..4b95a5176355 100644 --- a/tools/pci/Makefile +++ b/tools/pci/Makefile @@ -18,7 +18,6 @@ ALL_TARGETS := pcitest ALL_PROGRAMS := $(patsubst %,$(OUTPUT)%,$(ALL_TARGETS)) SCRIPTS := pcitest.sh -ALL_SCRIPTS := $(patsubst %,$(OUTPUT)%,$(SCRIPTS)) all: $(ALL_PROGRAMS) @@ -47,10 +46,10 @@ clean: install: $(ALL_PROGRAMS) install -d -m 755 $(DESTDIR)$(bindir); \ - for program in $(ALL_PROGRAMS) pcitest.sh; do \ + for program in $(ALL_PROGRAMS); do \ install $$program $(DESTDIR)$(bindir); \ done; \ - for script in $(ALL_SCRIPTS); do \ + for script in $(SCRIPTS); do \ install $$script $(DESTDIR)$(bindir); \ done diff --git a/tools/pci/pcitest.c b/tools/pci/pcitest.c index cb7a47dfd8b6..cb1e51fcc84e 100644 --- a/tools/pci/pcitest.c +++ b/tools/pci/pcitest.c @@ -36,15 +36,15 @@ struct pci_test { unsigned long size; }; -static void run_test(struct pci_test *test) +static int run_test(struct pci_test *test) { - long ret; + int ret = -EINVAL; int fd; fd = open(test->device, O_RDWR); if (fd < 0) { perror("can't open PCI Endpoint Test device"); - return; + return -ENODEV; } if (test->barnum >= 0 && test->barnum <= 5) { @@ -212,7 +212,7 @@ usage: "\t-r Read buffer test\n" "\t-w Write buffer test\n" "\t-c Copy buffer test\n" - "\t-s <size> Size of buffer {default: 100KB}\n", + "\t-s <size> Size of buffer {default: 100KB}\n" "\t-h Print this help message\n", argv[0]); return -EINVAL; diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt index b6866a05edd2..ed3ecfa422e1 100644 --- a/tools/perf/Documentation/perf-probe.txt +++ b/tools/perf/Documentation/perf-probe.txt @@ -194,12 +194,13 @@ PROBE ARGUMENT -------------- Each probe argument follows below syntax. - [NAME=]LOCALVAR|$retval|%REG|@SYMBOL[:TYPE] + [NAME=]LOCALVAR|$retval|%REG|@SYMBOL[:TYPE][@user] 'NAME' specifies the name of this argument (optional). You can use the name of local variable, local data structure member (e.g. var->field, var.field2), local array with fixed index (e.g. array[1], var->array[0], var->pointer[2]), or kprobe-tracer argument format (e.g. $retval, %ax, etc). Note that the name of this argument will be set as the last member name if you specify a local data structure member (e.g. field2 for 'var->field1.field2'.) '$vars' and '$params' special arguments are also available for NAME, '$vars' is expanded to the local variables (including function parameters) which can access at given probe point. '$params' is expanded to only the function parameters. 'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically set the type based on debuginfo (*). Currently, basic types (u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal integers (x/x8/x16/x32/x64), signedness casting (u/s), "string" and bitfield are supported. (see TYPES for detail) On x86 systems %REG is always the short form of the register: for example %AX. %RAX or %EAX is not valid. +"@user" is a special attribute which means the LOCALVAR will be treated as a user-space memory. This is only valid for kprobe event. TYPES ----- diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 79367087bd18..8f24865596af 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2289,6 +2289,12 @@ static int process_switch_event(struct perf_tool *tool, if (perf_event__process_switch(tool, event, sample, machine) < 0) return -1; + if (scripting_ops && scripting_ops->process_switch) + scripting_ops->process_switch(event, sample, machine); + + if (!script->show_switch_events) + return 0; + thread = machine__findnew_thread(machine, sample->pid, sample->tid); if (thread == NULL) { @@ -2467,7 +2473,7 @@ static int __cmd_script(struct perf_script *script) script->tool.mmap = process_mmap_event; script->tool.mmap2 = process_mmap2_event; } - if (script->show_switch_events) + if (script->show_switch_events || (scripting_ops && scripting_ops->process_switch)) script->tool.context_switch = process_switch_event; if (script->show_namespace_events) script->tool.namespaces = process_namespaces_event; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 1aa2ed096f65..4f0bbffee05f 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -19,6 +19,7 @@ #include <api/fs/tracing_path.h> #include <bpf/bpf.h> #include "util/bpf_map.h" +#include "util/rlimit.h" #include "builtin.h" #include "util/cgroup.h" #include "util/color.h" @@ -3864,6 +3865,15 @@ int cmd_trace(int argc, const char **argv) goto out; } + /* + * Parsing .perfconfig may entail creating a BPF event, that may need + * to create BPF maps, so bump RLIM_MEMLOCK as the default 64K setting + * is too small. This affects just this process, not touching the + * global setting. If it fails we'll get something in 'perf trace -v' + * to help diagnose the problem. + */ + rlimit__bump_memlock(); + err = perf_config(trace__config, &trace); if (err) goto out; diff --git a/tools/perf/builtin-version.c b/tools/perf/builtin-version.c index f470144d1a70..bf114ca9ca87 100644 --- a/tools/perf/builtin-version.c +++ b/tools/perf/builtin-version.c @@ -19,6 +19,7 @@ static struct version version; static struct option version_options[] = { OPT_BOOLEAN(0, "build-options", &version.build_options, "display the build options"), + OPT_END(), }; static const char * const version_usage[] = { diff --git a/tools/perf/pmu-events/arch/s390/cf_m8561/basic.json b/tools/perf/pmu-events/arch/s390/cf_m8561/basic.json new file mode 100644 index 000000000000..17fb5241928b --- /dev/null +++ b/tools/perf/pmu-events/arch/s390/cf_m8561/basic.json @@ -0,0 +1,58 @@ +[ + { + "Unit": "CPU-M-CF", + "EventCode": "0", + "EventName": "CPU_CYCLES", + "BriefDescription": "CPU Cycles", + "PublicDescription": "Cycle Count" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "1", + "EventName": "INSTRUCTIONS", + "BriefDescription": "Instructions", + "PublicDescription": "Instruction Count" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "2", + "EventName": "L1I_DIR_WRITES", + "BriefDescription": "L1I Directory Writes", + "PublicDescription": "Level-1 I-Cache Directory Write Count" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "3", + "EventName": "L1I_PENALTY_CYCLES", + "BriefDescription": "L1I Penalty Cycles", + "PublicDescription": "Level-1 I-Cache Penalty Cycle Count" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "4", + "EventName": "L1D_DIR_WRITES", + "BriefDescription": "L1D Directory Writes", + "PublicDescription": "Level-1 D-Cache Directory Write Count" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "5", + "EventName": "L1D_PENALTY_CYCLES", + "BriefDescription": "L1D Penalty Cycles", + "PublicDescription": "Level-1 D-Cache Penalty Cycle Count" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "32", + "EventName": "PROBLEM_STATE_CPU_CYCLES", + "BriefDescription": "Problem-State CPU Cycles", + "PublicDescription": "Problem-State Cycle Count" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "33", + "EventName": "PROBLEM_STATE_INSTRUCTIONS", + "BriefDescription": "Problem-State Instructions", + "PublicDescription": "Problem-State Instruction Count" + }, +] diff --git a/tools/perf/pmu-events/arch/s390/cf_m8561/crypto.json b/tools/perf/pmu-events/arch/s390/cf_m8561/crypto.json new file mode 100644 index 000000000000..db286f19e7b6 --- /dev/null +++ b/tools/perf/pmu-events/arch/s390/cf_m8561/crypto.json @@ -0,0 +1,114 @@ +[ + { + "Unit": "CPU-M-CF", + "EventCode": "64", + "EventName": "PRNG_FUNCTIONS", + "BriefDescription": "PRNG Functions", + "PublicDescription": "Total number of the PRNG functions issued by the CPU" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "65", + "EventName": "PRNG_CYCLES", + "BriefDescription": "PRNG Cycles", + "PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing PRNG functions issued by the CPU" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "66", + "EventName": "PRNG_BLOCKED_FUNCTIONS", + "BriefDescription": "PRNG Blocked Functions", + "PublicDescription": "Total number of the PRNG functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "67", + "EventName": "PRNG_BLOCKED_CYCLES", + "BriefDescription": "PRNG Blocked Cycles", + "PublicDescription": "Total number of CPU cycles blocked for the PRNG functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "68", + "EventName": "SHA_FUNCTIONS", + "BriefDescription": "SHA Functions", + "PublicDescription": "Total number of SHA functions issued by the CPU" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "69", + "EventName": "SHA_CYCLES", + "BriefDescription": "SHA Cycles", + "PublicDescription": "Total number of CPU cycles when the SHA coprocessor is busy performing the SHA functions issued by the CPU" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "70", + "EventName": "SHA_BLOCKED_FUNCTIONS", + "BriefDescription": "SHA Blocked Functions", + "PublicDescription": "Total number of the SHA functions that are issued by the CPU and are blocked because the SHA coprocessor is busy performing a function issued by another CPU" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "71", + "EventName": "SHA_BLOCKED_CYCLES", + "BriefDescription": "SHA Bloced Cycles", + "PublicDescription": "Total number of CPU cycles blocked for the SHA functions issued by the CPU because the SHA coprocessor is busy performing a function issued by another CPU" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "72", + "EventName": "DEA_FUNCTIONS", + "BriefDescription": "DEA Functions", + "PublicDescription": "Total number of the DEA functions issued by the CPU" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "73", + "EventName": "DEA_CYCLES", + "BriefDescription": "DEA Cycles", + "PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the DEA functions issued by the CPU" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "74", + "EventName": "DEA_BLOCKED_FUNCTIONS", + "BriefDescription": "DEA Blocked Functions", + "PublicDescription": "Total number of the DEA functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "75", + "EventName": "DEA_BLOCKED_CYCLES", + "BriefDescription": "DEA Blocked Cycles", + "PublicDescription": "Total number of CPU cycles blocked for the DEA functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "76", + "EventName": "AES_FUNCTIONS", + "BriefDescription": "AES Functions", + "PublicDescription": "Total number of AES functions issued by the CPU" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "77", + "EventName": "AES_CYCLES", + "BriefDescription": "AES Cycles", + "PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the AES functions issued by the CPU" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "78", + "EventName": "AES_BLOCKED_FUNCTIONS", + "BriefDescription": "AES Blocked Functions", + "PublicDescription": "Total number of AES functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "79", + "EventName": "AES_BLOCKED_CYCLES", + "BriefDescription": "AES Blocked Cycles", + "PublicDescription": "Total number of CPU cycles blocked for the AES functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU" + }, +] diff --git a/tools/perf/pmu-events/arch/s390/cf_m8561/crypto6.json b/tools/perf/pmu-events/arch/s390/cf_m8561/crypto6.json new file mode 100644 index 000000000000..5e36bc2468d0 --- /dev/null +++ b/tools/perf/pmu-events/arch/s390/cf_m8561/crypto6.json @@ -0,0 +1,30 @@ +[ + { + "Unit": "CPU-M-CF", + "EventCode": "80", + "EventName": "ECC_FUNCTION_COUNT", + "BriefDescription": "ECC Function Count", + "PublicDescription": "Long ECC function Count" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "81", + "EventName": "ECC_CYCLES_COUNT", + "BriefDescription": "ECC Cycles Count", + "PublicDescription": "Long ECC Function cycles count" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "82", + "EventName": "ECC_BLOCKED_FUNCTION_COUNT", + "BriefDescription": "Ecc Blocked Function Count", + "PublicDescription": "Long ECC blocked function count" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "83", + "EventName": "ECC_BLOCKED_CYCLES_COUNT", + "BriefDescription": "ECC Blocked Cycles Count", + "PublicDescription": "Long ECC blocked cycles count" + }, +] diff --git a/tools/perf/pmu-events/arch/s390/cf_m8561/extended.json b/tools/perf/pmu-events/arch/s390/cf_m8561/extended.json new file mode 100644 index 000000000000..89e070727e1b --- /dev/null +++ b/tools/perf/pmu-events/arch/s390/cf_m8561/extended.json @@ -0,0 +1,373 @@ +[ + { + "Unit": "CPU-M-CF", + "EventCode": "128", + "EventName": "L1D_RO_EXCL_WRITES", + "BriefDescription": "L1D Read-only Exclusive Writes", + "PublicDescription": "A directory write to the Level-1 Data cache where the line was originally in a Read-Only state in the cache but has been updated to be in the Exclusive state that allows stores to the cache line" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "129", + "EventName": "DTLB2_WRITES", + "BriefDescription": "DTLB2 Writes", + "PublicDescription": "A translation has been written into The Translation Lookaside Buffer 2 (TLB2) and the request was made by the data cache" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "130", + "EventName": "DTLB2_MISSES", + "BriefDescription": "DTLB2 Misses", + "PublicDescription": "A TLB2 miss is in progress for a request made by the data cache. Incremented by one for every TLB2 miss in progress for the Level-1 Data cache on this cycle" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "131", + "EventName": "DTLB2_HPAGE_WRITES", + "BriefDescription": "DTLB2 One-Megabyte Page Writes", + "PublicDescription": "A translation entry was written into the Combined Region and Segment Table Entry array in the Level-2 TLB for a one-megabyte page or a Last Host Translation was done" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "132", + "EventName": "DTLB2_GPAGE_WRITES", + "BriefDescription": "DTLB2 Two-Gigabyte Page Writes", + "PublicDescription": "A translation entry for a two-gigabyte page was written into the Level-2 TLB" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "133", + "EventName": "L1D_L2D_SOURCED_WRITES", + "BriefDescription": "L1D L2D Sourced Writes", + "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from the Level-2 Data cache" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "134", + "EventName": "ITLB2_WRITES", + "BriefDescription": "ITLB2 Writes", + "PublicDescription": "A translation entry has been written into the Translation Lookaside Buffer 2 (TLB2) and the request was made by the instruction cache" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "135", + "EventName": "ITLB2_MISSES", + "BriefDescription": "ITLB2 Misses", + "PublicDescription": "A TLB2 miss is in progress for a request made by the instruction cache. Incremented by one for every TLB2 miss in progress for the Level-1 Instruction cache in a cycle" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "136", + "EventName": "L1I_L2I_SOURCED_WRITES", + "BriefDescription": "L1I L2I Sourced Writes", + "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from the Level-2 Instruction cache" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "137", + "EventName": "TLB2_PTE_WRITES", + "BriefDescription": "TLB2 PTE Writes", + "PublicDescription": "A translation entry was written into the Page Table Entry array in the Level-2 TLB" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "138", + "EventName": "TLB2_CRSTE_WRITES", + "BriefDescription": "TLB2 CRSTE Writes", + "PublicDescription": "Translation entries were written into the Combined Region and Segment Table Entry array and the Page Table Entry array in the Level-2 TLB" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "139", + "EventName": "TLB2_ENGINES_BUSY", + "BriefDescription": "TLB2 Engines Busy", + "PublicDescription": "The number of Level-2 TLB translation engines busy in a cycle" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "140", + "EventName": "TX_C_TEND", + "BriefDescription": "Completed TEND instructions in constrained TX mode", + "PublicDescription": "A TEND instruction has completed in a constrained transactional-execution mode" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "141", + "EventName": "TX_NC_TEND", + "BriefDescription": "Completed TEND instructions in non-constrained TX mode", + "PublicDescription": "A TEND instruction has completed in a non-constrained transactional-execution mode" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "143", + "EventName": "L1C_TLB2_MISSES", + "BriefDescription": "L1C TLB2 Misses", + "PublicDescription": "Increments by one for any cycle where a level-1 cache or level-2 TLB miss is in progress" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "144", + "EventName": "L1D_ONCHIP_L3_SOURCED_WRITES", + "BriefDescription": "L1D On-Chip L3 Sourced Writes", + "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Chip Level-3 cache without intervention" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "145", + "EventName": "L1D_ONCHIP_MEMORY_SOURCED_WRITES", + "BriefDescription": "L1D On-Chip Memory Sourced Writes", + "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Chip memory" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "146", + "EventName": "L1D_ONCHIP_L3_SOURCED_WRITES_IV", + "BriefDescription": "L1D On-Chip L3 Sourced Writes with Intervention", + "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Chip Level-3 cache with intervention" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "147", + "EventName": "L1D_ONCLUSTER_L3_SOURCED_WRITES", + "BriefDescription": "L1D On-Cluster L3 Sourced Writes", + "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Cluster Level-3 cache withountervention" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "148", + "EventName": "L1D_ONCLUSTER_MEMORY_SOURCED_WRITES", + "BriefDescription": "L1D On-Cluster Memory Sourced Writes", + "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Cluster memory" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "149", + "EventName": "L1D_ONCLUSTER_L3_SOURCED_WRITES_IV", + "BriefDescription": "L1D On-Cluster L3 Sourced Writes with Intervention", + "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Cluster Level-3 cache with intervention" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "150", + "EventName": "L1D_OFFCLUSTER_L3_SOURCED_WRITES", + "BriefDescription": "L1D Off-Cluster L3 Sourced Writes", + "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Cluster Level-3 cache without intervention" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "151", + "EventName": "L1D_OFFCLUSTER_MEMORY_SOURCED_WRITES", + "BriefDescription": "L1D Off-Cluster Memory Sourced Writes", + "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from Off-Cluster memory" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "152", + "EventName": "L1D_OFFCLUSTER_L3_SOURCED_WRITES_IV", + "BriefDescription": "L1D Off-Cluster L3 Sourced Writes with Intervention", + "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Cluster Level-3 cache with intervention" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "153", + "EventName": "L1D_OFFDRAWER_L3_SOURCED_WRITES", + "BriefDescription": "L1D Off-Drawer L3 Sourced Writes", + "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Drawer Level-3 cache without intervention" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "154", + "EventName": "L1D_OFFDRAWER_MEMORY_SOURCED_WRITES", + "BriefDescription": "L1D Off-Drawer Memory Sourced Writes", + "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from Off-Drawer memory" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "155", + "EventName": "L1D_OFFDRAWER_L3_SOURCED_WRITES_IV", + "BriefDescription": "L1D Off-Drawer L3 Sourced Writes with Intervention", + "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Drawer Level-3 cache with intervention" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "156", + "EventName": "L1D_ONDRAWER_L4_SOURCED_WRITES", + "BriefDescription": "L1D On-Drawer L4 Sourced Writes", + "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Drawer Level-4 cache" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "157", + "EventName": "L1D_OFFDRAWER_L4_SOURCED_WRITES", + "BriefDescription": "L1D Off-Drawer L4 Sourced Writes", + "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from Off-Drawer Level-4 cache" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "158", + "EventName": "L1D_ONCHIP_L3_SOURCED_WRITES_RO", + "BriefDescription": "L1D On-Chip L3 Sourced Writes read-only", + "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Chip L3 but a read-only invalidate was done to remove other copies of the cache line" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "162", + "EventName": "L1I_ONCHIP_L3_SOURCED_WRITES", + "BriefDescription": "L1I On-Chip L3 Sourced Writes", + "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache ine was sourced from an On-Chip Level-3 cache without intervention" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "163", + "EventName": "L1I_ONCHIP_MEMORY_SOURCED_WRITES", + "BriefDescription": "L1I On-Chip Memory Sourced Writes", + "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache ine was sourced from On-Chip memory" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "164", + "EventName": "L1I_ONCHIP_L3_SOURCED_WRITES_IV", + "BriefDescription": "L1I On-Chip L3 Sourced Writes with Intervention", + "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache ine was sourced from an On-Chip Level-3 cache with intervention" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "165", + "EventName": "L1I_ONCLUSTER_L3_SOURCED_WRITES", + "BriefDescription": "L1I On-Cluster L3 Sourced Writes", + "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Cluster Level-3 cache without intervention" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "166", + "EventName": "L1I_ONCLUSTER_MEMORY_SOURCED_WRITES", + "BriefDescription": "L1I On-Cluster Memory Sourced Writes", + "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Cluster memory" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "167", + "EventName": "L1I_ONCLUSTER_L3_SOURCED_WRITES_IV", + "BriefDescription": "L1I On-Cluster L3 Sourced Writes with Intervention", + "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from On-Cluster Level-3 cache with intervention" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "168", + "EventName": "L1I_OFFCLUSTER_L3_SOURCED_WRITES", + "BriefDescription": "L1I Off-Cluster L3 Sourced Writes", + "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Cluster Level-3 cache without intervention" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "169", + "EventName": "L1I_OFFCLUSTER_MEMORY_SOURCED_WRITES", + "BriefDescription": "L1I Off-Cluster Memory Sourced Writes", + "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from Off-Cluster memory" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "170", + "EventName": "L1I_OFFCLUSTER_L3_SOURCED_WRITES_IV", + "BriefDescription": "L1I Off-Cluster L3 Sourced Writes with Intervention", + "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Cluster Level-3 cache with intervention" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "171", + "EventName": "L1I_OFFDRAWER_L3_SOURCED_WRITES", + "BriefDescription": "L1I Off-Drawer L3 Sourced Writes", + "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Drawer Level-3 cache without intervention" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "172", + "EventName": "L1I_OFFDRAWER_MEMORY_SOURCED_WRITES", + "BriefDescription": "L1I Off-Drawer Memory Sourced Writes", + "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from Off-Drawer memory" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "173", + "EventName": "L1I_OFFDRAWER_L3_SOURCED_WRITES_IV", + "BriefDescription": "L1I Off-Drawer L3 Sourced Writes with Intervention", + "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Drawer Level-3 cache with intervention" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "174", + "EventName": "L1I_ONDRAWER_L4_SOURCED_WRITES", + "BriefDescription": "L1I On-Drawer L4 Sourced Writes", + "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from On-Drawer Level-4 cache" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "175", + "EventName": "L1I_OFFDRAWER_L4_SOURCED_WRITES", + "BriefDescription": "L1I Off-Drawer L4 Sourced Writes", + "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from Off-Drawer Level-4 cache" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "224", + "EventName": "BCD_DFP_EXECUTION_SLOTS", + "BriefDescription": "BCD DFP Execution Slots", + "PublicDescription": "Count of floating point execution slots used for finished Binary Coded Decimal to Decimal Floating Point conversions. Instructions: CDZT, CXZT, CZDT, CZXT" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "225", + "EventName": "VX_BCD_EXECUTION_SLOTS", + "BriefDescription": "VX BCD Execution Slots", + "PublicDescription": "Count of floating point execution slots used for finished vector arithmetic Binary Coded Decimal instructions. Instructions: VAP, VSP, VMPVMSP, VDP, VSDP, VRP, VLIP, VSRP, VPSOPVCP, VTP, VPKZ, VUPKZ, VCVB, VCVBG, VCVDVCVDG" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "226", + "EventName": "DECIMAL_INSTRUCTIONS", + "BriefDescription": "Decimal Instructions", + "PublicDescription": "Decimal instructions dispatched. Instructions: CVB, CVD, AP, CP, DP, ED, EDMK, MP, SRP, SP, ZAP" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "232", + "EventName": "LAST_HOST_TRANSLATIONS", + "BriefDescription": "Last host translation done", + "PublicDescription": "Last Host Translation done" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "243", + "EventName": "TX_NC_TABORT", + "BriefDescription": "Aborted transactions in non-constrained TX mode", + "PublicDescription": "A transaction abort has occurred in a non-constrained transactional-execution mode" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "244", + "EventName": "TX_C_TABORT_NO_SPECIAL", + "BriefDescription": "Aborted transactions in constrained TX mode not using special completion logic", + "PublicDescription": "A transaction abort has occurred in a constrained transactional-execution mode and the CPU is not using any special logic to allow the transaction to complete" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "245", + "EventName": "TX_C_TABORT_SPECIAL", + "BriefDescription": "Aborted transactions in constrained TX mode using special completion logic", + "PublicDescription": "A transaction abort has occurred in a constrained transactional-execution mode and the CPU is using special logic to allow the transaction to complete" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "448", + "EventName": "MT_DIAG_CYCLES_ONE_THR_ACTIVE", + "BriefDescription": "Cycle count with one thread active", + "PublicDescription": "Cycle count with one thread active" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "449", + "EventName": "MT_DIAG_CYCLES_TWO_THR_ACTIVE", + "BriefDescription": "Cycle count with two threads active", + "PublicDescription": "Cycle count with two threads active" + }, +] diff --git a/tools/perf/pmu-events/arch/s390/mapfile.csv b/tools/perf/pmu-events/arch/s390/mapfile.csv index 78bcf7f8e206..bd3fc577139c 100644 --- a/tools/perf/pmu-events/arch/s390/mapfile.csv +++ b/tools/perf/pmu-events/arch/s390/mapfile.csv @@ -4,3 +4,4 @@ Family-model,Version,Filename,EventType ^IBM.282[78].*[13]\.[1-5].[[:xdigit:]]+$,1,cf_zec12,core ^IBM.296[45].*[13]\.[1-5].[[:xdigit:]]+$,1,cf_z13,core ^IBM.390[67].*[13]\.[1-5].[[:xdigit:]]+$,3,cf_z14,core +^IBM.856[12].*3\.6.[[:xdigit:]]+$,3,cf_m8561,core diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py index 92713d93e956..7bd73a904b4e 100644 --- a/tools/perf/scripts/python/export-to-postgresql.py +++ b/tools/perf/scripts/python/export-to-postgresql.py @@ -353,7 +353,10 @@ do_query(query, 'CREATE TABLE threads (' 'tid integer)') do_query(query, 'CREATE TABLE comms (' 'id bigint NOT NULL,' - 'comm varchar(16))') + 'comm varchar(16),' + 'c_thread_id bigint,' + 'c_time bigint,' + 'exec_flag boolean)') do_query(query, 'CREATE TABLE comm_threads (' 'id bigint NOT NULL,' 'comm_id bigint,' @@ -479,6 +482,17 @@ do_query(query, 'CREATE TABLE pwrx (' 'last_cstate integer,' 'wake_reason integer)') +do_query(query, 'CREATE TABLE context_switches (' + 'id bigint NOT NULL,' + 'machine_id bigint,' + 'time bigint,' + 'cpu integer,' + 'thread_out_id bigint,' + 'comm_out_id bigint,' + 'thread_in_id bigint,' + 'comm_in_id bigint,' + 'flags integer)') + do_query(query, 'CREATE VIEW machines_view AS ' 'SELECT ' 'id,' @@ -692,6 +706,29 @@ do_query(query, 'CREATE VIEW power_events_view AS ' ' INNER JOIN selected_events ON selected_events.id = samples.evsel_id' ' ORDER BY samples.id') +do_query(query, 'CREATE VIEW context_switches_view AS ' + 'SELECT ' + 'context_switches.id,' + 'context_switches.machine_id,' + 'context_switches.time,' + 'context_switches.cpu,' + 'th_out.pid AS pid_out,' + 'th_out.tid AS tid_out,' + 'comm_out.comm AS comm_out,' + 'th_in.pid AS pid_in,' + 'th_in.tid AS tid_in,' + 'comm_in.comm AS comm_in,' + 'CASE WHEN context_switches.flags = 0 THEN \'in\'' + ' WHEN context_switches.flags = 1 THEN \'out\'' + ' WHEN context_switches.flags = 3 THEN \'out preempt\'' + ' ELSE CAST ( context_switches.flags AS VARCHAR(11) )' + 'END AS flags' + ' FROM context_switches' + ' INNER JOIN threads AS th_out ON th_out.id = context_switches.thread_out_id' + ' INNER JOIN threads AS th_in ON th_in.id = context_switches.thread_in_id' + ' INNER JOIN comms AS comm_out ON comm_out.id = context_switches.comm_out_id' + ' INNER JOIN comms AS comm_in ON comm_in.id = context_switches.comm_in_id') + file_header = struct.pack("!11sii", b"PGCOPY\n\377\r\n\0", 0, 0) file_trailer = b"\377\377" @@ -756,6 +793,7 @@ mwait_file = open_output_file("mwait_table.bin") pwre_file = open_output_file("pwre_table.bin") exstop_file = open_output_file("exstop_table.bin") pwrx_file = open_output_file("pwrx_table.bin") +context_switches_file = open_output_file("context_switches_table.bin") def trace_begin(): printdate("Writing to intermediate files...") @@ -763,7 +801,7 @@ def trace_begin(): evsel_table(0, "unknown") machine_table(0, 0, "unknown") thread_table(0, 0, 0, -1, -1) - comm_table(0, "unknown") + comm_table(0, "unknown", 0, 0, 0) dso_table(0, 0, "unknown", "unknown", "") symbol_table(0, 0, 0, 0, 0, "unknown") sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) @@ -804,6 +842,7 @@ def trace_end(): copy_output_file(pwre_file, "pwre") copy_output_file(exstop_file, "exstop") copy_output_file(pwrx_file, "pwrx") + copy_output_file(context_switches_file, "context_switches") printdate("Removing intermediate files...") remove_output_file(evsel_file) @@ -825,6 +864,7 @@ def trace_end(): remove_output_file(pwre_file) remove_output_file(exstop_file) remove_output_file(pwrx_file) + remove_output_file(context_switches_file) os.rmdir(output_dir_name) printdate("Adding primary keys") do_query(query, 'ALTER TABLE selected_events ADD PRIMARY KEY (id)') @@ -846,11 +886,14 @@ def trace_end(): do_query(query, 'ALTER TABLE pwre ADD PRIMARY KEY (id)') do_query(query, 'ALTER TABLE exstop ADD PRIMARY KEY (id)') do_query(query, 'ALTER TABLE pwrx ADD PRIMARY KEY (id)') + do_query(query, 'ALTER TABLE context_switches ADD PRIMARY KEY (id)') printdate("Adding foreign keys") do_query(query, 'ALTER TABLE threads ' 'ADD CONSTRAINT machinefk FOREIGN KEY (machine_id) REFERENCES machines (id),' 'ADD CONSTRAINT processfk FOREIGN KEY (process_id) REFERENCES threads (id)') + do_query(query, 'ALTER TABLE comms ' + 'ADD CONSTRAINT threadfk FOREIGN KEY (c_thread_id) REFERENCES threads (id)') do_query(query, 'ALTER TABLE comm_threads ' 'ADD CONSTRAINT commfk FOREIGN KEY (comm_id) REFERENCES comms (id),' 'ADD CONSTRAINT threadfk FOREIGN KEY (thread_id) REFERENCES threads (id)') @@ -881,6 +924,8 @@ def trace_end(): 'ADD CONSTRAINT parent_call_pathfk FOREIGN KEY (parent_call_path_id) REFERENCES call_paths (id)') do_query(query, 'CREATE INDEX pcpid_idx ON calls (parent_call_path_id)') do_query(query, 'CREATE INDEX pid_idx ON calls (parent_id)') + do_query(query, 'ALTER TABLE comms ADD has_calls boolean') + do_query(query, 'UPDATE comms SET has_calls = TRUE WHERE comms.id IN (SELECT DISTINCT comm_id FROM calls)') do_query(query, 'ALTER TABLE ptwrite ' 'ADD CONSTRAINT idfk FOREIGN KEY (id) REFERENCES samples (id)') do_query(query, 'ALTER TABLE cbr ' @@ -893,6 +938,12 @@ def trace_end(): 'ADD CONSTRAINT idfk FOREIGN KEY (id) REFERENCES samples (id)') do_query(query, 'ALTER TABLE pwrx ' 'ADD CONSTRAINT idfk FOREIGN KEY (id) REFERENCES samples (id)') + do_query(query, 'ALTER TABLE context_switches ' + 'ADD CONSTRAINT machinefk FOREIGN KEY (machine_id) REFERENCES machines (id),' + 'ADD CONSTRAINT toutfk FOREIGN KEY (thread_out_id) REFERENCES threads (id),' + 'ADD CONSTRAINT tinfk FOREIGN KEY (thread_in_id) REFERENCES threads (id),' + 'ADD CONSTRAINT coutfk FOREIGN KEY (comm_out_id) REFERENCES comms (id),' + 'ADD CONSTRAINT cinfk FOREIGN KEY (comm_in_id) REFERENCES comms (id)') printdate("Dropping unused tables") if is_table_empty("ptwrite"): @@ -905,6 +956,8 @@ def trace_end(): drop("pwrx") if is_table_empty("cbr"): drop("cbr") + if is_table_empty("context_switches"): + drop("context_switches") if (unhandled_count): printdate("Warning: ", unhandled_count, " unhandled events") @@ -935,11 +988,11 @@ def thread_table(thread_id, machine_id, process_id, pid, tid, *x): value = struct.pack("!hiqiqiqiiii", 5, 8, thread_id, 8, machine_id, 8, process_id, 4, pid, 4, tid) thread_file.write(value) -def comm_table(comm_id, comm_str, *x): +def comm_table(comm_id, comm_str, thread_id, time, exec_flag, *x): comm_str = toserverstr(comm_str) n = len(comm_str) - fmt = "!hiqi" + str(n) + "s" - value = struct.pack(fmt, 2, 8, comm_id, n, comm_str) + fmt = "!hiqi" + str(n) + "s" + "iqiqiB" + value = struct.pack(fmt, 5, 8, comm_id, n, comm_str, 8, thread_id, 8, time, 1, exec_flag) comm_file.write(value) def comm_thread_table(comm_thread_id, comm_id, thread_id, *x): @@ -1051,3 +1104,8 @@ def synth_data(id, config, raw_buf, *x): pwrx(id, raw_buf) elif config == 5: cbr(id, raw_buf) + +def context_switch_table(id, machine_id, time, cpu, thread_out_id, comm_out_id, thread_in_id, comm_in_id, flags, *x): + fmt = "!hiqiqiqiiiqiqiqiqii" + value = struct.pack(fmt, 9, 8, id, 8, machine_id, 8, time, 4, cpu, 8, thread_out_id, 8, comm_out_id, 8, thread_in_id, 8, comm_in_id, 4, flags) + context_switches_file.write(value) diff --git a/tools/perf/scripts/python/export-to-sqlite.py b/tools/perf/scripts/python/export-to-sqlite.py index 021326c46285..8043a7272a56 100644 --- a/tools/perf/scripts/python/export-to-sqlite.py +++ b/tools/perf/scripts/python/export-to-sqlite.py @@ -177,7 +177,10 @@ do_query(query, 'CREATE TABLE threads (' 'tid integer)') do_query(query, 'CREATE TABLE comms (' 'id integer NOT NULL PRIMARY KEY,' - 'comm varchar(16))') + 'comm varchar(16),' + 'c_thread_id bigint,' + 'c_time bigint,' + 'exec_flag boolean)') do_query(query, 'CREATE TABLE comm_threads (' 'id integer NOT NULL PRIMARY KEY,' 'comm_id bigint,' @@ -303,6 +306,17 @@ do_query(query, 'CREATE TABLE pwrx (' 'last_cstate integer,' 'wake_reason integer)') +do_query(query, 'CREATE TABLE context_switches (' + 'id integer NOT NULL PRIMARY KEY,' + 'machine_id bigint,' + 'time bigint,' + 'cpu integer,' + 'thread_out_id bigint,' + 'comm_out_id bigint,' + 'thread_in_id bigint,' + 'comm_in_id bigint,' + 'flags integer)') + # printf was added to sqlite in version 3.8.3 sqlite_has_printf = False try: @@ -527,6 +541,29 @@ do_query(query, 'CREATE VIEW power_events_view AS ' ' INNER JOIN selected_events ON selected_events.id = evsel_id' ' WHERE selected_events.name IN (\'cbr\',\'mwait\',\'exstop\',\'pwre\',\'pwrx\')') +do_query(query, 'CREATE VIEW context_switches_view AS ' + 'SELECT ' + 'context_switches.id,' + 'context_switches.machine_id,' + 'context_switches.time,' + 'context_switches.cpu,' + 'th_out.pid AS pid_out,' + 'th_out.tid AS tid_out,' + 'comm_out.comm AS comm_out,' + 'th_in.pid AS pid_in,' + 'th_in.tid AS tid_in,' + 'comm_in.comm AS comm_in,' + 'CASE WHEN context_switches.flags = 0 THEN \'in\'' + ' WHEN context_switches.flags = 1 THEN \'out\'' + ' WHEN context_switches.flags = 3 THEN \'out preempt\'' + ' ELSE context_switches.flags ' + 'END AS flags' + ' FROM context_switches' + ' INNER JOIN threads AS th_out ON th_out.id = context_switches.thread_out_id' + ' INNER JOIN threads AS th_in ON th_in.id = context_switches.thread_in_id' + ' INNER JOIN comms AS comm_out ON comm_out.id = context_switches.comm_out_id' + ' INNER JOIN comms AS comm_in ON comm_in.id = context_switches.comm_in_id') + do_query(query, 'END TRANSACTION') evsel_query = QSqlQuery(db) @@ -536,7 +573,7 @@ machine_query.prepare("INSERT INTO machines VALUES (?, ?, ?)") thread_query = QSqlQuery(db) thread_query.prepare("INSERT INTO threads VALUES (?, ?, ?, ?, ?)") comm_query = QSqlQuery(db) -comm_query.prepare("INSERT INTO comms VALUES (?, ?)") +comm_query.prepare("INSERT INTO comms VALUES (?, ?, ?, ?, ?)") comm_thread_query = QSqlQuery(db) comm_thread_query.prepare("INSERT INTO comm_threads VALUES (?, ?, ?)") dso_query = QSqlQuery(db) @@ -568,6 +605,8 @@ exstop_query = QSqlQuery(db) exstop_query.prepare("INSERT INTO exstop VALUES (?, ?)") pwrx_query = QSqlQuery(db) pwrx_query.prepare("INSERT INTO pwrx VALUES (?, ?, ?, ?)") +context_switch_query = QSqlQuery(db) +context_switch_query.prepare("INSERT INTO context_switches VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)") def trace_begin(): printdate("Writing records...") @@ -576,7 +615,7 @@ def trace_begin(): evsel_table(0, "unknown") machine_table(0, 0, "unknown") thread_table(0, 0, 0, -1, -1) - comm_table(0, "unknown") + comm_table(0, "unknown", 0, 0, 0) dso_table(0, 0, "unknown", "unknown", "") symbol_table(0, 0, 0, 0, 0, "unknown") sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) @@ -603,6 +642,8 @@ def trace_end(): if perf_db_export_calls: do_query(query, 'CREATE INDEX pcpid_idx ON calls (parent_call_path_id)') do_query(query, 'CREATE INDEX pid_idx ON calls (parent_id)') + do_query(query, 'ALTER TABLE comms ADD has_calls boolean') + do_query(query, 'UPDATE comms SET has_calls = 1 WHERE comms.id IN (SELECT DISTINCT comm_id FROM calls)') printdate("Dropping unused tables") if is_table_empty("ptwrite"): @@ -615,6 +656,8 @@ def trace_end(): drop("pwrx") if is_table_empty("cbr"): drop("cbr") + if is_table_empty("context_switches"): + drop("context_switches") if (unhandled_count): printdate("Warning: ", unhandled_count, " unhandled events") @@ -642,7 +685,7 @@ def thread_table(*x): bind_exec(thread_query, 5, x) def comm_table(*x): - bind_exec(comm_query, 2, x) + bind_exec(comm_query, 5, x) def comm_thread_table(*x): bind_exec(comm_thread_query, 3, x) @@ -748,3 +791,6 @@ def synth_data(id, config, raw_buf, *x): pwrx(id, raw_buf) elif config == 5: cbr(id, raw_buf) + +def context_switch_table(*x): + bind_exec(context_switch_query, 9, x) diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index 6e7934f2ac9a..61b3911d91e6 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -392,7 +392,7 @@ class FindBar(): self.hbox.addWidget(self.close_button) self.bar = QWidget() - self.bar.setLayout(self.hbox); + self.bar.setLayout(self.hbox) self.bar.hide() def Widget(self): @@ -470,7 +470,7 @@ class CallGraphLevelItemBase(object): self.params = params self.row = row self.parent_item = parent_item - self.query_done = False; + self.query_done = False self.child_count = 0 self.child_items = [] if parent_item: @@ -517,7 +517,7 @@ class CallGraphLevelTwoPlusItemBase(CallGraphLevelItemBase): self.time = time def Select(self): - self.query_done = True; + self.query_done = True query = QSqlQuery(self.glb.db) if self.params.have_ipc: ipc_str = ", SUM(insn_count), SUM(cyc_count)" @@ -604,7 +604,7 @@ class CallGraphLevelOneItem(CallGraphLevelItemBase): self.dbid = comm_id def Select(self): - self.query_done = True; + self.query_done = True query = QSqlQuery(self.glb.db) QueryExec(query, "SELECT thread_id, pid, tid" " FROM comm_threads" @@ -622,9 +622,12 @@ class CallGraphRootItem(CallGraphLevelItemBase): def __init__(self, glb, params): super(CallGraphRootItem, self).__init__(glb, params, 0, None) self.dbid = 0 - self.query_done = True; + self.query_done = True + if_has_calls = "" + if IsSelectable(glb.db, "comms", columns = "has_calls"): + if_has_calls = " WHERE has_calls = TRUE" query = QSqlQuery(glb.db) - QueryExec(query, "SELECT id, comm FROM comms") + QueryExec(query, "SELECT id, comm FROM comms" + if_has_calls) while query.next(): if not query.value(0): continue @@ -793,7 +796,7 @@ class CallTreeLevelTwoPlusItemBase(CallGraphLevelItemBase): self.time = time def Select(self): - self.query_done = True; + self.query_done = True if self.calls_id == 0: comm_thread = " AND comm_id = " + str(self.comm_id) + " AND thread_id = " + str(self.thread_id) else: @@ -881,7 +884,7 @@ class CallTreeLevelOneItem(CallGraphLevelItemBase): self.dbid = comm_id def Select(self): - self.query_done = True; + self.query_done = True query = QSqlQuery(self.glb.db) QueryExec(query, "SELECT thread_id, pid, tid" " FROM comm_threads" @@ -899,9 +902,12 @@ class CallTreeRootItem(CallGraphLevelItemBase): def __init__(self, glb, params): super(CallTreeRootItem, self).__init__(glb, params, 0, None) self.dbid = 0 - self.query_done = True; + self.query_done = True + if_has_calls = "" + if IsSelectable(glb.db, "comms", columns = "has_calls"): + if_has_calls = " WHERE has_calls = TRUE" query = QSqlQuery(glb.db) - QueryExec(query, "SELECT id, comm FROM comms") + QueryExec(query, "SELECT id, comm FROM comms" + if_has_calls) while query.next(): if not query.value(0): continue @@ -971,7 +977,7 @@ class VBox(): def __init__(self, w1, w2, w3=None): self.vbox = QWidget() - self.vbox.setLayout(QVBoxLayout()); + self.vbox.setLayout(QVBoxLayout()) self.vbox.layout().setContentsMargins(0, 0, 0, 0) @@ -1391,7 +1397,7 @@ class FetchMoreRecordsBar(): self.hbox.addWidget(self.close_button) self.bar = QWidget() - self.bar.setLayout(self.hbox); + self.bar.setLayout(self.hbox) self.bar.show() self.in_progress = False @@ -2206,7 +2212,7 @@ class ReportDialogBase(QDialog): self.vbox.addLayout(self.grid) self.vbox.addLayout(self.hbox) - self.setLayout(self.vbox); + self.setLayout(self.vbox) def Ok(self): vars = self.report_vars @@ -3139,7 +3145,7 @@ class AboutDialog(QDialog): self.vbox = QVBoxLayout() self.vbox.addWidget(self.text) - self.setLayout(self.vbox); + self.setLayout(self.vbox) # Font resize diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 66a82badc1d1..c3bec9d2c201 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -21,6 +21,7 @@ #include <subcmd/parse-options.h> #include "string2.h" #include "symbol.h" +#include "util/rlimit.h" #include <linux/kernel.h> #include <linux/string.h> #include <subcmd/exec-cmd.h> @@ -727,6 +728,11 @@ int cmd_test(int argc, const char **argv) if (skip != NULL) skiplist = intlist__new(skip); + /* + * Tests that create BPF maps, for instance, need more than the 64K + * default: + */ + rlimit__bump_memlock(); return __cmd_test(argc, argv, skiplist); } diff --git a/tools/perf/util/Build b/tools/perf/util/Build index d7e3b008a613..14f812bb07a7 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -20,6 +20,7 @@ perf-y += parse-events.o perf-y += perf_regs.o perf-y += path.o perf-y += print_binary.o +perf-y += rlimit.o perf-y += argv_split.o perf-y += rbtree.o perf-y += libstring.o diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 67b88b599a53..3d1c34fc4d68 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -2460,7 +2460,7 @@ int cs_etm__process_auxtrace_info(union perf_event *event, /* Something went wrong, no need to continue */ if (!inode) { - err = PTR_ERR(inode); + err = -ENOMEM; goto err_free_metadata; } @@ -2517,8 +2517,10 @@ int cs_etm__process_auxtrace_info(union perf_event *event, session->auxtrace = &etm->auxtrace; etm->unknown_thread = thread__new(999999999, 999999999); - if (!etm->unknown_thread) + if (!etm->unknown_thread) { + err = -ENOMEM; goto err_free_queues; + } /* * Initialize list node so that at thread__zput() we can avoid @@ -2530,8 +2532,10 @@ int cs_etm__process_auxtrace_info(union perf_event *event, if (err) goto err_delete_thread; - if (thread__init_map_groups(etm->unknown_thread, etm->machine)) + if (thread__init_map_groups(etm->unknown_thread, etm->machine)) { + err = -ENOMEM; goto err_delete_thread; + } if (dump_trace) { cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu); @@ -2575,5 +2579,5 @@ err_free_traceid_list: err_free_hdr: zfree(&hdr); - return -EINVAL; + return err; } diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c index 2394c7506abe..ffbb3e7d3288 100644 --- a/tools/perf/util/db-export.c +++ b/tools/perf/util/db-export.c @@ -20,70 +20,14 @@ #include "db-export.h" #include <linux/zalloc.h> -struct deferred_export { - struct list_head node; - struct comm *comm; -}; - -static int db_export__deferred(struct db_export *dbe) -{ - struct deferred_export *de; - int err; - - while (!list_empty(&dbe->deferred)) { - de = list_entry(dbe->deferred.next, struct deferred_export, - node); - err = dbe->export_comm(dbe, de->comm); - list_del_init(&de->node); - free(de); - if (err) - return err; - } - - return 0; -} - -static void db_export__free_deferred(struct db_export *dbe) -{ - struct deferred_export *de; - - while (!list_empty(&dbe->deferred)) { - de = list_entry(dbe->deferred.next, struct deferred_export, - node); - list_del_init(&de->node); - free(de); - } -} - -static int db_export__defer_comm(struct db_export *dbe, struct comm *comm) -{ - struct deferred_export *de; - - de = zalloc(sizeof(struct deferred_export)); - if (!de) - return -ENOMEM; - - de->comm = comm; - list_add_tail(&de->node, &dbe->deferred); - - return 0; -} - int db_export__init(struct db_export *dbe) { memset(dbe, 0, sizeof(struct db_export)); - INIT_LIST_HEAD(&dbe->deferred); return 0; } -int db_export__flush(struct db_export *dbe) -{ - return db_export__deferred(dbe); -} - void db_export__exit(struct db_export *dbe) { - db_export__free_deferred(dbe); call_return_processor__free(dbe->crp); dbe->crp = NULL; } @@ -115,71 +59,73 @@ int db_export__machine(struct db_export *dbe, struct machine *machine) } int db_export__thread(struct db_export *dbe, struct thread *thread, - struct machine *machine, struct comm *comm) + struct machine *machine, struct thread *main_thread) { - struct thread *main_thread; u64 main_thread_db_id = 0; - int err; if (thread->db_id) return 0; thread->db_id = ++dbe->thread_last_db_id; - if (thread->pid_ != -1) { - if (thread->pid_ == thread->tid) { - main_thread = thread; - } else { - main_thread = machine__findnew_thread(machine, - thread->pid_, - thread->pid_); - if (!main_thread) - return -ENOMEM; - err = db_export__thread(dbe, main_thread, machine, - comm); - if (err) - goto out_put; - if (comm) { - err = db_export__comm_thread(dbe, comm, thread); - if (err) - goto out_put; - } - } + if (main_thread) main_thread_db_id = main_thread->db_id; - if (main_thread != thread) - thread__put(main_thread); - } if (dbe->export_thread) return dbe->export_thread(dbe, thread, main_thread_db_id, machine); return 0; +} -out_put: - thread__put(main_thread); - return err; +static int __db_export__comm(struct db_export *dbe, struct comm *comm, + struct thread *thread) +{ + comm->db_id = ++dbe->comm_last_db_id; + + if (dbe->export_comm) + return dbe->export_comm(dbe, comm, thread); + + return 0; } int db_export__comm(struct db_export *dbe, struct comm *comm, - struct thread *main_thread) + struct thread *thread) +{ + if (comm->db_id) + return 0; + + return __db_export__comm(dbe, comm, thread); +} + +/* + * Export the "exec" comm. The "exec" comm is the program / application command + * name at the time it first executes. It is used to group threads for the same + * program. Note that the main thread pid (or thread group id tgid) cannot be + * used because it does not change when a new program is exec'ed. + */ +int db_export__exec_comm(struct db_export *dbe, struct comm *comm, + struct thread *main_thread) { int err; if (comm->db_id) return 0; - comm->db_id = ++dbe->comm_last_db_id; - - if (dbe->export_comm) { - if (main_thread->comm_set) - err = dbe->export_comm(dbe, comm); - else - err = db_export__defer_comm(dbe, comm); - if (err) - return err; - } + err = __db_export__comm(dbe, comm, main_thread); + if (err) + return err; + /* + * Record the main thread for this comm. Note that the main thread can + * have many "exec" comms because there will be a new one every time it + * exec's. An "exec" comm however will only ever have 1 main thread. + * That is different to any other threads for that same program because + * exec() will effectively kill them, so the relationship between the + * "exec" comm and non-main threads is 1-to-1. That is why + * db_export__comm_thread() is called here for the main thread, but it + * is called for non-main threads when they are exported. + */ return db_export__comm_thread(dbe, comm, main_thread); } @@ -340,11 +286,65 @@ int db_export__branch_type(struct db_export *dbe, u32 branch_type, return 0; } +static int db_export__threads(struct db_export *dbe, struct thread *thread, + struct thread *main_thread, + struct machine *machine, struct comm **comm_ptr) +{ + struct comm *comm = NULL; + struct comm *curr_comm; + int err; + + if (main_thread) { + /* + * A thread has a reference to the main thread, so export the + * main thread first. + */ + err = db_export__thread(dbe, main_thread, machine, main_thread); + if (err) + return err; + /* + * Export comm before exporting the non-main thread because + * db_export__comm_thread() can be called further below. + */ + comm = machine__thread_exec_comm(machine, main_thread); + if (comm) { + err = db_export__exec_comm(dbe, comm, main_thread); + if (err) + return err; + *comm_ptr = comm; + } + } + + if (thread != main_thread) { + /* + * For a non-main thread, db_export__comm_thread() must be + * called only if thread has not previously been exported. + */ + bool export_comm_thread = comm && !thread->db_id; + + err = db_export__thread(dbe, thread, machine, main_thread); + if (err) + return err; + + if (export_comm_thread) { + err = db_export__comm_thread(dbe, comm, thread); + if (err) + return err; + } + } + + curr_comm = thread__comm(thread); + if (curr_comm) + return db_export__comm(dbe, curr_comm, thread); + + return 0; +} + int db_export__sample(struct db_export *dbe, union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, struct addr_location *al) { - struct thread* thread = al->thread; + struct thread *thread = al->thread; struct export_sample es = { .event = event, .sample = sample, @@ -364,19 +364,13 @@ int db_export__sample(struct db_export *dbe, union perf_event *event, return err; main_thread = thread__main_thread(al->machine, thread); - if (main_thread) - comm = machine__thread_exec_comm(al->machine, main_thread); - err = db_export__thread(dbe, thread, al->machine, comm); + err = db_export__threads(dbe, thread, main_thread, al->machine, &comm); if (err) goto out_put; - if (comm) { - err = db_export__comm(dbe, comm, main_thread); - if (err) - goto out_put; + if (comm) es.comm_db_id = comm->db_id; - } es.db_id = ++dbe->sample_last_db_id; @@ -525,3 +519,92 @@ int db_export__call_return(struct db_export *dbe, struct call_return *cr, return 0; } + +static int db_export__pid_tid(struct db_export *dbe, struct machine *machine, + pid_t pid, pid_t tid, u64 *db_id, + struct comm **comm_ptr, bool *is_idle) +{ + struct thread *thread = machine__find_thread(machine, pid, tid); + struct thread *main_thread; + int err = 0; + + if (!thread || !thread->comm_set) + goto out_put; + + *is_idle = !thread->pid_ && !thread->tid; + + main_thread = thread__main_thread(machine, thread); + + err = db_export__threads(dbe, thread, main_thread, machine, comm_ptr); + + *db_id = thread->db_id; + + thread__put(main_thread); +out_put: + thread__put(thread); + + return err; +} + +int db_export__switch(struct db_export *dbe, union perf_event *event, + struct perf_sample *sample, struct machine *machine) +{ + bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT; + bool out_preempt = out && + (event->header.misc & PERF_RECORD_MISC_SWITCH_OUT_PREEMPT); + int flags = out | (out_preempt << 1); + bool is_idle_a = false, is_idle_b = false; + u64 th_a_id = 0, th_b_id = 0; + u64 comm_out_id, comm_in_id; + struct comm *comm_a = NULL; + struct comm *comm_b = NULL; + u64 th_out_id, th_in_id; + u64 db_id; + int err; + + err = db_export__machine(dbe, machine); + if (err) + return err; + + err = db_export__pid_tid(dbe, machine, sample->pid, sample->tid, + &th_a_id, &comm_a, &is_idle_a); + if (err) + return err; + + if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) { + pid_t pid = event->context_switch.next_prev_pid; + pid_t tid = event->context_switch.next_prev_tid; + + err = db_export__pid_tid(dbe, machine, pid, tid, &th_b_id, + &comm_b, &is_idle_b); + if (err) + return err; + } + + /* + * Do not export if both threads are unknown (i.e. not being traced), + * or one is unknown and the other is the idle task. + */ + if ((!th_a_id || is_idle_a) && (!th_b_id || is_idle_b)) + return 0; + + db_id = ++dbe->context_switch_last_db_id; + + if (out) { + th_out_id = th_a_id; + th_in_id = th_b_id; + comm_out_id = comm_a ? comm_a->db_id : 0; + comm_in_id = comm_b ? comm_b->db_id : 0; + } else { + th_out_id = th_b_id; + th_in_id = th_a_id; + comm_out_id = comm_b ? comm_b->db_id : 0; + comm_in_id = comm_a ? comm_a->db_id : 0; + } + + if (dbe->export_context_switch) + return dbe->export_context_switch(dbe, db_id, machine, sample, + th_out_id, comm_out_id, + th_in_id, comm_in_id, flags); + return 0; +} diff --git a/tools/perf/util/db-export.h b/tools/perf/util/db-export.h index e8a64028a386..ba1f62a5fe10 100644 --- a/tools/perf/util/db-export.h +++ b/tools/perf/util/db-export.h @@ -43,7 +43,8 @@ struct db_export { int (*export_machine)(struct db_export *dbe, struct machine *machine); int (*export_thread)(struct db_export *dbe, struct thread *thread, u64 main_thread_db_id, struct machine *machine); - int (*export_comm)(struct db_export *dbe, struct comm *comm); + int (*export_comm)(struct db_export *dbe, struct comm *comm, + struct thread *thread); int (*export_comm_thread)(struct db_export *dbe, u64 db_id, struct comm *comm, struct thread *thread); int (*export_dso)(struct db_export *dbe, struct dso *dso, @@ -56,6 +57,11 @@ struct db_export { int (*export_call_path)(struct db_export *dbe, struct call_path *cp); int (*export_call_return)(struct db_export *dbe, struct call_return *cr); + int (*export_context_switch)(struct db_export *dbe, u64 db_id, + struct machine *machine, + struct perf_sample *sample, + u64 th_out_id, u64 comm_out_id, + u64 th_in_id, u64 comm_in_id, int flags); struct call_return_processor *crp; struct call_path_root *cpr; u64 evsel_last_db_id; @@ -68,18 +74,19 @@ struct db_export { u64 sample_last_db_id; u64 call_path_last_db_id; u64 call_return_last_db_id; - struct list_head deferred; + u64 context_switch_last_db_id; }; int db_export__init(struct db_export *dbe); -int db_export__flush(struct db_export *dbe); void db_export__exit(struct db_export *dbe); int db_export__evsel(struct db_export *dbe, struct perf_evsel *evsel); int db_export__machine(struct db_export *dbe, struct machine *machine); int db_export__thread(struct db_export *dbe, struct thread *thread, - struct machine *machine, struct comm *comm); + struct machine *machine, struct thread *main_thread); int db_export__comm(struct db_export *dbe, struct comm *comm, - struct thread *main_thread); + struct thread *thread); +int db_export__exec_comm(struct db_export *dbe, struct comm *comm, + struct thread *main_thread); int db_export__comm_thread(struct db_export *dbe, struct comm *comm, struct thread *thread); int db_export__dso(struct db_export *dbe, struct dso *dso, @@ -97,5 +104,7 @@ int db_export__branch_types(struct db_export *dbe); int db_export__call_path(struct db_export *dbe, struct call_path *cp); int db_export__call_return(struct db_export *dbe, struct call_return *cr, u64 *parent_db_id); +int db_export__switch(struct db_export *dbe, union perf_event *event, + struct perf_sample *sample, struct machine *machine); #endif diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 0c3b55d0617d..cd1eb73cfe83 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -1562,6 +1562,17 @@ static int parse_perf_probe_arg(char *str, struct perf_probe_arg *arg) str = tmp + 1; } + tmp = strchr(str, '@'); + if (tmp && tmp != str && strcmp(tmp + 1, "user")) { /* user attr */ + if (!user_access_is_supported()) { + semantic_error("ftrace does not support user access\n"); + return -EINVAL; + } + *tmp = '\0'; + arg->user_access = true; + pr_debug("user_access "); + } + tmp = strchr(str, ':'); if (tmp) { /* Type setting */ *tmp = '\0'; diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index 05c8d571a901..96a319cd2378 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -37,6 +37,7 @@ struct probe_trace_point { struct probe_trace_arg_ref { struct probe_trace_arg_ref *next; /* Next reference */ long offset; /* Offset value */ + bool user_access; /* User-memory access */ }; /* kprobe-tracer and uprobe-tracer tracing argument */ @@ -82,6 +83,7 @@ struct perf_probe_arg { char *var; /* Variable name */ char *type; /* Type name */ struct perf_probe_arg_field *field; /* Structure fields */ + bool user_access; /* User-memory access */ }; /* Perf probe probing event (point + arg) */ diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index c2998f90b23c..5b4d49382932 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -1005,6 +1005,7 @@ enum ftrace_readme { FTRACE_README_PROBE_TYPE_X = 0, FTRACE_README_KRETPROBE_OFFSET, FTRACE_README_UPROBE_REF_CTR, + FTRACE_README_USER_ACCESS, FTRACE_README_END, }; @@ -1017,6 +1018,7 @@ static struct { DEFINE_TYPE(FTRACE_README_PROBE_TYPE_X, "*type: * x8/16/32/64,*"), DEFINE_TYPE(FTRACE_README_KRETPROBE_OFFSET, "*place (kretprobe): *"), DEFINE_TYPE(FTRACE_README_UPROBE_REF_CTR, "*ref_ctr_offset*"), + DEFINE_TYPE(FTRACE_README_USER_ACCESS, "*[u]<offset>*"), }; static bool scan_ftrace_readme(enum ftrace_readme type) @@ -1077,3 +1079,8 @@ bool uprobe_ref_ctr_is_supported(void) { return scan_ftrace_readme(FTRACE_README_UPROBE_REF_CTR); } + +bool user_access_is_supported(void) +{ + return scan_ftrace_readme(FTRACE_README_USER_ACCESS); +} diff --git a/tools/perf/util/probe-file.h b/tools/perf/util/probe-file.h index 2a249182f2a6..986c1c94f64f 100644 --- a/tools/perf/util/probe-file.h +++ b/tools/perf/util/probe-file.h @@ -70,6 +70,7 @@ int probe_cache__show_all_caches(struct strfilter *filter); bool probe_type_is_available(enum probe_type type); bool kretprobe_offset_is_supported(void); bool uprobe_ref_ctr_is_supported(void); +bool user_access_is_supported(void); #else /* ! HAVE_LIBELF_SUPPORT */ static inline struct probe_cache *probe_cache__new(const char *tgt __maybe_unused, struct nsinfo *nsi __maybe_unused) { diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 7d8c99734928..025fc4491993 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -280,7 +280,7 @@ static_var: static int convert_variable_type(Dwarf_Die *vr_die, struct probe_trace_arg *tvar, - const char *cast) + const char *cast, bool user_access) { struct probe_trace_arg_ref **ref_ptr = &tvar->ref; Dwarf_Die type; @@ -320,7 +320,8 @@ static int convert_variable_type(Dwarf_Die *vr_die, pr_debug("%s type is %s.\n", dwarf_diename(vr_die), dwarf_diename(&type)); - if (cast && strcmp(cast, "string") == 0) { /* String type */ + if (cast && (!strcmp(cast, "string") || !strcmp(cast, "ustring"))) { + /* String type */ ret = dwarf_tag(&type); if (ret != DW_TAG_pointer_type && ret != DW_TAG_array_type) { @@ -343,6 +344,7 @@ static int convert_variable_type(Dwarf_Die *vr_die, pr_warning("Out of memory error\n"); return -ENOMEM; } + (*ref_ptr)->user_access = user_access; } if (!die_compare_name(&type, "char") && !die_compare_name(&type, "unsigned char")) { @@ -397,7 +399,7 @@ formatted: static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname, struct perf_probe_arg_field *field, struct probe_trace_arg_ref **ref_ptr, - Dwarf_Die *die_mem) + Dwarf_Die *die_mem, bool user_access) { struct probe_trace_arg_ref *ref = *ref_ptr; Dwarf_Die type; @@ -434,6 +436,7 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname, *ref_ptr = ref; } ref->offset += dwarf_bytesize(&type) * field->index; + ref->user_access = user_access; goto next; } else if (tag == DW_TAG_pointer_type) { /* Check the pointer and dereference */ @@ -505,17 +508,18 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname, } } ref->offset += (long)offs; + ref->user_access = user_access; /* If this member is unnamed, we need to reuse this field */ if (!dwarf_diename(die_mem)) return convert_variable_fields(die_mem, varname, field, - &ref, die_mem); + &ref, die_mem, user_access); next: /* Converting next field */ if (field->next) return convert_variable_fields(die_mem, field->name, - field->next, &ref, die_mem); + field->next, &ref, die_mem, user_access); else return 0; } @@ -541,11 +545,12 @@ static int convert_variable(Dwarf_Die *vr_die, struct probe_finder *pf) else if (ret == 0 && pf->pvar->field) { ret = convert_variable_fields(vr_die, pf->pvar->var, pf->pvar->field, &pf->tvar->ref, - &die_mem); + &die_mem, pf->pvar->user_access); vr_die = &die_mem; } if (ret == 0) - ret = convert_variable_type(vr_die, pf->tvar, pf->pvar->type); + ret = convert_variable_type(vr_die, pf->tvar, pf->pvar->type, + pf->pvar->user_access); /* *expr will be cached in libdw. Don't free it. */ return ret; } diff --git a/tools/perf/util/rlimit.c b/tools/perf/util/rlimit.c new file mode 100644 index 000000000000..13521d392a22 --- /dev/null +++ b/tools/perf/util/rlimit.c @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: LGPL-2.1 */ + +#include "util/debug.h" +#include "util/rlimit.h" +#include <sys/time.h> +#include <sys/resource.h> + +/* + * Bump the memlock so that we can get bpf maps of a reasonable size, + * like the ones used with 'perf trace' and with 'perf test bpf', + * improve this to some specific request if needed. + */ +void rlimit__bump_memlock(void) +{ + struct rlimit rlim; + + if (getrlimit(RLIMIT_MEMLOCK, &rlim) == 0) { + rlim.rlim_cur *= 4; + rlim.rlim_max *= 4; + + if (setrlimit(RLIMIT_MEMLOCK, &rlim) < 0) { + rlim.rlim_cur /= 2; + rlim.rlim_max /= 2; + + if (setrlimit(RLIMIT_MEMLOCK, &rlim) < 0) + pr_debug("Couldn't bump rlimit(MEMLOCK), failures may take place when creating BPF maps, etc\n"); + } + } +} diff --git a/tools/perf/util/rlimit.h b/tools/perf/util/rlimit.h new file mode 100644 index 000000000000..9f59d8e710a3 --- /dev/null +++ b/tools/perf/util/rlimit.h @@ -0,0 +1,6 @@ +#ifndef __PERF_RLIMIT_H_ +#define __PERF_RLIMIT_H_ +/* SPDX-License-Identifier: LGPL-2.1 */ + +void rlimit__bump_memlock(void); +#endif // __PERF_RLIMIT_H_ diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 112bed65232f..25dc1d765553 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -113,6 +113,7 @@ struct tables { PyObject *call_path_handler; PyObject *call_return_handler; PyObject *synth_handler; + PyObject *context_switch_handler; bool db_export_mode; }; @@ -1011,15 +1012,19 @@ static int python_export_thread(struct db_export *dbe, struct thread *thread, return 0; } -static int python_export_comm(struct db_export *dbe, struct comm *comm) +static int python_export_comm(struct db_export *dbe, struct comm *comm, + struct thread *thread) { struct tables *tables = container_of(dbe, struct tables, dbe); PyObject *t; - t = tuple_new(2); + t = tuple_new(5); tuple_set_u64(t, 0, comm->db_id); tuple_set_string(t, 1, comm__str(comm)); + tuple_set_u64(t, 2, thread->db_id); + tuple_set_u64(t, 3, comm->start); + tuple_set_s32(t, 4, comm->exec); call_object(tables->comm_handler, t, "comm_table"); @@ -1233,6 +1238,34 @@ static int python_export_call_return(struct db_export *dbe, return 0; } +static int python_export_context_switch(struct db_export *dbe, u64 db_id, + struct machine *machine, + struct perf_sample *sample, + u64 th_out_id, u64 comm_out_id, + u64 th_in_id, u64 comm_in_id, int flags) +{ + struct tables *tables = container_of(dbe, struct tables, dbe); + PyObject *t; + + t = tuple_new(9); + + tuple_set_u64(t, 0, db_id); + tuple_set_u64(t, 1, machine->db_id); + tuple_set_u64(t, 2, sample->time); + tuple_set_s32(t, 3, sample->cpu); + tuple_set_u64(t, 4, th_out_id); + tuple_set_u64(t, 5, comm_out_id); + tuple_set_u64(t, 6, th_in_id); + tuple_set_u64(t, 7, comm_in_id); + tuple_set_s32(t, 8, flags); + + call_object(tables->context_switch_handler, t, "context_switch"); + + Py_DECREF(t); + + return 0; +} + static int python_process_call_return(struct call_return *cr, u64 *parent_db_id, void *data) { @@ -1296,6 +1329,16 @@ static void python_process_event(union perf_event *event, } } +static void python_process_switch(union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct tables *tables = &tables_global; + + if (tables->db_export_mode) + db_export__switch(&tables->dbe, event, sample, machine); +} + static void get_handler_name(char *str, size_t size, struct perf_evsel *evsel) { @@ -1511,6 +1554,7 @@ static void set_table_handlers(struct tables *tables) SET_TABLE_HANDLER(sample); SET_TABLE_HANDLER(call_path); SET_TABLE_HANDLER(call_return); + SET_TABLE_HANDLER(context_switch); /* * Synthesized events are samples but with architecture-specific data @@ -1620,9 +1664,7 @@ error: static int python_flush_script(void) { - struct tables *tables = &tables_global; - - return db_export__flush(&tables->dbe); + return 0; } /* @@ -1831,6 +1873,7 @@ struct scripting_ops python_scripting_ops = { .flush_script = python_flush_script, .stop_script = python_stop_script, .process_event = python_process_event, + .process_switch = python_process_switch, .process_stat = python_process_stat, .process_stat_interval = python_process_stat_interval, .generate_script = python_generate_script, diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index d9b0a942090a..c7002fe11673 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -81,6 +81,9 @@ struct scripting_ops { struct perf_sample *sample, struct perf_evsel *evsel, struct addr_location *al); + void (*process_switch)(union perf_event *event, + struct perf_sample *sample, + struct machine *machine); void (*process_stat)(struct perf_stat_config *config, struct perf_evsel *evsel, u64 tstamp); void (*process_stat_interval)(u64 tstamp); diff --git a/tools/power/cpupower/debug/kernel/Makefile b/tools/power/cpupower/debug/kernel/Makefile index c23e5a6ceb7e..7b5c43684be1 100644 --- a/tools/power/cpupower/debug/kernel/Makefile +++ b/tools/power/cpupower/debug/kernel/Makefile @@ -12,8 +12,8 @@ default: $(MAKE) -C $(KDIR) M=$(CURDIR) clean: - - rm -rf *.o *.ko .tmp-versions .*.cmd .*.mod.* *.mod.c - - rm -rf .tmp_versions* Module.symvers modules.order + - rm -rf *.o *.ko .*.cmd .*.mod.* *.mod.c + - rm -rf Module.symvers modules.order install: default install -d $(KMISC) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 2620406a53ec..11c9c62c3362 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -1,4 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 +include ../../../../scripts/Kbuild.include +include ../../../scripts/Makefile.arch LIBDIR := ../../../lib BPFDIR := $(LIBDIR)/bpf @@ -81,13 +83,14 @@ all: $(TEST_CUSTOM_PROGS) $(OUTPUT)/urandom_read: $(OUTPUT)/%: %.c $(CC) -o $@ $< -Wl,--build-id -$(OUTPUT)/test_maps: map_tests/*.c +$(OUTPUT)/test_stub.o: test_stub.c + $(CC) $(TEST_PROGS_CFLAGS) $(CFLAGS) -c -o $@ $< BPFOBJ := $(OUTPUT)/libbpf.a -$(TEST_GEN_PROGS): test_stub.o $(BPFOBJ) +$(TEST_GEN_PROGS): $(OUTPUT)/test_stub.o $(BPFOBJ) -$(TEST_GEN_PROGS_EXTENDED): test_stub.o $(OUTPUT)/libbpf.a +$(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/test_stub.o $(OUTPUT)/libbpf.a $(OUTPUT)/test_dev_cgroup: cgroup_helpers.c $(OUTPUT)/test_skb_cgroup_id_user: cgroup_helpers.c @@ -138,7 +141,8 @@ CLANG_SYS_INCLUDES := $(shell $(CLANG) -v -E - </dev/null 2>&1 \ CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \ $(CLANG_SYS_INCLUDES) \ - -Wno-compare-distinct-pointer-types + -Wno-compare-distinct-pointer-types \ + -D__TARGET_ARCH_$(SRCARCH) $(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline $(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline @@ -172,6 +176,7 @@ endif endif TEST_PROGS_CFLAGS := -I. -I$(OUTPUT) +TEST_MAPS_CFLAGS := -I. -I$(OUTPUT) TEST_VERIFIER_CFLAGS := -I. -I$(OUTPUT) -Iverifier ifneq ($(SUBREG_CODEGEN),) @@ -180,12 +185,12 @@ TEST_CUSTOM_PROGS += $(ALU32_BUILD_DIR)/test_progs_32 $(ALU32_BUILD_DIR): mkdir -p $@ -$(ALU32_BUILD_DIR)/urandom_read: $(OUTPUT)/urandom_read +$(ALU32_BUILD_DIR)/urandom_read: $(OUTPUT)/urandom_read | $(ALU32_BUILD_DIR) cp $< $@ $(ALU32_BUILD_DIR)/test_progs_32: test_progs.c $(OUTPUT)/libbpf.a\ - $(ALU32_BUILD_DIR) \ - $(ALU32_BUILD_DIR)/urandom_read + $(ALU32_BUILD_DIR)/urandom_read \ + | $(ALU32_BUILD_DIR) $(CC) $(TEST_PROGS_CFLAGS) $(CFLAGS) \ -o $(ALU32_BUILD_DIR)/test_progs_32 \ test_progs.c test_stub.c trace_helpers.c prog_tests/*.c \ @@ -194,10 +199,10 @@ $(ALU32_BUILD_DIR)/test_progs_32: test_progs.c $(OUTPUT)/libbpf.a\ $(ALU32_BUILD_DIR)/test_progs_32: $(PROG_TESTS_H) $(ALU32_BUILD_DIR)/test_progs_32: prog_tests/*.c -$(ALU32_BUILD_DIR)/%.o: progs/%.c $(ALU32_BUILD_DIR) \ - $(ALU32_BUILD_DIR)/test_progs_32 - $(CLANG) $(CLANG_FLAGS) \ - -O2 -target bpf -emit-llvm -c $< -o - | \ +$(ALU32_BUILD_DIR)/%.o: progs/%.c $(ALU32_BUILD_DIR)/test_progs_32 \ + | $(ALU32_BUILD_DIR) + ($(CLANG) $(CLANG_FLAGS) -O2 -target bpf -emit-llvm -c $< -o - || \ + echo "clang failed") | \ $(LLC) -march=bpf -mattr=+alu32 -mcpu=$(CPU) $(LLC_FLAGS) \ -filetype=obj -o $@ ifeq ($(DWARF2BTF),y) @@ -208,32 +213,30 @@ endif # Have one program compiled without "-target bpf" to test whether libbpf loads # it successfully $(OUTPUT)/test_xdp.o: progs/test_xdp.c - $(CLANG) $(CLANG_FLAGS) \ - -O2 -emit-llvm -c $< -o - | \ + ($(CLANG) $(CLANG_FLAGS) -O2 -emit-llvm -c $< -o - || \ + echo "clang failed") | \ $(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@ ifeq ($(DWARF2BTF),y) $(BTF_PAHOLE) -J $@ endif $(OUTPUT)/%.o: progs/%.c - $(CLANG) $(CLANG_FLAGS) \ - -O2 -target bpf -emit-llvm -c $< -o - | \ + ($(CLANG) $(CLANG_FLAGS) -O2 -target bpf -emit-llvm -c $< -o - || \ + echo "clang failed") | \ $(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@ ifeq ($(DWARF2BTF),y) $(BTF_PAHOLE) -J $@ endif -PROG_TESTS_H := $(OUTPUT)/prog_tests/tests.h -test_progs.c: $(PROG_TESTS_H) -$(OUTPUT)/test_progs: CFLAGS += $(TEST_PROGS_CFLAGS) -$(OUTPUT)/test_progs: prog_tests/*.c - PROG_TESTS_DIR = $(OUTPUT)/prog_tests $(PROG_TESTS_DIR): mkdir -p $@ - +PROG_TESTS_H := $(PROG_TESTS_DIR)/tests.h PROG_TESTS_FILES := $(wildcard prog_tests/*.c) -$(PROG_TESTS_H): $(PROG_TESTS_DIR) $(PROG_TESTS_FILES) +test_progs.c: $(PROG_TESTS_H) +$(OUTPUT)/test_progs: CFLAGS += $(TEST_PROGS_CFLAGS) +$(OUTPUT)/test_progs: test_progs.c $(PROG_TESTS_H) $(PROG_TESTS_FILES) +$(PROG_TESTS_H): $(PROG_TESTS_FILES) | $(PROG_TESTS_DIR) $(shell ( cd prog_tests/; \ echo '/* Generated header, do not edit */'; \ echo '#ifdef DECLARE'; \ @@ -246,15 +249,15 @@ $(PROG_TESTS_H): $(PROG_TESTS_DIR) $(PROG_TESTS_FILES) echo '#endif' \ ) > $(PROG_TESTS_H)) -TEST_MAPS_CFLAGS := -I. -I$(OUTPUT) MAP_TESTS_DIR = $(OUTPUT)/map_tests $(MAP_TESTS_DIR): mkdir -p $@ MAP_TESTS_H := $(MAP_TESTS_DIR)/tests.h +MAP_TESTS_FILES := $(wildcard map_tests/*.c) test_maps.c: $(MAP_TESTS_H) $(OUTPUT)/test_maps: CFLAGS += $(TEST_MAPS_CFLAGS) -MAP_TESTS_FILES := $(wildcard map_tests/*.c) -$(MAP_TESTS_H): $(MAP_TESTS_DIR) $(MAP_TESTS_FILES) +$(OUTPUT)/test_maps: test_maps.c $(MAP_TESTS_H) $(MAP_TESTS_FILES) +$(MAP_TESTS_H): $(MAP_TESTS_FILES) | $(MAP_TESTS_DIR) $(shell ( cd map_tests/; \ echo '/* Generated header, do not edit */'; \ echo '#ifdef DECLARE'; \ @@ -267,16 +270,15 @@ $(MAP_TESTS_H): $(MAP_TESTS_DIR) $(MAP_TESTS_FILES) echo '#endif' \ ) > $(MAP_TESTS_H)) -VERIFIER_TESTS_H := $(OUTPUT)/verifier/tests.h -test_verifier.c: $(VERIFIER_TESTS_H) -$(OUTPUT)/test_verifier: CFLAGS += $(TEST_VERIFIER_CFLAGS) - VERIFIER_TESTS_DIR = $(OUTPUT)/verifier $(VERIFIER_TESTS_DIR): mkdir -p $@ - +VERIFIER_TESTS_H := $(VERIFIER_TESTS_DIR)/tests.h VERIFIER_TEST_FILES := $(wildcard verifier/*.c) -$(OUTPUT)/verifier/tests.h: $(VERIFIER_TESTS_DIR) $(VERIFIER_TEST_FILES) +test_verifier.c: $(VERIFIER_TESTS_H) +$(OUTPUT)/test_verifier: CFLAGS += $(TEST_VERIFIER_CFLAGS) +$(OUTPUT)/test_verifier: test_verifier.c $(VERIFIER_TESTS_H) +$(VERIFIER_TESTS_H): $(VERIFIER_TEST_FILES) | $(VERIFIER_TESTS_DIR) $(shell ( cd verifier/; \ echo '/* Generated header, do not edit */'; \ echo '#ifdef FILL_ARRAY'; \ diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 5a3d92c8bec8..f804f210244e 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -315,8 +315,8 @@ static int (*bpf_skb_adjust_room)(void *ctx, __s32 len_diff, __u32 mode, #if defined(__TARGET_ARCH_x86) #define bpf_target_x86 #define bpf_target_defined -#elif defined(__TARGET_ARCH_s930x) - #define bpf_target_s930x +#elif defined(__TARGET_ARCH_s390) + #define bpf_target_s390 #define bpf_target_defined #elif defined(__TARGET_ARCH_arm) #define bpf_target_arm @@ -341,8 +341,8 @@ static int (*bpf_skb_adjust_room)(void *ctx, __s32 len_diff, __u32 mode, #ifndef bpf_target_defined #if defined(__x86_64__) #define bpf_target_x86 -#elif defined(__s390x__) - #define bpf_target_s930x +#elif defined(__s390__) + #define bpf_target_s390 #elif defined(__arm__) #define bpf_target_arm #elif defined(__aarch64__) @@ -358,6 +358,7 @@ static int (*bpf_skb_adjust_room)(void *ctx, __s32 len_diff, __u32 mode, #if defined(bpf_target_x86) +#ifdef __KERNEL__ #define PT_REGS_PARM1(x) ((x)->di) #define PT_REGS_PARM2(x) ((x)->si) #define PT_REGS_PARM3(x) ((x)->dx) @@ -368,19 +369,49 @@ static int (*bpf_skb_adjust_room)(void *ctx, __s32 len_diff, __u32 mode, #define PT_REGS_RC(x) ((x)->ax) #define PT_REGS_SP(x) ((x)->sp) #define PT_REGS_IP(x) ((x)->ip) +#else +#ifdef __i386__ +/* i386 kernel is built with -mregparm=3 */ +#define PT_REGS_PARM1(x) ((x)->eax) +#define PT_REGS_PARM2(x) ((x)->edx) +#define PT_REGS_PARM3(x) ((x)->ecx) +#define PT_REGS_PARM4(x) 0 +#define PT_REGS_PARM5(x) 0 +#define PT_REGS_RET(x) ((x)->esp) +#define PT_REGS_FP(x) ((x)->ebp) +#define PT_REGS_RC(x) ((x)->eax) +#define PT_REGS_SP(x) ((x)->esp) +#define PT_REGS_IP(x) ((x)->eip) +#else +#define PT_REGS_PARM1(x) ((x)->rdi) +#define PT_REGS_PARM2(x) ((x)->rsi) +#define PT_REGS_PARM3(x) ((x)->rdx) +#define PT_REGS_PARM4(x) ((x)->rcx) +#define PT_REGS_PARM5(x) ((x)->r8) +#define PT_REGS_RET(x) ((x)->rsp) +#define PT_REGS_FP(x) ((x)->rbp) +#define PT_REGS_RC(x) ((x)->rax) +#define PT_REGS_SP(x) ((x)->rsp) +#define PT_REGS_IP(x) ((x)->rip) +#endif +#endif -#elif defined(bpf_target_s390x) +#elif defined(bpf_target_s390) -#define PT_REGS_PARM1(x) ((x)->gprs[2]) -#define PT_REGS_PARM2(x) ((x)->gprs[3]) -#define PT_REGS_PARM3(x) ((x)->gprs[4]) -#define PT_REGS_PARM4(x) ((x)->gprs[5]) -#define PT_REGS_PARM5(x) ((x)->gprs[6]) -#define PT_REGS_RET(x) ((x)->gprs[14]) -#define PT_REGS_FP(x) ((x)->gprs[11]) /* Works only with CONFIG_FRAME_POINTER */ -#define PT_REGS_RC(x) ((x)->gprs[2]) -#define PT_REGS_SP(x) ((x)->gprs[15]) -#define PT_REGS_IP(x) ((x)->psw.addr) +/* s390 provides user_pt_regs instead of struct pt_regs to userspace */ +struct pt_regs; +#define PT_REGS_S390 const volatile user_pt_regs +#define PT_REGS_PARM1(x) (((PT_REGS_S390 *)(x))->gprs[2]) +#define PT_REGS_PARM2(x) (((PT_REGS_S390 *)(x))->gprs[3]) +#define PT_REGS_PARM3(x) (((PT_REGS_S390 *)(x))->gprs[4]) +#define PT_REGS_PARM4(x) (((PT_REGS_S390 *)(x))->gprs[5]) +#define PT_REGS_PARM5(x) (((PT_REGS_S390 *)(x))->gprs[6]) +#define PT_REGS_RET(x) (((PT_REGS_S390 *)(x))->gprs[14]) +/* Works only with CONFIG_FRAME_POINTER */ +#define PT_REGS_FP(x) (((PT_REGS_S390 *)(x))->gprs[11]) +#define PT_REGS_RC(x) (((PT_REGS_S390 *)(x))->gprs[2]) +#define PT_REGS_SP(x) (((PT_REGS_S390 *)(x))->gprs[15]) +#define PT_REGS_IP(x) (((PT_REGS_S390 *)(x))->psw.addr) #elif defined(bpf_target_arm) @@ -397,16 +428,20 @@ static int (*bpf_skb_adjust_room)(void *ctx, __s32 len_diff, __u32 mode, #elif defined(bpf_target_arm64) -#define PT_REGS_PARM1(x) ((x)->regs[0]) -#define PT_REGS_PARM2(x) ((x)->regs[1]) -#define PT_REGS_PARM3(x) ((x)->regs[2]) -#define PT_REGS_PARM4(x) ((x)->regs[3]) -#define PT_REGS_PARM5(x) ((x)->regs[4]) -#define PT_REGS_RET(x) ((x)->regs[30]) -#define PT_REGS_FP(x) ((x)->regs[29]) /* Works only with CONFIG_FRAME_POINTER */ -#define PT_REGS_RC(x) ((x)->regs[0]) -#define PT_REGS_SP(x) ((x)->sp) -#define PT_REGS_IP(x) ((x)->pc) +/* arm64 provides struct user_pt_regs instead of struct pt_regs to userspace */ +struct pt_regs; +#define PT_REGS_ARM64 const volatile struct user_pt_regs +#define PT_REGS_PARM1(x) (((PT_REGS_ARM64 *)(x))->regs[0]) +#define PT_REGS_PARM2(x) (((PT_REGS_ARM64 *)(x))->regs[1]) +#define PT_REGS_PARM3(x) (((PT_REGS_ARM64 *)(x))->regs[2]) +#define PT_REGS_PARM4(x) (((PT_REGS_ARM64 *)(x))->regs[3]) +#define PT_REGS_PARM5(x) (((PT_REGS_ARM64 *)(x))->regs[4]) +#define PT_REGS_RET(x) (((PT_REGS_ARM64 *)(x))->regs[30]) +/* Works only with CONFIG_FRAME_POINTER */ +#define PT_REGS_FP(x) (((PT_REGS_ARM64 *)(x))->regs[29]) +#define PT_REGS_RC(x) (((PT_REGS_ARM64 *)(x))->regs[0]) +#define PT_REGS_SP(x) (((PT_REGS_ARM64 *)(x))->sp) +#define PT_REGS_IP(x) (((PT_REGS_ARM64 *)(x))->pc) #elif defined(bpf_target_mips) @@ -452,10 +487,10 @@ static int (*bpf_skb_adjust_room)(void *ctx, __s32 len_diff, __u32 mode, #endif -#ifdef bpf_target_powerpc +#if defined(bpf_target_powerpc) #define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = (ctx)->link; }) #define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP -#elif bpf_target_sparc +#elif defined(bpf_target_sparc) #define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = PT_REGS_RET(ctx); }) #define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP #else diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c index a4686395522c..5ecc267d98b0 100644 --- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c +++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c @@ -21,12 +21,6 @@ ssize_t get_base_addr() { return -EINVAL; } -#ifdef __x86_64__ -#define SYS_KPROBE_NAME "__x64_sys_nanosleep" -#else -#define SYS_KPROBE_NAME "sys_nanosleep" -#endif - void test_attach_probe(void) { const char *kprobe_name = "kprobe/sys_nanosleep"; @@ -84,7 +78,7 @@ void test_attach_probe(void) kprobe_link = bpf_program__attach_kprobe(kprobe_prog, false /* retprobe */, - SYS_KPROBE_NAME); + SYS_NANOSLEEP_KPROBE_NAME); if (CHECK(IS_ERR(kprobe_link), "attach_kprobe", "err %ld\n", PTR_ERR(kprobe_link))) { kprobe_link = NULL; @@ -92,7 +86,7 @@ void test_attach_probe(void) } kretprobe_link = bpf_program__attach_kprobe(kretprobe_prog, true /* retprobe */, - SYS_KPROBE_NAME); + SYS_NANOSLEEP_KPROBE_NAME); if (CHECK(IS_ERR(kretprobe_link), "attach_kretprobe", "err %ld\n", PTR_ERR(kretprobe_link))) { kretprobe_link = NULL; diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c index 3f1ef95865ff..3003fddc0613 100644 --- a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c +++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c @@ -5,12 +5,6 @@ #include <sys/socket.h> #include <test_progs.h> -#ifdef __x86_64__ -#define SYS_KPROBE_NAME "__x64_sys_nanosleep" -#else -#define SYS_KPROBE_NAME "sys_nanosleep" -#endif - static void on_sample(void *ctx, int cpu, void *data, __u32 size) { int cpu_data = *(int *)data, duration = 0; @@ -56,7 +50,7 @@ void test_perf_buffer(void) /* attach kprobe */ link = bpf_program__attach_kprobe(prog, false /* retprobe */, - SYS_KPROBE_NAME); + SYS_NANOSLEEP_KPROBE_NAME); if (CHECK(IS_ERR(link), "attach_kprobe", "err %ld\n", PTR_ERR(link))) goto out_close; diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c index 67cea1686305..54218ee3c004 100644 --- a/tools/testing/selftests/bpf/prog_tests/send_signal.c +++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c @@ -173,6 +173,18 @@ static int test_send_signal_tracepoint(void) return test_send_signal_common(&attr, BPF_PROG_TYPE_TRACEPOINT, "tracepoint"); } +static int test_send_signal_perf(void) +{ + struct perf_event_attr attr = { + .sample_period = 1, + .type = PERF_TYPE_SOFTWARE, + .config = PERF_COUNT_SW_CPU_CLOCK, + }; + + return test_send_signal_common(&attr, BPF_PROG_TYPE_PERF_EVENT, + "perf_sw_event"); +} + static int test_send_signal_nmi(void) { struct perf_event_attr attr = { @@ -181,8 +193,26 @@ static int test_send_signal_nmi(void) .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES, }; + int pmu_fd; + + /* Some setups (e.g. virtual machines) might run with hardware + * perf events disabled. If this is the case, skip this test. + */ + pmu_fd = syscall(__NR_perf_event_open, &attr, 0 /* pid */, + -1 /* cpu */, -1 /* group_fd */, 0 /* flags */); + if (pmu_fd == -1) { + if (errno == ENOENT) { + printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n", + __func__); + return 0; + } + /* Let the test fail with a more informative message */ + } else { + close(pmu_fd); + } - return test_send_signal_common(&attr, BPF_PROG_TYPE_PERF_EVENT, "perf_event"); + return test_send_signal_common(&attr, BPF_PROG_TYPE_PERF_EVENT, + "perf_hw_event"); } void test_send_signal(void) @@ -190,6 +220,7 @@ void test_send_signal(void) int ret = 0; ret |= test_send_signal_tracepoint(); + ret |= test_send_signal_perf(); ret |= test_send_signal_nmi(); if (!ret) printf("test_send_signal:OK\n"); diff --git a/tools/testing/selftests/bpf/progs/loop1.c b/tools/testing/selftests/bpf/progs/loop1.c index dea395af9ea9..7cdb7f878310 100644 --- a/tools/testing/selftests/bpf/progs/loop1.c +++ b/tools/testing/selftests/bpf/progs/loop1.c @@ -18,7 +18,7 @@ int nested_loops(volatile struct pt_regs* ctx) for (j = 0; j < 300; j++) for (i = 0; i < j; i++) { if (j & 1) - m = ctx->rax; + m = PT_REGS_RC(ctx); else m = j; sum += i * m; diff --git a/tools/testing/selftests/bpf/progs/loop2.c b/tools/testing/selftests/bpf/progs/loop2.c index 0637bd8e8bcf..9b2f808a2863 100644 --- a/tools/testing/selftests/bpf/progs/loop2.c +++ b/tools/testing/selftests/bpf/progs/loop2.c @@ -16,7 +16,7 @@ int while_true(volatile struct pt_regs* ctx) int i = 0; while (true) { - if (ctx->rax & 1) + if (PT_REGS_RC(ctx) & 1) i += 3; else i += 7; diff --git a/tools/testing/selftests/bpf/progs/loop3.c b/tools/testing/selftests/bpf/progs/loop3.c index 30a0f6cba080..d727657d51e2 100644 --- a/tools/testing/selftests/bpf/progs/loop3.c +++ b/tools/testing/selftests/bpf/progs/loop3.c @@ -16,7 +16,7 @@ int while_true(volatile struct pt_regs* ctx) __u64 i = 0, sum = 0; do { i++; - sum += ctx->rax; + sum += PT_REGS_RC(ctx); } while (i < 0x100000000ULL); return sum; } diff --git a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c index d06b47a09097..33254b771384 100644 --- a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c +++ b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c @@ -47,11 +47,12 @@ struct { * issue and avoid complicated C programming massaging. * This is an acceptable workaround since there is one entry here. */ +typedef __u64 raw_stack_trace_t[2 * MAX_STACK_RAWTP]; struct { __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); __uint(max_entries, 1); __type(key, __u32); - __u64 (*value)[2 * MAX_STACK_RAWTP]; + __type(value, raw_stack_trace_t); } rawdata_map SEC(".maps"); SEC("tracepoint/raw_syscalls/sys_enter") diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c index bbfc8337b6f0..f5638e26865d 100644 --- a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c +++ b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c @@ -36,8 +36,7 @@ struct { __uint(type, BPF_MAP_TYPE_ARRAY); __uint(max_entries, 128); __type(key, __u32); - /* there seems to be a bug in kernel not handling typedef properly */ - struct bpf_stack_build_id (*value)[PERF_MAX_STACK_DEPTH]; + __type(value, stack_trace_t); } stack_amap SEC(".maps"); /* taken from /sys/kernel/debug/tracing/events/random/urandom_read/format */ diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c index 803c15dc109d..fa0be3e10a10 100644 --- a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c +++ b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c @@ -35,7 +35,7 @@ struct { __uint(type, BPF_MAP_TYPE_ARRAY); __uint(max_entries, 16384); __type(key, __u32); - __u64 (*value)[PERF_MAX_STACK_DEPTH]; + __type(value, stack_trace_t); } stack_amap SEC(".maps"); /* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */ diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c index dad8a7e33eaa..e88d7b9d65ab 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c @@ -14,6 +14,7 @@ #include <linux/tcp.h> #include <linux/udp.h> #include "bpf_helpers.h" +#include "bpf_endian.h" static __u32 rol32(__u32 word, unsigned int shift) { @@ -305,7 +306,7 @@ bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval, ip6h->nexthdr = IPPROTO_IPV6; ip_suffix = pckt->flow.srcv6[3] ^ pckt->flow.port16[0]; ip6h->payload_len = - __builtin_bswap16(pkt_bytes + sizeof(struct ipv6hdr)); + bpf_htons(pkt_bytes + sizeof(struct ipv6hdr)); ip6h->hop_limit = 4; ip6h->saddr.in6_u.u6_addr32[0] = 1; @@ -322,7 +323,7 @@ bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval, struct real_definition *dst, __u32 pkt_bytes) { - __u32 ip_suffix = __builtin_bswap16(pckt->flow.port16[0]); + __u32 ip_suffix = bpf_ntohs(pckt->flow.port16[0]); struct eth_hdr *new_eth; struct eth_hdr *old_eth; __u16 *next_iph_u16; @@ -352,7 +353,7 @@ bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval, iph->protocol = IPPROTO_IPIP; iph->check = 0; iph->tos = 1; - iph->tot_len = __builtin_bswap16(pkt_bytes + sizeof(struct iphdr)); + iph->tot_len = bpf_htons(pkt_bytes + sizeof(struct iphdr)); /* don't update iph->daddr, since it will overwrite old eth_proto * and multiple iterations of bpf_prog_run() will fail */ @@ -639,7 +640,7 @@ static int process_l3_headers_v6(struct packet_description *pckt, iph_len = sizeof(struct ipv6hdr); *protocol = ip6h->nexthdr; pckt->flow.proto = *protocol; - *pkt_bytes = __builtin_bswap16(ip6h->payload_len); + *pkt_bytes = bpf_ntohs(ip6h->payload_len); off += iph_len; if (*protocol == 45) { return XDP_DROP; @@ -671,7 +672,7 @@ static int process_l3_headers_v4(struct packet_description *pckt, return XDP_DROP; *protocol = iph->protocol; pckt->flow.proto = *protocol; - *pkt_bytes = __builtin_bswap16(iph->tot_len); + *pkt_bytes = bpf_ntohs(iph->tot_len); off += 20; if (iph->frag_off & 65343) return XDP_DROP; @@ -808,10 +809,10 @@ int balancer_ingress(struct xdp_md *ctx) nh_off = sizeof(struct eth_hdr); if (data + nh_off > data_end) return XDP_DROP; - eth_proto = eth->eth_proto; - if (eth_proto == 8) + eth_proto = bpf_ntohs(eth->eth_proto); + if (eth_proto == ETH_P_IP) return process_packet(data, nh_off, data_end, 0, ctx); - else if (eth_proto == 56710) + else if (eth_proto == ETH_P_IPV6) return process_packet(data, nh_off, data_end, 1, ctx); else return XDP_DROP; diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index 8351cb5f4a20..3d617e806054 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -3417,6 +3417,94 @@ static struct btf_raw_test raw_tests[] = { .value_type_id = 1, .max_entries = 4, }, +/* + * typedef int arr_t[16]; + * struct s { + * arr_t *a; + * }; + */ +{ + .descr = "struct->ptr->typedef->array->int size resolution", + .raw_types = { + BTF_STRUCT_ENC(NAME_TBD, 1, 8), /* [1] */ + BTF_MEMBER_ENC(NAME_TBD, 2, 0), + BTF_PTR_ENC(3), /* [2] */ + BTF_TYPEDEF_ENC(NAME_TBD, 4), /* [3] */ + BTF_TYPE_ARRAY_ENC(5, 5, 16), /* [4] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [5] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0s\0a\0arr_t"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "ptr_mod_chain_size_resolve_map", + .key_size = sizeof(int), + .value_size = sizeof(int) * 16, + .key_type_id = 5 /* int */, + .value_type_id = 3 /* arr_t */, + .max_entries = 4, +}, +/* + * typedef int arr_t[16][8][4]; + * struct s { + * arr_t *a; + * }; + */ +{ + .descr = "struct->ptr->typedef->multi-array->int size resolution", + .raw_types = { + BTF_STRUCT_ENC(NAME_TBD, 1, 8), /* [1] */ + BTF_MEMBER_ENC(NAME_TBD, 2, 0), + BTF_PTR_ENC(3), /* [2] */ + BTF_TYPEDEF_ENC(NAME_TBD, 4), /* [3] */ + BTF_TYPE_ARRAY_ENC(5, 7, 16), /* [4] */ + BTF_TYPE_ARRAY_ENC(6, 7, 8), /* [5] */ + BTF_TYPE_ARRAY_ENC(7, 7, 4), /* [6] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [7] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0s\0a\0arr_t"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "multi_arr_size_resolve_map", + .key_size = sizeof(int), + .value_size = sizeof(int) * 16 * 8 * 4, + .key_type_id = 7 /* int */, + .value_type_id = 3 /* arr_t */, + .max_entries = 4, +}, +/* + * typedef int int_t; + * typedef int_t arr3_t[4]; + * typedef arr3_t arr2_t[8]; + * typedef arr2_t arr1_t[16]; + * struct s { + * arr1_t *a; + * }; + */ +{ + .descr = "typedef/multi-arr mix size resolution", + .raw_types = { + BTF_STRUCT_ENC(NAME_TBD, 1, 8), /* [1] */ + BTF_MEMBER_ENC(NAME_TBD, 2, 0), + BTF_PTR_ENC(3), /* [2] */ + BTF_TYPEDEF_ENC(NAME_TBD, 4), /* [3] */ + BTF_TYPE_ARRAY_ENC(5, 10, 16), /* [4] */ + BTF_TYPEDEF_ENC(NAME_TBD, 6), /* [5] */ + BTF_TYPE_ARRAY_ENC(7, 10, 8), /* [6] */ + BTF_TYPEDEF_ENC(NAME_TBD, 8), /* [7] */ + BTF_TYPE_ARRAY_ENC(9, 10, 4), /* [8] */ + BTF_TYPEDEF_ENC(NAME_TBD, 10), /* [9] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [10] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0s\0a\0arr1_t\0arr2_t\0arr3_t\0int_t"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "typedef_arra_mix_size_resolve_map", + .key_size = sizeof(int), + .value_size = sizeof(int) * 16 * 8 * 4, + .key_type_id = 10 /* int */, + .value_type_id = 3 /* arr_t */, + .max_entries = 4, +}, }; /* struct btf_raw_test raw_tests[] */ diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index f095e1d4c657..49e0f7d85643 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -92,3 +92,11 @@ int compare_map_keys(int map1_fd, int map2_fd); int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len); int extract_build_id(char *build_id, size_t size); void *spin_lock_thread(void *arg); + +#ifdef __x86_64__ +#define SYS_NANOSLEEP_KPROBE_NAME "__x64_sys_nanosleep" +#elif defined(__s390x__) +#define SYS_NANOSLEEP_KPROBE_NAME "__s390x_sys_nanosleep" +#else +#define SYS_NANOSLEEP_KPROBE_NAME "sys_nanosleep" +#endif diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index b0773291012a..84135d5f4b35 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -86,7 +86,7 @@ struct bpf_test { int fixup_sk_storage_map[MAX_FIXUPS]; const char *errstr; const char *errstr_unpriv; - uint32_t retval, retval_unpriv, insn_processed; + uint32_t insn_processed; int prog_len; enum { UNDEF, @@ -95,16 +95,20 @@ struct bpf_test { } result, result_unpriv; enum bpf_prog_type prog_type; uint8_t flags; - __u8 data[TEST_DATA_LEN]; void (*fill_helper)(struct bpf_test *self); uint8_t runs; - struct { - uint32_t retval, retval_unpriv; - union { - __u8 data[TEST_DATA_LEN]; - __u64 data64[TEST_DATA_LEN / 8]; - }; - } retvals[MAX_TEST_RUNS]; +#define bpf_testdata_struct_t \ + struct { \ + uint32_t retval, retval_unpriv; \ + union { \ + __u8 data[TEST_DATA_LEN]; \ + __u64 data64[TEST_DATA_LEN / 8]; \ + }; \ + } + union { + bpf_testdata_struct_t; + bpf_testdata_struct_t retvals[MAX_TEST_RUNS]; + }; enum bpf_attach_type expected_attach_type; }; @@ -949,17 +953,8 @@ static void do_test_single(struct bpf_test *test, bool unpriv, uint32_t expected_val; int i; - if (!test->runs) { - expected_val = unpriv && test->retval_unpriv ? - test->retval_unpriv : test->retval; - - err = do_prog_test_run(fd_prog, unpriv, expected_val, - test->data, sizeof(test->data)); - if (err) - run_errs++; - else - run_successes++; - } + if (!test->runs) + test->runs = 1; for (i = 0; i < test->runs; i++) { if (unpriv && test->retvals[i].retval_unpriv) diff --git a/tools/testing/selftests/bpf/verifier/array_access.c b/tools/testing/selftests/bpf/verifier/array_access.c index bcb83196e459..f3c33e128709 100644 --- a/tools/testing/selftests/bpf/verifier/array_access.c +++ b/tools/testing/selftests/bpf/verifier/array_access.c @@ -226,7 +226,7 @@ BPF_LD_MAP_FD(BPF_REG_1, 0), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, .fixup_map_array_ro = { 3 }, diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c index c3de1a2c9dc5..a53d99cebd9f 100644 --- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c +++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c @@ -183,7 +183,7 @@ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), BPF_EXIT_INSN(), - BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3), BPF_MOV64_IMM(BPF_REG_2, 0), BPF_MOV64_IMM(BPF_REG_3, 0x100000), diff --git a/tools/testing/selftests/bpf/verifier/wide_access.c b/tools/testing/selftests/bpf/verifier/wide_access.c new file mode 100644 index 000000000000..ccade9312d21 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/wide_access.c @@ -0,0 +1,73 @@ +#define BPF_SOCK_ADDR_STORE(field, off, res, err) \ +{ \ + "wide store to bpf_sock_addr." #field "[" #off "]", \ + .insns = { \ + BPF_MOV64_IMM(BPF_REG_0, 1), \ + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, \ + offsetof(struct bpf_sock_addr, field[off])), \ + BPF_EXIT_INSN(), \ + }, \ + .result = res, \ + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR, \ + .expected_attach_type = BPF_CGROUP_UDP6_SENDMSG, \ + .errstr = err, \ +} + +/* user_ip6[0] is u64 aligned */ +BPF_SOCK_ADDR_STORE(user_ip6, 0, ACCEPT, + NULL), +BPF_SOCK_ADDR_STORE(user_ip6, 1, REJECT, + "invalid bpf_context access off=12 size=8"), +BPF_SOCK_ADDR_STORE(user_ip6, 2, ACCEPT, + NULL), +BPF_SOCK_ADDR_STORE(user_ip6, 3, REJECT, + "invalid bpf_context access off=20 size=8"), + +/* msg_src_ip6[0] is _not_ u64 aligned */ +BPF_SOCK_ADDR_STORE(msg_src_ip6, 0, REJECT, + "invalid bpf_context access off=44 size=8"), +BPF_SOCK_ADDR_STORE(msg_src_ip6, 1, ACCEPT, + NULL), +BPF_SOCK_ADDR_STORE(msg_src_ip6, 2, REJECT, + "invalid bpf_context access off=52 size=8"), +BPF_SOCK_ADDR_STORE(msg_src_ip6, 3, REJECT, + "invalid bpf_context access off=56 size=8"), + +#undef BPF_SOCK_ADDR_STORE + +#define BPF_SOCK_ADDR_LOAD(field, off, res, err) \ +{ \ + "wide load from bpf_sock_addr." #field "[" #off "]", \ + .insns = { \ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, \ + offsetof(struct bpf_sock_addr, field[off])), \ + BPF_MOV64_IMM(BPF_REG_0, 1), \ + BPF_EXIT_INSN(), \ + }, \ + .result = res, \ + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR, \ + .expected_attach_type = BPF_CGROUP_UDP6_SENDMSG, \ + .errstr = err, \ +} + +/* user_ip6[0] is u64 aligned */ +BPF_SOCK_ADDR_LOAD(user_ip6, 0, ACCEPT, + NULL), +BPF_SOCK_ADDR_LOAD(user_ip6, 1, REJECT, + "invalid bpf_context access off=12 size=8"), +BPF_SOCK_ADDR_LOAD(user_ip6, 2, ACCEPT, + NULL), +BPF_SOCK_ADDR_LOAD(user_ip6, 3, REJECT, + "invalid bpf_context access off=20 size=8"), + +/* msg_src_ip6[0] is _not_ u64 aligned */ +BPF_SOCK_ADDR_LOAD(msg_src_ip6, 0, REJECT, + "invalid bpf_context access off=44 size=8"), +BPF_SOCK_ADDR_LOAD(msg_src_ip6, 1, ACCEPT, + NULL), +BPF_SOCK_ADDR_LOAD(msg_src_ip6, 2, REJECT, + "invalid bpf_context access off=52 size=8"), +BPF_SOCK_ADDR_LOAD(msg_src_ip6, 3, REJECT, + "invalid bpf_context access off=56 size=8"), + +#undef BPF_SOCK_ADDR_LOAD diff --git a/tools/testing/selftests/bpf/verifier/wide_store.c b/tools/testing/selftests/bpf/verifier/wide_store.c deleted file mode 100644 index 8fe99602ded4..000000000000 --- a/tools/testing/selftests/bpf/verifier/wide_store.c +++ /dev/null @@ -1,36 +0,0 @@ -#define BPF_SOCK_ADDR(field, off, res, err) \ -{ \ - "wide store to bpf_sock_addr." #field "[" #off "]", \ - .insns = { \ - BPF_MOV64_IMM(BPF_REG_0, 1), \ - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, \ - offsetof(struct bpf_sock_addr, field[off])), \ - BPF_EXIT_INSN(), \ - }, \ - .result = res, \ - .prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR, \ - .expected_attach_type = BPF_CGROUP_UDP6_SENDMSG, \ - .errstr = err, \ -} - -/* user_ip6[0] is u64 aligned */ -BPF_SOCK_ADDR(user_ip6, 0, ACCEPT, - NULL), -BPF_SOCK_ADDR(user_ip6, 1, REJECT, - "invalid bpf_context access off=12 size=8"), -BPF_SOCK_ADDR(user_ip6, 2, ACCEPT, - NULL), -BPF_SOCK_ADDR(user_ip6, 3, REJECT, - "invalid bpf_context access off=20 size=8"), - -/* msg_src_ip6[0] is _not_ u64 aligned */ -BPF_SOCK_ADDR(msg_src_ip6, 0, REJECT, - "invalid bpf_context access off=44 size=8"), -BPF_SOCK_ADDR(msg_src_ip6, 1, ACCEPT, - NULL), -BPF_SOCK_ADDR(msg_src_ip6, 2, REJECT, - "invalid bpf_context access off=52 size=8"), -BPF_SOCK_ADDR(msg_src_ip6, 3, REJECT, - "invalid bpf_context access off=56 size=8"), - -#undef BPF_SOCK_ADDR diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest index 6d5e9e87c4b7..063ecb290a5a 100755 --- a/tools/testing/selftests/ftrace/ftracetest +++ b/tools/testing/selftests/ftrace/ftracetest @@ -23,9 +23,15 @@ echo " If <dir> is -, all logs output in console only" exit $1 } +# default error +err_ret=1 + +# kselftest skip code is 4 +err_skip=4 + errexit() { # message echo "Error: $1" 1>&2 - exit 1 + exit $err_ret } # Ensuring user privilege @@ -116,11 +122,31 @@ parse_opts() { # opts } # Parameters -DEBUGFS_DIR=`grep debugfs /proc/mounts | cut -f2 -d' ' | head -1` -if [ -z "$DEBUGFS_DIR" ]; then - TRACING_DIR=`grep tracefs /proc/mounts | cut -f2 -d' ' | head -1` -else - TRACING_DIR=$DEBUGFS_DIR/tracing +TRACING_DIR=`grep tracefs /proc/mounts | cut -f2 -d' ' | head -1` +if [ -z "$TRACING_DIR" ]; then + DEBUGFS_DIR=`grep debugfs /proc/mounts | cut -f2 -d' ' | head -1` + if [ -z "$DEBUGFS_DIR" ]; then + # If tracefs exists, then so does /sys/kernel/tracing + if [ -d "/sys/kernel/tracing" ]; then + mount -t tracefs nodev /sys/kernel/tracing || + errexit "Failed to mount /sys/kernel/tracing" + TRACING_DIR="/sys/kernel/tracing" + # If debugfs exists, then so does /sys/kernel/debug + elif [ -d "/sys/kernel/debug" ]; then + mount -t debugfs nodev /sys/kernel/debug || + errexit "Failed to mount /sys/kernel/debug" + TRACING_DIR="/sys/kernel/debug/tracing" + else + err_ret=$err_skip + errexit "debugfs and tracefs are not configured in this kernel" + fi + else + TRACING_DIR="$DEBUGFS_DIR/tracing" + fi +fi +if [ ! -d "$TRACING_DIR" ]; then + err_ret=$err_skip + errexit "ftrace is not configured in this kernel" fi TOP_DIR=`absdir $0` diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions index 779ec11f61bd..1d96c5f7e402 100644 --- a/tools/testing/selftests/ftrace/test.d/functions +++ b/tools/testing/selftests/ftrace/test.d/functions @@ -91,8 +91,8 @@ initialize_ftrace() { # Reset ftrace to initial-state reset_events_filter reset_ftrace_filter disable_events - echo > set_event_pid # event tracer is always on - echo > set_ftrace_pid + [ -f set_event_pid ] && echo > set_event_pid + [ -f set_ftrace_pid ] && echo > set_ftrace_pid [ -f set_ftrace_filter ] && echo | tee set_ftrace_* [ -f set_graph_function ] && echo | tee set_graph_* [ -f stack_trace_filter ] && echo > stack_trace_filter diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc new file mode 100644 index 000000000000..0f60087583d8 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc @@ -0,0 +1,32 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Kprobe event user-memory access + +[ -f kprobe_events ] || exit_unsupported # this is configurable + +grep -q '\$arg<N>' README || exit_unresolved # depends on arch +grep -A10 "fetcharg:" README | grep -q 'ustring' || exit_unsupported +grep -A10 "fetcharg:" README | grep -q '\[u\]<offset>' || exit_unsupported + +:;: "user-memory access syntax and ustring working on user memory";: +echo 'p:myevent do_sys_open path=+0($arg2):ustring path2=+u0($arg2):string' \ + > kprobe_events + +grep myevent kprobe_events | \ + grep -q 'path=+0($arg2):ustring path2=+u0($arg2):string' +echo 1 > events/kprobes/myevent/enable +echo > /dev/null +echo 0 > events/kprobes/myevent/enable + +grep myevent trace | grep -q 'path="/dev/null" path2="/dev/null"' + +:;: "user-memory access syntax and ustring not working with kernel memory";: +echo 'p:myevent vfs_symlink path=+0($arg3):ustring path2=+u0($arg3):string' \ + > kprobe_events +echo 1 > events/kprobes/myevent/enable +ln -s foo $TMPDIR/bar +echo 0 > events/kprobes/myevent/enable + +grep myevent trace | grep -q 'path=(fault) path2=(fault)' + +exit 0 diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 62afd0b43074..ba7849751989 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -10,11 +10,11 @@ UNAME_M := $(shell uname -m) LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/ucall.c lib/sparsebit.c LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c LIBKVM_aarch64 = lib/aarch64/processor.c +LIBKVM_s390x = lib/s390x/processor.c TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid -TEST_GEN_PROGS_x86_64 += x86_64/kvm_create_max_vcpus TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test @@ -26,9 +26,14 @@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test TEST_GEN_PROGS_x86_64 += clear_dirty_log_test TEST_GEN_PROGS_x86_64 += dirty_log_test +TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus TEST_GEN_PROGS_aarch64 += clear_dirty_log_test TEST_GEN_PROGS_aarch64 += dirty_log_test +TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus + +TEST_GEN_PROGS_s390x += s390x/sync_regs_test +TEST_GEN_PROGS_s390x += kvm_create_max_vcpus TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M)) LIBKVM += $(LIBKVM_$(UNAME_M)) @@ -43,7 +48,12 @@ CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \ $(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie) -LDFLAGS += -pthread $(no-pie-option) +# On s390, build the testcases KVM-enabled +pgste-option = $(call try-run, echo 'int main() { return 0; }' | \ + $(CC) -Werror -Wl$(comma)--s390-pgste -x c - -o "$$TMP",-Wl$(comma)--s390-pgste) + + +LDFLAGS += -pthread $(no-pie-option) $(pgste-option) # After inclusion, $(OUTPUT) is defined and # $(TEST_GEN_PROGS) starts with $(OUTPUT)/ diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 00235f5932f0..e0e66b115ef2 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -41,6 +41,12 @@ enum vm_guest_mode { NUM_VM_MODES, }; +#ifdef __aarch64__ +#define VM_MODE_DEFAULT VM_MODE_P40V48_4K +#else +#define VM_MODE_DEFAULT VM_MODE_P52V48_4K +#endif + #define vm_guest_mode_string(m) vm_guest_mode_string[m] extern const char * const vm_guest_mode_string[]; @@ -111,10 +117,12 @@ void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs); int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs); +#ifdef __KVM_HAVE_VCPU_EVENTS void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_vcpu_events *events); void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_vcpu_events *events); +#endif #ifdef __x86_64__ void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_nested_state *state); diff --git a/tools/testing/selftests/kvm/include/s390x/processor.h b/tools/testing/selftests/kvm/include/s390x/processor.h new file mode 100644 index 000000000000..e0e96a5f608c --- /dev/null +++ b/tools/testing/selftests/kvm/include/s390x/processor.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * s390x processor specific defines + */ +#ifndef SELFTEST_KVM_PROCESSOR_H +#define SELFTEST_KVM_PROCESSOR_H + +/* Bits in the region/segment table entry */ +#define REGION_ENTRY_ORIGIN ~0xfffUL /* region/segment table origin */ +#define REGION_ENTRY_PROTECT 0x200 /* region protection bit */ +#define REGION_ENTRY_NOEXEC 0x100 /* region no-execute bit */ +#define REGION_ENTRY_OFFSET 0xc0 /* region table offset */ +#define REGION_ENTRY_INVALID 0x20 /* invalid region table entry */ +#define REGION_ENTRY_TYPE 0x0c /* region/segment table type mask */ +#define REGION_ENTRY_LENGTH 0x03 /* region third length */ + +/* Bits in the page table entry */ +#define PAGE_INVALID 0x400 /* HW invalid bit */ +#define PAGE_PROTECT 0x200 /* HW read-only bit */ +#define PAGE_NOEXEC 0x100 /* HW no-execute bit */ + +#endif diff --git a/tools/testing/selftests/kvm/x86_64/kvm_create_max_vcpus.c b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c index 429226bc6a92..231d79e57774 100644 --- a/tools/testing/selftests/kvm/x86_64/kvm_create_max_vcpus.c +++ b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c @@ -27,7 +27,7 @@ void test_vcpu_creation(int first_vcpu_id, int num_vcpus) printf("Testing creating %d vCPUs, with IDs %d...%d.\n", num_vcpus, first_vcpu_id, first_vcpu_id + num_vcpus - 1); - vm = vm_create(VM_MODE_P52V48_4K, DEFAULT_GUEST_PHY_PAGES, O_RDWR); + vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR); for (i = 0; i < num_vcpus; i++) { int vcpu_id = first_vcpu_id + i; diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index af2023d818a5..486400a97374 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -227,7 +227,7 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, uint64_t extra_pg_pages = (extra_mem_pages / ptrs_per_4k_pte) * 2; struct kvm_vm *vm; - vm = vm_create(VM_MODE_P40V48_4K, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); + vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); kvm_vm_elf_load(vm, program_invocation_name, 0, 0); vm_vcpu_add_default(vm, vcpuid, guest_code); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 221e3fa46680..6e49bb039376 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -556,6 +556,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, int ret; struct userspace_mem_region *region; size_t huge_page_size = KVM_UTIL_PGS_PER_HUGEPG * vm->page_size; + size_t alignment; TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical " "address not on a page boundary.\n" @@ -605,9 +606,20 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, TEST_ASSERT(region != NULL, "Insufficient Memory"); region->mmap_size = npages * vm->page_size; - /* Enough memory to align up to a huge page. */ +#ifdef __s390x__ + /* On s390x, the host address must be aligned to 1M (due to PGSTEs) */ + alignment = 0x100000; +#else + alignment = 1; +#endif + if (src_type == VM_MEM_SRC_ANONYMOUS_THP) - region->mmap_size += huge_page_size; + alignment = max(huge_page_size, alignment); + + /* Add enough memory to align up if necessary */ + if (alignment > 1) + region->mmap_size += alignment; + region->mmap_start = mmap(NULL, region->mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS @@ -617,9 +629,8 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, "test_malloc failed, mmap_start: %p errno: %i", region->mmap_start, errno); - /* Align THP allocation up to start of a huge page. */ - region->host_mem = align(region->mmap_start, - src_type == VM_MEM_SRC_ANONYMOUS_THP ? huge_page_size : 1); + /* Align host address */ + region->host_mem = align(region->mmap_start, alignment); /* As needed perform madvise */ if (src_type == VM_MEM_SRC_ANONYMOUS || src_type == VM_MEM_SRC_ANONYMOUS_THP) { @@ -1218,6 +1229,7 @@ void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs) ret, errno); } +#ifdef __KVM_HAVE_VCPU_EVENTS void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_vcpu_events *events) { @@ -1243,6 +1255,7 @@ void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid, TEST_ASSERT(ret == 0, "KVM_SET_VCPU_EVENTS, failed, rc: %i errno: %i", ret, errno); } +#endif #ifdef __x86_64__ void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid, diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c new file mode 100644 index 000000000000..32a02360b1eb --- /dev/null +++ b/tools/testing/selftests/kvm/lib/s390x/processor.c @@ -0,0 +1,278 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * KVM selftest s390x library code - CPU-related functions (page tables...) + * + * Copyright (C) 2019, Red Hat, Inc. + */ + +#define _GNU_SOURCE /* for program_invocation_name */ + +#include "processor.h" +#include "kvm_util.h" +#include "../kvm_util_internal.h" + +#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000 + +#define PAGES_PER_REGION 4 + +void virt_pgd_alloc(struct kvm_vm *vm, uint32_t memslot) +{ + vm_paddr_t paddr; + + TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x", + vm->page_size); + + if (vm->pgd_created) + return; + + paddr = vm_phy_pages_alloc(vm, PAGES_PER_REGION, + KVM_GUEST_PAGE_TABLE_MIN_PADDR, memslot); + memset(addr_gpa2hva(vm, paddr), 0xff, PAGES_PER_REGION * vm->page_size); + + vm->pgd = paddr; + vm->pgd_created = true; +} + +/* + * Allocate 4 pages for a region/segment table (ri < 4), or one page for + * a page table (ri == 4). Returns a suitable region/segment table entry + * which points to the freshly allocated pages. + */ +static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri, uint32_t memslot) +{ + uint64_t taddr; + + taddr = vm_phy_pages_alloc(vm, ri < 4 ? PAGES_PER_REGION : 1, + KVM_GUEST_PAGE_TABLE_MIN_PADDR, memslot); + memset(addr_gpa2hva(vm, taddr), 0xff, PAGES_PER_REGION * vm->page_size); + + return (taddr & REGION_ENTRY_ORIGIN) + | (((4 - ri) << 2) & REGION_ENTRY_TYPE) + | ((ri < 4 ? (PAGES_PER_REGION - 1) : 0) & REGION_ENTRY_LENGTH); +} + +/* + * VM Virtual Page Map + * + * Input Args: + * vm - Virtual Machine + * gva - VM Virtual Address + * gpa - VM Physical Address + * memslot - Memory region slot for new virtual translation tables + * + * Output Args: None + * + * Return: None + * + * Within the VM given by vm, creates a virtual translation for the page + * starting at vaddr to the page starting at paddr. + */ +void virt_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa, + uint32_t memslot) +{ + int ri, idx; + uint64_t *entry; + + TEST_ASSERT((gva % vm->page_size) == 0, + "Virtual address not on page boundary,\n" + " vaddr: 0x%lx vm->page_size: 0x%x", + gva, vm->page_size); + TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, + (gva >> vm->page_shift)), + "Invalid virtual address, vaddr: 0x%lx", + gva); + TEST_ASSERT((gpa % vm->page_size) == 0, + "Physical address not on page boundary,\n" + " paddr: 0x%lx vm->page_size: 0x%x", + gva, vm->page_size); + TEST_ASSERT((gpa >> vm->page_shift) <= vm->max_gfn, + "Physical address beyond beyond maximum supported,\n" + " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", + gva, vm->max_gfn, vm->page_size); + + /* Walk through region and segment tables */ + entry = addr_gpa2hva(vm, vm->pgd); + for (ri = 1; ri <= 4; ri++) { + idx = (gva >> (64 - 11 * ri)) & 0x7ffu; + if (entry[idx] & REGION_ENTRY_INVALID) + entry[idx] = virt_alloc_region(vm, ri, memslot); + entry = addr_gpa2hva(vm, entry[idx] & REGION_ENTRY_ORIGIN); + } + + /* Fill in page table entry */ + idx = (gva >> 12) & 0x0ffu; /* page index */ + if (!(entry[idx] & PAGE_INVALID)) + fprintf(stderr, + "WARNING: PTE for gpa=0x%"PRIx64" already set!\n", gpa); + entry[idx] = gpa; +} + +/* + * Address Guest Virtual to Guest Physical + * + * Input Args: + * vm - Virtual Machine + * gpa - VM virtual address + * + * Output Args: None + * + * Return: + * Equivalent VM physical address + * + * Translates the VM virtual address given by gva to a VM physical + * address and then locates the memory region containing the VM + * physical address, within the VM given by vm. When found, the host + * virtual address providing the memory to the vm physical address is + * returned. + * A TEST_ASSERT failure occurs if no region containing translated + * VM virtual address exists. + */ +vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) +{ + int ri, idx; + uint64_t *entry; + + TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x", + vm->page_size); + + entry = addr_gpa2hva(vm, vm->pgd); + for (ri = 1; ri <= 4; ri++) { + idx = (gva >> (64 - 11 * ri)) & 0x7ffu; + TEST_ASSERT(!(entry[idx] & REGION_ENTRY_INVALID), + "No region mapping for vm virtual address 0x%lx", + gva); + entry = addr_gpa2hva(vm, entry[idx] & REGION_ENTRY_ORIGIN); + } + + idx = (gva >> 12) & 0x0ffu; /* page index */ + + TEST_ASSERT(!(entry[idx] & PAGE_INVALID), + "No page mapping for vm virtual address 0x%lx", gva); + + return (entry[idx] & ~0xffful) + (gva & 0xffful); +} + +static void virt_dump_ptes(FILE *stream, struct kvm_vm *vm, uint8_t indent, + uint64_t ptea_start) +{ + uint64_t *pte, ptea; + + for (ptea = ptea_start; ptea < ptea_start + 0x100 * 8; ptea += 8) { + pte = addr_gpa2hva(vm, ptea); + if (*pte & PAGE_INVALID) + continue; + fprintf(stream, "%*spte @ 0x%lx: 0x%016lx\n", + indent, "", ptea, *pte); + } +} + +static void virt_dump_region(FILE *stream, struct kvm_vm *vm, uint8_t indent, + uint64_t reg_tab_addr) +{ + uint64_t addr, *entry; + + for (addr = reg_tab_addr; addr < reg_tab_addr + 0x400 * 8; addr += 8) { + entry = addr_gpa2hva(vm, addr); + if (*entry & REGION_ENTRY_INVALID) + continue; + fprintf(stream, "%*srt%lde @ 0x%lx: 0x%016lx\n", + indent, "", 4 - ((*entry & REGION_ENTRY_TYPE) >> 2), + addr, *entry); + if (*entry & REGION_ENTRY_TYPE) { + virt_dump_region(stream, vm, indent + 2, + *entry & REGION_ENTRY_ORIGIN); + } else { + virt_dump_ptes(stream, vm, indent + 2, + *entry & REGION_ENTRY_ORIGIN); + } + } +} + +void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) +{ + if (!vm->pgd_created) + return; + + virt_dump_region(stream, vm, indent, vm->pgd); +} + +/* + * Create a VM with reasonable defaults + * + * Input Args: + * vcpuid - The id of the single VCPU to add to the VM. + * extra_mem_pages - The size of extra memories to add (this will + * decide how much extra space we will need to + * setup the page tables using mem slot 0) + * guest_code - The vCPU's entry point + * + * Output Args: None + * + * Return: + * Pointer to opaque structure that describes the created VM. + */ +struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, + void *guest_code) +{ + /* + * The additional amount of pages required for the page tables is: + * 1 * n / 256 + 4 * (n / 256) / 2048 + 4 * (n / 256) / 2048^2 + ... + * which is definitely smaller than (n / 256) * 2. + */ + uint64_t extra_pg_pages = extra_mem_pages / 256 * 2; + struct kvm_vm *vm; + + vm = vm_create(VM_MODE_DEFAULT, + DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); + + kvm_vm_elf_load(vm, program_invocation_name, 0, 0); + vm_vcpu_add_default(vm, vcpuid, guest_code); + + return vm; +} + +/* + * Adds a vCPU with reasonable defaults (i.e. a stack and initial PSW) + * + * Input Args: + * vcpuid - The id of the VCPU to add to the VM. + * guest_code - The vCPU's entry point + */ +void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code) +{ + size_t stack_size = DEFAULT_STACK_PGS * getpagesize(); + uint64_t stack_vaddr; + struct kvm_regs regs; + struct kvm_sregs sregs; + struct kvm_run *run; + + TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x", + vm->page_size); + + stack_vaddr = vm_vaddr_alloc(vm, stack_size, + DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0); + + vm_vcpu_add(vm, vcpuid); + + /* Setup guest registers */ + vcpu_regs_get(vm, vcpuid, ®s); + regs.gprs[15] = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize()) - 160; + vcpu_regs_set(vm, vcpuid, ®s); + + vcpu_sregs_get(vm, vcpuid, &sregs); + sregs.crs[0] |= 0x00040000; /* Enable floating point regs */ + sregs.crs[1] = vm->pgd | 0xf; /* Primary region table */ + vcpu_sregs_set(vm, vcpuid, &sregs); + + run = vcpu_state(vm, vcpuid); + run->psw_mask = 0x0400000180000000ULL; /* DAT enabled + 64 bit mode */ + run->psw_addr = (uintptr_t)guest_code; +} + +void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent) +{ + struct vcpu *vcpu = vm->vcpu_head; + + fprintf(stream, "%*spstate: psw: 0x%.16llx:0x%.16llx\n", + indent, "", vcpu->state->psw_mask, vcpu->state->psw_addr); +} diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index b430f962e323..6cb34a0fa200 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -821,7 +821,7 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, uint64_t extra_pg_pages = extra_mem_pages / 512 * 2; /* Create VM */ - vm = vm_create(VM_MODE_P52V48_4K, + vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c index fe56d159d65f..204f847bd065 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c @@ -5,8 +5,6 @@ * Copyright (C) 2018, Google LLC. */ -#define _GNU_SOURCE /* for program_invocation_name */ - #include "test_util.h" #include "kvm_util.h" #include "processor.h" diff --git a/tools/testing/selftests/kvm/s390x/sync_regs_test.c b/tools/testing/selftests/kvm/s390x/sync_regs_test.c new file mode 100644 index 000000000000..e85ff0d69548 --- /dev/null +++ b/tools/testing/selftests/kvm/s390x/sync_regs_test.c @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Test for s390x KVM_CAP_SYNC_REGS + * + * Based on the same test for x86: + * Copyright (C) 2018, Google LLC. + * + * Adaptions for s390x: + * Copyright (C) 2019, Red Hat, Inc. + * + * Test expected behavior of the KVM_CAP_SYNC_REGS functionality. + */ + +#define _GNU_SOURCE /* for program_invocation_short_name */ +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include "test_util.h" +#include "kvm_util.h" + +#define VCPU_ID 5 + +static void guest_code(void) +{ + for (;;) { + asm volatile ("diag 0,0,0x501"); + asm volatile ("ahi 11,1"); + } +} + +#define REG_COMPARE(reg) \ + TEST_ASSERT(left->reg == right->reg, \ + "Register " #reg \ + " values did not match: 0x%llx, 0x%llx\n", \ + left->reg, right->reg) + +static void compare_regs(struct kvm_regs *left, struct kvm_sync_regs *right) +{ + int i; + + for (i = 0; i < 16; i++) + REG_COMPARE(gprs[i]); +} + +static void compare_sregs(struct kvm_sregs *left, struct kvm_sync_regs *right) +{ + int i; + + for (i = 0; i < 16; i++) + REG_COMPARE(acrs[i]); + + for (i = 0; i < 16; i++) + REG_COMPARE(crs[i]); +} + +#undef REG_COMPARE + +#define TEST_SYNC_FIELDS (KVM_SYNC_GPRS|KVM_SYNC_ACRS|KVM_SYNC_CRS) +#define INVALID_SYNC_FIELD 0x80000000 + +int main(int argc, char *argv[]) +{ + struct kvm_vm *vm; + struct kvm_run *run; + struct kvm_regs regs; + struct kvm_sregs sregs; + int rv, cap; + + /* Tell stdout not to buffer its content */ + setbuf(stdout, NULL); + + cap = kvm_check_cap(KVM_CAP_SYNC_REGS); + if (!cap) { + fprintf(stderr, "CAP_SYNC_REGS not supported, skipping test\n"); + exit(KSFT_SKIP); + } + + /* Create VM */ + vm = vm_create_default(VCPU_ID, 0, guest_code); + + run = vcpu_state(vm, VCPU_ID); + + /* Request and verify all valid register sets. */ + run->kvm_valid_regs = TEST_SYNC_FIELDS; + rv = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv); + TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC, + "Unexpected exit reason: %u (%s)\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + TEST_ASSERT(run->s390_sieic.icptcode == 4 && + (run->s390_sieic.ipa >> 8) == 0x83 && + (run->s390_sieic.ipb >> 16) == 0x501, + "Unexpected interception code: ic=%u, ipa=0x%x, ipb=0x%x\n", + run->s390_sieic.icptcode, run->s390_sieic.ipa, + run->s390_sieic.ipb); + + vcpu_regs_get(vm, VCPU_ID, ®s); + compare_regs(®s, &run->s.regs); + + vcpu_sregs_get(vm, VCPU_ID, &sregs); + compare_sregs(&sregs, &run->s.regs); + + /* Set and verify various register values */ + run->s.regs.gprs[11] = 0xBAD1DEA; + run->s.regs.acrs[0] = 1 << 11; + + run->kvm_valid_regs = TEST_SYNC_FIELDS; + run->kvm_dirty_regs = KVM_SYNC_GPRS | KVM_SYNC_ACRS; + rv = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv); + TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC, + "Unexpected exit reason: %u (%s)\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + TEST_ASSERT(run->s.regs.gprs[11] == 0xBAD1DEA + 1, + "r11 sync regs value incorrect 0x%llx.", + run->s.regs.gprs[11]); + TEST_ASSERT(run->s.regs.acrs[0] == 1 << 11, + "acr0 sync regs value incorrect 0x%llx.", + run->s.regs.acrs[0]); + + vcpu_regs_get(vm, VCPU_ID, ®s); + compare_regs(®s, &run->s.regs); + + vcpu_sregs_get(vm, VCPU_ID, &sregs); + compare_sregs(&sregs, &run->s.regs); + + /* Clear kvm_dirty_regs bits, verify new s.regs values are + * overwritten with existing guest values. + */ + run->kvm_valid_regs = TEST_SYNC_FIELDS; + run->kvm_dirty_regs = 0; + run->s.regs.gprs[11] = 0xDEADBEEF; + rv = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv); + TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC, + "Unexpected exit reason: %u (%s)\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + TEST_ASSERT(run->s.regs.gprs[11] != 0xDEADBEEF, + "r11 sync regs value incorrect 0x%llx.", + run->s.regs.gprs[11]); + + kvm_vm_free(vm); + + return 0; +} diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 9457aaeae092..4465fc2dae14 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -9,12 +9,13 @@ ret=0 ksft_skip=4 # all tests in this script. Can be overridden with -t option -TESTS="unregister down carrier nexthop ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics ipv4_route_v6_gw" +TESTS="unregister down carrier nexthop ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics ipv4_route_v6_gw rp_filter" VERBOSE=0 PAUSE_ON_FAIL=no PAUSE=no IP="ip -netns ns1" +NS_EXEC="ip netns exec ns1" log_test() { @@ -433,6 +434,37 @@ fib_carrier_test() fib_carrier_unicast_test } +fib_rp_filter_test() +{ + echo + echo "IPv4 rp_filter tests" + + setup + + set -e + $IP link set dev lo address 52:54:00:6a:c7:5e + $IP link set dummy0 address 52:54:00:6a:c7:5e + $IP link add dummy1 type dummy + $IP link set dummy1 address 52:54:00:6a:c7:5e + $IP link set dev dummy1 up + $NS_EXEC sysctl -qw net.ipv4.conf.all.rp_filter=1 + $NS_EXEC sysctl -qw net.ipv4.conf.all.accept_local=1 + $NS_EXEC sysctl -qw net.ipv4.conf.all.route_localnet=1 + + $NS_EXEC tc qd add dev dummy1 parent root handle 1: fq_codel + $NS_EXEC tc filter add dev dummy1 parent 1: protocol arp basic action mirred egress redirect dev lo + $NS_EXEC tc filter add dev dummy1 parent 1: protocol ip basic action mirred egress redirect dev lo + set +e + + run_cmd "ip netns exec ns1 ping -I dummy1 -w1 -c1 198.51.100.1" + log_test $? 0 "rp_filter passes local packets" + + run_cmd "ip netns exec ns1 ping -I dummy1 -w1 -c1 127.0.0.1" + log_test $? 0 "rp_filter passes loopback packets" + + cleanup +} + ################################################################################ # Tests on nexthop spec @@ -1557,6 +1589,7 @@ do fib_unreg_test|unregister) fib_unreg_test;; fib_down_test|down) fib_down_test;; fib_carrier_test|carrier) fib_carrier_test;; + fib_rp_filter_test|rp_filter) fib_rp_filter_test;; fib_nexthop_test|nexthop) fib_nexthop_test;; ipv6_route_test|ipv6_rt) ipv6_route_test;; ipv4_route_test|ipv4_rt) ipv4_route_test;; diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore index 444ad39d3700..66fab4c58ed4 100644 --- a/tools/testing/selftests/proc/.gitignore +++ b/tools/testing/selftests/proc/.gitignore @@ -12,4 +12,5 @@ /read /self /setns-dcache +/setns-sysvipc /thread-self diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile index 9f09fcd09ea3..a8ed0f684829 100644 --- a/tools/testing/selftests/proc/Makefile +++ b/tools/testing/selftests/proc/Makefile @@ -17,6 +17,7 @@ TEST_GEN_PROGS += proc-uptime-002 TEST_GEN_PROGS += read TEST_GEN_PROGS += self TEST_GEN_PROGS += setns-dcache +TEST_GEN_PROGS += setns-sysvipc TEST_GEN_PROGS += thread-self include ../lib.mk diff --git a/tools/testing/selftests/proc/proc-pid-vm.c b/tools/testing/selftests/proc/proc-pid-vm.c index 853aa164a401..18a3bde8bc96 100644 --- a/tools/testing/selftests/proc/proc-pid-vm.c +++ b/tools/testing/selftests/proc/proc-pid-vm.c @@ -215,6 +215,11 @@ static const char str_vsyscall[] = "ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n"; #ifdef __x86_64__ +static void sigaction_SIGSEGV(int _, siginfo_t *__, void *___) +{ + _exit(1); +} + /* * vsyscall page can't be unmapped, probe it with memory load. */ @@ -231,11 +236,19 @@ static void vsyscall(void) if (pid == 0) { struct rlimit rlim = {0, 0}; (void)setrlimit(RLIMIT_CORE, &rlim); + + /* Hide "segfault at ffffffffff600000" messages. */ + struct sigaction act; + memset(&act, 0, sizeof(struct sigaction)); + act.sa_flags = SA_SIGINFO; + act.sa_sigaction = sigaction_SIGSEGV; + (void)sigaction(SIGSEGV, &act, NULL); + *(volatile int *)0xffffffffff600000UL; exit(0); } - wait(&wstatus); - if (WIFEXITED(wstatus)) { + waitpid(pid, &wstatus, 0); + if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) == 0) { g_vsyscall = true; } } diff --git a/tools/testing/selftests/proc/setns-sysvipc.c b/tools/testing/selftests/proc/setns-sysvipc.c new file mode 100644 index 000000000000..903890c5e587 --- /dev/null +++ b/tools/testing/selftests/proc/setns-sysvipc.c @@ -0,0 +1,133 @@ +/* + * Copyright © 2019 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* + * Test that setns(CLONE_NEWIPC) points to new /proc/sysvipc content even + * if old one is in dcache. + */ +#undef NDEBUG +#include <assert.h> +#include <errno.h> +#include <stdio.h> +#include <sched.h> +#include <signal.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <sys/ipc.h> +#include <sys/shm.h> + +static pid_t pid = -1; + +static void f(void) +{ + if (pid > 0) { + kill(pid, SIGTERM); + } +} + +int main(void) +{ + int fd[2]; + char _ = 0; + int nsfd; + + atexit(f); + + /* Check for priviledges and syscall availability straight away. */ + if (unshare(CLONE_NEWIPC) == -1) { + if (errno == ENOSYS || errno == EPERM) { + return 4; + } + return 1; + } + /* Distinguisher between two otherwise empty IPC namespaces. */ + if (shmget(IPC_PRIVATE, 1, IPC_CREAT) == -1) { + return 1; + } + + if (pipe(fd) == -1) { + return 1; + } + + pid = fork(); + if (pid == -1) { + return 1; + } + + if (pid == 0) { + if (unshare(CLONE_NEWIPC) == -1) { + return 1; + } + + if (write(fd[1], &_, 1) != 1) { + return 1; + } + + pause(); + + return 0; + } + + if (read(fd[0], &_, 1) != 1) { + return 1; + } + + { + char buf[64]; + snprintf(buf, sizeof(buf), "/proc/%u/ns/ipc", pid); + nsfd = open(buf, O_RDONLY); + if (nsfd == -1) { + return 1; + } + } + + /* Reliably pin dentry into dcache. */ + (void)open("/proc/sysvipc/shm", O_RDONLY); + + if (setns(nsfd, CLONE_NEWIPC) == -1) { + return 1; + } + + kill(pid, SIGTERM); + pid = 0; + + { + char buf[4096]; + ssize_t rv; + int fd; + + fd = open("/proc/sysvipc/shm", O_RDONLY); + if (fd == -1) { + return 1; + } + +#define S32 " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime rss swap\n" +#define S64 " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime rss swap\n" + rv = read(fd, buf, sizeof(buf)); + if (rv == strlen(S32)) { + assert(memcmp(buf, S32, strlen(S32)) == 0); + } else if (rv == strlen(S64)) { + assert(memcmp(buf, S64, strlen(S64)) == 0); + } else { + assert(0); + } + } + + return 0; +} diff --git a/tools/testing/selftests/ptrace/.gitignore b/tools/testing/selftests/ptrace/.gitignore index b3e59d41fd82..cfcc49a7def7 100644 --- a/tools/testing/selftests/ptrace/.gitignore +++ b/tools/testing/selftests/ptrace/.gitignore @@ -1 +1,2 @@ +get_syscall_info peeksiginfo diff --git a/tools/testing/selftests/ptrace/Makefile b/tools/testing/selftests/ptrace/Makefile index cb21c76a18ca..c0b7f89f0930 100644 --- a/tools/testing/selftests/ptrace/Makefile +++ b/tools/testing/selftests/ptrace/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only CFLAGS += -iquote../../../../include/uapi -Wall -TEST_GEN_PROGS := peeksiginfo +TEST_GEN_PROGS := get_syscall_info peeksiginfo include ../lib.mk diff --git a/tools/testing/selftests/ptrace/get_syscall_info.c b/tools/testing/selftests/ptrace/get_syscall_info.c new file mode 100644 index 000000000000..5bcd1c7b5be6 --- /dev/null +++ b/tools/testing/selftests/ptrace/get_syscall_info.c @@ -0,0 +1,271 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) 2018 Dmitry V. Levin <ldv@altlinux.org> + * All rights reserved. + * + * Check whether PTRACE_GET_SYSCALL_INFO semantics implemented in the kernel + * matches userspace expectations. + */ + +#include "../kselftest_harness.h" +#include <err.h> +#include <signal.h> +#include <asm/unistd.h> +#include "linux/ptrace.h" + +static int +kill_tracee(pid_t pid) +{ + if (!pid) + return 0; + + int saved_errno = errno; + + int rc = kill(pid, SIGKILL); + + errno = saved_errno; + return rc; +} + +static long +sys_ptrace(int request, pid_t pid, unsigned long addr, unsigned long data) +{ + return syscall(__NR_ptrace, request, pid, addr, data); +} + +#define LOG_KILL_TRACEE(fmt, ...) \ + do { \ + kill_tracee(pid); \ + TH_LOG("wait #%d: " fmt, \ + ptrace_stop, ##__VA_ARGS__); \ + } while (0) + +TEST(get_syscall_info) +{ + static const unsigned long args[][7] = { + /* a sequence of architecture-agnostic syscalls */ + { + __NR_chdir, + (unsigned long) "", + 0xbad1fed1, + 0xbad2fed2, + 0xbad3fed3, + 0xbad4fed4, + 0xbad5fed5 + }, + { + __NR_gettid, + 0xcaf0bea0, + 0xcaf1bea1, + 0xcaf2bea2, + 0xcaf3bea3, + 0xcaf4bea4, + 0xcaf5bea5 + }, + { + __NR_exit_group, + 0, + 0xfac1c0d1, + 0xfac2c0d2, + 0xfac3c0d3, + 0xfac4c0d4, + 0xfac5c0d5 + } + }; + const unsigned long *exp_args; + + pid_t pid = fork(); + + ASSERT_LE(0, pid) { + TH_LOG("fork: %m"); + } + + if (pid == 0) { + /* get the pid before PTRACE_TRACEME */ + pid = getpid(); + ASSERT_EQ(0, sys_ptrace(PTRACE_TRACEME, 0, 0, 0)) { + TH_LOG("PTRACE_TRACEME: %m"); + } + ASSERT_EQ(0, kill(pid, SIGSTOP)) { + /* cannot happen */ + TH_LOG("kill SIGSTOP: %m"); + } + for (unsigned int i = 0; i < ARRAY_SIZE(args); ++i) { + syscall(args[i][0], + args[i][1], args[i][2], args[i][3], + args[i][4], args[i][5], args[i][6]); + } + /* unreachable */ + _exit(1); + } + + const struct { + unsigned int is_error; + int rval; + } *exp_param, exit_param[] = { + { 1, -ENOENT }, /* chdir */ + { 0, pid } /* gettid */ + }; + + unsigned int ptrace_stop; + + for (ptrace_stop = 0; ; ++ptrace_stop) { + struct ptrace_syscall_info info = { + .op = 0xff /* invalid PTRACE_SYSCALL_INFO_* op */ + }; + const size_t size = sizeof(info); + const int expected_none_size = + (void *) &info.entry - (void *) &info; + const int expected_entry_size = + (void *) &info.entry.args[6] - (void *) &info; + const int expected_exit_size = + (void *) (&info.exit.is_error + 1) - + (void *) &info; + int status; + long rc; + + ASSERT_EQ(pid, wait(&status)) { + /* cannot happen */ + LOG_KILL_TRACEE("wait: %m"); + } + if (WIFEXITED(status)) { + pid = 0; /* the tracee is no more */ + ASSERT_EQ(0, WEXITSTATUS(status)); + break; + } + ASSERT_FALSE(WIFSIGNALED(status)) { + pid = 0; /* the tracee is no more */ + LOG_KILL_TRACEE("unexpected signal %u", + WTERMSIG(status)); + } + ASSERT_TRUE(WIFSTOPPED(status)) { + /* cannot happen */ + LOG_KILL_TRACEE("unexpected wait status %#x", status); + } + + switch (WSTOPSIG(status)) { + case SIGSTOP: + ASSERT_EQ(0, ptrace_stop) { + LOG_KILL_TRACEE("unexpected signal stop"); + } + ASSERT_EQ(0, sys_ptrace(PTRACE_SETOPTIONS, pid, 0, + PTRACE_O_TRACESYSGOOD)) { + LOG_KILL_TRACEE("PTRACE_SETOPTIONS: %m"); + } + ASSERT_LT(0, (rc = sys_ptrace(PTRACE_GET_SYSCALL_INFO, + pid, size, + (unsigned long) &info))) { + LOG_KILL_TRACEE("PTRACE_GET_SYSCALL_INFO: %m"); + } + ASSERT_EQ(expected_none_size, rc) { + LOG_KILL_TRACEE("signal stop mismatch"); + } + ASSERT_EQ(PTRACE_SYSCALL_INFO_NONE, info.op) { + LOG_KILL_TRACEE("signal stop mismatch"); + } + ASSERT_TRUE(info.arch) { + LOG_KILL_TRACEE("signal stop mismatch"); + } + ASSERT_TRUE(info.instruction_pointer) { + LOG_KILL_TRACEE("signal stop mismatch"); + } + ASSERT_TRUE(info.stack_pointer) { + LOG_KILL_TRACEE("signal stop mismatch"); + } + break; + + case SIGTRAP | 0x80: + ASSERT_LT(0, (rc = sys_ptrace(PTRACE_GET_SYSCALL_INFO, + pid, size, + (unsigned long) &info))) { + LOG_KILL_TRACEE("PTRACE_GET_SYSCALL_INFO: %m"); + } + switch (ptrace_stop) { + case 1: /* entering chdir */ + case 3: /* entering gettid */ + case 5: /* entering exit_group */ + exp_args = args[ptrace_stop / 2]; + ASSERT_EQ(expected_entry_size, rc) { + LOG_KILL_TRACEE("entry stop mismatch"); + } + ASSERT_EQ(PTRACE_SYSCALL_INFO_ENTRY, info.op) { + LOG_KILL_TRACEE("entry stop mismatch"); + } + ASSERT_TRUE(info.arch) { + LOG_KILL_TRACEE("entry stop mismatch"); + } + ASSERT_TRUE(info.instruction_pointer) { + LOG_KILL_TRACEE("entry stop mismatch"); + } + ASSERT_TRUE(info.stack_pointer) { + LOG_KILL_TRACEE("entry stop mismatch"); + } + ASSERT_EQ(exp_args[0], info.entry.nr) { + LOG_KILL_TRACEE("entry stop mismatch"); + } + ASSERT_EQ(exp_args[1], info.entry.args[0]) { + LOG_KILL_TRACEE("entry stop mismatch"); + } + ASSERT_EQ(exp_args[2], info.entry.args[1]) { + LOG_KILL_TRACEE("entry stop mismatch"); + } + ASSERT_EQ(exp_args[3], info.entry.args[2]) { + LOG_KILL_TRACEE("entry stop mismatch"); + } + ASSERT_EQ(exp_args[4], info.entry.args[3]) { + LOG_KILL_TRACEE("entry stop mismatch"); + } + ASSERT_EQ(exp_args[5], info.entry.args[4]) { + LOG_KILL_TRACEE("entry stop mismatch"); + } + ASSERT_EQ(exp_args[6], info.entry.args[5]) { + LOG_KILL_TRACEE("entry stop mismatch"); + } + break; + case 2: /* exiting chdir */ + case 4: /* exiting gettid */ + exp_param = &exit_param[ptrace_stop / 2 - 1]; + ASSERT_EQ(expected_exit_size, rc) { + LOG_KILL_TRACEE("exit stop mismatch"); + } + ASSERT_EQ(PTRACE_SYSCALL_INFO_EXIT, info.op) { + LOG_KILL_TRACEE("exit stop mismatch"); + } + ASSERT_TRUE(info.arch) { + LOG_KILL_TRACEE("exit stop mismatch"); + } + ASSERT_TRUE(info.instruction_pointer) { + LOG_KILL_TRACEE("exit stop mismatch"); + } + ASSERT_TRUE(info.stack_pointer) { + LOG_KILL_TRACEE("exit stop mismatch"); + } + ASSERT_EQ(exp_param->is_error, + info.exit.is_error) { + LOG_KILL_TRACEE("exit stop mismatch"); + } + ASSERT_EQ(exp_param->rval, info.exit.rval) { + LOG_KILL_TRACEE("exit stop mismatch"); + } + break; + default: + LOG_KILL_TRACEE("unexpected syscall stop"); + abort(); + } + break; + + default: + LOG_KILL_TRACEE("unexpected stop signal %#x", + WSTOPSIG(status)); + abort(); + } + + ASSERT_EQ(0, sys_ptrace(PTRACE_SYSCALL, pid, 0, 0)) { + LOG_KILL_TRACEE("PTRACE_SYSCALL: %m"); + } + } + + ASSERT_EQ(ARRAY_SIZE(args) * 2, ptrace_stop); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/safesetid/safesetid-test.c b/tools/testing/selftests/safesetid/safesetid-test.c index 892c8e8b1b8b..8f40c6ecdad1 100644 --- a/tools/testing/selftests/safesetid/safesetid-test.c +++ b/tools/testing/selftests/safesetid/safesetid-test.c @@ -142,23 +142,19 @@ static void ensure_securityfs_mounted(void) static void write_policies(void) { + static char *policy_str = + "1:2\n" + "1:3\n" + "2:2\n" + "3:3\n"; ssize_t written; int fd; fd = open(add_whitelist_policy_file, O_WRONLY); if (fd < 0) die("cant open add_whitelist_policy file\n"); - written = write(fd, "1:2", strlen("1:2")); - if (written != strlen("1:2")) { - if (written >= 0) { - die("short write to %s\n", add_whitelist_policy_file); - } else { - die("write to %s failed: %s\n", - add_whitelist_policy_file, strerror(errno)); - } - } - written = write(fd, "1:3", strlen("1:3")); - if (written != strlen("1:3")) { + written = write(fd, policy_str, strlen(policy_str)); + if (written != strlen(policy_str)) { if (written >= 0) { die("short write to %s\n", add_whitelist_policy_file); } else { diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index dc66fe852768..6ef7f16c4cf5 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1775,13 +1775,18 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, unsigned long msg; static bool entry; - /* Make sure we got an empty message. */ + /* + * The traditional way to tell PTRACE_SYSCALL entry/exit + * is by counting. + */ + entry = !entry; + + /* Make sure we got an appropriate message. */ ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg); EXPECT_EQ(0, ret); - EXPECT_EQ(0, msg); + EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY + : PTRACE_EVENTMSG_SYSCALL_EXIT, msg); - /* The only way to tell PTRACE_SYSCALL entry/exit is by counting. */ - entry = !entry; if (!entry) return; diff --git a/tools/testing/selftests/zram/README b/tools/testing/selftests/zram/README index 7972cc512408..110b34834a6f 100644 --- a/tools/testing/selftests/zram/README +++ b/tools/testing/selftests/zram/README @@ -37,4 +37,4 @@ Commands required for testing: - mkfs/ mkfs.ext4 For more information please refer: -kernel-source-tree/Documentation/blockdev/zram.txt +kernel-source-tree/Documentation/admin-guide/blockdev/zram.rst |