diff options
Diffstat (limited to 'kernel/trace')
-rw-r--r-- | kernel/trace/bpf_trace.c | 129 | ||||
-rw-r--r-- | kernel/trace/trace_event_perf.c | 40 | ||||
-rw-r--r-- | kernel/trace/trace_events.c | 18 | ||||
-rw-r--r-- | kernel/trace/trace_kprobe.c | 10 | ||||
-rw-r--r-- | kernel/trace/trace_syscalls.c | 13 | ||||
-rw-r--r-- | kernel/trace/trace_uprobe.c | 5 |
6 files changed, 178 insertions, 37 deletions
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 3e4ffb3ace5f..780bcbe1d4de 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -62,17 +62,21 @@ EXPORT_SYMBOL_GPL(trace_call_bpf); static u64 bpf_probe_read(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) { void *dst = (void *) (long) r1; - int size = (int) r2; + int ret, size = (int) r2; void *unsafe_ptr = (void *) (long) r3; - return probe_kernel_read(dst, unsafe_ptr, size); + ret = probe_kernel_read(dst, unsafe_ptr, size); + if (unlikely(ret < 0)) + memset(dst, 0, size); + + return ret; } static const struct bpf_func_proto bpf_probe_read_proto = { .func = bpf_probe_read, .gpl_only = true, .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_STACK, + .arg1_type = ARG_PTR_TO_RAW_STACK, .arg2_type = ARG_CONST_STACK_SIZE, .arg3_type = ARG_ANYTHING, }; @@ -221,11 +225,12 @@ static const struct bpf_func_proto bpf_perf_event_read_proto = { .arg2_type = ARG_ANYTHING, }; -static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 index, u64 r4, u64 size) +static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size) { struct pt_regs *regs = (struct pt_regs *) (long) r1; struct bpf_map *map = (struct bpf_map *) (long) r2; struct bpf_array *array = container_of(map, struct bpf_array, map); + u64 index = flags & BPF_F_INDEX_MASK; void *data = (void *) (long) r4; struct perf_sample_data sample_data; struct perf_event *event; @@ -235,6 +240,10 @@ static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 index, u64 r4, u64 size) .data = data, }; + if (unlikely(flags & ~(BPF_F_INDEX_MASK))) + return -EINVAL; + if (index == BPF_F_CURRENT_CPU) + index = raw_smp_processor_id(); if (unlikely(index >= array->map.max_entries)) return -E2BIG; @@ -268,7 +277,34 @@ static const struct bpf_func_proto bpf_perf_event_output_proto = { .arg5_type = ARG_CONST_STACK_SIZE, }; -static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id) +static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs); + +static u64 bpf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size) +{ + struct pt_regs *regs = this_cpu_ptr(&bpf_pt_regs); + + perf_fetch_caller_regs(regs); + + return bpf_perf_event_output((long)regs, r2, flags, r4, size); +} + +static const struct bpf_func_proto bpf_event_output_proto = { + .func = bpf_event_output, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_STACK, + .arg5_type = ARG_CONST_STACK_SIZE, +}; + +const struct bpf_func_proto *bpf_get_event_output_proto(void) +{ + return &bpf_event_output_proto; +} + +static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id) { switch (func_id) { case BPF_FUNC_map_lookup_elem: @@ -295,12 +331,20 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func return &bpf_get_smp_processor_id_proto; case BPF_FUNC_perf_event_read: return &bpf_perf_event_read_proto; + default: + return NULL; + } +} + +static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id) +{ + switch (func_id) { case BPF_FUNC_perf_event_output: return &bpf_perf_event_output_proto; case BPF_FUNC_get_stackid: return &bpf_get_stackid_proto; default: - return NULL; + return tracing_func_proto(func_id); } } @@ -332,9 +376,82 @@ static struct bpf_prog_type_list kprobe_tl = { .type = BPF_PROG_TYPE_KPROBE, }; +static u64 bpf_perf_event_output_tp(u64 r1, u64 r2, u64 index, u64 r4, u64 size) +{ + /* + * r1 points to perf tracepoint buffer where first 8 bytes are hidden + * from bpf program and contain a pointer to 'struct pt_regs'. Fetch it + * from there and call the same bpf_perf_event_output() helper + */ + u64 ctx = *(long *)(uintptr_t)r1; + + return bpf_perf_event_output(ctx, r2, index, r4, size); +} + +static const struct bpf_func_proto bpf_perf_event_output_proto_tp = { + .func = bpf_perf_event_output_tp, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_STACK, + .arg5_type = ARG_CONST_STACK_SIZE, +}; + +static u64 bpf_get_stackid_tp(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + u64 ctx = *(long *)(uintptr_t)r1; + + return bpf_get_stackid(ctx, r2, r3, r4, r5); +} + +static const struct bpf_func_proto bpf_get_stackid_proto_tp = { + .func = bpf_get_stackid_tp, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, +}; + +static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id) +{ + switch (func_id) { + case BPF_FUNC_perf_event_output: + return &bpf_perf_event_output_proto_tp; + case BPF_FUNC_get_stackid: + return &bpf_get_stackid_proto_tp; + default: + return tracing_func_proto(func_id); + } +} + +static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type) +{ + if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE) + return false; + if (type != BPF_READ) + return false; + if (off % size != 0) + return false; + return true; +} + +static const struct bpf_verifier_ops tracepoint_prog_ops = { + .get_func_proto = tp_prog_func_proto, + .is_valid_access = tp_prog_is_valid_access, +}; + +static struct bpf_prog_type_list tracepoint_tl = { + .ops = &tracepoint_prog_ops, + .type = BPF_PROG_TYPE_TRACEPOINT, +}; + static int __init register_kprobe_prog_ops(void) { bpf_register_prog_type(&kprobe_tl); + bpf_register_prog_type(&tracepoint_tl); return 0; } late_initcall(register_kprobe_prog_ops); diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 00df25fd86ef..5a927075977f 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -260,42 +260,43 @@ void perf_trace_del(struct perf_event *p_event, int flags) tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event); } -void *perf_trace_buf_prepare(int size, unsigned short type, - struct pt_regs **regs, int *rctxp) +void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp) { - struct trace_entry *entry; - unsigned long flags; char *raw_data; - int pc; + int rctx; BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long)); if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, - "perf buffer not large enough")) + "perf buffer not large enough")) return NULL; - pc = preempt_count(); - - *rctxp = perf_swevent_get_recursion_context(); - if (*rctxp < 0) + *rctxp = rctx = perf_swevent_get_recursion_context(); + if (rctx < 0) return NULL; if (regs) - *regs = this_cpu_ptr(&__perf_regs[*rctxp]); - raw_data = this_cpu_ptr(perf_trace_buf[*rctxp]); + *regs = this_cpu_ptr(&__perf_regs[rctx]); + raw_data = this_cpu_ptr(perf_trace_buf[rctx]); /* zero the dead bytes from align to not leak stack to user */ memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64)); + return raw_data; +} +EXPORT_SYMBOL_GPL(perf_trace_buf_alloc); +NOKPROBE_SYMBOL(perf_trace_buf_alloc); + +void perf_trace_buf_update(void *record, u16 type) +{ + struct trace_entry *entry = record; + int pc = preempt_count(); + unsigned long flags; - entry = (struct trace_entry *)raw_data; local_save_flags(flags); tracing_generic_entry_update(entry, flags, pc); entry->type = type; - - return raw_data; } -EXPORT_SYMBOL_GPL(perf_trace_buf_prepare); -NOKPROBE_SYMBOL(perf_trace_buf_prepare); +NOKPROBE_SYMBOL(perf_trace_buf_update); #ifdef CONFIG_FUNCTION_TRACER static void @@ -316,15 +317,16 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip, BUILD_BUG_ON(ENTRY_SIZE > PERF_MAX_TRACE_SIZE); + memset(®s, 0, sizeof(regs)); perf_fetch_caller_regs(®s); - entry = perf_trace_buf_prepare(ENTRY_SIZE, TRACE_FN, NULL, &rctx); + entry = perf_trace_buf_alloc(ENTRY_SIZE, NULL, &rctx); if (!entry) return; entry->ip = ip; entry->parent_ip = parent_ip; - perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, 0, + perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, TRACE_FN, 1, ®s, head, NULL); #undef ENTRY_SIZE diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 6f965864cc02..b7b0760ba6ee 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -204,6 +204,24 @@ static void trace_destroy_fields(struct trace_event_call *call) } } +/* + * run-time version of trace_event_get_offsets_<call>() that returns the last + * accessible offset of trace fields excluding __dynamic_array bytes + */ +int trace_event_get_offsets(struct trace_event_call *call) +{ + struct ftrace_event_field *tail; + struct list_head *head; + + head = trace_get_fields(call); + /* + * head->next points to the last field with the largest offset, + * since it was added last by trace_define_field() + */ + tail = list_first_entry(head, struct ftrace_event_field, link); + return tail->offset + tail->size; +} + int trace_event_raw_init(struct trace_event_call *call) { int id; diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 919e0ddd8fcc..5546eec0505f 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1149,14 +1149,15 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) size = ALIGN(__size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); - entry = perf_trace_buf_prepare(size, call->event.type, NULL, &rctx); + entry = perf_trace_buf_alloc(size, NULL, &rctx); if (!entry) return; entry->ip = (unsigned long)tk->rp.kp.addr; memset(&entry[1], 0, dsize); store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize); - perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL); + perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs, + head, NULL); } NOKPROBE_SYMBOL(kprobe_perf_func); @@ -1184,14 +1185,15 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, size = ALIGN(__size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); - entry = perf_trace_buf_prepare(size, call->event.type, NULL, &rctx); + entry = perf_trace_buf_alloc(size, NULL, &rctx); if (!entry) return; entry->func = (unsigned long)tk->rp.kp.addr; entry->ret_ip = (unsigned long)ri->ret_addr; store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize); - perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL); + perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs, + head, NULL); } NOKPROBE_SYMBOL(kretprobe_perf_func); #endif /* CONFIG_PERF_EVENTS */ diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index e78f364cc192..b2b6efc083a4 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -587,15 +587,16 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) size = ALIGN(size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); - rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size, - sys_data->enter_event->event.type, NULL, &rctx); + rec = perf_trace_buf_alloc(size, NULL, &rctx); if (!rec) return; rec->nr = syscall_nr; syscall_get_arguments(current, regs, 0, sys_data->nb_args, (unsigned long *)&rec->args); - perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL); + perf_trace_buf_submit(rec, size, rctx, + sys_data->enter_event->event.type, 1, regs, + head, NULL); } static int perf_sysenter_enable(struct trace_event_call *call) @@ -660,14 +661,14 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64)); size -= sizeof(u32); - rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size, - sys_data->exit_event->event.type, NULL, &rctx); + rec = perf_trace_buf_alloc(size, NULL, &rctx); if (!rec) return; rec->nr = syscall_nr; rec->ret = syscall_get_return_value(current, regs); - perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL); + perf_trace_buf_submit(rec, size, rctx, sys_data->exit_event->event.type, + 1, regs, head, NULL); } static int perf_sysexit_enable(struct trace_event_call *call) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 7915142c89e4..c53485441c88 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -1131,7 +1131,7 @@ static void __uprobe_perf_func(struct trace_uprobe *tu, if (hlist_empty(head)) goto out; - entry = perf_trace_buf_prepare(size, call->event.type, NULL, &rctx); + entry = perf_trace_buf_alloc(size, NULL, &rctx); if (!entry) goto out; @@ -1152,7 +1152,8 @@ static void __uprobe_perf_func(struct trace_uprobe *tu, memset(data + len, 0, size - esize - len); } - perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL); + perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs, + head, NULL); out: preempt_enable(); } |