Diffstat (limited to 'kernel/bpf')
-rw-r--r--  kernel/bpf/core.c     |   7
-rw-r--r--  kernel/bpf/helpers.c  | 306
-rw-r--r--  kernel/bpf/inode.c    |   2
-rw-r--r--  kernel/bpf/syscall.c  |   3
-rw-r--r--  kernel/bpf/verifier.c |  84
5 files changed, 396 insertions, 6 deletions
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index f5423251c118..5e31ee9f7512 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1363,11 +1363,10 @@ u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
  * __bpf_prog_run - run eBPF program on a given context
  * @regs: is the array of MAX_BPF_EXT_REG eBPF pseudo-registers
  * @insn: is the array of eBPF instructions
- * @stack: is the eBPF storage stack
  *
  * Decode and execute eBPF instructions.
  */
-static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
+static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
 {
 #define BPF_INSN_2_LBL(x, y)	[BPF_##x | BPF_##y] = &&x##_##y
 #define BPF_INSN_3_LBL(x, y, z)	[BPF_##x | BPF_##y | BPF_##z] = &&x##_##y##_##z
@@ -1701,7 +1700,7 @@ static unsigned int PROG_NAME(stack_size)(const void *ctx, const struct bpf_insn
 \
 	FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \
 	ARG1 = (u64) (unsigned long) ctx; \
-	return ___bpf_prog_run(regs, insn, stack); \
+	return ___bpf_prog_run(regs, insn); \
 }
 
 #define PROG_NAME_ARGS(stack_size) __bpf_prog_run_args##stack_size
@@ -1718,7 +1717,7 @@ static u64 PROG_NAME_ARGS(stack_size)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5, \
 	BPF_R3 = r3; \
 	BPF_R4 = r4; \
 	BPF_R5 = r5; \
-	return ___bpf_prog_run(regs, insn, stack); \
+	return ___bpf_prog_run(regs, insn); \
 }
 
 #define EVAL1(FN, X) FN(X)
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index f306611c4ddf..85b26ca5aacd 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -669,6 +669,310 @@ const struct bpf_func_proto bpf_this_cpu_ptr_proto = {
 	.arg1_type	= ARG_PTR_TO_PERCPU_BTF_ID,
 };
 
+static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
+				 size_t bufsz)
+{
+	void __user *user_ptr = (__force void __user *)unsafe_ptr;
+
+	buf[0] = 0;
+
+	switch (fmt_ptype) {
+	case 's':
+#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
+		if ((unsigned long)unsafe_ptr < TASK_SIZE)
+			return strncpy_from_user_nofault(buf, user_ptr, bufsz);
+		fallthrough;
+#endif
+	case 'k':
+		return strncpy_from_kernel_nofault(buf, unsafe_ptr, bufsz);
+	case 'u':
+		return strncpy_from_user_nofault(buf, user_ptr, bufsz);
+	}
+
+	return -EINVAL;
+}
+
+/* Per-cpu temp buffers which can be used by printf-like helpers for %s or %p
+ */
+#define MAX_PRINTF_BUF_LEN	512
+
+struct bpf_printf_buf {
+	char tmp_buf[MAX_PRINTF_BUF_LEN];
+};
+static DEFINE_PER_CPU(struct bpf_printf_buf, bpf_printf_buf);
+static DEFINE_PER_CPU(int, bpf_printf_buf_used);
+
+static int try_get_fmt_tmp_buf(char **tmp_buf)
+{
+	struct bpf_printf_buf *bufs;
+	int used;
+
+	if (*tmp_buf)
+		return 0;
+
+	preempt_disable();
+	used = this_cpu_inc_return(bpf_printf_buf_used);
+	if (WARN_ON_ONCE(used > 1)) {
+		this_cpu_dec(bpf_printf_buf_used);
+		preempt_enable();
+		return -EBUSY;
+	}
+	bufs = this_cpu_ptr(&bpf_printf_buf);
+	*tmp_buf = bufs->tmp_buf;
+
+	return 0;
+}
+
+void bpf_printf_cleanup(void)
+{
+	if (this_cpu_read(bpf_printf_buf_used)) {
+		this_cpu_dec(bpf_printf_buf_used);
+		preempt_enable();
+	}
+}
+
+/*
+ * bpf_parse_fmt_str - Generic pass on format strings for printf-like helpers
+ *
+ * Returns a negative value if fmt is an invalid format string or 0 otherwise.
+ *
+ * This can be used in two ways:
+ * - Format string verification only: when final_args and mod are NULL
+ * - Arguments preparation: in addition to the above verification, it writes in
+ *   final_args a copy of raw_args where pointers from BPF have been sanitized
+ *   into pointers safe to use by snprintf. This also writes in the mod array
+ *   the size requirement of each argument, usable by BPF_CAST_FMT_ARG for ex.
+ *
+ * In argument preparation mode, if 0 is returned, safe temporary buffers are
+ * allocated and bpf_printf_cleanup should be called to free them after use.
+ */
+int bpf_printf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
+			u64 *final_args, enum bpf_printf_mod_type *mod,
+			u32 num_args)
+{
+	char *unsafe_ptr = NULL, *tmp_buf = NULL, *fmt_end;
+	size_t tmp_buf_len = MAX_PRINTF_BUF_LEN;
+	int err, i, num_spec = 0, copy_size;
+	enum bpf_printf_mod_type cur_mod;
+	u64 cur_arg;
+	char fmt_ptype;
+
+	if (!!final_args != !!mod)
+		return -EINVAL;
+
+	fmt_end = strnchr(fmt, fmt_size, 0);
+	if (!fmt_end)
+		return -EINVAL;
+	fmt_size = fmt_end - fmt;
+
+	for (i = 0; i < fmt_size; i++) {
+		if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) {
+			err = -EINVAL;
+			goto cleanup;
+		}
+
+		if (fmt[i] != '%')
+			continue;
+
+		if (fmt[i + 1] == '%') {
+			i++;
+			continue;
+		}
+
+		if (num_spec >= num_args) {
+			err = -EINVAL;
+			goto cleanup;
+		}
+
+		/* The string is zero-terminated so if fmt[i] != 0, we can
+		 * always access fmt[i + 1], in the worst case it will be a 0
+		 */
+		i++;
+
+		/* skip optional "[0 +-][num]" width formatting field */
+		while (fmt[i] == '0' || fmt[i] == '+' || fmt[i] == '-' ||
+		       fmt[i] == ' ')
+			i++;
+		if (fmt[i] >= '1' && fmt[i] <= '9') {
+			i++;
+			while (fmt[i] >= '0' && fmt[i] <= '9')
+				i++;
+		}
+
+		if (fmt[i] == 'p') {
+			cur_mod = BPF_PRINTF_LONG;
+
+			if ((fmt[i + 1] == 'k' || fmt[i + 1] == 'u') &&
+			    fmt[i + 2] == 's') {
+				fmt_ptype = fmt[i + 1];
+				i += 2;
+				goto fmt_str;
+			}
+
+			if (fmt[i + 1] == 0 || isspace(fmt[i + 1]) ||
+			    ispunct(fmt[i + 1]) || fmt[i + 1] == 'K' ||
+			    fmt[i + 1] == 'x' || fmt[i + 1] == 'B' ||
+			    fmt[i + 1] == 's' || fmt[i + 1] == 'S') {
+				/* just kernel pointers */
+				if (final_args)
+					cur_arg = raw_args[num_spec];
+				goto fmt_next;
+			}
+
+			/* only support "%pI4", "%pi4", "%pI6" and "%pi6". */
+			if ((fmt[i + 1] != 'i' && fmt[i + 1] != 'I') ||
+			    (fmt[i + 2] != '4' && fmt[i + 2] != '6')) {
+				err = -EINVAL;
+				goto cleanup;
+			}
+
+			i += 2;
+			if (!final_args)
+				goto fmt_next;
+
+			if (try_get_fmt_tmp_buf(&tmp_buf)) {
+				err = -EBUSY;
+				goto out;
+			}
+
+			copy_size = (fmt[i + 2] == '4') ? 4 : 16;
+			if (tmp_buf_len < copy_size) {
+				err = -ENOSPC;
+				goto cleanup;
+			}
+
+			unsafe_ptr = (char *)(long)raw_args[num_spec];
+			err = copy_from_kernel_nofault(tmp_buf, unsafe_ptr,
+						       copy_size);
+			if (err < 0)
+				memset(tmp_buf, 0, copy_size);
+			cur_arg = (u64)(long)tmp_buf;
+			tmp_buf += copy_size;
+			tmp_buf_len -= copy_size;
+
+			goto fmt_next;
+		} else if (fmt[i] == 's') {
+			cur_mod = BPF_PRINTF_LONG;
+			fmt_ptype = fmt[i];
+fmt_str:
+			if (fmt[i + 1] != 0 &&
+			    !isspace(fmt[i + 1]) &&
+			    !ispunct(fmt[i + 1])) {
+				err = -EINVAL;
+				goto cleanup;
+			}
+
+			if (!final_args)
+				goto fmt_next;
+
+			if (try_get_fmt_tmp_buf(&tmp_buf)) {
+				err = -EBUSY;
+				goto out;
+			}
+
+			if (!tmp_buf_len) {
+				err = -ENOSPC;
+				goto cleanup;
+			}
+
+			unsafe_ptr = (char *)(long)raw_args[num_spec];
+			err = bpf_trace_copy_string(tmp_buf, unsafe_ptr,
+						    fmt_ptype, tmp_buf_len);
+			if (err < 0) {
+				tmp_buf[0] = '\0';
+				err = 1;
+			}
+
+			cur_arg = (u64)(long)tmp_buf;
+			tmp_buf += err;
+			tmp_buf_len -= err;
+
+			goto fmt_next;
+		}
+
+		cur_mod = BPF_PRINTF_INT;
+
+		if (fmt[i] == 'l') {
+			cur_mod = BPF_PRINTF_LONG;
+			i++;
+		}
+		if (fmt[i] == 'l') {
+			cur_mod = BPF_PRINTF_LONG_LONG;
+			i++;
+		}
+
+		if (fmt[i] != 'i' && fmt[i] != 'd' && fmt[i] != 'u' &&
+		    fmt[i] != 'x' && fmt[i] != 'X') {
+			err = -EINVAL;
+			goto cleanup;
+		}
+
+		if (final_args)
+			cur_arg = raw_args[num_spec];
+fmt_next:
+		if (final_args) {
+			mod[num_spec] = cur_mod;
+			final_args[num_spec] = cur_arg;
+		}
+		num_spec++;
+	}
+
+	err = 0;
+cleanup:
+	if (err)
+		bpf_printf_cleanup();
+out:
+	return err;
+}
+
+#define MAX_SNPRINTF_VARARGS		12
+
+BPF_CALL_5(bpf_snprintf, char *, str, u32, str_size, char *, fmt,
+	   const void *, data, u32, data_len)
+{
+	enum bpf_printf_mod_type mod[MAX_SNPRINTF_VARARGS];
+	u64 args[MAX_SNPRINTF_VARARGS];
+	int err, num_args;
+
+	if (data_len % 8 || data_len > MAX_SNPRINTF_VARARGS * 8 ||
+	    (data_len && !data))
+		return -EINVAL;
+	num_args = data_len / 8;
+
+	/* ARG_PTR_TO_CONST_STR guarantees that fmt is zero-terminated so we
+	 * can safely give an unbounded size.
+	 */
+	err = bpf_printf_prepare(fmt, UINT_MAX, data, args, mod, num_args);
+	if (err < 0)
+		return err;
+
+	/* Maximumly we can have MAX_SNPRINTF_VARARGS parameters, just give
+	 * all of them to snprintf().
+	 */
+	err = snprintf(str, str_size, fmt, BPF_CAST_FMT_ARG(0, args, mod),
+		BPF_CAST_FMT_ARG(1, args, mod), BPF_CAST_FMT_ARG(2, args, mod),
+		BPF_CAST_FMT_ARG(3, args, mod), BPF_CAST_FMT_ARG(4, args, mod),
+		BPF_CAST_FMT_ARG(5, args, mod), BPF_CAST_FMT_ARG(6, args, mod),
+		BPF_CAST_FMT_ARG(7, args, mod), BPF_CAST_FMT_ARG(8, args, mod),
+		BPF_CAST_FMT_ARG(9, args, mod), BPF_CAST_FMT_ARG(10, args, mod),
+		BPF_CAST_FMT_ARG(11, args, mod));
+
+	bpf_printf_cleanup();
+
+	return err + 1;
+}
+
+const struct bpf_func_proto bpf_snprintf_proto = {
+	.func		= bpf_snprintf,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_MEM_OR_NULL,
+	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
+	.arg3_type	= ARG_PTR_TO_CONST_STR,
+	.arg4_type	= ARG_PTR_TO_MEM_OR_NULL,
+	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
+};
+
 const struct bpf_func_proto bpf_get_current_task_proto __weak;
 const struct bpf_func_proto bpf_probe_read_user_proto __weak;
 const struct bpf_func_proto bpf_probe_read_user_str_proto __weak;
@@ -757,6 +1061,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 		return &bpf_probe_read_kernel_str_proto;
 	case BPF_FUNC_snprintf_btf:
 		return &bpf_snprintf_btf_proto;
+	case BPF_FUNC_snprintf:
+		return &bpf_snprintf_proto;
 	default:
 		return NULL;
 	}
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index d2de2abec35b..b4ebd60a6c16 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -816,8 +816,6 @@ static int __init bpf_init(void)
 {
 	int ret;
 
-	mutex_init(&bpf_preload_lock);
-
 	ret = sysfs_create_mount_point(fs_kobj, "bpf");
 	if (ret)
 		return ret;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 6428634da57e..fd495190115e 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2551,6 +2551,9 @@ static int bpf_tracing_link_fill_link_info(const struct bpf_link *link,
 		container_of(link, struct bpf_tracing_link, link);
 
 	info->tracing.attach_type = tr_link->attach_type;
+	bpf_trampoline_unpack_key(tr_link->trampoline->key,
+				  &info->tracing.target_obj_id,
+				  &info->tracing.target_btf_id);
 
 	return 0;
 }
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 5682a02901d3..637462e9b6ee 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4787,6 +4787,7 @@ static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALU
 static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PERCPU_BTF_ID } };
 static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
 static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
+static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
 
 static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
 	[ARG_PTR_TO_MAP_KEY]		= &map_key_value_types,
@@ -4817,6 +4818,7 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
 	[ARG_PTR_TO_PERCPU_BTF_ID]	= &percpu_btf_ptr_types,
 	[ARG_PTR_TO_FUNC]		= &func_ptr_types,
 	[ARG_PTR_TO_STACK_OR_NULL]	= &stack_ptr_types,
+	[ARG_PTR_TO_CONST_STR]		= &const_str_ptr_types,
 };
 
 static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
@@ -5067,6 +5069,44 @@ skip_type_check:
 		if (err)
 			return err;
 		err = check_ptr_alignment(env, reg, 0, size, true);
+	} else if (arg_type == ARG_PTR_TO_CONST_STR) {
+		struct bpf_map *map = reg->map_ptr;
+		int map_off;
+		u64 map_addr;
+		char *str_ptr;
+
+		if (!bpf_map_is_rdonly(map)) {
+			verbose(env, "R%d does not point to a readonly map'\n", regno);
+			return -EACCES;
+		}
+
+		if (!tnum_is_const(reg->var_off)) {
+			verbose(env, "R%d is not a constant address'\n", regno);
+			return -EACCES;
+		}
+
+		if (!map->ops->map_direct_value_addr) {
+			verbose(env, "no direct value access support for this map type\n");
+			return -EACCES;
+		}
+
+		err = check_map_access(env, regno, reg->off,
+				       map->value_size - reg->off, false);
+		if (err)
+			return err;
+
+		map_off = reg->off + reg->var_off.value;
+		err = map->ops->map_direct_value_addr(map, &map_addr, map_off);
+		if (err) {
+			verbose(env, "direct value access on string failed\n");
+			return err;
+		}
+
+		str_ptr = (char *)(long)(map_addr);
+		if (!strnchr(str_ptr + map_off, map->value_size - map_off, 0)) {
+			verbose(env, "string is not zero-terminated\n");
+			return -EINVAL;
+		}
 	}
 
 	return err;
@@ -5767,6 +5807,7 @@ static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
 
 	if (ret_type != RET_INTEGER ||
 	    (func_id != BPF_FUNC_get_stack &&
+	     func_id != BPF_FUNC_get_task_stack &&
 	     func_id != BPF_FUNC_probe_read_str &&
 	     func_id != BPF_FUNC_probe_read_kernel_str &&
 	     func_id != BPF_FUNC_probe_read_user_str))
@@ -5877,6 +5918,43 @@ static int check_reference_leak(struct bpf_verifier_env *env)
 	return state->acquired_refs ? -EINVAL : 0;
 }
 
+static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
+				   struct bpf_reg_state *regs)
+{
+	struct bpf_reg_state *fmt_reg = &regs[BPF_REG_3];
+	struct bpf_reg_state *data_len_reg = &regs[BPF_REG_5];
+	struct bpf_map *fmt_map = fmt_reg->map_ptr;
+	int err, fmt_map_off, num_args;
+	u64 fmt_addr;
+	char *fmt;
+
+	/* data must be an array of u64 */
+	if (data_len_reg->var_off.value % 8)
+		return -EINVAL;
+	num_args = data_len_reg->var_off.value / 8;
+
+	/* fmt being ARG_PTR_TO_CONST_STR guarantees that var_off is const
+	 * and map_direct_value_addr is set.
+	 */
+	fmt_map_off = fmt_reg->off + fmt_reg->var_off.value;
+	err = fmt_map->ops->map_direct_value_addr(fmt_map, &fmt_addr,
+						  fmt_map_off);
+	if (err) {
+		verbose(env, "verifier bug\n");
+		return -EFAULT;
+	}
+	fmt = (char *)(long)fmt_addr + fmt_map_off;
+
+	/* We are also guaranteed that fmt+fmt_map_off is NULL terminated, we
+	 * can focus on validating the format specifiers.
+	 */
+	err = bpf_printf_prepare(fmt, UINT_MAX, NULL, NULL, NULL, num_args);
+	if (err < 0)
+		verbose(env, "Invalid format string\n");
+
+	return err;
+}
+
 static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 			     int *insn_idx_p)
 {
@@ -5991,6 +6069,12 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 		return -EINVAL;
 	}
 
+	if (func_id == BPF_FUNC_snprintf) {
+		err = check_bpf_snprintf_call(env, regs);
+		if (err < 0)
+			return err;
+	}
+
 	/* reset caller saved regs */
 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
 		mark_reg_not_init(env, regs, caller_saved[i]);
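
For context, here is a minimal usage sketch, not part of the diff above, showing how a BPF program could call the new bpf_snprintf() helper. The section name, output buffer and format string are illustrative assumptions; the constraints they satisfy come from the diff itself: the format string must live in a read-only map (ARG_PTR_TO_CONST_STR), the variadic arguments are packed into one array of u64, and data_len must be a multiple of 8 with at most MAX_SNPRINTF_VARARGS (12) entries.

// SPDX-License-Identifier: GPL-2.0
/* Hypothetical example; assumes a libbpf recent enough to declare
 * bpf_snprintf() in bpf_helper_defs.h.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

char out[64];				/* global array, backed by a map */

SEC("tracepoint/syscalls/sys_enter_execve")
int log_execve(void *ctx)
{
	/* A 'static const' string is placed in the .rodata map by libbpf,
	 * which is frozen at load time and satisfies ARG_PTR_TO_CONST_STR.
	 */
	static const char fmt[] = "execve by pid %d (%s)";
	char comm[16];
	__u32 pid = bpf_get_current_pid_tgid() >> 32;

	bpf_get_current_comm(comm, sizeof(comm));

	/* All variadic arguments are passed through one array of u64;
	 * string arguments are passed as pointers and copied by the helper.
	 */
	__u64 args[2] = { pid, (unsigned long)comm };

	bpf_snprintf(out, sizeof(out), fmt, args, sizeof(args));
	return 0;
}

char LICENSE[] SEC("license") = "GPL";	/* the helper is gpl_only */

Per the helper body in the diff, a successful call returns snprintf()'s would-be length plus one, i.e. the number of bytes needed including the terminating NUL, so a return value larger than the destination size indicates truncation.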