diff options
Diffstat (limited to 'include/linux/bpf.h')
-rw-r--r-- | include/linux/bpf.h | 306 |
1 files changed, 203 insertions, 103 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e8e2b0393ca9..f4c16f19f83e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -168,6 +168,7 @@ struct bpf_map { u32 max_entries; u32 map_flags; int spin_lock_off; /* >=0 valid offset, <0 error */ + int timer_off; /* >=0 valid offset, <0 error */ u32 id; int numa_node; u32 btf_key_type_id; @@ -197,30 +198,53 @@ static inline bool map_value_has_spin_lock(const struct bpf_map *map) return map->spin_lock_off >= 0; } -static inline void check_and_init_map_lock(struct bpf_map *map, void *dst) +static inline bool map_value_has_timer(const struct bpf_map *map) { - if (likely(!map_value_has_spin_lock(map))) - return; - *(struct bpf_spin_lock *)(dst + map->spin_lock_off) = - (struct bpf_spin_lock){}; + return map->timer_off >= 0; } -/* copy everything but bpf_spin_lock */ +static inline void check_and_init_map_value(struct bpf_map *map, void *dst) +{ + if (unlikely(map_value_has_spin_lock(map))) + *(struct bpf_spin_lock *)(dst + map->spin_lock_off) = + (struct bpf_spin_lock){}; + if (unlikely(map_value_has_timer(map))) + *(struct bpf_timer *)(dst + map->timer_off) = + (struct bpf_timer){}; +} + +/* copy everything but bpf_spin_lock and bpf_timer. There could be one of each. */ static inline void copy_map_value(struct bpf_map *map, void *dst, void *src) { + u32 s_off = 0, s_sz = 0, t_off = 0, t_sz = 0; + if (unlikely(map_value_has_spin_lock(map))) { - u32 off = map->spin_lock_off; + s_off = map->spin_lock_off; + s_sz = sizeof(struct bpf_spin_lock); + } else if (unlikely(map_value_has_timer(map))) { + t_off = map->timer_off; + t_sz = sizeof(struct bpf_timer); + } - memcpy(dst, src, off); - memcpy(dst + off + sizeof(struct bpf_spin_lock), - src + off + sizeof(struct bpf_spin_lock), - map->value_size - off - sizeof(struct bpf_spin_lock)); + if (unlikely(s_sz || t_sz)) { + if (s_off < t_off || !s_sz) { + swap(s_off, t_off); + swap(s_sz, t_sz); + } + memcpy(dst, src, t_off); + memcpy(dst + t_off + t_sz, + src + t_off + t_sz, + s_off - t_off - t_sz); + memcpy(dst + s_off + s_sz, + src + s_off + s_sz, + map->value_size - s_off - s_sz); } else { memcpy(dst, src, map->value_size); } } void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, bool lock_src); +void bpf_timer_cancel_and_free(void *timer); int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size); struct bpf_offload_dev; @@ -314,6 +338,7 @@ enum bpf_arg_type { ARG_PTR_TO_FUNC, /* pointer to a bpf program function */ ARG_PTR_TO_STACK_OR_NULL, /* pointer to stack or NULL */ ARG_PTR_TO_CONST_STR, /* pointer to a null terminated read-only string */ + ARG_PTR_TO_TIMER, /* pointer to bpf_timer */ __BPF_ARG_TYPE_MAX, }; @@ -554,6 +579,11 @@ struct btf_func_model { */ #define BPF_TRAMP_F_SKIP_FRAME BIT(2) +/* Store IP address of the caller on the trampoline stack, + * so it's available for trampoline's programs. + */ +#define BPF_TRAMP_F_IP_ARG BIT(3) + /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50 * bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2 */ @@ -1073,7 +1103,7 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, /* an array of programs to be executed under rcu_lock. * * Typical usage: - * ret = BPF_PROG_RUN_ARRAY(&bpf_prog_array, ctx, BPF_PROG_RUN); + * ret = BPF_PROG_RUN_ARRAY(&bpf_prog_array, ctx, bpf_prog_run); * * the structure returned by bpf_prog_array_alloc() should be populated * with program pointers and the last pointer must be NULL. @@ -1084,7 +1114,10 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, */ struct bpf_prog_array_item { struct bpf_prog *prog; - struct bpf_cgroup_storage *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]; + union { + struct bpf_cgroup_storage *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]; + u64 bpf_cookie; + }; }; struct bpf_prog_array { @@ -1110,73 +1143,133 @@ int bpf_prog_array_copy_info(struct bpf_prog_array *array, int bpf_prog_array_copy(struct bpf_prog_array *old_array, struct bpf_prog *exclude_prog, struct bpf_prog *include_prog, + u64 bpf_cookie, struct bpf_prog_array **new_array); +struct bpf_run_ctx {}; + +struct bpf_cg_run_ctx { + struct bpf_run_ctx run_ctx; + const struct bpf_prog_array_item *prog_item; +}; + +struct bpf_trace_run_ctx { + struct bpf_run_ctx run_ctx; + u64 bpf_cookie; +}; + +static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx) +{ + struct bpf_run_ctx *old_ctx = NULL; + +#ifdef CONFIG_BPF_SYSCALL + old_ctx = current->bpf_ctx; + current->bpf_ctx = new_ctx; +#endif + return old_ctx; +} + +static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx) +{ +#ifdef CONFIG_BPF_SYSCALL + current->bpf_ctx = old_ctx; +#endif +} + /* BPF program asks to bypass CAP_NET_BIND_SERVICE in bind. */ #define BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE (1 << 0) /* BPF program asks to set CN on the packet. */ #define BPF_RET_SET_CN (1 << 0) -/* For BPF_PROG_RUN_ARRAY_FLAGS and __BPF_PROG_RUN_ARRAY, - * if bpf_cgroup_storage_set() failed, the rest of programs - * will not execute. This should be a really rare scenario - * as it requires BPF_CGROUP_STORAGE_NEST_MAX number of - * preemptions all between bpf_cgroup_storage_set() and - * bpf_cgroup_storage_unset() on the same cpu. - */ -#define BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, ret_flags) \ - ({ \ - struct bpf_prog_array_item *_item; \ - struct bpf_prog *_prog; \ - struct bpf_prog_array *_array; \ - u32 _ret = 1; \ - u32 func_ret; \ - migrate_disable(); \ - rcu_read_lock(); \ - _array = rcu_dereference(array); \ - _item = &_array->items[0]; \ - while ((_prog = READ_ONCE(_item->prog))) { \ - if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage))) \ - break; \ - func_ret = func(_prog, ctx); \ - _ret &= (func_ret & 1); \ - *(ret_flags) |= (func_ret >> 1); \ - bpf_cgroup_storage_unset(); \ - _item++; \ - } \ - rcu_read_unlock(); \ - migrate_enable(); \ - _ret; \ - }) - -#define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null, set_cg_storage) \ - ({ \ - struct bpf_prog_array_item *_item; \ - struct bpf_prog *_prog; \ - struct bpf_prog_array *_array; \ - u32 _ret = 1; \ - migrate_disable(); \ - rcu_read_lock(); \ - _array = rcu_dereference(array); \ - if (unlikely(check_non_null && !_array))\ - goto _out; \ - _item = &_array->items[0]; \ - while ((_prog = READ_ONCE(_item->prog))) { \ - if (!set_cg_storage) { \ - _ret &= func(_prog, ctx); \ - } else { \ - if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage))) \ - break; \ - _ret &= func(_prog, ctx); \ - bpf_cgroup_storage_unset(); \ - } \ - _item++; \ - } \ -_out: \ - rcu_read_unlock(); \ - migrate_enable(); \ - _ret; \ - }) +typedef u32 (*bpf_prog_run_fn)(const struct bpf_prog *prog, const void *ctx); + +static __always_inline u32 +BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu, + const void *ctx, bpf_prog_run_fn run_prog, + u32 *ret_flags) +{ + const struct bpf_prog_array_item *item; + const struct bpf_prog *prog; + const struct bpf_prog_array *array; + struct bpf_run_ctx *old_run_ctx; + struct bpf_cg_run_ctx run_ctx; + u32 ret = 1; + u32 func_ret; + + migrate_disable(); + rcu_read_lock(); + array = rcu_dereference(array_rcu); + item = &array->items[0]; + old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); + while ((prog = READ_ONCE(item->prog))) { + run_ctx.prog_item = item; + func_ret = run_prog(prog, ctx); + ret &= (func_ret & 1); + *(ret_flags) |= (func_ret >> 1); + item++; + } + bpf_reset_run_ctx(old_run_ctx); + rcu_read_unlock(); + migrate_enable(); + return ret; +} + +static __always_inline u32 +BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu, + const void *ctx, bpf_prog_run_fn run_prog) +{ + const struct bpf_prog_array_item *item; + const struct bpf_prog *prog; + const struct bpf_prog_array *array; + struct bpf_run_ctx *old_run_ctx; + struct bpf_cg_run_ctx run_ctx; + u32 ret = 1; + + migrate_disable(); + rcu_read_lock(); + array = rcu_dereference(array_rcu); + item = &array->items[0]; + old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); + while ((prog = READ_ONCE(item->prog))) { + run_ctx.prog_item = item; + ret &= run_prog(prog, ctx); + item++; + } + bpf_reset_run_ctx(old_run_ctx); + rcu_read_unlock(); + migrate_enable(); + return ret; +} + +static __always_inline u32 +BPF_PROG_RUN_ARRAY(const struct bpf_prog_array __rcu *array_rcu, + const void *ctx, bpf_prog_run_fn run_prog) +{ + const struct bpf_prog_array_item *item; + const struct bpf_prog *prog; + const struct bpf_prog_array *array; + struct bpf_run_ctx *old_run_ctx; + struct bpf_trace_run_ctx run_ctx; + u32 ret = 1; + + migrate_disable(); + rcu_read_lock(); + array = rcu_dereference(array_rcu); + if (unlikely(!array)) + goto out; + old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); + item = &array->items[0]; + while ((prog = READ_ONCE(item->prog))) { + run_ctx.bpf_cookie = item->bpf_cookie; + ret &= run_prog(prog, ctx); + item++; + } + bpf_reset_run_ctx(old_run_ctx); +out: + rcu_read_unlock(); + migrate_enable(); + return ret; +} /* To be used by __cgroup_bpf_run_filter_skb for EGRESS BPF progs * so BPF programs can request cwr for TCP packets. @@ -1205,7 +1298,7 @@ _out: \ u32 _flags = 0; \ bool _cn; \ u32 _ret; \ - _ret = BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, &_flags); \ + _ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, &_flags); \ _cn = _flags & BPF_RET_SET_CN; \ if (_ret) \ _ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS); \ @@ -1214,12 +1307,6 @@ _out: \ _ret; \ }) -#define BPF_PROG_RUN_ARRAY(array, ctx, func) \ - __BPF_PROG_RUN_ARRAY(array, ctx, func, false, true) - -#define BPF_PROG_RUN_ARRAY_CHECK(array, ctx, func) \ - __BPF_PROG_RUN_ARRAY(array, ctx, func, true, false) - #ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); extern struct mutex bpf_stats_enabled_mutex; @@ -1398,6 +1485,9 @@ typedef void (*bpf_iter_show_fdinfo_t) (const struct bpf_iter_aux_info *aux, struct seq_file *seq); typedef int (*bpf_iter_fill_link_info_t)(const struct bpf_iter_aux_info *aux, struct bpf_link_info *info); +typedef const struct bpf_func_proto * +(*bpf_iter_get_func_proto_t)(enum bpf_func_id func_id, + const struct bpf_prog *prog); enum bpf_iter_feature { BPF_ITER_RESCHED = BIT(0), @@ -1410,6 +1500,7 @@ struct bpf_iter_reg { bpf_iter_detach_target_t detach_target; bpf_iter_show_fdinfo_t show_fdinfo; bpf_iter_fill_link_info_t fill_link_info; + bpf_iter_get_func_proto_t get_func_proto; u32 ctx_arg_info_size; u32 feature; struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX]; @@ -1432,6 +1523,8 @@ struct bpf_iter__bpf_map_elem { int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info); void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info); bool bpf_iter_prog_supported(struct bpf_prog *prog); +const struct bpf_func_proto * +bpf_iter_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog); int bpf_iter_link_attach(const union bpf_attr *attr, bpfptr_t uattr, struct bpf_prog *prog); int bpf_iter_new_fd(struct bpf_link *link); bool bpf_link_is_iter(struct bpf_link *link); @@ -1509,12 +1602,12 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb, struct bpf_prog *xdp_prog, struct bpf_map *map, bool exclude_ingress); -bool dev_map_can_have_prog(struct bpf_map *map); void __cpu_map_flush(void); int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp, struct net_device *dev_rx); -bool cpu_map_prog_allowed(struct bpf_map *map); +int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu, + struct sk_buff *skb); /* Return map's numa specified by userspace */ static inline int bpf_map_attr_numa_node(const union bpf_attr *attr) @@ -1711,6 +1804,12 @@ static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, return 0; } +static inline int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu, + struct sk_buff *skb) +{ + return -EOPNOTSUPP; +} + static inline bool cpu_map_prog_allowed(struct bpf_map *map) { return false; @@ -1852,6 +1951,12 @@ void bpf_map_offload_map_free(struct bpf_map *map); int bpf_prog_test_run_syscall(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); + +int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog); +int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype); +int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags); +void sock_map_unhash(struct sock *sk); +void sock_map_close(struct sock *sk, long timeout); #else static inline int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr) @@ -1884,24 +1989,6 @@ static inline int bpf_prog_test_run_syscall(struct bpf_prog *prog, { return -ENOTSUPP; } -#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */ - -#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) -int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog); -int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype); -int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags); -void sock_map_unhash(struct sock *sk); -void sock_map_close(struct sock *sk, long timeout); - -void bpf_sk_reuseport_detach(struct sock *sk); -int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key, - void *value); -int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key, - void *value, u64 map_flags); -#else -static inline void bpf_sk_reuseport_detach(struct sock *sk) -{ -} #ifdef CONFIG_BPF_SYSCALL static inline int sock_map_get_from_fd(const union bpf_attr *attr, @@ -1921,7 +2008,21 @@ static inline int sock_map_update_elem_sys(struct bpf_map *map, void *key, void { return -EOPNOTSUPP; } +#endif /* CONFIG_BPF_SYSCALL */ +#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */ +#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) +void bpf_sk_reuseport_detach(struct sock *sk); +int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key, + void *value); +int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags); +#else +static inline void bpf_sk_reuseport_detach(struct sock *sk) +{ +} + +#ifdef CONFIG_BPF_SYSCALL static inline int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key, void *value) { @@ -1998,9 +2099,8 @@ extern const struct bpf_func_proto bpf_task_storage_get_proto; extern const struct bpf_func_proto bpf_task_storage_delete_proto; extern const struct bpf_func_proto bpf_for_each_map_elem_proto; extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto; - -const struct bpf_func_proto *bpf_tracing_func_proto( - enum bpf_func_id func_id, const struct bpf_prog *prog); +extern const struct bpf_func_proto bpf_sk_setsockopt_proto; +extern const struct bpf_func_proto bpf_sk_getsockopt_proto; const struct bpf_func_proto *tracing_prog_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); |