diff options
Diffstat (limited to 'include/linux/bpf.h')
| -rw-r--r-- | include/linux/bpf.h | 285 |
1 files changed, 259 insertions, 26 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d808253f2e94..05b34a6355b0 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -124,7 +124,7 @@ struct bpf_map_ops { u32 (*map_fd_sys_lookup_elem)(void *ptr); void (*map_seq_show_elem)(struct bpf_map *map, void *key, struct seq_file *m); - int (*map_check_btf)(const struct bpf_map *map, + int (*map_check_btf)(struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type); @@ -287,6 +287,7 @@ struct bpf_map_owner { enum bpf_prog_type type; bool jited; bool xdp_has_frags; + bool sleepable; u64 storage_cookie[MAX_BPF_CGROUP_STORAGE_TYPE]; const struct btf_type *attach_func_proto; enum bpf_attach_type expected_attach_type; @@ -655,7 +656,7 @@ static inline bool bpf_map_support_seq_show(const struct bpf_map *map) map->ops->map_seq_show_elem; } -int map_check_no_btf(const struct bpf_map *map, +int map_check_no_btf(struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type); @@ -663,6 +664,32 @@ int map_check_no_btf(const struct bpf_map *map, bool bpf_map_meta_equal(const struct bpf_map *meta0, const struct bpf_map *meta1); +static inline bool bpf_map_has_internal_structs(struct bpf_map *map) +{ + return btf_record_has_field(map->record, BPF_TIMER | BPF_WORKQUEUE | BPF_TASK_WORK); +} + +void bpf_map_free_internal_structs(struct bpf_map *map, void *obj); + +int bpf_dynptr_from_file_sleepable(struct file *file, u32 flags, + struct bpf_dynptr *ptr__uninit); + +#if defined(CONFIG_MMU) && defined(CONFIG_64BIT) +void *bpf_arena_alloc_pages_non_sleepable(void *p__map, void *addr__ign, u32 page_cnt, int node_id, + u64 flags); +void bpf_arena_free_pages_non_sleepable(void *p__map, void *ptr__ign, u32 page_cnt); +#else +static inline void *bpf_arena_alloc_pages_non_sleepable(void *p__map, void *addr__ign, u32 page_cnt, + int node_id, u64 flags) +{ + return NULL; +} + +static inline void bpf_arena_free_pages_non_sleepable(void *p__map, void *ptr__ign, u32 page_cnt) +{ +} +#endif + extern const struct bpf_map_ops bpf_map_offload_ops; /* bpf_type_flag contains a set of flags that are applicable to the values of @@ -727,7 +754,7 @@ enum bpf_type_flag { MEM_ALLOC = BIT(11 + BPF_BASE_TYPE_BITS), /* PTR was passed from the kernel in a trusted context, and may be - * passed to KF_TRUSTED_ARGS kfuncs or BPF helper functions. + * passed to kfuncs or BPF helper functions. * Confusingly, this is _not_ the opposite of PTR_UNTRUSTED above. * PTR_UNTRUSTED refers to a kptr that was read directly from a map * without invoking bpf_kptr_xchg(). What we really need to know is @@ -785,12 +812,15 @@ enum bpf_type_flag { /* DYNPTR points to skb_metadata_end()-skb_metadata_len() */ DYNPTR_TYPE_SKB_META = BIT(19 + BPF_BASE_TYPE_BITS), + /* DYNPTR points to file */ + DYNPTR_TYPE_FILE = BIT(20 + BPF_BASE_TYPE_BITS), + __BPF_TYPE_FLAG_MAX, __BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1, }; #define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF | DYNPTR_TYPE_SKB \ - | DYNPTR_TYPE_XDP | DYNPTR_TYPE_SKB_META) + | DYNPTR_TYPE_XDP | DYNPTR_TYPE_SKB_META | DYNPTR_TYPE_FILE) /* Max number of base types. */ #define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS) @@ -988,6 +1018,7 @@ enum bpf_reg_type { PTR_TO_ARENA, PTR_TO_BUF, /* reg points to a read/write buffer */ PTR_TO_FUNC, /* reg points to a bpf program function */ + PTR_TO_INSN, /* reg points to a bpf program instruction */ CONST_PTR_TO_DYNPTR, /* reg points to a const struct bpf_dynptr */ __BPF_REG_TYPE_MAX, @@ -1199,6 +1230,9 @@ enum { #endif }; +#define BPF_TRAMP_COOKIE_INDEX_SHIFT 8 +#define BPF_TRAMP_IS_RETURN_SHIFT 63 + struct bpf_tramp_links { struct bpf_tramp_link *links[BPF_MAX_TRAMP_LINKS]; int nr_links; @@ -1250,6 +1284,18 @@ typedef void (*bpf_trampoline_exit_t)(struct bpf_prog *prog, u64 start, bpf_trampoline_enter_t bpf_trampoline_enter(const struct bpf_prog *prog); bpf_trampoline_exit_t bpf_trampoline_exit(const struct bpf_prog *prog); +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_JMP +static inline bool bpf_trampoline_use_jmp(u64 flags) +{ + return flags & BPF_TRAMP_F_CALL_ORIG && !(flags & BPF_TRAMP_F_SKIP_FRAME); +} +#else +static inline bool bpf_trampoline_use_jmp(u64 flags) +{ + return false; +} +#endif + struct bpf_ksym { unsigned long start; unsigned long end; @@ -1257,6 +1303,8 @@ struct bpf_ksym { struct list_head lnode; struct latch_tree_node tnode; bool prog; + u32 fp_start; + u32 fp_end; }; enum bpf_tramp_prog_type { @@ -1265,6 +1313,7 @@ enum bpf_tramp_prog_type { BPF_TRAMP_MODIFY_RETURN, BPF_TRAMP_MAX, BPF_TRAMP_REPLACE, /* more than MAX */ + BPF_TRAMP_FSESSION, }; struct bpf_tramp_image { @@ -1281,14 +1330,17 @@ struct bpf_tramp_image { }; struct bpf_trampoline { - /* hlist for trampoline_table */ - struct hlist_node hlist; + /* hlist for trampoline_key_table */ + struct hlist_node hlist_key; + /* hlist for trampoline_ip_table */ + struct hlist_node hlist_ip; struct ftrace_ops *fops; /* serializes access to fields of this trampoline */ struct mutex mutex; refcount_t refcnt; u32 flags; u64 key; + unsigned long ip; struct { struct btf_func_model model; void *addr; @@ -1378,21 +1430,23 @@ enum bpf_dynptr_type { BPF_DYNPTR_TYPE_XDP, /* Points to skb_metadata_end()-skb_metadata_len() */ BPF_DYNPTR_TYPE_SKB_META, + /* Underlying data is a file */ + BPF_DYNPTR_TYPE_FILE, }; -int bpf_dynptr_check_size(u32 size); -u32 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr); -const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len); -void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u32 len); +int bpf_dynptr_check_size(u64 size); +u64 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr); +const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u64 len); +void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u64 len); bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr); -int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset, - void *src, u32 len, u64 flags); -void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u32 offset, - void *buffer__opt, u32 buffer__szk); +int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u64 offset, + void *src, u64 len, u64 flags); +void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u64 offset, + void *buffer__nullable, u64 buffer__szk); -static inline int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u32 offset, u32 len) +static inline int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u64 offset, u64 len) { - u32 size = __bpf_dynptr_size(ptr); + u64 size = __bpf_dynptr_size(ptr); if (len > size || offset > size - len) return -E2BIG; @@ -1483,6 +1537,7 @@ void bpf_image_ksym_add(struct bpf_ksym *ksym); void bpf_image_ksym_del(struct bpf_ksym *ksym); void bpf_ksym_add(struct bpf_ksym *ksym); void bpf_ksym_del(struct bpf_ksym *ksym); +bool bpf_has_frame_pointer(unsigned long ip); int bpf_jit_charge_modmem(u32 size); void bpf_jit_uncharge_modmem(u32 size); bool bpf_prog_has_trampoline(const struct bpf_prog *prog); @@ -1616,6 +1671,7 @@ struct bpf_prog_aux { u32 ctx_arg_info_size; u32 max_rdonly_access; u32 max_rdwr_access; + u32 subprog_start; struct btf *attach_btf; struct bpf_ctx_arg_aux *ctx_arg_info; void __percpu *priv_stack_ptr; @@ -1710,8 +1766,12 @@ struct bpf_prog_aux { struct rcu_head rcu; }; struct bpf_stream stream[2]; + struct mutex st_ops_assoc_mutex; + struct bpf_map __rcu *st_ops_assoc; }; +#define BPF_NR_CONTEXTS 4 /* normal, softirq, hardirq, NMI */ + struct bpf_prog { u16 pages; /* Number of allocated pages */ u16 jited:1, /* Is our filter JIT'ed? */ @@ -1727,6 +1787,7 @@ struct bpf_prog { enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */ call_get_stack:1, /* Do we call bpf_get_stack() or bpf_get_stackid() */ call_get_func_ip:1, /* Do we call get_func_ip() */ + call_session_cookie:1, /* Do we call bpf_session_cookie() */ tstamp_type_access:1, /* Accessed __sk_buff->tstamp_type */ sleepable:1; /* BPF program is sleepable */ enum bpf_prog_type type; /* Type of BPF program */ @@ -1738,7 +1799,7 @@ struct bpf_prog { u8 tag[BPF_TAG_SIZE]; }; struct bpf_prog_stats __percpu *stats; - int __percpu *active; + u8 __percpu *active; /* u8[BPF_NR_CONTEXTS] for recursion protection */ unsigned int (*bpf_func)(const void *ctx, const struct bpf_insn *insn); struct bpf_prog_aux *aux; /* Auxiliary fields */ @@ -1823,6 +1884,11 @@ struct bpf_tracing_link { struct bpf_prog *tgt_prog; }; +struct bpf_fsession_link { + struct bpf_tracing_link link; + struct bpf_tramp_link fexit; +}; + struct bpf_raw_tp_link { struct bpf_link link; struct bpf_raw_event_map *btp; @@ -1905,12 +1971,14 @@ struct btf_member; * reason, if this callback is not defined, the check is skipped as * the struct_ops map will have final verification performed in * @reg. - * @type: BTF type. - * @value_type: Value type. + * @cfi_stubs: Pointer to a structure of stub functions for CFI. These stubs + * provide the correct Control Flow Integrity hashes for the + * trampolines generated by BPF struct_ops. + * @owner: The module that owns this struct_ops. Used for module reference + * counting to ensure the module providing the struct_ops cannot be + * unloaded while in use. * @name: The name of the struct bpf_struct_ops object. * @func_models: Func models - * @type_id: BTF type id. - * @value_id: BTF value id. */ struct bpf_struct_ops { const struct bpf_verifier_ops *verifier_ops; @@ -1968,6 +2036,40 @@ struct bpf_struct_ops_common_value { enum bpf_struct_ops_state state; }; +static inline bool bpf_prog_get_recursion_context(struct bpf_prog *prog) +{ +#ifdef CONFIG_ARM64 + u8 rctx = interrupt_context_level(); + u8 *active = this_cpu_ptr(prog->active); + u32 val; + + preempt_disable(); + active[rctx]++; + val = le32_to_cpu(*(__le32 *)active); + preempt_enable(); + if (val != BIT(rctx * 8)) + return false; + + return true; +#else + return this_cpu_inc_return(*(int __percpu *)(prog->active)) == 1; +#endif +} + +static inline void bpf_prog_put_recursion_context(struct bpf_prog *prog) +{ +#ifdef CONFIG_ARM64 + u8 rctx = interrupt_context_level(); + u8 *active = this_cpu_ptr(prog->active); + + preempt_disable(); + active[rctx]--; + preempt_enable(); +#else + this_cpu_dec(*(int __percpu *)(prog->active)); +#endif +} + #if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL) /* This macro helps developer to register a struct_ops type and generate * type information correctly. Developers should use this macro to register @@ -2010,6 +2112,9 @@ static inline void bpf_module_put(const void *data, struct module *owner) module_put(owner); } int bpf_struct_ops_link_create(union bpf_attr *attr); +int bpf_prog_assoc_struct_ops(struct bpf_prog *prog, struct bpf_map *map); +void bpf_prog_disassoc_struct_ops(struct bpf_prog *prog); +void *bpf_prog_get_assoc_struct_ops(const struct bpf_prog_aux *aux); u32 bpf_struct_ops_id(const void *kdata); #ifdef CONFIG_NET @@ -2057,6 +2162,17 @@ static inline int bpf_struct_ops_link_create(union bpf_attr *attr) { return -EOPNOTSUPP; } +static inline int bpf_prog_assoc_struct_ops(struct bpf_prog *prog, struct bpf_map *map) +{ + return -EOPNOTSUPP; +} +static inline void bpf_prog_disassoc_struct_ops(struct bpf_prog *prog) +{ +} +static inline void *bpf_prog_get_assoc_struct_ops(const struct bpf_prog_aux *aux) +{ + return NULL; +} static inline void bpf_map_struct_ops_info_fill(struct bpf_map_info *info, struct bpf_map *map) { } @@ -2067,6 +2183,37 @@ static inline void bpf_struct_ops_desc_release(struct bpf_struct_ops_desc *st_op #endif +static inline int bpf_fsession_cnt(struct bpf_tramp_links *links) +{ + struct bpf_tramp_links fentries = links[BPF_TRAMP_FENTRY]; + int cnt = 0; + + for (int i = 0; i < links[BPF_TRAMP_FENTRY].nr_links; i++) { + if (fentries.links[i]->link.prog->expected_attach_type == BPF_TRACE_FSESSION) + cnt++; + } + + return cnt; +} + +static inline bool bpf_prog_calls_session_cookie(struct bpf_tramp_link *link) +{ + return link->link.prog->call_session_cookie; +} + +static inline int bpf_fsession_cookie_cnt(struct bpf_tramp_links *links) +{ + struct bpf_tramp_links fentries = links[BPF_TRAMP_FENTRY]; + int cnt = 0; + + for (int i = 0; i < links[BPF_TRAMP_FENTRY].nr_links; i++) { + if (bpf_prog_calls_session_cookie(fentries.links[i])) + cnt++; + } + + return cnt; +} + int bpf_prog_ctx_arg_info_init(struct bpf_prog *prog, const struct bpf_ctx_arg_aux *info, u32 cnt); @@ -2099,6 +2246,12 @@ struct bpf_array { }; }; +/* + * The bpf_array_get_next_key() function may be used for all array-like + * maps, i.e., maps with u32 keys with range [0 ,..., max_entries) + */ +int bpf_array_get_next_key(struct bpf_map *map, void *key, void *next_key); + #define BPF_COMPLEXITY_LIMIT_INSNS 1000000 /* yes. 1M insns */ #define MAX_TAIL_CALL_CNT 33 @@ -2146,7 +2299,7 @@ static inline bool bpf_map_flags_access_ok(u32 access_flags) static inline struct bpf_map_owner *bpf_map_owner_alloc(struct bpf_map *map) { - return kzalloc(sizeof(*map->owner), GFP_ATOMIC); + return kzalloc_obj(*map->owner, GFP_ATOMIC); } static inline void bpf_map_owner_free(struct bpf_map *map) @@ -2374,6 +2527,9 @@ bpf_prog_run_array_uprobe(const struct bpf_prog_array *array, bool bpf_jit_bypass_spec_v1(void); bool bpf_jit_bypass_spec_v4(void); +#define bpf_rcu_lock_held() \ + (rcu_read_lock_held() || rcu_read_lock_trace_held() || rcu_read_lock_bh_held()) + #ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); extern struct mutex bpf_stats_enabled_mutex; @@ -2497,6 +2653,10 @@ struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id); int bpf_map_alloc_pages(const struct bpf_map *map, int nid, unsigned long nr_pages, struct page **page_array); #ifdef CONFIG_MEMCG +void bpf_map_memcg_enter(const struct bpf_map *map, struct mem_cgroup **old_memcg, + struct mem_cgroup **new_memcg); +void bpf_map_memcg_exit(struct mem_cgroup *old_memcg, + struct mem_cgroup *memcg); void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags, int node); void *bpf_map_kmalloc_nolock(const struct bpf_map *map, size_t size, gfp_t flags, @@ -2521,6 +2681,17 @@ void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, kvcalloc(_n, _size, _flags) #define bpf_map_alloc_percpu(_map, _size, _align, _flags) \ __alloc_percpu_gfp(_size, _align, _flags) +static inline void bpf_map_memcg_enter(const struct bpf_map *map, struct mem_cgroup **old_memcg, + struct mem_cgroup **new_memcg) +{ + *new_memcg = NULL; + *old_memcg = NULL; +} + +static inline void bpf_map_memcg_exit(struct mem_cgroup *old_memcg, + struct mem_cgroup *memcg) +{ +} #endif static inline int @@ -2721,8 +2892,8 @@ int map_set_for_each_callback_args(struct bpf_verifier_env *env, struct bpf_func_state *caller, struct bpf_func_state *callee); -int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value); -int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value); +int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value, u64 flags); +int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value, u64 flags); int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value, u64 flags); int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value, @@ -3200,6 +3371,11 @@ static inline void bpf_prog_report_arena_violation(bool write, unsigned long add } #endif /* CONFIG_BPF_SYSCALL */ +static inline bool bpf_net_capable(void) +{ + return capable(CAP_NET_ADMIN) || capable(CAP_SYS_ADMIN); +} + static __always_inline int bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr) { @@ -3670,12 +3846,14 @@ static inline u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type, #endif /* CONFIG_INET */ enum bpf_text_poke_type { + BPF_MOD_NOP, BPF_MOD_CALL, BPF_MOD_JUMP, }; -int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, - void *addr1, void *addr2); +int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t, + enum bpf_text_poke_type new_t, void *old_addr, + void *new_addr); void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke, struct bpf_prog *new, struct bpf_prog *old); @@ -3772,4 +3950,59 @@ int bpf_prog_get_file_line(struct bpf_prog *prog, unsigned long ip, const char * const char **linep, int *nump); struct bpf_prog *bpf_prog_find_from_stack(void); +int bpf_insn_array_init(struct bpf_map *map, const struct bpf_prog *prog); +int bpf_insn_array_ready(struct bpf_map *map); +void bpf_insn_array_release(struct bpf_map *map); +void bpf_insn_array_adjust(struct bpf_map *map, u32 off, u32 len); +void bpf_insn_array_adjust_after_remove(struct bpf_map *map, u32 off, u32 len); + +#ifdef CONFIG_BPF_SYSCALL +void bpf_prog_update_insn_ptrs(struct bpf_prog *prog, u32 *offsets, void *image); +#else +static inline void +bpf_prog_update_insn_ptrs(struct bpf_prog *prog, u32 *offsets, void *image) +{ +} +#endif + +static inline bool bpf_map_supports_cpu_flags(enum bpf_map_type map_type) +{ + switch (map_type) { + case BPF_MAP_TYPE_PERCPU_ARRAY: + case BPF_MAP_TYPE_PERCPU_HASH: + case BPF_MAP_TYPE_LRU_PERCPU_HASH: + case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: + return true; + default: + return false; + } +} + +static inline int bpf_map_check_op_flags(struct bpf_map *map, u64 flags, u64 allowed_flags) +{ + u32 cpu; + + if ((u32)flags & ~allowed_flags) + return -EINVAL; + + if ((flags & BPF_F_LOCK) && !btf_record_has_field(map->record, BPF_SPIN_LOCK)) + return -EINVAL; + + if (!(flags & BPF_F_CPU) && flags >> 32) + return -EINVAL; + + if (flags & (BPF_F_CPU | BPF_F_ALL_CPUS)) { + if (!bpf_map_supports_cpu_flags(map->map_type)) + return -EINVAL; + if ((flags & BPF_F_CPU) && (flags & BPF_F_ALL_CPUS)) + return -EINVAL; + + cpu = flags >> 32; + if ((flags & BPF_F_CPU) && cpu >= num_possible_cpus()) + return -ERANGE; + } + + return 0; +} + #endif /* _LINUX_BPF_H */ |
