Diffstat (limited to 'include/linux/bpf.h')
-rw-r--r--	include/linux/bpf.h	285
1 file changed, 259 insertions, 26 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index d808253f2e94..05b34a6355b0 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -124,7 +124,7 @@ struct bpf_map_ops {
u32 (*map_fd_sys_lookup_elem)(void *ptr);
void (*map_seq_show_elem)(struct bpf_map *map, void *key,
struct seq_file *m);
- int (*map_check_btf)(const struct bpf_map *map,
+ int (*map_check_btf)(struct bpf_map *map,
const struct btf *btf,
const struct btf_type *key_type,
const struct btf_type *value_type);
@@ -287,6 +287,7 @@ struct bpf_map_owner {
enum bpf_prog_type type;
bool jited;
bool xdp_has_frags;
+ bool sleepable;
u64 storage_cookie[MAX_BPF_CGROUP_STORAGE_TYPE];
const struct btf_type *attach_func_proto;
enum bpf_attach_type expected_attach_type;
@@ -655,7 +656,7 @@ static inline bool bpf_map_support_seq_show(const struct bpf_map *map)
map->ops->map_seq_show_elem;
}
-int map_check_no_btf(const struct bpf_map *map,
+int map_check_no_btf(struct bpf_map *map,
const struct btf *btf,
const struct btf_type *key_type,
const struct btf_type *value_type);
@@ -663,6 +664,32 @@ int map_check_no_btf(const struct bpf_map *map,
bool bpf_map_meta_equal(const struct bpf_map *meta0,
const struct bpf_map *meta1);
+static inline bool bpf_map_has_internal_structs(struct bpf_map *map)
+{
+ return btf_record_has_field(map->record, BPF_TIMER | BPF_WORKQUEUE | BPF_TASK_WORK);
+}
+
+void bpf_map_free_internal_structs(struct bpf_map *map, void *obj);
+
+int bpf_dynptr_from_file_sleepable(struct file *file, u32 flags,
+ struct bpf_dynptr *ptr__uninit);
+
+#if defined(CONFIG_MMU) && defined(CONFIG_64BIT)
+void *bpf_arena_alloc_pages_non_sleepable(void *p__map, void *addr__ign, u32 page_cnt, int node_id,
+ u64 flags);
+void bpf_arena_free_pages_non_sleepable(void *p__map, void *ptr__ign, u32 page_cnt);
+#else
+static inline void *bpf_arena_alloc_pages_non_sleepable(void *p__map, void *addr__ign, u32 page_cnt,
+ int node_id, u64 flags)
+{
+ return NULL;
+}
+
+static inline void bpf_arena_free_pages_non_sleepable(void *p__map, void *ptr__ign, u32 page_cnt)
+{
+}
+#endif
+
extern const struct bpf_map_ops bpf_map_offload_ops;
/* bpf_type_flag contains a set of flags that are applicable to the values of
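bpf_map_has_internal_structs() and bpf_map_free_internal_structs() give map implementations one place to detect and tear down kernel-managed objects (BPF_TIMER, BPF_WORKQUEUE, BPF_TASK_WORK) embedded in map values. A minimal sketch of how a map's element-free path might use the pair; the function name and the kfree()-backed storage are illustrative, not part of this patch:

static void example_map_free_elem(struct bpf_map *map, void *elem)
{
	/* timers, workqueues and task_work embedded in the value must be
	 * torn down before the backing storage is released
	 */
	if (bpf_map_has_internal_structs(map))
		bpf_map_free_internal_structs(map, elem);
	kfree(elem);
}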
@@ -727,7 +754,7 @@ enum bpf_type_flag {
MEM_ALLOC = BIT(11 + BPF_BASE_TYPE_BITS),
/* PTR was passed from the kernel in a trusted context, and may be
- * passed to KF_TRUSTED_ARGS kfuncs or BPF helper functions.
+ * passed to kfuncs or BPF helper functions.
* Confusingly, this is _not_ the opposite of PTR_UNTRUSTED above.
* PTR_UNTRUSTED refers to a kptr that was read directly from a map
* without invoking bpf_kptr_xchg(). What we really need to know is
@@ -785,12 +812,15 @@ enum bpf_type_flag {
/* DYNPTR points to skb_metadata_end()-skb_metadata_len() */
DYNPTR_TYPE_SKB_META = BIT(19 + BPF_BASE_TYPE_BITS),
+ /* DYNPTR points to file */
+ DYNPTR_TYPE_FILE = BIT(20 + BPF_BASE_TYPE_BITS),
+
__BPF_TYPE_FLAG_MAX,
__BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1,
};
#define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF | DYNPTR_TYPE_SKB \
- | DYNPTR_TYPE_XDP | DYNPTR_TYPE_SKB_META)
+ | DYNPTR_TYPE_XDP | DYNPTR_TYPE_SKB_META | DYNPTR_TYPE_FILE)
/* Max number of base types. */
#define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS)
@@ -988,6 +1018,7 @@ enum bpf_reg_type {
PTR_TO_ARENA,
PTR_TO_BUF, /* reg points to a read/write buffer */
PTR_TO_FUNC, /* reg points to a bpf program function */
+ PTR_TO_INSN, /* reg points to a bpf program instruction */
CONST_PTR_TO_DYNPTR, /* reg points to a const struct bpf_dynptr */
__BPF_REG_TYPE_MAX,
@@ -1199,6 +1230,9 @@ enum {
#endif
};
+#define BPF_TRAMP_COOKIE_INDEX_SHIFT 8
+#define BPF_TRAMP_IS_RETURN_SHIFT 63
+
struct bpf_tramp_links {
struct bpf_tramp_link *links[BPF_MAX_TRAMP_LINKS];
int nr_links;
@@ -1250,6 +1284,18 @@ typedef void (*bpf_trampoline_exit_t)(struct bpf_prog *prog, u64 start,
bpf_trampoline_enter_t bpf_trampoline_enter(const struct bpf_prog *prog);
bpf_trampoline_exit_t bpf_trampoline_exit(const struct bpf_prog *prog);
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_JMP
+static inline bool bpf_trampoline_use_jmp(u64 flags)
+{
+ return flags & BPF_TRAMP_F_CALL_ORIG && !(flags & BPF_TRAMP_F_SKIP_FRAME);
+}
+#else
+static inline bool bpf_trampoline_use_jmp(u64 flags)
+{
+ return false;
+}
+#endif
+
struct bpf_ksym {
unsigned long start;
unsigned long end;
@@ -1257,6 +1303,8 @@ struct bpf_ksym {
struct list_head lnode;
struct latch_tree_node tnode;
bool prog;
+ u32 fp_start;
+ u32 fp_end;
};
enum bpf_tramp_prog_type {
@@ -1265,6 +1313,7 @@ enum bpf_tramp_prog_type {
BPF_TRAMP_MODIFY_RETURN,
BPF_TRAMP_MAX,
BPF_TRAMP_REPLACE, /* more than MAX */
+ BPF_TRAMP_FSESSION,
};
struct bpf_tramp_image {
@@ -1281,14 +1330,17 @@ struct bpf_tramp_image {
};
struct bpf_trampoline {
- /* hlist for trampoline_table */
- struct hlist_node hlist;
+ /* hlist for trampoline_key_table */
+ struct hlist_node hlist_key;
+ /* hlist for trampoline_ip_table */
+ struct hlist_node hlist_ip;
struct ftrace_ops *fops;
/* serializes access to fields of this trampoline */
struct mutex mutex;
refcount_t refcnt;
u32 flags;
u64 key;
+ unsigned long ip;
struct {
struct btf_func_model model;
void *addr;
@@ -1378,21 +1430,23 @@ enum bpf_dynptr_type {
BPF_DYNPTR_TYPE_XDP,
/* Points to skb_metadata_end()-skb_metadata_len() */
BPF_DYNPTR_TYPE_SKB_META,
+ /* Underlying data is a file */
+ BPF_DYNPTR_TYPE_FILE,
};
-int bpf_dynptr_check_size(u32 size);
-u32 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr);
-const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len);
-void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u32 len);
+int bpf_dynptr_check_size(u64 size);
+u64 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr);
+const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u64 len);
+void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u64 len);
bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr);
-int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset,
- void *src, u32 len, u64 flags);
-void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u32 offset,
- void *buffer__opt, u32 buffer__szk);
+int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u64 offset,
+ void *src, u64 len, u64 flags);
+void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u64 offset,
+ void *buffer__nullable, u64 buffer__szk);
-static inline int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u32 offset, u32 len)
+static inline int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u64 offset, u64 len)
{
- u32 size = __bpf_dynptr_size(ptr);
+ u64 size = __bpf_dynptr_size(ptr);
if (len > size || offset > size - len)
return -E2BIG;
@@ -1483,6 +1537,7 @@ void bpf_image_ksym_add(struct bpf_ksym *ksym);
void bpf_image_ksym_del(struct bpf_ksym *ksym);
void bpf_ksym_add(struct bpf_ksym *ksym);
void bpf_ksym_del(struct bpf_ksym *ksym);
+bool bpf_has_frame_pointer(unsigned long ip);
int bpf_jit_charge_modmem(u32 size);
void bpf_jit_uncharge_modmem(u32 size);
bool bpf_prog_has_trampoline(const struct bpf_prog *prog);
@@ -1616,6 +1671,7 @@ struct bpf_prog_aux {
u32 ctx_arg_info_size;
u32 max_rdonly_access;
u32 max_rdwr_access;
+ u32 subprog_start;
struct btf *attach_btf;
struct bpf_ctx_arg_aux *ctx_arg_info;
void __percpu *priv_stack_ptr;
@@ -1710,8 +1766,12 @@ struct bpf_prog_aux {
struct rcu_head rcu;
};
struct bpf_stream stream[2];
+ struct mutex st_ops_assoc_mutex;
+ struct bpf_map __rcu *st_ops_assoc;
};
+#define BPF_NR_CONTEXTS 4 /* normal, softirq, hardirq, NMI */
+
struct bpf_prog {
u16 pages; /* Number of allocated pages */
u16 jited:1, /* Is our filter JIT'ed? */
@@ -1727,6 +1787,7 @@ struct bpf_prog {
enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */
call_get_stack:1, /* Do we call bpf_get_stack() or bpf_get_stackid() */
call_get_func_ip:1, /* Do we call get_func_ip() */
+ call_session_cookie:1, /* Do we call bpf_session_cookie() */
tstamp_type_access:1, /* Accessed __sk_buff->tstamp_type */
sleepable:1; /* BPF program is sleepable */
enum bpf_prog_type type; /* Type of BPF program */
@@ -1738,7 +1799,7 @@ struct bpf_prog {
u8 tag[BPF_TAG_SIZE];
};
struct bpf_prog_stats __percpu *stats;
- int __percpu *active;
+ u8 __percpu *active; /* u8[BPF_NR_CONTEXTS] for recursion protection */
unsigned int (*bpf_func)(const void *ctx,
const struct bpf_insn *insn);
struct bpf_prog_aux *aux; /* Auxiliary fields */
@@ -1823,6 +1884,11 @@ struct bpf_tracing_link {
struct bpf_prog *tgt_prog;
};
+struct bpf_fsession_link {
+ struct bpf_tracing_link link;
+ struct bpf_tramp_link fexit;
+};
+
struct bpf_raw_tp_link {
struct bpf_link link;
struct bpf_raw_event_map *btp;
@@ -1905,12 +1971,14 @@ struct btf_member;
* reason, if this callback is not defined, the check is skipped as
* the struct_ops map will have final verification performed in
* @reg.
- * @type: BTF type.
- * @value_type: Value type.
+ * @cfi_stubs: Pointer to a structure of stub functions for CFI. These stubs
+ * provide the correct Control Flow Integrity hashes for the
+ * trampolines generated by BPF struct_ops.
+ * @owner: The module that owns this struct_ops. Used for module reference
+ * counting to ensure the module providing the struct_ops cannot be
+ * unloaded while in use.
* @name: The name of the struct bpf_struct_ops object.
* @func_models: Func models
- * @type_id: BTF type id.
- * @value_id: BTF value id.
*/
struct bpf_struct_ops {
const struct bpf_verifier_ops *verifier_ops;
@@ -1968,6 +2036,40 @@ struct bpf_struct_ops_common_value {
enum bpf_struct_ops_state state;
};
+static inline bool bpf_prog_get_recursion_context(struct bpf_prog *prog)
+{
+#ifdef CONFIG_ARM64
+ u8 rctx = interrupt_context_level();
+ u8 *active = this_cpu_ptr(prog->active);
+ u32 val;
+
+ preempt_disable();
+ active[rctx]++;
+ val = le32_to_cpu(*(__le32 *)active);
+ preempt_enable();
+ if (val != BIT(rctx * 8))
+ return false;
+
+ return true;
+#else
+ return this_cpu_inc_return(*(int __percpu *)(prog->active)) == 1;
+#endif
+}
+
+static inline void bpf_prog_put_recursion_context(struct bpf_prog *prog)
+{
+#ifdef CONFIG_ARM64
+ u8 rctx = interrupt_context_level();
+ u8 *active = this_cpu_ptr(prog->active);
+
+ preempt_disable();
+ active[rctx]--;
+ preempt_enable();
+#else
+ this_cpu_dec(*(int __percpu *)(prog->active));
+#endif
+}
+
#if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL)
/* This macro helps developer to register a struct_ops type and generate
* type information correctly. Developers should use this macro to register
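The two inline functions added above pair with the new u8 per-CPU 'active' array: on arm64 the recursion counter is kept per context level (BPF_NR_CONTEXTS: normal, softirq, hardirq, NMI), so a program interrupted on the same CPU is no longer counted as recursing into itself. A minimal sketch of how a runner might bracket program execution with them; the wrapper name is hypothetical:

static u32 example_run_prog(struct bpf_prog *prog, const void *ctx)
{
	u32 ret = 0;

	/* put is unconditional: get increments the per-context counter
	 * even when it reports recursion
	 */
	if (bpf_prog_get_recursion_context(prog))
		ret = bpf_prog_run(prog, ctx);
	bpf_prog_put_recursion_context(prog);
	return ret;
}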
@@ -2010,6 +2112,9 @@ static inline void bpf_module_put(const void *data, struct module *owner)
module_put(owner);
}
int bpf_struct_ops_link_create(union bpf_attr *attr);
+int bpf_prog_assoc_struct_ops(struct bpf_prog *prog, struct bpf_map *map);
+void bpf_prog_disassoc_struct_ops(struct bpf_prog *prog);
+void *bpf_prog_get_assoc_struct_ops(const struct bpf_prog_aux *aux);
u32 bpf_struct_ops_id(const void *kdata);
#ifdef CONFIG_NET
@@ -2057,6 +2162,17 @@ static inline int bpf_struct_ops_link_create(union bpf_attr *attr)
{
return -EOPNOTSUPP;
}
+static inline int bpf_prog_assoc_struct_ops(struct bpf_prog *prog, struct bpf_map *map)
+{
+ return -EOPNOTSUPP;
+}
+static inline void bpf_prog_disassoc_struct_ops(struct bpf_prog *prog)
+{
+}
+static inline void *bpf_prog_get_assoc_struct_ops(const struct bpf_prog_aux *aux)
+{
+ return NULL;
+}
static inline void bpf_map_struct_ops_info_fill(struct bpf_map_info *info, struct bpf_map *map)
{
}
@@ -2067,6 +2183,37 @@ static inline void bpf_struct_ops_desc_release(struct bpf_struct_ops_desc *st_op
#endif
+static inline int bpf_fsession_cnt(struct bpf_tramp_links *links)
+{
+ struct bpf_tramp_links fentries = links[BPF_TRAMP_FENTRY];
+ int cnt = 0;
+
+ for (int i = 0; i < links[BPF_TRAMP_FENTRY].nr_links; i++) {
+ if (fentries.links[i]->link.prog->expected_attach_type == BPF_TRACE_FSESSION)
+ cnt++;
+ }
+
+ return cnt;
+}
+
+static inline bool bpf_prog_calls_session_cookie(struct bpf_tramp_link *link)
+{
+ return link->link.prog->call_session_cookie;
+}
+
+static inline int bpf_fsession_cookie_cnt(struct bpf_tramp_links *links)
+{
+ struct bpf_tramp_links fentries = links[BPF_TRAMP_FENTRY];
+ int cnt = 0;
+
+ for (int i = 0; i < links[BPF_TRAMP_FENTRY].nr_links; i++) {
+ if (bpf_prog_calls_session_cookie(fentries.links[i]))
+ cnt++;
+ }
+
+ return cnt;
+}
+
int bpf_prog_ctx_arg_info_init(struct bpf_prog *prog,
const struct bpf_ctx_arg_aux *info, u32 cnt);
@@ -2099,6 +2246,12 @@ struct bpf_array {
};
};
+/*
+ * The bpf_array_get_next_key() function may be used for all array-like
+ * maps, i.e., maps with u32 keys in the range [0, ..., max_entries)
+ */
+int bpf_array_get_next_key(struct bpf_map *map, void *key, void *next_key);
+
#define BPF_COMPLEXITY_LIMIT_INSNS 1000000 /* yes. 1M insns */
#define MAX_TAIL_CALL_CNT 33
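Since bpf_array_get_next_key() is exported for any array-like map, such a map type can point its map_get_next_key op straight at it instead of open-coding the iteration. A minimal, hypothetical sketch (the ops structure is deliberately incomplete):

static const struct bpf_map_ops example_array_like_ops = {
	/* u32-key iteration over [0, max_entries) comes for free */
	.map_get_next_key = bpf_array_get_next_key,
	/* remaining ops omitted in this sketch */
};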
@@ -2146,7 +2299,7 @@ static inline bool bpf_map_flags_access_ok(u32 access_flags)
static inline struct bpf_map_owner *bpf_map_owner_alloc(struct bpf_map *map)
{
- return kzalloc(sizeof(*map->owner), GFP_ATOMIC);
+ return kzalloc_obj(*map->owner, GFP_ATOMIC);
}
static inline void bpf_map_owner_free(struct bpf_map *map)
@@ -2374,6 +2527,9 @@ bpf_prog_run_array_uprobe(const struct bpf_prog_array *array,
bool bpf_jit_bypass_spec_v1(void);
bool bpf_jit_bypass_spec_v4(void);
+#define bpf_rcu_lock_held() \
+ (rcu_read_lock_held() || rcu_read_lock_trace_held() || rcu_read_lock_bh_held())
+
#ifdef CONFIG_BPF_SYSCALL
DECLARE_PER_CPU(int, bpf_prog_active);
extern struct mutex bpf_stats_enabled_mutex;
@@ -2497,6 +2653,10 @@ struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id);
int bpf_map_alloc_pages(const struct bpf_map *map, int nid,
unsigned long nr_pages, struct page **page_array);
#ifdef CONFIG_MEMCG
+void bpf_map_memcg_enter(const struct bpf_map *map, struct mem_cgroup **old_memcg,
+ struct mem_cgroup **new_memcg);
+void bpf_map_memcg_exit(struct mem_cgroup *old_memcg,
+ struct mem_cgroup *memcg);
void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags,
int node);
void *bpf_map_kmalloc_nolock(const struct bpf_map *map, size_t size, gfp_t flags,
@@ -2521,6 +2681,17 @@ void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size,
kvcalloc(_n, _size, _flags)
#define bpf_map_alloc_percpu(_map, _size, _align, _flags) \
__alloc_percpu_gfp(_size, _align, _flags)
+static inline void bpf_map_memcg_enter(const struct bpf_map *map, struct mem_cgroup **old_memcg,
+ struct mem_cgroup **new_memcg)
+{
+ *new_memcg = NULL;
+ *old_memcg = NULL;
+}
+
+static inline void bpf_map_memcg_exit(struct mem_cgroup *old_memcg,
+ struct mem_cgroup *memcg)
+{
+}
#endif
static inline int
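bpf_map_memcg_enter()/bpf_map_memcg_exit() expose the memory-cgroup switch that the bpf_map_kmalloc_* helpers perform internally, with no-op stubs when CONFIG_MEMCG is off. A hedged sketch of an allocation site using the pair; the contract is inferred only from the declarations above, and the function name is illustrative:

static void *example_map_alloc(const struct bpf_map *map, size_t size)
{
	struct mem_cgroup *old_memcg, *new_memcg;
	void *ptr;

	bpf_map_memcg_enter(map, &old_memcg, &new_memcg);
	ptr = kmalloc(size, GFP_KERNEL | __GFP_ACCOUNT);
	bpf_map_memcg_exit(old_memcg, new_memcg);
	return ptr;
}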
@@ -2721,8 +2892,8 @@ int map_set_for_each_callback_args(struct bpf_verifier_env *env,
struct bpf_func_state *caller,
struct bpf_func_state *callee);
-int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
-int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
+int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value, u64 flags);
+int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value, u64 flags);
int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
u64 flags);
int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
@@ -3200,6 +3371,11 @@ static inline void bpf_prog_report_arena_violation(bool write, unsigned long add
}
#endif /* CONFIG_BPF_SYSCALL */
+static inline bool bpf_net_capable(void)
+{
+ return capable(CAP_NET_ADMIN) || capable(CAP_SYS_ADMIN);
+}
+
static __always_inline int
bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr)
{
@@ -3670,12 +3846,14 @@ static inline u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type,
#endif /* CONFIG_INET */
enum bpf_text_poke_type {
+ BPF_MOD_NOP,
BPF_MOD_CALL,
BPF_MOD_JUMP,
};
-int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
- void *addr1, void *addr2);
+int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
+ enum bpf_text_poke_type new_t, void *old_addr,
+ void *new_addr);
void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
struct bpf_prog *new, struct bpf_prog *old);
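bpf_arch_text_poke() now describes both the old and the new state of the patched site, with BPF_MOD_NOP naming an unpatched location. One plausible attach/detach pair under the extended signature; passing NULL as the address for the nop state is an assumption here, not something this hunk spells out:

static int example_attach(void *ip, void *tramp_image)
{
	/* old state: nop (no target); new state: call into the trampoline */
	return bpf_arch_text_poke(ip, BPF_MOD_NOP, BPF_MOD_CALL,
				  NULL, tramp_image);
}

static int example_detach(void *ip, void *tramp_image)
{
	/* revert the call back to a nop */
	return bpf_arch_text_poke(ip, BPF_MOD_CALL, BPF_MOD_NOP,
				  tramp_image, NULL);
}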
@@ -3772,4 +3950,59 @@ int bpf_prog_get_file_line(struct bpf_prog *prog, unsigned long ip, const char *
const char **linep, int *nump);
struct bpf_prog *bpf_prog_find_from_stack(void);
+int bpf_insn_array_init(struct bpf_map *map, const struct bpf_prog *prog);
+int bpf_insn_array_ready(struct bpf_map *map);
+void bpf_insn_array_release(struct bpf_map *map);
+void bpf_insn_array_adjust(struct bpf_map *map, u32 off, u32 len);
+void bpf_insn_array_adjust_after_remove(struct bpf_map *map, u32 off, u32 len);
+
+#ifdef CONFIG_BPF_SYSCALL
+void bpf_prog_update_insn_ptrs(struct bpf_prog *prog, u32 *offsets, void *image);
+#else
+static inline void
+bpf_prog_update_insn_ptrs(struct bpf_prog *prog, u32 *offsets, void *image)
+{
+}
+#endif
+
+static inline bool bpf_map_supports_cpu_flags(enum bpf_map_type map_type)
+{
+ switch (map_type) {
+ case BPF_MAP_TYPE_PERCPU_ARRAY:
+ case BPF_MAP_TYPE_PERCPU_HASH:
+ case BPF_MAP_TYPE_LRU_PERCPU_HASH:
+ case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline int bpf_map_check_op_flags(struct bpf_map *map, u64 flags, u64 allowed_flags)
+{
+ u32 cpu;
+
+ if ((u32)flags & ~allowed_flags)
+ return -EINVAL;
+
+ if ((flags & BPF_F_LOCK) && !btf_record_has_field(map->record, BPF_SPIN_LOCK))
+ return -EINVAL;
+
+ if (!(flags & BPF_F_CPU) && flags >> 32)
+ return -EINVAL;
+
+ if (flags & (BPF_F_CPU | BPF_F_ALL_CPUS)) {
+ if (!bpf_map_supports_cpu_flags(map->map_type))
+ return -EINVAL;
+ if ((flags & BPF_F_CPU) && (flags & BPF_F_ALL_CPUS))
+ return -EINVAL;
+
+ cpu = flags >> 32;
+ if ((flags & BPF_F_CPU) && cpu >= num_possible_cpus())
+ return -ERANGE;
+ }
+
+ return 0;
+}
+
#endif /* _LINUX_BPF_H */
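bpf_map_check_op_flags() centralizes validation of lookup/update flags, including the BPF_F_CPU/BPF_F_ALL_CPUS bits, where the upper 32 bits of flags carry the target CPU. A minimal sketch of a per-CPU map update path calling it; the allowed-flags set and the function name are illustrative:

static int example_percpu_update(struct bpf_map *map, void *key, void *value,
				 u64 flags)
{
	int err;

	err = bpf_map_check_op_flags(map, flags,
				     BPF_NOEXIST | BPF_EXIST | BPF_F_LOCK |
				     BPF_F_CPU | BPF_F_ALL_CPUS);
	if (err)
		return err;

	/* with BPF_F_CPU set, flags >> 32 selects the target CPU;
	 * BPF_F_ALL_CPUS applies the update to every CPU
	 */
	return 0;
}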