diff options
| author | David S. Miller <davem@davemloft.net> | 2020-03-14 06:52:03 +0300 |
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2020-03-14 06:52:03 +0300 |
| commit | 44ef976ab3c4ccd6c886714e5349caa53c477010 (patch) | |
| tree | fad7059aad1e1ac040e59a2f4870400bc8e9e30a /include/linux | |
| parent | 48f5d5cb80b4e414cb97dd7dba43b0370bdee130 (diff) | |
| parent | 832165d225f71040a2c1fc2407752e462d00de1f (diff) | |
| download | linux-44ef976ab3c4ccd6c886714e5349caa53c477010.tar.xz | |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says:
====================
pull-request: bpf-next 2020-03-13
The following pull-request contains BPF updates for your *net-next* tree.
We've added 86 non-merge commits during the last 12 day(s) which contain
a total of 107 files changed, 5771 insertions(+), 1700 deletions(-).
The main changes are:
1) Add modify_return attach type which allows to attach to a function via
BPF trampoline and is run after the fentry and before the fexit programs
and can pass a return code to the original caller, from KP Singh.
2) Generalize BPF's kallsyms handling and add BPF trampoline and dispatcher
objects to be visible in /proc/kallsyms so they can be annotated in
stack traces, from Jiri Olsa.
3) Extend BPF sockmap to allow for UDP next to existing TCP support in order
in order to enable this for BPF based socket dispatch, from Lorenz Bauer.
4) Introduce a new bpftool 'prog profile' command which attaches to existing
BPF programs via fentry and fexit hooks and reads out hardware counters
during that period, from Song Liu. Example usage:
bpftool prog profile id 337 duration 3 cycles instructions llc_misses
4228 run_cnt
3403698 cycles (84.08%)
3525294 instructions # 1.04 insn per cycle (84.05%)
13 llc_misses # 3.69 LLC misses per million isns (83.50%)
5) Batch of improvements to libbpf, bpftool and BPF selftests. Also addition
of a new bpf_link abstraction to keep in particular BPF tracing programs
attached even when the applicaion owning them exits, from Andrii Nakryiko.
6) New bpf_get_current_pid_tgid() helper for tracing to perform PID filtering
and which returns the PID as seen by the init namespace, from Carlos Neira.
7) Refactor of RISC-V JIT code to move out common pieces and addition of a
new RV32G BPF JIT compiler, from Luke Nelson.
8) Add gso_size context member to __sk_buff in order to be able to know whether
a given skb is GSO or not, from Willem de Bruijn.
9) Add a new bpf_xdp_output() helper which reuses XDP's existing perf RB output
implementation but can be called from tracepoint programs, from Eelco Chaudron.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include/linux')
| -rw-r--r-- | include/linux/bpf.h | 109 | ||||
| -rw-r--r-- | include/linux/filter.h | 15 | ||||
| -rw-r--r-- | include/linux/proc_ns.h | 2 | ||||
| -rw-r--r-- | include/linux/skmsg.h | 56 |
4 files changed, 110 insertions, 72 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 6015a4daf118..bdb981c204fa 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -18,6 +18,7 @@ #include <linux/refcount.h> #include <linux/mutex.h> #include <linux/module.h> +#include <linux/kallsyms.h> struct bpf_verifier_env; struct bpf_verifier_log; @@ -433,6 +434,16 @@ struct btf_func_model { */ #define BPF_TRAMP_F_SKIP_FRAME BIT(2) +/* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50 + * bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2 + */ +#define BPF_MAX_TRAMP_PROGS 40 + +struct bpf_tramp_progs { + struct bpf_prog *progs[BPF_MAX_TRAMP_PROGS]; + int nr_progs; +}; + /* Different use cases for BPF trampoline: * 1. replace nop at the function entry (kprobe equivalent) * flags = BPF_TRAMP_F_RESTORE_REGS @@ -455,16 +466,25 @@ struct btf_func_model { */ int arch_prepare_bpf_trampoline(void *image, void *image_end, const struct btf_func_model *m, u32 flags, - struct bpf_prog **fentry_progs, int fentry_cnt, - struct bpf_prog **fexit_progs, int fexit_cnt, + struct bpf_tramp_progs *tprogs, void *orig_call); /* these two functions are called from generated trampoline */ u64 notrace __bpf_prog_enter(void); void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start); +struct bpf_ksym { + unsigned long start; + unsigned long end; + char name[KSYM_NAME_LEN]; + struct list_head lnode; + struct latch_tree_node tnode; + bool prog; +}; + enum bpf_tramp_prog_type { BPF_TRAMP_FENTRY, BPF_TRAMP_FEXIT, + BPF_TRAMP_MODIFY_RETURN, BPF_TRAMP_MAX, BPF_TRAMP_REPLACE, /* more than MAX */ }; @@ -493,6 +513,7 @@ struct bpf_trampoline { /* Executable image of trampoline */ void *image; u64 selector; + struct bpf_ksym ksym; }; #define BPF_DISPATCHER_MAX 48 /* Fits in 2048B */ @@ -510,9 +531,10 @@ struct bpf_dispatcher { int num_progs; void *image; u32 image_off; + struct bpf_ksym ksym; }; -static __always_inline unsigned int bpf_dispatcher_nopfunc( +static __always_inline unsigned int bpf_dispatcher_nop_func( const void *ctx, const struct bpf_insn *insnsi, unsigned int (*bpf_func)(const void *, @@ -525,17 +547,21 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key); int bpf_trampoline_link_prog(struct bpf_prog *prog); int bpf_trampoline_unlink_prog(struct bpf_prog *prog); void bpf_trampoline_put(struct bpf_trampoline *tr); -#define BPF_DISPATCHER_INIT(name) { \ - .mutex = __MUTEX_INITIALIZER(name.mutex), \ - .func = &name##func, \ - .progs = {}, \ - .num_progs = 0, \ - .image = NULL, \ - .image_off = 0 \ +#define BPF_DISPATCHER_INIT(_name) { \ + .mutex = __MUTEX_INITIALIZER(_name.mutex), \ + .func = &_name##_func, \ + .progs = {}, \ + .num_progs = 0, \ + .image = NULL, \ + .image_off = 0, \ + .ksym = { \ + .name = #_name, \ + .lnode = LIST_HEAD_INIT(_name.ksym.lnode), \ + }, \ } #define DEFINE_BPF_DISPATCHER(name) \ - noinline unsigned int name##func( \ + noinline unsigned int bpf_dispatcher_##name##_func( \ const void *ctx, \ const struct bpf_insn *insnsi, \ unsigned int (*bpf_func)(const void *, \ @@ -543,26 +569,26 @@ void bpf_trampoline_put(struct bpf_trampoline *tr); { \ return bpf_func(ctx, insnsi); \ } \ - EXPORT_SYMBOL(name##func); \ - struct bpf_dispatcher name = BPF_DISPATCHER_INIT(name); + EXPORT_SYMBOL(bpf_dispatcher_##name##_func); \ + struct bpf_dispatcher bpf_dispatcher_##name = \ + BPF_DISPATCHER_INIT(bpf_dispatcher_##name); #define DECLARE_BPF_DISPATCHER(name) \ - unsigned int name##func( \ + unsigned int bpf_dispatcher_##name##_func( \ const void *ctx, \ const struct bpf_insn *insnsi, \ unsigned int (*bpf_func)(const void *, \ const struct bpf_insn *)); \ - extern struct bpf_dispatcher name; -#define BPF_DISPATCHER_FUNC(name) name##func -#define BPF_DISPATCHER_PTR(name) (&name) + extern struct bpf_dispatcher bpf_dispatcher_##name; +#define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_##name##_func +#define BPF_DISPATCHER_PTR(name) (&bpf_dispatcher_##name) void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, struct bpf_prog *to); -struct bpf_image { - struct latch_tree_node tnode; - unsigned char data[]; -}; -#define BPF_IMAGE_SIZE (PAGE_SIZE - sizeof(struct bpf_image)) -bool is_bpf_image_address(unsigned long address); -void *bpf_image_alloc(void); +/* Called only from JIT-enabled code, so there's no need for stubs. */ +void *bpf_jit_alloc_exec_page(void); +void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym); +void bpf_image_ksym_del(struct bpf_ksym *ksym); +void bpf_ksym_add(struct bpf_ksym *ksym); +void bpf_ksym_del(struct bpf_ksym *ksym); #else static inline struct bpf_trampoline *bpf_trampoline_lookup(u64 key) { @@ -579,7 +605,7 @@ static inline int bpf_trampoline_unlink_prog(struct bpf_prog *prog) static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {} #define DEFINE_BPF_DISPATCHER(name) #define DECLARE_BPF_DISPATCHER(name) -#define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_nopfunc +#define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_nop_func #define BPF_DISPATCHER_PTR(name) NULL static inline void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, @@ -640,8 +666,7 @@ struct bpf_prog_aux { void *jit_data; /* JIT specific data. arch dependent */ struct bpf_jit_poke_descriptor *poke_tab; u32 size_poke_tab; - struct latch_tree_node ksym_tnode; - struct list_head ksym_lnode; + struct bpf_ksym ksym; const struct bpf_prog_ops *ops; struct bpf_map **used_maps; struct bpf_prog *prog; @@ -1056,6 +1081,21 @@ extern int sysctl_unprivileged_bpf_disabled; int bpf_map_new_fd(struct bpf_map *map, int flags); int bpf_prog_new_fd(struct bpf_prog *prog); +struct bpf_link; + +struct bpf_link_ops { + void (*release)(struct bpf_link *link); + void (*dealloc)(struct bpf_link *link); +}; + +void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops, + struct bpf_prog *prog); +void bpf_link_inc(struct bpf_link *link); +void bpf_link_put(struct bpf_link *link); +int bpf_link_new_fd(struct bpf_link *link); +struct file *bpf_link_new_file(struct bpf_link *link, int *reserved_fd); +struct bpf_link *bpf_link_get_from_fd(u32 ufd); + int bpf_obj_pin_user(u32 ufd, const char __user *pathname); int bpf_obj_get_user(const char __user *pathname, int flags); @@ -1133,6 +1173,9 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); +int bpf_prog_test_run_tracing(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr); int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); @@ -1290,6 +1333,13 @@ static inline int bpf_prog_test_run_skb(struct bpf_prog *prog, return -ENOTSUPP; } +static inline int bpf_prog_test_run_tracing(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr) +{ + return -ENOTSUPP; +} + static inline int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) @@ -1386,6 +1436,8 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map) #if defined(CONFIG_BPF_STREAM_PARSER) int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, u32 which); int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog); +void sock_map_unhash(struct sock *sk); +void sock_map_close(struct sock *sk, long timeout); #else static inline int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, u32 which) @@ -1398,7 +1450,7 @@ static inline int sock_map_get_from_fd(const union bpf_attr *attr, { return -EINVAL; } -#endif +#endif /* CONFIG_BPF_STREAM_PARSER */ #if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) void bpf_sk_reuseport_detach(struct sock *sk); @@ -1459,6 +1511,7 @@ extern const struct bpf_func_proto bpf_strtol_proto; extern const struct bpf_func_proto bpf_strtoul_proto; extern const struct bpf_func_proto bpf_tcp_sock_proto; extern const struct bpf_func_proto bpf_jiffies64_proto; +extern const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto; /* Shared helpers among cBPF and eBPF. */ void bpf_user_rnd_init_once(void); diff --git a/include/linux/filter.h b/include/linux/filter.h index 43b5e455d2f5..9b5aa5c483cc 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -577,7 +577,7 @@ DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key); ret; }) #define BPF_PROG_RUN(prog, ctx) \ - __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nopfunc) + __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nop_func) /* * Use in preemptible and therefore migratable context to make sure that @@ -596,7 +596,7 @@ static inline u32 bpf_prog_run_pin_on_cpu(const struct bpf_prog *prog, u32 ret; migrate_disable(); - ret = __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nopfunc); + ret = __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nop_func); migrate_enable(); return ret; } @@ -722,7 +722,7 @@ static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog, return res; } -DECLARE_BPF_DISPATCHER(bpf_dispatcher_xdp) +DECLARE_BPF_DISPATCHER(xdp) static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog, struct xdp_buff *xdp) @@ -733,8 +733,7 @@ static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog, * already takes rcu_read_lock() when fetching the program, so * it's not necessary here anymore. */ - return __BPF_PROG_RUN(prog, xdp, - BPF_DISPATCHER_FUNC(bpf_dispatcher_xdp)); + return __BPF_PROG_RUN(prog, xdp, BPF_DISPATCHER_FUNC(xdp)); } void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog); @@ -1084,7 +1083,6 @@ bpf_address_lookup(unsigned long addr, unsigned long *size, void bpf_prog_kallsyms_add(struct bpf_prog *fp); void bpf_prog_kallsyms_del(struct bpf_prog *fp); -void bpf_get_prog_name(const struct bpf_prog *prog, char *sym); #else /* CONFIG_BPF_JIT */ @@ -1153,11 +1151,6 @@ static inline void bpf_prog_kallsyms_del(struct bpf_prog *fp) { } -static inline void bpf_get_prog_name(const struct bpf_prog *prog, char *sym) -{ - sym[0] = '\0'; -} - #endif /* CONFIG_BPF_JIT */ void bpf_prog_kallsyms_del_all(struct bpf_prog *fp); diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index 4626b1ac3b6c..adff08bfecf9 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -85,6 +85,8 @@ typedef struct ns_common *ns_get_path_helper_t(void *); extern int ns_get_path_cb(struct path *path, ns_get_path_helper_t ns_get_cb, void *private_data); +extern bool ns_match(const struct ns_common *ns, dev_t dev, ino_t ino); + extern int ns_get_name(char *buf, size_t size, struct task_struct *task, const struct proc_ns_operations *ns_ops); extern void nsfs_init(void); diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 112765bd146d..8a709f63c5e5 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -323,14 +323,6 @@ static inline void sk_psock_free_link(struct sk_psock_link *link) } struct sk_psock_link *sk_psock_link_pop(struct sk_psock *psock); -#if defined(CONFIG_BPF_STREAM_PARSER) -void sk_psock_unlink(struct sock *sk, struct sk_psock_link *link); -#else -static inline void sk_psock_unlink(struct sock *sk, - struct sk_psock_link *link) -{ -} -#endif void __sk_psock_purge_ingress_msg(struct sk_psock *psock); @@ -347,11 +339,23 @@ static inline void sk_psock_update_proto(struct sock *sk, struct sk_psock *psock, struct proto *ops) { - psock->saved_unhash = sk->sk_prot->unhash; - psock->saved_close = sk->sk_prot->close; - psock->saved_write_space = sk->sk_write_space; + /* Initialize saved callbacks and original proto only once, since this + * function may be called multiple times for a psock, e.g. when + * psock->progs.msg_parser is updated. + * + * Since we've not installed the new proto, psock is not yet in use and + * we can initialize it without synchronization. + */ + if (!psock->sk_proto) { + struct proto *orig = READ_ONCE(sk->sk_prot); + + psock->saved_unhash = orig->unhash; + psock->saved_close = orig->close; + psock->saved_write_space = sk->sk_write_space; + + psock->sk_proto = orig; + } - psock->sk_proto = sk->sk_prot; /* Pairs with lockless read in sk_clone_lock() */ WRITE_ONCE(sk->sk_prot, ops); } @@ -360,7 +364,13 @@ static inline void sk_psock_restore_proto(struct sock *sk, struct sk_psock *psock) { sk->sk_prot->unhash = psock->saved_unhash; - tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space); + if (inet_csk_has_ulp(sk)) { + tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space); + } else { + sk->sk_write_space = psock->saved_write_space; + /* Pairs with lockless read in sk_clone_lock() */ + WRITE_ONCE(sk->sk_prot, psock->sk_proto); + } } static inline void sk_psock_set_state(struct sk_psock *psock, @@ -381,26 +391,6 @@ static inline bool sk_psock_test_state(const struct sk_psock *psock, return test_bit(bit, &psock->state); } -static inline struct sk_psock *sk_psock_get_checked(struct sock *sk) -{ - struct sk_psock *psock; - - rcu_read_lock(); - psock = sk_psock(sk); - if (psock) { - if (sk->sk_prot->recvmsg != tcp_bpf_recvmsg) { - psock = ERR_PTR(-EBUSY); - goto out; - } - - if (!refcount_inc_not_zero(&psock->refcnt)) - psock = ERR_PTR(-EBUSY); - } -out: - rcu_read_unlock(); - return psock; -} - static inline struct sk_psock *sk_psock_get(struct sock *sk) { struct sk_psock *psock; |
