summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/networking/filter.txt64
-rw-r--r--include/linux/bpf.h34
-rw-r--r--include/linux/bpf_verifier.h37
-rw-r--r--include/uapi/linux/bpf.h93
-rw-r--r--kernel/bpf/verifier.c604
-rw-r--r--net/core/filter.c181
-rw-r--r--tools/include/uapi/linux/bpf.h93
-rw-r--r--tools/lib/bpf/libbpf.c4
-rw-r--r--tools/lib/bpf/libbpf.h3
-rw-r--r--tools/testing/selftests/bpf/Makefile2
-rw-r--r--tools/testing/selftests/bpf/bpf_helpers.h12
-rw-r--r--tools/testing/selftests/bpf/test_progs.c38
-rw-r--r--tools/testing/selftests/bpf/test_sk_lookup_kern.c180
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c814
14 files changed, 2002 insertions, 157 deletions
diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt
index e6b4ebb2b243..4443ce958862 100644
--- a/Documentation/networking/filter.txt
+++ b/Documentation/networking/filter.txt
@@ -1125,6 +1125,14 @@ pointer type. The types of pointers describe their base, as follows:
PTR_TO_STACK Frame pointer.
PTR_TO_PACKET skb->data.
PTR_TO_PACKET_END skb->data + headlen; arithmetic forbidden.
+ PTR_TO_SOCKET Pointer to struct bpf_sock_ops, implicitly refcounted.
+ PTR_TO_SOCKET_OR_NULL
+ Either a pointer to a socket, or NULL; socket lookup
+ returns this type, which becomes a PTR_TO_SOCKET when
+ checked != NULL. PTR_TO_SOCKET is reference-counted,
+ so programs must release the reference through the
+ socket release function before the end of the program.
+ Arithmetic on these pointers is forbidden.
However, a pointer may be offset from this base (as a result of pointer
arithmetic), and this is tracked in two parts: the 'fixed offset' and 'variable
offset'. The former is used when an exactly-known value (e.g. an immediate
@@ -1171,6 +1179,13 @@ over the Ethernet header, then reads IHL and addes (IHL * 4), the resulting
pointer will have a variable offset known to be 4n+2 for some n, so adding the 2
bytes (NET_IP_ALIGN) gives a 4-byte alignment and so word-sized accesses through
that pointer are safe.
+The 'id' field is also used on PTR_TO_SOCKET and PTR_TO_SOCKET_OR_NULL, common
+to all copies of the pointer returned from a socket lookup. This has similar
+behaviour to the handling for PTR_TO_MAP_VALUE_OR_NULL->PTR_TO_MAP_VALUE, but
+it also handles reference tracking for the pointer. PTR_TO_SOCKET implicitly
+represents a reference to the corresponding 'struct sock'. To ensure that the
+reference is not leaked, it is imperative to NULL-check the reference and in
+the non-NULL case, and pass the valid reference to the socket release function.
Direct packet access
--------------------
@@ -1444,6 +1459,55 @@ Error:
8: (7a) *(u64 *)(r0 +0) = 1
R0 invalid mem access 'imm'
+Program that performs a socket lookup then sets the pointer to NULL without
+checking it:
+value:
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_MOV64_IMM(BPF_REG_3, 4),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_EMIT_CALL(BPF_FUNC_sk_lookup_tcp),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+Error:
+ 0: (b7) r2 = 0
+ 1: (63) *(u32 *)(r10 -8) = r2
+ 2: (bf) r2 = r10
+ 3: (07) r2 += -8
+ 4: (b7) r3 = 4
+ 5: (b7) r4 = 0
+ 6: (b7) r5 = 0
+ 7: (85) call bpf_sk_lookup_tcp#65
+ 8: (b7) r0 = 0
+ 9: (95) exit
+ Unreleased reference id=1, alloc_insn=7
+
+Program that performs a socket lookup but does not NULL-check the returned
+value:
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_MOV64_IMM(BPF_REG_3, 4),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_EMIT_CALL(BPF_FUNC_sk_lookup_tcp),
+ BPF_EXIT_INSN(),
+Error:
+ 0: (b7) r2 = 0
+ 1: (63) *(u32 *)(r10 -8) = r2
+ 2: (bf) r2 = r10
+ 3: (07) r2 += -8
+ 4: (b7) r3 = 4
+ 5: (b7) r4 = 0
+ 6: (b7) r5 = 0
+ 7: (85) call bpf_sk_lookup_tcp#65
+ 8: (95) exit
+ Unreleased reference id=1, alloc_insn=7
+
Testing
-------
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 018299a595c8..027697b6a22f 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -154,6 +154,7 @@ enum bpf_arg_type {
ARG_PTR_TO_CTX, /* pointer to context */
ARG_ANYTHING, /* any (initialized) argument is ok */
+ ARG_PTR_TO_SOCKET, /* pointer to bpf_sock */
};
/* type of values returned from helper functions */
@@ -162,6 +163,7 @@ enum bpf_return_type {
RET_VOID, /* function doesn't return anything */
RET_PTR_TO_MAP_VALUE, /* returns a pointer to map elem value */
RET_PTR_TO_MAP_VALUE_OR_NULL, /* returns a pointer to map elem value or NULL */
+ RET_PTR_TO_SOCKET_OR_NULL, /* returns a pointer to a socket or NULL */
};
/* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
@@ -213,6 +215,8 @@ enum bpf_reg_type {
PTR_TO_PACKET, /* reg points to skb->data */
PTR_TO_PACKET_END, /* skb->data + headlen */
PTR_TO_FLOW_KEYS, /* reg points to bpf_flow_keys */
+ PTR_TO_SOCKET, /* reg points to struct bpf_sock */
+ PTR_TO_SOCKET_OR_NULL, /* reg points to struct bpf_sock or NULL */
};
/* The information passed from prog-specific *_is_valid_access
@@ -343,6 +347,11 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src,
unsigned long off, unsigned long len);
+typedef u32 (*bpf_convert_ctx_access_t)(enum bpf_access_type type,
+ const struct bpf_insn *src,
+ struct bpf_insn *dst,
+ struct bpf_prog *prog,
+ u32 *target_size);
u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy);
@@ -836,4 +845,29 @@ extern const struct bpf_func_proto bpf_get_local_storage_proto;
void bpf_user_rnd_init_once(void);
u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
+#if defined(CONFIG_NET)
+bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
+ struct bpf_insn_access_aux *info);
+u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
+ const struct bpf_insn *si,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog,
+ u32 *target_size);
+#else
+static inline bool bpf_sock_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ struct bpf_insn_access_aux *info)
+{
+ return false;
+}
+static inline u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
+ const struct bpf_insn *si,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog,
+ u32 *target_size)
+{
+ return 0;
+}
+#endif
+
#endif /* _LINUX_BPF_H */
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index b42b60a83e19..7b6fd2ab3263 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -58,6 +58,8 @@ struct bpf_reg_state {
* offset, so they can share range knowledge.
* For PTR_TO_MAP_VALUE_OR_NULL this is used to share which map value we
* came from, when one is tested for != NULL.
+ * For PTR_TO_SOCKET this is used to share which pointers retain the
+ * same reference to the socket, to determine proper reference freeing.
*/
u32 id;
/* For scalar types (SCALAR_VALUE), this represents our knowledge of
@@ -102,6 +104,17 @@ struct bpf_stack_state {
u8 slot_type[BPF_REG_SIZE];
};
+struct bpf_reference_state {
+ /* Track each reference created with a unique id, even if the same
+ * instruction creates the reference multiple times (eg, via CALL).
+ */
+ int id;
+ /* Instruction where the allocation of this reference occurred. This
+ * is used purely to inform the user of a reference leak.
+ */
+ int insn_idx;
+};
+
/* state of the program:
* type of all registers and stack info
*/
@@ -119,7 +132,9 @@ struct bpf_func_state {
*/
u32 subprogno;
- /* should be second to last. See copy_func_state() */
+ /* The following fields should be last. See copy_func_state() */
+ int acquired_refs;
+ struct bpf_reference_state *refs;
int allocated_stack;
struct bpf_stack_state *stack;
};
@@ -131,6 +146,17 @@ struct bpf_verifier_state {
u32 curframe;
};
+#define bpf_get_spilled_reg(slot, frame) \
+ (((slot < frame->allocated_stack / BPF_REG_SIZE) && \
+ (frame->stack[slot].slot_type[0] == STACK_SPILL)) \
+ ? &frame->stack[slot].spilled_ptr : NULL)
+
+/* Iterate over 'frame', setting 'reg' to either NULL or a spilled register. */
+#define bpf_for_each_spilled_reg(iter, frame, reg) \
+ for (iter = 0, reg = bpf_get_spilled_reg(iter, frame); \
+ iter < frame->allocated_stack / BPF_REG_SIZE; \
+ iter++, reg = bpf_get_spilled_reg(iter, frame))
+
/* linked list of verifier states used to prune search */
struct bpf_verifier_state_list {
struct bpf_verifier_state state;
@@ -204,11 +230,16 @@ __printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log,
__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
const char *fmt, ...);
-static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env)
+static inline struct bpf_func_state *cur_func(struct bpf_verifier_env *env)
{
struct bpf_verifier_state *cur = env->cur_state;
- return cur->frame[cur->curframe]->regs;
+ return cur->frame[cur->curframe];
+}
+
+static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env)
+{
+ return cur_func(env)->regs;
}
int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index e2070d819e04..f9187b41dff6 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2144,6 +2144,77 @@ union bpf_attr {
* request in the skb.
* Return
* 0 on success, or a negative error in case of failure.
+ *
+ * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags)
+ * Description
+ * Look for TCP socket matching *tuple*, optionally in a child
+ * network namespace *netns*. The return value must be checked,
+ * and if non-NULL, released via **bpf_sk_release**\ ().
+ *
+ * The *ctx* should point to the context of the program, such as
+ * the skb or socket (depending on the hook in use). This is used
+ * to determine the base network namespace for the lookup.
+ *
+ * *tuple_size* must be one of:
+ *
+ * **sizeof**\ (*tuple*\ **->ipv4**)
+ * Look for an IPv4 socket.
+ * **sizeof**\ (*tuple*\ **->ipv6**)
+ * Look for an IPv6 socket.
+ *
+ * If the *netns* is zero, then the socket lookup table in the
+ * netns associated with the *ctx* will be used. For the TC hooks,
+ * this in the netns of the device in the skb. For socket hooks,
+ * this in the netns of the socket. If *netns* is non-zero, then
+ * it specifies the ID of the netns relative to the netns
+ * associated with the *ctx*.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_NET** configuration option.
+ * Return
+ * Pointer to *struct bpf_sock*, or NULL in case of failure.
+ *
+ * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags)
+ * Description
+ * Look for UDP socket matching *tuple*, optionally in a child
+ * network namespace *netns*. The return value must be checked,
+ * and if non-NULL, released via **bpf_sk_release**\ ().
+ *
+ * The *ctx* should point to the context of the program, such as
+ * the skb or socket (depending on the hook in use). This is used
+ * to determine the base network namespace for the lookup.
+ *
+ * *tuple_size* must be one of:
+ *
+ * **sizeof**\ (*tuple*\ **->ipv4**)
+ * Look for an IPv4 socket.
+ * **sizeof**\ (*tuple*\ **->ipv6**)
+ * Look for an IPv6 socket.
+ *
+ * If the *netns* is zero, then the socket lookup table in the
+ * netns associated with the *ctx* will be used. For the TC hooks,
+ * this in the netns of the device in the skb. For socket hooks,
+ * this in the netns of the socket. If *netns* is non-zero, then
+ * it specifies the ID of the netns relative to the netns
+ * associated with the *ctx*.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_NET** configuration option.
+ * Return
+ * Pointer to *struct bpf_sock*, or NULL in case of failure.
+ *
+ * int bpf_sk_release(struct bpf_sock *sk)
+ * Description
+ * Release the reference held by *sock*. *sock* must be a non-NULL
+ * pointer that was returned from bpf_sk_lookup_xxx\ ().
+ * Return
+ * 0 on success, or a negative error in case of failure.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -2229,7 +2300,10 @@ union bpf_attr {
FN(get_current_cgroup_id), \
FN(get_local_storage), \
FN(sk_select_reuseport), \
- FN(skb_ancestor_cgroup_id),
+ FN(skb_ancestor_cgroup_id), \
+ FN(sk_lookup_tcp), \
+ FN(sk_lookup_udp), \
+ FN(sk_release),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -2399,6 +2473,23 @@ struct bpf_sock {
*/
};
+struct bpf_sock_tuple {
+ union {
+ struct {
+ __be32 saddr;
+ __be32 daddr;
+ __be16 sport;
+ __be16 dport;
+ } ipv4;
+ struct {
+ __be32 saddr[4];
+ __be32 daddr[4];
+ __be16 sport;
+ __be16 dport;
+ } ipv6;
+ };
+};
+
#define XDP_PACKET_HEADROOM 256
/* User return codes for XDP prog type.
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index a8cc83a970d1..73c81bef6ae8 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1,5 +1,6 @@
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
* Copyright (c) 2016 Facebook
+ * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@@ -80,8 +81,8 @@ static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
* (like pointer plus pointer becomes SCALAR_VALUE type)
*
* When verifier sees load or store instructions the type of base register
- * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK. These are three pointer
- * types recognized by check_mem_access() function.
+ * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
+ * four pointer types recognized by check_mem_access() function.
*
* PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
* and the range of [ptr, ptr + map's value_size) is accessible.
@@ -140,6 +141,24 @@ static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
*
* After the call R0 is set to return type of the function and registers R1-R5
* are set to NOT_INIT to indicate that they are no longer readable.
+ *
+ * The following reference types represent a potential reference to a kernel
+ * resource which, after first being allocated, must be checked and freed by
+ * the BPF program:
+ * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
+ *
+ * When the verifier sees a helper call return a reference type, it allocates a
+ * pointer id for the reference and stores it in the current function state.
+ * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
+ * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
+ * passes through a NULL-check conditional. For the branch wherein the state is
+ * changed to CONST_IMM, the verifier releases the reference.
+ *
+ * For each helper function that allocates a reference, such as
+ * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
+ * bpf_sk_release(). When a reference type passes into the release function,
+ * the verifier also releases the reference. If any unchecked or unreleased
+ * reference remains at the end of the program, the verifier rejects it.
*/
/* verifier_state + insn_idx are pushed to stack when branch is encountered */
@@ -189,6 +208,7 @@ struct bpf_call_arg_meta {
int access_size;
s64 msize_smax_value;
u64 msize_umax_value;
+ int ptr_id;
};
static DEFINE_MUTEX(bpf_verifier_lock);
@@ -249,6 +269,46 @@ static bool type_is_pkt_pointer(enum bpf_reg_type type)
type == PTR_TO_PACKET_META;
}
+static bool reg_type_may_be_null(enum bpf_reg_type type)
+{
+ return type == PTR_TO_MAP_VALUE_OR_NULL ||
+ type == PTR_TO_SOCKET_OR_NULL;
+}
+
+static bool type_is_refcounted(enum bpf_reg_type type)
+{
+ return type == PTR_TO_SOCKET;
+}
+
+static bool type_is_refcounted_or_null(enum bpf_reg_type type)
+{
+ return type == PTR_TO_SOCKET || type == PTR_TO_SOCKET_OR_NULL;
+}
+
+static bool reg_is_refcounted(const struct bpf_reg_state *reg)
+{
+ return type_is_refcounted(reg->type);
+}
+
+static bool reg_is_refcounted_or_null(const struct bpf_reg_state *reg)
+{
+ return type_is_refcounted_or_null(reg->type);
+}
+
+static bool arg_type_is_refcounted(enum bpf_arg_type type)
+{
+ return type == ARG_PTR_TO_SOCKET;
+}
+
+/* Determine whether the function releases some resources allocated by another
+ * function call. The first reference type argument will be assumed to be
+ * released by release_reference().
+ */
+static bool is_release_function(enum bpf_func_id func_id)
+{
+ return func_id == BPF_FUNC_sk_release;
+}
+
/* string representation of 'enum bpf_reg_type' */
static const char * const reg_type_str[] = {
[NOT_INIT] = "?",
@@ -262,6 +322,8 @@ static const char * const reg_type_str[] = {
[PTR_TO_PACKET_META] = "pkt_meta",
[PTR_TO_PACKET_END] = "pkt_end",
[PTR_TO_FLOW_KEYS] = "flow_keys",
+ [PTR_TO_SOCKET] = "sock",
+ [PTR_TO_SOCKET_OR_NULL] = "sock_or_null",
};
static char slot_type_char[] = {
@@ -378,62 +440,158 @@ static void print_verifier_state(struct bpf_verifier_env *env,
else
verbose(env, "=%s", types_buf);
}
+ if (state->acquired_refs && state->refs[0].id) {
+ verbose(env, " refs=%d", state->refs[0].id);
+ for (i = 1; i < state->acquired_refs; i++)
+ if (state->refs[i].id)
+ verbose(env, ",%d", state->refs[i].id);
+ }
verbose(env, "\n");
}
-static int copy_stack_state(struct bpf_func_state *dst,
- const struct bpf_func_state *src)
-{
- if (!src->stack)
- return 0;
- if (WARN_ON_ONCE(dst->allocated_stack < src->allocated_stack)) {
- /* internal bug, make state invalid to reject the program */
- memset(dst, 0, sizeof(*dst));
- return -EFAULT;
- }
- memcpy(dst->stack, src->stack,
- sizeof(*src->stack) * (src->allocated_stack / BPF_REG_SIZE));
- return 0;
-}
+#define COPY_STATE_FN(NAME, COUNT, FIELD, SIZE) \
+static int copy_##NAME##_state(struct bpf_func_state *dst, \
+ const struct bpf_func_state *src) \
+{ \
+ if (!src->FIELD) \
+ return 0; \
+ if (WARN_ON_ONCE(dst->COUNT < src->COUNT)) { \
+ /* internal bug, make state invalid to reject the program */ \
+ memset(dst, 0, sizeof(*dst)); \
+ return -EFAULT; \
+ } \
+ memcpy(dst->FIELD, src->FIELD, \
+ sizeof(*src->FIELD) * (src->COUNT / SIZE)); \
+ return 0; \
+}
+/* copy_reference_state() */
+COPY_STATE_FN(reference, acquired_refs, refs, 1)
+/* copy_stack_state() */
+COPY_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
+#undef COPY_STATE_FN
+
+#define REALLOC_STATE_FN(NAME, COUNT, FIELD, SIZE) \
+static int realloc_##NAME##_state(struct bpf_func_state *state, int size, \
+ bool copy_old) \
+{ \
+ u32 old_size = state->COUNT; \
+ struct bpf_##NAME##_state *new_##FIELD; \
+ int slot = size / SIZE; \
+ \
+ if (size <= old_size || !size) { \
+ if (copy_old) \
+ return 0; \
+ state->COUNT = slot * SIZE; \
+ if (!size && old_size) { \
+ kfree(state->FIELD); \
+ state->FIELD = NULL; \
+ } \
+ return 0; \
+ } \
+ new_##FIELD = kmalloc_array(slot, sizeof(struct bpf_##NAME##_state), \
+ GFP_KERNEL); \
+ if (!new_##FIELD) \
+ return -ENOMEM; \
+ if (copy_old) { \
+ if (state->FIELD) \
+ memcpy(new_##FIELD, state->FIELD, \
+ sizeof(*new_##FIELD) * (old_size / SIZE)); \
+ memset(new_##FIELD + old_size / SIZE, 0, \
+ sizeof(*new_##FIELD) * (size - old_size) / SIZE); \
+ } \
+ state->COUNT = slot * SIZE; \
+ kfree(state->FIELD); \
+ state->FIELD = new_##FIELD; \
+ return 0; \
+}
+/* realloc_reference_state() */
+REALLOC_STATE_FN(reference, acquired_refs, refs, 1)
+/* realloc_stack_state() */
+REALLOC_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
+#undef REALLOC_STATE_FN
/* do_check() starts with zero-sized stack in struct bpf_verifier_state to
* make it consume minimal amount of memory. check_stack_write() access from
* the program calls into realloc_func_state() to grow the stack size.
- * Note there is a non-zero parent pointer inside each reg of bpf_verifier_state
- * which this function copies over. It points to corresponding reg in previous
- * bpf_verifier_state which is never reallocated
+ * Note there is a non-zero 'parent' pointer inside bpf_verifier_state
+ * which realloc_stack_state() copies over. It points to previous
+ * bpf_verifier_state which is never reallocated.
*/
-static int realloc_func_state(struct bpf_func_state *state, int size,
- bool copy_old)
+static int realloc_func_state(struct bpf_func_state *state, int stack_size,
+ int refs_size, bool copy_old)
{
- u32 old_size = state->allocated_stack;
- struct bpf_stack_state *new_stack;
- int slot = size / BPF_REG_SIZE;
+ int err = realloc_reference_state(state, refs_size, copy_old);
+ if (err)
+ return err;
+ return realloc_stack_state(state, stack_size, copy_old);
+}
+
+/* Acquire a pointer id from the env and update the state->refs to include
+ * this new pointer reference.
+ * On success, returns a valid pointer id to associate with the register
+ * On failure, returns a negative errno.
+ */
+static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
+{
+ struct bpf_func_state *state = cur_func(env);
+ int new_ofs = state->acquired_refs;
+ int id, err;
- if (size <= old_size || !size) {
- if (copy_old)
+ err = realloc_reference_state(state, state->acquired_refs + 1, true);
+ if (err)
+ return err;
+ id = ++env->id_gen;
+ state->refs[new_ofs].id = id;
+ state->refs[new_ofs].insn_idx = insn_idx;
+
+ return id;
+}
+
+/* release function corresponding to acquire_reference_state(). Idempotent. */
+static int __release_reference_state(struct bpf_func_state *state, int ptr_id)
+{
+ int i, last_idx;
+
+ if (!ptr_id)
+ return -EFAULT;
+
+ last_idx = state->acquired_refs - 1;
+ for (i = 0; i < state->acquired_refs; i++) {
+ if (state->refs[i].id == ptr_id) {
+ if (last_idx && i != last_idx)
+ memcpy(&state->refs[i], &state->refs[last_idx],
+ sizeof(*state->refs));
+ memset(&state->refs[last_idx], 0, sizeof(*state->refs));
+ state->acquired_refs--;
return 0;
- state->allocated_stack = slot * BPF_REG_SIZE;
- if (!size && old_size) {
- kfree(state->stack);
- state->stack = NULL;
}
- return 0;
}
- new_stack = kmalloc_array(slot, sizeof(struct bpf_stack_state),
- GFP_KERNEL);
- if (!new_stack)
- return -ENOMEM;
- if (copy_old) {
- if (state->stack)
- memcpy(new_stack, state->stack,
- sizeof(*new_stack) * (old_size / BPF_REG_SIZE));
- memset(new_stack + old_size / BPF_REG_SIZE, 0,
- sizeof(*new_stack) * (size - old_size) / BPF_REG_SIZE);
- }
- state->allocated_stack = slot * BPF_REG_SIZE;
- kfree(state->stack);
- state->stack = new_stack;
+ return -EFAULT;
+}
+
+/* variation on the above for cases where we expect that there must be an
+ * outstanding reference for the specified ptr_id.
+ */
+static int release_reference_state(struct bpf_verifier_env *env, int ptr_id)
+{
+ struct bpf_func_state *state = cur_func(env);
+ int err;
+
+ err = __release_reference_state(state, ptr_id);
+ if (WARN_ON_ONCE(err != 0))
+ verbose(env, "verifier internal error: can't release reference\n");
+ return err;
+}
+
+static int transfer_reference_state(struct bpf_func_state *dst,
+ struct bpf_func_state *src)
+{
+ int err = realloc_reference_state(dst, src->acquired_refs, false);
+ if (err)
+ return err;
+ err = copy_reference_state(dst, src);
+ if (err)
+ return err;
return 0;
}
@@ -441,6 +599,7 @@ static void free_func_state(struct bpf_func_state *state)
{
if (!state)
return;
+ kfree(state->refs);
kfree(state->stack);
kfree(state);
}
@@ -466,10 +625,14 @@ static int copy_func_state(struct bpf_func_state *dst,
{
int err;
- err = realloc_func_state(dst, src->allocated_stack, false);
+ err = realloc_func_state(dst, src->allocated_stack, src->acquired_refs,
+ false);
+ if (err)
+ return err;
+ memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs));
+ err = copy_reference_state(dst, src);
if (err)
return err;
- memcpy(dst, src, offsetof(struct bpf_func_state, allocated_stack));
return copy_stack_state(dst, src);
}
@@ -968,6 +1131,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
case PTR_TO_PACKET_END:
case PTR_TO_FLOW_KEYS:
case CONST_PTR_TO_MAP:
+ case PTR_TO_SOCKET:
+ case PTR_TO_SOCKET_OR_NULL:
return true;
default:
return false;
@@ -992,7 +1157,7 @@ static int check_stack_write(struct bpf_verifier_env *env,
enum bpf_reg_type type;
err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE),
- true);
+ state->acquired_refs, true);
if (err)
return err;
/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
@@ -1336,6 +1501,28 @@ static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
return 0;
}
+static int check_sock_access(struct bpf_verifier_env *env, u32 regno, int off,
+ int size, enum bpf_access_type t)
+{
+ struct bpf_reg_state *regs = cur_regs(env);
+ struct bpf_reg_state *reg = &regs[regno];
+ struct bpf_insn_access_aux info;
+
+ if (reg->smin_value < 0) {
+ verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
+ regno);
+ return -EACCES;
+ }
+
+ if (!bpf_sock_is_valid_access(off, size, t, &info)) {
+ verbose(env, "invalid bpf_sock access off=%d size=%d\n",
+ off, size);
+ return -EACCES;
+ }
+
+ return 0;
+}
+
static bool __is_pointer_value(bool allow_ptr_leaks,
const struct bpf_reg_state *reg)
{
@@ -1354,7 +1541,8 @@ static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
{
const struct bpf_reg_state *reg = cur_regs(env) + regno;
- return reg->type == PTR_TO_CTX;
+ return reg->type == PTR_TO_CTX ||
+ reg->type == PTR_TO_SOCKET;
}
static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
@@ -1454,6 +1642,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
*/
strict = true;
break;
+ case PTR_TO_SOCKET:
+ pointer_desc = "sock ";
+ break;
default:
break;
}
@@ -1721,6 +1912,14 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
err = check_flow_keys_access(env, off, size);
if (!err && t == BPF_READ && value_regno >= 0)
mark_reg_unknown(env, regs, value_regno);
+ } else if (reg->type == PTR_TO_SOCKET) {
+ if (t == BPF_WRITE) {
+ verbose(env, "cannot write into socket\n");
+ return -EACCES;
+ }
+ err = check_sock_access(env, regno, off, size, t);
+ if (!err && value_regno >= 0)
+ mark_reg_unknown(env, regs, value_regno);
} else {
verbose(env, "R%d invalid mem access '%s'\n", regno,
reg_type_str[reg->type]);
@@ -1763,8 +1962,7 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins
if (is_ctx_reg(env, insn->dst_reg) ||
is_pkt_reg(env, insn->dst_reg)) {
verbose(env, "BPF_XADD stores into R%d %s is not allowed\n",
- insn->dst_reg, is_ctx_reg(env, insn->dst_reg) ?
- "context" : "packet");
+ insn->dst_reg, reg_type_str[insn->dst_reg]);
return -EACCES;
}
@@ -1944,6 +2142,16 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
err = check_ctx_reg(env, reg, regno);
if (err < 0)
return err;
+ } else if (arg_type == ARG_PTR_TO_SOCKET) {
+ expected_type = PTR_TO_SOCKET;
+ if (type != expected_type)
+ goto err_type;
+ if (meta->ptr_id || !reg->id) {
+ verbose(env, "verifier internal error: mismatched references meta=%d, reg=%d\n",
+ meta->ptr_id, reg->id);
+ return -EFAULT;
+ }
+ meta->ptr_id = reg->id;
} else if (arg_type_is_mem_ptr(arg_type)) {
expected_type = PTR_TO_STACK;
/* One exception here. In case function allows for NULL to be
@@ -2233,10 +2441,32 @@ static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
return true;
}
+static bool check_refcount_ok(const struct bpf_func_proto *fn)
+{
+ int count = 0;
+
+ if (arg_type_is_refcounted(fn->arg1_type))
+ count++;
+ if (arg_type_is_refcounted(fn->arg2_type))
+ count++;
+ if (arg_type_is_refcounted(fn->arg3_type))
+ count++;
+ if (arg_type_is_refcounted(fn->arg4_type))
+ count++;
+ if (arg_type_is_refcounted(fn->arg5_type))
+ count++;
+
+ /* We only support one arg being unreferenced at the moment,
+ * which is sufficient for the helper functions we have right now.
+ */
+ return count <= 1;
+}
+
static int check_func_proto(const struct bpf_func_proto *fn)
{
return check_raw_mode_ok(fn) &&
- check_arg_pair_ok(fn) ? 0 : -EINVAL;
+ check_arg_pair_ok(fn) &&
+ check_refcount_ok(fn) ? 0 : -EINVAL;
}
/* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
@@ -2252,10 +2482,9 @@ static void __clear_all_pkt_pointers(struct bpf_verifier_env *env,
if (reg_is_pkt_pointer_any(&regs[i]))
mark_reg_unknown(env, regs, i);
- for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
- if (state->stack[i].slot_type[0] != STACK_SPILL)
+ bpf_for_each_spilled_reg(i, state, reg) {
+ if (!reg)
continue;
- reg = &state->stack[i].spilled_ptr;
if (reg_is_pkt_pointer_any(reg))
__mark_reg_unknown(reg);
}
@@ -2270,12 +2499,45 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
__clear_all_pkt_pointers(env, vstate->frame[i]);
}
+static void release_reg_references(struct bpf_verifier_env *env,
+ struct bpf_func_state *state, int id)
+{
+ struct bpf_reg_state *regs = state->regs, *reg;
+ int i;
+
+ for (i = 0; i < MAX_BPF_REG; i++)
+ if (regs[i].id == id)
+ mark_reg_unknown(env, regs, i);
+
+ bpf_for_each_spilled_reg(i, state, reg) {
+ if (!reg)
+ continue;
+ if (reg_is_refcounted(reg) && reg->id == id)
+ __mark_reg_unknown(reg);
+ }
+}
+
+/* The pointer with the specified id has released its reference to kernel
+ * resources. Identify all copies of the same pointer and clear the reference.
+ */
+static int release_reference(struct bpf_verifier_env *env,
+ struct bpf_call_arg_meta *meta)
+{
+ struct bpf_verifier_state *vstate = env->cur_state;
+ int i;
+
+ for (i = 0; i <= vstate->curframe; i++)
+ release_reg_references(env, vstate->frame[i], meta->ptr_id);
+
+ return release_reference_state(env, meta->ptr_id);
+}
+
static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
int *insn_idx)
{
struct bpf_verifier_state *state = env->cur_state;
struct bpf_func_state *caller, *callee;
- int i, subprog, target_insn;
+ int i, err, subprog, target_insn;
if (state->curframe + 1 >= MAX_CALL_FRAMES) {
verbose(env, "the call stack of %d frames is too deep\n",
@@ -2313,6 +2575,11 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
state->curframe + 1 /* frameno within this callchain */,
subprog /* subprog number within this prog */);
+ /* Transfer references to the callee */
+ err = transfer_reference_state(callee, caller);
+ if (err)
+ return err;
+
/* copy r1 - r5 args that callee can access. The copy includes parent
* pointers, which connects us up to the liveness chain
*/
@@ -2345,6 +2612,7 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
struct bpf_verifier_state *state = env->cur_state;
struct bpf_func_state *caller, *callee;
struct bpf_reg_state *r0;
+ int err;
callee = state->frame[state->curframe];
r0 = &callee->regs[BPF_REG_0];
@@ -2364,6 +2632,11 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
/* return to the caller whatever r0 had in the callee */
caller->regs[BPF_REG_0] = *r0;
+ /* Transfer references to the caller */
+ err = transfer_reference_state(caller, callee);
+ if (err)
+ return err;
+
*insn_idx = callee->callsite + 1;
if (env->log.level) {
verbose(env, "returning from callee:\n");
@@ -2420,6 +2693,18 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
return 0;
}
+static int check_reference_leak(struct bpf_verifier_env *env)
+{
+ struct bpf_func_state *state = cur_func(env);
+ int i;
+
+ for (i = 0; i < state->acquired_refs; i++) {
+ verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
+ state->refs[i].id, state->refs[i].insn_idx);
+ }
+ return state->acquired_refs ? -EINVAL : 0;
+}
+
static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
{
const struct bpf_func_proto *fn = NULL;
@@ -2498,6 +2783,18 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
return err;
}
+ if (func_id == BPF_FUNC_tail_call) {
+ err = check_reference_leak(env);
+ if (err) {
+ verbose(env, "tail_call would lead to reference leak\n");
+ return err;
+ }
+ } else if (is_release_function(func_id)) {
+ err = release_reference(env, &meta);
+ if (err)
+ return err;
+ }
+
regs = cur_regs(env);
/* check that flags argument in get_local_storage(map, flags) is 0,
@@ -2540,6 +2837,13 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
}
regs[BPF_REG_0].map_ptr = meta.map_ptr;
regs[BPF_REG_0].id = ++env->id_gen;
+ } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) {
+ int id = acquire_reference_state(env, insn_idx);
+ if (id < 0)
+ return id;
+ mark_reg_known_zero(env, regs, BPF_REG_0);
+ regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL;
+ regs[BPF_REG_0].id = id;
} else {
verbose(env, "unknown return type %d of func %s#%d\n",
fn->ret_type, func_id_name(func_id), func_id);
@@ -2670,20 +2974,20 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
return -EACCES;
}
- if (ptr_reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
- verbose(env, "R%d pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL prohibited, null-check it first\n",
- dst);
- return -EACCES;
- }
- if (ptr_reg->type == CONST_PTR_TO_MAP) {
- verbose(env, "R%d pointer arithmetic on CONST_PTR_TO_MAP prohibited\n",
- dst);
+ switch (ptr_reg->type) {
+ case PTR_TO_MAP_VALUE_OR_NULL:
+ verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n",
+ dst, reg_type_str[ptr_reg->type]);
return -EACCES;
- }
- if (ptr_reg->type == PTR_TO_PACKET_END) {
- verbose(env, "R%d pointer arithmetic on PTR_TO_PACKET_END prohibited\n",
- dst);
+ case CONST_PTR_TO_MAP:
+ case PTR_TO_PACKET_END:
+ case PTR_TO_SOCKET:
+ case PTR_TO_SOCKET_OR_NULL:
+ verbose(env, "R%d pointer arithmetic on %s prohibited\n",
+ dst, reg_type_str[ptr_reg->type]);
return -EACCES;
+ default:
+ break;
}
/* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
@@ -3395,10 +3699,9 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
for (j = 0; j <= vstate->curframe; j++) {
state = vstate->frame[j];
- for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
- if (state->stack[i].slot_type[0] != STACK_SPILL)
+ bpf_for_each_spilled_reg(i, state, reg) {
+ if (!reg)
continue;
- reg = &state->stack[i].spilled_ptr;
if (reg->type == type && reg->id == dst_reg->id)
reg->range = max(reg->range, new_range);
}
@@ -3604,12 +3907,11 @@ static void reg_combine_min_max(struct bpf_reg_state *true_src,
}
}
-static void mark_map_reg(struct bpf_reg_state *regs, u32 regno, u32 id,
- bool is_null)
+static void mark_ptr_or_null_reg(struct bpf_func_state *state,
+ struct bpf_reg_state *reg, u32 id,
+ bool is_null)
{
- struct bpf_reg_state *reg = &regs[regno];
-
- if (reg->type == PTR_TO_MAP_VALUE_OR_NULL && reg->id == id) {
+ if (reg_type_may_be_null(reg->type) && reg->id == id) {
/* Old offset (both fixed and variable parts) should
* have been known-zero, because we don't allow pointer
* arithmetic on pointers that might be NULL.
@@ -3622,40 +3924,49 @@ static void mark_map_reg(struct bpf_reg_state *regs, u32 regno, u32 id,
}
if (is_null) {
reg->type = SCALAR_VALUE;
- } else if (reg->map_ptr->inner_map_meta) {
- reg->type = CONST_PTR_TO_MAP;
- reg->map_ptr = reg->map_ptr->inner_map_meta;
- } else {
- reg->type = PTR_TO_MAP_VALUE;
+ } else if (reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
+ if (reg->map_ptr->inner_map_meta) {
+ reg->type = CONST_PTR_TO_MAP;
+ reg->map_ptr = reg->map_ptr->inner_map_meta;
+ } else {
+ reg->type = PTR_TO_MAP_VALUE;
+ }
+ } else if (reg->type == PTR_TO_SOCKET_OR_NULL) {
+ reg->type = PTR_TO_SOCKET;
+ }
+ if (is_null || !reg_is_refcounted(reg)) {
+ /* We don't need id from this point onwards anymore,
+ * thus we should better reset it, so that state
+ * pruning has chances to take effect.
+ */
+ reg->id = 0;
}
- /* We don't need id from this point onwards anymore, thus we
- * should better reset it, so that state pruning has chances
- * to take effect.
- */
- reg->id = 0;
}
}
/* The logic is similar to find_good_pkt_pointers(), both could eventually
* be folded together at some point.
*/
-static void mark_map_regs(struct bpf_verifier_state *vstate, u32 regno,
- bool is_null)
+static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
+ bool is_null)
{
struct bpf_func_state *state = vstate->frame[vstate->curframe];
- struct bpf_reg_state *regs = state->regs;
+ struct bpf_reg_state *reg, *regs = state->regs;
u32 id = regs[regno].id;
int i, j;
+ if (reg_is_refcounted_or_null(&regs[regno]) && is_null)
+ __release_reference_state(state, id);
+
for (i = 0; i < MAX_BPF_REG; i++)
- mark_map_reg(regs, i, id, is_null);
+ mark_ptr_or_null_reg(state, &regs[i], id, is_null);
for (j = 0; j <= vstate->curframe; j++) {
state = vstate->frame[j];
- for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
- if (state->stack[i].slot_type[0] != STACK_SPILL)
+ bpf_for_each_spilled_reg(i, state, reg) {
+ if (!reg)
continue;
- mark_map_reg(&state->stack[i].spilled_ptr, 0, id, is_null);
+ mark_ptr_or_null_reg(state, reg, id, is_null);
}
}
}
@@ -3857,12 +4168,14 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
/* detect if R == 0 where R is returned from bpf_map_lookup_elem() */
if (BPF_SRC(insn->code) == BPF_K &&
insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
- dst_reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
- /* Mark all identical map registers in each branch as either
+ reg_type_may_be_null(dst_reg->type)) {
+ /* Mark all identical registers in each branch as either
* safe or unknown depending R == 0 or R != 0 conditional.
*/
- mark_map_regs(this_branch, insn->dst_reg, opcode == BPF_JNE);
- mark_map_regs(other_branch, insn->dst_reg, opcode == BPF_JEQ);
+ mark_ptr_or_null_regs(this_branch, insn->dst_reg,
+ opcode == BPF_JNE);
+ mark_ptr_or_null_regs(other_branch, insn->dst_reg,
+ opcode == BPF_JEQ);
} else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
this_branch, other_branch) &&
is_pointer_value(env, insn->dst_reg)) {
@@ -3985,6 +4298,16 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
if (err)
return err;
+ /* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
+ * gen_ld_abs() may terminate the program at runtime, leading to
+ * reference leak.
+ */
+ err = check_reference_leak(env);
+ if (err) {
+ verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n");
+ return err;
+ }
+
if (regs[BPF_REG_6].type != PTR_TO_CTX) {
verbose(env,
"at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
@@ -4400,6 +4723,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
case CONST_PTR_TO_MAP:
case PTR_TO_PACKET_END:
case PTR_TO_FLOW_KEYS:
+ case PTR_TO_SOCKET:
+ case PTR_TO_SOCKET_OR_NULL:
/* Only valid matches are exact, which memcmp() above
* would have accepted
*/
@@ -4475,6 +4800,14 @@ static bool stacksafe(struct bpf_func_state *old,
return true;
}
+static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur)
+{
+ if (old->acquired_refs != cur->acquired_refs)
+ return false;
+ return !memcmp(old->refs, cur->refs,
+ sizeof(*old->refs) * old->acquired_refs);
+}
+
/* compare two verifier states
*
* all states stored in state_list are known to be valid, since
@@ -4520,6 +4853,9 @@ static bool func_states_equal(struct bpf_func_state *old,
if (!stacksafe(old, cur, idmap))
goto out_free;
+
+ if (!refsafe(old, cur))
+ goto out_free;
ret = true;
out_free:
kfree(idmap);
@@ -4677,6 +5013,37 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
return 0;
}
+/* Return true if it's OK to have the same insn return a different type. */
+static bool reg_type_mismatch_ok(enum bpf_reg_type type)
+{
+ switch (type) {
+ case PTR_TO_CTX:
+ case PTR_TO_SOCKET:
+ case PTR_TO_SOCKET_OR_NULL:
+ return false;
+ default:
+ return true;
+ }
+}
+
+/* If an instruction was previously used with particular pointer types, then we
+ * need to be careful to avoid cases such as the below, where it may be ok
+ * for one branch accessing the pointer, but not ok for the other branch:
+ *
+ * R1 = sock_ptr
+ * goto X;
+ * ...
+ * R1 = some_other_valid_ptr;
+ * goto X;
+ * ...
+ * R2 = *(u32 *)(R1 + 0);
+ */
+static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
+{
+ return src != prev && (!reg_type_mismatch_ok(src) ||
+ !reg_type_mismatch_ok(prev));
+}
+
static int do_check(struct bpf_verifier_env *env)
{
struct bpf_verifier_state *state;
@@ -4770,6 +5137,7 @@ static int do_check(struct bpf_verifier_env *env)
regs = cur_regs(env);
env->insn_aux_data[insn_idx].seen = true;
+
if (class == BPF_ALU || class == BPF_ALU64) {
err = check_alu_op(env, insn);
if (err)
@@ -4809,9 +5177,7 @@ static int do_check(struct bpf_verifier_env *env)
*/
*prev_src_type = src_reg_type;
- } else if (src_reg_type != *prev_src_type &&
- (src_reg_type == PTR_TO_CTX ||
- *prev_src_type == PTR_TO_CTX)) {
+ } else if (reg_type_mismatch(src_reg_type, *prev_src_type)) {
/* ABuser program is trying to use the same insn
* dst_reg = *(u32*) (src_reg + off)
* with different pointer types:
@@ -4856,9 +5222,7 @@ static int do_check(struct bpf_verifier_env *env)
if (*prev_dst_type == NOT_INIT) {
*prev_dst_type = dst_reg_type;
- } else if (dst_reg_type != *prev_dst_type &&
- (dst_reg_type == PTR_TO_CTX ||
- *prev_dst_type == PTR_TO_CTX)) {
+ } else if (reg_type_mismatch(dst_reg_type, *prev_dst_type)) {
verbose(env, "same insn cannot be used with different pointers\n");
return -EINVAL;
}
@@ -4875,8 +5239,8 @@ static int do_check(struct bpf_verifier_env *env)
return err;
if (is_ctx_reg(env, insn->dst_reg)) {
- verbose(env, "BPF_ST stores into R%d context is not allowed\n",
- insn->dst_reg);
+ verbose(env, "BPF_ST stores into R%d %s is not allowed\n",
+ insn->dst_reg, reg_type_str[insn->dst_reg]);
return -EACCES;
}
@@ -4938,6 +5302,10 @@ static int do_check(struct bpf_verifier_env *env)
continue;
}
+ err = check_reference_leak(env);
+ if (err)
+ return err;
+
/* eBPF calling convetion is such that R0 is used
* to return the value from eBPF program.
* Make sure that it's readable at this time
@@ -5284,8 +5652,10 @@ static void sanitize_dead_code(struct bpf_verifier_env *env)
}
}
-/* convert load instructions that access fields of 'struct __sk_buff'
- * into sequence of instructions that access fields of 'struct sk_buff'
+/* convert load instructions that access fields of a context type into a
+ * sequence of instructions that access fields of the underlying structure:
+ * struct __sk_buff -> struct sk_buff
+ * struct bpf_sock_ops -> struct sock
*/
static int convert_ctx_accesses(struct bpf_verifier_env *env)
{
@@ -5314,12 +5684,14 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
}
}
- if (!ops->convert_ctx_access || bpf_prog_is_dev_bound(env->prog->aux))
+ if (bpf_prog_is_dev_bound(env->prog->aux))
return 0;
insn = env->prog->insnsi + delta;
for (i = 0; i < insn_cnt; i++, insn++) {
+ bpf_convert_ctx_access_t convert_ctx_access;
+
if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
@@ -5361,8 +5733,18 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
continue;
}
- if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX)
+ switch (env->insn_aux_data[i + delta].ptr_type) {
+ case PTR_TO_CTX:
+ if (!ops->convert_ctx_access)
+ continue;
+ convert_ctx_access = ops->convert_ctx_access;
+ break;
+ case PTR_TO_SOCKET:
+ convert_ctx_access = bpf_sock_convert_ctx_access;
+ break;
+ default:
continue;
+ }
ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
size = BPF_LDST_BYTES(insn);
@@ -5394,8 +5776,8 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
}
target_size = 0;
- cnt = ops->convert_ctx_access(type, insn, insn_buf, env->prog,
- &target_size);
+ cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
+ &target_size);
if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
(ctx_field_size && !target_size)) {
verbose(env, "bpf verifier is misconfigured\n");
diff --git a/net/core/filter.c b/net/core/filter.c
index 72db8afb7cb6..591c698bc517 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -58,13 +58,17 @@
#include <net/busy_poll.h>
#include <net/tcp.h>
#include <net/xfrm.h>
+#include <net/udp.h>
#include <linux/bpf_trace.h>
#include <net/xdp_sock.h>
#include <linux/inetdevice.h>
+#include <net/inet_hashtables.h>
+#include <net/inet6_hashtables.h>
#include <net/ip_fib.h>
#include <net/flow.h>
#include <net/arp.h>
#include <net/ipv6.h>
+#include <net/net_namespace.h>
#include <linux/seg6_local.h>
#include <net/seg6.h>
#include <net/seg6_local.h>
@@ -4813,6 +4817,141 @@ static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = {
};
#endif /* CONFIG_IPV6_SEG6_BPF */
+struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
+ struct sk_buff *skb, u8 family, u8 proto)
+{
+ int dif = skb->dev->ifindex;
+ bool refcounted = false;
+ struct sock *sk = NULL;
+
+ if (family == AF_INET) {
+ __be32 src4 = tuple->ipv4.saddr;
+ __be32 dst4 = tuple->ipv4.daddr;
+ int sdif = inet_sdif(skb);
+
+ if (proto == IPPROTO_TCP)
+ sk = __inet_lookup(net, &tcp_hashinfo, skb, 0,
+ src4, tuple->ipv4.sport,
+ dst4, tuple->ipv4.dport,
+ dif, sdif, &refcounted);
+ else
+ sk = __udp4_lib_lookup(net, src4, tuple->ipv4.sport,
+ dst4, tuple->ipv4.dport,
+ dif, sdif, &udp_table, skb);
+#if IS_ENABLED(CONFIG_IPV6)
+ } else {
+ struct in6_addr *src6 = (struct in6_addr *)&tuple->ipv6.saddr;
+ struct in6_addr *dst6 = (struct in6_addr *)&tuple->ipv6.daddr;
+ int sdif = inet6_sdif(skb);
+
+ if (proto == IPPROTO_TCP)
+ sk = __inet6_lookup(net, &tcp_hashinfo, skb, 0,
+ src6, tuple->ipv6.sport,
+ dst6, tuple->ipv6.dport,
+ dif, sdif, &refcounted);
+ else
+ sk = __udp6_lib_lookup(net, src6, tuple->ipv6.sport,
+ dst6, tuple->ipv6.dport,
+ dif, sdif, &udp_table, skb);
+#endif
+ }
+
+ if (unlikely(sk && !refcounted && !sock_flag(sk, SOCK_RCU_FREE))) {
+ WARN_ONCE(1, "Found non-RCU, unreferenced socket!");
+ sk = NULL;
+ }
+ return sk;
+}
+
+/* bpf_sk_lookup performs the core lookup for different types of sockets,
+ * taking a reference on the socket if it doesn't have the flag SOCK_RCU_FREE.
+ * Returns the socket as an 'unsigned long' to simplify the casting in the
+ * callers to satisfy BPF_CALL declarations.
+ */
+static unsigned long
+bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
+ u8 proto, u64 netns_id, u64 flags)
+{
+ struct net *caller_net;
+ struct sock *sk = NULL;
+ u8 family = AF_UNSPEC;
+ struct net *net;
+
+ family = len == sizeof(tuple->ipv4) ? AF_INET : AF_INET6;
+ if (unlikely(family == AF_UNSPEC || netns_id > U32_MAX || flags))
+ goto out;
+
+ if (skb->dev)
+ caller_net = dev_net(skb->dev);
+ else
+ caller_net = sock_net(skb->sk);
+ if (netns_id) {
+ net = get_net_ns_by_id(caller_net, netns_id);
+ if (unlikely(!net))
+ goto out;
+ sk = sk_lookup(net, tuple, skb, family, proto);
+ put_net(net);
+ } else {
+ net = caller_net;
+ sk = sk_lookup(net, tuple, skb, family, proto);
+ }
+
+ if (sk)
+ sk = sk_to_full_sk(sk);
+out:
+ return (unsigned long) sk;
+}
+
+BPF_CALL_5(bpf_sk_lookup_tcp, struct sk_buff *, skb,
+ struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
+{
+ return bpf_sk_lookup(skb, tuple, len, IPPROTO_TCP, netns_id, flags);
+}
+
+static const struct bpf_func_proto bpf_sk_lookup_tcp_proto = {
+ .func = bpf_sk_lookup_tcp,
+ .gpl_only = false,
+ .pkt_access = true,
+ .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
+};
+
+BPF_CALL_5(bpf_sk_lookup_udp, struct sk_buff *, skb,
+ struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
+{
+ return bpf_sk_lookup(skb, tuple, len, IPPROTO_UDP, netns_id, flags);
+}
+
+static const struct bpf_func_proto bpf_sk_lookup_udp_proto = {
+ .func = bpf_sk_lookup_udp,
+ .gpl_only = false,
+ .pkt_access = true,
+ .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
+};
+
+BPF_CALL_1(bpf_sk_release, struct sock *, sk)
+{
+ if (!sock_flag(sk, SOCK_RCU_FREE))
+ sock_gen_put(sk);
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_sk_release_proto = {
+ .func = bpf_sk_release,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_SOCKET,
+};
+
bool bpf_helper_changes_pkt_data(void *func)
{
if (func == bpf_skb_vlan_push ||
@@ -5019,6 +5158,12 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_skb_ancestor_cgroup_id:
return &bpf_skb_ancestor_cgroup_id_proto;
#endif
+ case BPF_FUNC_sk_lookup_tcp:
+ return &bpf_sk_lookup_tcp_proto;
+ case BPF_FUNC_sk_lookup_udp:
+ return &bpf_sk_lookup_udp_proto;
+ case BPF_FUNC_sk_release:
+ return &bpf_sk_release_proto;
default:
return bpf_base_func_proto(func_id);
}
@@ -5119,6 +5264,12 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_sk_redirect_hash_proto;
case BPF_FUNC_get_local_storage:
return &bpf_get_local_storage_proto;
+ case BPF_FUNC_sk_lookup_tcp:
+ return &bpf_sk_lookup_tcp_proto;
+ case BPF_FUNC_sk_lookup_udp:
+ return &bpf_sk_lookup_udp_proto;
+ case BPF_FUNC_sk_release:
+ return &bpf_sk_release_proto;
default:
return bpf_base_func_proto(func_id);
}
@@ -5394,23 +5545,29 @@ static bool __sock_filter_check_size(int off, int size,
return size == size_default;
}
-static bool sock_filter_is_valid_access(int off, int size,
- enum bpf_access_type type,
- const struct bpf_prog *prog,
- struct bpf_insn_access_aux *info)
+bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
+ struct bpf_insn_access_aux *info)
{
if (off < 0 || off >= sizeof(struct bpf_sock))
return false;
if (off % size != 0)
return false;
- if (!__sock_filter_check_attach_type(off, type,
- prog->expected_attach_type))
- return false;
if (!__sock_filter_check_size(off, size, info))
return false;
return true;
}
+static bool sock_filter_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ const struct bpf_prog *prog,
+ struct bpf_insn_access_aux *info)
+{
+ if (!bpf_sock_is_valid_access(off, size, type, info))
+ return false;
+ return __sock_filter_check_attach_type(off, type,
+ prog->expected_attach_type);
+}
+
static int bpf_unclone_prologue(struct bpf_insn *insn_buf, bool direct_write,
const struct bpf_prog *prog, int drop_verdict)
{
@@ -6122,10 +6279,10 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
return insn - insn_buf;
}
-static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
- const struct bpf_insn *si,
- struct bpf_insn *insn_buf,
- struct bpf_prog *prog, u32 *target_size)
+u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
+ const struct bpf_insn *si,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog, u32 *target_size)
{
struct bpf_insn *insn = insn_buf;
int off;
@@ -7037,7 +7194,7 @@ const struct bpf_prog_ops lwt_seg6local_prog_ops = {
const struct bpf_verifier_ops cg_sock_verifier_ops = {
.get_func_proto = sock_filter_func_proto,
.is_valid_access = sock_filter_is_valid_access,
- .convert_ctx_access = sock_filter_convert_ctx_access,
+ .convert_ctx_access = bpf_sock_convert_ctx_access,
};
const struct bpf_prog_ops cg_sock_prog_ops = {
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index e2070d819e04..f9187b41dff6 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2144,6 +2144,77 @@ union bpf_attr {
* request in the skb.
* Return
* 0 on success, or a negative error in case of failure.
+ *
+ * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags)
+ * Description
+ * Look for TCP socket matching *tuple*, optionally in a child
+ * network namespace *netns*. The return value must be checked,
+ * and if non-NULL, released via **bpf_sk_release**\ ().
+ *
+ * The *ctx* should point to the context of the program, such as
+ * the skb or socket (depending on the hook in use). This is used
+ * to determine the base network namespace for the lookup.
+ *
+ * *tuple_size* must be one of:
+ *
+ * **sizeof**\ (*tuple*\ **->ipv4**)
+ * Look for an IPv4 socket.
+ * **sizeof**\ (*tuple*\ **->ipv6**)
+ * Look for an IPv6 socket.
+ *
+ * If the *netns* is zero, then the socket lookup table in the
+ * netns associated with the *ctx* will be used. For the TC hooks,
+ * this in the netns of the device in the skb. For socket hooks,
+ * this in the netns of the socket. If *netns* is non-zero, then
+ * it specifies the ID of the netns relative to the netns
+ * associated with the *ctx*.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_NET** configuration option.
+ * Return
+ * Pointer to *struct bpf_sock*, or NULL in case of failure.
+ *
+ * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags)
+ * Description
+ * Look for UDP socket matching *tuple*, optionally in a child
+ * network namespace *netns*. The return value must be checked,
+ * and if non-NULL, released via **bpf_sk_release**\ ().
+ *
+ * The *ctx* should point to the context of the program, such as
+ * the skb or socket (depending on the hook in use). This is used
+ * to determine the base network namespace for the lookup.
+ *
+ * *tuple_size* must be one of:
+ *
+ * **sizeof**\ (*tuple*\ **->ipv4**)
+ * Look for an IPv4 socket.
+ * **sizeof**\ (*tuple*\ **->ipv6**)
+ * Look for an IPv6 socket.
+ *
+ * If the *netns* is zero, then the socket lookup table in the
+ * netns associated with the *ctx* will be used. For the TC hooks,
+ * this in the netns of the device in the skb. For socket hooks,
+ * this in the netns of the socket. If *netns* is non-zero, then
+ * it specifies the ID of the netns relative to the netns
+ * associated with the *ctx*.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_NET** configuration option.
+ * Return
+ * Pointer to *struct bpf_sock*, or NULL in case of failure.
+ *
+ * int bpf_sk_release(struct bpf_sock *sk)
+ * Description
+ * Release the reference held by *sock*. *sock* must be a non-NULL
+ * pointer that was returned from bpf_sk_lookup_xxx\ ().
+ * Return
+ * 0 on success, or a negative error in case of failure.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -2229,7 +2300,10 @@ union bpf_attr {
FN(get_current_cgroup_id), \
FN(get_local_storage), \
FN(sk_select_reuseport), \
- FN(skb_ancestor_cgroup_id),
+ FN(skb_ancestor_cgroup_id), \
+ FN(sk_lookup_tcp), \
+ FN(sk_lookup_udp), \
+ FN(sk_release),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -2399,6 +2473,23 @@ struct bpf_sock {
*/
};
+struct bpf_sock_tuple {
+ union {
+ struct {
+ __be32 saddr;
+ __be32 daddr;
+ __be16 sport;
+ __be16 dport;
+ } ipv4;
+ struct {
+ __be32 saddr[4];
+ __be32 daddr[4];
+ __be16 sport;
+ __be16 dport;
+ } ipv6;
+ };
+};
+
#define XDP_PACKET_HEADROOM 256
/* User return codes for XDP prog type.
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 425d5ca45c97..9e68fd9fcfca 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -228,7 +228,7 @@ struct bpf_object {
};
#define obj_elf_valid(o) ((o)->efile.elf)
-static void bpf_program__unload(struct bpf_program *prog)
+void bpf_program__unload(struct bpf_program *prog)
{
int i;
@@ -1375,7 +1375,7 @@ out:
return ret;
}
-static int
+int
bpf_program__load(struct bpf_program *prog,
char *license, u32 kern_version)
{
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 511c1294dcbf..2ed24d3f80b3 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -128,10 +128,13 @@ void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex);
const char *bpf_program__title(struct bpf_program *prog, bool needs_copy);
+int bpf_program__load(struct bpf_program *prog, char *license,
+ u32 kern_version);
int bpf_program__fd(struct bpf_program *prog);
int bpf_program__pin_instance(struct bpf_program *prog, const char *path,
int instance);
int bpf_program__pin(struct bpf_program *prog, const char *path);
+void bpf_program__unload(struct bpf_program *prog);
struct bpf_insn;
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index f802de526f57..1381ab81099c 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -36,7 +36,7 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \
test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \
get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \
- test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o
+ test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o test_sk_lookup_kern.o
# Order correspond to 'make run_tests' order
TEST_PROGS := test_kmod.sh \
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index e4be7730222d..1d407b3494f9 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -143,6 +143,18 @@ static unsigned long long (*bpf_skb_cgroup_id)(void *ctx) =
(void *) BPF_FUNC_skb_cgroup_id;
static unsigned long long (*bpf_skb_ancestor_cgroup_id)(void *ctx, int level) =
(void *) BPF_FUNC_skb_ancestor_cgroup_id;
+static struct bpf_sock *(*bpf_sk_lookup_tcp)(void *ctx,
+ struct bpf_sock_tuple *tuple,
+ int size, unsigned int netns_id,
+ unsigned long long flags) =
+ (void *) BPF_FUNC_sk_lookup_tcp;
+static struct bpf_sock *(*bpf_sk_lookup_udp)(void *ctx,
+ struct bpf_sock_tuple *tuple,
+ int size, unsigned int netns_id,
+ unsigned long long flags) =
+ (void *) BPF_FUNC_sk_lookup_udp;
+static int (*bpf_sk_release)(struct bpf_sock *sk) =
+ (void *) BPF_FUNC_sk_release;
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 63a671803ed6..e8becca9c521 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -1698,6 +1698,43 @@ static void test_task_fd_query_tp(void)
"sys_enter_read");
}
+static void test_reference_tracking()
+{
+ const char *file = "./test_sk_lookup_kern.o";
+ struct bpf_object *obj;
+ struct bpf_program *prog;
+ __u32 duration;
+ int err = 0;
+
+ obj = bpf_object__open(file);
+ if (IS_ERR(obj)) {
+ error_cnt++;
+ return;
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ const char *title;
+
+ /* Ignore .text sections */
+ title = bpf_program__title(prog, false);
+ if (strstr(title, ".text") != NULL)
+ continue;
+
+ bpf_program__set_type(prog, BPF_PROG_TYPE_SCHED_CLS);
+
+ /* Expect verifier failure if test name has 'fail' */
+ if (strstr(title, "fail") != NULL) {
+ libbpf_set_print(NULL, NULL, NULL);
+ err = !bpf_program__load(prog, "GPL", 0);
+ libbpf_set_print(printf, printf, NULL);
+ } else {
+ err = bpf_program__load(prog, "GPL", 0);
+ }
+ CHECK(err, title, "\n");
+ }
+ bpf_object__close(obj);
+}
+
int main(void)
{
jit_enabled = is_jit_enabled();
@@ -1719,6 +1756,7 @@ int main(void)
test_get_stack_raw_tp();
test_task_fd_query_rawtp();
test_task_fd_query_tp();
+ test_reference_tracking();
printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
diff --git a/tools/testing/selftests/bpf/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/test_sk_lookup_kern.c
new file mode 100644
index 000000000000..b745bdc08c2b
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sk_lookup_kern.c
@@ -0,0 +1,180 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
+
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/pkt_cls.h>
+#include <linux/tcp.h>
+#include <sys/socket.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+int _version SEC("version") = 1;
+char _license[] SEC("license") = "GPL";
+
+/* Fill 'tuple' with L3 info, and attempt to find L4. On fail, return NULL. */
+static struct bpf_sock_tuple *get_tuple(void *data, __u64 nh_off,
+ void *data_end, __u16 eth_proto,
+ bool *ipv4)
+{
+ struct bpf_sock_tuple *result;
+ __u8 proto = 0;
+ __u64 ihl_len;
+
+ if (eth_proto == bpf_htons(ETH_P_IP)) {
+ struct iphdr *iph = (struct iphdr *)(data + nh_off);
+
+ if (iph + 1 > data_end)
+ return NULL;
+ ihl_len = iph->ihl * 4;
+ proto = iph->protocol;
+ *ipv4 = true;
+ result = (struct bpf_sock_tuple *)&iph->saddr;
+ } else if (eth_proto == bpf_htons(ETH_P_IPV6)) {
+ struct ipv6hdr *ip6h = (struct ipv6hdr *)(data + nh_off);
+
+ if (ip6h + 1 > data_end)
+ return NULL;
+ ihl_len = sizeof(*ip6h);
+ proto = ip6h->nexthdr;
+ *ipv4 = true;
+ result = (struct bpf_sock_tuple *)&ip6h->saddr;
+ }
+
+ if (data + nh_off + ihl_len > data_end || proto != IPPROTO_TCP)
+ return NULL;
+
+ return result;
+}
+
+SEC("sk_lookup_success")
+int bpf_sk_lookup_test0(struct __sk_buff *skb)
+{
+ void *data_end = (void *)(long)skb->data_end;
+ void *data = (void *)(long)skb->data;
+ struct ethhdr *eth = (struct ethhdr *)(data);
+ struct bpf_sock_tuple *tuple;
+ struct bpf_sock *sk;
+ size_t tuple_len;
+ bool ipv4;
+
+ if (eth + 1 > data_end)
+ return TC_ACT_SHOT;
+
+ tuple = get_tuple(data, sizeof(*eth), data_end, eth->h_proto, &ipv4);
+ if (!tuple || tuple + sizeof *tuple > data_end)
+ return TC_ACT_SHOT;
+
+ tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6);
+ sk = bpf_sk_lookup_tcp(skb, tuple, tuple_len, 0, 0);
+ if (sk)
+ bpf_sk_release(sk);
+ return sk ? TC_ACT_OK : TC_ACT_UNSPEC;
+}
+
+SEC("sk_lookup_success_simple")
+int bpf_sk_lookup_test1(struct __sk_buff *skb)
+{
+ struct bpf_sock_tuple tuple = {};
+ struct bpf_sock *sk;
+
+ sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0);
+ if (sk)
+ bpf_sk_release(sk);
+ return 0;
+}
+
+SEC("fail_use_after_free")
+int bpf_sk_lookup_uaf(struct __sk_buff *skb)
+{
+ struct bpf_sock_tuple tuple = {};
+ struct bpf_sock *sk;
+ __u32 family = 0;
+
+ sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0);
+ if (sk) {
+ bpf_sk_release(sk);
+ family = sk->family;
+ }
+ return family;
+}
+
+SEC("fail_modify_sk_pointer")
+int bpf_sk_lookup_modptr(struct __sk_buff *skb)
+{
+ struct bpf_sock_tuple tuple = {};
+ struct bpf_sock *sk;
+ __u32 family;
+
+ sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0);
+ if (sk) {
+ sk += 1;
+ bpf_sk_release(sk);
+ }
+ return 0;
+}
+
+SEC("fail_modify_sk_or_null_pointer")
+int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb)
+{
+ struct bpf_sock_tuple tuple = {};
+ struct bpf_sock *sk;
+ __u32 family;
+
+ sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0);
+ sk += 1;
+ if (sk)
+ bpf_sk_release(sk);
+ return 0;
+}
+
+SEC("fail_no_release")
+int bpf_sk_lookup_test2(struct __sk_buff *skb)
+{
+ struct bpf_sock_tuple tuple = {};
+
+ bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0);
+ return 0;
+}
+
+SEC("fail_release_twice")
+int bpf_sk_lookup_test3(struct __sk_buff *skb)
+{
+ struct bpf_sock_tuple tuple = {};
+ struct bpf_sock *sk;
+
+ sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0);
+ bpf_sk_release(sk);
+ bpf_sk_release(sk);
+ return 0;
+}
+
+SEC("fail_release_unchecked")
+int bpf_sk_lookup_test4(struct __sk_buff *skb)
+{
+ struct bpf_sock_tuple tuple = {};
+ struct bpf_sock *sk;
+
+ sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0);
+ bpf_sk_release(sk);
+ return 0;
+}
+
+void lookup_no_release(struct __sk_buff *skb)
+{
+ struct bpf_sock_tuple tuple = {};
+ bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0);
+}
+
+SEC("fail_no_release_subcall")
+int bpf_sk_lookup_test5(struct __sk_buff *skb)
+{
+ lookup_no_release(skb);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index c7d25f23baf9..bc9cd8537467 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -3,6 +3,7 @@
*
* Copyright (c) 2014 PLUMgrid, http://plumgrid.com
* Copyright (c) 2017 Facebook
+ * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@@ -178,6 +179,24 @@ static void bpf_fill_rand_ld_dw(struct bpf_test *self)
self->retval = (uint32_t)res;
}
+/* BPF_SK_LOOKUP contains 13 instructions, if you need to fix up maps */
+#define BPF_SK_LOOKUP \
+ /* struct bpf_sock_tuple tuple = {} */ \
+ BPF_MOV64_IMM(BPF_REG_2, 0), \
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -8), \
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -16), \
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -24), \
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -32), \
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -40), \
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -48), \
+ /* sk = sk_lookup_tcp(ctx, &tuple, sizeof tuple, 0, 0) */ \
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), \
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -48), \
+ BPF_MOV64_IMM(BPF_REG_3, sizeof(struct bpf_sock_tuple)), \
+ BPF_MOV64_IMM(BPF_REG_4, 0), \
+ BPF_MOV64_IMM(BPF_REG_5, 0), \
+ BPF_EMIT_CALL(BPF_FUNC_sk_lookup_tcp)
+
static struct bpf_test tests[] = {
{
"add+sub+mul",
@@ -2708,6 +2727,137 @@ static struct bpf_test tests[] = {
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
},
{
+ "unpriv: spill/fill of different pointers stx - ctx and sock",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
+ /* struct bpf_sock *sock = bpf_sock_lookup(...); */
+ BPF_SK_LOOKUP,
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ /* u64 foo; */
+ /* void *target = &foo; */
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
+ /* if (skb == NULL) *target = sock; */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0),
+ /* else *target = skb; */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+ /* struct __sk_buff *skb = *target; */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
+ /* skb->mark = 42; */
+ BPF_MOV64_IMM(BPF_REG_3, 42),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3,
+ offsetof(struct __sk_buff, mark)),
+ /* if (sk) bpf_sk_release(sk) */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "type=ctx expected=sock",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "unpriv: spill/fill of different pointers stx - leak sock",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
+ /* struct bpf_sock *sock = bpf_sock_lookup(...); */
+ BPF_SK_LOOKUP,
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ /* u64 foo; */
+ /* void *target = &foo; */
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
+ /* if (skb == NULL) *target = sock; */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0),
+ /* else *target = skb; */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+ /* struct __sk_buff *skb = *target; */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
+ /* skb->mark = 42; */
+ BPF_MOV64_IMM(BPF_REG_3, 42),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3,
+ offsetof(struct __sk_buff, mark)),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ //.errstr = "same insn cannot be used with different pointers",
+ .errstr = "Unreleased reference",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "unpriv: spill/fill of different pointers stx - sock and ctx (read)",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
+ /* struct bpf_sock *sock = bpf_sock_lookup(...); */
+ BPF_SK_LOOKUP,
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ /* u64 foo; */
+ /* void *target = &foo; */
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
+ /* if (skb) *target = skb */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+ /* else *target = sock */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0),
+ /* struct bpf_sock *sk = *target; */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
+ /* if (sk) u32 foo = sk->mark; bpf_sk_release(sk); */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 2),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct bpf_sock, mark)),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "same insn cannot be used with different pointers",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "unpriv: spill/fill of different pointers stx - sock and ctx (write)",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
+ /* struct bpf_sock *sock = bpf_sock_lookup(...); */
+ BPF_SK_LOOKUP,
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ /* u64 foo; */
+ /* void *target = &foo; */
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
+ /* if (skb) *target = skb */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+ /* else *target = sock */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0),
+ /* struct bpf_sock *sk = *target; */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
+ /* if (sk) sk->mark = 42; bpf_sk_release(sk); */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3),
+ BPF_MOV64_IMM(BPF_REG_3, 42),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3,
+ offsetof(struct bpf_sock, mark)),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ //.errstr = "same insn cannot be used with different pointers",
+ .errstr = "cannot write into socket",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
"unpriv: spill/fill of different pointers ldx",
.insns = {
BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
@@ -3276,7 +3426,7 @@ static struct bpf_test tests[] = {
BPF_ST_MEM(BPF_DW, BPF_REG_1, offsetof(struct __sk_buff, mark), 0),
BPF_EXIT_INSN(),
},
- .errstr = "BPF_ST stores into R1 context is not allowed",
+ .errstr = "BPF_ST stores into R1 inv is not allowed",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
},
@@ -3288,7 +3438,7 @@ static struct bpf_test tests[] = {
BPF_REG_0, offsetof(struct __sk_buff, mark), 0),
BPF_EXIT_INSN(),
},
- .errstr = "BPF_XADD stores into R1 context is not allowed",
+ .errstr = "BPF_XADD stores into R1 inv is not allowed",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
},
@@ -3638,7 +3788,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
+ .errstr = "R3 pointer arithmetic on pkt_end",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
},
@@ -4896,7 +5046,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup_map1 = { 4 },
- .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
+ .errstr = "R4 pointer arithmetic on map_value_or_null",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_SCHED_CLS
},
@@ -4917,7 +5067,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup_map1 = { 4 },
- .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
+ .errstr = "R4 pointer arithmetic on map_value_or_null",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_SCHED_CLS
},
@@ -4938,7 +5088,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup_map1 = { 4 },
- .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
+ .errstr = "R4 pointer arithmetic on map_value_or_null",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_SCHED_CLS
},
@@ -5266,7 +5416,7 @@ static struct bpf_test tests[] = {
.errstr_unpriv = "R2 leaks addr into mem",
.result_unpriv = REJECT,
.result = REJECT,
- .errstr = "BPF_XADD stores into R1 context is not allowed",
+ .errstr = "BPF_XADD stores into R1 inv is not allowed",
},
{
"leak pointer into ctx 2",
@@ -5281,7 +5431,7 @@ static struct bpf_test tests[] = {
.errstr_unpriv = "R10 leaks addr into mem",
.result_unpriv = REJECT,
.result = REJECT,
- .errstr = "BPF_XADD stores into R1 context is not allowed",
+ .errstr = "BPF_XADD stores into R1 inv is not allowed",
},
{
"leak pointer into ctx 3",
@@ -7253,7 +7403,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup_map_in_map = { 3 },
- .errstr = "R1 pointer arithmetic on CONST_PTR_TO_MAP prohibited",
+ .errstr = "R1 pointer arithmetic on map_ptr prohibited",
.result = REJECT,
},
{
@@ -8927,7 +9077,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
+ .errstr = "R3 pointer arithmetic on pkt_end",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_XDP,
},
@@ -8946,7 +9096,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
+ .errstr = "R3 pointer arithmetic on pkt_end",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_XDP,
},
@@ -12230,7 +12380,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.result = REJECT,
- .errstr = "BPF_XADD stores into R2 packet",
+ .errstr = "BPF_XADD stores into R2 ctx",
.prog_type = BPF_PROG_TYPE_XDP,
},
{
@@ -12558,6 +12708,214 @@ static struct bpf_test tests[] = {
.result = ACCEPT,
},
{
+ "reference tracking: leak potential reference",
+ .insns = {
+ BPF_SK_LOOKUP,
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), /* leak reference */
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "Unreleased reference",
+ .result = REJECT,
+ },
+ {
+ "reference tracking: leak potential reference on stack",
+ .insns = {
+ BPF_SK_LOOKUP,
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "Unreleased reference",
+ .result = REJECT,
+ },
+ {
+ "reference tracking: leak potential reference on stack 2",
+ .insns = {
+ BPF_SK_LOOKUP,
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "Unreleased reference",
+ .result = REJECT,
+ },
+ {
+ "reference tracking: zero potential reference",
+ .insns = {
+ BPF_SK_LOOKUP,
+ BPF_MOV64_IMM(BPF_REG_0, 0), /* leak reference */
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "Unreleased reference",
+ .result = REJECT,
+ },
+ {
+ "reference tracking: copy and zero potential references",
+ .insns = {
+ BPF_SK_LOOKUP,
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_7, 0), /* leak reference */
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "Unreleased reference",
+ .result = REJECT,
+ },
+ {
+ "reference tracking: release reference without check",
+ .insns = {
+ BPF_SK_LOOKUP,
+ /* reference in r0 may be NULL */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "type=sock_or_null expected=sock",
+ .result = REJECT,
+ },
+ {
+ "reference tracking: release reference",
+ .insns = {
+ BPF_SK_LOOKUP,
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ },
+ {
+ "reference tracking: release reference 2",
+ .insns = {
+ BPF_SK_LOOKUP,
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ },
+ {
+ "reference tracking: release reference twice",
+ .insns = {
+ BPF_SK_LOOKUP,
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "type=inv expected=sock",
+ .result = REJECT,
+ },
+ {
+ "reference tracking: release reference twice inside branch",
+ .insns = {
+ BPF_SK_LOOKUP,
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), /* goto end */
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "type=inv expected=sock",
+ .result = REJECT,
+ },
+ {
+ "reference tracking: alloc, check, free in one subbranch",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 16),
+ /* if (offsetof(skb, mark) > data_len) exit; */
+ BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_2,
+ offsetof(struct __sk_buff, mark)),
+ BPF_SK_LOOKUP,
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 1), /* mark == 0? */
+ /* Leak reference in R0 */
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), /* sk NULL? */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "Unreleased reference",
+ .result = REJECT,
+ },
+ {
+ "reference tracking: alloc, check, free in both subbranches",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 16),
+ /* if (offsetof(skb, mark) > data_len) exit; */
+ BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_2,
+ offsetof(struct __sk_buff, mark)),
+ BPF_SK_LOOKUP,
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 4), /* mark == 0? */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), /* sk NULL? */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), /* sk NULL? */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ },
+ {
+ "reference tracking in call: free reference in subprog",
+ .insns = {
+ BPF_SK_LOOKUP,
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), /* unchecked reference */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 1),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ },
+ {
"pass modified ctx pointer to helper, 1",
.insns = {
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -612),
@@ -12627,6 +12985,407 @@ static struct bpf_test tests[] = {
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = ACCEPT,
},
+ {
+ "reference tracking in call: free reference in subprog and outside",
+ .insns = {
+ BPF_SK_LOOKUP,
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), /* unchecked reference */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 1),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "type=inv expected=sock",
+ .result = REJECT,
+ },
+ {
+ "reference tracking in call: alloc & leak reference in subprog",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_4),
+ BPF_SK_LOOKUP,
+ /* spill unchecked sk_ptr into stack of caller */
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "Unreleased reference",
+ .result = REJECT,
+ },
+ {
+ "reference tracking in call: alloc in subprog, release outside",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_SK_LOOKUP,
+ BPF_EXIT_INSN(), /* return sk */
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .retval = POINTER_VALUE,
+ .result = ACCEPT,
+ },
+ {
+ "reference tracking in call: sk_ptr leak into caller stack",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5),
+ /* spill unchecked sk_ptr into stack of caller */
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_5, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 2 */
+ BPF_SK_LOOKUP,
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "Unreleased reference",
+ .result = REJECT,
+ },
+ {
+ "reference tracking in call: sk_ptr spill into caller stack",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8),
+ /* spill unchecked sk_ptr into stack of caller */
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_5, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ /* now the sk_ptr is verified, free the reference */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_4, 0),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+
+ /* subprog 2 */
+ BPF_SK_LOOKUP,
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ },
+ {
+ "reference tracking: allow LD_ABS",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_SK_LOOKUP,
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_LD_ABS(BPF_B, 0),
+ BPF_LD_ABS(BPF_H, 0),
+ BPF_LD_ABS(BPF_W, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ },
+ {
+ "reference tracking: forbid LD_ABS while holding reference",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_SK_LOOKUP,
+ BPF_LD_ABS(BPF_B, 0),
+ BPF_LD_ABS(BPF_H, 0),
+ BPF_LD_ABS(BPF_W, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "BPF_LD_[ABS|IND] cannot be mixed with socket references",
+ .result = REJECT,
+ },
+ {
+ "reference tracking: allow LD_IND",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_SK_LOOKUP,
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_MOV64_IMM(BPF_REG_7, 1),
+ BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 1,
+ },
+ {
+ "reference tracking: forbid LD_IND while holding reference",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_SK_LOOKUP,
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_7, 1),
+ BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "BPF_LD_[ABS|IND] cannot be mixed with socket references",
+ .result = REJECT,
+ },
+ {
+ "reference tracking: check reference or tail call",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_1),
+ BPF_SK_LOOKUP,
+ /* if (sk) bpf_sk_release() */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 7),
+ /* bpf_tail_call() */
+ BPF_MOV64_IMM(BPF_REG_3, 2),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog1 = { 17 },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ },
+ {
+ "reference tracking: release reference then tail call",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_1),
+ BPF_SK_LOOKUP,
+ /* if (sk) bpf_sk_release() */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ /* bpf_tail_call() */
+ BPF_MOV64_IMM(BPF_REG_3, 2),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog1 = { 18 },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ },
+ {
+ "reference tracking: leak possible reference over tail call",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_1),
+ /* Look up socket and store in REG_6 */
+ BPF_SK_LOOKUP,
+ /* bpf_tail_call() */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_3, 2),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ /* if (sk) bpf_sk_release() */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog1 = { 16 },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "tail_call would lead to reference leak",
+ .result = REJECT,
+ },
+ {
+ "reference tracking: leak checked reference over tail call",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_1),
+ /* Look up socket and store in REG_6 */
+ BPF_SK_LOOKUP,
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ /* if (!sk) goto end */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+ /* bpf_tail_call() */
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog1 = { 17 },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "tail_call would lead to reference leak",
+ .result = REJECT,
+ },
+ {
+ "reference tracking: mangle and release sock_or_null",
+ .insns = {
+ BPF_SK_LOOKUP,
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 5),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "R1 pointer arithmetic on sock_or_null prohibited",
+ .result = REJECT,
+ },
+ {
+ "reference tracking: mangle and release sock",
+ .insns = {
+ BPF_SK_LOOKUP,
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 5),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "R1 pointer arithmetic on sock prohibited",
+ .result = REJECT,
+ },
+ {
+ "reference tracking: access member",
+ .insns = {
+ BPF_SK_LOOKUP,
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_0, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ },
+ {
+ "reference tracking: write to member",
+ .insns = {
+ BPF_SK_LOOKUP,
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_LD_IMM64(BPF_REG_2, 42),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_2,
+ offsetof(struct bpf_sock, mark)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_LD_IMM64(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "cannot write into socket",
+ .result = REJECT,
+ },
+ {
+ "reference tracking: invalid 64-bit access of member",
+ .insns = {
+ BPF_SK_LOOKUP,
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "invalid bpf_sock access off=0 size=8",
+ .result = REJECT,
+ },
+ {
+ "reference tracking: access after release",
+ .insns = {
+ BPF_SK_LOOKUP,
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "!read_ok",
+ .result = REJECT,
+ },
+ {
+ "reference tracking: direct access for lookup",
+ .insns = {
+ /* Check that the packet is at least 64B long */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 64),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 9),
+ /* sk = sk_lookup_tcp(ctx, skb->data, ...) */
+ BPF_MOV64_IMM(BPF_REG_3, sizeof(struct bpf_sock_tuple)),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_EMIT_CALL(BPF_FUNC_sk_lookup_tcp),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_0, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ },
};
static int probe_filter_length(const struct bpf_insn *fp)
@@ -12652,18 +13411,18 @@ static int create_map(uint32_t type, uint32_t size_key,
return fd;
}
-static int create_prog_dummy1(void)
+static int create_prog_dummy1(enum bpf_map_type prog_type)
{
struct bpf_insn prog[] = {
BPF_MOV64_IMM(BPF_REG_0, 42),
BPF_EXIT_INSN(),
};
- return bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, prog,
+ return bpf_load_program(prog_type, prog,
ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
}
-static int create_prog_dummy2(int mfd, int idx)
+static int create_prog_dummy2(enum bpf_map_type prog_type, int mfd, int idx)
{
struct bpf_insn prog[] = {
BPF_MOV64_IMM(BPF_REG_3, idx),
@@ -12674,11 +13433,12 @@ static int create_prog_dummy2(int mfd, int idx)
BPF_EXIT_INSN(),
};
- return bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, prog,
+ return bpf_load_program(prog_type, prog,
ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
}
-static int create_prog_array(uint32_t max_elem, int p1key)
+static int create_prog_array(enum bpf_map_type prog_type, uint32_t max_elem,
+ int p1key)
{
int p2key = 1;
int mfd, p1fd, p2fd;
@@ -12690,8 +13450,8 @@ static int create_prog_array(uint32_t max_elem, int p1key)
return -1;
}
- p1fd = create_prog_dummy1();
- p2fd = create_prog_dummy2(mfd, p2key);
+ p1fd = create_prog_dummy1(prog_type);
+ p2fd = create_prog_dummy2(prog_type, mfd, p2key);
if (p1fd < 0 || p2fd < 0)
goto out;
if (bpf_map_update_elem(mfd, &p1key, &p1fd, BPF_ANY) < 0)
@@ -12748,8 +13508,8 @@ static int create_cgroup_storage(bool percpu)
static char bpf_vlog[UINT_MAX >> 8];
-static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog,
- int *map_fds)
+static void do_test_fixup(struct bpf_test *test, enum bpf_map_type prog_type,
+ struct bpf_insn *prog, int *map_fds)
{
int *fixup_map1 = test->fixup_map1;
int *fixup_map2 = test->fixup_map2;
@@ -12805,7 +13565,7 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog,
}
if (*fixup_prog1) {
- map_fds[4] = create_prog_array(4, 0);
+ map_fds[4] = create_prog_array(prog_type, 4, 0);
do {
prog[*fixup_prog1].imm = map_fds[4];
fixup_prog1++;
@@ -12813,7 +13573,7 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog,
}
if (*fixup_prog2) {
- map_fds[5] = create_prog_array(8, 7);
+ map_fds[5] = create_prog_array(prog_type, 8, 7);
do {
prog[*fixup_prog2].imm = map_fds[5];
fixup_prog2++;
@@ -12859,11 +13619,13 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
for (i = 0; i < MAX_NR_MAPS; i++)
map_fds[i] = -1;
- do_test_fixup(test, prog, map_fds);
+ if (!prog_type)
+ prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+ do_test_fixup(test, prog_type, prog, map_fds);
prog_len = probe_filter_length(prog);
- fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER,
- prog, prog_len, test->flags & F_LOAD_WITH_STRICT_ALIGNMENT,
+ fd_prog = bpf_verify_program(prog_type, prog, prog_len,
+ test->flags & F_LOAD_WITH_STRICT_ALIGNMENT,
"GPL", 0, bpf_vlog, sizeof(bpf_vlog), 1);
expected_ret = unpriv && test->result_unpriv != UNDEF ?