diff options
Diffstat (limited to 'net')
-rw-r--r-- | net/bpf/test_run.c | 150 | ||||
-rw-r--r-- | net/core/filter.c | 1 | ||||
-rw-r--r-- | net/core/net_namespace.c | 1 | ||||
-rw-r--r-- | net/ipv4/bpf_tcp_ca.c | 22 | ||||
-rw-r--r-- | net/ipv4/tcp_bbr.c | 18 | ||||
-rw-r--r-- | net/ipv4/tcp_cubic.c | 17 | ||||
-rw-r--r-- | net/ipv4/tcp_dctcp.c | 18 | ||||
-rw-r--r-- | net/netfilter/Makefile | 5 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_bpf.c | 257 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_core.c | 8 |
10 files changed, 454 insertions, 43 deletions
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 46dd95755967..3a5bf8ad834d 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -5,6 +5,7 @@ #include <linux/btf.h> #include <linux/btf_ids.h> #include <linux/slab.h> +#include <linux/init.h> #include <linux/vmalloc.h> #include <linux/etherdevice.h> #include <linux/filter.h> @@ -171,6 +172,8 @@ int noinline bpf_fentry_test1(int a) { return a + 1; } +EXPORT_SYMBOL_GPL(bpf_fentry_test1); +ALLOW_ERROR_INJECTION(bpf_fentry_test1, ERRNO); int noinline bpf_fentry_test2(int a, u64 b) { @@ -232,22 +235,137 @@ struct sock * noinline bpf_kfunc_call_test3(struct sock *sk) return sk; } +struct prog_test_ref_kfunc { + int a; + int b; + struct prog_test_ref_kfunc *next; +}; + +static struct prog_test_ref_kfunc prog_test_struct = { + .a = 42, + .b = 108, + .next = &prog_test_struct, +}; + +noinline struct prog_test_ref_kfunc * +bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr) +{ + /* randomly return NULL */ + if (get_jiffies_64() % 2) + return NULL; + return &prog_test_struct; +} + +noinline void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) +{ +} + +struct prog_test_pass1 { + int x0; + struct { + int x1; + struct { + int x2; + struct { + int x3; + }; + }; + }; +}; + +struct prog_test_pass2 { + int len; + short arr1[4]; + struct { + char arr2[4]; + unsigned long arr3[8]; + } x; +}; + +struct prog_test_fail1 { + void *p; + int x; +}; + +struct prog_test_fail2 { + int x8; + struct prog_test_pass1 x; +}; + +struct prog_test_fail3 { + int len; + char arr1[2]; + char arr2[0]; +}; + +noinline void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) +{ +} + +noinline void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) +{ +} + +noinline void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) +{ +} + +noinline void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p) +{ +} + +noinline void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p) +{ +} + +noinline void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p) +{ +} + +noinline void bpf_kfunc_call_test_mem_len_pass1(void *mem, int mem__sz) +{ +} + +noinline void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len) +{ +} + +noinline void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len) +{ +} + __diag_pop(); ALLOW_ERROR_INJECTION(bpf_modify_return_test, ERRNO); -BTF_SET_START(test_sk_kfunc_ids) +BTF_SET_START(test_sk_check_kfunc_ids) BTF_ID(func, bpf_kfunc_call_test1) BTF_ID(func, bpf_kfunc_call_test2) BTF_ID(func, bpf_kfunc_call_test3) -BTF_SET_END(test_sk_kfunc_ids) - -bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, struct module *owner) -{ - if (btf_id_set_contains(&test_sk_kfunc_ids, kfunc_id)) - return true; - return bpf_check_mod_kfunc_call(&prog_test_kfunc_list, kfunc_id, owner); -} +BTF_ID(func, bpf_kfunc_call_test_acquire) +BTF_ID(func, bpf_kfunc_call_test_release) +BTF_ID(func, bpf_kfunc_call_test_pass_ctx) +BTF_ID(func, bpf_kfunc_call_test_pass1) +BTF_ID(func, bpf_kfunc_call_test_pass2) +BTF_ID(func, bpf_kfunc_call_test_fail1) +BTF_ID(func, bpf_kfunc_call_test_fail2) +BTF_ID(func, bpf_kfunc_call_test_fail3) +BTF_ID(func, bpf_kfunc_call_test_mem_len_pass1) +BTF_ID(func, bpf_kfunc_call_test_mem_len_fail1) +BTF_ID(func, bpf_kfunc_call_test_mem_len_fail2) +BTF_SET_END(test_sk_check_kfunc_ids) + +BTF_SET_START(test_sk_acquire_kfunc_ids) +BTF_ID(func, bpf_kfunc_call_test_acquire) +BTF_SET_END(test_sk_acquire_kfunc_ids) + +BTF_SET_START(test_sk_release_kfunc_ids) +BTF_ID(func, bpf_kfunc_call_test_release) +BTF_SET_END(test_sk_release_kfunc_ids) + +BTF_SET_START(test_sk_ret_null_kfunc_ids) +BTF_ID(func, bpf_kfunc_call_test_acquire) +BTF_SET_END(test_sk_ret_null_kfunc_ids) static void *bpf_test_init(const union bpf_attr *kattr, u32 size, u32 headroom, u32 tailroom) @@ -1067,3 +1185,17 @@ out: kfree(ctx); return err; } + +static const struct btf_kfunc_id_set bpf_prog_test_kfunc_set = { + .owner = THIS_MODULE, + .check_set = &test_sk_check_kfunc_ids, + .acquire_set = &test_sk_acquire_kfunc_ids, + .release_set = &test_sk_release_kfunc_ids, + .ret_null_set = &test_sk_ret_null_kfunc_ids, +}; + +static int __init bpf_prog_test_run_init(void) +{ + return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_prog_test_kfunc_set); +} +late_initcall(bpf_prog_test_run_init); diff --git a/net/core/filter.c b/net/core/filter.c index 4603b7cd3cd1..f73a84c75970 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -10062,7 +10062,6 @@ const struct bpf_verifier_ops tc_cls_act_verifier_ops = { .convert_ctx_access = tc_cls_act_convert_ctx_access, .gen_prologue = tc_cls_act_prologue, .gen_ld_abs = bpf_gen_ld_abs, - .check_kfunc_call = bpf_prog_test_check_kfunc_call, }; const struct bpf_prog_ops tc_cls_act_prog_ops = { diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 9b7171c40434..3b471781327f 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -299,6 +299,7 @@ struct net *get_net_ns_by_id(const struct net *net, int id) return peer; } +EXPORT_SYMBOL_GPL(get_net_ns_by_id); /* * setup_net runs the initializers for the network namespace object. diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index de610cb83694..b60c9fd7147e 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2019 Facebook */ +#include <linux/init.h> #include <linux/types.h> #include <linux/bpf_verifier.h> #include <linux/bpf.h> @@ -212,26 +213,23 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id, } } -BTF_SET_START(bpf_tcp_ca_kfunc_ids) +BTF_SET_START(bpf_tcp_ca_check_kfunc_ids) BTF_ID(func, tcp_reno_ssthresh) BTF_ID(func, tcp_reno_cong_avoid) BTF_ID(func, tcp_reno_undo_cwnd) BTF_ID(func, tcp_slow_start) BTF_ID(func, tcp_cong_avoid_ai) -BTF_SET_END(bpf_tcp_ca_kfunc_ids) +BTF_SET_END(bpf_tcp_ca_check_kfunc_ids) -static bool bpf_tcp_ca_check_kfunc_call(u32 kfunc_btf_id, struct module *owner) -{ - if (btf_id_set_contains(&bpf_tcp_ca_kfunc_ids, kfunc_btf_id)) - return true; - return bpf_check_mod_kfunc_call(&bpf_tcp_ca_kfunc_list, kfunc_btf_id, owner); -} +static const struct btf_kfunc_id_set bpf_tcp_ca_kfunc_set = { + .owner = THIS_MODULE, + .check_set = &bpf_tcp_ca_check_kfunc_ids, +}; static const struct bpf_verifier_ops bpf_tcp_ca_verifier_ops = { .get_func_proto = bpf_tcp_ca_get_func_proto, .is_valid_access = bpf_tcp_ca_is_valid_access, .btf_struct_access = bpf_tcp_ca_btf_struct_access, - .check_kfunc_call = bpf_tcp_ca_check_kfunc_call, }; static int bpf_tcp_ca_init_member(const struct btf_type *t, @@ -300,3 +298,9 @@ struct bpf_struct_ops bpf_tcp_congestion_ops = { .init = bpf_tcp_ca_init, .name = "tcp_congestion_ops", }; + +static int __init bpf_tcp_ca_kfunc_init(void) +{ + return register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &bpf_tcp_ca_kfunc_set); +} +late_initcall(bpf_tcp_ca_kfunc_init); diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index ec5550089b4d..02e8626ccb27 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -1154,7 +1154,7 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = { .set_state = bbr_set_state, }; -BTF_SET_START(tcp_bbr_kfunc_ids) +BTF_SET_START(tcp_bbr_check_kfunc_ids) #ifdef CONFIG_X86 #ifdef CONFIG_DYNAMIC_FTRACE BTF_ID(func, bbr_init) @@ -1167,25 +1167,27 @@ BTF_ID(func, bbr_min_tso_segs) BTF_ID(func, bbr_set_state) #endif #endif -BTF_SET_END(tcp_bbr_kfunc_ids) +BTF_SET_END(tcp_bbr_check_kfunc_ids) -static DEFINE_KFUNC_BTF_ID_SET(&tcp_bbr_kfunc_ids, tcp_bbr_kfunc_btf_set); +static const struct btf_kfunc_id_set tcp_bbr_kfunc_set = { + .owner = THIS_MODULE, + .check_set = &tcp_bbr_check_kfunc_ids, +}; static int __init bbr_register(void) { int ret; BUILD_BUG_ON(sizeof(struct bbr) > ICSK_CA_PRIV_SIZE); - ret = tcp_register_congestion_control(&tcp_bbr_cong_ops); - if (ret) + + ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &tcp_bbr_kfunc_set); + if (ret < 0) return ret; - register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_bbr_kfunc_btf_set); - return 0; + return tcp_register_congestion_control(&tcp_bbr_cong_ops); } static void __exit bbr_unregister(void) { - unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_bbr_kfunc_btf_set); tcp_unregister_congestion_control(&tcp_bbr_cong_ops); } diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index e07837e23b3f..24d562dd6225 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -485,7 +485,7 @@ static struct tcp_congestion_ops cubictcp __read_mostly = { .name = "cubic", }; -BTF_SET_START(tcp_cubic_kfunc_ids) +BTF_SET_START(tcp_cubic_check_kfunc_ids) #ifdef CONFIG_X86 #ifdef CONFIG_DYNAMIC_FTRACE BTF_ID(func, cubictcp_init) @@ -496,9 +496,12 @@ BTF_ID(func, cubictcp_cwnd_event) BTF_ID(func, cubictcp_acked) #endif #endif -BTF_SET_END(tcp_cubic_kfunc_ids) +BTF_SET_END(tcp_cubic_check_kfunc_ids) -static DEFINE_KFUNC_BTF_ID_SET(&tcp_cubic_kfunc_ids, tcp_cubic_kfunc_btf_set); +static const struct btf_kfunc_id_set tcp_cubic_kfunc_set = { + .owner = THIS_MODULE, + .check_set = &tcp_cubic_check_kfunc_ids, +}; static int __init cubictcp_register(void) { @@ -534,16 +537,14 @@ static int __init cubictcp_register(void) /* divide by bic_scale and by constant Srtt (100ms) */ do_div(cube_factor, bic_scale * 10); - ret = tcp_register_congestion_control(&cubictcp); - if (ret) + ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &tcp_cubic_kfunc_set); + if (ret < 0) return ret; - register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_cubic_kfunc_btf_set); - return 0; + return tcp_register_congestion_control(&cubictcp); } static void __exit cubictcp_unregister(void) { - unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_cubic_kfunc_btf_set); tcp_unregister_congestion_control(&cubictcp); } diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index 0d7ab3cc7b61..1943a6630341 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -238,7 +238,7 @@ static struct tcp_congestion_ops dctcp_reno __read_mostly = { .name = "dctcp-reno", }; -BTF_SET_START(tcp_dctcp_kfunc_ids) +BTF_SET_START(tcp_dctcp_check_kfunc_ids) #ifdef CONFIG_X86 #ifdef CONFIG_DYNAMIC_FTRACE BTF_ID(func, dctcp_init) @@ -249,25 +249,27 @@ BTF_ID(func, dctcp_cwnd_undo) BTF_ID(func, dctcp_state) #endif #endif -BTF_SET_END(tcp_dctcp_kfunc_ids) +BTF_SET_END(tcp_dctcp_check_kfunc_ids) -static DEFINE_KFUNC_BTF_ID_SET(&tcp_dctcp_kfunc_ids, tcp_dctcp_kfunc_btf_set); +static const struct btf_kfunc_id_set tcp_dctcp_kfunc_set = { + .owner = THIS_MODULE, + .check_set = &tcp_dctcp_check_kfunc_ids, +}; static int __init dctcp_register(void) { int ret; BUILD_BUG_ON(sizeof(struct dctcp) > ICSK_CA_PRIV_SIZE); - ret = tcp_register_congestion_control(&dctcp); - if (ret) + + ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &tcp_dctcp_kfunc_set); + if (ret < 0) return ret; - register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_dctcp_kfunc_btf_set); - return 0; + return tcp_register_congestion_control(&dctcp); } static void __exit dctcp_unregister(void) { - unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_dctcp_kfunc_btf_set); tcp_unregister_congestion_control(&dctcp); } diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index a135b1a46014..238b6a620e88 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -14,6 +14,11 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o nf_conntrack-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o nf_conntrack-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o nf_conntrack-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o +ifeq ($(CONFIG_NF_CONNTRACK),m) +nf_conntrack-$(CONFIG_DEBUG_INFO_BTF_MODULES) += nf_conntrack_bpf.o +else ifeq ($(CONFIG_NF_CONNTRACK),y) +nf_conntrack-$(CONFIG_DEBUG_INFO_BTF) += nf_conntrack_bpf.o +endif obj-$(CONFIG_NETFILTER) = netfilter.o diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c new file mode 100644 index 000000000000..8ad3f52579f3 --- /dev/null +++ b/net/netfilter/nf_conntrack_bpf.c @@ -0,0 +1,257 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Unstable Conntrack Helpers for XDP and TC-BPF hook + * + * These are called from the XDP and SCHED_CLS BPF programs. Note that it is + * allowed to break compatibility for these functions since the interface they + * are exposed through to BPF programs is explicitly unstable. + */ + +#include <linux/bpf.h> +#include <linux/btf.h> +#include <linux/types.h> +#include <linux/btf_ids.h> +#include <linux/net_namespace.h> +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_core.h> + +/* bpf_ct_opts - Options for CT lookup helpers + * + * Members: + * @netns_id - Specify the network namespace for lookup + * Values: + * BPF_F_CURRENT_NETNS (-1) + * Use namespace associated with ctx (xdp_md, __sk_buff) + * [0, S32_MAX] + * Network Namespace ID + * @error - Out parameter, set for any errors encountered + * Values: + * -EINVAL - Passed NULL for bpf_tuple pointer + * -EINVAL - opts->reserved is not 0 + * -EINVAL - netns_id is less than -1 + * -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (12) + * -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP + * -ENONET - No network namespace found for netns_id + * -ENOENT - Conntrack lookup could not find entry for tuple + * -EAFNOSUPPORT - tuple__sz isn't one of sizeof(tuple->ipv4) + * or sizeof(tuple->ipv6) + * @l4proto - Layer 4 protocol + * Values: + * IPPROTO_TCP, IPPROTO_UDP + * @reserved - Reserved member, will be reused for more options in future + * Values: + * 0 + */ +struct bpf_ct_opts { + s32 netns_id; + s32 error; + u8 l4proto; + u8 reserved[3]; +}; + +enum { + NF_BPF_CT_OPTS_SZ = 12, +}; + +static struct nf_conn *__bpf_nf_ct_lookup(struct net *net, + struct bpf_sock_tuple *bpf_tuple, + u32 tuple_len, u8 protonum, + s32 netns_id) +{ + struct nf_conntrack_tuple_hash *hash; + struct nf_conntrack_tuple tuple; + + if (unlikely(protonum != IPPROTO_TCP && protonum != IPPROTO_UDP)) + return ERR_PTR(-EPROTO); + if (unlikely(netns_id < BPF_F_CURRENT_NETNS)) + return ERR_PTR(-EINVAL); + + memset(&tuple, 0, sizeof(tuple)); + switch (tuple_len) { + case sizeof(bpf_tuple->ipv4): + tuple.src.l3num = AF_INET; + tuple.src.u3.ip = bpf_tuple->ipv4.saddr; + tuple.src.u.tcp.port = bpf_tuple->ipv4.sport; + tuple.dst.u3.ip = bpf_tuple->ipv4.daddr; + tuple.dst.u.tcp.port = bpf_tuple->ipv4.dport; + break; + case sizeof(bpf_tuple->ipv6): + tuple.src.l3num = AF_INET6; + memcpy(tuple.src.u3.ip6, bpf_tuple->ipv6.saddr, sizeof(bpf_tuple->ipv6.saddr)); + tuple.src.u.tcp.port = bpf_tuple->ipv6.sport; + memcpy(tuple.dst.u3.ip6, bpf_tuple->ipv6.daddr, sizeof(bpf_tuple->ipv6.daddr)); + tuple.dst.u.tcp.port = bpf_tuple->ipv6.dport; + break; + default: + return ERR_PTR(-EAFNOSUPPORT); + } + + tuple.dst.protonum = protonum; + + if (netns_id >= 0) { + net = get_net_ns_by_id(net, netns_id); + if (unlikely(!net)) + return ERR_PTR(-ENONET); + } + + hash = nf_conntrack_find_get(net, &nf_ct_zone_dflt, &tuple); + if (netns_id >= 0) + put_net(net); + if (!hash) + return ERR_PTR(-ENOENT); + return nf_ct_tuplehash_to_ctrack(hash); +} + +__diag_push(); +__diag_ignore(GCC, 8, "-Wmissing-prototypes", + "Global functions as their definitions will be in nf_conntrack BTF"); + +/* bpf_xdp_ct_lookup - Lookup CT entry for the given tuple, and acquire a + * reference to it + * + * Parameters: + * @xdp_ctx - Pointer to ctx (xdp_md) in XDP program + * Cannot be NULL + * @bpf_tuple - Pointer to memory representing the tuple to look up + * Cannot be NULL + * @tuple__sz - Length of the tuple structure + * Must be one of sizeof(bpf_tuple->ipv4) or + * sizeof(bpf_tuple->ipv6) + * @opts - Additional options for lookup (documented above) + * Cannot be NULL + * @opts__sz - Length of the bpf_ct_opts structure + * Must be NF_BPF_CT_OPTS_SZ (12) + */ +struct nf_conn * +bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple, + u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz) +{ + struct xdp_buff *ctx = (struct xdp_buff *)xdp_ctx; + struct net *caller_net; + struct nf_conn *nfct; + + BUILD_BUG_ON(sizeof(struct bpf_ct_opts) != NF_BPF_CT_OPTS_SZ); + + if (!opts) + return NULL; + if (!bpf_tuple || opts->reserved[0] || opts->reserved[1] || + opts->reserved[2] || opts__sz != NF_BPF_CT_OPTS_SZ) { + opts->error = -EINVAL; + return NULL; + } + caller_net = dev_net(ctx->rxq->dev); + nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts->l4proto, + opts->netns_id); + if (IS_ERR(nfct)) { + opts->error = PTR_ERR(nfct); + return NULL; + } + return nfct; +} + +/* bpf_skb_ct_lookup - Lookup CT entry for the given tuple, and acquire a + * reference to it + * + * Parameters: + * @skb_ctx - Pointer to ctx (__sk_buff) in TC program + * Cannot be NULL + * @bpf_tuple - Pointer to memory representing the tuple to look up + * Cannot be NULL + * @tuple__sz - Length of the tuple structure + * Must be one of sizeof(bpf_tuple->ipv4) or + * sizeof(bpf_tuple->ipv6) + * @opts - Additional options for lookup (documented above) + * Cannot be NULL + * @opts__sz - Length of the bpf_ct_opts structure + * Must be NF_BPF_CT_OPTS_SZ (12) + */ +struct nf_conn * +bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple, + u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz) +{ + struct sk_buff *skb = (struct sk_buff *)skb_ctx; + struct net *caller_net; + struct nf_conn *nfct; + + BUILD_BUG_ON(sizeof(struct bpf_ct_opts) != NF_BPF_CT_OPTS_SZ); + + if (!opts) + return NULL; + if (!bpf_tuple || opts->reserved[0] || opts->reserved[1] || + opts->reserved[2] || opts__sz != NF_BPF_CT_OPTS_SZ) { + opts->error = -EINVAL; + return NULL; + } + caller_net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk); + nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts->l4proto, + opts->netns_id); + if (IS_ERR(nfct)) { + opts->error = PTR_ERR(nfct); + return NULL; + } + return nfct; +} + +/* bpf_ct_release - Release acquired nf_conn object + * + * This must be invoked for referenced PTR_TO_BTF_ID, and the verifier rejects + * the program if any references remain in the program in all of the explored + * states. + * + * Parameters: + * @nf_conn - Pointer to referenced nf_conn object, obtained using + * bpf_xdp_ct_lookup or bpf_skb_ct_lookup. + */ +void bpf_ct_release(struct nf_conn *nfct) +{ + if (!nfct) + return; + nf_ct_put(nfct); +} + +__diag_pop() + +BTF_SET_START(nf_ct_xdp_check_kfunc_ids) +BTF_ID(func, bpf_xdp_ct_lookup) +BTF_ID(func, bpf_ct_release) +BTF_SET_END(nf_ct_xdp_check_kfunc_ids) + +BTF_SET_START(nf_ct_tc_check_kfunc_ids) +BTF_ID(func, bpf_skb_ct_lookup) +BTF_ID(func, bpf_ct_release) +BTF_SET_END(nf_ct_tc_check_kfunc_ids) + +BTF_SET_START(nf_ct_acquire_kfunc_ids) +BTF_ID(func, bpf_xdp_ct_lookup) +BTF_ID(func, bpf_skb_ct_lookup) +BTF_SET_END(nf_ct_acquire_kfunc_ids) + +BTF_SET_START(nf_ct_release_kfunc_ids) +BTF_ID(func, bpf_ct_release) +BTF_SET_END(nf_ct_release_kfunc_ids) + +/* Both sets are identical */ +#define nf_ct_ret_null_kfunc_ids nf_ct_acquire_kfunc_ids + +static const struct btf_kfunc_id_set nf_conntrack_xdp_kfunc_set = { + .owner = THIS_MODULE, + .check_set = &nf_ct_xdp_check_kfunc_ids, + .acquire_set = &nf_ct_acquire_kfunc_ids, + .release_set = &nf_ct_release_kfunc_ids, + .ret_null_set = &nf_ct_ret_null_kfunc_ids, +}; + +static const struct btf_kfunc_id_set nf_conntrack_tc_kfunc_set = { + .owner = THIS_MODULE, + .check_set = &nf_ct_tc_check_kfunc_ids, + .acquire_set = &nf_ct_acquire_kfunc_ids, + .release_set = &nf_ct_release_kfunc_ids, + .ret_null_set = &nf_ct_ret_null_kfunc_ids, +}; + +int register_nf_conntrack_bpf(void) +{ + int ret; + + ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &nf_conntrack_xdp_kfunc_set); + return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_tc_kfunc_set); +} diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 894a325d39f2..3b60fca61920 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -34,6 +34,7 @@ #include <linux/rculist_nulls.h> #include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_bpf.h> #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_expect.h> #include <net/netfilter/nf_conntrack_helper.h> @@ -2748,8 +2749,15 @@ int nf_conntrack_init_start(void) conntrack_gc_work_init(&conntrack_gc_work); queue_delayed_work(system_power_efficient_wq, &conntrack_gc_work.dwork, HZ); + ret = register_nf_conntrack_bpf(); + if (ret < 0) + goto err_kfunc; + return 0; +err_kfunc: + cancel_delayed_work_sync(&conntrack_gc_work.dwork); + nf_conntrack_proto_fini(); err_proto: nf_conntrack_seqadj_fini(); err_seqadj: |