From ca2e1a627035002cd33d9667431e80bad90c25fa Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski
Date: Thu, 7 Jul 2022 15:08:42 +0200
Subject: xsk: Mark napi_id on sendmsg()

When an application runs in busy poll mode and does not receive a single
packet but only sends them, it is currently impossible to get into
napi_busy_loop(), as napi_id is only marked on the Rx side in
xsk_rcv_check(). In there, napi_id is taken from the xdp_rxq_info carried
by the xdp_buff. From the Tx perspective, we do not have access to it.
What we do have at hand is the xsk pool.

The xsk pool works on a pool of internal xdp_buff wrappers called
xdp_buff_xsk. AF_XDP ZC enabled drivers call xp_set_rxq_info(), so each
xdp_buff_xsk has a valid pointer to the xdp_rxq_info of the underlying
queue. Therefore, on the Tx side, napi_id can be pulled from
xs->pool->heads[0].xdp.rxq->napi_id. Hide this pointer chase behind a
helper function, xsk_pool_get_napi_id().

Do this only for sockets working in ZC mode, as otherwise the rxq
pointers would not be initialized.

Signed-off-by: Maciej Fijalkowski
Signed-off-by: Daniel Borkmann
Acked-by: Magnus Karlsson
Link: https://lore.kernel.org/bpf/20220707130842.49408-1-maciej.fijalkowski@intel.com
---
 include/net/xdp_sock_drv.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include/net')

diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
index 4aa031849668..4277b0dcee05 100644
--- a/include/net/xdp_sock_drv.h
+++ b/include/net/xdp_sock_drv.h
@@ -44,6 +44,15 @@ static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool,
 	xp_set_rxq_info(pool, rxq);
 }
 
+static inline unsigned int xsk_pool_get_napi_id(struct xsk_buff_pool *pool)
+{
+#ifdef CONFIG_NET_RX_BUSY_POLL
+	return pool->heads[0].xdp.rxq->napi_id;
+#else
+	return 0;
+#endif
+}
+
 static inline void xsk_pool_dma_unmap(struct xsk_buff_pool *pool,
 				      unsigned long attrs)
 {
@@ -198,6 +207,11 @@ static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool,
 {
 }
 
+static inline unsigned int xsk_pool_get_napi_id(struct xsk_buff_pool *pool)
+{
+	return 0;
+}
+
 static inline void xsk_pool_dma_unmap(struct xsk_buff_pool *pool,
 				      unsigned long attrs)
 {
-- 
cgit v1.2.3


From d7e79c97c00ca82dace0e3b645d4b3b02fa273c2 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi
Date: Thu, 21 Jul 2022 15:42:39 +0200
Subject: net: netfilter: Add kfuncs to allocate and insert CT

Introduce bpf_xdp_ct_alloc, bpf_skb_ct_alloc and bpf_ct_insert_entry
kfuncs in order to insert a new entry from XDP and TC programs.
Introduce a bpf_nf_ct_tuple_parse utility routine to consolidate common
code.

Extract a helper, __nf_ct_set_timeout, shared by the ctnetlink and
nf_conntrack_bpf code, out to nf_conntrack_core, so that
nf_conntrack_bpf doesn't need a dependency on CONFIG_NF_CT_NETLINK.
Later this helper will be reused to set the timeout of an allocated but
not yet inserted CT entry.

The allocation functions return struct nf_conn___init instead of
nf_conn, to distinguish an allocated CT from an already inserted or
looked up CT. This is later used to enforce restrictions on what kfuncs
allocated CT can be used with.
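
As an illustration of the intended call flow from an XDP program, here is a
minimal sketch in the style of the BPF selftests. The local
bpf_ct_opts___local mirror of the 12-byte kernel struct, the hand-written
__ksym declarations, the section/program names and the addresses/ports are
assumptions of the example, not something this patch adds:

  /* Illustrative sketch only -- not part of the patch. */
  #include <vmlinux.h>
  #include <bpf/bpf_helpers.h>
  #include <bpf/bpf_endian.h>

  /* Must stay in sync with the 12-byte struct bpf_ct_opts in nf_conntrack_bpf.c */
  struct bpf_ct_opts___local {
  	s32 netns_id;
  	s32 error;
  	u8 l4proto;
  	u8 dir;
  	u8 reserved[2];
  } __attribute__((preserve_access_index));

  extern struct nf_conn___init *
  bpf_xdp_ct_alloc(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
  		 u32 tuple__sz, struct bpf_ct_opts___local *opts,
  		 u32 opts__sz) __ksym;
  extern struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct_i) __ksym;
  extern void bpf_ct_release(struct nf_conn *nfct) __ksym;

  SEC("xdp")
  int xdp_ct_alloc_example(struct xdp_md *ctx)
  {
  	struct bpf_ct_opts___local opts = {
  		.netns_id = -1,			/* BPF_F_CURRENT_NETNS */
  		.l4proto  = IPPROTO_TCP,
  	};
  	struct bpf_sock_tuple tup = {};
  	struct nf_conn___init *ct_i;
  	struct nf_conn *ct;

  	tup.ipv4.saddr = bpf_htonl(0xc0a80101);	/* 192.168.1.1, made up */
  	tup.ipv4.daddr = bpf_htonl(0xc0a80102);	/* 192.168.1.2, made up */
  	tup.ipv4.sport = bpf_htons(12345);
  	tup.ipv4.dport = bpf_htons(80);

  	/* Allocates an unhashed entry; on failure opts.error holds the reason. */
  	ct_i = bpf_xdp_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
  	if (!ct_i)
  		return XDP_PASS;

  	/* Consumes the allocated entry and returns an acquired nf_conn. */
  	ct = bpf_ct_insert_entry(ct_i);
  	if (ct)
  		bpf_ct_release(ct);

  	return XDP_PASS;
  }

  char _license[] SEC("license") = "GPL";

Both new kfuncs are KF_ACQUIRE | KF_RET_NULL, so the verifier requires the
NULL checks above, and the nf_conn returned by bpf_ct_insert_entry() must be
released with bpf_ct_release() before the program exits.
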
Signed-off-by: Lorenzo Bianconi
Co-developed-by: Kumar Kartikeya Dwivedi
Signed-off-by: Kumar Kartikeya Dwivedi
Link: https://lore.kernel.org/r/20220721134245.2450-8-memxor@gmail.com
Signed-off-by: Alexei Starovoitov
---
 include/net/netfilter/nf_conntrack_core.h |  15 +++
 net/netfilter/nf_conntrack_bpf.c          | 208 +++++++++++++++++++++++++++---
 net/netfilter/nf_conntrack_netlink.c      |   8 +-
 3 files changed, 204 insertions(+), 27 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 37866c8386e2..83a60c684e6c 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -84,4 +84,19 @@ void nf_conntrack_lock(spinlock_t *lock);
 
 extern spinlock_t nf_conntrack_expect_lock;
 
+/* ctnetlink code shared by both ctnetlink and nf_conntrack_bpf */
+
+#if (IS_BUILTIN(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \
+    (IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES) || \
+    IS_ENABLED(CONFIG_NF_CT_NETLINK))
+
+static inline void __nf_ct_set_timeout(struct nf_conn *ct, u64 timeout)
+{
+	if (timeout > INT_MAX)
+		timeout = INT_MAX;
+	WRITE_ONCE(ct->timeout, nfct_time_stamp + (u32)timeout);
+}
+
+#endif
+
 #endif	/* _NF_CONNTRACK_CORE_H */
diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c
index 16304869264f..cac4a9558968 100644
--- a/net/netfilter/nf_conntrack_bpf.c
+++ b/net/netfilter/nf_conntrack_bpf.c
@@ -55,6 +55,94 @@ enum {
 	NF_BPF_CT_OPTS_SZ = 12,
 };
 
+static int bpf_nf_ct_tuple_parse(struct bpf_sock_tuple *bpf_tuple,
+				 u32 tuple_len, u8 protonum, u8 dir,
+				 struct nf_conntrack_tuple *tuple)
+{
+	union nf_inet_addr *src = dir ? &tuple->dst.u3 : &tuple->src.u3;
+	union nf_inet_addr *dst = dir ? &tuple->src.u3 : &tuple->dst.u3;
+	union nf_conntrack_man_proto *sport = dir ? (void *)&tuple->dst.u
+						  : &tuple->src.u;
+	union nf_conntrack_man_proto *dport = dir ? &tuple->src.u
+						  : (void *)&tuple->dst.u;
+
+	if (unlikely(protonum != IPPROTO_TCP && protonum != IPPROTO_UDP))
+		return -EPROTO;
+
+	memset(tuple, 0, sizeof(*tuple));
+
+	switch (tuple_len) {
+	case sizeof(bpf_tuple->ipv4):
+		tuple->src.l3num = AF_INET;
+		src->ip = bpf_tuple->ipv4.saddr;
+		sport->tcp.port = bpf_tuple->ipv4.sport;
+		dst->ip = bpf_tuple->ipv4.daddr;
+		dport->tcp.port = bpf_tuple->ipv4.dport;
+		break;
+	case sizeof(bpf_tuple->ipv6):
+		tuple->src.l3num = AF_INET6;
+		memcpy(src->ip6, bpf_tuple->ipv6.saddr, sizeof(bpf_tuple->ipv6.saddr));
+		sport->tcp.port = bpf_tuple->ipv6.sport;
+		memcpy(dst->ip6, bpf_tuple->ipv6.daddr, sizeof(bpf_tuple->ipv6.daddr));
+		dport->tcp.port = bpf_tuple->ipv6.dport;
+		break;
+	default:
+		return -EAFNOSUPPORT;
+	}
+	tuple->dst.protonum = protonum;
+	tuple->dst.dir = dir;
+
+	return 0;
+}
+
+static struct nf_conn *
+__bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
+			u32 tuple_len, struct bpf_ct_opts *opts, u32 opts_len,
+			u32 timeout)
+{
+	struct nf_conntrack_tuple otuple, rtuple;
+	struct nf_conn *ct;
+	int err;
+
+	if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
+	    opts_len != NF_BPF_CT_OPTS_SZ)
+		return ERR_PTR(-EINVAL);
+
+	if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS))
+		return ERR_PTR(-EINVAL);
+
+	err = bpf_nf_ct_tuple_parse(bpf_tuple, tuple_len, opts->l4proto,
+				    IP_CT_DIR_ORIGINAL, &otuple);
+	if (err < 0)
+		return ERR_PTR(err);
+
+	err = bpf_nf_ct_tuple_parse(bpf_tuple, tuple_len, opts->l4proto,
+				    IP_CT_DIR_REPLY, &rtuple);
+	if (err < 0)
+		return ERR_PTR(err);
+
+	if (opts->netns_id >= 0) {
+		net = get_net_ns_by_id(net, opts->netns_id);
+		if (unlikely(!net))
+			return ERR_PTR(-ENONET);
+	}
+
+	ct = nf_conntrack_alloc(net, &nf_ct_zone_dflt, &otuple, &rtuple,
+				GFP_ATOMIC);
+	if (IS_ERR(ct))
+		goto out;
+
+	memset(&ct->proto, 0, sizeof(ct->proto));
+	__nf_ct_set_timeout(ct, timeout * HZ);
+	ct->status |= IPS_CONFIRMED;
+
+out:
+	if (opts->netns_id >= 0)
+		put_net(net);
+
+	return ct;
+}
+
 static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
 					  struct bpf_sock_tuple *bpf_tuple,
 					  u32 tuple_len, struct bpf_ct_opts *opts,
@@ -63,6 +151,7 @@ static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
 	struct nf_conntrack_tuple_hash *hash;
 	struct nf_conntrack_tuple tuple;
 	struct nf_conn *ct;
+	int err;
 
 	if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
 	    opts_len != NF_BPF_CT_OPTS_SZ)
@@ -72,27 +161,10 @@ static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
 	if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS))
 		return ERR_PTR(-EINVAL);
 
-	memset(&tuple, 0, sizeof(tuple));
-	switch (tuple_len) {
-	case sizeof(bpf_tuple->ipv4):
-		tuple.src.l3num = AF_INET;
-		tuple.src.u3.ip = bpf_tuple->ipv4.saddr;
-		tuple.src.u.tcp.port = bpf_tuple->ipv4.sport;
-		tuple.dst.u3.ip = bpf_tuple->ipv4.daddr;
-		tuple.dst.u.tcp.port = bpf_tuple->ipv4.dport;
-		break;
-	case sizeof(bpf_tuple->ipv6):
-		tuple.src.l3num = AF_INET6;
-		memcpy(tuple.src.u3.ip6, bpf_tuple->ipv6.saddr, sizeof(bpf_tuple->ipv6.saddr));
-		tuple.src.u.tcp.port = bpf_tuple->ipv6.sport;
-		memcpy(tuple.dst.u3.ip6, bpf_tuple->ipv6.daddr, sizeof(bpf_tuple->ipv6.daddr));
-		tuple.dst.u.tcp.port = bpf_tuple->ipv6.dport;
-		break;
-	default:
-		return ERR_PTR(-EAFNOSUPPORT);
-	}
-
-	tuple.dst.protonum = opts->l4proto;
+	err = bpf_nf_ct_tuple_parse(bpf_tuple, tuple_len, opts->l4proto,
+				    IP_CT_DIR_ORIGINAL, &tuple);
+	if (err < 0)
+		return ERR_PTR(err);
 
 	if (opts->netns_id >= 0) {
 		net = get_net_ns_by_id(net, opts->netns_id);
@@ -116,6 +188,43 @@ __diag_push();
 __diag_ignore_all("-Wmissing-prototypes",
 		  "Global functions as their definitions will be in nf_conntrack BTF");
 
+struct nf_conn___init {
+	struct nf_conn ct;
+};
+
+/* bpf_xdp_ct_alloc - Allocate a new CT entry
+ *
+ * Parameters:
+ * @xdp_ctx	- Pointer to ctx (xdp_md) in XDP program
+ *		    Cannot be NULL
+ * @bpf_tuple	- Pointer to memory representing the tuple to look up
+ *		    Cannot be NULL
+ * @tuple__sz	- Length of the tuple structure
+ *		    Must be one of sizeof(bpf_tuple->ipv4) or
+ *		    sizeof(bpf_tuple->ipv6)
+ * @opts	- Additional options for allocation (documented above)
+ *		    Cannot be NULL
+ * @opts__sz	- Length of the bpf_ct_opts structure
+ *		    Must be NF_BPF_CT_OPTS_SZ (12)
+ */
+struct nf_conn___init *
+bpf_xdp_ct_alloc(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
+		 u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
+{
+	struct xdp_buff *ctx = (struct xdp_buff *)xdp_ctx;
+	struct nf_conn *nfct;
+
+	nfct = __bpf_nf_ct_alloc_entry(dev_net(ctx->rxq->dev), bpf_tuple, tuple__sz,
+				       opts, opts__sz, 10);
+	if (IS_ERR(nfct)) {
+		if (opts)
+			opts->error = PTR_ERR(nfct);
+		return NULL;
+	}
+
+	return (struct nf_conn___init *)nfct;
+}
+
 /* bpf_xdp_ct_lookup - Lookup CT entry for the given tuple, and acquire a
  *		       reference to it
  *
@@ -150,6 +259,40 @@ bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
 	return nfct;
 }
 
+/* bpf_skb_ct_alloc - Allocate a new CT entry
+ *
+ * Parameters:
+ * @skb_ctx	- Pointer to ctx (__sk_buff) in TC program
+ *		    Cannot be NULL
+ * @bpf_tuple	- Pointer to memory representing the tuple to look up
+ *		    Cannot be NULL
+ * @tuple__sz	- Length of the tuple structure
+ *		    Must be one of sizeof(bpf_tuple->ipv4) or
+ *		    sizeof(bpf_tuple->ipv6)
+ * @opts	- Additional options for allocation (documented above)
+ *		    Cannot be NULL
+ * @opts__sz	- Length of the bpf_ct_opts structure
+ *		    Must be NF_BPF_CT_OPTS_SZ (12)
+ */
+struct nf_conn___init *
+bpf_skb_ct_alloc(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
+		 u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
+{
+	struct sk_buff *skb = (struct sk_buff *)skb_ctx;
+	struct nf_conn *nfct;
+	struct net *net;
+
+	net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk);
+	nfct = __bpf_nf_ct_alloc_entry(net, bpf_tuple, tuple__sz, opts, opts__sz, 10);
+	if (IS_ERR(nfct)) {
+		if (opts)
+			opts->error = PTR_ERR(nfct);
+		return NULL;
+	}
+
+	return (struct nf_conn___init *)nfct;
+}
+
 /* bpf_skb_ct_lookup - Lookup CT entry for the given tuple, and acquire a
  *		       reference to it
  *
@@ -184,6 +327,26 @@ bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
 	return nfct;
 }
 
+/* bpf_ct_insert_entry - Add the provided entry into a CT map
+ *
+ * This must be invoked for referenced PTR_TO_BTF_ID.
+ *
+ * @nfct__ref	 - Pointer to referenced nf_conn___init object, obtained
+ *		   using bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
+ */
+struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct__ref)
+{
+	struct nf_conn *nfct = (struct nf_conn *)nfct__ref;
+	int err;
+
+	err = nf_conntrack_hash_check_insert(nfct);
+	if (err < 0) {
+		nf_conntrack_free(nfct);
+		return NULL;
+	}
+	return nfct;
+}
+
 /* bpf_ct_release - Release acquired nf_conn object
  *
  * This must be invoked for referenced PTR_TO_BTF_ID, and the verifier rejects
@@ -204,8 +367,11 @@ void bpf_ct_release(struct nf_conn *nfct)
 __diag_pop()
 
 BTF_SET8_START(nf_ct_kfunc_set)
+BTF_ID_FLAGS(func, bpf_xdp_ct_alloc, KF_ACQUIRE | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_xdp_ct_lookup, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_skb_ct_alloc, KF_ACQUIRE | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_skb_ct_lookup, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_ct_insert_entry, KF_ACQUIRE | KF_RET_NULL | KF_RELEASE)
 BTF_ID_FLAGS(func, bpf_ct_release, KF_RELEASE)
 BTF_SET8_END(nf_ct_kfunc_set)
 
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 722af5e309ba..0729b2f0d44f 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2025,9 +2025,7 @@ static int ctnetlink_change_timeout(struct nf_conn *ct,
 {
 	u64 timeout = (u64)ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ;
 
-	if (timeout > INT_MAX)
-		timeout = INT_MAX;
-	WRITE_ONCE(ct->timeout, nfct_time_stamp + (u32)timeout);
+	__nf_ct_set_timeout(ct, timeout);
 
 	if (test_bit(IPS_DYING_BIT, &ct->status))
 		return -ETIME;
@@ -2292,9 +2290,7 @@ ctnetlink_create_conntrack(struct net *net,
 			goto err1;
 
 	timeout = (u64)ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ;
-	if (timeout > INT_MAX)
-		timeout = INT_MAX;
-	ct->timeout = (u32)timeout + nfct_time_stamp;
+	__nf_ct_set_timeout(ct, timeout);
 
 	rcu_read_lock();
 	if (cda[CTA_HELP]) {
-- 
cgit v1.2.3


From 0b3892364431684e883682b85d008979e08d4ce6 Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi
Date: Thu, 21 Jul 2022 15:42:40 +0200
Subject: net: netfilter: Add kfuncs to set and change CT timeout

Introduce bpf_ct_set_timeout and bpf_ct_change_timeout kfunc helpers in
order to change the nf_conn timeout. This is the same operation as
ctnetlink_change_timeout, hence the code is shared between both by
extracting it out to __nf_ct_change_timeout. It is also updated to
return an error when it sees the IPS_FIXED_TIMEOUT_BIT bit in
ct->status, as that check was missing.

It is required to introduce two kfuncs taking nf_conn___init and
nf_conn instead of sharing one because the KF_TRUSTED_ARGS flag causes
strict type checking. This would disallow passing nf_conn___init to a
kfunc taking nf_conn, and vice versa. We cannot remove the
KF_TRUSTED_ARGS flag as we only want to accept refcounted pointers and
not e.g. ct->master.

Apart from this, bpf_ct_set_timeout is only called for a newly
allocated CT, so it doesn't need to inspect the status field just yet.
Sharing the helpers, even if it were possible, would make the timeout
setting helper sensitive to the order in which status and timeout are
set after allocation.

Hence, bpf_ct_set_* kfuncs are meant to be used on allocated CT, and
bpf_ct_change_* kfuncs are meant to be used on inserted or looked up
CT entries.
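
As a hedged continuation of the XDP sketch shown under the previous patch
(only the two declarations below are new; the millisecond values are
arbitrary):

  /* Illustrative sketch only -- reuses ctx/tup/opts/ct_i/ct from the earlier example. */
  extern void bpf_ct_set_timeout(struct nf_conn___init *nfct, u32 timeout) __ksym;
  extern int bpf_ct_change_timeout(struct nf_conn *nfct, u32 timeout) __ksym;

  	ct_i = bpf_xdp_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
  	if (!ct_i)
  		return XDP_PASS;

  	/* Allocated but not yet inserted entry: use the bpf_ct_set_* flavour. */
  	bpf_ct_set_timeout(ct_i, 10000);		/* 10s, in msecs */

  	ct = bpf_ct_insert_entry(ct_i);
  	if (ct) {
  		/* Inserted (or looked up) entry: use bpf_ct_change_*; this one
  		 * can fail, e.g. -EPERM once IPS_FIXED_TIMEOUT_BIT is set.
  		 */
  		if (bpf_ct_change_timeout(ct, 60000) < 0)
  			bpf_printk("bpf_ct_change_timeout failed");
  		bpf_ct_release(ct);
  	}
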
Co-developed-by: Lorenzo Bianconi
Signed-off-by: Lorenzo Bianconi
Signed-off-by: Kumar Kartikeya Dwivedi
Link: https://lore.kernel.org/r/20220721134245.2450-9-memxor@gmail.com
Signed-off-by: Alexei Starovoitov
---
 include/net/netfilter/nf_conntrack_core.h |  2 ++
 net/netfilter/nf_conntrack_bpf.c          | 38 ++++++++++++++++++++++++++++---
 net/netfilter/nf_conntrack_core.c         | 22 ++++++++++++++++++
 net/netfilter/nf_conntrack_netlink.c      |  9 +-------
 4 files changed, 60 insertions(+), 11 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 83a60c684e6c..3b0f7d0eebae 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -97,6 +97,8 @@ static inline void __nf_ct_set_timeout(struct nf_conn *ct, u64 timeout)
 	WRITE_ONCE(ct->timeout, nfct_time_stamp + (u32)timeout);
 }
 
+int __nf_ct_change_timeout(struct nf_conn *ct, u64 cta_timeout);
+
 #endif
 
 #endif	/* _NF_CONNTRACK_CORE_H */
diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c
index cac4a9558968..b8912e15082f 100644
--- a/net/netfilter/nf_conntrack_bpf.c
+++ b/net/netfilter/nf_conntrack_bpf.c
@@ -331,12 +331,12 @@ bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
  *
  * This must be invoked for referenced PTR_TO_BTF_ID.
  *
- * @nfct__ref	 - Pointer to referenced nf_conn___init object, obtained
+ * @nfct	 - Pointer to referenced nf_conn___init object, obtained
  *		   using bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
  */
-struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct__ref)
+struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct_i)
 {
-	struct nf_conn *nfct = (struct nf_conn *)nfct__ref;
+	struct nf_conn *nfct = (struct nf_conn *)nfct_i;
 	int err;
 
 	err = nf_conntrack_hash_check_insert(nfct);
@@ -364,6 +364,36 @@ void bpf_ct_release(struct nf_conn *nfct)
 	nf_ct_put(nfct);
 }
 
+/* bpf_ct_set_timeout - Set timeout of allocated nf_conn
+ *
+ * Sets the default timeout of newly allocated nf_conn before insertion.
+ * This helper must be invoked for refcounted pointer to nf_conn___init.
+ *
+ * Parameters:
+ * @nfct	 - Pointer to referenced nf_conn object, obtained using
+ *		   bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
+ * @timeout	 - Timeout in msecs.
+ */
+void bpf_ct_set_timeout(struct nf_conn___init *nfct, u32 timeout)
+{
+	__nf_ct_set_timeout((struct nf_conn *)nfct, msecs_to_jiffies(timeout));
+}
+
+/* bpf_ct_change_timeout - Change timeout of inserted nf_conn
+ *
+ * Change timeout associated of the inserted or looked up nf_conn.
+ * This helper must be invoked for refcounted pointer to nf_conn.
+ *
+ * Parameters:
+ * @nfct	 - Pointer to referenced nf_conn object, obtained using
+ *		   bpf_ct_insert_entry, bpf_xdp_ct_lookup, or bpf_skb_ct_lookup.
+ * @timeout	 - New timeout in msecs.
+ */
+int bpf_ct_change_timeout(struct nf_conn *nfct, u32 timeout)
+{
+	return __nf_ct_change_timeout(nfct, msecs_to_jiffies(timeout));
+}
+
 __diag_pop()
 
 BTF_SET8_START(nf_ct_kfunc_set)
@@ -373,6 +403,8 @@ BTF_ID_FLAGS(func, bpf_skb_ct_alloc, KF_ACQUIRE | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_skb_ct_lookup, KF_ACQUIRE | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_ct_insert_entry, KF_ACQUIRE | KF_RET_NULL | KF_RELEASE)
 BTF_ID_FLAGS(func, bpf_ct_release, KF_RELEASE)
+BTF_ID_FLAGS(func, bpf_ct_set_timeout, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_ct_change_timeout, KF_TRUSTED_ARGS)
 BTF_SET8_END(nf_ct_kfunc_set)
 
 static const struct btf_kfunc_id_set nf_conntrack_kfunc_set = {
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 082a2fd8d85b..572f59a5e936 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -2786,3 +2786,25 @@ err_expect:
 	free_percpu(net->ct.stat);
 	return ret;
 }
+
+#if (IS_BUILTIN(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \
+    (IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES) || \
+    IS_ENABLED(CONFIG_NF_CT_NETLINK))
+
+/* ctnetlink code shared by both ctnetlink and nf_conntrack_bpf */
+
+int __nf_ct_change_timeout(struct nf_conn *ct, u64 timeout)
+{
+	if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status))
+		return -EPERM;
+
+	__nf_ct_set_timeout(ct, timeout);
+
+	if (test_bit(IPS_DYING_BIT, &ct->status))
+		return -ETIME;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(__nf_ct_change_timeout);
+
+#endif
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 0729b2f0d44f..b1de07c73845 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2023,14 +2023,7 @@ static int ctnetlink_change_helper(struct nf_conn *ct,
 static int ctnetlink_change_timeout(struct nf_conn *ct,
 				    const struct nlattr * const cda[])
 {
-	u64 timeout = (u64)ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ;
-
-	__nf_ct_set_timeout(ct, timeout);
-
-	if (test_bit(IPS_DYING_BIT, &ct->status))
-		return -ETIME;
-
-	return 0;
+	return __nf_ct_change_timeout(ct, (u64)ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ);
 }
 
 #if defined(CONFIG_NF_CONNTRACK_MARK)
-- 
cgit v1.2.3


From ef69aa3a986ef94f01ce8b5b619f550db54432fe Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi
Date: Thu, 21 Jul 2022 15:42:41 +0200
Subject: net: netfilter: Add kfuncs to set and change CT status

Introduce bpf_ct_set_status and bpf_ct_change_status kfunc helpers in
order to set the status field of an allocated entry or update the
status field of an existing inserted entry. Use
nf_ct_change_status_common to share the permitted status field changes
between the netlink and BPF side by refactoring ctnetlink_change_status.

It is required to introduce two kfuncs taking nf_conn___init and
nf_conn instead of sharing one because the KF_TRUSTED_ARGS flag causes
strict type checking. This would disallow passing nf_conn___init to a
kfunc taking nf_conn, and vice versa. We cannot remove the
KF_TRUSTED_ARGS flag as we only want to accept refcounted pointers and
not e.g. ct->master.

Hence, bpf_ct_set_* kfuncs are meant to be used on allocated CT, and
bpf_ct_change_* kfuncs are meant to be used on inserted or looked up
CT entries.
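
Continuing the same hedged sketch (the chosen status bits are only an
example; keeping IPS_CONFIRMED in the requested value is needed because the
alloc kfunc already set it and nf_ct_change_status_common() rejects a status
that differs in the unchangeable bits):

  /* Illustrative sketch only -- reuses the variables from the earlier examples. */
  extern int bpf_ct_set_status(const struct nf_conn___init *nfct, u32 status) __ksym;
  extern int bpf_ct_change_status(struct nf_conn *nfct, u32 status) __ksym;

  	/* Allocated entry: bpf_ct_set_* flavour, before bpf_ct_insert_entry(). */
  	bpf_ct_set_status(ct_i, IPS_CONFIRMED | IPS_SEEN_REPLY);

  	ct = bpf_ct_insert_entry(ct_i);
  	if (ct) {
  		/* Inserted (or looked up) entry: only "set-only" bits such as
  		 * IPS_SEEN_REPLY / IPS_ASSURED can be added on top of the
  		 * current status; anything else yields -EBUSY.
  		 */
  		bpf_ct_change_status(ct, (u32)ct->status | IPS_ASSURED);
  		bpf_ct_release(ct);
  	}
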
Signed-off-by: Lorenzo Bianconi
Co-developed-by: Kumar Kartikeya Dwivedi
Signed-off-by: Kumar Kartikeya Dwivedi
Link: https://lore.kernel.org/r/20220721134245.2450-10-memxor@gmail.com
Signed-off-by: Alexei Starovoitov
---
 include/net/netfilter/nf_conntrack_core.h |  2 ++
 net/netfilter/nf_conntrack_bpf.c          | 32 +++++++++++++++++++++++++
 net/netfilter/nf_conntrack_core.c         | 40 +++++++++++++++++++++++++++++++
 net/netfilter/nf_conntrack_netlink.c      | 39 ++----------------------------
 4 files changed, 76 insertions(+), 37 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 3b0f7d0eebae..3cd3a6e631aa 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -98,6 +98,8 @@ static inline void __nf_ct_set_timeout(struct nf_conn *ct, u64 timeout)
 }
 
 int __nf_ct_change_timeout(struct nf_conn *ct, u64 cta_timeout);
+void __nf_ct_change_status(struct nf_conn *ct, unsigned long on, unsigned long off);
+int nf_ct_change_status_common(struct nf_conn *ct, unsigned int status);
 
 #endif
 
diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c
index b8912e15082f..1cd87b28c9b0 100644
--- a/net/netfilter/nf_conntrack_bpf.c
+++ b/net/netfilter/nf_conntrack_bpf.c
@@ -394,6 +394,36 @@ int bpf_ct_change_timeout(struct nf_conn *nfct, u32 timeout)
 	return __nf_ct_change_timeout(nfct, msecs_to_jiffies(timeout));
 }
 
+/* bpf_ct_set_status - Set status field of allocated nf_conn
+ *
+ * Set the status field of the newly allocated nf_conn before insertion.
+ * This must be invoked for referenced PTR_TO_BTF_ID to nf_conn___init.
+ *
+ * Parameters:
+ * @nfct	 - Pointer to referenced nf_conn object, obtained using
+ *		   bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
+ * @status	 - New status value.
+ */
+int bpf_ct_set_status(const struct nf_conn___init *nfct, u32 status)
+{
+	return nf_ct_change_status_common((struct nf_conn *)nfct, status);
+}
+
+/* bpf_ct_change_status - Change status of inserted nf_conn
+ *
+ * Change the status field of the provided connection tracking entry.
+ * This must be invoked for referenced PTR_TO_BTF_ID to nf_conn.
+ *
+ * Parameters:
+ * @nfct	 - Pointer to referenced nf_conn object, obtained using
+ *		   bpf_ct_insert_entry, bpf_xdp_ct_lookup or bpf_skb_ct_lookup.
+ * @status	 - New status value.
+ */
+int bpf_ct_change_status(struct nf_conn *nfct, u32 status)
+{
+	return nf_ct_change_status_common(nfct, status);
+}
+
 __diag_pop()
 
 BTF_SET8_START(nf_ct_kfunc_set)
@@ -405,6 +435,8 @@ BTF_ID_FLAGS(func, bpf_ct_insert_entry, KF_ACQUIRE | KF_RET_NULL | KF_RELEASE)
 BTF_ID_FLAGS(func, bpf_ct_release, KF_RELEASE)
 BTF_ID_FLAGS(func, bpf_ct_set_timeout, KF_TRUSTED_ARGS)
 BTF_ID_FLAGS(func, bpf_ct_change_timeout, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_ct_set_status, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_ct_change_status, KF_TRUSTED_ARGS)
 BTF_SET8_END(nf_ct_kfunc_set)
 
 static const struct btf_kfunc_id_set nf_conntrack_kfunc_set = {
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 572f59a5e936..66a0aa8dbc3b 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -2807,4 +2807,44 @@ int __nf_ct_change_timeout(struct nf_conn *ct, u64 timeout)
 }
 EXPORT_SYMBOL_GPL(__nf_ct_change_timeout);
 
+void __nf_ct_change_status(struct nf_conn *ct, unsigned long on, unsigned long off)
+{
+	unsigned int bit;
+
+	/* Ignore these unchangable bits */
+	on &= ~IPS_UNCHANGEABLE_MASK;
+	off &= ~IPS_UNCHANGEABLE_MASK;
+
+	for (bit = 0; bit < __IPS_MAX_BIT; bit++) {
+		if (on & (1 << bit))
+			set_bit(bit, &ct->status);
+		else if (off & (1 << bit))
+			clear_bit(bit, &ct->status);
+	}
+}
+EXPORT_SYMBOL_GPL(__nf_ct_change_status);
+
+int nf_ct_change_status_common(struct nf_conn *ct, unsigned int status)
+{
+	unsigned long d;
+
+	d = ct->status ^ status;
+
+	if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING))
+		/* unchangeable */
+		return -EBUSY;
+
+	if (d & IPS_SEEN_REPLY && !(status & IPS_SEEN_REPLY))
+		/* SEEN_REPLY bit can only be set */
+		return -EBUSY;
+
+	if (d & IPS_ASSURED && !(status & IPS_ASSURED))
+		/* ASSURED bit can only be set */
+		return -EBUSY;
+
+	__nf_ct_change_status(ct, status, 0);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nf_ct_change_status_common);
+
 #endif
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index b1de07c73845..e02832ef9b9f 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1890,45 +1890,10 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct,
 }
 #endif
 
-static void
-__ctnetlink_change_status(struct nf_conn *ct, unsigned long on,
-			  unsigned long off)
-{
-	unsigned int bit;
-
-	/* Ignore these unchangable bits */
-	on &= ~IPS_UNCHANGEABLE_MASK;
-	off &= ~IPS_UNCHANGEABLE_MASK;
-
-	for (bit = 0; bit < __IPS_MAX_BIT; bit++) {
-		if (on & (1 << bit))
-			set_bit(bit, &ct->status);
-		else if (off & (1 << bit))
-			clear_bit(bit, &ct->status);
-	}
-}
-
 static int
 ctnetlink_change_status(struct nf_conn *ct, const struct nlattr * const cda[])
 {
-	unsigned long d;
-	unsigned int status = ntohl(nla_get_be32(cda[CTA_STATUS]));
-	d = ct->status ^ status;
-
-	if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING))
-		/* unchangeable */
-		return -EBUSY;
-
-	if (d & IPS_SEEN_REPLY && !(status & IPS_SEEN_REPLY))
-		/* SEEN_REPLY bit can only be set */
-		return -EBUSY;
-
-	if (d & IPS_ASSURED && !(status & IPS_ASSURED))
-		/* ASSURED bit can only be set */
-		return -EBUSY;
-
-	__ctnetlink_change_status(ct, status, 0);
-	return 0;
+	return nf_ct_change_status_common(ct, ntohl(nla_get_be32(cda[CTA_STATUS])));
 }
 
 static int
@@ -2825,7 +2790,7 @@ ctnetlink_update_status(struct nf_conn *ct, const struct nlattr * const cda[])
 	 * unchangeable bits but do not error out. Also user programs
 	 * are allowed to clear the bits that they are allowed to change.
 	 */
-	__ctnetlink_change_status(ct, status, ~status);
+	__nf_ct_change_status(ct, status, ~status);
 	return 0;
 }
-- 
cgit v1.2.3
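
A closing note on the first patch in this excerpt: its diff is limited to
include/net, so the sendmsg-side caller of xsk_pool_get_napi_id() is not
shown here. A rough sketch of the Tx-path usage the commit message describes
might look as follows; the helper name and its exact placement in
net/xdp/xsk.c are assumptions, not the verbatim upstream change:

  /* Hypothetical illustration of the sendmsg()/busy-poll hook. */
  #include <net/busy_poll.h>
  #include <net/xdp_sock.h>
  #include <net/xdp_sock_drv.h>

  static void xsk_tx_busy_loop(struct sock *sk, struct xdp_sock *xs)
  {
  	if (!sk_can_busy_loop(sk))
  		return;

  	/* Only zero-copy sockets have xdp_rxq_info propagated into the
  	 * pool's heads[], so only then is the napi_id valid.
  	 */
  	if (xs->zc)
  		__sk_mark_napi_id(sk, xsk_pool_get_napi_id(xs->pool));

  	sk_busy_loop(sk, 1);	/* non-blocking sockets only */
  }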