diff options
Diffstat (limited to 'net')
32 files changed, 1152 insertions, 433 deletions
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 2199a5db25e6..690d27d3f2f9 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -58,6 +58,12 @@ config NFT_REJECT_IPV4 default NFT_REJECT tristate +config NFT_DUP_IPV4 + tristate "IPv4 nf_tables packet duplication support" + select NF_DUP_IPV4 + help + This module enables IPv4 packet duplication support for nf_tables. + endif # NF_TABLES_IPV4 config NF_TABLES_ARP @@ -67,6 +73,12 @@ config NF_TABLES_ARP endif # NF_TABLES +config NF_DUP_IPV4 + tristate "Netfilter IPv4 packet duplication to alternate destination" + help + This option enables the nf_dup_ipv4 core, which duplicates an IPv4 + packet to be rerouted to another destination. + config NF_LOG_ARP tristate "ARP packet logging" default m if NETFILTER_ADVANCED=n diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 7fe6c703528f..87b073da14c9 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -41,6 +41,7 @@ obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o obj-$(CONFIG_NFT_MASQ_IPV4) += nft_masq_ipv4.o obj-$(CONFIG_NFT_REDIR_IPV4) += nft_redir_ipv4.o +obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o # generic IP tables @@ -70,3 +71,5 @@ obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o # just filtering instance of ARP tables for now obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o + +obj-$(CONFIG_NF_DUP_IPV4) += nf_dup_ipv4.o diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 30ad9554b5e9..8a2caaf3940b 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -280,7 +280,7 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) return -EINVAL; } - h = nf_conntrack_find_get(sock_net(sk), NF_CT_DEFAULT_ZONE, &tuple); + h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple); if (h) { struct sockaddr_in sin; struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 80d5554b9a88..cdde3ec496e9 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -134,9 +134,11 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, struct nf_conntrack_tuple innertuple, origtuple; const struct nf_conntrack_l4proto *innerproto; const struct nf_conntrack_tuple_hash *h; - u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE; + const struct nf_conntrack_zone *zone; + struct nf_conntrack_zone tmp; NF_CT_ASSERT(skb->nfct == NULL); + zone = nf_ct_zone_tmpl(tmpl, skb, &tmp); /* Are they talking about one of our connections? */ if (!nf_ct_get_tuplepr(skb, diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index b69e82bda215..9306ec4fab41 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -43,19 +43,22 @@ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum, struct sk_buff *skb) { - u16 zone = NF_CT_DEFAULT_ZONE; - + u16 zone_id = NF_CT_DEFAULT_ZONE_ID; #if IS_ENABLED(CONFIG_NF_CONNTRACK) - if (skb->nfct) - zone = nf_ct_zone((struct nf_conn *)skb->nfct); + if (skb->nfct) { + enum ip_conntrack_info ctinfo; + const struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + + zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo)); + } #endif if (nf_bridge_in_prerouting(skb)) - return IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone; + return IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id; if (hooknum == NF_INET_PRE_ROUTING) - return IP_DEFRAG_CONNTRACK_IN + zone; + return IP_DEFRAG_CONNTRACK_IN + zone_id; else - return IP_DEFRAG_CONNTRACK_OUT + zone; + return IP_DEFRAG_CONNTRACK_OUT + zone_id; } static unsigned int ipv4_conntrack_defrag(const struct nf_hook_ops *ops, diff --git a/net/ipv4/netfilter/nf_dup_ipv4.c b/net/ipv4/netfilter/nf_dup_ipv4.c new file mode 100644 index 000000000000..b5bb37564b0e --- /dev/null +++ b/net/ipv4/netfilter/nf_dup_ipv4.c @@ -0,0 +1,120 @@ +/* + * (C) 2007 by Sebastian Claßen <sebastian.classen@freenet.ag> + * (C) 2007-2010 by Jan Engelhardt <jengelh@medozas.de> + * + * Extracted from xt_TEE.c + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 or later, as + * published by the Free Software Foundation. + */ +#include <linux/ip.h> +#include <linux/module.h> +#include <linux/percpu.h> +#include <linux/route.h> +#include <linux/skbuff.h> +#include <net/checksum.h> +#include <net/icmp.h> +#include <net/ip.h> +#include <net/route.h> +#include <net/netfilter/ipv4/nf_dup_ipv4.h> +#if IS_ENABLED(CONFIG_NF_CONNTRACK) +#include <net/netfilter/nf_conntrack.h> +#endif + +static struct net *pick_net(struct sk_buff *skb) +{ +#ifdef CONFIG_NET_NS + const struct dst_entry *dst; + + if (skb->dev != NULL) + return dev_net(skb->dev); + dst = skb_dst(skb); + if (dst != NULL && dst->dev != NULL) + return dev_net(dst->dev); +#endif + return &init_net; +} + +static bool nf_dup_ipv4_route(struct sk_buff *skb, const struct in_addr *gw, + int oif) +{ + const struct iphdr *iph = ip_hdr(skb); + struct net *net = pick_net(skb); + struct rtable *rt; + struct flowi4 fl4; + + memset(&fl4, 0, sizeof(fl4)); + if (oif != -1) + fl4.flowi4_oif = oif; + + fl4.daddr = gw->s_addr; + fl4.flowi4_tos = RT_TOS(iph->tos); + fl4.flowi4_scope = RT_SCOPE_UNIVERSE; + fl4.flowi4_flags = FLOWI_FLAG_KNOWN_NH; + rt = ip_route_output_key(net, &fl4); + if (IS_ERR(rt)) + return false; + + skb_dst_drop(skb); + skb_dst_set(skb, &rt->dst); + skb->dev = rt->dst.dev; + skb->protocol = htons(ETH_P_IP); + + return true; +} + +void nf_dup_ipv4(struct sk_buff *skb, unsigned int hooknum, + const struct in_addr *gw, int oif) +{ + struct iphdr *iph; + + if (this_cpu_read(nf_skb_duplicated)) + return; + /* + * Copy the skb, and route the copy. Will later return %XT_CONTINUE for + * the original skb, which should continue on its way as if nothing has + * happened. The copy should be independently delivered to the gateway. + */ + skb = pskb_copy(skb, GFP_ATOMIC); + if (skb == NULL) + return; + +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + /* Avoid counting cloned packets towards the original connection. */ + nf_conntrack_put(skb->nfct); + skb->nfct = &nf_ct_untracked_get()->ct_general; + skb->nfctinfo = IP_CT_NEW; + nf_conntrack_get(skb->nfct); +#endif + /* + * If we are in PREROUTING/INPUT, the checksum must be recalculated + * since the length could have changed as a result of defragmentation. + * + * We also decrease the TTL to mitigate potential loops between two + * hosts. + * + * Set %IP_DF so that the original source is notified of a potentially + * decreased MTU on the clone route. IPv6 does this too. + */ + iph = ip_hdr(skb); + iph->frag_off |= htons(IP_DF); + if (hooknum == NF_INET_PRE_ROUTING || + hooknum == NF_INET_LOCAL_IN) + --iph->ttl; + ip_send_check(iph); + + if (nf_dup_ipv4_route(skb, gw, oif)) { + __this_cpu_write(nf_skb_duplicated, true); + ip_local_out(skb); + __this_cpu_write(nf_skb_duplicated, false); + } else { + kfree_skb(skb); + } +} +EXPORT_SYMBOL_GPL(nf_dup_ipv4); + +MODULE_AUTHOR("Sebastian Claßen <sebastian.classen@freenet.ag>"); +MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>"); +MODULE_DESCRIPTION("nf_dup_ipv4: Duplicate IPv4 packet"); +MODULE_LICENSE("GPL"); diff --git a/net/ipv4/netfilter/nft_dup_ipv4.c b/net/ipv4/netfilter/nft_dup_ipv4.c new file mode 100644 index 000000000000..25419fbddcb6 --- /dev/null +++ b/net/ipv4/netfilter/nft_dup_ipv4.c @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2015 Pablo Neira Ayuso <pablo@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/ipv4/nf_dup_ipv4.h> + +struct nft_dup_ipv4 { + enum nft_registers sreg_addr:8; + enum nft_registers sreg_dev:8; +}; + +static void nft_dup_ipv4_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_dup_ipv4 *priv = nft_expr_priv(expr); + struct in_addr gw = { + .s_addr = regs->data[priv->sreg_addr], + }; + int oif = regs->data[priv->sreg_dev]; + + nf_dup_ipv4(pkt->skb, pkt->ops->hooknum, &gw, oif); +} + +static int nft_dup_ipv4_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_dup_ipv4 *priv = nft_expr_priv(expr); + int err; + + if (tb[NFTA_DUP_SREG_ADDR] == NULL) + return -EINVAL; + + priv->sreg_addr = nft_parse_register(tb[NFTA_DUP_SREG_ADDR]); + err = nft_validate_register_load(priv->sreg_addr, sizeof(struct in_addr)); + if (err < 0) + return err; + + if (tb[NFTA_DUP_SREG_DEV] != NULL) { + priv->sreg_dev = nft_parse_register(tb[NFTA_DUP_SREG_DEV]); + return nft_validate_register_load(priv->sreg_dev, sizeof(int)); + } + return 0; +} + +static int nft_dup_ipv4_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + struct nft_dup_ipv4 *priv = nft_expr_priv(expr); + + if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr) || + nft_dump_register(skb, NFTA_DUP_SREG_DEV, priv->sreg_dev)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_dup_ipv4_type; +static const struct nft_expr_ops nft_dup_ipv4_ops = { + .type = &nft_dup_ipv4_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_dup_ipv4)), + .eval = nft_dup_ipv4_eval, + .init = nft_dup_ipv4_init, + .dump = nft_dup_ipv4_dump, +}; + +static const struct nla_policy nft_dup_ipv4_policy[NFTA_DUP_MAX + 1] = { + [NFTA_DUP_SREG_ADDR] = { .type = NLA_U32 }, + [NFTA_DUP_SREG_DEV] = { .type = NLA_U32 }, +}; + +static struct nft_expr_type nft_dup_ipv4_type __read_mostly = { + .family = NFPROTO_IPV4, + .name = "dup", + .ops = &nft_dup_ipv4_ops, + .policy = nft_dup_ipv4_policy, + .maxattr = NFTA_DUP_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_dup_ipv4_module_init(void) +{ + return nft_register_expr(&nft_dup_ipv4_type); +} + +static void __exit nft_dup_ipv4_module_exit(void) +{ + nft_unregister_expr(&nft_dup_ipv4_type); +} + +module_init(nft_dup_ipv4_module_init); +module_exit(nft_dup_ipv4_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); +MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "dup"); diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index b552cf0d6198..96833e4b3193 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -47,9 +47,21 @@ config NFT_REJECT_IPV6 default NFT_REJECT tristate +config NFT_DUP_IPV6 + tristate "IPv6 nf_tables packet duplication support" + select NF_DUP_IPV6 + help + This module enables IPv6 packet duplication support for nf_tables. + endif # NF_TABLES_IPV6 endif # NF_TABLES +config NF_DUP_IPV6 + tristate "Netfilter IPv6 packet duplication to alternate destination" + help + This option enables the nf_dup_ipv6 core, which duplicates an IPv6 + packet to be rerouted to another destination. + config NF_REJECT_IPV6 tristate "IPv6 packet rejection" default m if NETFILTER_ADVANCED=n diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index c36e0a5490de..b4f7d0b4e2af 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -30,6 +30,8 @@ obj-$(CONFIG_NF_LOG_IPV6) += nf_log_ipv6.o # reject obj-$(CONFIG_NF_REJECT_IPV6) += nf_reject_ipv6.o +obj-$(CONFIG_NF_DUP_IPV6) += nf_dup_ipv6.o + # nf_tables obj-$(CONFIG_NF_TABLES_IPV6) += nf_tables_ipv6.o obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o @@ -37,6 +39,7 @@ obj-$(CONFIG_NFT_CHAIN_NAT_IPV6) += nft_chain_nat_ipv6.o obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o obj-$(CONFIG_NFT_MASQ_IPV6) += nft_masq_ipv6.o obj-$(CONFIG_NFT_REDIR_IPV6) += nft_redir_ipv6.o +obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o # matches obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 4ba0c34c627b..7302900c321a 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -251,7 +251,7 @@ ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len) if (*len < 0 || (unsigned int) *len < sizeof(sin6)) return -EINVAL; - h = nf_conntrack_find_get(sock_net(sk), NF_CT_DEFAULT_ZONE, &tuple); + h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple); if (!h) { pr_debug("IP6T_SO_ORIGINAL_DST: Can't find %pI6c/%u-%pI6c/%u.\n", &tuple.src.u3.ip6, ntohs(tuple.src.u.tcp.port), diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 90388d606483..0e6fae103d33 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -150,7 +150,7 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl, struct nf_conntrack_tuple intuple, origtuple; const struct nf_conntrack_tuple_hash *h; const struct nf_conntrack_l4proto *inproto; - u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE; + struct nf_conntrack_zone tmp; NF_CT_ASSERT(skb->nfct == NULL); @@ -177,7 +177,8 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl, *ctinfo = IP_CT_RELATED; - h = nf_conntrack_find_get(net, zone, &intuple); + h = nf_conntrack_find_get(net, nf_ct_zone_tmpl(tmpl, skb, &tmp), + &intuple); if (!h) { pr_debug("icmpv6_error: no match\n"); return -NF_ACCEPT; diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index 267fb8d5876e..6d9c0b3d5b8c 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -33,20 +33,22 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, struct sk_buff *skb) { - u16 zone = NF_CT_DEFAULT_ZONE; - + u16 zone_id = NF_CT_DEFAULT_ZONE_ID; #if IS_ENABLED(CONFIG_NF_CONNTRACK) - if (skb->nfct) - zone = nf_ct_zone((struct nf_conn *)skb->nfct); + if (skb->nfct) { + enum ip_conntrack_info ctinfo; + const struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + + zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo)); + } #endif if (nf_bridge_in_prerouting(skb)) - return IP6_DEFRAG_CONNTRACK_BRIDGE_IN + zone; + return IP6_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id; if (hooknum == NF_INET_PRE_ROUTING) - return IP6_DEFRAG_CONNTRACK_IN + zone; + return IP6_DEFRAG_CONNTRACK_IN + zone_id; else - return IP6_DEFRAG_CONNTRACK_OUT + zone; - + return IP6_DEFRAG_CONNTRACK_OUT + zone_id; } static unsigned int ipv6_defrag(const struct nf_hook_ops *ops, diff --git a/net/ipv6/netfilter/nf_dup_ipv6.c b/net/ipv6/netfilter/nf_dup_ipv6.c new file mode 100644 index 000000000000..d8ab654080b4 --- /dev/null +++ b/net/ipv6/netfilter/nf_dup_ipv6.c @@ -0,0 +1,96 @@ +/* + * (C) 2007 by Sebastian Claßen <sebastian.classen@freenet.ag> + * (C) 2007-2010 by Jan Engelhardt <jengelh@medozas.de> + * + * Extracted from xt_TEE.c + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 or later, as + * published by the Free Software Foundation. + */ +#include <linux/module.h> +#include <linux/percpu.h> +#include <linux/skbuff.h> +#include <net/ipv6.h> +#include <net/ip6_route.h> +#include <net/netfilter/ipv6/nf_dup_ipv6.h> +#if IS_ENABLED(CONFIG_NF_CONNTRACK) +#include <net/netfilter/nf_conntrack.h> +#endif + +static struct net *pick_net(struct sk_buff *skb) +{ +#ifdef CONFIG_NET_NS + const struct dst_entry *dst; + + if (skb->dev != NULL) + return dev_net(skb->dev); + dst = skb_dst(skb); + if (dst != NULL && dst->dev != NULL) + return dev_net(dst->dev); +#endif + return &init_net; +} + +static bool nf_dup_ipv6_route(struct sk_buff *skb, const struct in6_addr *gw, + int oif) +{ + const struct ipv6hdr *iph = ipv6_hdr(skb); + struct net *net = pick_net(skb); + struct dst_entry *dst; + struct flowi6 fl6; + + memset(&fl6, 0, sizeof(fl6)); + if (oif != -1) + fl6.flowi6_oif = oif; + + fl6.daddr = *gw; + fl6.flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) | + (iph->flow_lbl[1] << 8) | iph->flow_lbl[2]; + dst = ip6_route_output(net, NULL, &fl6); + if (dst->error) { + dst_release(dst); + return false; + } + skb_dst_drop(skb); + skb_dst_set(skb, dst); + skb->dev = dst->dev; + skb->protocol = htons(ETH_P_IPV6); + + return true; +} + +void nf_dup_ipv6(struct sk_buff *skb, unsigned int hooknum, + const struct in6_addr *gw, int oif) +{ + if (this_cpu_read(nf_skb_duplicated)) + return; + skb = pskb_copy(skb, GFP_ATOMIC); + if (skb == NULL) + return; + +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + nf_conntrack_put(skb->nfct); + skb->nfct = &nf_ct_untracked_get()->ct_general; + skb->nfctinfo = IP_CT_NEW; + nf_conntrack_get(skb->nfct); +#endif + if (hooknum == NF_INET_PRE_ROUTING || + hooknum == NF_INET_LOCAL_IN) { + struct ipv6hdr *iph = ipv6_hdr(skb); + --iph->hop_limit; + } + if (nf_dup_ipv6_route(skb, gw, oif)) { + __this_cpu_write(nf_skb_duplicated, true); + ip6_local_out(skb); + __this_cpu_write(nf_skb_duplicated, false); + } else { + kfree_skb(skb); + } +} +EXPORT_SYMBOL_GPL(nf_dup_ipv6); + +MODULE_AUTHOR("Sebastian Claßen <sebastian.classen@freenet.ag>"); +MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>"); +MODULE_DESCRIPTION("nf_dup_ipv6: IPv6 packet duplication"); +MODULE_LICENSE("GPL"); diff --git a/net/ipv6/netfilter/nft_dup_ipv6.c b/net/ipv6/netfilter/nft_dup_ipv6.c new file mode 100644 index 000000000000..0eaa4f65fdea --- /dev/null +++ b/net/ipv6/netfilter/nft_dup_ipv6.c @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2015 Pablo Neira Ayuso <pablo@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/ipv6/nf_dup_ipv6.h> + +struct nft_dup_ipv6 { + enum nft_registers sreg_addr:8; + enum nft_registers sreg_dev:8; +}; + +static void nft_dup_ipv6_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_dup_ipv6 *priv = nft_expr_priv(expr); + struct in6_addr *gw = (struct in6_addr *)®s->data[priv->sreg_addr]; + int oif = regs->data[priv->sreg_dev]; + + nf_dup_ipv6(pkt->skb, pkt->ops->hooknum, gw, oif); +} + +static int nft_dup_ipv6_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_dup_ipv6 *priv = nft_expr_priv(expr); + int err; + + if (tb[NFTA_DUP_SREG_ADDR] == NULL) + return -EINVAL; + + priv->sreg_addr = nft_parse_register(tb[NFTA_DUP_SREG_ADDR]); + err = nft_validate_register_load(priv->sreg_addr, sizeof(struct in6_addr)); + if (err < 0) + return err; + + if (tb[NFTA_DUP_SREG_DEV] != NULL) { + priv->sreg_dev = nft_parse_register(tb[NFTA_DUP_SREG_DEV]); + return nft_validate_register_load(priv->sreg_dev, sizeof(int)); + } + return 0; +} + +static int nft_dup_ipv6_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + struct nft_dup_ipv6 *priv = nft_expr_priv(expr); + + if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr) || + nft_dump_register(skb, NFTA_DUP_SREG_DEV, priv->sreg_dev)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_dup_ipv6_type; +static const struct nft_expr_ops nft_dup_ipv6_ops = { + .type = &nft_dup_ipv6_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_dup_ipv6)), + .eval = nft_dup_ipv6_eval, + .init = nft_dup_ipv6_init, + .dump = nft_dup_ipv6_dump, +}; + +static const struct nla_policy nft_dup_ipv6_policy[NFTA_DUP_MAX + 1] = { + [NFTA_DUP_SREG_ADDR] = { .type = NLA_U32 }, + [NFTA_DUP_SREG_DEV] = { .type = NLA_U32 }, +}; + +static struct nft_expr_type nft_dup_ipv6_type __read_mostly = { + .family = NFPROTO_IPV6, + .name = "dup", + .ops = &nft_dup_ipv6_ops, + .policy = nft_dup_ipv6_policy, + .maxattr = NFTA_DUP_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_dup_ipv6_module_init(void) +{ + return nft_register_expr(&nft_dup_ipv6_type); +} + +static void __exit nft_dup_ipv6_module_exit(void) +{ + nft_unregister_expr(&nft_dup_ipv6_type); +} + +module_init(nft_dup_ipv6_module_init); +module_exit(nft_dup_ipv6_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); +MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "dup"); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 6eae69a698ed..3e1b4abf1897 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -867,6 +867,8 @@ config NETFILTER_XT_TARGET_TEE depends on NETFILTER_ADVANCED depends on IPV6 || IPV6=n depends on !NF_CONNTRACK || NF_CONNTRACK + select NF_DUP_IPV4 + select NF_DUP_IPV6 if IP6_NF_IPTABLES ---help--- This option adds a "TEE" target with which a packet can be cloned and this clone be rerouted to another nexthop. diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c index 5882bbfd198c..136184572fc9 100644 --- a/net/netfilter/ipvs/ip_vs_nfct.c +++ b/net/netfilter/ipvs/ip_vs_nfct.c @@ -274,7 +274,7 @@ void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp) " for conn " FMT_CONN "\n", __func__, ARG_TUPLE(&tuple), ARG_CONN(cp)); - h = nf_conntrack_find_get(ip_vs_conn_net(cp), NF_CT_DEFAULT_ZONE, + h = nf_conntrack_find_get(ip_vs_conn_net(cp), &nf_ct_zone_dflt, &tuple); if (h) { ct = nf_ct_tuplehash_to_ctrack(h); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 3c20d02aee73..ac3be9b0629b 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -126,7 +126,7 @@ EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked); unsigned int nf_conntrack_hash_rnd __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_hash_rnd); -static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, u16 zone) +static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple) { unsigned int n; @@ -135,7 +135,7 @@ static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, u16 zone) * three bytes manually. */ n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32); - return jhash2((u32 *)tuple, n, zone ^ nf_conntrack_hash_rnd ^ + return jhash2((u32 *)tuple, n, nf_conntrack_hash_rnd ^ (((__force __u16)tuple->dst.u.all << 16) | tuple->dst.protonum)); } @@ -151,15 +151,15 @@ static u32 hash_bucket(u32 hash, const struct net *net) } static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple, - u16 zone, unsigned int size) + unsigned int size) { - return __hash_bucket(hash_conntrack_raw(tuple, zone), size); + return __hash_bucket(hash_conntrack_raw(tuple), size); } -static inline u_int32_t hash_conntrack(const struct net *net, u16 zone, +static inline u_int32_t hash_conntrack(const struct net *net, const struct nf_conntrack_tuple *tuple) { - return __hash_conntrack(tuple, zone, net->ct.htable_size); + return __hash_conntrack(tuple, net->ct.htable_size); } bool @@ -288,7 +288,9 @@ static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct) } /* Released via destroy_conntrack() */ -struct nf_conn *nf_ct_tmpl_alloc(struct net *net, u16 zone, gfp_t flags) +struct nf_conn *nf_ct_tmpl_alloc(struct net *net, + const struct nf_conntrack_zone *zone, + gfp_t flags) { struct nf_conn *tmpl; @@ -299,24 +301,15 @@ struct nf_conn *nf_ct_tmpl_alloc(struct net *net, u16 zone, gfp_t flags) tmpl->status = IPS_TEMPLATE; write_pnet(&tmpl->ct_net, net); -#ifdef CONFIG_NF_CONNTRACK_ZONES - if (zone) { - struct nf_conntrack_zone *nf_ct_zone; + if (nf_ct_zone_add(tmpl, flags, zone) < 0) + goto out_free; - nf_ct_zone = nf_ct_ext_add(tmpl, NF_CT_EXT_ZONE, flags); - if (!nf_ct_zone) - goto out_free; - nf_ct_zone->id = zone; - } -#endif atomic_set(&tmpl->ct_general.use, 0); return tmpl; -#ifdef CONFIG_NF_CONNTRACK_ZONES out_free: kfree(tmpl); return NULL; -#endif } EXPORT_SYMBOL_GPL(nf_ct_tmpl_alloc); @@ -373,7 +366,6 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct) { struct net *net = nf_ct_net(ct); unsigned int hash, reply_hash; - u16 zone = nf_ct_zone(ct); unsigned int sequence; nf_ct_helper_destroy(ct); @@ -381,9 +373,9 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct) local_bh_disable(); do { sequence = read_seqcount_begin(&net->ct.generation); - hash = hash_conntrack(net, zone, + hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - reply_hash = hash_conntrack(net, zone, + reply_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence)); @@ -431,8 +423,8 @@ static void death_by_timeout(unsigned long ul_conntrack) static inline bool nf_ct_key_equal(struct nf_conntrack_tuple_hash *h, - const struct nf_conntrack_tuple *tuple, - u16 zone) + const struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_zone *zone) { struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); @@ -440,8 +432,8 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h, * so we need to check that the conntrack is confirmed */ return nf_ct_tuple_equal(tuple, &h->tuple) && - nf_ct_zone(ct) == zone && - nf_ct_is_confirmed(ct); + nf_ct_zone_equal(ct, zone, NF_CT_DIRECTION(h)) && + nf_ct_is_confirmed(ct); } /* @@ -450,7 +442,7 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h, * and recheck nf_ct_tuple_equal(tuple, &h->tuple) */ static struct nf_conntrack_tuple_hash * -____nf_conntrack_find(struct net *net, u16 zone, +____nf_conntrack_find(struct net *net, const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *tuple, u32 hash) { struct nf_conntrack_tuple_hash *h; @@ -486,7 +478,7 @@ begin: /* Find a connection corresponding to a tuple. */ static struct nf_conntrack_tuple_hash * -__nf_conntrack_find_get(struct net *net, u16 zone, +__nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *tuple, u32 hash) { struct nf_conntrack_tuple_hash *h; @@ -513,11 +505,11 @@ begin: } struct nf_conntrack_tuple_hash * -nf_conntrack_find_get(struct net *net, u16 zone, +nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *tuple) { return __nf_conntrack_find_get(net, zone, tuple, - hash_conntrack_raw(tuple, zone)); + hash_conntrack_raw(tuple)); } EXPORT_SYMBOL_GPL(nf_conntrack_find_get); @@ -536,11 +528,11 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct, int nf_conntrack_hash_check_insert(struct nf_conn *ct) { + const struct nf_conntrack_zone *zone; struct net *net = nf_ct_net(ct); unsigned int hash, reply_hash; struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; - u16 zone; unsigned int sequence; zone = nf_ct_zone(ct); @@ -548,9 +540,9 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct) local_bh_disable(); do { sequence = read_seqcount_begin(&net->ct.generation); - hash = hash_conntrack(net, zone, + hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - reply_hash = hash_conntrack(net, zone, + reply_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence)); @@ -558,12 +550,14 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct) hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode) if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, &h->tuple) && - zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) + nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone, + NF_CT_DIRECTION(h))) goto out; hlist_nulls_for_each_entry(h, n, &net->ct.hash[reply_hash], hnnode) if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, &h->tuple) && - zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) + nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone, + NF_CT_DIRECTION(h))) goto out; add_timer(&ct->timeout); @@ -588,6 +582,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert); int __nf_conntrack_confirm(struct sk_buff *skb) { + const struct nf_conntrack_zone *zone; unsigned int hash, reply_hash; struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; @@ -596,7 +591,6 @@ __nf_conntrack_confirm(struct sk_buff *skb) struct hlist_nulls_node *n; enum ip_conntrack_info ctinfo; struct net *net; - u16 zone; unsigned int sequence; ct = nf_ct_get(skb, &ctinfo); @@ -617,7 +611,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) /* reuse the hash saved before */ hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev; hash = hash_bucket(hash, net); - reply_hash = hash_conntrack(net, zone, + reply_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence)); @@ -649,12 +643,14 @@ __nf_conntrack_confirm(struct sk_buff *skb) hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode) if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, &h->tuple) && - zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) + nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone, + NF_CT_DIRECTION(h))) goto out; hlist_nulls_for_each_entry(h, n, &net->ct.hash[reply_hash], hnnode) if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, &h->tuple) && - zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) + nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone, + NF_CT_DIRECTION(h))) goto out; /* Timer relative to confirmation time, not original @@ -707,11 +703,14 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, const struct nf_conn *ignored_conntrack) { struct net *net = nf_ct_net(ignored_conntrack); + const struct nf_conntrack_zone *zone; struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; struct nf_conn *ct; - u16 zone = nf_ct_zone(ignored_conntrack); - unsigned int hash = hash_conntrack(net, zone, tuple); + unsigned int hash; + + zone = nf_ct_zone(ignored_conntrack); + hash = hash_conntrack(net, tuple); /* Disable BHs the entire time since we need to disable them at * least once for the stats anyway. @@ -721,7 +720,7 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, ct = nf_ct_tuplehash_to_ctrack(h); if (ct != ignored_conntrack && nf_ct_tuple_equal(tuple, &h->tuple) && - nf_ct_zone(ct) == zone) { + nf_ct_zone_equal(ct, zone, NF_CT_DIRECTION(h))) { NF_CT_STAT_INC(net, found); rcu_read_unlock_bh(); return 1; @@ -810,7 +809,8 @@ void init_nf_conntrack_hash_rnd(void) } static struct nf_conn * -__nf_conntrack_alloc(struct net *net, u16 zone, +__nf_conntrack_alloc(struct net *net, + const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *orig, const struct nf_conntrack_tuple *repl, gfp_t gfp, u32 hash) @@ -820,7 +820,7 @@ __nf_conntrack_alloc(struct net *net, u16 zone, if (unlikely(!nf_conntrack_hash_rnd)) { init_nf_conntrack_hash_rnd(); /* recompute the hash as nf_conntrack_hash_rnd is initialized */ - hash = hash_conntrack_raw(orig, zone); + hash = hash_conntrack_raw(orig); } /* We don't want any race condition at early drop stage */ @@ -840,10 +840,9 @@ __nf_conntrack_alloc(struct net *net, u16 zone, * SLAB_DESTROY_BY_RCU. */ ct = kmem_cache_alloc(net->ct.nf_conntrack_cachep, gfp); - if (ct == NULL) { - atomic_dec(&net->ct.count); - return ERR_PTR(-ENOMEM); - } + if (ct == NULL) + goto out; + spin_lock_init(&ct->lock); ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig; ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL; @@ -857,31 +856,24 @@ __nf_conntrack_alloc(struct net *net, u16 zone, memset(&ct->__nfct_init_offset[0], 0, offsetof(struct nf_conn, proto) - offsetof(struct nf_conn, __nfct_init_offset[0])); -#ifdef CONFIG_NF_CONNTRACK_ZONES - if (zone) { - struct nf_conntrack_zone *nf_ct_zone; - nf_ct_zone = nf_ct_ext_add(ct, NF_CT_EXT_ZONE, GFP_ATOMIC); - if (!nf_ct_zone) - goto out_free; - nf_ct_zone->id = zone; - } -#endif + if (zone && nf_ct_zone_add(ct, GFP_ATOMIC, zone) < 0) + goto out_free; + /* Because we use RCU lookups, we set ct_general.use to zero before * this is inserted in any list. */ atomic_set(&ct->ct_general.use, 0); return ct; - -#ifdef CONFIG_NF_CONNTRACK_ZONES out_free: - atomic_dec(&net->ct.count); kmem_cache_free(net->ct.nf_conntrack_cachep, ct); +out: + atomic_dec(&net->ct.count); return ERR_PTR(-ENOMEM); -#endif } -struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone, +struct nf_conn *nf_conntrack_alloc(struct net *net, + const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *orig, const struct nf_conntrack_tuple *repl, gfp_t gfp) @@ -923,8 +915,9 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, struct nf_conntrack_tuple repl_tuple; struct nf_conntrack_ecache *ecache; struct nf_conntrack_expect *exp = NULL; - u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE; + const struct nf_conntrack_zone *zone; struct nf_conn_timeout *timeout_ext; + struct nf_conntrack_zone tmp; unsigned int *timeouts; if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) { @@ -932,6 +925,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, return NULL; } + zone = nf_ct_zone_tmpl(tmpl, skb, &tmp); ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC, hash); if (IS_ERR(ct)) @@ -1026,10 +1020,11 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl, int *set_reply, enum ip_conntrack_info *ctinfo) { + const struct nf_conntrack_zone *zone; struct nf_conntrack_tuple tuple; struct nf_conntrack_tuple_hash *h; + struct nf_conntrack_zone tmp; struct nf_conn *ct; - u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE; u32 hash; if (!nf_ct_get_tuple(skb, skb_network_offset(skb), @@ -1040,7 +1035,8 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl, } /* look for tuple match */ - hash = hash_conntrack_raw(&tuple, zone); + zone = nf_ct_zone_tmpl(tmpl, skb, &tmp); + hash = hash_conntrack_raw(&tuple); h = __nf_conntrack_find_get(net, zone, &tuple, hash); if (!h) { h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto, @@ -1290,6 +1286,13 @@ bool __nf_ct_kill_acct(struct nf_conn *ct, } EXPORT_SYMBOL_GPL(__nf_ct_kill_acct); +/* Built-in default zone used e.g. by modules. */ +const struct nf_conntrack_zone nf_ct_zone_dflt = { + .id = NF_CT_DEFAULT_ZONE_ID, + .dir = NF_CT_DEFAULT_ZONE_DIR, +}; +EXPORT_SYMBOL_GPL(nf_ct_zone_dflt); + #ifdef CONFIG_NF_CONNTRACK_ZONES static struct nf_ct_ext_type nf_ct_zone_extend __read_mostly = { .len = sizeof(struct nf_conntrack_zone), @@ -1596,8 +1599,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) struct nf_conntrack_tuple_hash, hnnode); ct = nf_ct_tuplehash_to_ctrack(h); hlist_nulls_del_rcu(&h->hnnode); - bucket = __hash_conntrack(&h->tuple, nf_ct_zone(ct), - hashsize); + bucket = __hash_conntrack(&h->tuple, hashsize); hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]); } } diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index b45a4223cb05..acf5c7b3f378 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -88,7 +88,8 @@ static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple } struct nf_conntrack_expect * -__nf_ct_expect_find(struct net *net, u16 zone, +__nf_ct_expect_find(struct net *net, + const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i; @@ -100,7 +101,7 @@ __nf_ct_expect_find(struct net *net, u16 zone, h = nf_ct_expect_dst_hash(tuple); hlist_for_each_entry_rcu(i, &net->ct.expect_hash[h], hnode) { if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) && - nf_ct_zone(i->master) == zone) + nf_ct_zone_equal_any(i->master, zone)) return i; } return NULL; @@ -109,7 +110,8 @@ EXPORT_SYMBOL_GPL(__nf_ct_expect_find); /* Just find a expectation corresponding to a tuple. */ struct nf_conntrack_expect * -nf_ct_expect_find_get(struct net *net, u16 zone, +nf_ct_expect_find_get(struct net *net, + const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i; @@ -127,7 +129,8 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_find_get); /* If an expectation for this connection is found, it gets delete from * global list then returned. */ struct nf_conntrack_expect * -nf_ct_find_expectation(struct net *net, u16 zone, +nf_ct_find_expectation(struct net *net, + const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i, *exp = NULL; @@ -140,7 +143,7 @@ nf_ct_find_expectation(struct net *net, u16 zone, hlist_for_each_entry(i, &net->ct.expect_hash[h], hnode) { if (!(i->flags & NF_CT_EXPECT_INACTIVE) && nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) && - nf_ct_zone(i->master) == zone) { + nf_ct_zone_equal_any(i->master, zone)) { exp = i; break; } @@ -220,16 +223,16 @@ static inline int expect_clash(const struct nf_conntrack_expect *a, } return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask) && - nf_ct_zone(a->master) == nf_ct_zone(b->master); + nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master)); } static inline int expect_matches(const struct nf_conntrack_expect *a, const struct nf_conntrack_expect *b) { return a->master == b->master && a->class == b->class && - nf_ct_tuple_equal(&a->tuple, &b->tuple) && - nf_ct_tuple_mask_equal(&a->mask, &b->mask) && - nf_ct_zone(a->master) == nf_ct_zone(b->master); + nf_ct_tuple_equal(&a->tuple, &b->tuple) && + nf_ct_tuple_mask_equal(&a->mask, &b->mask) && + nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master)); } /* Generally a bad idea to call this: could have matched already. */ diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 6b8b0abbfab4..94a66541e0b7 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -128,6 +128,20 @@ ctnetlink_dump_tuples(struct sk_buff *skb, } static inline int +ctnetlink_dump_zone_id(struct sk_buff *skb, int attrtype, + const struct nf_conntrack_zone *zone, int dir) +{ + if (zone->id == NF_CT_DEFAULT_ZONE_ID || zone->dir != dir) + return 0; + if (nla_put_be16(skb, attrtype, htons(zone->id))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static inline int ctnetlink_dump_status(struct sk_buff *skb, const struct nf_conn *ct) { if (nla_put_be32(skb, CTA_STATUS, htonl(ct->status))) @@ -458,6 +472,7 @@ static int ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, struct nf_conn *ct) { + const struct nf_conntrack_zone *zone; struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; struct nlattr *nest_parms; @@ -473,11 +488,16 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, nfmsg->version = NFNETLINK_V0; nfmsg->res_id = 0; + zone = nf_ct_zone(ct); + nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED); if (!nest_parms) goto nla_put_failure; if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) goto nla_put_failure; + if (ctnetlink_dump_zone_id(skb, CTA_TUPLE_ZONE, zone, + NF_CT_ZONE_DIR_ORIG) < 0) + goto nla_put_failure; nla_nest_end(skb, nest_parms); nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED); @@ -485,10 +505,13 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, goto nla_put_failure; if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0) goto nla_put_failure; + if (ctnetlink_dump_zone_id(skb, CTA_TUPLE_ZONE, zone, + NF_CT_ZONE_DIR_REPL) < 0) + goto nla_put_failure; nla_nest_end(skb, nest_parms); - if (nf_ct_zone(ct) && - nla_put_be16(skb, CTA_ZONE, htons(nf_ct_zone(ct)))) + if (ctnetlink_dump_zone_id(skb, CTA_ZONE, zone, + NF_CT_DEFAULT_ZONE_DIR) < 0) goto nla_put_failure; if (ctnetlink_dump_status(skb, ct) < 0 || @@ -598,7 +621,7 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct) + nla_total_size(sizeof(u_int32_t)) /* CTA_MARK */ #endif #ifdef CONFIG_NF_CONNTRACK_ZONES - + nla_total_size(sizeof(u_int16_t)) /* CTA_ZONE */ + + nla_total_size(sizeof(u_int16_t)) /* CTA_ZONE|CTA_TUPLE_ZONE */ #endif + ctnetlink_proto_size(ct) + ctnetlink_label_size(ct) @@ -609,6 +632,7 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct) static int ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) { + const struct nf_conntrack_zone *zone; struct net *net; struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; @@ -655,11 +679,16 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) nfmsg->res_id = 0; rcu_read_lock(); + zone = nf_ct_zone(ct); + nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED); if (!nest_parms) goto nla_put_failure; if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) goto nla_put_failure; + if (ctnetlink_dump_zone_id(skb, CTA_TUPLE_ZONE, zone, + NF_CT_ZONE_DIR_ORIG) < 0) + goto nla_put_failure; nla_nest_end(skb, nest_parms); nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED); @@ -667,10 +696,13 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) goto nla_put_failure; if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0) goto nla_put_failure; + if (ctnetlink_dump_zone_id(skb, CTA_TUPLE_ZONE, zone, + NF_CT_ZONE_DIR_REPL) < 0) + goto nla_put_failure; nla_nest_end(skb, nest_parms); - if (nf_ct_zone(ct) && - nla_put_be16(skb, CTA_ZONE, htons(nf_ct_zone(ct)))) + if (ctnetlink_dump_zone_id(skb, CTA_ZONE, zone, + NF_CT_DEFAULT_ZONE_DIR) < 0) goto nla_put_failure; if (ctnetlink_dump_id(skb, ct) < 0) @@ -920,15 +952,54 @@ ctnetlink_parse_tuple_proto(struct nlattr *attr, return ret; } +static int +ctnetlink_parse_zone(const struct nlattr *attr, + struct nf_conntrack_zone *zone) +{ + nf_ct_zone_init(zone, NF_CT_DEFAULT_ZONE_ID, + NF_CT_DEFAULT_ZONE_DIR, 0); +#ifdef CONFIG_NF_CONNTRACK_ZONES + if (attr) + zone->id = ntohs(nla_get_be16(attr)); +#else + if (attr) + return -EOPNOTSUPP; +#endif + return 0; +} + +static int +ctnetlink_parse_tuple_zone(struct nlattr *attr, enum ctattr_type type, + struct nf_conntrack_zone *zone) +{ + int ret; + + if (zone->id != NF_CT_DEFAULT_ZONE_ID) + return -EINVAL; + + ret = ctnetlink_parse_zone(attr, zone); + if (ret < 0) + return ret; + + if (type == CTA_TUPLE_REPLY) + zone->dir = NF_CT_ZONE_DIR_REPL; + else + zone->dir = NF_CT_ZONE_DIR_ORIG; + + return 0; +} + static const struct nla_policy tuple_nla_policy[CTA_TUPLE_MAX+1] = { [CTA_TUPLE_IP] = { .type = NLA_NESTED }, [CTA_TUPLE_PROTO] = { .type = NLA_NESTED }, + [CTA_TUPLE_ZONE] = { .type = NLA_U16 }, }; static int ctnetlink_parse_tuple(const struct nlattr * const cda[], struct nf_conntrack_tuple *tuple, - enum ctattr_type type, u_int8_t l3num) + enum ctattr_type type, u_int8_t l3num, + struct nf_conntrack_zone *zone) { struct nlattr *tb[CTA_TUPLE_MAX+1]; int err; @@ -955,6 +1026,16 @@ ctnetlink_parse_tuple(const struct nlattr * const cda[], if (err < 0) return err; + if (tb[CTA_TUPLE_ZONE]) { + if (!zone) + return -EINVAL; + + err = ctnetlink_parse_tuple_zone(tb[CTA_TUPLE_ZONE], + type, zone); + if (err < 0) + return err; + } + /* orig and expect tuples get DIR_ORIGINAL */ if (type == CTA_TUPLE_REPLY) tuple->dst.dir = IP_CT_DIR_REPLY; @@ -964,21 +1045,6 @@ ctnetlink_parse_tuple(const struct nlattr * const cda[], return 0; } -static int -ctnetlink_parse_zone(const struct nlattr *attr, u16 *zone) -{ - if (attr) -#ifdef CONFIG_NF_CONNTRACK_ZONES - *zone = ntohs(nla_get_be16(attr)); -#else - return -EOPNOTSUPP; -#endif - else - *zone = 0; - - return 0; -} - static const struct nla_policy help_nla_policy[CTA_HELP_MAX+1] = { [CTA_HELP_NAME] = { .type = NLA_NUL_STRING, .len = NF_CT_HELPER_NAME_LEN - 1 }, @@ -1058,7 +1124,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, struct nf_conn *ct; struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int8_t u3 = nfmsg->nfgen_family; - u16 zone; + struct nf_conntrack_zone zone; int err; err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone); @@ -1066,9 +1132,11 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, return err; if (cda[CTA_TUPLE_ORIG]) - err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, u3); + err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, + u3, &zone); else if (cda[CTA_TUPLE_REPLY]) - err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3); + err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, + u3, &zone); else { return ctnetlink_flush_conntrack(net, cda, NETLINK_CB(skb).portid, @@ -1078,7 +1146,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - h = nf_conntrack_find_get(net, zone, &tuple); + h = nf_conntrack_find_get(net, &zone, &tuple); if (!h) return -ENOENT; @@ -1112,7 +1180,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, struct sk_buff *skb2 = NULL; struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int8_t u3 = nfmsg->nfgen_family; - u16 zone; + struct nf_conntrack_zone zone; int err; if (nlh->nlmsg_flags & NLM_F_DUMP) { @@ -1138,16 +1206,18 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, return err; if (cda[CTA_TUPLE_ORIG]) - err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, u3); + err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, + u3, &zone); else if (cda[CTA_TUPLE_REPLY]) - err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3); + err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, + u3, &zone); else return -EINVAL; if (err < 0) return err; - h = nf_conntrack_find_get(net, zone, &tuple); + h = nf_conntrack_find_get(net, &zone, &tuple); if (!h) return -ENOENT; @@ -1645,7 +1715,8 @@ ctnetlink_change_conntrack(struct nf_conn *ct, } static struct nf_conn * -ctnetlink_create_conntrack(struct net *net, u16 zone, +ctnetlink_create_conntrack(struct net *net, + const struct nf_conntrack_zone *zone, const struct nlattr * const cda[], struct nf_conntrack_tuple *otuple, struct nf_conntrack_tuple *rtuple, @@ -1761,7 +1832,8 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, struct nf_conntrack_tuple_hash *master_h; struct nf_conn *master_ct; - err = ctnetlink_parse_tuple(cda, &master, CTA_TUPLE_MASTER, u3); + err = ctnetlink_parse_tuple(cda, &master, CTA_TUPLE_MASTER, + u3, NULL); if (err < 0) goto err2; @@ -1804,7 +1876,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, struct nfgenmsg *nfmsg = nlmsg_data(nlh); struct nf_conn *ct; u_int8_t u3 = nfmsg->nfgen_family; - u16 zone; + struct nf_conntrack_zone zone; int err; err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone); @@ -1812,21 +1884,23 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, return err; if (cda[CTA_TUPLE_ORIG]) { - err = ctnetlink_parse_tuple(cda, &otuple, CTA_TUPLE_ORIG, u3); + err = ctnetlink_parse_tuple(cda, &otuple, CTA_TUPLE_ORIG, + u3, &zone); if (err < 0) return err; } if (cda[CTA_TUPLE_REPLY]) { - err = ctnetlink_parse_tuple(cda, &rtuple, CTA_TUPLE_REPLY, u3); + err = ctnetlink_parse_tuple(cda, &rtuple, CTA_TUPLE_REPLY, + u3, &zone); if (err < 0) return err; } if (cda[CTA_TUPLE_ORIG]) - h = nf_conntrack_find_get(net, zone, &otuple); + h = nf_conntrack_find_get(net, &zone, &otuple); else if (cda[CTA_TUPLE_REPLY]) - h = nf_conntrack_find_get(net, zone, &rtuple); + h = nf_conntrack_find_get(net, &zone, &rtuple); if (h == NULL) { err = -ENOENT; @@ -1836,7 +1910,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, if (!cda[CTA_TUPLE_ORIG] || !cda[CTA_TUPLE_REPLY]) return -EINVAL; - ct = ctnetlink_create_conntrack(net, zone, cda, &otuple, + ct = ctnetlink_create_conntrack(net, &zone, cda, &otuple, &rtuple, u3); if (IS_ERR(ct)) return PTR_ERR(ct); @@ -2082,7 +2156,7 @@ ctnetlink_nfqueue_build_size(const struct nf_conn *ct) + nla_total_size(sizeof(u_int32_t)) /* CTA_MARK */ #endif #ifdef CONFIG_NF_CONNTRACK_ZONES - + nla_total_size(sizeof(u_int16_t)) /* CTA_ZONE */ + + nla_total_size(sizeof(u_int16_t)) /* CTA_ZONE|CTA_TUPLE_ZONE */ #endif + ctnetlink_proto_size(ct) ; @@ -2091,14 +2165,20 @@ ctnetlink_nfqueue_build_size(const struct nf_conn *ct) static int ctnetlink_nfqueue_build(struct sk_buff *skb, struct nf_conn *ct) { + const struct nf_conntrack_zone *zone; struct nlattr *nest_parms; rcu_read_lock(); + zone = nf_ct_zone(ct); + nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED); if (!nest_parms) goto nla_put_failure; if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) goto nla_put_failure; + if (ctnetlink_dump_zone_id(skb, CTA_TUPLE_ZONE, zone, + NF_CT_ZONE_DIR_ORIG) < 0) + goto nla_put_failure; nla_nest_end(skb, nest_parms); nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED); @@ -2106,12 +2186,14 @@ ctnetlink_nfqueue_build(struct sk_buff *skb, struct nf_conn *ct) goto nla_put_failure; if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0) goto nla_put_failure; + if (ctnetlink_dump_zone_id(skb, CTA_TUPLE_ZONE, zone, + NF_CT_ZONE_DIR_REPL) < 0) + goto nla_put_failure; nla_nest_end(skb, nest_parms); - if (nf_ct_zone(ct)) { - if (nla_put_be16(skb, CTA_ZONE, htons(nf_ct_zone(ct)))) - goto nla_put_failure; - } + if (ctnetlink_dump_zone_id(skb, CTA_ZONE, zone, + NF_CT_DEFAULT_ZONE_DIR) < 0) + goto nla_put_failure; if (ctnetlink_dump_id(skb, ct) < 0) goto nla_put_failure; @@ -2218,12 +2300,12 @@ static int ctnetlink_nfqueue_exp_parse(const struct nlattr * const *cda, int err; err = ctnetlink_parse_tuple(cda, tuple, CTA_EXPECT_TUPLE, - nf_ct_l3num(ct)); + nf_ct_l3num(ct), NULL); if (err < 0) return err; return ctnetlink_parse_tuple(cda, mask, CTA_EXPECT_MASK, - nf_ct_l3num(ct)); + nf_ct_l3num(ct), NULL); } static int @@ -2612,23 +2694,22 @@ static int ctnetlink_dump_exp_ct(struct sock *ctnl, struct sk_buff *skb, struct nf_conntrack_tuple tuple; struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; - u16 zone = 0; + struct nf_conntrack_zone zone; struct netlink_dump_control c = { .dump = ctnetlink_exp_ct_dump_table, .done = ctnetlink_exp_done, }; - err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, u3); + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, + u3, NULL); if (err < 0) return err; - if (cda[CTA_EXPECT_ZONE]) { - err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone); - if (err < 0) - return err; - } + err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone); + if (err < 0) + return err; - h = nf_conntrack_find_get(net, zone, &tuple); + h = nf_conntrack_find_get(net, &zone, &tuple); if (!h) return -ENOENT; @@ -2652,7 +2733,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, struct sk_buff *skb2; struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int8_t u3 = nfmsg->nfgen_family; - u16 zone; + struct nf_conntrack_zone zone; int err; if (nlh->nlmsg_flags & NLM_F_DUMP) { @@ -2672,16 +2753,18 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, return err; if (cda[CTA_EXPECT_TUPLE]) - err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3); + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, + u3, NULL); else if (cda[CTA_EXPECT_MASTER]) - err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, u3); + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, + u3, NULL); else return -EINVAL; if (err < 0) return err; - exp = nf_ct_expect_find_get(net, zone, &tuple); + exp = nf_ct_expect_find_get(net, &zone, &tuple); if (!exp) return -ENOENT; @@ -2732,8 +2815,8 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, struct nfgenmsg *nfmsg = nlmsg_data(nlh); struct hlist_node *next; u_int8_t u3 = nfmsg->nfgen_family; + struct nf_conntrack_zone zone; unsigned int i; - u16 zone; int err; if (cda[CTA_EXPECT_TUPLE]) { @@ -2742,12 +2825,13 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3); + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, + u3, NULL); if (err < 0) return err; /* bump usage count to 2 */ - exp = nf_ct_expect_find_get(net, zone, &tuple); + exp = nf_ct_expect_find_get(net, &zone, &tuple); if (!exp) return -ENOENT; @@ -2849,7 +2933,8 @@ ctnetlink_parse_expect_nat(const struct nlattr *attr, return -EINVAL; err = ctnetlink_parse_tuple((const struct nlattr * const *)tb, - &nat_tuple, CTA_EXPECT_NAT_TUPLE, u3); + &nat_tuple, CTA_EXPECT_NAT_TUPLE, + u3, NULL); if (err < 0) return err; @@ -2937,7 +3022,8 @@ err_out: } static int -ctnetlink_create_expect(struct net *net, u16 zone, +ctnetlink_create_expect(struct net *net, + const struct nf_conntrack_zone *zone, const struct nlattr * const cda[], u_int8_t u3, u32 portid, int report) { @@ -2949,13 +3035,16 @@ ctnetlink_create_expect(struct net *net, u16 zone, int err; /* caller guarantees that those three CTA_EXPECT_* exist */ - err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3); + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, + u3, NULL); if (err < 0) return err; - err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK, u3); + err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK, + u3, NULL); if (err < 0) return err; - err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER, u3); + err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER, + u3, NULL); if (err < 0) return err; @@ -3011,7 +3100,7 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, struct nf_conntrack_expect *exp; struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int8_t u3 = nfmsg->nfgen_family; - u16 zone; + struct nf_conntrack_zone zone; int err; if (!cda[CTA_EXPECT_TUPLE] @@ -3023,19 +3112,18 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3); + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, + u3, NULL); if (err < 0) return err; spin_lock_bh(&nf_conntrack_expect_lock); - exp = __nf_ct_expect_find(net, zone, &tuple); - + exp = __nf_ct_expect_find(net, &zone, &tuple); if (!exp) { spin_unlock_bh(&nf_conntrack_expect_lock); err = -ENOENT; if (nlh->nlmsg_flags & NLM_F_CREATE) { - err = ctnetlink_create_expect(net, zone, cda, - u3, + err = ctnetlink_create_expect(net, &zone, cda, u3, NETLINK_CB(skb).portid, nlmsg_report(nlh)); } diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 825c3e3f8305..5588c7ae1ac2 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -143,13 +143,14 @@ static int destroy_sibling_or_exp(struct net *net, struct nf_conn *ct, const struct nf_conntrack_tuple *t) { const struct nf_conntrack_tuple_hash *h; + const struct nf_conntrack_zone *zone; struct nf_conntrack_expect *exp; struct nf_conn *sibling; - u16 zone = nf_ct_zone(ct); pr_debug("trying to timeout ct or exp for tuple "); nf_ct_dump_tuple(t); + zone = nf_ct_zone(ct); h = nf_conntrack_find_get(net, zone, t); if (h) { sibling = nf_ct_tuplehash_to_ctrack(h); diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index fc823fa5dcf5..1fb3cacc04e1 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -140,6 +140,35 @@ static inline void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) } #endif +#ifdef CONFIG_NF_CONNTRACK_ZONES +static void ct_show_zone(struct seq_file *s, const struct nf_conn *ct, + int dir) +{ + const struct nf_conntrack_zone *zone = nf_ct_zone(ct); + + if (zone->dir != dir) + return; + switch (zone->dir) { + case NF_CT_DEFAULT_ZONE_DIR: + seq_printf(s, "zone=%u ", zone->id); + break; + case NF_CT_ZONE_DIR_ORIG: + seq_printf(s, "zone-orig=%u ", zone->id); + break; + case NF_CT_ZONE_DIR_REPL: + seq_printf(s, "zone-reply=%u ", zone->id); + break; + default: + break; + } +} +#else +static inline void ct_show_zone(struct seq_file *s, const struct nf_conn *ct, + int dir) +{ +} +#endif + #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP static void ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct) { @@ -202,6 +231,8 @@ static int ct_seq_show(struct seq_file *s, void *v) print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, l3proto, l4proto); + ct_show_zone(s, ct, NF_CT_ZONE_DIR_ORIG); + if (seq_has_overflowed(s)) goto release; @@ -214,6 +245,8 @@ static int ct_seq_show(struct seq_file *s, void *v) print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, l3proto, l4proto); + ct_show_zone(s, ct, NF_CT_ZONE_DIR_REPL); + if (seq_print_acct(s, ct, IP_CT_DIR_REPLY)) goto release; @@ -228,11 +261,7 @@ static int ct_seq_show(struct seq_file *s, void *v) #endif ct_show_secctx(s, ct); - -#ifdef CONFIG_NF_CONNTRACK_ZONES - seq_printf(s, "zone=%u ", nf_ct_zone(ct)); -#endif - + ct_show_zone(s, ct, NF_CT_DEFAULT_ZONE_DIR); ct_show_delta_time(s, ct); seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use)); diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 4e0b47831d43..5113dfd39df9 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -118,14 +118,13 @@ EXPORT_SYMBOL(nf_xfrm_me_harder); /* We keep an extra hash for each conntrack, for fast searching. */ static inline unsigned int -hash_by_src(const struct net *net, u16 zone, - const struct nf_conntrack_tuple *tuple) +hash_by_src(const struct net *net, const struct nf_conntrack_tuple *tuple) { unsigned int hash; /* Original src, to ensure we map it consistently if poss. */ hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32), - tuple->dst.protonum ^ zone ^ nf_conntrack_hash_rnd); + tuple->dst.protonum ^ nf_conntrack_hash_rnd); return reciprocal_scale(hash, net->ct.nat_htable_size); } @@ -185,20 +184,22 @@ same_src(const struct nf_conn *ct, /* Only called for SRC manip */ static int -find_appropriate_src(struct net *net, u16 zone, +find_appropriate_src(struct net *net, + const struct nf_conntrack_zone *zone, const struct nf_nat_l3proto *l3proto, const struct nf_nat_l4proto *l4proto, const struct nf_conntrack_tuple *tuple, struct nf_conntrack_tuple *result, const struct nf_nat_range *range) { - unsigned int h = hash_by_src(net, zone, tuple); + unsigned int h = hash_by_src(net, tuple); const struct nf_conn_nat *nat; const struct nf_conn *ct; hlist_for_each_entry_rcu(nat, &net->ct.nat_bysource[h], bysource) { ct = nat->ct; - if (same_src(ct, tuple) && nf_ct_zone(ct) == zone) { + if (same_src(ct, tuple) && + nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) { /* Copy source part from reply tuple. */ nf_ct_invert_tuplepr(result, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); @@ -218,7 +219,8 @@ find_appropriate_src(struct net *net, u16 zone, * the ip with the lowest src-ip/dst-ip/proto usage. */ static void -find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple, +find_best_ips_proto(const struct nf_conntrack_zone *zone, + struct nf_conntrack_tuple *tuple, const struct nf_nat_range *range, const struct nf_conn *ct, enum nf_nat_manip_type maniptype) @@ -258,7 +260,7 @@ find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple, */ j = jhash2((u32 *)&tuple->src.u3, sizeof(tuple->src.u3) / sizeof(u32), range->flags & NF_NAT_RANGE_PERSISTENT ? - 0 : (__force u32)tuple->dst.u3.all[max] ^ zone); + 0 : (__force u32)tuple->dst.u3.all[max] ^ zone->id); full_range = false; for (i = 0; i <= max; i++) { @@ -297,10 +299,12 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, struct nf_conn *ct, enum nf_nat_manip_type maniptype) { + const struct nf_conntrack_zone *zone; const struct nf_nat_l3proto *l3proto; const struct nf_nat_l4proto *l4proto; struct net *net = nf_ct_net(ct); - u16 zone = nf_ct_zone(ct); + + zone = nf_ct_zone(ct); rcu_read_lock(); l3proto = __nf_nat_l3proto_find(orig_tuple->src.l3num); @@ -420,7 +424,7 @@ nf_nat_setup_info(struct nf_conn *ct, if (maniptype == NF_NAT_MANIP_SRC) { unsigned int srchash; - srchash = hash_by_src(net, nf_ct_zone(ct), + srchash = hash_by_src(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); spin_lock_bh(&nf_nat_lock); /* nf_conntrack_alter_reply might re-allocate extension aera */ diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index 14f8b43ec5a7..8fbbdb09826e 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -17,10 +17,12 @@ #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_tcpudp.h> #include <linux/netfilter/xt_SYNPROXY.h> + #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_extend.h> #include <net/netfilter/nf_conntrack_seqadj.h> #include <net/netfilter/nf_conntrack_synproxy.h> +#include <net/netfilter/nf_conntrack_zones.h> int synproxy_net_id; EXPORT_SYMBOL_GPL(synproxy_net_id); @@ -352,7 +354,7 @@ static int __net_init synproxy_net_init(struct net *net) struct nf_conn *ct; int err = -ENOMEM; - ct = nf_ct_tmpl_alloc(net, 0, GFP_KERNEL); + ct = nf_ct_tmpl_alloc(net, &nf_ct_zone_dflt, GFP_KERNEL); if (!ct) goto err1; diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index c18af2f63eef..fefbf5f0b28d 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -27,8 +27,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); MODULE_DESCRIPTION("nfacct: Extended Netfilter accounting infrastructure"); -static LIST_HEAD(nfnl_acct_list); - struct nf_acct { atomic64_t pkts; atomic64_t bytes; @@ -53,6 +51,7 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const tb[]) { struct nf_acct *nfacct, *matching = NULL; + struct net *net = sock_net(nfnl); char *acct_name; unsigned int size = 0; u32 flags = 0; @@ -64,7 +63,7 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, if (strlen(acct_name) == 0) return -EINVAL; - list_for_each_entry(nfacct, &nfnl_acct_list, head) { + list_for_each_entry(nfacct, &net->nfnl_acct_list, head) { if (strncmp(nfacct->name, acct_name, NFACCT_NAME_MAX) != 0) continue; @@ -124,7 +123,7 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, be64_to_cpu(nla_get_be64(tb[NFACCT_PKTS]))); } atomic_set(&nfacct->refcnt, 1); - list_add_tail_rcu(&nfacct->head, &nfnl_acct_list); + list_add_tail_rcu(&nfacct->head, &net->nfnl_acct_list); return 0; } @@ -185,6 +184,7 @@ nla_put_failure: static int nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = sock_net(skb->sk); struct nf_acct *cur, *last; const struct nfacct_filter *filter = cb->data; @@ -196,7 +196,7 @@ nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb) cb->args[1] = 0; rcu_read_lock(); - list_for_each_entry_rcu(cur, &nfnl_acct_list, head) { + list_for_each_entry_rcu(cur, &net->nfnl_acct_list, head) { if (last) { if (cur != last) continue; @@ -257,6 +257,7 @@ static int nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const tb[]) { + struct net *net = sock_net(nfnl); int ret = -ENOENT; struct nf_acct *cur; char *acct_name; @@ -283,7 +284,7 @@ nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb, return -EINVAL; acct_name = nla_data(tb[NFACCT_NAME]); - list_for_each_entry(cur, &nfnl_acct_list, head) { + list_for_each_entry(cur, &net->nfnl_acct_list, head) { struct sk_buff *skb2; if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0) @@ -336,19 +337,20 @@ static int nfnl_acct_del(struct sock *nfnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const tb[]) { + struct net *net = sock_net(nfnl); char *acct_name; struct nf_acct *cur; int ret = -ENOENT; if (!tb[NFACCT_NAME]) { - list_for_each_entry(cur, &nfnl_acct_list, head) + list_for_each_entry(cur, &net->nfnl_acct_list, head) nfnl_acct_try_del(cur); return 0; } acct_name = nla_data(tb[NFACCT_NAME]); - list_for_each_entry(cur, &nfnl_acct_list, head) { + list_for_each_entry(cur, &net->nfnl_acct_list, head) { if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX) != 0) continue; @@ -394,12 +396,12 @@ static const struct nfnetlink_subsystem nfnl_acct_subsys = { MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_ACCT); -struct nf_acct *nfnl_acct_find_get(const char *acct_name) +struct nf_acct *nfnl_acct_find_get(struct net *net, const char *acct_name) { struct nf_acct *cur, *acct = NULL; rcu_read_lock(); - list_for_each_entry_rcu(cur, &nfnl_acct_list, head) { + list_for_each_entry_rcu(cur, &net->nfnl_acct_list, head) { if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0) continue; @@ -422,7 +424,9 @@ EXPORT_SYMBOL_GPL(nfnl_acct_find_get); void nfnl_acct_put(struct nf_acct *acct) { - atomic_dec(&acct->refcnt); + if (atomic_dec_and_test(&acct->refcnt)) + kfree_rcu(acct, rcu_head); + module_put(THIS_MODULE); } EXPORT_SYMBOL_GPL(nfnl_acct_put); @@ -478,34 +482,59 @@ int nfnl_acct_overquota(const struct sk_buff *skb, struct nf_acct *nfacct) } EXPORT_SYMBOL_GPL(nfnl_acct_overquota); +static int __net_init nfnl_acct_net_init(struct net *net) +{ + INIT_LIST_HEAD(&net->nfnl_acct_list); + + return 0; +} + +static void __net_exit nfnl_acct_net_exit(struct net *net) +{ + struct nf_acct *cur, *tmp; + + list_for_each_entry_safe(cur, tmp, &net->nfnl_acct_list, head) { + list_del_rcu(&cur->head); + + if (atomic_dec_and_test(&cur->refcnt)) + kfree_rcu(cur, rcu_head); + } +} + +static struct pernet_operations nfnl_acct_ops = { + .init = nfnl_acct_net_init, + .exit = nfnl_acct_net_exit, +}; + static int __init nfnl_acct_init(void) { int ret; + ret = register_pernet_subsys(&nfnl_acct_ops); + if (ret < 0) { + pr_err("nfnl_acct_init: failed to register pernet ops\n"); + goto err_out; + } + pr_info("nfnl_acct: registering with nfnetlink.\n"); ret = nfnetlink_subsys_register(&nfnl_acct_subsys); if (ret < 0) { pr_err("nfnl_acct_init: cannot register with nfnetlink.\n"); - goto err_out; + goto cleanup_pernet; } return 0; + +cleanup_pernet: + unregister_pernet_subsys(&nfnl_acct_ops); err_out: return ret; } static void __exit nfnl_acct_exit(void) { - struct nf_acct *cur, *tmp; - pr_info("nfnl_acct: unregistering from nfnetlink.\n"); nfnetlink_subsys_unregister(&nfnl_acct_subsys); - - list_for_each_entry_safe(cur, tmp, &nfnl_acct_list, head) { - list_del_rcu(&cur->head); - /* We are sure that our objects have no clients at this point, - * it's safe to release them all without checking refcnt. */ - kfree_rcu(cur, rcu_head); - } + unregister_pernet_subsys(&nfnl_acct_ops); } module_init(nfnl_acct_init); diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c index 17591239229f..1067fb4c1ffa 100644 --- a/net/netfilter/nft_counter.c +++ b/net/netfilter/nft_counter.c @@ -18,39 +18,59 @@ #include <net/netfilter/nf_tables.h> struct nft_counter { - seqlock_t lock; u64 bytes; u64 packets; }; +struct nft_counter_percpu { + struct nft_counter counter; + struct u64_stats_sync syncp; +}; + +struct nft_counter_percpu_priv { + struct nft_counter_percpu __percpu *counter; +}; + static void nft_counter_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { - struct nft_counter *priv = nft_expr_priv(expr); - - write_seqlock_bh(&priv->lock); - priv->bytes += pkt->skb->len; - priv->packets++; - write_sequnlock_bh(&priv->lock); + struct nft_counter_percpu_priv *priv = nft_expr_priv(expr); + struct nft_counter_percpu *this_cpu; + + local_bh_disable(); + this_cpu = this_cpu_ptr(priv->counter); + u64_stats_update_begin(&this_cpu->syncp); + this_cpu->counter.bytes += pkt->skb->len; + this_cpu->counter.packets++; + u64_stats_update_end(&this_cpu->syncp); + local_bh_enable(); } static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr) { - struct nft_counter *priv = nft_expr_priv(expr); + struct nft_counter_percpu_priv *priv = nft_expr_priv(expr); + struct nft_counter_percpu *cpu_stats; + struct nft_counter total; + u64 bytes, packets; unsigned int seq; - u64 bytes; - u64 packets; - - do { - seq = read_seqbegin(&priv->lock); - bytes = priv->bytes; - packets = priv->packets; - } while (read_seqretry(&priv->lock, seq)); - - if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(bytes))) - goto nla_put_failure; - if (nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(packets))) + int cpu; + + memset(&total, 0, sizeof(total)); + for_each_possible_cpu(cpu) { + cpu_stats = per_cpu_ptr(priv->counter, cpu); + do { + seq = u64_stats_fetch_begin_irq(&cpu_stats->syncp); + bytes = cpu_stats->counter.bytes; + packets = cpu_stats->counter.packets; + } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, seq)); + + total.packets += packets; + total.bytes += bytes; + } + + if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes)) || + nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(total.packets))) goto nla_put_failure; return 0; @@ -67,23 +87,44 @@ static int nft_counter_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { - struct nft_counter *priv = nft_expr_priv(expr); + struct nft_counter_percpu_priv *priv = nft_expr_priv(expr); + struct nft_counter_percpu __percpu *cpu_stats; + struct nft_counter_percpu *this_cpu; + + cpu_stats = netdev_alloc_pcpu_stats(struct nft_counter_percpu); + if (cpu_stats == NULL) + return ENOMEM; + + preempt_disable(); + this_cpu = this_cpu_ptr(cpu_stats); + if (tb[NFTA_COUNTER_PACKETS]) { + this_cpu->counter.packets = + be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS])); + } + if (tb[NFTA_COUNTER_BYTES]) { + this_cpu->counter.bytes = + be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES])); + } + preempt_enable(); + priv->counter = cpu_stats; + return 0; +} - if (tb[NFTA_COUNTER_PACKETS]) - priv->packets = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS])); - if (tb[NFTA_COUNTER_BYTES]) - priv->bytes = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES])); +static void nft_counter_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + struct nft_counter_percpu_priv *priv = nft_expr_priv(expr); - seqlock_init(&priv->lock); - return 0; + free_percpu(priv->counter); } static struct nft_expr_type nft_counter_type; static const struct nft_expr_ops nft_counter_ops = { .type = &nft_counter_type, - .size = NFT_EXPR_SIZE(sizeof(struct nft_counter)), + .size = NFT_EXPR_SIZE(sizeof(struct nft_counter_percpu_priv)), .eval = nft_counter_eval, .init = nft_counter_init, + .destroy = nft_counter_destroy, .dump = nft_counter_dump, }; diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c index 435c1ccd6c0e..5d67938f8b2f 100644 --- a/net/netfilter/nft_limit.c +++ b/net/netfilter/nft_limit.c @@ -20,63 +20,79 @@ static DEFINE_SPINLOCK(limit_lock); struct nft_limit { + u64 last; u64 tokens; + u64 tokens_max; u64 rate; - u64 unit; - unsigned long stamp; + u64 nsecs; + u32 burst; }; -static void nft_limit_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) +static inline bool nft_limit_eval(struct nft_limit *limit, u64 cost) { - struct nft_limit *priv = nft_expr_priv(expr); + u64 now, tokens; + s64 delta; spin_lock_bh(&limit_lock); - if (time_after_eq(jiffies, priv->stamp)) { - priv->tokens = priv->rate; - priv->stamp = jiffies + priv->unit * HZ; - } - - if (priv->tokens >= 1) { - priv->tokens--; + now = ktime_get_ns(); + tokens = limit->tokens + now - limit->last; + if (tokens > limit->tokens_max) + tokens = limit->tokens_max; + + limit->last = now; + delta = tokens - cost; + if (delta >= 0) { + limit->tokens = delta; spin_unlock_bh(&limit_lock); - return; + return false; } + limit->tokens = tokens; spin_unlock_bh(&limit_lock); - - regs->verdict.code = NFT_BREAK; + return true; } -static const struct nla_policy nft_limit_policy[NFTA_LIMIT_MAX + 1] = { - [NFTA_LIMIT_RATE] = { .type = NLA_U64 }, - [NFTA_LIMIT_UNIT] = { .type = NLA_U64 }, -}; - -static int nft_limit_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, +static int nft_limit_init(struct nft_limit *limit, const struct nlattr * const tb[]) { - struct nft_limit *priv = nft_expr_priv(expr); + u64 unit; if (tb[NFTA_LIMIT_RATE] == NULL || tb[NFTA_LIMIT_UNIT] == NULL) return -EINVAL; - priv->rate = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_RATE])); - priv->unit = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_UNIT])); - priv->stamp = jiffies + priv->unit * HZ; - priv->tokens = priv->rate; + limit->rate = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_RATE])); + unit = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_UNIT])); + limit->nsecs = unit * NSEC_PER_SEC; + if (limit->rate == 0 || limit->nsecs < unit) + return -EOVERFLOW; + limit->tokens = limit->tokens_max = limit->nsecs; + + if (tb[NFTA_LIMIT_BURST]) { + u64 rate; + + limit->burst = ntohl(nla_get_be32(tb[NFTA_LIMIT_BURST])); + + rate = limit->rate + limit->burst; + if (rate < limit->rate) + return -EOVERFLOW; + + limit->rate = rate; + } + limit->last = ktime_get_ns(); + return 0; } -static int nft_limit_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_limit_dump(struct sk_buff *skb, const struct nft_limit *limit, + enum nft_limit_type type) { - const struct nft_limit *priv = nft_expr_priv(expr); + u64 secs = div_u64(limit->nsecs, NSEC_PER_SEC); + u64 rate = limit->rate - limit->burst; - if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(priv->rate))) - goto nla_put_failure; - if (nla_put_be64(skb, NFTA_LIMIT_UNIT, cpu_to_be64(priv->unit))) + if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(rate)) || + nla_put_be64(skb, NFTA_LIMIT_UNIT, cpu_to_be64(secs)) || + nla_put_be32(skb, NFTA_LIMIT_BURST, htonl(limit->burst)) || + nla_put_be32(skb, NFTA_LIMIT_TYPE, htonl(type))) goto nla_put_failure; return 0; @@ -84,18 +100,114 @@ nla_put_failure: return -1; } +struct nft_limit_pkts { + struct nft_limit limit; + u64 cost; +}; + +static void nft_limit_pkts_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_limit_pkts *priv = nft_expr_priv(expr); + + if (nft_limit_eval(&priv->limit, priv->cost)) + regs->verdict.code = NFT_BREAK; +} + +static const struct nla_policy nft_limit_policy[NFTA_LIMIT_MAX + 1] = { + [NFTA_LIMIT_RATE] = { .type = NLA_U64 }, + [NFTA_LIMIT_UNIT] = { .type = NLA_U64 }, + [NFTA_LIMIT_BURST] = { .type = NLA_U32 }, + [NFTA_LIMIT_TYPE] = { .type = NLA_U32 }, +}; + +static int nft_limit_pkts_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_limit_pkts *priv = nft_expr_priv(expr); + int err; + + err = nft_limit_init(&priv->limit, tb); + if (err < 0) + return err; + + priv->cost = div_u64(priv->limit.nsecs, priv->limit.rate); + return 0; +} + +static int nft_limit_pkts_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_limit_pkts *priv = nft_expr_priv(expr); + + return nft_limit_dump(skb, &priv->limit, NFT_LIMIT_PKTS); +} + static struct nft_expr_type nft_limit_type; -static const struct nft_expr_ops nft_limit_ops = { +static const struct nft_expr_ops nft_limit_pkts_ops = { + .type = &nft_limit_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_limit_pkts)), + .eval = nft_limit_pkts_eval, + .init = nft_limit_pkts_init, + .dump = nft_limit_pkts_dump, +}; + +static void nft_limit_pkt_bytes_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_limit *priv = nft_expr_priv(expr); + u64 cost = div_u64(priv->nsecs * pkt->skb->len, priv->rate); + + if (nft_limit_eval(priv, cost)) + regs->verdict.code = NFT_BREAK; +} + +static int nft_limit_pkt_bytes_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_limit *priv = nft_expr_priv(expr); + + return nft_limit_init(priv, tb); +} + +static int nft_limit_pkt_bytes_dump(struct sk_buff *skb, + const struct nft_expr *expr) +{ + const struct nft_limit *priv = nft_expr_priv(expr); + + return nft_limit_dump(skb, priv, NFT_LIMIT_PKT_BYTES); +} + +static const struct nft_expr_ops nft_limit_pkt_bytes_ops = { .type = &nft_limit_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_limit)), - .eval = nft_limit_eval, - .init = nft_limit_init, - .dump = nft_limit_dump, + .eval = nft_limit_pkt_bytes_eval, + .init = nft_limit_pkt_bytes_init, + .dump = nft_limit_pkt_bytes_dump, }; +static const struct nft_expr_ops * +nft_limit_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) +{ + if (tb[NFTA_LIMIT_TYPE] == NULL) + return &nft_limit_pkts_ops; + + switch (ntohl(nla_get_be32(tb[NFTA_LIMIT_TYPE]))) { + case NFT_LIMIT_PKTS: + return &nft_limit_pkts_ops; + case NFT_LIMIT_PKT_BYTES: + return &nft_limit_pkt_bytes_ops; + } + return ERR_PTR(-EOPNOTSUPP); +} + static struct nft_expr_type nft_limit_type __read_mostly = { .name = "limit", - .ops = &nft_limit_ops, + .select_ops = nft_limit_select_ops, .policy = nft_limit_policy, .maxattr = NFTA_LIMIT_MAX, .flags = NFT_EXPR_STATEFUL, diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 94fb3b27a2c5..09b4b07eb676 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -9,6 +9,7 @@ */ #include <linux/kernel.h> +#include <linux/if_vlan.h> #include <linux/init.h> #include <linux/module.h> #include <linux/netlink.h> @@ -17,6 +18,53 @@ #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_tables.h> +/* add vlan header into the user buffer for if tag was removed by offloads */ +static bool +nft_payload_copy_vlan(u32 *d, const struct sk_buff *skb, u8 offset, u8 len) +{ + int mac_off = skb_mac_header(skb) - skb->data; + u8 vlan_len, *vlanh, *dst_u8 = (u8 *) d; + struct vlan_ethhdr veth; + + vlanh = (u8 *) &veth; + if (offset < ETH_HLEN) { + u8 ethlen = min_t(u8, len, ETH_HLEN - offset); + + if (skb_copy_bits(skb, mac_off, &veth, ETH_HLEN)) + return false; + + veth.h_vlan_proto = skb->vlan_proto; + + memcpy(dst_u8, vlanh + offset, ethlen); + + len -= ethlen; + if (len == 0) + return true; + + dst_u8 += ethlen; + offset = ETH_HLEN; + } else if (offset >= VLAN_ETH_HLEN) { + offset -= VLAN_HLEN; + goto skip; + } + + veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb)); + veth.h_vlan_encapsulated_proto = skb->protocol; + + vlanh += offset; + + vlan_len = min_t(u8, len, VLAN_ETH_HLEN - offset); + memcpy(dst_u8, vlanh, vlan_len); + + len -= vlan_len; + if (!len) + return true; + + dst_u8 += vlan_len; + skip: + return skb_copy_bits(skb, offset + mac_off, dst_u8, len) == 0; +} + static void nft_payload_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -26,10 +74,18 @@ static void nft_payload_eval(const struct nft_expr *expr, u32 *dest = ®s->data[priv->dreg]; int offset; + dest[priv->len / NFT_REG32_SIZE] = 0; switch (priv->base) { case NFT_PAYLOAD_LL_HEADER: if (!skb_mac_header_was_set(skb)) goto err; + + if (skb_vlan_tag_present(skb)) { + if (!nft_payload_copy_vlan(dest, skb, + priv->offset, priv->len)) + goto err; + return; + } offset = skb_mac_header(skb) - skb->data; break; case NFT_PAYLOAD_NETWORK_HEADER: @@ -43,7 +99,6 @@ static void nft_payload_eval(const struct nft_expr *expr, } offset += priv->offset; - dest[priv->len / NFT_REG32_SIZE] = 0; if (skb_copy_bits(skb, offset, dest, priv->len) < 0) goto err; return; diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 43ddeee404e9..8e524898ccea 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -181,9 +181,23 @@ out: #endif } +static u16 xt_ct_flags_to_dir(const struct xt_ct_target_info_v1 *info) +{ + switch (info->flags & (XT_CT_ZONE_DIR_ORIG | + XT_CT_ZONE_DIR_REPL)) { + case XT_CT_ZONE_DIR_ORIG: + return NF_CT_ZONE_DIR_ORIG; + case XT_CT_ZONE_DIR_REPL: + return NF_CT_ZONE_DIR_REPL; + default: + return NF_CT_DEFAULT_ZONE_DIR; + } +} + static int xt_ct_tg_check(const struct xt_tgchk_param *par, struct xt_ct_target_info_v1 *info) { + struct nf_conntrack_zone zone; struct nf_conn *ct; int ret = -EOPNOTSUPP; @@ -193,7 +207,9 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par, } #ifndef CONFIG_NF_CONNTRACK_ZONES - if (info->zone) + if (info->zone || info->flags & (XT_CT_ZONE_DIR_ORIG | + XT_CT_ZONE_DIR_REPL | + XT_CT_ZONE_MARK)) goto err1; #endif @@ -201,7 +217,13 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par, if (ret < 0) goto err1; - ct = nf_ct_tmpl_alloc(par->net, info->zone, GFP_KERNEL); + memset(&zone, 0, sizeof(zone)); + zone.id = info->zone; + zone.dir = xt_ct_flags_to_dir(info); + if (info->flags & XT_CT_ZONE_MARK) + zone.flags |= NF_CT_FLAG_MARK; + + ct = nf_ct_tmpl_alloc(par->net, &zone, GFP_KERNEL); if (!ct) { ret = -ENOMEM; goto err2; diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index c5d6556dbc5e..49fee6aa2c0a 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -10,26 +10,15 @@ * modify it under the terms of the GNU General Public License * version 2 or later, as published by the Free Software Foundation. */ -#include <linux/ip.h> #include <linux/module.h> -#include <linux/percpu.h> -#include <linux/route.h> #include <linux/skbuff.h> -#include <linux/notifier.h> -#include <net/checksum.h> -#include <net/icmp.h> -#include <net/ip.h> -#include <net/ipv6.h> -#include <net/ip6_route.h> -#include <net/route.h> +#include <linux/route.h> #include <linux/netfilter/x_tables.h> +#include <net/route.h> +#include <net/netfilter/ipv4/nf_dup_ipv4.h> +#include <net/netfilter/ipv6/nf_dup_ipv6.h> #include <linux/netfilter/xt_TEE.h> -#if IS_ENABLED(CONFIG_NF_CONNTRACK) -# define WITH_CONNTRACK 1 -# include <net/netfilter/nf_conntrack.h> -#endif - struct xt_tee_priv { struct notifier_block notifier; struct xt_tee_tginfo *tginfo; @@ -38,161 +27,24 @@ struct xt_tee_priv { static const union nf_inet_addr tee_zero_address; -static struct net *pick_net(struct sk_buff *skb) -{ -#ifdef CONFIG_NET_NS - const struct dst_entry *dst; - - if (skb->dev != NULL) - return dev_net(skb->dev); - dst = skb_dst(skb); - if (dst != NULL && dst->dev != NULL) - return dev_net(dst->dev); -#endif - return &init_net; -} - -static bool -tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info) -{ - const struct iphdr *iph = ip_hdr(skb); - struct net *net = pick_net(skb); - struct rtable *rt; - struct flowi4 fl4; - - memset(&fl4, 0, sizeof(fl4)); - if (info->priv) { - if (info->priv->oif == -1) - return false; - fl4.flowi4_oif = info->priv->oif; - } - fl4.daddr = info->gw.ip; - fl4.flowi4_tos = RT_TOS(iph->tos); - fl4.flowi4_scope = RT_SCOPE_UNIVERSE; - fl4.flowi4_flags = FLOWI_FLAG_KNOWN_NH; - rt = ip_route_output_key(net, &fl4); - if (IS_ERR(rt)) - return false; - - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - skb->dev = rt->dst.dev; - skb->protocol = htons(ETH_P_IP); - return true; -} - static unsigned int tee_tg4(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tee_tginfo *info = par->targinfo; - struct iphdr *iph; - if (__this_cpu_read(nf_skb_duplicated)) - return XT_CONTINUE; - /* - * Copy the skb, and route the copy. Will later return %XT_CONTINUE for - * the original skb, which should continue on its way as if nothing has - * happened. The copy should be independently delivered to the TEE - * --gateway. - */ - skb = pskb_copy(skb, GFP_ATOMIC); - if (skb == NULL) - return XT_CONTINUE; - -#ifdef WITH_CONNTRACK - /* Avoid counting cloned packets towards the original connection. */ - nf_conntrack_put(skb->nfct); - skb->nfct = &nf_ct_untracked_get()->ct_general; - skb->nfctinfo = IP_CT_NEW; - nf_conntrack_get(skb->nfct); -#endif - /* - * If we are in PREROUTING/INPUT, the checksum must be recalculated - * since the length could have changed as a result of defragmentation. - * - * We also decrease the TTL to mitigate potential TEE loops - * between two hosts. - * - * Set %IP_DF so that the original source is notified of a potentially - * decreased MTU on the clone route. IPv6 does this too. - */ - iph = ip_hdr(skb); - iph->frag_off |= htons(IP_DF); - if (par->hooknum == NF_INET_PRE_ROUTING || - par->hooknum == NF_INET_LOCAL_IN) - --iph->ttl; - ip_send_check(iph); + nf_dup_ipv4(skb, par->hooknum, &info->gw.in, info->priv->oif); - if (tee_tg_route4(skb, info)) { - __this_cpu_write(nf_skb_duplicated, true); - ip_local_out(skb); - __this_cpu_write(nf_skb_duplicated, false); - } else { - kfree_skb(skb); - } return XT_CONTINUE; } #if IS_ENABLED(CONFIG_IPV6) -static bool -tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info) -{ - const struct ipv6hdr *iph = ipv6_hdr(skb); - struct net *net = pick_net(skb); - struct dst_entry *dst; - struct flowi6 fl6; - - memset(&fl6, 0, sizeof(fl6)); - if (info->priv) { - if (info->priv->oif == -1) - return false; - fl6.flowi6_oif = info->priv->oif; - } - fl6.daddr = info->gw.in6; - fl6.flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) | - (iph->flow_lbl[1] << 8) | iph->flow_lbl[2]; - fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH; - dst = ip6_route_output(net, NULL, &fl6); - if (dst->error) { - dst_release(dst); - return false; - } - skb_dst_drop(skb); - skb_dst_set(skb, dst); - skb->dev = dst->dev; - skb->protocol = htons(ETH_P_IPV6); - return true; -} - static unsigned int tee_tg6(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tee_tginfo *info = par->targinfo; - if (__this_cpu_read(nf_skb_duplicated)) - return XT_CONTINUE; - skb = pskb_copy(skb, GFP_ATOMIC); - if (skb == NULL) - return XT_CONTINUE; + nf_dup_ipv6(skb, par->hooknum, &info->gw.in6, info->priv->oif); -#ifdef WITH_CONNTRACK - nf_conntrack_put(skb->nfct); - skb->nfct = &nf_ct_untracked_get()->ct_general; - skb->nfctinfo = IP_CT_NEW; - nf_conntrack_get(skb->nfct); -#endif - if (par->hooknum == NF_INET_PRE_ROUTING || - par->hooknum == NF_INET_LOCAL_IN) { - struct ipv6hdr *iph = ipv6_hdr(skb); - --iph->hop_limit; - } - if (tee_tg_route6(skb, info)) { - __this_cpu_write(nf_skb_duplicated, true); - ip6_local_out(skb); - __this_cpu_write(nf_skb_duplicated, false); - } else { - kfree_skb(skb); - } return XT_CONTINUE; } #endif diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 29ba6218a820..075d89d94d28 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -134,7 +134,7 @@ static bool add_hlist(struct hlist_head *head, static unsigned int check_hlist(struct net *net, struct hlist_head *head, const struct nf_conntrack_tuple *tuple, - u16 zone, + const struct nf_conntrack_zone *zone, bool *addit) { const struct nf_conntrack_tuple_hash *found; @@ -201,7 +201,7 @@ static unsigned int count_tree(struct net *net, struct rb_root *root, const struct nf_conntrack_tuple *tuple, const union nf_inet_addr *addr, const union nf_inet_addr *mask, - u8 family, u16 zone) + u8 family, const struct nf_conntrack_zone *zone) { struct xt_connlimit_rb *gc_nodes[CONNLIMIT_GC_MAX_NODES]; struct rb_node **rbnode, *parent; @@ -290,7 +290,8 @@ static int count_them(struct net *net, const struct nf_conntrack_tuple *tuple, const union nf_inet_addr *addr, const union nf_inet_addr *mask, - u_int8_t family, u16 zone) + u_int8_t family, + const struct nf_conntrack_zone *zone) { struct rb_root *root; int count; @@ -321,10 +322,10 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) union nf_inet_addr addr; struct nf_conntrack_tuple tuple; const struct nf_conntrack_tuple *tuple_ptr = &tuple; + const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt; enum ip_conntrack_info ctinfo; const struct nf_conn *ct; unsigned int connections; - u16 zone = NF_CT_DEFAULT_ZONE; ct = nf_ct_get(skb, &ctinfo); if (ct != NULL) { diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c index 8c646ed9c921..3048a7e3a90a 100644 --- a/net/netfilter/xt_nfacct.c +++ b/net/netfilter/xt_nfacct.c @@ -37,7 +37,7 @@ nfacct_mt_checkentry(const struct xt_mtchk_param *par) struct xt_nfacct_match_info *info = par->matchinfo; struct nf_acct *nfacct; - nfacct = nfnl_acct_find_get(info->name); + nfacct = nfnl_acct_find_get(par->net, info->name); if (nfacct == NULL) { pr_info("xt_nfacct: accounting object with name `%s' " "does not exists\n", info->name); diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index f2b540220ad0..5019a47b9270 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -37,6 +37,7 @@ static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a, struct nf_conntrack_tuple tuple; enum ip_conntrack_info ctinfo; struct tcf_connmark_info *ca = a->priv; + struct nf_conntrack_zone zone; struct nf_conn *c; int proto; @@ -70,7 +71,10 @@ static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a, proto, &tuple)) goto out; - thash = nf_conntrack_find_get(dev_net(skb->dev), ca->zone, &tuple); + zone.id = ca->zone; + zone.dir = NF_CT_DEFAULT_ZONE_DIR; + + thash = nf_conntrack_find_get(dev_net(skb->dev), &zone, &tuple); if (!thash) goto out; |