diff options
author | David S. Miller <davem@davemloft.net> | 2021-10-18 16:05:25 +0300 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2021-10-18 16:05:25 +0300 |
commit | 7adaf56edd03751badc3f045e664e30f9d1b195e (patch) | |
tree | 5e79414c19c7076792a0190d46a3aa9ee3d30ba5 /include | |
parent | c87350ced118ef898ffcb7badacf0939010f2549 (diff) | |
parent | ffdd33dd9c12a8c263f78d778066709ef94671f9 (diff) | |
download | linux-7adaf56edd03751badc3f045e664e30f9d1b195e.tar.xz |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
Pablo Neira Ayuso says:
====================
Netfilter/IPVS updates for net-next
The following patchset contains Netfilter/IPVS for net-next:
1) Add new run_estimation toggle to IPVS to stop the estimation_timer
logic, from Dust Li.
2) Relax superfluous dynset check on NFT_SET_TIMEOUT.
3) Add egress hook, from Lukas Wunner.
4) Nowadays, almost all hook functions in x_table land just call the hook
evaluation loop. Remove remaining hook wrappers from iptables and IPVS.
From Florian Westphal.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/netdevice.h | 4 | ||||
-rw-r--r-- | include/linux/netfilter_arp/arp_tables.h | 5 | ||||
-rw-r--r-- | include/linux/netfilter_bridge/ebtables.h | 5 | ||||
-rw-r--r-- | include/linux/netfilter_ingress.h | 58 | ||||
-rw-r--r-- | include/linux/netfilter_ipv4/ip_tables.h | 6 | ||||
-rw-r--r-- | include/linux/netfilter_ipv6/ip6_tables.h | 5 | ||||
-rw-r--r-- | include/linux/netfilter_netdev.h | 146 | ||||
-rw-r--r-- | include/linux/skbuff.h | 4 | ||||
-rw-r--r-- | include/net/ip_vs.h | 11 | ||||
-rw-r--r-- | include/uapi/linux/netfilter.h | 1 |
10 files changed, 175 insertions, 70 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f9cd6fea213f..3ec42495a43a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1861,6 +1861,7 @@ enum netdev_ml_priv_type { * @xps_maps: XXX: need comments on this one * @miniq_egress: clsact qdisc specific data for * egress processing + * @nf_hooks_egress: netfilter hooks executed for egress packets * @qdisc_hash: qdisc hash table * @watchdog_timeo: Represents the timeout that is used by * the watchdog (see dev_watchdog()) @@ -2160,6 +2161,9 @@ struct net_device { #ifdef CONFIG_NET_CLS_ACT struct mini_Qdisc __rcu *miniq_egress; #endif +#ifdef CONFIG_NETFILTER_EGRESS + struct nf_hook_entries __rcu *nf_hooks_egress; +#endif #ifdef CONFIG_NET_SCHED DECLARE_HASHTABLE (qdisc_hash, 4); diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index 4f9a4b3c5892..a40aaf645fa4 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -54,9 +54,8 @@ int arpt_register_table(struct net *net, const struct xt_table *table, const struct nf_hook_ops *ops); void arpt_unregister_table(struct net *net, const char *name); void arpt_unregister_table_pre_exit(struct net *net, const char *name); -extern unsigned int arpt_do_table(struct sk_buff *skb, - const struct nf_hook_state *state, - struct xt_table *table); +extern unsigned int arpt_do_table(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state); #ifdef CONFIG_NETFILTER_XTABLES_COMPAT #include <net/compat.h> diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 10a01978bc0d..a13296d6c7ce 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -112,9 +112,8 @@ extern int ebt_register_table(struct net *net, const struct nf_hook_ops *ops); extern void ebt_unregister_table(struct net *net, const char *tablename); void ebt_unregister_table_pre_exit(struct net *net, const char *tablename); -extern unsigned int ebt_do_table(struct sk_buff *skb, - const struct nf_hook_state *state, - struct ebt_table *table); +extern unsigned int ebt_do_table(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state); /* True if the hook mask denotes that the rule is in a base chain, * used in the check() functions */ diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h deleted file mode 100644 index a13774be2eb5..000000000000 --- a/include/linux/netfilter_ingress.h +++ /dev/null @@ -1,58 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NETFILTER_INGRESS_H_ -#define _NETFILTER_INGRESS_H_ - -#include <linux/netfilter.h> -#include <linux/netdevice.h> - -#ifdef CONFIG_NETFILTER_INGRESS -static inline bool nf_hook_ingress_active(const struct sk_buff *skb) -{ -#ifdef CONFIG_JUMP_LABEL - if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_INGRESS])) - return false; -#endif - return rcu_access_pointer(skb->dev->nf_hooks_ingress); -} - -/* caller must hold rcu_read_lock */ -static inline int nf_hook_ingress(struct sk_buff *skb) -{ - struct nf_hook_entries *e = rcu_dereference(skb->dev->nf_hooks_ingress); - struct nf_hook_state state; - int ret; - - /* Must recheck the ingress hook head, in the event it became NULL - * after the check in nf_hook_ingress_active evaluated to true. - */ - if (unlikely(!e)) - return 0; - - nf_hook_state_init(&state, NF_NETDEV_INGRESS, - NFPROTO_NETDEV, skb->dev, NULL, NULL, - dev_net(skb->dev), NULL); - ret = nf_hook_slow(skb, &state, e, 0); - if (ret == 0) - return -1; - - return ret; -} - -static inline void nf_hook_ingress_init(struct net_device *dev) -{ - RCU_INIT_POINTER(dev->nf_hooks_ingress, NULL); -} -#else /* CONFIG_NETFILTER_INGRESS */ -static inline int nf_hook_ingress_active(struct sk_buff *skb) -{ - return 0; -} - -static inline int nf_hook_ingress(struct sk_buff *skb) -{ - return 0; -} - -static inline void nf_hook_ingress_init(struct net_device *dev) {} -#endif /* CONFIG_NETFILTER_INGRESS */ -#endif /* _NETFILTER_INGRESS_H_ */ diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index 8d09bfe850dc..132b0e4a6d4d 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -63,9 +63,9 @@ struct ipt_error { } extern void *ipt_alloc_initial_table(const struct xt_table *); -extern unsigned int ipt_do_table(struct sk_buff *skb, - const struct nf_hook_state *state, - struct xt_table *table); +extern unsigned int ipt_do_table(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state); #ifdef CONFIG_NETFILTER_XTABLES_COMPAT #include <net/compat.h> diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index 79e73fd7d965..8b8885a73c76 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -29,9 +29,8 @@ int ip6t_register_table(struct net *net, const struct xt_table *table, const struct nf_hook_ops *ops); void ip6t_unregister_table_pre_exit(struct net *net, const char *name); void ip6t_unregister_table_exit(struct net *net, const char *name); -extern unsigned int ip6t_do_table(struct sk_buff *skb, - const struct nf_hook_state *state, - struct xt_table *table); +extern unsigned int ip6t_do_table(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state); #ifdef CONFIG_NETFILTER_XTABLES_COMPAT #include <net/compat.h> diff --git a/include/linux/netfilter_netdev.h b/include/linux/netfilter_netdev.h new file mode 100644 index 000000000000..b71b57a83bb4 --- /dev/null +++ b/include/linux/netfilter_netdev.h @@ -0,0 +1,146 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NETFILTER_NETDEV_H_ +#define _NETFILTER_NETDEV_H_ + +#include <linux/netfilter.h> +#include <linux/netdevice.h> + +#ifdef CONFIG_NETFILTER_INGRESS +static inline bool nf_hook_ingress_active(const struct sk_buff *skb) +{ +#ifdef CONFIG_JUMP_LABEL + if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_INGRESS])) + return false; +#endif + return rcu_access_pointer(skb->dev->nf_hooks_ingress); +} + +/* caller must hold rcu_read_lock */ +static inline int nf_hook_ingress(struct sk_buff *skb) +{ + struct nf_hook_entries *e = rcu_dereference(skb->dev->nf_hooks_ingress); + struct nf_hook_state state; + int ret; + + /* Must recheck the ingress hook head, in the event it became NULL + * after the check in nf_hook_ingress_active evaluated to true. + */ + if (unlikely(!e)) + return 0; + + nf_hook_state_init(&state, NF_NETDEV_INGRESS, + NFPROTO_NETDEV, skb->dev, NULL, NULL, + dev_net(skb->dev), NULL); + ret = nf_hook_slow(skb, &state, e, 0); + if (ret == 0) + return -1; + + return ret; +} + +#else /* CONFIG_NETFILTER_INGRESS */ +static inline int nf_hook_ingress_active(struct sk_buff *skb) +{ + return 0; +} + +static inline int nf_hook_ingress(struct sk_buff *skb) +{ + return 0; +} +#endif /* CONFIG_NETFILTER_INGRESS */ + +#ifdef CONFIG_NETFILTER_EGRESS +static inline bool nf_hook_egress_active(void) +{ +#ifdef CONFIG_JUMP_LABEL + if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_EGRESS])) + return false; +#endif + return true; +} + +/** + * nf_hook_egress - classify packets before transmission + * @skb: packet to be classified + * @rc: result code which shall be returned by __dev_queue_xmit() on failure + * @dev: netdev whose egress hooks shall be applied to @skb + * + * Returns @skb on success or %NULL if the packet was consumed or filtered. + * Caller must hold rcu_read_lock. + * + * On ingress, packets are classified first by tc, then by netfilter. + * On egress, the order is reversed for symmetry. Conceptually, tc and + * netfilter can be thought of as layers, with netfilter layered above tc: + * When tc redirects a packet to another interface, netfilter is not applied + * because the packet is on the tc layer. + * + * The nf_skip_egress flag controls whether netfilter is applied on egress. + * It is updated by __netif_receive_skb_core() and __dev_queue_xmit() when the + * packet passes through tc and netfilter. Because __dev_queue_xmit() may be + * called recursively by tunnel drivers such as vxlan, the flag is reverted to + * false after sch_handle_egress(). This ensures that netfilter is applied + * both on the overlay and underlying network. + */ +static inline struct sk_buff *nf_hook_egress(struct sk_buff *skb, int *rc, + struct net_device *dev) +{ + struct nf_hook_entries *e; + struct nf_hook_state state; + int ret; + +#ifdef CONFIG_NETFILTER_SKIP_EGRESS + if (skb->nf_skip_egress) + return skb; +#endif + + e = rcu_dereference(dev->nf_hooks_egress); + if (!e) + return skb; + + nf_hook_state_init(&state, NF_NETDEV_EGRESS, + NFPROTO_NETDEV, dev, NULL, NULL, + dev_net(dev), NULL); + ret = nf_hook_slow(skb, &state, e, 0); + + if (ret == 1) { + return skb; + } else if (ret < 0) { + *rc = NET_XMIT_DROP; + return NULL; + } else { /* ret == 0 */ + *rc = NET_XMIT_SUCCESS; + return NULL; + } +} +#else /* CONFIG_NETFILTER_EGRESS */ +static inline bool nf_hook_egress_active(void) +{ + return false; +} + +static inline struct sk_buff *nf_hook_egress(struct sk_buff *skb, int *rc, + struct net_device *dev) +{ + return skb; +} +#endif /* CONFIG_NETFILTER_EGRESS */ + +static inline void nf_skip_egress(struct sk_buff *skb, bool skip) +{ +#ifdef CONFIG_NETFILTER_SKIP_EGRESS + skb->nf_skip_egress = skip; +#endif +} + +static inline void nf_hook_netdev_init(struct net_device *dev) +{ +#ifdef CONFIG_NETFILTER_INGRESS + RCU_INIT_POINTER(dev->nf_hooks_ingress, NULL); +#endif +#ifdef CONFIG_NETFILTER_EGRESS + RCU_INIT_POINTER(dev->nf_hooks_egress, NULL); +#endif +} + +#endif /* _NETFILTER_NETDEV_H_ */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 841e2f0f5240..cb96f1e6460c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -652,6 +652,7 @@ typedef unsigned char *sk_buff_data_t; * @tc_at_ingress: used within tc_classify to distinguish in/egress * @redirected: packet was redirected by packet classifier * @from_ingress: packet was redirected from the ingress path + * @nf_skip_egress: packet shall skip nf egress - see netfilter_netdev.h * @peeked: this packet has been seen already, so stats have been * done for it, don't do them again * @nf_trace: netfilter packet trace flag @@ -868,6 +869,9 @@ struct sk_buff { #ifdef CONFIG_NET_REDIRECT __u8 from_ingress:1; #endif +#ifdef CONFIG_NETFILTER_SKIP_EGRESS + __u8 nf_skip_egress:1; +#endif #ifdef CONFIG_TLS_DEVICE __u8 decrypted:1; #endif diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 7cb5a1aace40..ff1804a0c469 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -931,6 +931,7 @@ struct netns_ipvs { int sysctl_conn_reuse_mode; int sysctl_schedule_icmp; int sysctl_ignore_tunneled; + int sysctl_run_estimation; /* ip_vs_lblc */ int sysctl_lblc_expiration; @@ -1071,6 +1072,11 @@ static inline int sysctl_cache_bypass(struct netns_ipvs *ipvs) return ipvs->sysctl_cache_bypass; } +static inline int sysctl_run_estimation(struct netns_ipvs *ipvs) +{ + return ipvs->sysctl_run_estimation; +} + #else static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs) @@ -1163,6 +1169,11 @@ static inline int sysctl_cache_bypass(struct netns_ipvs *ipvs) return 0; } +static inline int sysctl_run_estimation(struct netns_ipvs *ipvs) +{ + return 1; +} + #endif /* IPVS core functions diff --git a/include/uapi/linux/netfilter.h b/include/uapi/linux/netfilter.h index ef9a44286e23..53411ccc69db 100644 --- a/include/uapi/linux/netfilter.h +++ b/include/uapi/linux/netfilter.h @@ -51,6 +51,7 @@ enum nf_inet_hooks { enum nf_dev_hooks { NF_NETDEV_INGRESS, + NF_NETDEV_EGRESS, NF_NETDEV_NUMHOOKS }; |