diff options
-rw-r--r-- | Documentation/networking/ip-sysctl.txt | 7 | ||||
-rw-r--r-- | Documentation/networking/vrf.txt | 7 | ||||
-rw-r--r-- | include/net/netns/ipv4.h | 4 | ||||
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 11 | ||||
-rw-r--r-- | net/ipv4/udp.c | 27 | ||||
-rw-r--r-- | net/ipv6/udp.c | 27 |
6 files changed, 66 insertions, 17 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 17f2e7791042..fc73eeb7b3b8 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -721,6 +721,13 @@ tcp_challenge_ack_limit - INTEGER UDP variables: +udp_l3mdev_accept - BOOLEAN + Enabling this option allows a "global" bound socket to work + across L3 master domains (e.g., VRFs) with packets capable of + being received regardless of the L3 domain in which they + originated. Only valid when the kernel was compiled with + CONFIG_NET_L3_MASTER_DEV. + udp_mem - vector of 3 INTEGERs: min, pressure, max Number of pages allowed for queueing by all UDP sockets. diff --git a/Documentation/networking/vrf.txt b/Documentation/networking/vrf.txt index 755dab856392..3918dae964d4 100644 --- a/Documentation/networking/vrf.txt +++ b/Documentation/networking/vrf.txt @@ -98,10 +98,11 @@ VRF device: or to specify the output device using cmsg and IP_PKTINFO. -TCP services running in the default VRF context (ie., not bound to any VRF -device) can work across all VRF domains by enabling the tcp_l3mdev_accept -sysctl option: +TCP & UDP services running in the default VRF context (ie., not bound +to any VRF device) can work across all VRF domains by enabling the +tcp_l3mdev_accept and udp_l3mdev_accept sysctl options: sysctl -w net.ipv4.tcp_l3mdev_accept=1 + sysctl -w net.ipv4.udp_l3mdev_accept=1 netfilter rules on the VRF device can be used to limit access to services running in the default VRF context as well. diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index e365732b8051..622d2da27135 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -124,6 +124,10 @@ struct netns_ipv4 { struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; +#ifdef CONFIG_NET_L3_MASTER_DEV + int sysctl_udp_l3mdev_accept; +#endif + int sysctl_igmp_max_memberships; int sysctl_igmp_max_msf; int sysctl_igmp_llm_reports; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 1b861997fdc5..d6880a6149ee 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -1012,6 +1012,17 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = ipv4_privileged_ports, }, +#ifdef CONFIG_NET_L3_MASTER_DEV + { + .procname = "udp_l3mdev_accept", + .data = &init_net.ipv4.sysctl_udp_l3mdev_accept, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, +#endif { } }; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index d6dddcf59e79..cf6ba3387401 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -134,6 +134,17 @@ EXPORT_SYMBOL(udp_memory_allocated); #define MAX_UDP_PORTS 65536 #define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN) +/* IPCB reference means this can not be used from early demux */ +static bool udp_lib_exact_dif_match(struct net *net, struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) + if (!net->ipv4.sysctl_udp_l3mdev_accept && + skb && ipv4_l3mdev_skb(IPCB(skb)->flags)) + return true; +#endif + return false; +} + static int udp_lib_lport_inuse(struct net *net, __u16 num, const struct udp_hslot *hslot, unsigned long *bitmap, @@ -369,7 +380,8 @@ int udp_v4_get_port(struct sock *sk, unsigned short snum) static int compute_score(struct sock *sk, struct net *net, __be32 saddr, __be16 sport, - __be32 daddr, unsigned short hnum, int dif) + __be32 daddr, unsigned short hnum, int dif, + bool exact_dif) { int score; struct inet_sock *inet; @@ -400,7 +412,7 @@ static int compute_score(struct sock *sk, struct net *net, score += 4; } - if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if || exact_dif) { if (sk->sk_bound_dev_if != dif) return -1; score += 4; @@ -425,7 +437,7 @@ static u32 udp_ehashfn(const struct net *net, const __be32 laddr, /* called with rcu_read_lock() */ static struct sock *udp4_lib_lookup2(struct net *net, __be32 saddr, __be16 sport, - __be32 daddr, unsigned int hnum, int dif, + __be32 daddr, unsigned int hnum, int dif, bool exact_dif, struct udp_hslot *hslot2, struct sk_buff *skb) { @@ -437,7 +449,7 @@ static struct sock *udp4_lib_lookup2(struct net *net, badness = 0; udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { score = compute_score(sk, net, saddr, sport, - daddr, hnum, dif); + daddr, hnum, dif, exact_dif); if (score > badness) { reuseport = sk->sk_reuseport; if (reuseport) { @@ -472,6 +484,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, unsigned short hnum = ntohs(dport); unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask); struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; + bool exact_dif = udp_lib_exact_dif_match(net, skb); int score, badness, matches = 0, reuseport = 0; u32 hash = 0; @@ -484,7 +497,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, result = udp4_lib_lookup2(net, saddr, sport, daddr, hnum, dif, - hslot2, skb); + exact_dif, hslot2, skb); if (!result) { unsigned int old_slot2 = slot2; hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum); @@ -499,7 +512,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, result = udp4_lib_lookup2(net, saddr, sport, daddr, hnum, dif, - hslot2, skb); + exact_dif, hslot2, skb); } return result; } @@ -508,7 +521,7 @@ begin: badness = 0; sk_for_each_rcu(sk, &hslot->head) { score = compute_score(sk, net, saddr, sport, - daddr, hnum, dif); + daddr, hnum, dif, exact_dif); if (score > badness) { reuseport = sk->sk_reuseport; if (reuseport) { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 05d69324862e..b4c6516a3a0c 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -55,6 +55,16 @@ #include <trace/events/skb.h> #include "udp_impl.h" +static bool udp6_lib_exact_dif_match(struct net *net, struct sk_buff *skb) +{ +#if defined(CONFIG_NET_L3_MASTER_DEV) + if (!net->ipv4.sysctl_udp_l3mdev_accept && + skb && ipv6_l3mdev_skb(IP6CB(skb)->flags)) + return true; +#endif + return false; +} + static u32 udp6_ehashfn(const struct net *net, const struct in6_addr *laddr, const u16 lport, @@ -118,7 +128,7 @@ static void udp_v6_rehash(struct sock *sk) static int compute_score(struct sock *sk, struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, unsigned short hnum, - int dif) + int dif, bool exact_dif) { int score; struct inet_sock *inet; @@ -149,7 +159,7 @@ static int compute_score(struct sock *sk, struct net *net, score++; } - if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if || exact_dif) { if (sk->sk_bound_dev_if != dif) return -1; score++; @@ -165,7 +175,7 @@ static int compute_score(struct sock *sk, struct net *net, static struct sock *udp6_lib_lookup2(struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, unsigned int hnum, int dif, - struct udp_hslot *hslot2, + bool exact_dif, struct udp_hslot *hslot2, struct sk_buff *skb) { struct sock *sk, *result; @@ -176,7 +186,7 @@ static struct sock *udp6_lib_lookup2(struct net *net, badness = -1; udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { score = compute_score(sk, net, saddr, sport, - daddr, hnum, dif); + daddr, hnum, dif, exact_dif); if (score > badness) { reuseport = sk->sk_reuseport; if (reuseport) { @@ -212,6 +222,7 @@ struct sock *__udp6_lib_lookup(struct net *net, unsigned short hnum = ntohs(dport); unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask); struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; + bool exact_dif = udp6_lib_exact_dif_match(net, skb); int score, badness, matches = 0, reuseport = 0; u32 hash = 0; @@ -223,7 +234,7 @@ struct sock *__udp6_lib_lookup(struct net *net, goto begin; result = udp6_lib_lookup2(net, saddr, sport, - daddr, hnum, dif, + daddr, hnum, dif, exact_dif, hslot2, skb); if (!result) { unsigned int old_slot2 = slot2; @@ -239,7 +250,8 @@ struct sock *__udp6_lib_lookup(struct net *net, result = udp6_lib_lookup2(net, saddr, sport, daddr, hnum, dif, - hslot2, skb); + exact_dif, hslot2, + skb); } return result; } @@ -247,7 +259,8 @@ begin: result = NULL; badness = -1; sk_for_each_rcu(sk, &hslot->head) { - score = compute_score(sk, net, saddr, sport, daddr, hnum, dif); + score = compute_score(sk, net, saddr, sport, daddr, hnum, dif, + exact_dif); if (score > badness) { reuseport = sk->sk_reuseport; if (reuseport) { |