diff options
author | David S. Miller <davem@davemloft.net> | 2022-11-16 12:43:36 +0300 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2022-11-16 12:43:36 +0300 |
commit | fd258f2aba2cfb2babe85b47ce01d0167c61a4db (patch) | |
tree | 618c3aae6090f4aec32aaf69683f4660fc9e35f5 | |
parent | e88225656d302eb369668003a622b7a7baef1f43 (diff) | |
parent | 9804985bf27f8fbcf0d96c7435b5ad94a2a6ea20 (diff) | |
download | linux-fd258f2aba2cfb2babe85b47ce01d0167c61a4db.tar.xz |
Merge branch 'udp-pernetns-hash'
Kuniyuki Iwashima says:
====================
udp: Introduce optional per-netns hash table.
This series is the UDP version of the per-netns ehash series [0],
which were initially in the same patch set. [1]
The notable difference with TCP is the max table size is 64K and the min
size is 128. This is because the possible hash range by udp_hashfn()
always fits in 64K within the same netns and because we want to keep a
bitmap in udp_lib_get_port() on the stack. Also, the UDP per-netns table
isolates both 1-tuple and 2-tuple tables.
For details, please see the last patch.
patch 1 - 4: prep for per-netns hash table
patch 5: add per-netns hash table
[0]: https://lore.kernel.org/netdev/20220908011022.45342-1-kuniyu@amazon.com/
[1]: https://lore.kernel.org/netdev/20220826000445.46552-1-kuniyu@amazon.com/
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | Documentation/networking/ip-sysctl.rst | 27 | ||||
-rw-r--r-- | include/linux/udp.h | 2 | ||||
-rw-r--r-- | include/net/netns/ipv4.h | 3 | ||||
-rw-r--r-- | net/core/filter.c | 4 | ||||
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 40 | ||||
-rw-r--r-- | net/ipv4/udp.c | 196 | ||||
-rw-r--r-- | net/ipv4/udp_diag.c | 6 | ||||
-rw-r--r-- | net/ipv4/udp_offload.c | 5 | ||||
-rw-r--r-- | net/ipv6/udp.c | 31 | ||||
-rw-r--r-- | net/ipv6/udp_offload.c | 5 |
10 files changed, 261 insertions, 58 deletions
diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 815efc89ad73..727b25cc7ec4 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1177,6 +1177,33 @@ udp_rmem_min - INTEGER udp_wmem_min - INTEGER UDP does not have tx memory accounting and this tunable has no effect. +udp_hash_entries - INTEGER + Show the number of hash buckets for UDP sockets in the current + networking namespace. + + A negative value means the networking namespace does not own its + hash buckets and shares the initial networking namespace's one. + +udp_child_ehash_entries - INTEGER + Control the number of hash buckets for UDP sockets in the child + networking namespace, which must be set before clone() or unshare(). + + If the value is not 0, the kernel uses a value rounded up to 2^n + as the actual hash bucket size. 0 is a special value, meaning + the child networking namespace will share the initial networking + namespace's hash buckets. + + Note that the child will use the global one in case the kernel + fails to allocate enough memory. In addition, the global hash + buckets are spread over available NUMA nodes, but the allocation + of the child hash table depends on the current process's NUMA + policy, which could result in performance differences. + + Possible values: 0, 2^n (n: 7 (128) - 16 (64K)) + + Default: 0 + + RAW variables ============= diff --git a/include/linux/udp.h b/include/linux/udp.h index dea57aa37df6..a2892e151644 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -23,7 +23,9 @@ static inline struct udphdr *udp_hdr(const struct sk_buff *skb) return (struct udphdr *)skb_transport_header(skb); } +#define UDP_HTABLE_SIZE_MIN_PERNET 128 #define UDP_HTABLE_SIZE_MIN (CONFIG_BASE_SMALL ? 128 : 256) +#define UDP_HTABLE_SIZE_MAX 65536 static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask) { diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 25f90bba4889..db762e35aca9 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -43,6 +43,7 @@ struct tcp_fastopen_context; struct netns_ipv4 { struct inet_timewait_death_row tcp_death_row; + struct udp_table *udp_table; #ifdef CONFIG_SYSCTL struct ctl_table_header *forw_hdr; @@ -207,6 +208,8 @@ struct netns_ipv4 { atomic_t dev_addr_genid; + unsigned int sysctl_udp_child_hash_entries; + #ifdef CONFIG_SYSCTL unsigned long *sysctl_local_reserved_ports; int sysctl_ip_prot_sock; diff --git a/net/core/filter.c b/net/core/filter.c index 6dd2baf5eeb2..da4f697e4fa4 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -6432,7 +6432,7 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple, else sk = __udp4_lib_lookup(net, src4, tuple->ipv4.sport, dst4, tuple->ipv4.dport, - dif, sdif, &udp_table, NULL); + dif, sdif, net->ipv4.udp_table, NULL); #if IS_ENABLED(CONFIG_IPV6) } else { struct in6_addr *src6 = (struct in6_addr *)&tuple->ipv6.saddr; @@ -6448,7 +6448,7 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple, src6, tuple->ipv6.sport, dst6, tuple->ipv6.dport, dif, sdif, - &udp_table, NULL); + net->ipv4.udp_table, NULL); #endif } diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 0af28cedd071..0d0cc4ef2b85 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -40,6 +40,7 @@ static int one_day_secs = 24 * 3600; static u32 fib_multipath_hash_fields_all_mask __maybe_unused = FIB_MULTIPATH_HASH_FIELD_ALL_MASK; static unsigned int tcp_child_ehash_entries_max = 16 * 1024 * 1024; +static unsigned int udp_child_hash_entries_max = UDP_HTABLE_SIZE_MAX; static int tcp_plb_max_rounds = 31; static int tcp_plb_max_cong_thresh = 256; @@ -402,12 +403,36 @@ static int proc_tcp_ehash_entries(struct ctl_table *table, int write, if (!net_eq(net, &init_net) && !hinfo->pernet) tcp_ehash_entries *= -1; + memset(&tbl, 0, sizeof(tbl)); tbl.data = &tcp_ehash_entries; tbl.maxlen = sizeof(int); return proc_dointvec(&tbl, write, buffer, lenp, ppos); } +static int proc_udp_hash_entries(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + struct net *net = container_of(table->data, struct net, + ipv4.sysctl_udp_child_hash_entries); + int udp_hash_entries; + struct ctl_table tbl; + + udp_hash_entries = net->ipv4.udp_table->mask + 1; + + /* A negative number indicates that the child netns + * shares the global udp_table. + */ + if (!net_eq(net, &init_net) && net->ipv4.udp_table == &udp_table) + udp_hash_entries *= -1; + + memset(&tbl, 0, sizeof(tbl)); + tbl.data = &udp_hash_entries; + tbl.maxlen = sizeof(int); + + return proc_dointvec(&tbl, write, buffer, lenp, ppos); +} + #ifdef CONFIG_IP_ROUTE_MULTIPATH static int proc_fib_multipath_hash_policy(struct ctl_table *table, int write, void *buffer, size_t *lenp, @@ -1362,6 +1387,21 @@ static struct ctl_table ipv4_net_table[] = { .extra2 = &tcp_child_ehash_entries_max, }, { + .procname = "udp_hash_entries", + .data = &init_net.ipv4.sysctl_udp_child_hash_entries, + .mode = 0444, + .proc_handler = proc_udp_hash_entries, + }, + { + .procname = "udp_child_hash_entries", + .data = &init_net.ipv4.sysctl_udp_child_hash_entries, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_douintvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = &udp_child_hash_entries_max, + }, + { .procname = "udp_rmem_min", .data = &init_net.ipv4.sysctl_udp_rmem_min, .maxlen = sizeof(init_net.ipv4.sysctl_udp_rmem_min), diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index b859d6c8298e..1fb7d1ed1cb1 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -129,7 +129,12 @@ DEFINE_PER_CPU(int, udp_memory_per_cpu_fw_alloc); EXPORT_PER_CPU_SYMBOL_GPL(udp_memory_per_cpu_fw_alloc); #define MAX_UDP_PORTS 65536 -#define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN) +#define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN_PERNET) + +static struct udp_table *udp_get_table_prot(struct sock *sk) +{ + return sk->sk_prot->h.udp_table ? : sock_net(sk)->ipv4.udp_table; +} static int udp_lib_lport_inuse(struct net *net, __u16 num, const struct udp_hslot *hslot, @@ -232,16 +237,16 @@ static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot) int udp_lib_get_port(struct sock *sk, unsigned short snum, unsigned int hash2_nulladdr) { + struct udp_table *udptable = udp_get_table_prot(sk); struct udp_hslot *hslot, *hslot2; - struct udp_table *udptable = sk->sk_prot->h.udp_table; - int error = 1; struct net *net = sock_net(sk); + int error = 1; if (!snum) { + DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN); + unsigned short first, last; int low, high, remaining; unsigned int rand; - unsigned short first, last; - DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN); inet_get_local_port_range(net, &low, &high); remaining = (high - low) + 1; @@ -467,7 +472,7 @@ static struct sock *udp4_lookup_run_bpf(struct net *net, struct sock *sk, *reuse_sk; bool no_reuseport; - if (udptable != &udp_table) + if (udptable != net->ipv4.udp_table) return NULL; /* only UDP is supported */ no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_UDP, saddr, sport, @@ -548,10 +553,11 @@ struct sock *udp4_lib_lookup_skb(const struct sk_buff *skb, __be16 sport, __be16 dport) { const struct iphdr *iph = ip_hdr(skb); + struct net *net = dev_net(skb->dev); - return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport, + return __udp4_lib_lookup(net, iph->saddr, sport, iph->daddr, dport, inet_iif(skb), - inet_sdif(skb), &udp_table, NULL); + inet_sdif(skb), net->ipv4.udp_table, NULL); } /* Must be called under rcu_read_lock(). @@ -564,7 +570,7 @@ struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, struct sock *sk; sk = __udp4_lib_lookup(net, saddr, sport, daddr, dport, - dif, 0, &udp_table, NULL); + dif, 0, net->ipv4.udp_table, NULL); if (sk && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; return sk; @@ -802,7 +808,7 @@ out: int udp_err(struct sk_buff *skb, u32 info) { - return __udp4_lib_err(skb, info, &udp_table); + return __udp4_lib_err(skb, info, dev_net(skb->dev)->ipv4.udp_table); } /* @@ -1999,7 +2005,7 @@ EXPORT_SYMBOL(udp_disconnect); void udp_lib_unhash(struct sock *sk) { if (sk_hashed(sk)) { - struct udp_table *udptable = sk->sk_prot->h.udp_table; + struct udp_table *udptable = udp_get_table_prot(sk); struct udp_hslot *hslot, *hslot2; hslot = udp_hashslot(udptable, sock_net(sk), @@ -2030,7 +2036,7 @@ EXPORT_SYMBOL(udp_lib_unhash); void udp_lib_rehash(struct sock *sk, u16 newhash) { if (sk_hashed(sk)) { - struct udp_table *udptable = sk->sk_prot->h.udp_table; + struct udp_table *udptable = udp_get_table_prot(sk); struct udp_hslot *hslot, *hslot2, *nhslot2; hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); @@ -2519,10 +2525,14 @@ static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net, __be16 rmt_port, __be32 rmt_addr, int dif, int sdif) { - struct sock *sk, *result; + struct udp_table *udptable = net->ipv4.udp_table; unsigned short hnum = ntohs(loc_port); - unsigned int slot = udp_hashfn(net, hnum, udp_table.mask); - struct udp_hslot *hslot = &udp_table.hash[slot]; + struct sock *sk, *result; + struct udp_hslot *hslot; + unsigned int slot; + + slot = udp_hashfn(net, hnum, udptable->mask); + hslot = &udptable->hash[slot]; /* Do not bother scanning a too big list */ if (hslot->count > 10) @@ -2550,14 +2560,19 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net, __be16 rmt_port, __be32 rmt_addr, int dif, int sdif) { - unsigned short hnum = ntohs(loc_port); - unsigned int hash2 = ipv4_portaddr_hash(net, loc_addr, hnum); - unsigned int slot2 = hash2 & udp_table.mask; - struct udp_hslot *hslot2 = &udp_table.hash2[slot2]; + struct udp_table *udptable = net->ipv4.udp_table; INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr); - const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum); + unsigned short hnum = ntohs(loc_port); + unsigned int hash2, slot2; + struct udp_hslot *hslot2; + __portpair ports; struct sock *sk; + hash2 = ipv4_portaddr_hash(net, loc_addr, hnum); + slot2 = hash2 & udptable->mask; + hslot2 = &udptable->hash2[slot2]; + ports = INET_COMBINED_PORTS(rmt_port, hnum); + udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { if (inet_match(net, sk, acookie, ports, dif, sdif)) return sk; @@ -2637,7 +2652,7 @@ int udp_v4_early_demux(struct sk_buff *skb) int udp_rcv(struct sk_buff *skb) { - return __udp4_lib_rcv(skb, &udp_table, IPPROTO_UDP); + return __udp4_lib_rcv(skb, dev_net(skb->dev)->ipv4.udp_table, IPPROTO_UDP); } void udp_destroy_sock(struct sock *sk) @@ -2960,7 +2975,7 @@ struct proto udp_prot = { .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min), .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min), .obj_size = sizeof(struct udp_sock), - .h.udp_table = &udp_table, + .h.udp_table = NULL, .diag_destroy = udp_abort, }; EXPORT_SYMBOL(udp_prot); @@ -2968,21 +2983,30 @@ EXPORT_SYMBOL(udp_prot); /* ------------------------------------------------------------------------ */ #ifdef CONFIG_PROC_FS +static struct udp_table *udp_get_table_afinfo(struct udp_seq_afinfo *afinfo, + struct net *net) +{ + return afinfo->udp_table ? : net->ipv4.udp_table; +} + static struct sock *udp_get_first(struct seq_file *seq, int start) { - struct sock *sk; - struct udp_seq_afinfo *afinfo; struct udp_iter_state *state = seq->private; struct net *net = seq_file_net(seq); + struct udp_seq_afinfo *afinfo; + struct udp_table *udptable; + struct sock *sk; if (state->bpf_seq_afinfo) afinfo = state->bpf_seq_afinfo; else afinfo = pde_data(file_inode(seq->file)); - for (state->bucket = start; state->bucket <= afinfo->udp_table->mask; + udptable = udp_get_table_afinfo(afinfo, net); + + for (state->bucket = start; state->bucket <= udptable->mask; ++state->bucket) { - struct udp_hslot *hslot = &afinfo->udp_table->hash[state->bucket]; + struct udp_hslot *hslot = &udptable->hash[state->bucket]; if (hlist_empty(&hslot->head)) continue; @@ -3004,9 +3028,10 @@ found: static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk) { - struct udp_seq_afinfo *afinfo; struct udp_iter_state *state = seq->private; struct net *net = seq_file_net(seq); + struct udp_seq_afinfo *afinfo; + struct udp_table *udptable; if (state->bpf_seq_afinfo) afinfo = state->bpf_seq_afinfo; @@ -3020,8 +3045,11 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk) sk->sk_family != afinfo->family))); if (!sk) { - if (state->bucket <= afinfo->udp_table->mask) - spin_unlock_bh(&afinfo->udp_table->hash[state->bucket].lock); + udptable = udp_get_table_afinfo(afinfo, net); + + if (state->bucket <= udptable->mask) + spin_unlock_bh(&udptable->hash[state->bucket].lock); + return udp_get_first(seq, state->bucket + 1); } return sk; @@ -3062,16 +3090,19 @@ EXPORT_SYMBOL(udp_seq_next); void udp_seq_stop(struct seq_file *seq, void *v) { - struct udp_seq_afinfo *afinfo; struct udp_iter_state *state = seq->private; + struct udp_seq_afinfo *afinfo; + struct udp_table *udptable; if (state->bpf_seq_afinfo) afinfo = state->bpf_seq_afinfo; else afinfo = pde_data(file_inode(seq->file)); - if (state->bucket <= afinfo->udp_table->mask) - spin_unlock_bh(&afinfo->udp_table->hash[state->bucket].lock); + udptable = udp_get_table_afinfo(afinfo, seq_file_net(seq)); + + if (state->bucket <= udptable->mask) + spin_unlock_bh(&udptable->hash[state->bucket].lock); } EXPORT_SYMBOL(udp_seq_stop); @@ -3184,7 +3215,7 @@ EXPORT_SYMBOL(udp_seq_ops); static struct udp_seq_afinfo udp4_seq_afinfo = { .family = AF_INET, - .udp_table = &udp_table, + .udp_table = NULL, }; static int __net_init udp4_proc_init_net(struct net *net) @@ -3246,7 +3277,7 @@ void __init udp_table_init(struct udp_table *table, const char *name) &table->log, &table->mask, UDP_HTABLE_SIZE_MIN, - 64 * 1024); + UDP_HTABLE_SIZE_MAX); table->hash2 = table->hash + (table->mask + 1); for (i = 0; i <= table->mask; i++) { @@ -3271,7 +3302,7 @@ u32 udp_flow_hashrnd(void) } EXPORT_SYMBOL(udp_flow_hashrnd); -static int __net_init udp_sysctl_init(struct net *net) +static void __net_init udp_sysctl_init(struct net *net) { net->ipv4.sysctl_udp_rmem_min = PAGE_SIZE; net->ipv4.sysctl_udp_wmem_min = PAGE_SIZE; @@ -3279,12 +3310,103 @@ static int __net_init udp_sysctl_init(struct net *net) #ifdef CONFIG_NET_L3_MASTER_DEV net->ipv4.sysctl_udp_l3mdev_accept = 0; #endif +} + +static struct udp_table __net_init *udp_pernet_table_alloc(unsigned int hash_entries) +{ + struct udp_table *udptable; + int i; + + udptable = kmalloc(sizeof(*udptable), GFP_KERNEL); + if (!udptable) + goto out; + + udptable->hash = vmalloc_huge(hash_entries * 2 * sizeof(struct udp_hslot), + GFP_KERNEL_ACCOUNT); + if (!udptable->hash) + goto free_table; + + udptable->hash2 = udptable->hash + hash_entries; + udptable->mask = hash_entries - 1; + udptable->log = ilog2(hash_entries); + + for (i = 0; i < hash_entries; i++) { + INIT_HLIST_HEAD(&udptable->hash[i].head); + udptable->hash[i].count = 0; + spin_lock_init(&udptable->hash[i].lock); + + INIT_HLIST_HEAD(&udptable->hash2[i].head); + udptable->hash2[i].count = 0; + spin_lock_init(&udptable->hash2[i].lock); + } + + return udptable; + +free_table: + kfree(udptable); +out: + return NULL; +} + +static void __net_exit udp_pernet_table_free(struct net *net) +{ + struct udp_table *udptable = net->ipv4.udp_table; + + if (udptable == &udp_table) + return; + + kvfree(udptable->hash); + kfree(udptable); +} + +static void __net_init udp_set_table(struct net *net) +{ + struct udp_table *udptable; + unsigned int hash_entries; + struct net *old_net; + + if (net_eq(net, &init_net)) + goto fallback; + + old_net = current->nsproxy->net_ns; + hash_entries = READ_ONCE(old_net->ipv4.sysctl_udp_child_hash_entries); + if (!hash_entries) + goto fallback; + + /* Set min to keep the bitmap on stack in udp_lib_get_port() */ + if (hash_entries < UDP_HTABLE_SIZE_MIN_PERNET) + hash_entries = UDP_HTABLE_SIZE_MIN_PERNET; + else + hash_entries = roundup_pow_of_two(hash_entries); + + udptable = udp_pernet_table_alloc(hash_entries); + if (udptable) { + net->ipv4.udp_table = udptable; + } else { + pr_warn("Failed to allocate UDP hash table (entries: %u) " + "for a netns, fallback to the global one\n", + hash_entries); +fallback: + net->ipv4.udp_table = &udp_table; + } +} + +static int __net_init udp_pernet_init(struct net *net) +{ + udp_sysctl_init(net); + udp_set_table(net); return 0; } +static void __net_exit udp_pernet_exit(struct net *net) +{ + udp_pernet_table_free(net); +} + static struct pernet_operations __net_initdata udp_sysctl_ops = { - .init = udp_sysctl_init, + .init = udp_pernet_init, + .exit = udp_pernet_exit, }; #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) @@ -3302,7 +3424,7 @@ static int bpf_iter_init_udp(void *priv_data, struct bpf_iter_aux_info *aux) return -ENOMEM; afinfo->family = AF_UNSPEC; - afinfo->udp_table = &udp_table; + afinfo->udp_table = NULL; st->bpf_seq_afinfo = afinfo; ret = bpf_iter_init_seq_net(priv_data, aux); if (ret) diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 1ed8c4d78e5c..de3f2d31f510 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -147,13 +147,13 @@ done: static void udp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r) { - udp_dump(&udp_table, skb, cb, r); + udp_dump(sock_net(cb->skb->sk)->ipv4.udp_table, skb, cb, r); } static int udp_diag_dump_one(struct netlink_callback *cb, const struct inet_diag_req_v2 *req) { - return udp_dump_one(&udp_table, cb, req); + return udp_dump_one(sock_net(cb->skb->sk)->ipv4.udp_table, cb, req); } static void udp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, @@ -225,7 +225,7 @@ static int __udp_diag_destroy(struct sk_buff *in_skb, static int udp_diag_destroy(struct sk_buff *in_skb, const struct inet_diag_req_v2 *req) { - return __udp_diag_destroy(in_skb, req, &udp_table); + return __udp_diag_destroy(in_skb, req, sock_net(in_skb->sk)->ipv4.udp_table); } static int udplite_diag_destroy(struct sk_buff *in_skb, diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 6d1a4bec2614..aedde65e2268 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -600,10 +600,11 @@ static struct sock *udp4_gro_lookup_skb(struct sk_buff *skb, __be16 sport, __be16 dport) { const struct iphdr *iph = skb_gro_network_header(skb); + struct net *net = dev_net(skb->dev); - return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport, + return __udp4_lib_lookup(net, iph->saddr, sport, iph->daddr, dport, inet_iif(skb), - inet_sdif(skb), &udp_table, NULL); + inet_sdif(skb), net->ipv4.udp_table, NULL); } INDIRECT_CALLABLE_SCOPE diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index e2de3d906c82..9fb2f33ee3a7 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -217,7 +217,7 @@ static inline struct sock *udp6_lookup_run_bpf(struct net *net, struct sock *sk, *reuse_sk; bool no_reuseport; - if (udptable != &udp_table) + if (udptable != net->ipv4.udp_table) return NULL; /* only UDP is supported */ no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_UDP, saddr, sport, @@ -298,10 +298,11 @@ struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb, __be16 sport, __be16 dport) { const struct ipv6hdr *iph = ipv6_hdr(skb); + struct net *net = dev_net(skb->dev); - return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport, + return __udp6_lib_lookup(net, &iph->saddr, sport, &iph->daddr, dport, inet6_iif(skb), - inet6_sdif(skb), &udp_table, NULL); + inet6_sdif(skb), net->ipv4.udp_table, NULL); } /* Must be called under rcu_read_lock(). @@ -314,7 +315,7 @@ struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be struct sock *sk; sk = __udp6_lib_lookup(net, saddr, sport, daddr, dport, - dif, 0, &udp_table, NULL); + dif, 0, net->ipv4.udp_table, NULL); if (sk && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; return sk; @@ -689,7 +690,8 @@ static __inline__ int udpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { - return __udp6_lib_err(skb, opt, type, code, offset, info, &udp_table); + return __udp6_lib_err(skb, opt, type, code, offset, info, + dev_net(skb->dev)->ipv4.udp_table); } static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) @@ -1063,13 +1065,18 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net, __be16 rmt_port, const struct in6_addr *rmt_addr, int dif, int sdif) { + struct udp_table *udptable = net->ipv4.udp_table; unsigned short hnum = ntohs(loc_port); - unsigned int hash2 = ipv6_portaddr_hash(net, loc_addr, hnum); - unsigned int slot2 = hash2 & udp_table.mask; - struct udp_hslot *hslot2 = &udp_table.hash2[slot2]; - const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum); + unsigned int hash2, slot2; + struct udp_hslot *hslot2; + __portpair ports; struct sock *sk; + hash2 = ipv6_portaddr_hash(net, loc_addr, hnum); + slot2 = hash2 & udptable->mask; + hslot2 = &udptable->hash2[slot2]; + ports = INET_COMBINED_PORTS(rmt_port, hnum); + udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { if (sk->sk_state == TCP_ESTABLISHED && inet6_match(net, sk, rmt_addr, loc_addr, ports, dif, sdif)) @@ -1123,7 +1130,7 @@ void udp_v6_early_demux(struct sk_buff *skb) INDIRECT_CALLABLE_SCOPE int udpv6_rcv(struct sk_buff *skb) { - return __udp6_lib_rcv(skb, &udp_table, IPPROTO_UDP); + return __udp6_lib_rcv(skb, dev_net(skb->dev)->ipv4.udp_table, IPPROTO_UDP); } /* @@ -1720,7 +1727,7 @@ EXPORT_SYMBOL(udp6_seq_ops); static struct udp_seq_afinfo udp6_seq_afinfo = { .family = AF_INET6, - .udp_table = &udp_table, + .udp_table = NULL, }; int __net_init udp6_proc_init(struct net *net) @@ -1770,7 +1777,7 @@ struct proto udpv6_prot = { .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min), .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min), .obj_size = sizeof(struct udp6_sock), - .h.udp_table = &udp_table, + .h.udp_table = NULL, .diag_destroy = udp_abort, }; diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 7720d04ed396..e0e10f6bcdc1 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -116,10 +116,11 @@ static struct sock *udp6_gro_lookup_skb(struct sk_buff *skb, __be16 sport, __be16 dport) { const struct ipv6hdr *iph = skb_gro_network_header(skb); + struct net *net = dev_net(skb->dev); - return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport, + return __udp6_lib_lookup(net, &iph->saddr, sport, &iph->daddr, dport, inet6_iif(skb), - inet6_sdif(skb), &udp_table, NULL); + inet6_sdif(skb), net->ipv4.udp_table, NULL); } INDIRECT_CALLABLE_SCOPE |