summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2022-11-16 12:43:36 +0300
committerDavid S. Miller <davem@davemloft.net>2022-11-16 12:43:36 +0300
commitfd258f2aba2cfb2babe85b47ce01d0167c61a4db (patch)
tree618c3aae6090f4aec32aaf69683f4660fc9e35f5
parente88225656d302eb369668003a622b7a7baef1f43 (diff)
parent9804985bf27f8fbcf0d96c7435b5ad94a2a6ea20 (diff)
downloadlinux-fd258f2aba2cfb2babe85b47ce01d0167c61a4db.tar.xz
Merge branch 'udp-pernetns-hash'
Kuniyuki Iwashima says: ==================== udp: Introduce optional per-netns hash table. This series is the UDP version of the per-netns ehash series [0], which were initially in the same patch set. [1] The notable difference with TCP is the max table size is 64K and the min size is 128. This is because the possible hash range by udp_hashfn() always fits in 64K within the same netns and because we want to keep a bitmap in udp_lib_get_port() on the stack. Also, the UDP per-netns table isolates both 1-tuple and 2-tuple tables. For details, please see the last patch. patch 1 - 4: prep for per-netns hash table patch 5: add per-netns hash table [0]: https://lore.kernel.org/netdev/20220908011022.45342-1-kuniyu@amazon.com/ [1]: https://lore.kernel.org/netdev/20220826000445.46552-1-kuniyu@amazon.com/ ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--Documentation/networking/ip-sysctl.rst27
-rw-r--r--include/linux/udp.h2
-rw-r--r--include/net/netns/ipv4.h3
-rw-r--r--net/core/filter.c4
-rw-r--r--net/ipv4/sysctl_net_ipv4.c40
-rw-r--r--net/ipv4/udp.c196
-rw-r--r--net/ipv4/udp_diag.c6
-rw-r--r--net/ipv4/udp_offload.c5
-rw-r--r--net/ipv6/udp.c31
-rw-r--r--net/ipv6/udp_offload.c5
10 files changed, 261 insertions, 58 deletions
diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index 815efc89ad73..727b25cc7ec4 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -1177,6 +1177,33 @@ udp_rmem_min - INTEGER
udp_wmem_min - INTEGER
UDP does not have tx memory accounting and this tunable has no effect.
+udp_hash_entries - INTEGER
+ Show the number of hash buckets for UDP sockets in the current
+ networking namespace.
+
+ A negative value means the networking namespace does not own its
+ hash buckets and shares the initial networking namespace's one.
+
+udp_child_ehash_entries - INTEGER
+ Control the number of hash buckets for UDP sockets in the child
+ networking namespace, which must be set before clone() or unshare().
+
+ If the value is not 0, the kernel uses a value rounded up to 2^n
+ as the actual hash bucket size. 0 is a special value, meaning
+ the child networking namespace will share the initial networking
+ namespace's hash buckets.
+
+ Note that the child will use the global one in case the kernel
+ fails to allocate enough memory. In addition, the global hash
+ buckets are spread over available NUMA nodes, but the allocation
+ of the child hash table depends on the current process's NUMA
+ policy, which could result in performance differences.
+
+ Possible values: 0, 2^n (n: 7 (128) - 16 (64K))
+
+ Default: 0
+
+
RAW variables
=============
diff --git a/include/linux/udp.h b/include/linux/udp.h
index dea57aa37df6..a2892e151644 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -23,7 +23,9 @@ static inline struct udphdr *udp_hdr(const struct sk_buff *skb)
return (struct udphdr *)skb_transport_header(skb);
}
+#define UDP_HTABLE_SIZE_MIN_PERNET 128
#define UDP_HTABLE_SIZE_MIN (CONFIG_BASE_SMALL ? 128 : 256)
+#define UDP_HTABLE_SIZE_MAX 65536
static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask)
{
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 25f90bba4889..db762e35aca9 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -43,6 +43,7 @@ struct tcp_fastopen_context;
struct netns_ipv4 {
struct inet_timewait_death_row tcp_death_row;
+ struct udp_table *udp_table;
#ifdef CONFIG_SYSCTL
struct ctl_table_header *forw_hdr;
@@ -207,6 +208,8 @@ struct netns_ipv4 {
atomic_t dev_addr_genid;
+ unsigned int sysctl_udp_child_hash_entries;
+
#ifdef CONFIG_SYSCTL
unsigned long *sysctl_local_reserved_ports;
int sysctl_ip_prot_sock;
diff --git a/net/core/filter.c b/net/core/filter.c
index 6dd2baf5eeb2..da4f697e4fa4 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -6432,7 +6432,7 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
else
sk = __udp4_lib_lookup(net, src4, tuple->ipv4.sport,
dst4, tuple->ipv4.dport,
- dif, sdif, &udp_table, NULL);
+ dif, sdif, net->ipv4.udp_table, NULL);
#if IS_ENABLED(CONFIG_IPV6)
} else {
struct in6_addr *src6 = (struct in6_addr *)&tuple->ipv6.saddr;
@@ -6448,7 +6448,7 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
src6, tuple->ipv6.sport,
dst6, tuple->ipv6.dport,
dif, sdif,
- &udp_table, NULL);
+ net->ipv4.udp_table, NULL);
#endif
}
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 0af28cedd071..0d0cc4ef2b85 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -40,6 +40,7 @@ static int one_day_secs = 24 * 3600;
static u32 fib_multipath_hash_fields_all_mask __maybe_unused =
FIB_MULTIPATH_HASH_FIELD_ALL_MASK;
static unsigned int tcp_child_ehash_entries_max = 16 * 1024 * 1024;
+static unsigned int udp_child_hash_entries_max = UDP_HTABLE_SIZE_MAX;
static int tcp_plb_max_rounds = 31;
static int tcp_plb_max_cong_thresh = 256;
@@ -402,12 +403,36 @@ static int proc_tcp_ehash_entries(struct ctl_table *table, int write,
if (!net_eq(net, &init_net) && !hinfo->pernet)
tcp_ehash_entries *= -1;
+ memset(&tbl, 0, sizeof(tbl));
tbl.data = &tcp_ehash_entries;
tbl.maxlen = sizeof(int);
return proc_dointvec(&tbl, write, buffer, lenp, ppos);
}
+static int proc_udp_hash_entries(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct net *net = container_of(table->data, struct net,
+ ipv4.sysctl_udp_child_hash_entries);
+ int udp_hash_entries;
+ struct ctl_table tbl;
+
+ udp_hash_entries = net->ipv4.udp_table->mask + 1;
+
+ /* A negative number indicates that the child netns
+ * shares the global udp_table.
+ */
+ if (!net_eq(net, &init_net) && net->ipv4.udp_table == &udp_table)
+ udp_hash_entries *= -1;
+
+ memset(&tbl, 0, sizeof(tbl));
+ tbl.data = &udp_hash_entries;
+ tbl.maxlen = sizeof(int);
+
+ return proc_dointvec(&tbl, write, buffer, lenp, ppos);
+}
+
#ifdef CONFIG_IP_ROUTE_MULTIPATH
static int proc_fib_multipath_hash_policy(struct ctl_table *table, int write,
void *buffer, size_t *lenp,
@@ -1362,6 +1387,21 @@ static struct ctl_table ipv4_net_table[] = {
.extra2 = &tcp_child_ehash_entries_max,
},
{
+ .procname = "udp_hash_entries",
+ .data = &init_net.ipv4.sysctl_udp_child_hash_entries,
+ .mode = 0444,
+ .proc_handler = proc_udp_hash_entries,
+ },
+ {
+ .procname = "udp_child_hash_entries",
+ .data = &init_net.ipv4.sysctl_udp_child_hash_entries,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_douintvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = &udp_child_hash_entries_max,
+ },
+ {
.procname = "udp_rmem_min",
.data = &init_net.ipv4.sysctl_udp_rmem_min,
.maxlen = sizeof(init_net.ipv4.sysctl_udp_rmem_min),
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index b859d6c8298e..1fb7d1ed1cb1 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -129,7 +129,12 @@ DEFINE_PER_CPU(int, udp_memory_per_cpu_fw_alloc);
EXPORT_PER_CPU_SYMBOL_GPL(udp_memory_per_cpu_fw_alloc);
#define MAX_UDP_PORTS 65536
-#define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN)
+#define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN_PERNET)
+
+static struct udp_table *udp_get_table_prot(struct sock *sk)
+{
+ return sk->sk_prot->h.udp_table ? : sock_net(sk)->ipv4.udp_table;
+}
static int udp_lib_lport_inuse(struct net *net, __u16 num,
const struct udp_hslot *hslot,
@@ -232,16 +237,16 @@ static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot)
int udp_lib_get_port(struct sock *sk, unsigned short snum,
unsigned int hash2_nulladdr)
{
+ struct udp_table *udptable = udp_get_table_prot(sk);
struct udp_hslot *hslot, *hslot2;
- struct udp_table *udptable = sk->sk_prot->h.udp_table;
- int error = 1;
struct net *net = sock_net(sk);
+ int error = 1;
if (!snum) {
+ DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN);
+ unsigned short first, last;
int low, high, remaining;
unsigned int rand;
- unsigned short first, last;
- DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN);
inet_get_local_port_range(net, &low, &high);
remaining = (high - low) + 1;
@@ -467,7 +472,7 @@ static struct sock *udp4_lookup_run_bpf(struct net *net,
struct sock *sk, *reuse_sk;
bool no_reuseport;
- if (udptable != &udp_table)
+ if (udptable != net->ipv4.udp_table)
return NULL; /* only UDP is supported */
no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_UDP, saddr, sport,
@@ -548,10 +553,11 @@ struct sock *udp4_lib_lookup_skb(const struct sk_buff *skb,
__be16 sport, __be16 dport)
{
const struct iphdr *iph = ip_hdr(skb);
+ struct net *net = dev_net(skb->dev);
- return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport,
+ return __udp4_lib_lookup(net, iph->saddr, sport,
iph->daddr, dport, inet_iif(skb),
- inet_sdif(skb), &udp_table, NULL);
+ inet_sdif(skb), net->ipv4.udp_table, NULL);
}
/* Must be called under rcu_read_lock().
@@ -564,7 +570,7 @@ struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
struct sock *sk;
sk = __udp4_lib_lookup(net, saddr, sport, daddr, dport,
- dif, 0, &udp_table, NULL);
+ dif, 0, net->ipv4.udp_table, NULL);
if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
sk = NULL;
return sk;
@@ -802,7 +808,7 @@ out:
int udp_err(struct sk_buff *skb, u32 info)
{
- return __udp4_lib_err(skb, info, &udp_table);
+ return __udp4_lib_err(skb, info, dev_net(skb->dev)->ipv4.udp_table);
}
/*
@@ -1999,7 +2005,7 @@ EXPORT_SYMBOL(udp_disconnect);
void udp_lib_unhash(struct sock *sk)
{
if (sk_hashed(sk)) {
- struct udp_table *udptable = sk->sk_prot->h.udp_table;
+ struct udp_table *udptable = udp_get_table_prot(sk);
struct udp_hslot *hslot, *hslot2;
hslot = udp_hashslot(udptable, sock_net(sk),
@@ -2030,7 +2036,7 @@ EXPORT_SYMBOL(udp_lib_unhash);
void udp_lib_rehash(struct sock *sk, u16 newhash)
{
if (sk_hashed(sk)) {
- struct udp_table *udptable = sk->sk_prot->h.udp_table;
+ struct udp_table *udptable = udp_get_table_prot(sk);
struct udp_hslot *hslot, *hslot2, *nhslot2;
hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
@@ -2519,10 +2525,14 @@ static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net,
__be16 rmt_port, __be32 rmt_addr,
int dif, int sdif)
{
- struct sock *sk, *result;
+ struct udp_table *udptable = net->ipv4.udp_table;
unsigned short hnum = ntohs(loc_port);
- unsigned int slot = udp_hashfn(net, hnum, udp_table.mask);
- struct udp_hslot *hslot = &udp_table.hash[slot];
+ struct sock *sk, *result;
+ struct udp_hslot *hslot;
+ unsigned int slot;
+
+ slot = udp_hashfn(net, hnum, udptable->mask);
+ hslot = &udptable->hash[slot];
/* Do not bother scanning a too big list */
if (hslot->count > 10)
@@ -2550,14 +2560,19 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net,
__be16 rmt_port, __be32 rmt_addr,
int dif, int sdif)
{
- unsigned short hnum = ntohs(loc_port);
- unsigned int hash2 = ipv4_portaddr_hash(net, loc_addr, hnum);
- unsigned int slot2 = hash2 & udp_table.mask;
- struct udp_hslot *hslot2 = &udp_table.hash2[slot2];
+ struct udp_table *udptable = net->ipv4.udp_table;
INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr);
- const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum);
+ unsigned short hnum = ntohs(loc_port);
+ unsigned int hash2, slot2;
+ struct udp_hslot *hslot2;
+ __portpair ports;
struct sock *sk;
+ hash2 = ipv4_portaddr_hash(net, loc_addr, hnum);
+ slot2 = hash2 & udptable->mask;
+ hslot2 = &udptable->hash2[slot2];
+ ports = INET_COMBINED_PORTS(rmt_port, hnum);
+
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
if (inet_match(net, sk, acookie, ports, dif, sdif))
return sk;
@@ -2637,7 +2652,7 @@ int udp_v4_early_demux(struct sk_buff *skb)
int udp_rcv(struct sk_buff *skb)
{
- return __udp4_lib_rcv(skb, &udp_table, IPPROTO_UDP);
+ return __udp4_lib_rcv(skb, dev_net(skb->dev)->ipv4.udp_table, IPPROTO_UDP);
}
void udp_destroy_sock(struct sock *sk)
@@ -2960,7 +2975,7 @@ struct proto udp_prot = {
.sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
.obj_size = sizeof(struct udp_sock),
- .h.udp_table = &udp_table,
+ .h.udp_table = NULL,
.diag_destroy = udp_abort,
};
EXPORT_SYMBOL(udp_prot);
@@ -2968,21 +2983,30 @@ EXPORT_SYMBOL(udp_prot);
/* ------------------------------------------------------------------------ */
#ifdef CONFIG_PROC_FS
+static struct udp_table *udp_get_table_afinfo(struct udp_seq_afinfo *afinfo,
+ struct net *net)
+{
+ return afinfo->udp_table ? : net->ipv4.udp_table;
+}
+
static struct sock *udp_get_first(struct seq_file *seq, int start)
{
- struct sock *sk;
- struct udp_seq_afinfo *afinfo;
struct udp_iter_state *state = seq->private;
struct net *net = seq_file_net(seq);
+ struct udp_seq_afinfo *afinfo;
+ struct udp_table *udptable;
+ struct sock *sk;
if (state->bpf_seq_afinfo)
afinfo = state->bpf_seq_afinfo;
else
afinfo = pde_data(file_inode(seq->file));
- for (state->bucket = start; state->bucket <= afinfo->udp_table->mask;
+ udptable = udp_get_table_afinfo(afinfo, net);
+
+ for (state->bucket = start; state->bucket <= udptable->mask;
++state->bucket) {
- struct udp_hslot *hslot = &afinfo->udp_table->hash[state->bucket];
+ struct udp_hslot *hslot = &udptable->hash[state->bucket];
if (hlist_empty(&hslot->head))
continue;
@@ -3004,9 +3028,10 @@ found:
static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
{
- struct udp_seq_afinfo *afinfo;
struct udp_iter_state *state = seq->private;
struct net *net = seq_file_net(seq);
+ struct udp_seq_afinfo *afinfo;
+ struct udp_table *udptable;
if (state->bpf_seq_afinfo)
afinfo = state->bpf_seq_afinfo;
@@ -3020,8 +3045,11 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
sk->sk_family != afinfo->family)));
if (!sk) {
- if (state->bucket <= afinfo->udp_table->mask)
- spin_unlock_bh(&afinfo->udp_table->hash[state->bucket].lock);
+ udptable = udp_get_table_afinfo(afinfo, net);
+
+ if (state->bucket <= udptable->mask)
+ spin_unlock_bh(&udptable->hash[state->bucket].lock);
+
return udp_get_first(seq, state->bucket + 1);
}
return sk;
@@ -3062,16 +3090,19 @@ EXPORT_SYMBOL(udp_seq_next);
void udp_seq_stop(struct seq_file *seq, void *v)
{
- struct udp_seq_afinfo *afinfo;
struct udp_iter_state *state = seq->private;
+ struct udp_seq_afinfo *afinfo;
+ struct udp_table *udptable;
if (state->bpf_seq_afinfo)
afinfo = state->bpf_seq_afinfo;
else
afinfo = pde_data(file_inode(seq->file));
- if (state->bucket <= afinfo->udp_table->mask)
- spin_unlock_bh(&afinfo->udp_table->hash[state->bucket].lock);
+ udptable = udp_get_table_afinfo(afinfo, seq_file_net(seq));
+
+ if (state->bucket <= udptable->mask)
+ spin_unlock_bh(&udptable->hash[state->bucket].lock);
}
EXPORT_SYMBOL(udp_seq_stop);
@@ -3184,7 +3215,7 @@ EXPORT_SYMBOL(udp_seq_ops);
static struct udp_seq_afinfo udp4_seq_afinfo = {
.family = AF_INET,
- .udp_table = &udp_table,
+ .udp_table = NULL,
};
static int __net_init udp4_proc_init_net(struct net *net)
@@ -3246,7 +3277,7 @@ void __init udp_table_init(struct udp_table *table, const char *name)
&table->log,
&table->mask,
UDP_HTABLE_SIZE_MIN,
- 64 * 1024);
+ UDP_HTABLE_SIZE_MAX);
table->hash2 = table->hash + (table->mask + 1);
for (i = 0; i <= table->mask; i++) {
@@ -3271,7 +3302,7 @@ u32 udp_flow_hashrnd(void)
}
EXPORT_SYMBOL(udp_flow_hashrnd);
-static int __net_init udp_sysctl_init(struct net *net)
+static void __net_init udp_sysctl_init(struct net *net)
{
net->ipv4.sysctl_udp_rmem_min = PAGE_SIZE;
net->ipv4.sysctl_udp_wmem_min = PAGE_SIZE;
@@ -3279,12 +3310,103 @@ static int __net_init udp_sysctl_init(struct net *net)
#ifdef CONFIG_NET_L3_MASTER_DEV
net->ipv4.sysctl_udp_l3mdev_accept = 0;
#endif
+}
+
+static struct udp_table __net_init *udp_pernet_table_alloc(unsigned int hash_entries)
+{
+ struct udp_table *udptable;
+ int i;
+
+ udptable = kmalloc(sizeof(*udptable), GFP_KERNEL);
+ if (!udptable)
+ goto out;
+
+ udptable->hash = vmalloc_huge(hash_entries * 2 * sizeof(struct udp_hslot),
+ GFP_KERNEL_ACCOUNT);
+ if (!udptable->hash)
+ goto free_table;
+
+ udptable->hash2 = udptable->hash + hash_entries;
+ udptable->mask = hash_entries - 1;
+ udptable->log = ilog2(hash_entries);
+
+ for (i = 0; i < hash_entries; i++) {
+ INIT_HLIST_HEAD(&udptable->hash[i].head);
+ udptable->hash[i].count = 0;
+ spin_lock_init(&udptable->hash[i].lock);
+
+ INIT_HLIST_HEAD(&udptable->hash2[i].head);
+ udptable->hash2[i].count = 0;
+ spin_lock_init(&udptable->hash2[i].lock);
+ }
+
+ return udptable;
+
+free_table:
+ kfree(udptable);
+out:
+ return NULL;
+}
+
+static void __net_exit udp_pernet_table_free(struct net *net)
+{
+ struct udp_table *udptable = net->ipv4.udp_table;
+
+ if (udptable == &udp_table)
+ return;
+
+ kvfree(udptable->hash);
+ kfree(udptable);
+}
+
+static void __net_init udp_set_table(struct net *net)
+{
+ struct udp_table *udptable;
+ unsigned int hash_entries;
+ struct net *old_net;
+
+ if (net_eq(net, &init_net))
+ goto fallback;
+
+ old_net = current->nsproxy->net_ns;
+ hash_entries = READ_ONCE(old_net->ipv4.sysctl_udp_child_hash_entries);
+ if (!hash_entries)
+ goto fallback;
+
+ /* Set min to keep the bitmap on stack in udp_lib_get_port() */
+ if (hash_entries < UDP_HTABLE_SIZE_MIN_PERNET)
+ hash_entries = UDP_HTABLE_SIZE_MIN_PERNET;
+ else
+ hash_entries = roundup_pow_of_two(hash_entries);
+
+ udptable = udp_pernet_table_alloc(hash_entries);
+ if (udptable) {
+ net->ipv4.udp_table = udptable;
+ } else {
+ pr_warn("Failed to allocate UDP hash table (entries: %u) "
+ "for a netns, fallback to the global one\n",
+ hash_entries);
+fallback:
+ net->ipv4.udp_table = &udp_table;
+ }
+}
+
+static int __net_init udp_pernet_init(struct net *net)
+{
+ udp_sysctl_init(net);
+ udp_set_table(net);
return 0;
}
+static void __net_exit udp_pernet_exit(struct net *net)
+{
+ udp_pernet_table_free(net);
+}
+
static struct pernet_operations __net_initdata udp_sysctl_ops = {
- .init = udp_sysctl_init,
+ .init = udp_pernet_init,
+ .exit = udp_pernet_exit,
};
#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
@@ -3302,7 +3424,7 @@ static int bpf_iter_init_udp(void *priv_data, struct bpf_iter_aux_info *aux)
return -ENOMEM;
afinfo->family = AF_UNSPEC;
- afinfo->udp_table = &udp_table;
+ afinfo->udp_table = NULL;
st->bpf_seq_afinfo = afinfo;
ret = bpf_iter_init_seq_net(priv_data, aux);
if (ret)
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index 1ed8c4d78e5c..de3f2d31f510 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -147,13 +147,13 @@ done:
static void udp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
const struct inet_diag_req_v2 *r)
{
- udp_dump(&udp_table, skb, cb, r);
+ udp_dump(sock_net(cb->skb->sk)->ipv4.udp_table, skb, cb, r);
}
static int udp_diag_dump_one(struct netlink_callback *cb,
const struct inet_diag_req_v2 *req)
{
- return udp_dump_one(&udp_table, cb, req);
+ return udp_dump_one(sock_net(cb->skb->sk)->ipv4.udp_table, cb, req);
}
static void udp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
@@ -225,7 +225,7 @@ static int __udp_diag_destroy(struct sk_buff *in_skb,
static int udp_diag_destroy(struct sk_buff *in_skb,
const struct inet_diag_req_v2 *req)
{
- return __udp_diag_destroy(in_skb, req, &udp_table);
+ return __udp_diag_destroy(in_skb, req, sock_net(in_skb->sk)->ipv4.udp_table);
}
static int udplite_diag_destroy(struct sk_buff *in_skb,
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 6d1a4bec2614..aedde65e2268 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -600,10 +600,11 @@ static struct sock *udp4_gro_lookup_skb(struct sk_buff *skb, __be16 sport,
__be16 dport)
{
const struct iphdr *iph = skb_gro_network_header(skb);
+ struct net *net = dev_net(skb->dev);
- return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport,
+ return __udp4_lib_lookup(net, iph->saddr, sport,
iph->daddr, dport, inet_iif(skb),
- inet_sdif(skb), &udp_table, NULL);
+ inet_sdif(skb), net->ipv4.udp_table, NULL);
}
INDIRECT_CALLABLE_SCOPE
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index e2de3d906c82..9fb2f33ee3a7 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -217,7 +217,7 @@ static inline struct sock *udp6_lookup_run_bpf(struct net *net,
struct sock *sk, *reuse_sk;
bool no_reuseport;
- if (udptable != &udp_table)
+ if (udptable != net->ipv4.udp_table)
return NULL; /* only UDP is supported */
no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_UDP, saddr, sport,
@@ -298,10 +298,11 @@ struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb,
__be16 sport, __be16 dport)
{
const struct ipv6hdr *iph = ipv6_hdr(skb);
+ struct net *net = dev_net(skb->dev);
- return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport,
+ return __udp6_lib_lookup(net, &iph->saddr, sport,
&iph->daddr, dport, inet6_iif(skb),
- inet6_sdif(skb), &udp_table, NULL);
+ inet6_sdif(skb), net->ipv4.udp_table, NULL);
}
/* Must be called under rcu_read_lock().
@@ -314,7 +315,7 @@ struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be
struct sock *sk;
sk = __udp6_lib_lookup(net, saddr, sport, daddr, dport,
- dif, 0, &udp_table, NULL);
+ dif, 0, net->ipv4.udp_table, NULL);
if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
sk = NULL;
return sk;
@@ -689,7 +690,8 @@ static __inline__ int udpv6_err(struct sk_buff *skb,
struct inet6_skb_parm *opt, u8 type,
u8 code, int offset, __be32 info)
{
- return __udp6_lib_err(skb, opt, type, code, offset, info, &udp_table);
+ return __udp6_lib_err(skb, opt, type, code, offset, info,
+ dev_net(skb->dev)->ipv4.udp_table);
}
static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
@@ -1063,13 +1065,18 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net,
__be16 rmt_port, const struct in6_addr *rmt_addr,
int dif, int sdif)
{
+ struct udp_table *udptable = net->ipv4.udp_table;
unsigned short hnum = ntohs(loc_port);
- unsigned int hash2 = ipv6_portaddr_hash(net, loc_addr, hnum);
- unsigned int slot2 = hash2 & udp_table.mask;
- struct udp_hslot *hslot2 = &udp_table.hash2[slot2];
- const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum);
+ unsigned int hash2, slot2;
+ struct udp_hslot *hslot2;
+ __portpair ports;
struct sock *sk;
+ hash2 = ipv6_portaddr_hash(net, loc_addr, hnum);
+ slot2 = hash2 & udptable->mask;
+ hslot2 = &udptable->hash2[slot2];
+ ports = INET_COMBINED_PORTS(rmt_port, hnum);
+
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
if (sk->sk_state == TCP_ESTABLISHED &&
inet6_match(net, sk, rmt_addr, loc_addr, ports, dif, sdif))
@@ -1123,7 +1130,7 @@ void udp_v6_early_demux(struct sk_buff *skb)
INDIRECT_CALLABLE_SCOPE int udpv6_rcv(struct sk_buff *skb)
{
- return __udp6_lib_rcv(skb, &udp_table, IPPROTO_UDP);
+ return __udp6_lib_rcv(skb, dev_net(skb->dev)->ipv4.udp_table, IPPROTO_UDP);
}
/*
@@ -1720,7 +1727,7 @@ EXPORT_SYMBOL(udp6_seq_ops);
static struct udp_seq_afinfo udp6_seq_afinfo = {
.family = AF_INET6,
- .udp_table = &udp_table,
+ .udp_table = NULL,
};
int __net_init udp6_proc_init(struct net *net)
@@ -1770,7 +1777,7 @@ struct proto udpv6_prot = {
.sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
.obj_size = sizeof(struct udp6_sock),
- .h.udp_table = &udp_table,
+ .h.udp_table = NULL,
.diag_destroy = udp_abort,
};
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 7720d04ed396..e0e10f6bcdc1 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -116,10 +116,11 @@ static struct sock *udp6_gro_lookup_skb(struct sk_buff *skb, __be16 sport,
__be16 dport)
{
const struct ipv6hdr *iph = skb_gro_network_header(skb);
+ struct net *net = dev_net(skb->dev);
- return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport,
+ return __udp6_lib_lookup(net, &iph->saddr, sport,
&iph->daddr, dport, inet6_iif(skb),
- inet6_sdif(skb), &udp_table, NULL);
+ inet6_sdif(skb), net->ipv4.udp_table, NULL);
}
INDIRECT_CALLABLE_SCOPE