diff options
Diffstat (limited to 'net/ipv4/inet_hashtables.c')
-rw-r--r-- | net/ipv4/inet_hashtables.c | 117 |
1 files changed, 50 insertions, 67 deletions
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 411dd7a90046..942265d65eb3 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -65,12 +65,14 @@ static u32 sk_ehashfn(const struct sock *sk) struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep, struct net *net, struct inet_bind_hashbucket *head, - const unsigned short snum) + const unsigned short snum, + int l3mdev) { struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC); if (tb) { write_pnet(&tb->ib_net, net); + tb->l3mdev = l3mdev; tb->port = snum; tb->fastreuse = 0; tb->fastreuseport = 0; @@ -135,6 +137,7 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child) table->bhash_size); struct inet_bind_hashbucket *head = &table->bhash[bhash]; struct inet_bind_bucket *tb; + int l3mdev; spin_lock(&head->lock); tb = inet_csk(sk)->icsk_bind_hash; @@ -143,6 +146,8 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child) return -ENOENT; } if (tb->port != port) { + l3mdev = inet_sk_bound_l3mdev(sk); + /* NOTE: using tproxy and redirecting skbs to a proxy * on a different listener port breaks the assumption * that the listener socket's icsk_bind_hash is the same @@ -150,12 +155,13 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child) * create a new bind bucket for the child here. */ inet_bind_bucket_for_each(tb, &head->chain) { if (net_eq(ib_net(tb), sock_net(sk)) && - tb->port == port) + tb->l3mdev == l3mdev && tb->port == port) break; } if (!tb) { tb = inet_bind_bucket_create(table->bind_bucket_cachep, - sock_net(sk), head, port); + sock_net(sk), head, port, + l3mdev); if (!tb) { spin_unlock(&head->lock); return -ENOMEM; @@ -228,26 +234,16 @@ static inline int compute_score(struct sock *sk, struct net *net, const int dif, const int sdif, bool exact_dif) { int score = -1; - struct inet_sock *inet = inet_sk(sk); - if (net_eq(sock_net(sk), net) && inet->inet_num == hnum && + if (net_eq(sock_net(sk), net) && sk->sk_num == hnum && !ipv6_only_sock(sk)) { - __be32 rcv_saddr = inet->inet_rcv_saddr; + if (sk->sk_rcv_saddr != daddr) + return -1; + + if (!inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif)) + return -1; + score = sk->sk_family == PF_INET ? 2 : 1; - if (rcv_saddr) { - if (rcv_saddr != daddr) - return -1; - score += 4; - } - if (sk->sk_bound_dev_if || exact_dif) { - bool dev_match = (sk->sk_bound_dev_if == dif || - sk->sk_bound_dev_if == sdif); - - if (!dev_match) - return -1; - if (sk->sk_bound_dev_if) - score += 4; - } if (sk->sk_incoming_cpu == raw_smp_processor_id()) score++; } @@ -303,26 +299,12 @@ struct sock *__inet_lookup_listener(struct net *net, const __be32 daddr, const unsigned short hnum, const int dif, const int sdif) { - unsigned int hash = inet_lhashfn(net, hnum); - struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; - bool exact_dif = inet_exact_dif_match(net, skb); struct inet_listen_hashbucket *ilb2; - struct sock *sk, *result = NULL; - int score, hiscore = 0; + struct sock *result = NULL; unsigned int hash2; - u32 phash = 0; - - if (ilb->count <= 10 || !hashinfo->lhash2) - goto port_lookup; - - /* Too many sk in the ilb bucket (which is hashed by port alone). - * Try lhash2 (which is hashed by port and addr) instead. - */ hash2 = ipv4_portaddr_hash(net, daddr, hnum); ilb2 = inet_lhash2_bucket(hashinfo, hash2); - if (ilb2->count > ilb->count) - goto port_lookup; result = inet_lhash2_lookup(net, ilb2, skb, doff, saddr, sport, daddr, hnum, @@ -331,34 +313,12 @@ struct sock *__inet_lookup_listener(struct net *net, goto done; /* Lookup lhash2 with INADDR_ANY */ - hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum); ilb2 = inet_lhash2_bucket(hashinfo, hash2); - if (ilb2->count > ilb->count) - goto port_lookup; result = inet_lhash2_lookup(net, ilb2, skb, doff, - saddr, sport, daddr, hnum, + saddr, sport, htonl(INADDR_ANY), hnum, dif, sdif); - goto done; - -port_lookup: - sk_for_each_rcu(sk, &ilb->head) { - score = compute_score(sk, net, hnum, daddr, - dif, sdif, exact_dif); - if (score > hiscore) { - if (sk->sk_reuseport) { - phash = inet_ehashfn(net, daddr, hnum, - saddr, sport); - result = reuseport_select_sock(sk, phash, - skb, doff); - if (result) - goto done; - } - result = sk; - hiscore = score; - } - } done: if (unlikely(IS_ERR(result))) return NULL; @@ -675,6 +635,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, u32 remaining, offset; int ret, i, low, high; static u32 hint; + int l3mdev; if (port) { head = &hinfo->bhash[inet_bhashfn(net, port, @@ -693,6 +654,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, return ret; } + l3mdev = inet_sk_bound_l3mdev(sk); + inet_get_local_port_range(net, &low, &high); high++; /* [32768, 60999] -> [32768, 61000[ */ remaining = high - low; @@ -719,7 +682,8 @@ other_parity_scan: * the established check is already unique enough. */ inet_bind_bucket_for_each(tb, &head->chain) { - if (net_eq(ib_net(tb), net) && tb->port == port) { + if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev && + tb->port == port) { if (tb->fastreuse >= 0 || tb->fastreuseport >= 0) goto next_port; @@ -732,7 +696,7 @@ other_parity_scan: } tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, - net, head, port); + net, head, port, l3mdev); if (!tb) { spin_unlock_bh(&head->lock); return -ENOMEM; @@ -798,13 +762,22 @@ void inet_hashinfo_init(struct inet_hashinfo *h) } EXPORT_SYMBOL_GPL(inet_hashinfo_init); +static void init_hashinfo_lhash2(struct inet_hashinfo *h) +{ + int i; + + for (i = 0; i <= h->lhash2_mask; i++) { + spin_lock_init(&h->lhash2[i].lock); + INIT_HLIST_HEAD(&h->lhash2[i].head); + h->lhash2[i].count = 0; + } +} + void __init inet_hashinfo2_init(struct inet_hashinfo *h, const char *name, unsigned long numentries, int scale, unsigned long low_limit, unsigned long high_limit) { - unsigned int i; - h->lhash2 = alloc_large_system_hash(name, sizeof(*h->lhash2), numentries, @@ -814,13 +787,23 @@ void __init inet_hashinfo2_init(struct inet_hashinfo *h, const char *name, &h->lhash2_mask, low_limit, high_limit); + init_hashinfo_lhash2(h); +} - for (i = 0; i <= h->lhash2_mask; i++) { - spin_lock_init(&h->lhash2[i].lock); - INIT_HLIST_HEAD(&h->lhash2[i].head); - h->lhash2[i].count = 0; - } +int inet_hashinfo2_init_mod(struct inet_hashinfo *h) +{ + h->lhash2 = kmalloc_array(INET_LHTABLE_SIZE, sizeof(*h->lhash2), GFP_KERNEL); + if (!h->lhash2) + return -ENOMEM; + + h->lhash2_mask = INET_LHTABLE_SIZE - 1; + /* INET_LHTABLE_SIZE must be a power of 2 */ + BUG_ON(INET_LHTABLE_SIZE & h->lhash2_mask); + + init_hashinfo_lhash2(h); + return 0; } +EXPORT_SYMBOL_GPL(inet_hashinfo2_init_mod); int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo) { |