diff options
Diffstat (limited to 'net/ipv4/inet_hashtables.c')
| -rw-r--r-- | net/ipv4/inet_hashtables.c | 117 | 
1 files changed, 50 insertions, 67 deletions
| diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 411dd7a90046..942265d65eb3 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -65,12 +65,14 @@ static u32 sk_ehashfn(const struct sock *sk)  struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,  						 struct net *net,  						 struct inet_bind_hashbucket *head, -						 const unsigned short snum) +						 const unsigned short snum, +						 int l3mdev)  {  	struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC);  	if (tb) {  		write_pnet(&tb->ib_net, net); +		tb->l3mdev    = l3mdev;  		tb->port      = snum;  		tb->fastreuse = 0;  		tb->fastreuseport = 0; @@ -135,6 +137,7 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child)  			table->bhash_size);  	struct inet_bind_hashbucket *head = &table->bhash[bhash];  	struct inet_bind_bucket *tb; +	int l3mdev;  	spin_lock(&head->lock);  	tb = inet_csk(sk)->icsk_bind_hash; @@ -143,6 +146,8 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child)  		return -ENOENT;  	}  	if (tb->port != port) { +		l3mdev = inet_sk_bound_l3mdev(sk); +  		/* NOTE: using tproxy and redirecting skbs to a proxy  		 * on a different listener port breaks the assumption  		 * that the listener socket's icsk_bind_hash is the same @@ -150,12 +155,13 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child)  		 * create a new bind bucket for the child here. */  		inet_bind_bucket_for_each(tb, &head->chain) {  			if (net_eq(ib_net(tb), sock_net(sk)) && -			    tb->port == port) +			    tb->l3mdev == l3mdev && tb->port == port)  				break;  		}  		if (!tb) {  			tb = inet_bind_bucket_create(table->bind_bucket_cachep, -						     sock_net(sk), head, port); +						     sock_net(sk), head, port, +						     l3mdev);  			if (!tb) {  				spin_unlock(&head->lock);  				return -ENOMEM; @@ -228,26 +234,16 @@ static inline int compute_score(struct sock *sk, struct net *net,  				const int dif, const int sdif, bool exact_dif)  {  	int score = -1; -	struct inet_sock *inet = inet_sk(sk); -	if (net_eq(sock_net(sk), net) && inet->inet_num == hnum && +	if (net_eq(sock_net(sk), net) && sk->sk_num == hnum &&  			!ipv6_only_sock(sk)) { -		__be32 rcv_saddr = inet->inet_rcv_saddr; +		if (sk->sk_rcv_saddr != daddr) +			return -1; + +		if (!inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif)) +			return -1; +  		score = sk->sk_family == PF_INET ? 2 : 1; -		if (rcv_saddr) { -			if (rcv_saddr != daddr) -				return -1; -			score += 4; -		} -		if (sk->sk_bound_dev_if || exact_dif) { -			bool dev_match = (sk->sk_bound_dev_if == dif || -					  sk->sk_bound_dev_if == sdif); - -			if (!dev_match) -				return -1; -			if (sk->sk_bound_dev_if) -				score += 4; -		}  		if (sk->sk_incoming_cpu == raw_smp_processor_id())  			score++;  	} @@ -303,26 +299,12 @@ struct sock *__inet_lookup_listener(struct net *net,  				    const __be32 daddr, const unsigned short hnum,  				    const int dif, const int sdif)  { -	unsigned int hash = inet_lhashfn(net, hnum); -	struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; -	bool exact_dif = inet_exact_dif_match(net, skb);  	struct inet_listen_hashbucket *ilb2; -	struct sock *sk, *result = NULL; -	int score, hiscore = 0; +	struct sock *result = NULL;  	unsigned int hash2; -	u32 phash = 0; - -	if (ilb->count <= 10 || !hashinfo->lhash2) -		goto port_lookup; - -	/* Too many sk in the ilb bucket (which is hashed by port alone). -	 * Try lhash2 (which is hashed by port and addr) instead. -	 */  	hash2 = ipv4_portaddr_hash(net, daddr, hnum);  	ilb2 = inet_lhash2_bucket(hashinfo, hash2); -	if (ilb2->count > ilb->count) -		goto port_lookup;  	result = inet_lhash2_lookup(net, ilb2, skb, doff,  				    saddr, sport, daddr, hnum, @@ -331,34 +313,12 @@ struct sock *__inet_lookup_listener(struct net *net,  		goto done;  	/* Lookup lhash2 with INADDR_ANY */ -  	hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);  	ilb2 = inet_lhash2_bucket(hashinfo, hash2); -	if (ilb2->count > ilb->count) -		goto port_lookup;  	result = inet_lhash2_lookup(net, ilb2, skb, doff, -				    saddr, sport, daddr, hnum, +				    saddr, sport, htonl(INADDR_ANY), hnum,  				    dif, sdif); -	goto done; - -port_lookup: -	sk_for_each_rcu(sk, &ilb->head) { -		score = compute_score(sk, net, hnum, daddr, -				      dif, sdif, exact_dif); -		if (score > hiscore) { -			if (sk->sk_reuseport) { -				phash = inet_ehashfn(net, daddr, hnum, -						     saddr, sport); -				result = reuseport_select_sock(sk, phash, -							       skb, doff); -				if (result) -					goto done; -			} -			result = sk; -			hiscore = score; -		} -	}  done:  	if (unlikely(IS_ERR(result)))  		return NULL; @@ -675,6 +635,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,  	u32 remaining, offset;  	int ret, i, low, high;  	static u32 hint; +	int l3mdev;  	if (port) {  		head = &hinfo->bhash[inet_bhashfn(net, port, @@ -693,6 +654,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,  		return ret;  	} +	l3mdev = inet_sk_bound_l3mdev(sk); +  	inet_get_local_port_range(net, &low, &high);  	high++; /* [32768, 60999] -> [32768, 61000[ */  	remaining = high - low; @@ -719,7 +682,8 @@ other_parity_scan:  		 * the established check is already unique enough.  		 */  		inet_bind_bucket_for_each(tb, &head->chain) { -			if (net_eq(ib_net(tb), net) && tb->port == port) { +			if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev && +			    tb->port == port) {  				if (tb->fastreuse >= 0 ||  				    tb->fastreuseport >= 0)  					goto next_port; @@ -732,7 +696,7 @@ other_parity_scan:  		}  		tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, -					     net, head, port); +					     net, head, port, l3mdev);  		if (!tb) {  			spin_unlock_bh(&head->lock);  			return -ENOMEM; @@ -798,13 +762,22 @@ void inet_hashinfo_init(struct inet_hashinfo *h)  }  EXPORT_SYMBOL_GPL(inet_hashinfo_init); +static void init_hashinfo_lhash2(struct inet_hashinfo *h) +{ +	int i; + +	for (i = 0; i <= h->lhash2_mask; i++) { +		spin_lock_init(&h->lhash2[i].lock); +		INIT_HLIST_HEAD(&h->lhash2[i].head); +		h->lhash2[i].count = 0; +	} +} +  void __init inet_hashinfo2_init(struct inet_hashinfo *h, const char *name,  				unsigned long numentries, int scale,  				unsigned long low_limit,  				unsigned long high_limit)  { -	unsigned int i; -  	h->lhash2 = alloc_large_system_hash(name,  					    sizeof(*h->lhash2),  					    numentries, @@ -814,13 +787,23 @@ void __init inet_hashinfo2_init(struct inet_hashinfo *h, const char *name,  					    &h->lhash2_mask,  					    low_limit,  					    high_limit); +	init_hashinfo_lhash2(h); +} -	for (i = 0; i <= h->lhash2_mask; i++) { -		spin_lock_init(&h->lhash2[i].lock); -		INIT_HLIST_HEAD(&h->lhash2[i].head); -		h->lhash2[i].count = 0; -	} +int inet_hashinfo2_init_mod(struct inet_hashinfo *h) +{ +	h->lhash2 = kmalloc_array(INET_LHTABLE_SIZE, sizeof(*h->lhash2), GFP_KERNEL); +	if (!h->lhash2) +		return -ENOMEM; + +	h->lhash2_mask = INET_LHTABLE_SIZE - 1; +	/* INET_LHTABLE_SIZE must be a power of 2 */ +	BUG_ON(INET_LHTABLE_SIZE & h->lhash2_mask); + +	init_hashinfo_lhash2(h); +	return 0;  } +EXPORT_SYMBOL_GPL(inet_hashinfo2_init_mod);  int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo)  { | 
