diff options
Diffstat (limited to 'net/ipv4/udp.c')
| -rw-r--r-- | net/ipv4/udp.c | 430 | 
1 files changed, 156 insertions, 274 deletions
| diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index a2e7f55a1f61..d56c0559b477 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -143,10 +143,9 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num,  			       unsigned int log)  {  	struct sock *sk2; -	struct hlist_nulls_node *node;  	kuid_t uid = sock_i_uid(sk); -	sk_nulls_for_each(sk2, node, &hslot->head) { +	sk_for_each(sk2, &hslot->head) {  		if (net_eq(sock_net(sk2), net) &&  		    sk2 != sk &&  		    (bitmap || udp_sk(sk2)->udp_port_hash == num) && @@ -177,12 +176,11 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num,  						  bool match_wildcard))  {  	struct sock *sk2; -	struct hlist_nulls_node *node;  	kuid_t uid = sock_i_uid(sk);  	int res = 0;  	spin_lock(&hslot2->lock); -	udp_portaddr_for_each_entry(sk2, node, &hslot2->head) { +	udp_portaddr_for_each_entry(sk2, &hslot2->head) {  		if (net_eq(sock_net(sk2), net) &&  		    sk2 != sk &&  		    (udp_sk(sk2)->udp_port_hash == num) && @@ -207,11 +205,10 @@ static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot,  						    bool match_wildcard))  {  	struct net *net = sock_net(sk); -	struct hlist_nulls_node *node;  	kuid_t uid = sock_i_uid(sk);  	struct sock *sk2; -	sk_nulls_for_each(sk2, node, &hslot->head) { +	sk_for_each(sk2, &hslot->head) {  		if (net_eq(sock_net(sk2), net) &&  		    sk2 != sk &&  		    sk2->sk_family == sk->sk_family && @@ -333,22 +330,23 @@ found:  			goto fail_unlock;  		} -		sk_nulls_add_node_rcu(sk, &hslot->head); +		sk_add_node_rcu(sk, &hslot->head);  		hslot->count++;  		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);  		hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);  		spin_lock(&hslot2->lock);  		if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport && -			sk->sk_family == AF_INET6) -			hlist_nulls_add_tail_rcu(&udp_sk(sk)->udp_portaddr_node, -						 &hslot2->head); +		    sk->sk_family == AF_INET6) +			hlist_add_tail_rcu(&udp_sk(sk)->udp_portaddr_node, +					   &hslot2->head);  		else -			hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node, -						 &hslot2->head); +			hlist_add_head_rcu(&udp_sk(sk)->udp_portaddr_node, +					   &hslot2->head);  		hslot2->count++;  		spin_unlock(&hslot2->lock);  	} +	sock_set_flag(sk, SOCK_RCU_FREE);  	error = 0;  fail_unlock:  	spin_unlock_bh(&hslot->lock); @@ -502,37 +500,27 @@ static struct sock *udp4_lib_lookup2(struct net *net,  		struct sk_buff *skb)  {  	struct sock *sk, *result; -	struct hlist_nulls_node *node;  	int score, badness, matches = 0, reuseport = 0; -	bool select_ok = true;  	u32 hash = 0; -begin:  	result = NULL;  	badness = 0; -	udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) { +	udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {  		score = compute_score2(sk, net, saddr, sport,  				      daddr, hnum, dif);  		if (score > badness) { -			result = sk; -			badness = score;  			reuseport = sk->sk_reuseport;  			if (reuseport) {  				hash = udp_ehashfn(net, daddr, hnum,  						   saddr, sport); -				if (select_ok) { -					struct sock *sk2; - -					sk2 = reuseport_select_sock(sk, hash, skb, +				result = reuseport_select_sock(sk, hash, skb,  							sizeof(struct udphdr)); -					if (sk2) { -						result = sk2; -						select_ok = false; -						goto found; -					} -				} +				if (result) +					return result;  				matches = 1;  			} +			badness = score; +			result = sk;  		} else if (score == badness && reuseport) {  			matches++;  			if (reciprocal_scale(hash, matches) == 0) @@ -540,23 +528,6 @@ begin:  			hash = next_pseudo_random32(hash);  		}  	} -	/* -	 * if the nulls value we got at the end of this lookup is -	 * not the expected one, we must restart lookup. -	 * We probably met an item that was moved to another chain. -	 */ -	if (get_nulls_value(node) != slot2) -		goto begin; -	if (result) { -found: -		if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) -			result = NULL; -		else if (unlikely(compute_score2(result, net, saddr, sport, -				  daddr, hnum, dif) < badness)) { -			sock_put(result); -			goto begin; -		} -	}  	return result;  } @@ -568,15 +539,12 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,  		int dif, struct udp_table *udptable, struct sk_buff *skb)  {  	struct sock *sk, *result; -	struct hlist_nulls_node *node;  	unsigned short hnum = ntohs(dport);  	unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);  	struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];  	int score, badness, matches = 0, reuseport = 0; -	bool select_ok = true;  	u32 hash = 0; -	rcu_read_lock();  	if (hslot->count > 10) {  		hash2 = udp4_portaddr_hash(net, daddr, hnum);  		slot2 = hash2 & udptable->mask; @@ -598,35 +566,27 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,  						  htonl(INADDR_ANY), hnum, dif,  						  hslot2, slot2, skb);  		} -		rcu_read_unlock();  		return result;  	}  begin:  	result = NULL;  	badness = 0; -	sk_nulls_for_each_rcu(sk, node, &hslot->head) { +	sk_for_each_rcu(sk, &hslot->head) {  		score = compute_score(sk, net, saddr, hnum, sport,  				      daddr, dport, dif);  		if (score > badness) { -			result = sk; -			badness = score;  			reuseport = sk->sk_reuseport;  			if (reuseport) {  				hash = udp_ehashfn(net, daddr, hnum,  						   saddr, sport); -				if (select_ok) { -					struct sock *sk2; - -					sk2 = reuseport_select_sock(sk, hash, skb, +				result = reuseport_select_sock(sk, hash, skb,  							sizeof(struct udphdr)); -					if (sk2) { -						result = sk2; -						select_ok = false; -						goto found; -					} -				} +				if (result) +					return result;  				matches = 1;  			} +			result = sk; +			badness = score;  		} else if (score == badness && reuseport) {  			matches++;  			if (reciprocal_scale(hash, matches) == 0) @@ -634,25 +594,6 @@ begin:  			hash = next_pseudo_random32(hash);  		}  	} -	/* -	 * if the nulls value we got at the end of this lookup is -	 * not the expected one, we must restart lookup. -	 * We probably met an item that was moved to another chain. -	 */ -	if (get_nulls_value(node) != slot) -		goto begin; - -	if (result) { -found: -		if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) -			result = NULL; -		else if (unlikely(compute_score(result, net, saddr, hnum, sport, -				  daddr, dport, dif) < badness)) { -			sock_put(result); -			goto begin; -		} -	} -	rcu_read_unlock();  	return result;  }  EXPORT_SYMBOL_GPL(__udp4_lib_lookup); @@ -663,18 +604,36 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,  {  	const struct iphdr *iph = ip_hdr(skb); -	return __udp4_lib_lookup(dev_net(skb_dst(skb)->dev), iph->saddr, sport, +	return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport,  				 iph->daddr, dport, inet_iif(skb),  				 udptable, skb);  } +struct sock *udp4_lib_lookup_skb(struct sk_buff *skb, +				 __be16 sport, __be16 dport) +{ +	return __udp4_lib_lookup_skb(skb, sport, dport, &udp_table); +} +EXPORT_SYMBOL_GPL(udp4_lib_lookup_skb); + +/* Must be called under rcu_read_lock(). + * Does increment socket refcount. + */ +#if IS_ENABLED(CONFIG_NETFILTER_XT_MATCH_SOCKET) || \ +    IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TPROXY)  struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,  			     __be32 daddr, __be16 dport, int dif)  { -	return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, -				 &udp_table, NULL); +	struct sock *sk; + +	sk = __udp4_lib_lookup(net, saddr, sport, daddr, dport, +			       dif, &udp_table, NULL); +	if (sk && !atomic_inc_not_zero(&sk->sk_refcnt)) +		sk = NULL; +	return sk;  }  EXPORT_SYMBOL_GPL(udp4_lib_lookup); +#endif  static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk,  				       __be16 loc_port, __be32 loc_addr, @@ -723,7 +682,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)  			iph->saddr, uh->source, skb->dev->ifindex, udptable,  			NULL);  	if (!sk) { -		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); +		__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);  		return;	/* No socket for error */  	} @@ -776,7 +735,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)  	sk->sk_err = err;  	sk->sk_error_report(sk);  out: -	sock_put(sk); +	return;  }  void udp_err(struct sk_buff *skb, u32 info) @@ -917,13 +876,13 @@ send:  	err = ip_send_skb(sock_net(sk), skb);  	if (err) {  		if (err == -ENOBUFS && !inet->recverr) { -			UDP_INC_STATS_USER(sock_net(sk), -					   UDP_MIB_SNDBUFERRORS, is_udplite); +			UDP_INC_STATS(sock_net(sk), +				      UDP_MIB_SNDBUFERRORS, is_udplite);  			err = 0;  		}  	} else -		UDP_INC_STATS_USER(sock_net(sk), -				   UDP_MIB_OUTDATAGRAMS, is_udplite); +		UDP_INC_STATS(sock_net(sk), +			      UDP_MIB_OUTDATAGRAMS, is_udplite);  	return err;  } @@ -1032,15 +991,13 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)  		 */  		connected = 1;  	} -	ipc.addr = inet->inet_saddr; +	ipc.sockc.tsflags = sk->sk_tsflags; +	ipc.addr = inet->inet_saddr;  	ipc.oif = sk->sk_bound_dev_if; -	sock_tx_timestamp(sk, &ipc.tx_flags); -  	if (msg->msg_controllen) { -		err = ip_cmsg_send(sock_net(sk), msg, &ipc, -				   sk->sk_family == AF_INET6); +		err = ip_cmsg_send(sk, msg, &ipc, sk->sk_family == AF_INET6);  		if (unlikely(err)) {  			kfree(ipc.opt);  			return err; @@ -1065,6 +1022,8 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)  	saddr = ipc.addr;  	ipc.addr = faddr = daddr; +	sock_tx_timestamp(sk, ipc.sockc.tsflags, &ipc.tx_flags); +  	if (ipc.opt && ipc.opt->opt.srr) {  		if (!daddr)  			return -EINVAL; @@ -1192,8 +1151,8 @@ out:  	 * seems like overkill.  	 */  	if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { -		UDP_INC_STATS_USER(sock_net(sk), -				UDP_MIB_SNDBUFERRORS, is_udplite); +		UDP_INC_STATS(sock_net(sk), +			      UDP_MIB_SNDBUFERRORS, is_udplite);  	}  	return err; @@ -1277,10 +1236,10 @@ static unsigned int first_packet_length(struct sock *sk)  	spin_lock_bh(&rcvq->lock);  	while ((skb = skb_peek(rcvq)) != NULL &&  		udp_lib_checksum_complete(skb)) { -		UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS, -				 IS_UDPLITE(sk)); -		UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, -				 IS_UDPLITE(sk)); +		__UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, +				IS_UDPLITE(sk)); +		__UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, +				IS_UDPLITE(sk));  		atomic_inc(&sk->sk_drops);  		__skb_unlink(skb, rcvq);  		__skb_queue_tail(&list_kill, skb); @@ -1316,14 +1275,6 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)  	{  		unsigned int amount = first_packet_length(sk); -		if (amount) -			/* -			 * We will only return the amount -			 * of this packet since that is all -			 * that will be read. -			 */ -			amount -= sizeof(struct udphdr); -  		return put_user(amount, (int __user *)arg);  	} @@ -1347,7 +1298,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,  	DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);  	struct sk_buff *skb;  	unsigned int ulen, copied; -	int peeked, off = 0; +	int peeked, peeking, off;  	int err;  	int is_udplite = IS_UDPLITE(sk);  	bool checksum_valid = false; @@ -1357,15 +1308,16 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,  		return ip_recv_error(sk, msg, len, addr_len);  try_again: +	peeking = off = sk_peek_offset(sk, flags);  	skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),  				  &peeked, &off, &err);  	if (!skb) -		goto out; +		return err; -	ulen = skb->len - sizeof(struct udphdr); +	ulen = skb->len;  	copied = len; -	if (copied > ulen) -		copied = ulen; +	if (copied > ulen - off) +		copied = ulen - off;  	else if (copied < ulen)  		msg->msg_flags |= MSG_TRUNC; @@ -1375,18 +1327,16 @@ try_again:  	 * coverage checksum (UDP-Lite), do it before the copy.  	 */ -	if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { +	if (copied < ulen || UDP_SKB_CB(skb)->partial_cov || peeking) {  		checksum_valid = !udp_lib_checksum_complete(skb);  		if (!checksum_valid)  			goto csum_copy_err;  	}  	if (checksum_valid || skb_csum_unnecessary(skb)) -		err = skb_copy_datagram_msg(skb, sizeof(struct udphdr), -					    msg, copied); +		err = skb_copy_datagram_msg(skb, off, msg, copied);  	else { -		err = skb_copy_and_csum_datagram_msg(skb, sizeof(struct udphdr), -						     msg); +		err = skb_copy_and_csum_datagram_msg(skb, off, msg);  		if (err == -EINVAL)  			goto csum_copy_err; @@ -1396,15 +1346,16 @@ try_again:  		trace_kfree_skb(skb, udp_recvmsg);  		if (!peeked) {  			atomic_inc(&sk->sk_drops); -			UDP_INC_STATS_USER(sock_net(sk), -					   UDP_MIB_INERRORS, is_udplite); +			UDP_INC_STATS(sock_net(sk), +				      UDP_MIB_INERRORS, is_udplite);  		} -		goto out_free; +		skb_free_datagram_locked(sk, skb); +		return err;  	}  	if (!peeked) -		UDP_INC_STATS_USER(sock_net(sk), -				UDP_MIB_INDATAGRAMS, is_udplite); +		UDP_INC_STATS(sock_net(sk), +			      UDP_MIB_INDATAGRAMS, is_udplite);  	sock_recv_ts_and_drops(msg, sk, skb); @@ -1417,22 +1368,20 @@ try_again:  		*addr_len = sizeof(*sin);  	}  	if (inet->cmsg_flags) -		ip_cmsg_recv_offset(msg, skb, sizeof(struct udphdr)); +		ip_cmsg_recv_offset(msg, skb, sizeof(struct udphdr) + off);  	err = copied;  	if (flags & MSG_TRUNC)  		err = ulen; -out_free: -	skb_free_datagram_locked(sk, skb); -out: +	__skb_free_datagram_locked(sk, skb, peeking ? -err : err);  	return err;  csum_copy_err:  	slow = lock_sock_fast(sk);  	if (!skb_kill_datagram(sk, skb, flags)) { -		UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); -		UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); +		UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); +		UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);  	}  	unlock_sock_fast(sk, slow); @@ -1479,13 +1428,13 @@ void udp_lib_unhash(struct sock *sk)  		spin_lock_bh(&hslot->lock);  		if (rcu_access_pointer(sk->sk_reuseport_cb))  			reuseport_detach_sock(sk); -		if (sk_nulls_del_node_init_rcu(sk)) { +		if (sk_del_node_init_rcu(sk)) {  			hslot->count--;  			inet_sk(sk)->inet_num = 0;  			sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);  			spin_lock(&hslot2->lock); -			hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node); +			hlist_del_init_rcu(&udp_sk(sk)->udp_portaddr_node);  			hslot2->count--;  			spin_unlock(&hslot2->lock);  		} @@ -1518,12 +1467,12 @@ void udp_lib_rehash(struct sock *sk, u16 newhash)  			if (hslot2 != nhslot2) {  				spin_lock(&hslot2->lock); -				hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node); +				hlist_del_init_rcu(&udp_sk(sk)->udp_portaddr_node);  				hslot2->count--;  				spin_unlock(&hslot2->lock);  				spin_lock(&nhslot2->lock); -				hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node, +				hlist_add_head_rcu(&udp_sk(sk)->udp_portaddr_node,  							 &nhslot2->head);  				nhslot2->count++;  				spin_unlock(&nhslot2->lock); @@ -1553,15 +1502,15 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)  		sk_incoming_cpu_update(sk);  	} -	rc = sock_queue_rcv_skb(sk, skb); +	rc = __sock_queue_rcv_skb(sk, skb);  	if (rc < 0) {  		int is_udplite = IS_UDPLITE(sk);  		/* Note that an ENOMEM error is charged twice */  		if (rc == -ENOMEM) -			UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, -					 is_udplite); -		UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); +			UDP_INC_STATS(sock_net(sk), UDP_MIB_RCVBUFERRORS, +					is_udplite); +		UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);  		kfree_skb(skb);  		trace_udp_fail_queue_rcv_skb(rc, sk);  		return -1; @@ -1616,7 +1565,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)  		/* if we're overly short, let UDP handle it */  		encap_rcv = ACCESS_ONCE(up->encap_rcv); -		if (skb->len > sizeof(struct udphdr) && encap_rcv) { +		if (encap_rcv) {  			int ret;  			/* Verify checksum before giving to encap */ @@ -1625,9 +1574,9 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)  			ret = encap_rcv(sk, skb);  			if (ret <= 0) { -				UDP_INC_STATS_BH(sock_net(sk), -						 UDP_MIB_INDATAGRAMS, -						 is_udplite); +				__UDP_INC_STATS(sock_net(sk), +						UDP_MIB_INDATAGRAMS, +						is_udplite);  				return -ret;  			}  		} @@ -1669,13 +1618,17 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)  		}  	} -	if (rcu_access_pointer(sk->sk_filter) && -	    udp_lib_checksum_complete(skb)) -		goto csum_error; +	if (rcu_access_pointer(sk->sk_filter)) { +		if (udp_lib_checksum_complete(skb)) +			goto csum_error; +		if (sk_filter(sk, skb)) +			goto drop; +	} +	udp_csum_pull_header(skb);  	if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) { -		UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, -				 is_udplite); +		__UDP_INC_STATS(sock_net(sk), UDP_MIB_RCVBUFERRORS, +				is_udplite);  		goto drop;  	} @@ -1694,43 +1647,14 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)  	return rc;  csum_error: -	UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); +	__UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);  drop: -	UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); +	__UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);  	atomic_inc(&sk->sk_drops);  	kfree_skb(skb);  	return -1;  } -static void flush_stack(struct sock **stack, unsigned int count, -			struct sk_buff *skb, unsigned int final) -{ -	unsigned int i; -	struct sk_buff *skb1 = NULL; -	struct sock *sk; - -	for (i = 0; i < count; i++) { -		sk = stack[i]; -		if (likely(!skb1)) -			skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC); - -		if (!skb1) { -			atomic_inc(&sk->sk_drops); -			UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, -					 IS_UDPLITE(sk)); -			UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, -					 IS_UDPLITE(sk)); -		} - -		if (skb1 && udp_queue_rcv_skb(sk, skb1) <= 0) -			skb1 = NULL; - -		sock_put(sk); -	} -	if (unlikely(skb1)) -		kfree_skb(skb1); -} -  /* For TCP sockets, sk_rx_dst is protected by socket lock   * For UDP, we use xchg() to guard against concurrent changes.   */ @@ -1754,14 +1678,14 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,  				    struct udp_table *udptable,  				    int proto)  { -	struct sock *sk, *stack[256 / sizeof(struct sock *)]; -	struct hlist_nulls_node *node; +	struct sock *sk, *first = NULL;  	unsigned short hnum = ntohs(uh->dest);  	struct udp_hslot *hslot = udp_hashslot(udptable, net, hnum); -	int dif = skb->dev->ifindex; -	unsigned int count = 0, offset = offsetof(typeof(*sk), sk_nulls_node);  	unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10); -	bool inner_flushed = false; +	unsigned int offset = offsetof(typeof(*sk), sk_node); +	int dif = skb->dev->ifindex; +	struct hlist_node *node; +	struct sk_buff *nskb;  	if (use_hash2) {  		hash2_any = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum) & @@ -1772,23 +1696,28 @@ start_lookup:  		offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);  	} -	spin_lock(&hslot->lock); -	sk_nulls_for_each_entry_offset(sk, node, &hslot->head, offset) { -		if (__udp_is_mcast_sock(net, sk, -					uh->dest, daddr, -					uh->source, saddr, -					dif, hnum)) { -			if (unlikely(count == ARRAY_SIZE(stack))) { -				flush_stack(stack, count, skb, ~0); -				inner_flushed = true; -				count = 0; -			} -			stack[count++] = sk; -			sock_hold(sk); +	sk_for_each_entry_offset_rcu(sk, node, &hslot->head, offset) { +		if (!__udp_is_mcast_sock(net, sk, uh->dest, daddr, +					 uh->source, saddr, dif, hnum)) +			continue; + +		if (!first) { +			first = sk; +			continue;  		} -	} +		nskb = skb_clone(skb, GFP_ATOMIC); -	spin_unlock(&hslot->lock); +		if (unlikely(!nskb)) { +			atomic_inc(&sk->sk_drops); +			__UDP_INC_STATS(net, UDP_MIB_RCVBUFERRORS, +					IS_UDPLITE(sk)); +			__UDP_INC_STATS(net, UDP_MIB_INERRORS, +					IS_UDPLITE(sk)); +			continue; +		} +		if (udp_queue_rcv_skb(sk, nskb) > 0) +			consume_skb(nskb); +	}  	/* Also lookup *:port if we are using hash2 and haven't done so yet. */  	if (use_hash2 && hash2 != hash2_any) { @@ -1796,16 +1725,13 @@ start_lookup:  		goto start_lookup;  	} -	/* -	 * do the slow work with no lock held -	 */ -	if (count) { -		flush_stack(stack, count, skb, count - 1); +	if (first) { +		if (udp_queue_rcv_skb(first, skb) > 0) +			consume_skb(skb);  	} else { -		if (!inner_flushed) -			UDP_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI, -					 proto == IPPROTO_UDPLITE); -		consume_skb(skb); +		kfree_skb(skb); +		__UDP_INC_STATS(net, UDP_MIB_IGNOREDMULTI, +				proto == IPPROTO_UDPLITE);  	}  	return 0;  } @@ -1902,7 +1828,6 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,  						 inet_compute_pseudo);  		ret = udp_queue_rcv_skb(sk, skb); -		sock_put(sk);  		/* a return value > 0 means to resubmit the input, but  		 * it wants the return to be -protocol, or 0 @@ -1920,7 +1845,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,  	if (udp_lib_checksum_complete(skb))  		goto csum_error; -	UDP_INC_STATS_BH(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); +	__UDP_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);  	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);  	/* @@ -1947,9 +1872,9 @@ csum_error:  			    proto == IPPROTO_UDPLITE ? "Lite" : "",  			    &saddr, ntohs(uh->source), &daddr, ntohs(uh->dest),  			    ulen); -	UDP_INC_STATS_BH(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE); +	__UDP_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);  drop: -	UDP_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); +	__UDP_INC_STATS(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);  	kfree_skb(skb);  	return 0;  } @@ -1963,49 +1888,24 @@ static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net,  						  int dif)  {  	struct sock *sk, *result; -	struct hlist_nulls_node *node;  	unsigned short hnum = ntohs(loc_port); -	unsigned int count, slot = udp_hashfn(net, hnum, udp_table.mask); +	unsigned int slot = udp_hashfn(net, hnum, udp_table.mask);  	struct udp_hslot *hslot = &udp_table.hash[slot];  	/* Do not bother scanning a too big list */  	if (hslot->count > 10)  		return NULL; -	rcu_read_lock(); -begin: -	count = 0;  	result = NULL; -	sk_nulls_for_each_rcu(sk, node, &hslot->head) { -		if (__udp_is_mcast_sock(net, sk, -					loc_port, loc_addr, -					rmt_port, rmt_addr, -					dif, hnum)) { +	sk_for_each_rcu(sk, &hslot->head) { +		if (__udp_is_mcast_sock(net, sk, loc_port, loc_addr, +					rmt_port, rmt_addr, dif, hnum)) { +			if (result) +				return NULL;  			result = sk; -			++count; -		} -	} -	/* -	 * if the nulls value we got at the end of this lookup is -	 * not the expected one, we must restart lookup. -	 * We probably met an item that was moved to another chain. -	 */ -	if (get_nulls_value(node) != slot) -		goto begin; - -	if (result) { -		if (count != 1 || -		    unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) -			result = NULL; -		else if (unlikely(!__udp_is_mcast_sock(net, result, -						       loc_port, loc_addr, -						       rmt_port, rmt_addr, -						       dif, hnum))) { -			sock_put(result); -			result = NULL;  		}  	} -	rcu_read_unlock(); +  	return result;  } @@ -2018,37 +1918,22 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net,  					    __be16 rmt_port, __be32 rmt_addr,  					    int dif)  { -	struct sock *sk, *result; -	struct hlist_nulls_node *node;  	unsigned short hnum = ntohs(loc_port);  	unsigned int hash2 = udp4_portaddr_hash(net, loc_addr, hnum);  	unsigned int slot2 = hash2 & udp_table.mask;  	struct udp_hslot *hslot2 = &udp_table.hash2[slot2];  	INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr);  	const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum); +	struct sock *sk; -	rcu_read_lock(); -	result = NULL; -	udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) { -		if (INET_MATCH(sk, net, acookie, -			       rmt_addr, loc_addr, ports, dif)) -			result = sk; +	udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { +		if (INET_MATCH(sk, net, acookie, rmt_addr, +			       loc_addr, ports, dif)) +			return sk;  		/* Only check first socket in chain */  		break;  	} - -	if (result) { -		if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) -			result = NULL; -		else if (unlikely(!INET_MATCH(sk, net, acookie, -					      rmt_addr, loc_addr, -					      ports, dif))) { -			sock_put(result); -			result = NULL; -		} -	} -	rcu_read_unlock(); -	return result; +	return NULL;  }  void udp_v4_early_demux(struct sk_buff *skb) @@ -2056,7 +1941,7 @@ void udp_v4_early_demux(struct sk_buff *skb)  	struct net *net = dev_net(skb->dev);  	const struct iphdr *iph;  	const struct udphdr *uh; -	struct sock *sk; +	struct sock *sk = NULL;  	struct dst_entry *dst;  	int dif = skb->dev->ifindex;  	int ours; @@ -2088,11 +1973,9 @@ void udp_v4_early_demux(struct sk_buff *skb)  	} else if (skb->pkt_type == PACKET_HOST) {  		sk = __udp4_lib_demux_lookup(net, uh->dest, iph->daddr,  					     uh->source, iph->saddr, dif); -	} else { -		return;  	} -	if (!sk) +	if (!sk || !atomic_inc_not_zero_hint(&sk->sk_refcnt, 2))  		return;  	skb->sk = sk; @@ -2392,14 +2275,13 @@ static struct sock *udp_get_first(struct seq_file *seq, int start)  	for (state->bucket = start; state->bucket <= state->udp_table->mask;  	     ++state->bucket) { -		struct hlist_nulls_node *node;  		struct udp_hslot *hslot = &state->udp_table->hash[state->bucket]; -		if (hlist_nulls_empty(&hslot->head)) +		if (hlist_empty(&hslot->head))  			continue;  		spin_lock_bh(&hslot->lock); -		sk_nulls_for_each(sk, node, &hslot->head) { +		sk_for_each(sk, &hslot->head) {  			if (!net_eq(sock_net(sk), net))  				continue;  			if (sk->sk_family == state->family) @@ -2418,7 +2300,7 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)  	struct net *net = seq_file_net(seq);  	do { -		sk = sk_nulls_next(sk); +		sk = sk_next(sk);  	} while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family));  	if (!sk) { @@ -2627,12 +2509,12 @@ void __init udp_table_init(struct udp_table *table, const char *name)  	table->hash2 = table->hash + (table->mask + 1);  	for (i = 0; i <= table->mask; i++) { -		INIT_HLIST_NULLS_HEAD(&table->hash[i].head, i); +		INIT_HLIST_HEAD(&table->hash[i].head);  		table->hash[i].count = 0;  		spin_lock_init(&table->hash[i].lock);  	}  	for (i = 0; i <= table->mask; i++) { -		INIT_HLIST_NULLS_HEAD(&table->hash2[i].head, i); +		INIT_HLIST_HEAD(&table->hash2[i].head);  		table->hash2[i].count = 0;  		spin_lock_init(&table->hash2[i].lock);  	} | 
