Diffstat (limited to 'net/ipv4/tcp_ipv4.c')
-rw-r--r--	net/ipv4/tcp_ipv4.c	189
1 file changed, 107 insertions, 82 deletions
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index c8d28c433b2b..767823764016 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -209,22 +209,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	}
 
 	if (tcp_death_row.sysctl_tw_recycle &&
-	    !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) {
-		struct inet_peer *peer = rt_get_peer(rt, fl4->daddr);
-		/*
-		 * VJ's idea. We save last timestamp seen from
-		 * the destination in peer table, when entering state
-		 * TIME-WAIT * and initialize rx_opt.ts_recent from it,
-		 * when trying new connection.
-		 */
-		if (peer) {
-			inet_peer_refcheck(peer);
-			if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
-				tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
-				tp->rx_opt.ts_recent = peer->tcp_ts;
-			}
-		}
-	}
+	    !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
+		tcp_fetch_timewait_stamp(sk, &rt->dst);
 
 	inet->inet_dport = usin->sin_port;
 	inet->inet_daddr = daddr;
@@ -289,12 +275,15 @@ failure:
 EXPORT_SYMBOL(tcp_v4_connect);
 
 /*
- * This routine does path mtu discovery as defined in RFC1191.
+ * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
+ * It can be called through tcp_release_cb() if socket was owned by user
+ * at the time tcp_v4_err() was called to handle ICMP message.
  */
-static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu)
+static void tcp_v4_mtu_reduced(struct sock *sk)
 {
 	struct dst_entry *dst;
 	struct inet_sock *inet = inet_sk(sk);
+	u32 mtu = tcp_sk(sk)->mtu_info;
 
 	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
 	 * send out by Linux are always <576bytes so they should go through
@@ -303,17 +292,10 @@ static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu)
 	if (sk->sk_state == TCP_LISTEN)
 		return;
 
-	/* We don't check in the destentry if pmtu discovery is forbidden
-	 * on this route. We just assume that no packet_to_big packets
-	 * are send back when pmtu discovery is not active.
-	 * There is a small race when the user changes this flag in the
-	 * route, but I think that's acceptable.
-	 */
-	if ((dst = __sk_dst_check(sk, 0)) == NULL)
+	dst = inet_csk_update_pmtu(sk, mtu);
+	if (!dst)
 		return;
 
-	dst->ops->update_pmtu(dst, mtu);
-
 	/* Something is about to be wrong... Remember soft error
 	 * for the case, if this connection will not able to recover.
 	 */
@@ -335,6 +317,14 @@ static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu)
 	} /* else let the usual retransmit timer handle it */
 }
 
+static void do_redirect(struct sk_buff *skb, struct sock *sk)
+{
+	struct dst_entry *dst = __sk_dst_check(sk, 0);
+
+	if (dst)
+		dst->ops->redirect(dst, sk, skb);
+}
+
 /*
  * This routine is called by the ICMP module when it gets some
  * sort of error condition.  If err < 0 then the socket should
@@ -386,8 +376,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 	bh_lock_sock(sk);
 	/* If too many ICMPs get dropped on busy
 	 * servers this needs to be solved differently.
+	 * We do take care of PMTU discovery (RFC1191) special case :
+	 * we can receive locally generated ICMP messages while socket is held.
 	 */
-	if (sock_owned_by_user(sk))
+	if (sock_owned_by_user(sk) &&
+	    type != ICMP_DEST_UNREACH &&
+	    code != ICMP_FRAG_NEEDED)
 		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
 	if (sk->sk_state == TCP_CLOSE)
 		goto out;
@@ -408,6 +402,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 	}
 
 	switch (type) {
+	case ICMP_REDIRECT:
+		do_redirect(icmp_skb, sk);
+		goto out;
 	case ICMP_SOURCE_QUENCH:
 		/* Just silently ignore these. */
 		goto out;
@@ -419,8 +416,11 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 			goto out;
 
 		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
+			tp->mtu_info = info;
 			if (!sock_owned_by_user(sk))
-				do_pmtu_discovery(sk, iph, info);
+				tcp_v4_mtu_reduced(sk);
+			else
+				set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags);
 			goto out;
 		}
 
@@ -698,8 +698,8 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 
 	net = dev_net(skb_dst(skb)->dev);
 	arg.tos = ip_hdr(skb)->tos;
-	ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
-		      &arg, arg.iov[0].iov_len);
+	ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
+			      ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
 
 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
 	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
@@ -781,8 +781,8 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
 	if (oif)
 		arg.bound_dev_if = oif;
 	arg.tos = tos;
-	ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
-		      &arg, arg.iov[0].iov_len);
+	ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
+			      ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
 
 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
 }
@@ -825,7 +825,8 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
 static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
 			      struct request_sock *req,
 			      struct request_values *rvp,
-			      u16 queue_mapping)
+			      u16 queue_mapping,
+			      bool nocache)
 {
 	const struct inet_request_sock *ireq = inet_rsk(req);
 	struct flowi4 fl4;
@@ -848,7 +849,6 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
 		err = net_xmit_eval(err);
 	}
 
-	dst_release(dst);
 	return err;
 }
@@ -856,7 +856,7 @@ static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
 			      struct request_values *rvp)
 {
 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
-	return tcp_v4_send_synack(sk, NULL, req, rvp, 0);
+	return tcp_v4_send_synack(sk, NULL, req, rvp, 0, false);
 }
 
 /*
@@ -1317,7 +1317,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	tcp_clear_options(&tmp_opt);
 	tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
 	tmp_opt.user_mss  = tp->rx_opt.user_mss;
-	tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
+	tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);
 
 	if (tmp_opt.cookie_plus > 0 &&
 	    tmp_opt.saw_tstamp &&
@@ -1375,7 +1375,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
 		req->cookie_ts = tmp_opt.tstamp_ok;
 	} else if (!isn) {
-		struct inet_peer *peer = NULL;
 		struct flowi4 fl4;
 
 		/* VJ's idea. We save last timestamp seen
@@ -1390,12 +1389,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 		if (tmp_opt.saw_tstamp &&
 		    tcp_death_row.sysctl_tw_recycle &&
 		    (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
-		    fl4.daddr == saddr &&
-		    (peer = rt_get_peer((struct rtable *)dst, fl4.daddr)) != NULL) {
-			inet_peer_refcheck(peer);
-			if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
-			    (s32)(peer->tcp_ts - req->ts_recent) >
-							TCP_PAWS_WINDOW) {
+		    fl4.daddr == saddr) {
+			if (!tcp_peer_is_proven(req, dst, true)) {
 				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
 				goto drop_and_release;
 			}
@@ -1404,8 +1399,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 		else if (!sysctl_tcp_syncookies &&
 			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
 			  (sysctl_max_syn_backlog >> 2)) &&
-			 (!peer || !peer->tcp_ts_stamp) &&
-			 (!dst || !dst_metric(dst, RTAX_RTT))) {
+			 !tcp_peer_is_proven(req, dst, false)) {
 			/* Without syncookies last quarter of
 			 * backlog is filled with destinations,
 			 * proven to be alive.
@@ -1425,7 +1419,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 
 	if (tcp_v4_send_synack(sk, dst, req,
 			       (struct request_values *)&tmp_ext,
-			       skb_get_queue_mapping(skb)) ||
+			       skb_get_queue_mapping(skb),
+			       want_cookie) ||
 	    want_cookie)
 		goto drop_and_free;
@@ -1622,7 +1617,16 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 #endif
 
 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
+		struct dst_entry *dst = sk->sk_rx_dst;
+
 		sock_rps_save_rxhash(sk, skb);
+		if (dst) {
+			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
+			    dst->ops->check(dst, 0) == NULL) {
+				dst_release(dst);
+				sk->sk_rx_dst = NULL;
+			}
+		}
 		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
 			rsk = sk;
 			goto reset;
@@ -1672,6 +1676,44 @@ csum_err:
 }
 EXPORT_SYMBOL(tcp_v4_do_rcv);
 
+void tcp_v4_early_demux(struct sk_buff *skb)
+{
+	struct net *net = dev_net(skb->dev);
+	const struct iphdr *iph;
+	const struct tcphdr *th;
+	struct sock *sk;
+
+	if (skb->pkt_type != PACKET_HOST)
+		return;
+
+	if (!pskb_may_pull(skb, ip_hdrlen(skb) + sizeof(struct tcphdr)))
+		return;
+
+	iph = ip_hdr(skb);
+	th = (struct tcphdr *) ((char *)iph + ip_hdrlen(skb));
+
+	if (th->doff < sizeof(struct tcphdr) / 4)
+		return;
+
+	sk = __inet_lookup_established(net, &tcp_hashinfo,
+				       iph->saddr, th->source,
+				       iph->daddr, ntohs(th->dest),
+				       skb->skb_iif);
+	if (sk) {
+		skb->sk = sk;
+		skb->destructor = sock_edemux;
+		if (sk->sk_state != TCP_TIME_WAIT) {
+			struct dst_entry *dst = sk->sk_rx_dst;
+
+			if (dst)
+				dst = dst_check(dst, 0);
+			if (dst &&
+			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
+				skb_dst_set_noref(skb, dst);
+		}
+	}
+}
+
 /*
  *	From tcp_input.c
  */
@@ -1821,49 +1863,29 @@ do_time_wait:
 	goto discard_it;
 }
 
-struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it)
-{
-	struct rtable *rt = (struct rtable *) __sk_dst_get(sk);
-	struct inet_sock *inet = inet_sk(sk);
-	struct inet_peer *peer;
-
-	if (!rt ||
-	    inet->cork.fl.u.ip4.daddr != inet->inet_daddr) {
-		peer = inet_getpeer_v4(inet->inet_daddr, 1);
-		*release_it = true;
-	} else {
-		if (!rt->peer)
-			rt_bind_peer(rt, inet->inet_daddr, 1);
-		peer = rt->peer;
-		*release_it = false;
-	}
-
-	return peer;
-}
-EXPORT_SYMBOL(tcp_v4_get_peer);
-
-void *tcp_v4_tw_get_peer(struct sock *sk)
-{
-	const struct inet_timewait_sock *tw = inet_twsk(sk);
-
-	return inet_getpeer_v4(tw->tw_daddr, 1);
-}
-EXPORT_SYMBOL(tcp_v4_tw_get_peer);
-
 static struct timewait_sock_ops tcp_timewait_sock_ops = {
 	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
 	.twsk_unique	= tcp_twsk_unique,
 	.twsk_destructor= tcp_twsk_destructor,
-	.twsk_getpeer	= tcp_v4_tw_get_peer,
 };
 
+void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb_dst(skb);
+
+	dst_hold(dst);
+	sk->sk_rx_dst = dst;
+	inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
+}
+EXPORT_SYMBOL(inet_sk_rx_dst_set);
+
 const struct inet_connection_sock_af_ops ipv4_specific = {
 	.queue_xmit	   = ip_queue_xmit,
 	.send_check	   = tcp_v4_send_check,
 	.rebuild_header	   = inet_sk_rebuild_header,
+	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
 	.conn_request	   = tcp_v4_conn_request,
 	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
-	.get_peer	   = tcp_v4_get_peer,
 	.net_header_len	   = sizeof(struct iphdr),
 	.setsockopt	   = ip_setsockopt,
 	.getsockopt	   = ip_getsockopt,
@@ -1953,6 +1975,9 @@ void tcp_v4_destroy_sock(struct sock *sk)
 		tp->cookie_values = NULL;
 	}
 
+	/* If socket is aborted during connect operation */
+	tcp_free_fastopen_req(tp);
+
 	sk_sockets_allocated_dec(sk);
 	sock_release_memcg(sk);
 }
@@ -2593,6 +2618,8 @@ struct proto tcp_prot = {
 	.sendmsg		= tcp_sendmsg,
 	.sendpage		= tcp_sendpage,
 	.backlog_rcv		= tcp_v4_do_rcv,
+	.release_cb		= tcp_release_cb,
+	.mtu_reduced		= tcp_v4_mtu_reduced,
 	.hash			= inet_hash,
 	.unhash			= inet_unhash,
 	.get_port		= inet_csk_get_port,
@@ -2614,7 +2641,7 @@ struct proto tcp_prot = {
 	.compat_setsockopt	= compat_tcp_setsockopt,
 	.compat_getsockopt	= compat_tcp_getsockopt,
 #endif
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+#ifdef CONFIG_MEMCG_KMEM
 	.init_cgroup		= tcp_init_cgroup,
 	.destroy_cgroup		= tcp_destroy_cgroup,
 	.proto_cgroup		= tcp_proto_cgroup,
@@ -2624,13 +2651,11 @@ EXPORT_SYMBOL(tcp_prot);
 
 static int __net_init tcp_sk_init(struct net *net)
 {
-	return inet_ctl_sock_create(&net->ipv4.tcp_sock,
-				    PF_INET, SOCK_RAW, IPPROTO_TCP, net);
+	return 0;
 }
 
 static void __net_exit tcp_sk_exit(struct net *net)
 {
-	inet_ctl_sock_destroy(net->ipv4.tcp_sock);
 }
 
 static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
