diff options
Diffstat (limited to 'net/ipv4/tcp_ipv4.c')
| -rw-r--r-- | net/ipv4/tcp_ipv4.c | 225 | 
1 file changed, 152 insertions(+), 73 deletions(-)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 7cda24b53f61..15e96030ce47 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -165,10 +165,10 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)  		nexthop = inet->opt->faddr;  	} -	tmp = ip_route_connect(&rt, nexthop, inet->saddr, +	tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr,  			       RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,  			       IPPROTO_TCP, -			       inet->sport, usin->sin_port, sk, 1); +			       inet->inet_sport, usin->sin_port, sk, 1);  	if (tmp < 0) {  		if (tmp == -ENETUNREACH)  			IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); @@ -183,11 +183,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)  	if (!inet->opt || !inet->opt->srr)  		daddr = rt->rt_dst; -	if (!inet->saddr) -		inet->saddr = rt->rt_src; -	inet->rcv_saddr = inet->saddr; +	if (!inet->inet_saddr) +		inet->inet_saddr = rt->rt_src; +	inet->inet_rcv_saddr = inet->inet_saddr; -	if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) { +	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {  		/* Reset inherited state */  		tp->rx_opt.ts_recent	   = 0;  		tp->rx_opt.ts_recent_stamp = 0; @@ -204,20 +204,20 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)  		 * when trying new connection.  		 */  		if (peer != NULL && -		    peer->tcp_ts_stamp + TCP_PAWS_MSL >= get_seconds()) { +		    (u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {  			tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;  			tp->rx_opt.ts_recent = peer->tcp_ts;  		}  	} -	inet->dport = usin->sin_port; -	inet->daddr = daddr; +	inet->inet_dport = usin->sin_port; +	inet->inet_daddr = daddr;  	inet_csk(sk)->icsk_ext_hdr_len = 0;  	if (inet->opt)  		inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen; -	tp->rx_opt.mss_clamp = 536; +	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;  	/* Socket identity is still unknown (sport may be zero).  	 
* However we set state to SYN-SENT and not releasing socket @@ -230,7 +230,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)  		goto failure;  	err = ip_route_newports(&rt, IPPROTO_TCP, -				inet->sport, inet->dport, sk); +				inet->inet_sport, inet->inet_dport, sk);  	if (err)  		goto failure; @@ -239,12 +239,12 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)  	sk_setup_caps(sk, &rt->u.dst);  	if (!tp->write_seq) -		tp->write_seq = secure_tcp_sequence_number(inet->saddr, -							   inet->daddr, -							   inet->sport, +		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr, +							   inet->inet_daddr, +							   inet->inet_sport,  							   usin->sin_port); -	inet->id = tp->write_seq ^ jiffies; +	inet->inet_id = tp->write_seq ^ jiffies;  	err = tcp_connect(sk);  	rt = NULL; @@ -261,7 +261,7 @@ failure:  	tcp_set_state(sk, TCP_CLOSE);  	ip_rt_put(rt);  	sk->sk_route_caps = 0; -	inet->dport = 0; +	inet->inet_dport = 0;  	return err;  } @@ -520,12 +520,13 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)  	struct tcphdr *th = tcp_hdr(skb);  	if (skb->ip_summed == CHECKSUM_PARTIAL) { -		th->check = ~tcp_v4_check(len, inet->saddr, -					  inet->daddr, 0); +		th->check = ~tcp_v4_check(len, inet->inet_saddr, +					  inet->inet_daddr, 0);  		skb->csum_start = skb_transport_header(skb) - skb->head;  		skb->csum_offset = offsetof(struct tcphdr, check);  	} else { -		th->check = tcp_v4_check(len, inet->saddr, inet->daddr, +		th->check = tcp_v4_check(len, inet->inet_saddr, +					 inet->inet_daddr,  					 csum_partial(th,  						      th->doff << 2,  						      skb->csum)); @@ -741,8 +742,9 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,   *	This still operates on a request_sock only, not on a big   *	socket.   
*/ -static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req, -				struct dst_entry *dst) +static int __tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, +				struct request_sock *req, +				struct request_values *rvp)  {  	const struct inet_request_sock *ireq = inet_rsk(req);  	int err = -1; @@ -752,7 +754,7 @@ static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req,  	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)  		return -1; -	skb = tcp_make_synack(sk, dst, req); +	skb = tcp_make_synack(sk, dst, req, rvp);  	if (skb) {  		struct tcphdr *th = tcp_hdr(skb); @@ -773,9 +775,10 @@ static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req,  	return err;  } -static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req) +static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req, +			      struct request_values *rvp)  { -	return __tcp_v4_send_synack(sk, req, NULL); +	return __tcp_v4_send_synack(sk, NULL, req, rvp);  }  /* @@ -848,7 +851,7 @@ static struct tcp_md5sig_key *  struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,  					 struct sock *addr_sk)  { -	return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->daddr); +	return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr);  }  EXPORT_SYMBOL(tcp_v4_md5_lookup); @@ -923,7 +926,7 @@ EXPORT_SYMBOL(tcp_v4_md5_do_add);  static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,  			       u8 *newkey, u8 newkeylen)  { -	return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->daddr, +	return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->inet_daddr,  				 newkey, newkeylen);  } @@ -1089,8 +1092,8 @@ int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,  	__be32 saddr, daddr;  	if (sk) { -		saddr = inet_sk(sk)->saddr; -		daddr = inet_sk(sk)->daddr; +		saddr = inet_sk(sk)->inet_saddr; +		daddr = inet_sk(sk)->inet_daddr;  	} else if (req) {  		saddr = inet_rsk(req)->loc_addr;  		daddr = inet_rsk(req)->rmt_addr; @@ 
-1210,13 +1213,16 @@ static struct timewait_sock_ops tcp_timewait_sock_ops = {  int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)  { -	struct inet_request_sock *ireq; +	struct tcp_extend_values tmp_ext;  	struct tcp_options_received tmp_opt; +	u8 *hash_location;  	struct request_sock *req; +	struct inet_request_sock *ireq; +	struct tcp_sock *tp = tcp_sk(sk); +	struct dst_entry *dst = NULL;  	__be32 saddr = ip_hdr(skb)->saddr;  	__be32 daddr = ip_hdr(skb)->daddr;  	__u32 isn = TCP_SKB_CB(skb)->when; -	struct dst_entry *dst = NULL;  #ifdef CONFIG_SYN_COOKIES  	int want_cookie = 0;  #else @@ -1256,27 +1262,65 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)  	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;  #endif +	ireq = inet_rsk(req); +	ireq->loc_addr = daddr; +	ireq->rmt_addr = saddr; +	ireq->no_srccheck = inet_sk(sk)->transparent; +	ireq->opt = tcp_v4_save_options(sk, skb); + +	dst = inet_csk_route_req(sk, req); +	if(!dst) +		goto drop_and_free; +  	tcp_clear_options(&tmp_opt); -	tmp_opt.mss_clamp = 536; -	tmp_opt.user_mss  = tcp_sk(sk)->rx_opt.user_mss; +	tmp_opt.mss_clamp = TCP_MSS_DEFAULT; +	tmp_opt.user_mss  = tp->rx_opt.user_mss; +	tcp_parse_options(skb, &tmp_opt, &hash_location, 0, dst); + +	if (tmp_opt.cookie_plus > 0 && +	    tmp_opt.saw_tstamp && +	    !tp->rx_opt.cookie_out_never && +	    (sysctl_tcp_cookie_size > 0 || +	     (tp->cookie_values != NULL && +	      tp->cookie_values->cookie_desired > 0))) { +		u8 *c; +		u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS]; +		int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE; + +		if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0) +			goto drop_and_release; + +		/* Secret recipe starts with IP addresses */ +		*mess++ ^= daddr; +		*mess++ ^= saddr; -	tcp_parse_options(skb, &tmp_opt, 0); +		/* plus variable length Initiator Cookie */ +		c = (u8 *)mess; +		while (l-- > 0) +			*c++ ^= *hash_location++; + +#ifdef CONFIG_SYN_COOKIES +		want_cookie = 0;	/* not our 
kind of cookie */ +#endif +		tmp_ext.cookie_out_never = 0; /* false */ +		tmp_ext.cookie_plus = tmp_opt.cookie_plus; +	} else if (!tp->rx_opt.cookie_in_always) { +		/* redundant indications, but ensure initialization. */ +		tmp_ext.cookie_out_never = 1; /* true */ +		tmp_ext.cookie_plus = 0; +	} else { +		goto drop_and_release; +	} +	tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;  	if (want_cookie && !tmp_opt.saw_tstamp)  		tcp_clear_options(&tmp_opt);  	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; -  	tcp_openreq_init(req, &tmp_opt, skb); -	ireq = inet_rsk(req); -	ireq->loc_addr = daddr; -	ireq->rmt_addr = saddr; -	ireq->no_srccheck = inet_sk(sk)->transparent; -	ireq->opt = tcp_v4_save_options(sk, skb); -  	if (security_inet_conn_request(sk, skb, req)) -		goto drop_and_free; +		goto drop_and_release;  	if (!want_cookie)  		TCP_ECN_create_request(req, tcp_hdr(skb)); @@ -1301,10 +1345,9 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)  		 */  		if (tmp_opt.saw_tstamp &&  		    tcp_death_row.sysctl_tw_recycle && -		    (dst = inet_csk_route_req(sk, req)) != NULL &&  		    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&  		    peer->v4daddr == saddr) { -			if (get_seconds() < peer->tcp_ts_stamp + TCP_PAWS_MSL && +			if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&  			    (s32)(peer->tcp_ts - req->ts_recent) >  							TCP_PAWS_WINDOW) {  				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); @@ -1333,7 +1376,9 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)  	}  	tcp_rsk(req)->snt_isn = isn; -	if (__tcp_v4_send_synack(sk, req, dst) || want_cookie) +	if (__tcp_v4_send_synack(sk, dst, req, +				 (struct request_values *)&tmp_ext) || +	    want_cookie)  		goto drop_and_free;  	inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); @@ -1380,9 +1425,9 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,  	newtp		      = tcp_sk(newsk);  	newinet		      = inet_sk(newsk);  	ireq		     
 = inet_rsk(req); -	newinet->daddr	      = ireq->rmt_addr; -	newinet->rcv_saddr    = ireq->loc_addr; -	newinet->saddr	      = ireq->loc_addr; +	newinet->inet_daddr   = ireq->rmt_addr; +	newinet->inet_rcv_saddr = ireq->loc_addr; +	newinet->inet_saddr	      = ireq->loc_addr;  	newinet->opt	      = ireq->opt;  	ireq->opt	      = NULL;  	newinet->mc_index     = inet_iif(skb); @@ -1390,7 +1435,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,  	inet_csk(newsk)->icsk_ext_hdr_len = 0;  	if (newinet->opt)  		inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen; -	newinet->id = newtp->write_seq ^ jiffies; +	newinet->inet_id = newtp->write_seq ^ jiffies;  	tcp_mtup_init(newsk);  	tcp_sync_mss(newsk, dst_mtu(dst)); @@ -1403,7 +1448,8 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,  #ifdef CONFIG_TCP_MD5SIG  	/* Copy over the MD5 key from the original socket */ -	if ((key = tcp_v4_md5_do_lookup(sk, newinet->daddr)) != NULL) { +	key = tcp_v4_md5_do_lookup(sk, newinet->inet_daddr); +	if (key != NULL) {  		/*  		 * We're using one, so create a matching key  		 * on the newsk structure. 
If we fail to get @@ -1412,13 +1458,13 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,  		 */  		char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);  		if (newkey != NULL) -			tcp_v4_md5_do_add(newsk, newinet->daddr, +			tcp_v4_md5_do_add(newsk, newinet->inet_daddr,  					  newkey, key->keylen);  		newsk->sk_route_caps &= ~NETIF_F_GSO_MASK;  	}  #endif -	__inet_hash_nolisten(newsk); +	__inet_hash_nolisten(newsk, NULL);  	__inet_inherit_port(sk, newsk);  	return newsk; @@ -1711,8 +1757,8 @@ int tcp_v4_remember_stamp(struct sock *sk)  	struct inet_peer *peer = NULL;  	int release_it = 0; -	if (!rt || rt->rt_dst != inet->daddr) { -		peer = inet_getpeer(inet->daddr, 1); +	if (!rt || rt->rt_dst != inet->inet_daddr) { +		peer = inet_getpeer(inet->inet_daddr, 1);  		release_it = 1;  	} else {  		if (!rt->peer) @@ -1722,9 +1768,9 @@ int tcp_v4_remember_stamp(struct sock *sk)  	if (peer) {  		if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 || -		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() && -		     peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) { -			peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp; +		    ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL && +		     peer->tcp_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) { +			peer->tcp_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp;  			peer->tcp_ts = tp->rx_opt.ts_recent;  		}  		if (release_it) @@ -1743,9 +1789,9 @@ int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)  		const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);  		if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 || -		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() && -		     peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) { -			peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp; +		    ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL && +		     peer->tcp_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) { +			peer->tcp_ts_stamp = (u32)tcptw->tw_ts_recent_stamp;  			
peer->tcp_ts	   = tcptw->tw_ts_recent;  		}  		inet_putpeer(peer); @@ -1810,7 +1856,7 @@ static int tcp_v4_init_sock(struct sock *sk)  	 */  	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;  	tp->snd_cwnd_clamp = ~0; -	tp->mss_cache = 536; +	tp->mss_cache = TCP_MSS_DEFAULT;  	tp->reordering = sysctl_tcp_reordering;  	icsk->icsk_ca_ops = &tcp_init_congestion_ops; @@ -1826,6 +1872,19 @@ static int tcp_v4_init_sock(struct sock *sk)  	tp->af_specific = &tcp_sock_ipv4_specific;  #endif +	/* TCP Cookie Transactions */ +	if (sysctl_tcp_cookie_size > 0) { +		/* Default, cookies without s_data_payload. */ +		tp->cookie_values = +			kzalloc(sizeof(*tp->cookie_values), +				sk->sk_allocation); +		if (tp->cookie_values != NULL) +			kref_init(&tp->cookie_values->kref); +	} +	/* Presumed zeroed, in order of appearance: +	 *	cookie_in_always, cookie_out_never, +	 *	s_data_constant, s_data_in, s_data_out +	 */  	sk->sk_sndbuf = sysctl_tcp_wmem[1];  	sk->sk_rcvbuf = sysctl_tcp_rmem[1]; @@ -1879,6 +1938,13 @@ void tcp_v4_destroy_sock(struct sock *sk)  		sk->sk_sndmsg_page = NULL;  	} +	/* TCP Cookie Transactions */ +	if (tp->cookie_values != NULL) { +		kref_put(&tp->cookie_values->kref, +			 tcp_cookie_values_release); +		tp->cookie_values = NULL; +	} +  	percpu_counter_dec(&tcp_sockets_allocated);  } @@ -2000,7 +2066,7 @@ static void *established_get_first(struct seq_file *seq)  	struct net *net = seq_file_net(seq);  	void *rc = NULL; -	for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) { +	for (st->bucket = 0; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {  		struct sock *sk;  		struct hlist_nulls_node *node;  		struct inet_timewait_sock *tw; @@ -2061,10 +2127,10 @@ get_tw:  		st->state = TCP_SEQ_STATE_ESTABLISHED;  		/* Look for next non empty bucket */ -		while (++st->bucket < tcp_hashinfo.ehash_size && +		while (++st->bucket <= tcp_hashinfo.ehash_mask &&  				empty_bucket(st))  			; -		if (st->bucket >= tcp_hashinfo.ehash_size) +		if (st->bucket > 
tcp_hashinfo.ehash_mask)  			return NULL;  		spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); @@ -2225,7 +2291,7 @@ static void get_openreq4(struct sock *sk, struct request_sock *req,  		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p%n",  		i,  		ireq->loc_addr, -		ntohs(inet_sk(sk)->sport), +		ntohs(inet_sk(sk)->inet_sport),  		ireq->rmt_addr,  		ntohs(ireq->rmt_port),  		TCP_SYN_RECV, @@ -2248,10 +2314,11 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)  	struct tcp_sock *tp = tcp_sk(sk);  	const struct inet_connection_sock *icsk = inet_csk(sk);  	struct inet_sock *inet = inet_sk(sk); -	__be32 dest = inet->daddr; -	__be32 src = inet->rcv_saddr; -	__u16 destp = ntohs(inet->dport); -	__u16 srcp = ntohs(inet->sport); +	__be32 dest = inet->inet_daddr; +	__be32 src = inet->inet_rcv_saddr; +	__u16 destp = ntohs(inet->inet_dport); +	__u16 srcp = ntohs(inet->inet_sport); +	int rx_queue;  	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {  		timer_active	= 1; @@ -2267,12 +2334,19 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)  		timer_expires = jiffies;  	} +	if (sk->sk_state == TCP_LISTEN) +		rx_queue = sk->sk_ack_backlog; +	else +		/* +		 * because we dont lock socket, we might find a transient negative value +		 */ +		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0); +  	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "  			"%08X %5d %8d %lu %d %p %lu %lu %u %u %d%n",  		i, src, srcp, dest, destp, sk->sk_state,  		tp->write_seq - tp->snd_una, -		sk->sk_state == TCP_LISTEN ? 
sk->sk_ack_backlog : -					     (tp->rcv_nxt - tp->copied_seq), +		rx_queue,  		timer_active,  		jiffies_to_clock_t(timer_expires - jiffies),  		icsk->icsk_retransmits, @@ -2463,12 +2537,17 @@ static int __net_init tcp_sk_init(struct net *net)  static void __net_exit tcp_sk_exit(struct net *net)  {  	inet_ctl_sock_destroy(net->ipv4.tcp_sock); -	inet_twsk_purge(net, &tcp_hashinfo, &tcp_death_row, AF_INET); +} + +static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) +{ +	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);  }  static struct pernet_operations __net_initdata tcp_sk_ops = { -       .init = tcp_sk_init, -       .exit = tcp_sk_exit, +       .init	   = tcp_sk_init, +       .exit	   = tcp_sk_exit, +       .exit_batch = tcp_sk_exit_batch,  };  void __init tcp_v4_init(void)  | 
