From 9ad7c049f0f79c418e293b1b68cf10d68f54fcdb Mon Sep 17 00:00:00 2001 From: Jerry Chu Date: Wed, 8 Jun 2011 11:08:38 +0000 Subject: tcp: RFC2988bis + taking RTT sample from 3WHS for the passive open side This patch lowers the default initRTO from 3secs to 1sec per RFC2988bis. It falls back to 3secs if the SYN or SYN-ACK packet has been retransmitted, AND the TCP timestamp option is not on. It also adds support to take RTT sample during 3WHS on the passive open side, just like its active open counterpart, and uses it, if valid, to seed the initRTO for the data transmission phase. The patch also resets ssthresh to its initial default at the beginning of the data transmission phase, and reduces cwnd to 1 if there has been MORE THAN ONE retransmission during 3WHS per RFC5681. Signed-off-by: H.K. Jerry Chu Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a7d6671e33b8..617dee3ccfb1 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -429,8 +429,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) break; icsk->icsk_backoff--; - inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) << - icsk->icsk_backoff; + inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) : + TCP_TIMEOUT_INIT) << icsk->icsk_backoff; tcp_bound_rto(sk); skb = tcp_write_queue_head(sk); @@ -1384,6 +1384,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) isn = tcp_v4_init_sequence(skb); } tcp_rsk(req)->snt_isn = isn; + tcp_rsk(req)->snt_synack = tcp_time_stamp; if (tcp_v4_send_synack(sk, dst, req, (struct request_values *)&tmp_ext) || @@ -1458,6 +1459,10 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; tcp_initialize_rcv_mss(newsk); + if (tcp_rsk(req)->snt_synack) + tcp_valid_rtt_meas(newsk, + tcp_time_stamp - tcp_rsk(req)->snt_synack); + newtp->total_retrans = req->retrans; #ifdef CONFIG_TCP_MD5SIG /* Copy over the MD5 key from the original socket */ @@ -1854,7 +1859,7 @@ static int tcp_v4_init_sock(struct sock *sk) * algorithms that we must have the following bandaid to talk * efficiently to them. -DaveM */ - tp->snd_cwnd = 2; + tp->snd_cwnd = TCP_INIT_CWND; /* See draft-stevens-tcpca-spec-01 for discussion of the * initialization of these values. -- cgit v1.2.3 From 1eddceadb0d6441cd39b2c38705a8f5fec86e770 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 17 Jun 2011 03:45:15 +0000 Subject: net: rfs: enable RFS before first data packet is received MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Le jeudi 16 juin 2011 à 23:38 -0400, David Miller a écrit : > From: Ben Hutchings > Date: Fri, 17 Jun 2011 00:50:46 +0100 > > > On Wed, 2011-06-15 at 04:15 +0200, Eric Dumazet wrote: > >> @@ -1594,6 +1594,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) > >> goto discard; > >> > >> if (nsk != sk) { > >> + sock_rps_save_rxhash(nsk, skb->rxhash); > >> if (tcp_child_process(sk, nsk, skb)) { > >> rsk = nsk; > >> goto reset; > >> > > > > I haven't tried this, but it looks reasonable to me. > > > > What about IPv6? The logic in tcp_v6_do_rcv() looks very similar. > > Indeed ipv6 side needs the same fix. > > Eric please add that part and resubmit. And in fact I might stick > this into net-2.6 instead of net-next-2.6 > OK, here is the net-2.6 based one then, thanks ! [PATCH v2] net: rfs: enable RFS before first data packet is received First packet received on a passive tcp flow is not correctly RFS steered. One sock_rps_record_flow() call is missing in inet_accept() But before that, we also must record rxhash when child socket is setup. Signed-off-by: Eric Dumazet CC: Tom Herbert CC: Ben Hutchings CC: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 1 + net/ipv4/tcp_ipv4.c | 1 + net/ipv6/tcp_ipv6.c | 1 + 3 files changed, 3 insertions(+) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 9c1926027a26..eae1f676f870 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -676,6 +676,7 @@ int inet_accept(struct socket *sock, struct socket *newsock, int flags) lock_sock(sk2); + sock_rps_record_flow(sk2); WARN_ON(!((1 << sk2->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_CLOSE))); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a7d6671e33b8..708dc203b034 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1589,6 +1589,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) goto discard; if (nsk != sk) { + sock_rps_save_rxhash(nsk, skb->rxhash); if (tcp_child_process(sk, nsk, skb)) { rsk = nsk; goto reset; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index d1fd28711ba5..87551ca568cd 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1644,6 +1644,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) * the new socket.. */ if(nsk != sk) { + sock_rps_save_rxhash(nsk, skb->rxhash); if (tcp_child_process(sk, nsk, skb)) goto reset; if (opt_skb) -- cgit v1.2.3 From 6e5714eaf77d79ae1c8b47e3e040ff5411b717ec Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 3 Aug 2011 20:50:44 -0700 Subject: net: Compute protocol sequence numbers and fragment IDs using MD5. Computers have become a lot faster since we compromised on the partial MD4 hash which we use currently for performance reasons. MD5 is a much safer choice, and is inline with both RFC1948 and other ISS generators (OpenBSD, Solaris, etc.) Furthermore, only having 24-bits of the sequence number be truly unpredictable is a very serious limitation. So the periodic regeneration and 8-bit counter have been removed. We compute and use a full 32-bit sequence number. For ipv6, DCCP was found to use a 32-bit truncated initial sequence number (it needs 43-bits) and that is fixed here as well. Reported-by: Dan Kaminsky Tested-by: Willy Tarreau Signed-off-by: David S. Miller --- drivers/char/random.c | 349 +------------------------------ include/linux/random.h | 12 -- include/net/secure_seq.h | 20 ++ net/core/Makefile | 2 +- net/core/secure_seq.c | 184 ++++++++++++++++ net/dccp/ipv4.c | 1 + net/dccp/ipv6.c | 9 +- net/ipv4/inet_hashtables.c | 1 + net/ipv4/inetpeer.c | 1 + net/ipv4/netfilter/nf_nat_proto_common.c | 1 + net/ipv4/route.c | 1 + net/ipv4/tcp_ipv4.c | 1 + net/ipv6/inet6_hashtables.c | 1 + net/ipv6/tcp_ipv6.c | 1 + 14 files changed, 223 insertions(+), 361 deletions(-) create mode 100644 include/net/secure_seq.h create mode 100644 net/core/secure_seq.c (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/drivers/char/random.c b/drivers/char/random.c index 729281961f22..c35a785005b0 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1300,345 +1300,14 @@ ctl_table random_table[] = { }; #endif /* CONFIG_SYSCTL */ -/******************************************************************** - * - * Random functions for networking - * - ********************************************************************/ - -/* - * TCP initial sequence number picking. This uses the random number - * generator to pick an initial secret value. This value is hashed - * along with the TCP endpoint information to provide a unique - * starting point for each pair of TCP endpoints. This defeats - * attacks which rely on guessing the initial TCP sequence number. - * This algorithm was suggested by Steve Bellovin. - * - * Using a very strong hash was taking an appreciable amount of the total - * TCP connection establishment time, so this is a weaker hash, - * compensated for by changing the secret periodically. - */ - -/* F, G and H are basic MD4 functions: selection, majority, parity */ -#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z)))) -#define G(x, y, z) (((x) & (y)) + (((x) ^ (y)) & (z))) -#define H(x, y, z) ((x) ^ (y) ^ (z)) - -/* - * The generic round function. The application is so specific that - * we don't bother protecting all the arguments with parens, as is generally - * good macro practice, in favor of extra legibility. - * Rotation is separate from addition to prevent recomputation - */ -#define ROUND(f, a, b, c, d, x, s) \ - (a += f(b, c, d) + x, a = (a << s) | (a >> (32 - s))) -#define K1 0 -#define K2 013240474631UL -#define K3 015666365641UL - -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - -static __u32 twothirdsMD4Transform(__u32 const buf[4], __u32 const in[12]) -{ - __u32 a = buf[0], b = buf[1], c = buf[2], d = buf[3]; - - /* Round 1 */ - ROUND(F, a, b, c, d, in[ 0] + K1, 3); - ROUND(F, d, a, b, c, in[ 1] + K1, 7); - ROUND(F, c, d, a, b, in[ 2] + K1, 11); - ROUND(F, b, c, d, a, in[ 3] + K1, 19); - ROUND(F, a, b, c, d, in[ 4] + K1, 3); - ROUND(F, d, a, b, c, in[ 5] + K1, 7); - ROUND(F, c, d, a, b, in[ 6] + K1, 11); - ROUND(F, b, c, d, a, in[ 7] + K1, 19); - ROUND(F, a, b, c, d, in[ 8] + K1, 3); - ROUND(F, d, a, b, c, in[ 9] + K1, 7); - ROUND(F, c, d, a, b, in[10] + K1, 11); - ROUND(F, b, c, d, a, in[11] + K1, 19); - - /* Round 2 */ - ROUND(G, a, b, c, d, in[ 1] + K2, 3); - ROUND(G, d, a, b, c, in[ 3] + K2, 5); - ROUND(G, c, d, a, b, in[ 5] + K2, 9); - ROUND(G, b, c, d, a, in[ 7] + K2, 13); - ROUND(G, a, b, c, d, in[ 9] + K2, 3); - ROUND(G, d, a, b, c, in[11] + K2, 5); - ROUND(G, c, d, a, b, in[ 0] + K2, 9); - ROUND(G, b, c, d, a, in[ 2] + K2, 13); - ROUND(G, a, b, c, d, in[ 4] + K2, 3); - ROUND(G, d, a, b, c, in[ 6] + K2, 5); - ROUND(G, c, d, a, b, in[ 8] + K2, 9); - ROUND(G, b, c, d, a, in[10] + K2, 13); - - /* Round 3 */ - ROUND(H, a, b, c, d, in[ 3] + K3, 3); - ROUND(H, d, a, b, c, in[ 7] + K3, 9); - ROUND(H, c, d, a, b, in[11] + K3, 11); - ROUND(H, b, c, d, a, in[ 2] + K3, 15); - ROUND(H, a, b, c, d, in[ 6] + K3, 3); - ROUND(H, d, a, b, c, in[10] + K3, 9); - ROUND(H, c, d, a, b, in[ 1] + K3, 11); - ROUND(H, b, c, d, a, in[ 5] + K3, 15); - ROUND(H, a, b, c, d, in[ 9] + K3, 3); - ROUND(H, d, a, b, c, in[ 0] + K3, 9); - ROUND(H, c, d, a, b, in[ 4] + K3, 11); - ROUND(H, b, c, d, a, in[ 8] + K3, 15); - - return buf[1] + b; /* "most hashed" word */ - /* Alternative: return sum of all words? */ -} -#endif - -#undef ROUND -#undef F -#undef G -#undef H -#undef K1 -#undef K2 -#undef K3 - -/* This should not be decreased so low that ISNs wrap too fast. */ -#define REKEY_INTERVAL (300 * HZ) -/* - * Bit layout of the tcp sequence numbers (before adding current time): - * bit 24-31: increased after every key exchange - * bit 0-23: hash(source,dest) - * - * The implementation is similar to the algorithm described - * in the Appendix of RFC 1185, except that - * - it uses a 1 MHz clock instead of a 250 kHz clock - * - it performs a rekey every 5 minutes, which is equivalent - * to a (source,dest) tulple dependent forward jump of the - * clock by 0..2^(HASH_BITS+1) - * - * Thus the average ISN wraparound time is 68 minutes instead of - * 4.55 hours. - * - * SMP cleanup and lock avoidance with poor man's RCU. - * Manfred Spraul - * - */ -#define COUNT_BITS 8 -#define COUNT_MASK ((1 << COUNT_BITS) - 1) -#define HASH_BITS 24 -#define HASH_MASK ((1 << HASH_BITS) - 1) +static u32 random_int_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned; -static struct keydata { - __u32 count; /* already shifted to the final position */ - __u32 secret[12]; -} ____cacheline_aligned ip_keydata[2]; - -static unsigned int ip_cnt; - -static void rekey_seq_generator(struct work_struct *work); - -static DECLARE_DELAYED_WORK(rekey_work, rekey_seq_generator); - -/* - * Lock avoidance: - * The ISN generation runs lockless - it's just a hash over random data. - * State changes happen every 5 minutes when the random key is replaced. - * Synchronization is performed by having two copies of the hash function - * state and rekey_seq_generator always updates the inactive copy. - * The copy is then activated by updating ip_cnt. - * The implementation breaks down if someone blocks the thread - * that processes SYN requests for more than 5 minutes. Should never - * happen, and even if that happens only a not perfectly compliant - * ISN is generated, nothing fatal. - */ -static void rekey_seq_generator(struct work_struct *work) +static int __init random_int_secret_init(void) { - struct keydata *keyptr = &ip_keydata[1 ^ (ip_cnt & 1)]; - - get_random_bytes(keyptr->secret, sizeof(keyptr->secret)); - keyptr->count = (ip_cnt & COUNT_MASK) << HASH_BITS; - smp_wmb(); - ip_cnt++; - schedule_delayed_work(&rekey_work, - round_jiffies_relative(REKEY_INTERVAL)); -} - -static inline struct keydata *get_keyptr(void) -{ - struct keydata *keyptr = &ip_keydata[ip_cnt & 1]; - - smp_rmb(); - - return keyptr; -} - -static __init int seqgen_init(void) -{ - rekey_seq_generator(NULL); + get_random_bytes(random_int_secret, sizeof(random_int_secret)); return 0; } -late_initcall(seqgen_init); - -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -__u32 secure_tcpv6_sequence_number(__be32 *saddr, __be32 *daddr, - __be16 sport, __be16 dport) -{ - __u32 seq; - __u32 hash[12]; - struct keydata *keyptr = get_keyptr(); - - /* The procedure is the same as for IPv4, but addresses are longer. - * Thus we must use twothirdsMD4Transform. - */ - - memcpy(hash, saddr, 16); - hash[4] = ((__force u16)sport << 16) + (__force u16)dport; - memcpy(&hash[5], keyptr->secret, sizeof(__u32) * 7); - - seq = twothirdsMD4Transform((const __u32 *)daddr, hash) & HASH_MASK; - seq += keyptr->count; - - seq += ktime_to_ns(ktime_get_real()); - - return seq; -} -EXPORT_SYMBOL(secure_tcpv6_sequence_number); -#endif - -/* The code below is shamelessly stolen from secure_tcp_sequence_number(). - * All blames to Andrey V. Savochkin . - */ -__u32 secure_ip_id(__be32 daddr) -{ - struct keydata *keyptr; - __u32 hash[4]; - - keyptr = get_keyptr(); - - /* - * Pick a unique starting offset for each IP destination. - * The dest ip address is placed in the starting vector, - * which is then hashed with random data. - */ - hash[0] = (__force __u32)daddr; - hash[1] = keyptr->secret[9]; - hash[2] = keyptr->secret[10]; - hash[3] = keyptr->secret[11]; - - return half_md4_transform(hash, keyptr->secret); -} - -__u32 secure_ipv6_id(const __be32 daddr[4]) -{ - const struct keydata *keyptr; - __u32 hash[4]; - - keyptr = get_keyptr(); - - hash[0] = (__force __u32)daddr[0]; - hash[1] = (__force __u32)daddr[1]; - hash[2] = (__force __u32)daddr[2]; - hash[3] = (__force __u32)daddr[3]; - - return half_md4_transform(hash, keyptr->secret); -} - -#ifdef CONFIG_INET - -__u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, - __be16 sport, __be16 dport) -{ - __u32 seq; - __u32 hash[4]; - struct keydata *keyptr = get_keyptr(); - - /* - * Pick a unique starting offset for each TCP connection endpoints - * (saddr, daddr, sport, dport). - * Note that the words are placed into the starting vector, which is - * then mixed with a partial MD4 over random data. - */ - hash[0] = (__force u32)saddr; - hash[1] = (__force u32)daddr; - hash[2] = ((__force u16)sport << 16) + (__force u16)dport; - hash[3] = keyptr->secret[11]; - - seq = half_md4_transform(hash, keyptr->secret) & HASH_MASK; - seq += keyptr->count; - /* - * As close as possible to RFC 793, which - * suggests using a 250 kHz clock. - * Further reading shows this assumes 2 Mb/s networks. - * For 10 Mb/s Ethernet, a 1 MHz clock is appropriate. - * For 10 Gb/s Ethernet, a 1 GHz clock should be ok, but - * we also need to limit the resolution so that the u32 seq - * overlaps less than one time per MSL (2 minutes). - * Choosing a clock of 64 ns period is OK. (period of 274 s) - */ - seq += ktime_to_ns(ktime_get_real()) >> 6; - - return seq; -} - -/* Generate secure starting point for ephemeral IPV4 transport port search */ -u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) -{ - struct keydata *keyptr = get_keyptr(); - u32 hash[4]; - - /* - * Pick a unique starting offset for each ephemeral port search - * (saddr, daddr, dport) and 48bits of random data. - */ - hash[0] = (__force u32)saddr; - hash[1] = (__force u32)daddr; - hash[2] = (__force u32)dport ^ keyptr->secret[10]; - hash[3] = keyptr->secret[11]; - - return half_md4_transform(hash, keyptr->secret); -} -EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral); - -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, - __be16 dport) -{ - struct keydata *keyptr = get_keyptr(); - u32 hash[12]; - - memcpy(hash, saddr, 16); - hash[4] = (__force u32)dport; - memcpy(&hash[5], keyptr->secret, sizeof(__u32) * 7); - - return twothirdsMD4Transform((const __u32 *)daddr, hash); -} -#endif - -#if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE) -/* Similar to secure_tcp_sequence_number but generate a 48 bit value - * bit's 32-47 increase every key exchange - * 0-31 hash(source, dest) - */ -u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, - __be16 sport, __be16 dport) -{ - u64 seq; - __u32 hash[4]; - struct keydata *keyptr = get_keyptr(); - - hash[0] = (__force u32)saddr; - hash[1] = (__force u32)daddr; - hash[2] = ((__force u16)sport << 16) + (__force u16)dport; - hash[3] = keyptr->secret[11]; - - seq = half_md4_transform(hash, keyptr->secret); - seq |= ((u64)keyptr->count) << (32 - HASH_BITS); - - seq += ktime_to_ns(ktime_get_real()); - seq &= (1ull << 48) - 1; - - return seq; -} -EXPORT_SYMBOL(secure_dccp_sequence_number); -#endif - -#endif /* CONFIG_INET */ - +late_initcall(random_int_secret_init); /* * Get a random word for internal kernel use only. Similar to urandom but @@ -1646,17 +1315,15 @@ EXPORT_SYMBOL(secure_dccp_sequence_number); * value is not cryptographically secure but for several uses the cost of * depleting entropy is too high */ -DEFINE_PER_CPU(__u32 [4], get_random_int_hash); +DEFINE_PER_CPU(__u32 [MD5_DIGEST_WORDS], get_random_int_hash); unsigned int get_random_int(void) { - struct keydata *keyptr; __u32 *hash = get_cpu_var(get_random_int_hash); - int ret; + unsigned int ret; - keyptr = get_keyptr(); hash[0] += current->pid + jiffies + get_cycles(); - - ret = half_md4_transform(hash, keyptr->secret); + md5_transform(hash, random_int_secret); + ret = hash[0]; put_cpu_var(get_random_int_hash); return ret; diff --git a/include/linux/random.h b/include/linux/random.h index ce29a040c8dc..d13059f3ea32 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -57,18 +57,6 @@ extern void add_interrupt_randomness(int irq); extern void get_random_bytes(void *buf, int nbytes); void generate_random_uuid(unsigned char uuid_out[16]); -extern __u32 secure_ip_id(__be32 daddr); -extern __u32 secure_ipv6_id(const __be32 daddr[4]); -extern u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); -extern u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, - __be16 dport); -extern __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, - __be16 sport, __be16 dport); -extern __u32 secure_tcpv6_sequence_number(__be32 *saddr, __be32 *daddr, - __be16 sport, __be16 dport); -extern u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, - __be16 sport, __be16 dport); - #ifndef MODULE extern const struct file_operations random_fops, urandom_fops; #endif diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h new file mode 100644 index 000000000000..d97f6892c019 --- /dev/null +++ b/include/net/secure_seq.h @@ -0,0 +1,20 @@ +#ifndef _NET_SECURE_SEQ +#define _NET_SECURE_SEQ + +#include + +extern __u32 secure_ip_id(__be32 daddr); +extern __u32 secure_ipv6_id(const __be32 daddr[4]); +extern u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); +extern u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, + __be16 dport); +extern __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, + __be16 sport, __be16 dport); +extern __u32 secure_tcpv6_sequence_number(__be32 *saddr, __be32 *daddr, + __be16 sport, __be16 dport); +extern u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, + __be16 sport, __be16 dport); +extern u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, + __be16 sport, __be16 dport); + +#endif /* _NET_SECURE_SEQ */ diff --git a/net/core/Makefile b/net/core/Makefile index 8a04dd22cf77..0d357b1c4e57 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -3,7 +3,7 @@ # obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \ - gen_stats.o gen_estimator.o net_namespace.o + gen_stats.o gen_estimator.o net_namespace.o secure_seq.o obj-$(CONFIG_SYSCTL) += sysctl_net_core.o diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c new file mode 100644 index 000000000000..45329d7c9dd9 --- /dev/null +++ b/net/core/secure_seq.c @@ -0,0 +1,184 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +static u32 net_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned; + +static int __init net_secret_init(void) +{ + get_random_bytes(net_secret, sizeof(net_secret)); + return 0; +} +late_initcall(net_secret_init); + +static u32 seq_scale(u32 seq) +{ + /* + * As close as possible to RFC 793, which + * suggests using a 250 kHz clock. + * Further reading shows this assumes 2 Mb/s networks. + * For 10 Mb/s Ethernet, a 1 MHz clock is appropriate. + * For 10 Gb/s Ethernet, a 1 GHz clock should be ok, but + * we also need to limit the resolution so that the u32 seq + * overlaps less than one time per MSL (2 minutes). + * Choosing a clock of 64 ns period is OK. (period of 274 s) + */ + return seq + (ktime_to_ns(ktime_get_real()) >> 6); +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +__u32 secure_tcpv6_sequence_number(__be32 *saddr, __be32 *daddr, + __be16 sport, __be16 dport) +{ + u32 secret[MD5_MESSAGE_BYTES / 4]; + u32 hash[MD5_DIGEST_WORDS]; + u32 i; + + memcpy(hash, saddr, 16); + for (i = 0; i < 4; i++) + secret[i] = net_secret[i] + daddr[i]; + secret[4] = net_secret[4] + + (((__force u16)sport << 16) + (__force u16)dport); + for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++) + secret[i] = net_secret[i]; + + md5_transform(hash, secret); + + return seq_scale(hash[0]); +} +EXPORT_SYMBOL(secure_tcpv6_sequence_number); + +u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, + __be16 dport) +{ + u32 secret[MD5_MESSAGE_BYTES / 4]; + u32 hash[MD5_DIGEST_WORDS]; + u32 i; + + memcpy(hash, saddr, 16); + for (i = 0; i < 4; i++) + secret[i] = net_secret[i] + (__force u32) daddr[i]; + secret[4] = net_secret[4] + (__force u32)dport; + for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++) + secret[i] = net_secret[i]; + + md5_transform(hash, secret); + + return hash[0]; +} +#endif + +#ifdef CONFIG_INET +__u32 secure_ip_id(__be32 daddr) +{ + u32 hash[MD5_DIGEST_WORDS]; + + hash[0] = (__force __u32) daddr; + hash[1] = net_secret[13]; + hash[2] = net_secret[14]; + hash[3] = net_secret[15]; + + md5_transform(hash, net_secret); + + return hash[0]; +} + +__u32 secure_ipv6_id(const __be32 daddr[4]) +{ + __u32 hash[4]; + + memcpy(hash, daddr, 16); + md5_transform(hash, net_secret); + + return hash[0]; +} + +__u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, + __be16 sport, __be16 dport) +{ + u32 hash[MD5_DIGEST_WORDS]; + + hash[0] = (__force u32)saddr; + hash[1] = (__force u32)daddr; + hash[2] = ((__force u16)sport << 16) + (__force u16)dport; + hash[3] = net_secret[15]; + + md5_transform(hash, net_secret); + + return seq_scale(hash[0]); +} + +u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) +{ + u32 hash[MD5_DIGEST_WORDS]; + + hash[0] = (__force u32)saddr; + hash[1] = (__force u32)daddr; + hash[2] = (__force u32)dport ^ net_secret[14]; + hash[3] = net_secret[15]; + + md5_transform(hash, net_secret); + + return hash[0]; +} +EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral); +#endif + +#if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE) +u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, + __be16 sport, __be16 dport) +{ + u32 hash[MD5_DIGEST_WORDS]; + u64 seq; + + hash[0] = (__force u32)saddr; + hash[1] = (__force u32)daddr; + hash[2] = ((__force u16)sport << 16) + (__force u16)dport; + hash[3] = net_secret[15]; + + md5_transform(hash, net_secret); + + seq = hash[0] | (((u64)hash[1]) << 32); + seq += ktime_to_ns(ktime_get_real()); + seq &= (1ull << 48) - 1; + + return seq; +} +EXPORT_SYMBOL(secure_dccp_sequence_number); + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, + __be16 sport, __be16 dport) +{ + u32 secret[MD5_MESSAGE_BYTES / 4]; + u32 hash[MD5_DIGEST_WORDS]; + u64 seq; + u32 i; + + memcpy(hash, saddr, 16); + for (i = 0; i < 4; i++) + secret[i] = net_secret[i] + daddr[i]; + secret[4] = net_secret[4] + + (((__force u16)sport << 16) + (__force u16)dport); + for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++) + secret[i] = net_secret[i]; + + md5_transform(hash, secret); + + seq = hash[0] | (((u64)hash[1]) << 32); + seq += ktime_to_ns(ktime_get_real()); + seq &= (1ull << 48) - 1; + + return seq; +} +EXPORT_SYMBOL(secure_dccpv6_sequence_number); +#endif +#endif diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 8c36adfd1919..332639b56f4d 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "ackvec.h" #include "ccid.h" diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 8dc4348774a5..b74f76117dcf 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -29,6 +29,7 @@ #include #include #include +#include #include "dccp.h" #include "ipv6.h" @@ -69,13 +70,7 @@ static inline void dccp_v6_send_check(struct sock *sk, struct sk_buff *skb) dh->dccph_checksum = dccp_v6_csum_finish(skb, &np->saddr, &np->daddr); } -static inline __u32 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, - __be16 sport, __be16 dport ) -{ - return secure_tcpv6_sequence_number(saddr, daddr, sport, dport); -} - -static inline __u32 dccp_v6_init_sequence(struct sk_buff *skb) +static inline __u64 dccp_v6_init_sequence(struct sk_buff *skb) { return secure_dccpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32, ipv6_hdr(skb)->saddr.s6_addr32, diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 3c0369a3a663..984ec656b03b 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -21,6 +21,7 @@ #include #include +#include #include /* diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index e38213817d0a..86f13c67ea85 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -19,6 +19,7 @@ #include #include #include +#include /* * Theory of operations. diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/ipv4/netfilter/nf_nat_proto_common.c index 3e61faf23a9a..f52d41ea0690 100644 --- a/net/ipv4/netfilter/nf_nat_proto_common.c +++ b/net/ipv4/netfilter/nf_nat_proto_common.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6afc4eb50591..e3dec1c9f09d 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -109,6 +109,7 @@ #include #endif #include +#include #define RT_FL_TOS(oldflp4) \ ((u32)(oldflp4->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 955b8e65b69e..1c12b8ec849d 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -72,6 +72,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index b53197233709..73f1a00a96af 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -20,6 +20,7 @@ #include #include #include +#include #include int __inet6_hash(struct sock *sk, struct inet_timewait_sock *tw) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 78aa53492b3e..d1fb63f4aeb7 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -61,6 +61,7 @@ #include #include #include +#include #include -- cgit v1.2.3