diff options
Diffstat (limited to 'net/ipv4/tcp.c')
| -rw-r--r-- | net/ipv4/tcp.c | 153 |
1 files changed, 79 insertions, 74 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 6596b4feeddc..6c11eece262c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -282,7 +282,7 @@ int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; struct percpu_counter tcp_orphan_count; EXPORT_SYMBOL_GPL(tcp_orphan_count); -int sysctl_tcp_mem[3] __read_mostly; +long sysctl_tcp_mem[3] __read_mostly; int sysctl_tcp_wmem[3] __read_mostly; int sysctl_tcp_rmem[3] __read_mostly; @@ -290,7 +290,7 @@ EXPORT_SYMBOL(sysctl_tcp_mem); EXPORT_SYMBOL(sysctl_tcp_rmem); EXPORT_SYMBOL(sysctl_tcp_wmem); -atomic_t tcp_memory_allocated; /* Current allocated memory. */ +atomic_long_t tcp_memory_allocated; /* Current allocated memory. */ EXPORT_SYMBOL(tcp_memory_allocated); /* @@ -315,7 +315,6 @@ struct tcp_splice_state { * is strict, actions are advisory and have some latency. */ int tcp_memory_pressure __read_mostly; - EXPORT_SYMBOL(tcp_memory_pressure); void tcp_enter_memory_pressure(struct sock *sk) @@ -325,7 +324,6 @@ void tcp_enter_memory_pressure(struct sock *sk) tcp_memory_pressure = 1; } } - EXPORT_SYMBOL(tcp_enter_memory_pressure); /* Convert seconds to retransmits based on initial and max timeout */ @@ -388,8 +386,6 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) */ mask = 0; - if (sk->sk_err) - mask = POLLERR; /* * POLLHUP is certainly not done right. But poll() doesn't @@ -453,13 +449,20 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) mask |= POLLOUT | POLLWRNORM; } - } + } else + mask |= POLLOUT | POLLWRNORM; if (tp->urg_data & TCP_URG_VALID) mask |= POLLPRI; } + /* This barrier is coupled with smp_wmb() in tcp_reset() */ + smp_rmb(); + if (sk->sk_err) + mask |= POLLERR; + return mask; } +EXPORT_SYMBOL(tcp_poll); int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) { @@ -508,10 +511,11 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) return put_user(answ, (int __user *)arg); } +EXPORT_SYMBOL(tcp_ioctl); static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb) { - TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; + TCP_SKB_CB(skb)->flags |= TCPHDR_PSH; tp->pushed_seq = tp->write_seq; } @@ -527,7 +531,7 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb) skb->csum = 0; tcb->seq = tcb->end_seq = tp->write_seq; - tcb->flags = TCPCB_FLAG_ACK; + tcb->flags = TCPHDR_ACK; tcb->sacked = 0; skb_header_release(skb); tcp_add_write_queue_tail(sk, skb); @@ -608,6 +612,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, ssize_t spliced; int ret; + sock_rps_record_flow(sk); /* * We can't seek on a socket input */ @@ -675,6 +680,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, return ret; } +EXPORT_SYMBOL(tcp_splice_read); struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp) { @@ -815,7 +821,7 @@ new_segment: skb_shinfo(skb)->gso_segs = 0; if (!copied) - TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH; + TCP_SKB_CB(skb)->flags &= ~TCPHDR_PSH; copied += copy; poffset += copy; @@ -856,15 +862,15 @@ out_err: return sk_stream_error(sk, flags, err); } -ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, - size_t size, int flags) +int tcp_sendpage(struct sock *sk, struct page *page, int offset, + size_t size, int flags) { ssize_t res; - struct sock *sk = sock->sk; if (!(sk->sk_route_caps & NETIF_F_SG) || !(sk->sk_route_caps & NETIF_F_ALL_CSUM)) - return sock_no_sendpage(sock, page, offset, size, flags); + return sock_no_sendpage(sk->sk_socket, page, offset, size, + flags); lock_sock(sk); TCP_CHECK_TIMER(sk); @@ -873,6 +879,7 @@ ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, release_sock(sk); return res; } +EXPORT_SYMBOL(tcp_sendpage); #define TCP_PAGE(sk) (sk->sk_sndmsg_page) #define TCP_OFF(sk) (sk->sk_sndmsg_off) @@ -897,10 +904,9 @@ static inline int select_size(struct sock *sk, int sg) return tmp; } -int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, +int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t size) { - struct sock *sk = sock->sk; struct iovec *iov; struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; @@ -937,7 +943,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, sg = sk->sk_route_caps & NETIF_F_SG; while (--iovlen >= 0) { - int seglen = iov->iov_len; + size_t seglen = iov->iov_len; unsigned char __user *from = iov->iov_base; iov++; @@ -1061,7 +1067,7 @@ new_segment: } if (!copied) - TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH; + TCP_SKB_CB(skb)->flags &= ~TCPHDR_PSH; tp->write_seq += copy; TCP_SKB_CB(skb)->end_seq += copy; @@ -1121,6 +1127,7 @@ out_err: release_sock(sk); return err; } +EXPORT_SYMBOL(tcp_sendmsg); /* * Handle reading urgent data. BSD has very simple semantics for @@ -1186,7 +1193,7 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied) struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); WARN(skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq), - KERN_INFO "cleanup rbuf bug: copied %X seq %X rcvnxt %X\n", + "cleanup rbuf bug: copied %X seq %X rcvnxt %X\n", tp->copied_seq, TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt); #endif @@ -1380,6 +1387,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, tcp_cleanup_rbuf(sk, copied); return copied; } +EXPORT_SYMBOL(tcp_read_sock); /* * This routine copies from a sock struct into the user buffer. @@ -1469,10 +1477,9 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, * shouldn't happen. */ if (WARN(before(*seq, TCP_SKB_CB(skb)->seq), - KERN_INFO "recvmsg bug: copied %X " - "seq %X rcvnxt %X fl %X\n", *seq, - TCP_SKB_CB(skb)->seq, tp->rcv_nxt, - flags)) + "recvmsg bug: copied %X seq %X rcvnxt %X fl %X\n", + *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt, + flags)) break; offset = *seq - TCP_SKB_CB(skb)->seq; @@ -1482,10 +1489,9 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, goto found_ok_skb; if (tcp_hdr(skb)->fin) goto found_fin_ok; - WARN(!(flags & MSG_PEEK), KERN_INFO "recvmsg bug 2: " - "copied %X seq %X rcvnxt %X fl %X\n", - *seq, TCP_SKB_CB(skb)->seq, - tp->rcv_nxt, flags); + WARN(!(flags & MSG_PEEK), + "recvmsg bug 2: copied %X seq %X rcvnxt %X fl %X\n", + *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt, flags); } /* Well, if we have backlog, try to process it now yet. */ @@ -1774,6 +1780,7 @@ recv_urg: err = tcp_recv_urg(sk, msg, len, flags); goto out; } +EXPORT_SYMBOL(tcp_recvmsg); void tcp_set_state(struct sock *sk, int state) { @@ -1866,6 +1873,7 @@ void tcp_shutdown(struct sock *sk, int how) tcp_send_fin(sk); } } +EXPORT_SYMBOL(tcp_shutdown); void tcp_close(struct sock *sk, long timeout) { @@ -1898,6 +1906,10 @@ void tcp_close(struct sock *sk, long timeout) sk_mem_reclaim(sk); + /* If socket has been already reset (e.g. in tcp_reset()) - kill it. */ + if (sk->sk_state == TCP_CLOSE) + goto adjudge_to_death; + /* As outlined in RFC 2525, section 2.17, we send a RST here because * data was lost. To witness the awful effects of the old behavior of * always doing a FIN, run an older 2.1.x kernel or 2.0.x, start a bulk @@ -2001,11 +2013,8 @@ adjudge_to_death: } } if (sk->sk_state != TCP_CLOSE) { - int orphan_count = percpu_counter_read_positive( - sk->sk_prot->orphan_count); - sk_mem_reclaim(sk); - if (tcp_too_many_orphans(sk, orphan_count)) { + if (tcp_too_many_orphans(sk, 0)) { if (net_ratelimit()) printk(KERN_INFO "TCP: too many of orphaned " "sockets\n"); @@ -2025,6 +2034,7 @@ out: local_bh_enable(); sock_put(sk); } +EXPORT_SYMBOL(tcp_close); /* These states need RST on ABORT according to RFC793 */ @@ -2098,6 +2108,7 @@ int tcp_disconnect(struct sock *sk, int flags) sk->sk_error_report(sk); return err; } +EXPORT_SYMBOL(tcp_disconnect); /* * Socket option code for TCP. @@ -2175,6 +2186,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level, GFP_KERNEL); if (cvp == NULL) return -ENOMEM; + + kref_init(&cvp->kref); } lock_sock(sk); tp->rx_opt.cookie_in_always = @@ -2189,12 +2202,11 @@ static int do_tcp_setsockopt(struct sock *sk, int level, */ kref_put(&tp->cookie_values->kref, tcp_cookie_values_release); - kref_init(&cvp->kref); - tp->cookie_values = cvp; } else { cvp = tp->cookie_values; } } + if (cvp != NULL) { cvp->cookie_desired = ctd.tcpct_cookie_desired; @@ -2208,6 +2220,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level, cvp->s_data_desired = ctd.tcpct_s_data_desired; cvp->s_data_constant = 0; /* false */ } + + tp->cookie_values = cvp; } release_sock(sk); return err; @@ -2230,7 +2244,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, /* Values greater than interface MTU won't take effect. However * at the point when this call is done we typically don't yet * know which interface is going to be used */ - if (val < 8 || val > MAX_TCP_WINDOW) { + if (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW) { err = -EINVAL; break; } @@ -2376,7 +2390,12 @@ static int do_tcp_setsockopt(struct sock *sk, int level, err = tp->af_specific->md5_parse(sk, optval, optlen); break; #endif - + case TCP_USER_TIMEOUT: + /* Cap the max timeout in ms TCP will retry/retrans + * before giving up and aborting (ETIMEDOUT) a connection. + */ + icsk->icsk_user_timeout = msecs_to_jiffies(val); + break; default: err = -ENOPROTOOPT; break; @@ -2396,6 +2415,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, optval, optlen); return do_tcp_setsockopt(sk, level, optname, optval, optlen); } +EXPORT_SYMBOL(tcp_setsockopt); #ifdef CONFIG_COMPAT int compat_tcp_setsockopt(struct sock *sk, int level, int optname, @@ -2406,7 +2426,6 @@ int compat_tcp_setsockopt(struct sock *sk, int level, int optname, optval, optlen); return do_tcp_setsockopt(sk, level, optname, optval, optlen); } - EXPORT_SYMBOL(compat_tcp_setsockopt); #endif @@ -2472,7 +2491,6 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_total_retrans = tp->total_retrans; } - EXPORT_SYMBOL_GPL(tcp_get_info); static int do_tcp_getsockopt(struct sock *sk, int level, @@ -2590,6 +2608,16 @@ static int do_tcp_getsockopt(struct sock *sk, int level, return -EFAULT; return 0; } + case TCP_THIN_LINEAR_TIMEOUTS: + val = tp->thin_lto; + break; + case TCP_THIN_DUPACK: + val = tp->thin_dupack; + break; + + case TCP_USER_TIMEOUT: + val = jiffies_to_msecs(icsk->icsk_user_timeout); + break; default: return -ENOPROTOOPT; } @@ -2611,6 +2639,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, optval, optlen); return do_tcp_getsockopt(sk, level, optname, optval, optlen); } +EXPORT_SYMBOL(tcp_getsockopt); #ifdef CONFIG_COMPAT int compat_tcp_getsockopt(struct sock *sk, int level, int optname, @@ -2621,7 +2650,6 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname, optval, optlen); return do_tcp_getsockopt(sk, level, optname, optval, optlen); } - EXPORT_SYMBOL(compat_tcp_getsockopt); #endif @@ -2858,7 +2886,6 @@ void tcp_free_md5sig_pool(void) if (pool) __tcp_free_md5sig_pool(pool); } - EXPORT_SYMBOL(tcp_free_md5sig_pool); static struct tcp_md5sig_pool * __percpu * @@ -2934,7 +2961,6 @@ retry: } return pool; } - EXPORT_SYMBOL(tcp_alloc_md5sig_pool); @@ -2958,7 +2984,7 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) spin_unlock(&tcp_md5sig_pool_lock); if (p) - return *per_cpu_ptr(p, smp_processor_id()); + return *this_cpu_ptr(p); local_bh_enable(); return NULL; @@ -2986,7 +3012,6 @@ int tcp_md5_hash_header(struct tcp_md5sig_pool *hp, th->check = old_checksum; return err; } - EXPORT_SYMBOL(tcp_md5_hash_header); int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, @@ -2999,6 +3024,7 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, const unsigned head_data_len = skb_headlen(skb) > header_len ? skb_headlen(skb) - header_len : 0; const struct skb_shared_info *shi = skb_shinfo(skb); + struct sk_buff *frag_iter; sg_init_table(&sg, 1); @@ -3013,9 +3039,12 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, return 1; } + skb_walk_frags(skb, frag_iter) + if (tcp_md5_hash_skb_data(hp, frag_iter, 0)) + return 1; + return 0; } - EXPORT_SYMBOL(tcp_md5_hash_skb_data); int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key) @@ -3025,7 +3054,6 @@ int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key) sg_init_one(&sg, key->key, key->keylen); return crypto_hash_update(&hp->md5_desc, &sg, key->keylen); } - EXPORT_SYMBOL(tcp_md5_hash_key); #endif @@ -3192,7 +3220,7 @@ void __init tcp_init(void) { struct sk_buff *skb = NULL; unsigned long nr_pages, limit; - int order, i, max_share; + int i, max_share, cnt; unsigned long jiffy = jiffies; BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); @@ -3241,22 +3269,12 @@ void __init tcp_init(void) INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain); } - /* Try to be a bit smarter and adjust defaults depending - * on available memory. - */ - for (order = 0; ((1 << order) << PAGE_SHIFT) < - (tcp_hashinfo.bhash_size * sizeof(struct inet_bind_hashbucket)); - order++) - ; - if (order >= 4) { - tcp_death_row.sysctl_max_tw_buckets = 180000; - sysctl_tcp_max_orphans = 4096 << (order - 4); - sysctl_max_syn_backlog = 1024; - } else if (order < 3) { - tcp_death_row.sysctl_max_tw_buckets >>= (3 - order); - sysctl_tcp_max_orphans >>= (3 - order); - sysctl_max_syn_backlog = 128; - } + + cnt = tcp_hashinfo.ehash_mask + 1; + + tcp_death_row.sysctl_max_tw_buckets = cnt / 2; + sysctl_tcp_max_orphans = cnt / 2; + sysctl_max_syn_backlog = max(128, cnt / 256); /* Set the pressure threshold to be a fraction of global memory that * is up to 1/2 at 256 MB, decreasing toward zero with the amount of @@ -3297,16 +3315,3 @@ void __init tcp_init(void) tcp_secret_retiring = &tcp_secret_two; tcp_secret_secondary = &tcp_secret_two; } - -EXPORT_SYMBOL(tcp_close); -EXPORT_SYMBOL(tcp_disconnect); -EXPORT_SYMBOL(tcp_getsockopt); -EXPORT_SYMBOL(tcp_ioctl); -EXPORT_SYMBOL(tcp_poll); -EXPORT_SYMBOL(tcp_read_sock); -EXPORT_SYMBOL(tcp_recvmsg); -EXPORT_SYMBOL(tcp_sendmsg); -EXPORT_SYMBOL(tcp_splice_read); -EXPORT_SYMBOL(tcp_sendpage); -EXPORT_SYMBOL(tcp_setsockopt); -EXPORT_SYMBOL(tcp_shutdown); |
