diff options
author | David S. Miller <davem@davemloft.net> | 2014-09-15 22:41:12 +0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2014-09-15 22:41:12 +0400 |
commit | 437024067ac1fbda7bb3a795e75922f9034672fb (patch) | |
tree | c0686708da032f9f6f842903163fa6da23ef649f | |
parent | 13bb5180e82349d9fcfa11cd17b3c1e7b558a902 (diff) | |
parent | b3d6cb92fd190d720a01075c4d20cdca896663fc (diff) | |
download | linux-437024067ac1fbda7bb3a795e75922f9034672fb.tar.xz |
Merge branch 'tcpflags'
Eric Dumazet says:
====================
tcp: no longer keep around headers in input path
Looking at tcp_try_coalesce() I was wondering why I did :
if (tcp_hdr(from)->fin)
return false;
The answer would be to allow the aggregation, if we simply OR the FIN and PSH
flags eventually present in @from to @to packet. (Note a change is also
needed in skb_try_coalesce() to avoid calling skb_put() with 0 len)
Then, looking at tcp_recvmsg(), I realized we access tcp_hdr(skb)->syn
(and maybe tcp_hdr(skb)->fin) for every packet we process from socket
receive queue.
We have to understand TCP flags are cold in cpu caches most of the time
(assuming TCP timestamps, and that application calls recvmsg() a long
time after incoming packet was processed), and bringing a whole
cache line only to access one bit is not very nice.
It would make sense to use in TCP input path TCP_SKB_CB(skb)->tcp_flags
as we do in output path.
This saves one cache line miss, and TCP tcp_collapse() can avoid dealing
with the headers.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | net/core/skbuff.c | 3 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 18 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 31 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 1 | ||||
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 1 |
5 files changed, 22 insertions, 32 deletions
diff --git a/net/core/skbuff.c b/net/core/skbuff.c index c8259ac38745..29f7f0121491 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3936,7 +3936,8 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, return false; if (len <= skb_tailroom(to)) { - BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len)); + if (len) + BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len)); *delta_truesize = 0; return true; } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 541f26a67ba2..070aeff1b131 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1510,9 +1510,9 @@ static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) { offset = seq - TCP_SKB_CB(skb)->seq; - if (tcp_hdr(skb)->syn) + if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) offset--; - if (offset < skb->len || tcp_hdr(skb)->fin) { + if (offset < skb->len || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) { *off = offset; return skb; } @@ -1585,7 +1585,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, if (offset + 1 != skb->len) continue; } - if (tcp_hdr(skb)->fin) { + if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) { sk_eat_skb(sk, skb, false); ++seq; break; @@ -1722,11 +1722,11 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, break; offset = *seq - TCP_SKB_CB(skb)->seq; - if (tcp_hdr(skb)->syn) + if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) offset--; if (offset < skb->len) goto found_ok_skb; - if (tcp_hdr(skb)->fin) + if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) goto found_fin_ok; WARN(!(flags & MSG_PEEK), "recvmsg bug 2: copied %X seq %X rcvnxt %X fl %X\n", @@ -1959,7 +1959,7 @@ skip_copy: if (used + offset < skb->len) continue; - if (tcp_hdr(skb)->fin) + if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) goto found_fin_ok; if (!(flags & MSG_PEEK)) { sk_eat_skb(sk, skb, copied_early); @@ -2160,8 +2160,10 @@ void tcp_close(struct sock *sk, long timeout) * reader process may not have drained the data yet! */ while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { - u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq - - tcp_hdr(skb)->fin; + u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq; + + if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) + len--; data_was_unread += len; __kfree_skb(skb); } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index f97003ad0af5..ea92f23ffaf1 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4093,7 +4093,7 @@ static void tcp_ofo_queue(struct sock *sk) __skb_unlink(skb, &tp->out_of_order_queue); __skb_queue_tail(&sk->sk_receive_queue, skb); tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; - if (tcp_hdr(skb)->fin) + if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) tcp_fin(sk); } } @@ -4143,9 +4143,6 @@ static bool tcp_try_coalesce(struct sock *sk, *fragstolen = false; - if (tcp_hdr(from)->fin) - return false; - /* Its possible this segment overlaps with prior segment in queue */ if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq) return false; @@ -4158,6 +4155,7 @@ static bool tcp_try_coalesce(struct sock *sk, NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE); TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq; TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq; + TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags; return true; } @@ -4513,7 +4511,7 @@ restart: * - bloated or contains data before "start" or * overlaps to the next one. */ - if (!tcp_hdr(skb)->syn && !tcp_hdr(skb)->fin && + if (!(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)) && (tcp_win_from_space(skb->truesize) > skb->len || before(TCP_SKB_CB(skb)->seq, start))) { end_of_skbs = false; @@ -4532,30 +4530,18 @@ restart: /* Decided to skip this, advance start seq. */ start = TCP_SKB_CB(skb)->end_seq; } - if (end_of_skbs || tcp_hdr(skb)->syn || tcp_hdr(skb)->fin) + if (end_of_skbs || + (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN))) return; while (before(start, end)) { + int copy = min_t(int, SKB_MAX_ORDER(0, 0), end - start); struct sk_buff *nskb; - unsigned int header = skb_headroom(skb); - int copy = SKB_MAX_ORDER(header, 0); - /* Too big header? This can happen with IPv6. */ - if (copy < 0) - return; - if (end - start < copy) - copy = end - start; - nskb = alloc_skb(copy + header, GFP_ATOMIC); + nskb = alloc_skb(copy, GFP_ATOMIC); if (!nskb) return; - skb_set_mac_header(nskb, skb_mac_header(skb) - skb->head); - skb_set_network_header(nskb, (skb_network_header(skb) - - skb->head)); - skb_set_transport_header(nskb, (skb_transport_header(skb) - - skb->head)); - skb_reserve(nskb, header); - memcpy(nskb->head, skb->head, header); memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start; __skb_queue_before(list, skb, nskb); @@ -4579,8 +4565,7 @@ restart: skb = tcp_collapse_one(sk, skb, list); if (!skb || skb == tail || - tcp_hdr(skb)->syn || - tcp_hdr(skb)->fin) + (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN))) return; } } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 7881b96d2b72..006b045716d8 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1638,6 +1638,7 @@ int tcp_v4_rcv(struct sk_buff *skb) TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + skb->len - th->doff * 4); TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); + TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th); TCP_SKB_CB(skb)->tcp_tw_isn = 0; TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph); TCP_SKB_CB(skb)->sacked = 0; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 1835480336ac..de51a88bec6f 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1415,6 +1415,7 @@ static int tcp_v6_rcv(struct sk_buff *skb) TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + skb->len - th->doff*4); TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); + TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th); TCP_SKB_CB(skb)->tcp_tw_isn = 0; TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr); TCP_SKB_CB(skb)->sacked = 0; |