diff options
author | Eric Dumazet <edumazet@google.com> | 2012-05-02 11:55:58 +0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2012-05-03 05:11:11 +0400 |
commit | 923dd347b8904c24bcac89bf038ed4da87f8aa90 (patch) | |
tree | d329204cb40e4d13e07ffc538fd9978c334ef90c | |
parent | eeb7fc7bc095546b21188e8e076a59bce73f9ca6 (diff) | |
download | linux-923dd347b8904c24bcac89bf038ed4da87f8aa90.tar.xz |
net: take care of cloned skbs in tcp_try_coalesce()
Before stealing fragments or skb head, we must make sure skbs are not
cloned.
Alexander was worried about the destination skb being cloned: in bridge
setups, a driver could be fooled if skb->data_len did not match the skb's
nr_frags.
If source skb is cloned, we must take references on pages instead.
The bug was triggered when using tcpdump (when not using mmap()).
Introduce a kfree_skb_partial() helper to clean up the code.
Reported-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | net/ipv4/tcp_input.c | 42 |
1 files changed, 28 insertions, 14 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 7096790e06bf..a8829370f712 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4532,6 +4532,7 @@ static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size) * @sk: socket * @to: prior buffer * @from: buffer to add in queue + * @fragstolen: pointer to boolean * * Before queueing skb @from after @to, try to merge them * to reduce overall memory use and queue lengths, if cost is small. @@ -4544,10 +4545,10 @@ static bool tcp_try_coalesce(struct sock *sk, struct sk_buff *from, bool *fragstolen) { - int delta, len = from->len; + int i, delta, len = from->len; *fragstolen = false; - if (tcp_hdr(from)->fin) + if (tcp_hdr(from)->fin || skb_cloned(to)) return false; if (len <= skb_tailroom(to)) { BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len)); @@ -4574,7 +4575,13 @@ copyfrags: skb_shinfo(from)->frags, skb_shinfo(from)->nr_frags * sizeof(skb_frag_t)); skb_shinfo(to)->nr_frags += skb_shinfo(from)->nr_frags; - skb_shinfo(from)->nr_frags = 0; + + if (skb_cloned(from)) + for (i = 0; i < skb_shinfo(from)->nr_frags; i++) + skb_frag_ref(from, i); + else + skb_shinfo(from)->nr_frags = 0; + to->truesize += delta; atomic_add(delta, &sk->sk_rmem_alloc); sk_mem_charge(sk, delta); @@ -4592,13 +4599,26 @@ copyfrags: offset = from->data - (unsigned char *)page_address(page); skb_fill_page_desc(to, skb_shinfo(to)->nr_frags, page, offset, skb_headlen(from)); - *fragstolen = true; + + if (skb_cloned(from)) + get_page(page); + else + *fragstolen = true; + delta = len; /* we dont know real truesize... 
*/ goto copyfrags; } return false; } +static void kfree_skb_partial(struct sk_buff *skb, bool head_stolen) +{ + if (head_stolen) + kmem_cache_free(skbuff_head_cache, skb); + else + __kfree_skb(skb); +} + static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); @@ -4642,10 +4662,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) if (!tcp_try_coalesce(sk, skb1, skb, &fragstolen)) { __skb_queue_after(&tp->out_of_order_queue, skb1, skb); } else { - if (fragstolen) - kmem_cache_free(skbuff_head_cache, skb); - else - __kfree_skb(skb); + kfree_skb_partial(skb, fragstolen); skb = NULL; } @@ -4804,12 +4821,9 @@ queue_and_out: tcp_fast_path_check(sk); - if (eaten > 0) { - if (fragstolen) - kmem_cache_free(skbuff_head_cache, skb); - else - __kfree_skb(skb); - } else if (!sock_flag(sk, SOCK_DEAD)) + if (eaten > 0) + kfree_skb_partial(skb, fragstolen); + else if (!sock_flag(sk, SOCK_DEAD)) sk->sk_data_ready(sk, 0); return; } |