diff options
author | Eric Dumazet <edumazet@google.com> | 2019-03-26 18:34:55 +0300 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2019-03-27 23:59:02 +0300 |
commit | 4f661542a40217713f2cee0bb6678fbb30d9d367 (patch) | |
tree | 8c49f8cb2bb8e14e76b8f912ec39659dc06cb4e5 | |
parent | 4d5ec89fc8d14dcdab7214a0c13a1c7321dc6ea9 (diff) | |
download | linux-4f661542a40217713f2cee0bb6678fbb30d9d367.tar.xz |
tcp: fix zerocopy and notsent_lowat issues
My recent patch had at least three problems :
1) TX zerocopy wants notification when skb is acknowledged,
thus we need to call skb_zcopy_clear() if the skb is
cached into sk->sk_tx_skb_cache
2) Some applications might expect precise EPOLLOUT
notifications, so we need to update sk->sk_wmem_queued
and call sk_mem_uncharge() from sk_wmem_free_skb()
in all cases. The SOCK_QUEUE_SHRUNK flag must also be set.
3) Reuse of saved skb should have used skb_cloned() instead
of simply checking if the fast clone has been freed.
Fixes: 472c2e07eef0 ("tcp: add one skb cache for tx")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Willem de Bruijn <willemb@google.com>
Cc: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/net/sock.h | 7 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 13 |
2 files changed, 7 insertions, 13 deletions
diff --git a/include/net/sock.h b/include/net/sock.h index 577d91fb5626..7fa223278522 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1465,13 +1465,14 @@ static inline void sk_mem_uncharge(struct sock *sk, int size) static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb) { + sock_set_flag(sk, SOCK_QUEUE_SHRUNK); + sk->sk_wmem_queued -= skb->truesize; + sk_mem_uncharge(sk, skb->truesize); if (!sk->sk_tx_skb_cache) { + skb_zcopy_clear(skb, true); sk->sk_tx_skb_cache = skb; return; } - sock_set_flag(sk, SOCK_QUEUE_SHRUNK); - sk->sk_wmem_queued -= skb->truesize; - sk_mem_uncharge(sk, skb->truesize); __kfree_skb(skb); } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 29b94edf05f9..82bd707c0347 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -865,14 +865,9 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, { struct sk_buff *skb; - skb = sk->sk_tx_skb_cache; - if (skb && !size) { - const struct sk_buff_fclones *fclones; - - fclones = container_of(skb, struct sk_buff_fclones, skb1); - if (refcount_read(&fclones->fclone_ref) == 1) { - sk->sk_wmem_queued -= skb->truesize; - sk_mem_uncharge(sk, skb->truesize); + if (likely(!size)) { + skb = sk->sk_tx_skb_cache; + if (skb && !skb_cloned(skb)) { skb->truesize -= skb->data_len; sk->sk_tx_skb_cache = NULL; pskb_trim(skb, 0); @@ -2543,8 +2538,6 @@ void tcp_write_queue_purge(struct sock *sk) tcp_rtx_queue_purge(sk); skb = sk->sk_tx_skb_cache; if (skb) { - sk->sk_wmem_queued -= skb->truesize; - sk_mem_uncharge(sk, skb->truesize); __kfree_skb(skb); sk->sk_tx_skb_cache = NULL; } |