author    David S. Miller <davem@davemloft.net>  2017-10-06 07:24:48 +0300
committer David S. Miller <davem@davemloft.net>  2017-10-06 07:24:48 +0300
commit    cec451ce60e50dba6d4136b7d1e62a5900cd264f (patch)
tree      cb70c1552a2c58cc5b8f63bd8e85b7fdc3b33497 /include/net/tcp.h
parent    b1fb67fa501c4787035317f84db6caf013385581 (diff)
parent    bef06223083b81d2064824afe2bc85be416ab73a (diff)
download  linux-cec451ce60e50dba6d4136b7d1e62a5900cd264f.tar.xz
Merge branch 'tcp-improving-RACK-cpu-performance'
Yuchung Cheng says:

====================
tcp: improving RACK cpu performance

This patch set improves the CPU consumption of the RACK TCP loss
recovery algorithm, in particular for high-speed networks. Currently,
for every ACK in recovery, RACK can potentially iterate over all sent
packets in the write queue. On large-BDP networks with non-trivial
losses, the CPU usage of this write-queue walk becomes unreasonably
high.

This patch set introduces a new queue in TCP that keeps only the skbs
sent and not yet (s)acked or marked lost, in time order instead of
sequence order. With that, RACK can examine this time-sorted list per
ACK and check only the packets sent recently, within the reordering
window, avoiding write-queue walks entirely. The number of skbs
examined per ACK drops by orders of magnitude.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
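The data-structure idea from the cover letter can be sketched outside the
kernel: keep in-flight packets on a list ordered by send time, and on each
ACK walk from the oldest entry only until the first packet still inside the
reordering window. The following is a minimal userspace illustration of
that bounded walk; struct pkt, rack_detect_loss(), and the microsecond
timestamps are invented for the example and do not reproduce the kernel's
actual RACK code.

#include <stdio.h>

/* One in-flight packet, linked oldest to newest by send time. */
struct pkt {
	unsigned int seq;        /* starting sequence number */
	unsigned long sent_us;   /* send timestamp, microseconds */
	struct pkt *newer;       /* next packet in time order */
};

/* Walk the time-sorted list from the oldest packet and mark entries
 * lost while they were sent more than reo_wnd_us before the most
 * recently (s)acked packet.  The loop stops at the first packet that
 * is still within the reordering window, so it never scans the whole
 * queue -- that bound is the point of keeping a time-sorted list. */
static void rack_detect_loss(struct pkt *oldest, unsigned long acked_us,
			     unsigned long reo_wnd_us)
{
	for (struct pkt *p = oldest; p; p = p->newer) {
		if (acked_us - p->sent_us <= reo_wnd_us)
			break;           /* everything newer is too recent */
		printf("seq %u lost\n", p->seq);
	}
}

int main(void)
{
	struct pkt c = { .seq = 3000, .sent_us = 900, .newer = NULL };
	struct pkt b = { .seq = 2000, .sent_us = 500, .newer = &c };
	struct pkt a = { .seq = 1000, .sent_us = 100, .newer = &b };

	/* An ACK arrives at t=1000us with a 200us reordering window:
	 * a and b are marked lost, and the walk stops before c. */
	rack_detect_loss(&a, 1000, 200);
	return 0;
}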
Diffstat (limited to 'include/net/tcp.h')
-rw-r--r--  include/net/tcp.h | 24
1 file changed, 23 insertions(+), 1 deletion(-)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 426c2e986016..3b16f353b539 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1589,14 +1589,34 @@ enum tcp_chrono {
void tcp_chrono_start(struct sock *sk, const enum tcp_chrono type);
void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type);
+/* This helper is needed because skb->tcp_tsorted_anchor uses
+ * the same memory storage as skb->destructor/_skb_refdst
+ */
+static inline void tcp_skb_tsorted_anchor_cleanup(struct sk_buff *skb)
+{
+ skb->destructor = NULL;
+ skb->_skb_refdst = 0UL;
+}
+
+#define tcp_skb_tsorted_save(skb) { \
+ unsigned long _save = skb->_skb_refdst; \
+ skb->_skb_refdst = 0UL;
+
+#define tcp_skb_tsorted_restore(skb) \
+ skb->_skb_refdst = _save; \
+}
+
/* write queue abstraction */
static inline void tcp_write_queue_purge(struct sock *sk)
{
struct sk_buff *skb;
tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
- while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL)
+ while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
+ tcp_skb_tsorted_anchor_cleanup(skb);
sk_wmem_free_skb(sk, skb);
+ }
+ INIT_LIST_HEAD(&tcp_sk(sk)->tsorted_sent_queue);
sk_mem_reclaim(sk);
tcp_clear_all_retrans_hints(tcp_sk(sk));
}
@@ -1711,6 +1731,8 @@ static inline void tcp_insert_write_queue_before(struct sk_buff *new,
static inline void tcp_unlink_write_queue(struct sk_buff *skb, struct sock *sk)
{
+ list_del(&skb->tcp_tsorted_anchor);
+ tcp_skb_tsorted_anchor_cleanup(skb);
__skb_unlink(skb, &sk->sk_write_queue);
}