author     David S. Miller <davem@davemloft.net>    2017-10-07 02:28:54 +0300
committer  David S. Miller <davem@davemloft.net>    2017-10-07 02:28:54 +0300
commit     ca82214144d925155977abe7c0af7c2c252b837f (patch)
tree       a9e03880b4f700a0f45026f06262a916d42f7e5e /include
parent     f5333f80c3b33183ce8b5b880adba028b728fbf6 (diff)
parent     75c119afe14f74b4dd967d75ed9f57ab6c0ef045 (diff)
Merge branch 'tcp-rbtree-retransmit-queue'
Eric Dumazet says:
====================
tcp: implement rb-tree based retransmit queue
This patch series implements an RB-tree based retransmit queue for TCP,
to better match modern BDPs.
Tested:
On receiver:
netem on ingress: delay 150ms 200us loss 1
GRO disabled to force stress and SACK storms.
for f in `seq 1 10`
do
./netperf -H lpaa6 -l30 -- -K bbr -o THROUGHPUT|tail -1
done | awk '{print $0} {sum += $0} END {printf "%7u\n",sum}'
Before patch :
323.87 351.48 339.59 338.62 306.72
204.07 304.93 291.88 202.47 176.88
-> 2840
After patch:
1700.83 2207.98 2070.17 1544.26 2114.76
2124.89 1693.14 1080.91 2216.82 1299.94
-> 18053
Average of 1805 Mbits instead of 284 Mbits.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
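For readers limited to the 'include' side of this merge: the series replaces the linear sk_send_head/sk_write_queue bookkeeping for already-transmitted data with an rb-tree, sk->tcp_rtx_queue, whose nodes are the skbs themselves via skb->rbnode. The tcp_rbtree_insert() helper declared in include/net/tcp.h below is implemented outside this include-only diff, so the following is only a minimal sketch; it assumes the tree is keyed by the wrap-safe starting sequence number in TCP_SKB_CB(skb)->seq, as the new skb_rb_first()-based lookups suggest.

/* Illustrative sketch, not the merged implementation: ordered insertion of
 * an skb into a retransmit-queue rb-tree keyed by TCP_SKB_CB(skb)->seq. */
static void example_rtx_insert(struct rb_root *root, struct sk_buff *skb)
{
        struct rb_node **p = &root->rb_node;
        struct rb_node *parent = NULL;
        struct sk_buff *cur;

        while (*p) {
                parent = *p;
                cur = rb_to_skb(parent);
                /* before() is tcp.h's wrap-safe u32 sequence comparison */
                if (before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(cur)->seq))
                        p = &parent->rb_left;
                else
                        p = &parent->rb_right;
        }
        rb_link_node(&skb->rbnode, parent, p);
        rb_insert_color(&skb->rbnode, root);
}

With the tree ordered by sequence number, skb_rb_first() yields the oldest unacked skb and lookups that previously walked the whole write queue become O(log n), which is what recovers throughput on large-BDP paths with heavy SACK processing.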
Diffstat (limited to 'include')
-rw-r--r--  include/linux/skbuff.h |  18
-rw-r--r--  include/net/sock.h     |   7
-rw-r--r--  include/net/tcp.h      | 100
3 files changed, 69 insertions(+), 56 deletions(-)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 01a985937867..03634ec2f918 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3158,6 +3158,12 @@ static inline int __skb_grow_rcsum(struct sk_buff *skb, unsigned int len)
         return __skb_grow(skb, len);
 }
 
+#define rb_to_skb(rb) rb_entry_safe(rb, struct sk_buff, rbnode)
+#define skb_rb_first(root) rb_to_skb(rb_first(root))
+#define skb_rb_last(root)  rb_to_skb(rb_last(root))
+#define skb_rb_next(skb)   rb_to_skb(rb_next(&(skb)->rbnode))
+#define skb_rb_prev(skb)   rb_to_skb(rb_prev(&(skb)->rbnode))
+
 #define skb_queue_walk(queue, skb) \
                 for (skb = (queue)->next;                                       \
                      skb != (struct sk_buff *)(queue);                          \
@@ -3172,6 +3178,18 @@ static inline int __skb_grow_rcsum(struct sk_buff *skb, unsigned int len)
                 for (; skb != (struct sk_buff *)(queue);                        \
                      skb = skb->next)
 
+#define skb_rbtree_walk(skb, root)                                              \
+                for (skb = skb_rb_first(root); skb != NULL;                     \
+                     skb = skb_rb_next(skb))
+
+#define skb_rbtree_walk_from(skb)                                               \
+                for (; skb != NULL;                                             \
+                     skb = skb_rb_next(skb))
+
+#define skb_rbtree_walk_from_safe(skb, tmp)                                     \
+                for (; tmp = skb ? skb_rb_next(skb) : NULL, (skb != NULL);      \
+                     skb = tmp)
+
 #define skb_queue_walk_from_safe(queue, skb, tmp)                               \
                 for (tmp = skb->next;                                           \
                      skb != (struct sk_buff *)(queue);                          \
diff --git a/include/net/sock.h b/include/net/sock.h
index a6b9a8d1a6df..4827094f1db4 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -60,7 +60,7 @@
 #include <linux/sched.h>
 #include <linux/wait.h>
 #include <linux/cgroup-defs.h>
-
+#include <linux/rbtree.h>
 #include <linux/filter.h>
 #include <linux/rculist_nulls.h>
 #include <linux/poll.h>
@@ -397,7 +397,10 @@ struct sock {
         int                     sk_wmem_queued;
         refcount_t              sk_wmem_alloc;
         unsigned long           sk_tsq_flags;
-        struct sk_buff          *sk_send_head;
+        union {
+                struct sk_buff  *sk_send_head;
+                struct rb_root  tcp_rtx_queue;
+        };
         struct sk_buff_head     sk_write_queue;
         __s32                   sk_peek_off;
         int                     sk_write_pending;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 3b16f353b539..5a95e5886b55 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -551,7 +551,13 @@ void tcp_xmit_retransmit_queue(struct sock *);
 void tcp_simple_retransmit(struct sock *);
 void tcp_enter_recovery(struct sock *sk, bool ece_ack);
 int tcp_trim_head(struct sock *, struct sk_buff *, u32);
-int tcp_fragment(struct sock *, struct sk_buff *, u32, unsigned int, gfp_t);
+enum tcp_queue {
+        TCP_FRAG_IN_WRITE_QUEUE,
+        TCP_FRAG_IN_RTX_QUEUE,
+};
+int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
+                 struct sk_buff *skb, u32 len,
+                 unsigned int mss_now, gfp_t gfp);
 
 void tcp_send_probe0(struct sock *);
 void tcp_send_partial(struct sock *);
@@ -1606,19 +1612,11 @@ static inline void tcp_skb_tsorted_anchor_cleanup(struct sk_buff *skb)
         skb->_skb_refdst = _save;               \
 }
 
-/* write queue abstraction */
-static inline void tcp_write_queue_purge(struct sock *sk)
-{
-        struct sk_buff *skb;
+void tcp_write_queue_purge(struct sock *sk);
 
-        tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
-        while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
-                tcp_skb_tsorted_anchor_cleanup(skb);
-                sk_wmem_free_skb(sk, skb);
-        }
-        INIT_LIST_HEAD(&tcp_sk(sk)->tsorted_sent_queue);
-        sk_mem_reclaim(sk);
-        tcp_clear_all_retrans_hints(tcp_sk(sk));
+static inline struct sk_buff *tcp_rtx_queue_head(const struct sock *sk)
+{
+        return skb_rb_first(&sk->tcp_rtx_queue);
 }
 
 static inline struct sk_buff *tcp_write_queue_head(const struct sock *sk)
@@ -1643,18 +1641,12 @@ static inline struct sk_buff *tcp_write_queue_prev(const struct sock *sk,
         return skb_queue_prev(&sk->sk_write_queue, skb);
 }
 
-#define tcp_for_write_queue(skb, sk)                                    \
-        skb_queue_walk(&(sk)->sk_write_queue, skb)
-
-#define tcp_for_write_queue_from(skb, sk)                               \
-        skb_queue_walk_from(&(sk)->sk_write_queue, skb)
-
 #define tcp_for_write_queue_from_safe(skb, tmp, sk)                     \
         skb_queue_walk_from_safe(&(sk)->sk_write_queue, skb, tmp)
 
 static inline struct sk_buff *tcp_send_head(const struct sock *sk)
 {
-        return sk->sk_send_head;
+        return skb_peek(&sk->sk_write_queue);
 }
 
 static inline bool tcp_skb_is_last(const struct sock *sk,
@@ -1663,29 +1655,30 @@ static inline bool tcp_skb_is_last(const struct sock *sk,
         return skb_queue_is_last(&sk->sk_write_queue, skb);
 }
 
-static inline void tcp_advance_send_head(struct sock *sk, const struct sk_buff *skb)
+static inline bool tcp_write_queue_empty(const struct sock *sk)
 {
-        if (tcp_skb_is_last(sk, skb))
-                sk->sk_send_head = NULL;
-        else
-                sk->sk_send_head = tcp_write_queue_next(sk, skb);
+        return skb_queue_empty(&sk->sk_write_queue);
+}
+
+static inline bool tcp_rtx_queue_empty(const struct sock *sk)
+{
+        return RB_EMPTY_ROOT(&sk->tcp_rtx_queue);
+}
+
+static inline bool tcp_rtx_and_write_queues_empty(const struct sock *sk)
+{
+        return tcp_rtx_queue_empty(sk) && tcp_write_queue_empty(sk);
 }
 
 static inline void tcp_check_send_head(struct sock *sk, struct sk_buff *skb_unlinked)
 {
-        if (sk->sk_send_head == skb_unlinked) {
-                sk->sk_send_head = NULL;
+        if (tcp_write_queue_empty(sk))
                 tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
-        }
+
         if (tcp_sk(sk)->highest_sack == skb_unlinked)
                 tcp_sk(sk)->highest_sack = NULL;
 }
 
-static inline void tcp_init_send_head(struct sock *sk)
-{
-        sk->sk_send_head = NULL;
-}
-
 static inline void __tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb)
 {
         __skb_queue_tail(&sk->sk_write_queue, skb);
@@ -1696,8 +1689,7 @@ static inline void tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb)
         __tcp_add_write_queue_tail(sk, skb);
 
         /* Queue it, remembering where we must start sending. */
-        if (sk->sk_send_head == NULL) {
-                sk->sk_send_head = skb;
+        if (sk->sk_write_queue.next == skb) {
                 tcp_chrono_start(sk, TCP_CHRONO_BUSY);
 
                 if (tcp_sk(sk)->highest_sack == NULL)
@@ -1710,35 +1702,32 @@ static inline void __tcp_add_write_queue_head(struct sock *sk, struct sk_buff *skb)
         __skb_queue_head(&sk->sk_write_queue, skb);
 }
 
-/* Insert buff after skb on the write queue of sk. */
-static inline void tcp_insert_write_queue_after(struct sk_buff *skb,
-                                                struct sk_buff *buff,
-                                                struct sock *sk)
-{
-        __skb_queue_after(&sk->sk_write_queue, skb, buff);
-}
-
 /* Insert new before skb on the write queue of sk.  */
 static inline void tcp_insert_write_queue_before(struct sk_buff *new,
                                                  struct sk_buff *skb,
                                                  struct sock *sk)
 {
         __skb_queue_before(&sk->sk_write_queue, skb, new);
-
-        if (sk->sk_send_head == skb)
-                sk->sk_send_head = new;
 }
 
 static inline void tcp_unlink_write_queue(struct sk_buff *skb, struct sock *sk)
 {
-        list_del(&skb->tcp_tsorted_anchor);
-        tcp_skb_tsorted_anchor_cleanup(skb);
         __skb_unlink(skb, &sk->sk_write_queue);
 }
 
-static inline bool tcp_write_queue_empty(struct sock *sk)
+void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb);
+
+static inline void tcp_rtx_queue_unlink(struct sk_buff *skb, struct sock *sk)
 {
-        return skb_queue_empty(&sk->sk_write_queue);
+        tcp_skb_tsorted_anchor_cleanup(skb);
+        rb_erase(&skb->rbnode, &sk->tcp_rtx_queue);
+}
+
+static inline void tcp_rtx_queue_unlink_and_free(struct sk_buff *skb, struct sock *sk)
+{
+        list_del(&skb->tcp_tsorted_anchor);
+        tcp_rtx_queue_unlink(skb, sk);
+        sk_wmem_free_skb(sk, skb);
 }
 
 static inline void tcp_push_pending_frames(struct sock *sk)
@@ -1767,8 +1756,9 @@ static inline u32 tcp_highest_sack_seq(struct tcp_sock *tp)
 
 static inline void tcp_advance_highest_sack(struct sock *sk, struct sk_buff *skb)
 {
-        tcp_sk(sk)->highest_sack = tcp_skb_is_last(sk, skb) ? NULL :
-                                                tcp_write_queue_next(sk, skb);
+        struct sk_buff *next = skb_rb_next(skb);
+
+        tcp_sk(sk)->highest_sack = next ?: tcp_send_head(sk);
 }
 
 static inline struct sk_buff *tcp_highest_sack(struct sock *sk)
@@ -1778,7 +1768,9 @@ static inline struct sk_buff *tcp_highest_sack(struct sock *sk)
 
 static inline void tcp_highest_sack_reset(struct sock *sk)
 {
-        tcp_sk(sk)->highest_sack = tcp_write_queue_head(sk);
+        struct sk_buff *skb = tcp_rtx_queue_head(sk);
+
+        tcp_sk(sk)->highest_sack = skb ?: tcp_send_head(sk);
 }
 
 /* Called when old skb is about to be deleted (to be combined with new skb) */
@@ -1948,7 +1940,7 @@ extern void tcp_rack_reo_timeout(struct sock *sk);
 /* At how many usecs into the future should the RTO fire? */
 static inline s64 tcp_rto_delta_us(const struct sock *sk)
 {
-        const struct sk_buff *skb = tcp_write_queue_head(sk);
+        const struct sk_buff *skb = tcp_rtx_queue_head(sk);
         u32 rto = inet_csk(sk)->icsk_rto;
         u64 rto_time_stamp_us = skb->skb_mstamp + jiffies_to_usecs(rto);
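
The skb_rb_first()/skb_rbtree_walk() macros added to include/linux/skbuff.h above are rb-tree counterparts of skb_peek()/skb_queue_walk(). A minimal usage sketch follows, assuming the new sk->tcp_rtx_queue member from include/net/sock.h; the helper name and its byte-counting purpose are hypothetical and not part of the series:

/* Hypothetical helper: walk a socket's rtx rb-tree in sequence order and
 * sum the payload bytes still awaiting acknowledgement. */
static unsigned int example_rtx_queued_bytes(struct sock *sk)
{
        struct sk_buff *skb;
        unsigned int bytes = 0;

        skb_rbtree_walk(skb, &sk->tcp_rtx_queue)
                bytes += skb->len;

        return bytes;
}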
