diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2010-05-05 12:07:37 +0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2010-05-05 12:07:37 +0400 |
commit | ec7d2f2cf3a1b76202986519ec4f8ec75b2de232 (patch) | |
tree | 177c324eb0cf7e687d1bbd10a6add3a7d5979002 /include/linux/skbuff.h | |
parent | 8753d29fd5daf890004a38c80835e1eb3acda394 (diff) | |
download | linux-ec7d2f2cf3a1b76202986519ec4f8ec75b2de232.tar.xz |
net: __alloc_skb() speedup
With the following patch, I can reach the maximum rate of my pktgen+udpsink
simulator:
- 'old' machine : dual quad core E5450 @3.00GHz
- 64 UDP rx flows (only differ by destination port)
- RPS enabled, NIC interrupts serviced on cpu0
- rps dispatched on 7 other cores. (~130.000 IPI per second)
- SLAB allocator (faster than SLUB in this workload)
- tg3 NIC
- 1.080.000 pps without a single drop at NIC level.
The idea is to add two prefetchw() calls in __alloc_skb(): one to prefetch
the first sk_buff cache line, and a second to prefetch the shinfo part.
Also use a single memset() to initialize all skb_shared_info fields, instead
of setting them one by one, to reduce the number of instructions by using long word moves.
All skb_shared_info fields before 'dataref' are cleared in
__alloc_skb().
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include/linux/skbuff.h')
-rw-r--r-- | include/linux/skbuff.h | 7 |
1 files changed, 6 insertions, 1 deletions
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 746a652b9f6f..88d55395a27c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -187,7 +187,6 @@ union skb_shared_tx {
  * the end of the header data, ie. at skb->end.
  */
 struct skb_shared_info {
-	atomic_t dataref;
 	unsigned short nr_frags;
 	unsigned short gso_size;
 	/* Warning: this field is not always filled in (UFO)! */
@@ -197,6 +196,12 @@ struct skb_shared_info {
 	union skb_shared_tx tx_flags;
 	struct sk_buff *frag_list;
 	struct skb_shared_hwtstamps hwtstamps;
+
+	/*
+	 * Warning : all fields before dataref are cleared in __alloc_skb()
+	 */
+	atomic_t dataref;
+
 	skb_frag_t frags[MAX_SKB_FRAGS];
 	/* Intermediate layers must ensure that destructor_arg
 	 * remains valid until skb destructor */