diff options
| author | David S. Miller <davem@davemloft.net> | 2017-05-18 17:07:42 +0300 |
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2017-05-18 17:07:42 +0300 |
| commit | f646c75b79c31a7fc40211e7291287e2b99ee168 (patch) | |
| tree | 240db037b0414eda56435162247acabd75dd1477 /include/linux | |
| parent | 1fc4d180b3c6bed0e7f5160bcd553aec89594962 (diff) | |
| parent | c67df11f6e48061e43e9bf9dade83fe268b47d27 (diff) | |
| download | linux-f646c75b79c31a7fc40211e7291287e2b99ee168.tar.xz | |
Merge branch 'vhost_net-rx-batch-dequeuing'
Jason Wang says:
====================
vhost_net rx batch dequeuing
This series tries to implement rx batching for vhost-net. This is done
by batching the dequeuing from skb_array which was exported by
underlayer socket and pass the sbk back through msg_control to finish
userspace copying. This is also the requirement for more batching
implemention on rx path.
Tests shows at most 7.56% improvment bon rx pps on top of batch
zeroing and no obvious changes for TCP_STREAM/TCP_RR result.
Please review.
Thanks
Changes from V4:
- drop batch zeroing patch
- renew the performance numbers
- move skb pointer array out of vhost_net structure
Changes from V3:
- add batch zeroing patch to fix the build warnings
Changes from V2:
- rebase to net-next HEAD
- use unconsume helpers to put skb back on releasing
- introduce and use vhost_net internal buffer helpers
- renew performance numbers on top of batch zeroing
Changes from V1:
- switch to use for() in __ptr_ring_consume_batched()
- rename peek_head_len_batched() to fetch_skbs()
- use skb_array_consume_batched() instead of
skb_array_consume_batched_bh() since no consumer run in bh
- drop the lockless peeking patch since skb_array could be resized, so
it's not safe to call lockless one
====================
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include/linux')
| -rw-r--r-- | include/linux/if_tap.h | 5 | ||||
| -rw-r--r-- | include/linux/if_tun.h | 5 | ||||
| -rw-r--r-- | include/linux/ptr_ring.h | 120 | ||||
| -rw-r--r-- | include/linux/skb_array.h | 31 |
4 files changed, 161 insertions, 0 deletions
diff --git a/include/linux/if_tap.h b/include/linux/if_tap.h index 3482c3c2037d..4837157da0dc 100644 --- a/include/linux/if_tap.h +++ b/include/linux/if_tap.h @@ -3,6 +3,7 @@ #if IS_ENABLED(CONFIG_TAP) struct socket *tap_get_socket(struct file *); +struct skb_array *tap_get_skb_array(struct file *file); #else #include <linux/err.h> #include <linux/errno.h> @@ -12,6 +13,10 @@ static inline struct socket *tap_get_socket(struct file *f) { return ERR_PTR(-EINVAL); } +static inline struct skb_array *tap_get_skb_array(struct file *f) +{ + return ERR_PTR(-EINVAL); +} #endif /* CONFIG_TAP */ #include <net/sock.h> diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h index ed6da2e6df90..bf9bdf42d577 100644 --- a/include/linux/if_tun.h +++ b/include/linux/if_tun.h @@ -19,6 +19,7 @@ #if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE) struct socket *tun_get_socket(struct file *); +struct skb_array *tun_get_skb_array(struct file *file); #else #include <linux/err.h> #include <linux/errno.h> @@ -28,5 +29,9 @@ static inline struct socket *tun_get_socket(struct file *f) { return ERR_PTR(-EINVAL); } +static inline struct skb_array *tun_get_skb_array(struct file *f) +{ + return ERR_PTR(-EINVAL); +} #endif /* CONFIG_TUN */ #endif /* __IF_TUN_H */ diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index 6b2e0dd88569..d8c97ec8a8e6 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -278,6 +278,22 @@ static inline void *__ptr_ring_consume(struct ptr_ring *r) return ptr; } +static inline int __ptr_ring_consume_batched(struct ptr_ring *r, + void **array, int n) +{ + void *ptr; + int i; + + for (i = 0; i < n; i++) { + ptr = __ptr_ring_consume(r); + if (!ptr) + break; + array[i] = ptr; + } + + return i; +} + /* * Note: resize (below) nests producer lock within consumer lock, so if you * call this in interrupt or BH context, you must disable interrupts/BH when @@ -328,6 +344,55 @@ static inline void *ptr_ring_consume_bh(struct ptr_ring *r) return ptr; } +static inline int ptr_ring_consume_batched(struct ptr_ring *r, + void **array, int n) +{ + int ret; + + spin_lock(&r->consumer_lock); + ret = __ptr_ring_consume_batched(r, array, n); + spin_unlock(&r->consumer_lock); + + return ret; +} + +static inline int ptr_ring_consume_batched_irq(struct ptr_ring *r, + void **array, int n) +{ + int ret; + + spin_lock_irq(&r->consumer_lock); + ret = __ptr_ring_consume_batched(r, array, n); + spin_unlock_irq(&r->consumer_lock); + + return ret; +} + +static inline int ptr_ring_consume_batched_any(struct ptr_ring *r, + void **array, int n) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&r->consumer_lock, flags); + ret = __ptr_ring_consume_batched(r, array, n); + spin_unlock_irqrestore(&r->consumer_lock, flags); + + return ret; +} + +static inline int ptr_ring_consume_batched_bh(struct ptr_ring *r, + void **array, int n) +{ + int ret; + + spin_lock_bh(&r->consumer_lock); + ret = __ptr_ring_consume_batched(r, array, n); + spin_unlock_bh(&r->consumer_lock); + + return ret; +} + /* Cast to structure type and call a function without discarding from FIFO. * Function must return a value. * Callers must take consumer_lock. @@ -403,6 +468,61 @@ static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp) return 0; } +/* + * Return entries into ring. Destroy entries that don't fit. + * + * Note: this is expected to be a rare slow path operation. + * + * Note: producer lock is nested within consumer lock, so if you + * resize you must make sure all uses nest correctly. + * In particular if you consume ring in interrupt or BH context, you must + * disable interrupts/BH when doing so. + */ +static inline void ptr_ring_unconsume(struct ptr_ring *r, void **batch, int n, + void (*destroy)(void *)) +{ + unsigned long flags; + int head; + + spin_lock_irqsave(&r->consumer_lock, flags); + spin_lock(&r->producer_lock); + + if (!r->size) + goto done; + + /* + * Clean out buffered entries (for simplicity). This way following code + * can test entries for NULL and if not assume they are valid. + */ + head = r->consumer_head - 1; + while (likely(head >= r->consumer_tail)) + r->queue[head--] = NULL; + r->consumer_tail = r->consumer_head; + + /* + * Go over entries in batch, start moving head back and copy entries. + * Stop when we run into previously unconsumed entries. + */ + while (n) { + head = r->consumer_head - 1; + if (head < 0) + head = r->size - 1; + if (r->queue[head]) { + /* This batch entry will have to be destroyed. */ + goto done; + } + r->queue[head] = batch[--n]; + r->consumer_tail = r->consumer_head = head; + } + +done: + /* Destroy all entries left in the batch. */ + while (n) + destroy(batch[--n]); + spin_unlock(&r->producer_lock); + spin_unlock_irqrestore(&r->consumer_lock, flags); +} + static inline void **__ptr_ring_swap_queue(struct ptr_ring *r, void **queue, int size, gfp_t gfp, void (*destroy)(void *)) diff --git a/include/linux/skb_array.h b/include/linux/skb_array.h index f4dfade428f0..35226cd4efb0 100644 --- a/include/linux/skb_array.h +++ b/include/linux/skb_array.h @@ -97,21 +97,46 @@ static inline struct sk_buff *skb_array_consume(struct skb_array *a) return ptr_ring_consume(&a->ring); } +static inline int skb_array_consume_batched(struct skb_array *a, + struct sk_buff **array, int n) +{ + return ptr_ring_consume_batched(&a->ring, (void **)array, n); +} + static inline struct sk_buff *skb_array_consume_irq(struct skb_array *a) { return ptr_ring_consume_irq(&a->ring); } +static inline int skb_array_consume_batched_irq(struct skb_array *a, + struct sk_buff **array, int n) +{ + return ptr_ring_consume_batched_irq(&a->ring, (void **)array, n); +} + static inline struct sk_buff *skb_array_consume_any(struct skb_array *a) { return ptr_ring_consume_any(&a->ring); } +static inline int skb_array_consume_batched_any(struct skb_array *a, + struct sk_buff **array, int n) +{ + return ptr_ring_consume_batched_any(&a->ring, (void **)array, n); +} + + static inline struct sk_buff *skb_array_consume_bh(struct skb_array *a) { return ptr_ring_consume_bh(&a->ring); } +static inline int skb_array_consume_batched_bh(struct skb_array *a, + struct sk_buff **array, int n) +{ + return ptr_ring_consume_batched_bh(&a->ring, (void **)array, n); +} + static inline int __skb_array_len_with_tag(struct sk_buff *skb) { if (likely(skb)) { @@ -156,6 +181,12 @@ static void __skb_array_destroy_skb(void *ptr) kfree_skb(ptr); } +static inline void skb_array_unconsume(struct skb_array *a, + struct sk_buff **skbs, int n) +{ + ptr_ring_unconsume(&a->ring, (void **)skbs, n, __skb_array_destroy_skb); +} + static inline int skb_array_resize(struct skb_array *a, int size, gfp_t gfp) { return ptr_ring_resize(&a->ring, size, gfp, __skb_array_destroy_skb); |
