diff options
author | Daniel Borkmann <daniel@iogearbox.net> | 2018-08-09 22:50:45 +0300 |
---|---|---|
committer | Daniel Borkmann <daniel@iogearbox.net> | 2018-08-09 22:50:45 +0300 |
commit | 9c95420117393ed5f76de373e3c6479c21e3e380 (patch) | |
tree | e758a1cbc47dcaff5e9c13ee2b336dd48994c881 | |
parent | bf9bae0ea6ec7013ef37b19fbbf29b62a35474fb (diff) | |
parent | 1bf9116d0866a649104a5dfa008c302ad54d1e02 (diff) | |
download | linux-9c95420117393ed5f76de373e3c6479c21e3e380.tar.xz |
Merge branch 'bpf-fix-cpu-and-devmap-teardown'
Jesper Dangaard Brouer says:
====================
Removing entries from cpumap and devmap, goes through a number of
syncronization steps to make sure no new xdp_frames can be enqueued.
But there is a small chance, that xdp_frames remains which have not
been flushed/processed yet. Flushing these during teardown, happens
from RCU context and not as usual under RX NAPI context.
The optimization introduced in commt 389ab7f01af9 ("xdp: introduce
xdp_return_frame_rx_napi"), missed that the flush operation can also
be called from RCU context. Thus, we cannot always use the
xdp_return_frame_rx_napi call, which take advantage of the protection
provided by XDP RX running under NAPI protection.
The samples/bpf xdp_redirect_cpu have a --stress-mode, that is
adjusted to easier reproduce (verified by Red Hat QA).
====================
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
-rw-r--r-- | kernel/bpf/cpumap.c | 15 | ||||
-rw-r--r-- | kernel/bpf/devmap.c | 14 | ||||
-rw-r--r-- | samples/bpf/xdp_redirect_cpu_kern.c | 2 | ||||
-rw-r--r-- | samples/bpf/xdp_redirect_cpu_user.c | 4 |
4 files changed, 21 insertions, 14 deletions
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index e0918d180f08..46f5f29605d4 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -69,7 +69,7 @@ struct bpf_cpu_map { }; static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu, - struct xdp_bulk_queue *bq); + struct xdp_bulk_queue *bq, bool in_napi_ctx); static u64 cpu_map_bitmap_size(const union bpf_attr *attr) { @@ -375,7 +375,7 @@ static void __cpu_map_entry_free(struct rcu_head *rcu) struct xdp_bulk_queue *bq = per_cpu_ptr(rcpu->bulkq, cpu); /* No concurrent bq_enqueue can run at this point */ - bq_flush_to_queue(rcpu, bq); + bq_flush_to_queue(rcpu, bq, false); } free_percpu(rcpu->bulkq); /* Cannot kthread_stop() here, last put free rcpu resources */ @@ -558,7 +558,7 @@ const struct bpf_map_ops cpu_map_ops = { }; static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu, - struct xdp_bulk_queue *bq) + struct xdp_bulk_queue *bq, bool in_napi_ctx) { unsigned int processed = 0, drops = 0; const int to_cpu = rcpu->cpu; @@ -578,7 +578,10 @@ static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu, err = __ptr_ring_produce(q, xdpf); if (err) { drops++; - xdp_return_frame_rx_napi(xdpf); + if (likely(in_napi_ctx)) + xdp_return_frame_rx_napi(xdpf); + else + xdp_return_frame(xdpf); } processed++; } @@ -598,7 +601,7 @@ static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf) struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq); if (unlikely(bq->count == CPU_MAP_BULK_SIZE)) - bq_flush_to_queue(rcpu, bq); + bq_flush_to_queue(rcpu, bq, true); /* Notice, xdp_buff/page MUST be queued here, long enough for * driver to code invoking us to finished, due to driver @@ -661,7 +664,7 @@ void __cpu_map_flush(struct bpf_map *map) /* Flush all frames in bulkq to real queue */ bq = this_cpu_ptr(rcpu->bulkq); - bq_flush_to_queue(rcpu, bq); + bq_flush_to_queue(rcpu, bq, true); /* If already running, costs spin_lock_irqsave + smb_mb */ wake_up_process(rcpu->kthread); diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index d361fc1e3bf3..750d45edae79 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -217,7 +217,8 @@ void __dev_map_insert_ctx(struct bpf_map *map, u32 bit) } static int bq_xmit_all(struct bpf_dtab_netdev *obj, - struct xdp_bulk_queue *bq, u32 flags) + struct xdp_bulk_queue *bq, u32 flags, + bool in_napi_ctx) { struct net_device *dev = obj->dev; int sent = 0, drops = 0, err = 0; @@ -254,7 +255,10 @@ error: struct xdp_frame *xdpf = bq->q[i]; /* RX path under NAPI protection, can return frames faster */ - xdp_return_frame_rx_napi(xdpf); + if (likely(in_napi_ctx)) + xdp_return_frame_rx_napi(xdpf); + else + xdp_return_frame(xdpf); drops++; } goto out; @@ -286,7 +290,7 @@ void __dev_map_flush(struct bpf_map *map) __clear_bit(bit, bitmap); bq = this_cpu_ptr(dev->bulkq); - bq_xmit_all(dev, bq, XDP_XMIT_FLUSH); + bq_xmit_all(dev, bq, XDP_XMIT_FLUSH, true); } } @@ -316,7 +320,7 @@ static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf, struct xdp_bulk_queue *bq = this_cpu_ptr(obj->bulkq); if (unlikely(bq->count == DEV_MAP_BULK_SIZE)) - bq_xmit_all(obj, bq, 0); + bq_xmit_all(obj, bq, 0, true); /* Ingress dev_rx will be the same for all xdp_frame's in * bulk_queue, because bq stored per-CPU and must be flushed @@ -385,7 +389,7 @@ static void dev_map_flush_old(struct bpf_dtab_netdev *dev) __clear_bit(dev->bit, bitmap); bq = per_cpu_ptr(dev->bulkq, cpu); - bq_xmit_all(dev, bq, XDP_XMIT_FLUSH); + bq_xmit_all(dev, bq, XDP_XMIT_FLUSH, false); } } } diff --git a/samples/bpf/xdp_redirect_cpu_kern.c b/samples/bpf/xdp_redirect_cpu_kern.c index 303e9e7161f3..4938dcbaecbf 100644 --- a/samples/bpf/xdp_redirect_cpu_kern.c +++ b/samples/bpf/xdp_redirect_cpu_kern.c @@ -14,7 +14,7 @@ #include <uapi/linux/bpf.h> #include "bpf_helpers.h" -#define MAX_CPUS 12 /* WARNING - sync with _user.c */ +#define MAX_CPUS 64 /* WARNING - sync with _user.c */ /* Special map type that can XDP_REDIRECT frames to another CPU */ struct bpf_map_def SEC("maps") cpu_map = { diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c index f6efaefd485b..4b4d78fffe30 100644 --- a/samples/bpf/xdp_redirect_cpu_user.c +++ b/samples/bpf/xdp_redirect_cpu_user.c @@ -19,7 +19,7 @@ static const char *__doc__ = #include <arpa/inet.h> #include <linux/if_link.h> -#define MAX_CPUS 12 /* WARNING - sync with _kern.c */ +#define MAX_CPUS 64 /* WARNING - sync with _kern.c */ /* How many xdp_progs are defined in _kern.c */ #define MAX_PROG 5 @@ -527,7 +527,7 @@ static void stress_cpumap(void) * procedure. */ create_cpu_entry(1, 1024, 0, false); - create_cpu_entry(1, 128, 0, false); + create_cpu_entry(1, 8, 0, false); create_cpu_entry(1, 16000, 0, false); } |