author     David S. Miller <davem@davemloft.net>   2017-08-29 20:51:29 +0300
committer  David S. Miller <davem@davemloft.net>   2017-08-29 20:51:29 +0300
commit     25d4dae1a64f7abc5d40faef08f7c7de2a5ece79 (patch)
tree       fb91db813a3b9c08d734ccffe5b19465b7274902
parent     d0fcece7704c935a9c8740bddff21575048f27ba (diff)
parent     3ffab54602644f69485632435ef154a442ae7189 (diff)
download   linux-25d4dae1a64f7abc5d40faef08f7c7de2a5ece79.tar.xz
Merge branch 'XDP-redirect-tracepoints'
Jesper Dangaard Brouer says:
====================
XDP redirect tracepoints
I feel this is as far as I can take the tracepoint infrastructure to
assist XDP monitoring.
Tracepoints come with a base overhead of 25 nanosec for an attached
bpf_prog, and 48 nanosec for a full perf record. This is
problematic for the XDP use-case, but it is very convenient to use the
existing perf infrastructure.
From a performance perspective, the real solution would be to attach
another bpf_prog (one that understands xdp_buff), but I'm not sure we
want to introduce yet another bpf attach API for this.
One thing left is to standardize the possible err return codes into a
limited set, to allow easier (and faster) mapping into a bpf map.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
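
To put the cover letter's overhead numbers in perspective: at 10 Gbit/s line
rate with minimum-size frames (~14.88 Mpps) the per-packet budget is roughly
67 ns, so an always-on 25 ns tracepoint bpf_prog would consume more than a
third of it. That is why the xdp_monitor sample below only attaches to the
_err tracepoints by default. The cover letter also proposes standardizing
err return codes for mapping into a bpf map; a minimal sketch of what a
per-errno counter could look like on top of these tracepoints follows
(hypothetical, not part of this commit; the map name, ERRNO_MAX clamp and
overflow bucket are assumptions — the ctx layout matches the tracepoint
format this commit introduces):

/* Hypothetical sketch: bucket XDP redirect errors per errno, extending the
 * "TODO: have entries for all possible errno's" noted in xdp_monitor_kern.c
 * below.  Map name and ERRNO_MAX are made up for illustration.
 */
#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"

#define ERRNO_MAX 32 /* assumption: clamp errno space; last slot = overflow */

struct bpf_map_def SEC("maps") redirect_errno_cnt = {
        .type = BPF_MAP_TYPE_PERCPU_ARRAY,
        .key_size = sizeof(u32),
        .value_size = sizeof(u64),
        .max_entries = ERRNO_MAX,
};

/* Same layout as /sys/kernel/debug/tracing/events/xdp/xdp_redirect_err/format */
struct xdp_redirect_ctx {
        unsigned short common_type;
        unsigned char common_flags;
        unsigned char common_preempt_count;
        int common_pid;

        int prog_id;
        u32 act;
        int ifindex;
        int err;
        int to_ifindex;
        u32 map_id;
        int map_index;
};

SEC("tracepoint/xdp/xdp_redirect_err")
int trace_redirect_errno(struct xdp_redirect_ctx *ctx)
{
        u32 key = -ctx->err; /* err is 0 or a negative errno */
        u64 *cnt;

        if (key >= ERRNO_MAX)
                key = ERRNO_MAX - 1; /* overflow bucket */

        cnt = bpf_map_lookup_elem(&redirect_errno_cnt, &key);
        if (!cnt)
                return 0;
        *cnt += 1;

        return 0; /* filter event, same choice as xdp_monitor_kern.c */
}

char _license[] SEC("license") = "GPL";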
-rw-r--r--   include/trace/events/xdp.h           | 100
-rw-r--r--   net/core/filter.c                    |  37
-rw-r--r--   samples/bpf/Makefile                 |   4
-rw-r--r--   samples/bpf/xdp_monitor_kern.c       |  88
-rw-r--r--   samples/bpf/xdp_monitor_user.c       | 295
-rw-r--r--   samples/bpf/xdp_redirect_kern.c      |  11
-rw-r--r--   samples/bpf/xdp_redirect_map_kern.c  |  11
-rw-r--r--   samples/bpf/xdp_redirect_map_user.c  |  22
-rw-r--r--   samples/bpf/xdp_redirect_user.c      |  21
9 files changed, 543 insertions(+), 46 deletions(-)
diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h
index 27cf2ef35f5f..862575ac8da9 100644
--- a/include/trace/events/xdp.h
+++ b/include/trace/events/xdp.h
@@ -31,55 +31,119 @@ TRACE_EVENT(xdp_exception,
 	TP_ARGS(dev, xdp, act),
 
 	TP_STRUCT__entry(
-		__array(u8, prog_tag, 8)
+		__field(int, prog_id)
 		__field(u32, act)
 		__field(int, ifindex)
 	),
 
 	TP_fast_assign(
-		BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(xdp->tag));
-		memcpy(__entry->prog_tag, xdp->tag, sizeof(xdp->tag));
+		__entry->prog_id	= xdp->aux->id;
 		__entry->act		= act;
 		__entry->ifindex	= dev->ifindex;
 	),
 
-	TP_printk("prog=%s action=%s ifindex=%d",
-		  __print_hex_str(__entry->prog_tag, 8),
+	TP_printk("prog_id=%d action=%s ifindex=%d",
+		  __entry->prog_id,
 		  __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
 		  __entry->ifindex)
 );
 
-TRACE_EVENT(xdp_redirect,
+DECLARE_EVENT_CLASS(xdp_redirect_template,
 
 	TP_PROTO(const struct net_device *dev,
-		 const struct bpf_prog *xdp, u32 act,
-		 int to_index, int err),
+		 const struct bpf_prog *xdp,
+		 int to_ifindex, int err,
+		 const struct bpf_map *map, u32 map_index),
 
-	TP_ARGS(dev, xdp, act, to_index, err),
+	TP_ARGS(dev, xdp, to_ifindex, err, map, map_index),
 
 	TP_STRUCT__entry(
-		__array(u8, prog_tag, 8)
+		__field(int, prog_id)
 		__field(u32, act)
 		__field(int, ifindex)
-		__field(int, to_index)
 		__field(int, err)
+		__field(int, to_ifindex)
+		__field(u32, map_id)
+		__field(int, map_index)
 	),
 
 	TP_fast_assign(
-		BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(xdp->tag));
-		memcpy(__entry->prog_tag, xdp->tag, sizeof(xdp->tag));
-		__entry->act		= act;
+		__entry->prog_id	= xdp->aux->id;
+		__entry->act		= XDP_REDIRECT;
 		__entry->ifindex	= dev->ifindex;
-		__entry->to_index	= to_index;
 		__entry->err		= err;
+		__entry->to_ifindex	= to_ifindex;
+		__entry->map_id		= map ? map->id : 0;
+		__entry->map_index	= map_index;
 	),
 
-	TP_printk("prog=%s action=%s ifindex=%d to_index=%d err=%d",
-		  __print_hex_str(__entry->prog_tag, 8),
+	TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d",
+		  __entry->prog_id,
 		  __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
-		  __entry->ifindex, __entry->to_index,
+		  __entry->ifindex, __entry->to_ifindex,
 		  __entry->err)
 );
+
+DEFINE_EVENT(xdp_redirect_template, xdp_redirect,
+	TP_PROTO(const struct net_device *dev,
+		 const struct bpf_prog *xdp,
+		 int to_ifindex, int err,
+		 const struct bpf_map *map, u32 map_index),
+	TP_ARGS(dev, xdp, to_ifindex, err, map, map_index)
+);
+
+DEFINE_EVENT(xdp_redirect_template, xdp_redirect_err,
+	TP_PROTO(const struct net_device *dev,
+		 const struct bpf_prog *xdp,
+		 int to_ifindex, int err,
+		 const struct bpf_map *map, u32 map_index),
+	TP_ARGS(dev, xdp, to_ifindex, err, map, map_index)
+);
+
+#define _trace_xdp_redirect(dev, xdp, to)		\
+	 trace_xdp_redirect(dev, xdp, to, 0, NULL, 0);
+
+#define _trace_xdp_redirect_err(dev, xdp, to, err)	\
+	 trace_xdp_redirect_err(dev, xdp, to, err, NULL, 0);
+
+DEFINE_EVENT_PRINT(xdp_redirect_template, xdp_redirect_map,
+	TP_PROTO(const struct net_device *dev,
+		 const struct bpf_prog *xdp,
+		 int to_ifindex, int err,
+		 const struct bpf_map *map, u32 map_index),
+	TP_ARGS(dev, xdp, to_ifindex, err, map, map_index),
+	TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d"
+		  " map_id=%d map_index=%d",
+		  __entry->prog_id,
+		  __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
+		  __entry->ifindex, __entry->to_ifindex,
+		  __entry->err,
+		  __entry->map_id, __entry->map_index)
+);
+
+DEFINE_EVENT_PRINT(xdp_redirect_template, xdp_redirect_map_err,
+	TP_PROTO(const struct net_device *dev,
+		 const struct bpf_prog *xdp,
+		 int to_ifindex, int err,
+		 const struct bpf_map *map, u32 map_index),
+	TP_ARGS(dev, xdp, to_ifindex, err, map, map_index),
+	TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d"
+		  " map_id=%d map_index=%d",
+		  __entry->prog_id,
+		  __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
+		  __entry->ifindex, __entry->to_ifindex,
+		  __entry->err,
+		  __entry->map_id, __entry->map_index)
+);
+
+#define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx)		\
+	 trace_xdp_redirect_map(dev, xdp, fwd ? fwd->ifindex : 0,	\
+				0, map, idx);
+
+#define _trace_xdp_redirect_map_err(dev, xdp, fwd, map, idx, err)	\
+	 trace_xdp_redirect_map_err(dev, xdp, fwd ? fwd->ifindex : 0,	\
+				    err, map, idx);
+
 #endif /* _TRACE_XDP_H */
 
 #include <trace/define_trace.h>
diff --git a/net/core/filter.c b/net/core/filter.c
index 4bcd6baa80c9..c6a37fe0285b 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2515,16 +2515,20 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
 	fwd = __dev_map_lookup_elem(map, index);
 	if (!fwd) {
 		err = -EINVAL;
-		goto out;
+		goto err;
 	}
 	if (ri->map_to_flush && ri->map_to_flush != map)
 		xdp_do_flush_map();
 
 	err = __bpf_tx_xdp(fwd, map, xdp, index);
-	if (likely(!err))
-		ri->map_to_flush = map;
-out:
-	trace_xdp_redirect(dev, xdp_prog, XDP_REDIRECT, index, err);
+	if (unlikely(err))
+		goto err;
+
+	ri->map_to_flush = map;
+	_trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
+	return 0;
+err:
+	_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
 	return err;
 }
 
@@ -2543,12 +2547,17 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
 	ri->ifindex = 0;
 	if (unlikely(!fwd)) {
 		err = -EINVAL;
-		goto out;
+		goto err;
 	}
 
 	err = __bpf_tx_xdp(fwd, NULL, xdp, 0);
-out:
-	trace_xdp_redirect(dev, xdp_prog, XDP_REDIRECT, index, err);
+	if (unlikely(err))
+		goto err;
+
+	_trace_xdp_redirect(dev, xdp_prog, index);
+	return 0;
+err:
+	_trace_xdp_redirect_err(dev, xdp_prog, index, err);
 	return err;
 }
 EXPORT_SYMBOL_GPL(xdp_do_redirect);
@@ -2566,23 +2575,25 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
 	ri->ifindex = 0;
 	if (unlikely(!fwd)) {
 		err = -EINVAL;
-		goto out;
+		goto err;
 	}
 
 	if (unlikely(!(fwd->flags & IFF_UP))) {
 		err = -ENETDOWN;
-		goto out;
+		goto err;
 	}
 
 	len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN;
 	if (skb->len > len) {
 		err = -EMSGSIZE;
-		goto out;
+		goto err;
 	}
 
 	skb->dev = fwd;
-out:
-	trace_xdp_redirect(dev, xdp_prog, XDP_REDIRECT, index, err);
+	_trace_xdp_redirect(dev, xdp_prog, index);
+	return 0;
+err:
+	_trace_xdp_redirect_err(dev, xdp_prog, index, err);
 	return err;
 }
 EXPORT_SYMBOL_GPL(xdp_do_generic_redirect);
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index f1010fe759fe..cf17c7932a6e 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -39,6 +39,7 @@ hostprogs-y += per_socket_stats_example
 hostprogs-y += load_sock_ops
 hostprogs-y += xdp_redirect
 hostprogs-y += xdp_redirect_map
+hostprogs-y += xdp_monitor
 hostprogs-y += syscall_tp
 
 # Libbpf dependencies
@@ -83,6 +84,7 @@ test_map_in_map-objs := bpf_load.o $(LIBBPF) test_map_in_map_user.o
 per_socket_stats_example-objs := $(LIBBPF) cookie_uid_helper_example.o
 xdp_redirect-objs := bpf_load.o $(LIBBPF) xdp_redirect_user.o
 xdp_redirect_map-objs := bpf_load.o $(LIBBPF) xdp_redirect_map_user.o
+xdp_monitor-objs := bpf_load.o $(LIBBPF) xdp_monitor_user.o
 syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o
 
 # Tell kbuild to always build the programs
@@ -127,6 +129,7 @@ always += tcp_iw_kern.o
 always += tcp_clamp_kern.o
 always += xdp_redirect_kern.o
 always += xdp_redirect_map_kern.o
+always += xdp_monitor_kern.o
 always += syscall_tp_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
@@ -166,6 +169,7 @@ HOSTLOADLIBES_xdp_tx_iptunnel += -lelf
 HOSTLOADLIBES_test_map_in_map += -lelf
 HOSTLOADLIBES_xdp_redirect += -lelf
 HOSTLOADLIBES_xdp_redirect_map += -lelf
+HOSTLOADLIBES_xdp_monitor += -lelf
 HOSTLOADLIBES_syscall_tp += -lelf
 
 # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
diff --git a/samples/bpf/xdp_monitor_kern.c b/samples/bpf/xdp_monitor_kern.c
new file mode 100644
index 000000000000..74f3fd8ed729
--- /dev/null
+++ b/samples/bpf/xdp_monitor_kern.c
@@ -0,0 +1,88 @@
+/* XDP monitor tool, based on tracepoints
+ *
+ * Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
+ */
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") redirect_err_cnt = {
+	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
+	.key_size = sizeof(u32),
+	.value_size = sizeof(u64),
+	.max_entries = 2,
+	/* TODO: have entries for all possible errno's */
+};
+
+/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_redirect/format
+ * Code in:		kernel/include/trace/events/xdp.h
+ */
+struct xdp_redirect_ctx {
+	unsigned short common_type;	// offset:0;  size:2; signed:0;
+	unsigned char common_flags;	// offset:2;  size:1; signed:0;
+	unsigned char common_preempt_count;// offset:3; size:1; signed:0;
+	int common_pid;			// offset:4;  size:4; signed:1;
+
+	int prog_id;			// offset:8;  size:4; signed:1;
+	u32 act;			// offset:12  size:4; signed:0;
+	int ifindex;			// offset:16  size:4; signed:1;
+	int err;			// offset:20  size:4; signed:1;
+	int to_ifindex;			// offset:24  size:4; signed:1;
+	u32 map_id;			// offset:28  size:4; signed:0;
+	int map_index;			// offset:32  size:4; signed:1;
+};					// offset:36
+
+enum {
+	XDP_REDIRECT_SUCCESS = 0,
+	XDP_REDIRECT_ERROR = 1
+};
+
+static __always_inline
+int xdp_redirect_collect_stat(struct xdp_redirect_ctx *ctx)
+{
+	u32 key = XDP_REDIRECT_ERROR;
+	int err = ctx->err;
+	u64 *cnt;
+
+	if (!err)
+		key = XDP_REDIRECT_SUCCESS;
+
+	cnt = bpf_map_lookup_elem(&redirect_err_cnt, &key);
+	if (!cnt)
+		return 0;
+	*cnt += 1;
+
+	return 0; /* Indicate event was filtered (no further processing)*/
+	/*
+	 * Returning 1 here would allow e.g. a perf-record tracepoint
+	 * to see and record these events, but it doesn't work well
+	 * in-practice as stopping perf-record also unload this
+	 * bpf_prog.  Plus, there is additional overhead of doing so.
+	 */
+}
+
+SEC("tracepoint/xdp/xdp_redirect_err")
+int trace_xdp_redirect_err(struct xdp_redirect_ctx *ctx)
+{
+	return xdp_redirect_collect_stat(ctx);
+}
+
+
+SEC("tracepoint/xdp/xdp_redirect_map_err")
+int trace_xdp_redirect_map_err(struct xdp_redirect_ctx *ctx)
+{
+	return xdp_redirect_collect_stat(ctx);
+}
+
+/* Likely unloaded when prog starts */
+SEC("tracepoint/xdp/xdp_redirect")
+int trace_xdp_redirect(struct xdp_redirect_ctx *ctx)
+{
+	return xdp_redirect_collect_stat(ctx);
+}
+
+/* Likely unloaded when prog starts */
+SEC("tracepoint/xdp/xdp_redirect_map")
+int trace_xdp_redirect_map(struct xdp_redirect_ctx *ctx)
+{
+	return xdp_redirect_collect_stat(ctx);
+}
diff --git a/samples/bpf/xdp_monitor_user.c b/samples/bpf/xdp_monitor_user.c
new file mode 100644
index 000000000000..b51b4f5e3257
--- /dev/null
+++ b/samples/bpf/xdp_monitor_user.c
@@ -0,0 +1,295 @@
+/* Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
+ */
+static const char *__doc__=
+ "XDP monitor tool, based on tracepoints\n"
+;
+
+static const char *__doc_err_only__=
+ " NOTICE: Only tracking XDP redirect errors\n"
+ "         Enable TX success stats via '--stats'\n"
+ "         (which comes with a per packet processing overhead)\n"
+;
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+#include <ctype.h>
+#include <unistd.h>
+#include <locale.h>
+
+#include <getopt.h>
+#include <net/if.h>
+#include <time.h>
+
+#include "libbpf.h"
+#include "bpf_load.h"
+#include "bpf_util.h"
+
+static int verbose = 1;
+static bool debug = false;
+
+static const struct option long_options[] = {
+	{"help",	no_argument,		NULL, 'h' },
+	{"debug",	no_argument,		NULL, 'D' },
+	{"stats",	no_argument,		NULL, 'S' },
+	{"sec", 	required_argument,	NULL, 's' },
+	{0, 0, NULL,  0 }
+};
+
+static void usage(char *argv[])
+{
+	int i;
+	printf("\nDOCUMENTATION:\n%s\n", __doc__);
+	printf("\n");
+	printf(" Usage: %s (options-see-below)\n",
+	       argv[0]);
+	printf(" Listing options:\n");
+	for (i = 0; long_options[i].name != 0; i++) {
+		printf(" --%-15s", long_options[i].name);
+		if (long_options[i].flag != NULL)
+			printf(" flag (internal value:%d)",
+			       *long_options[i].flag);
+		else
+			printf("(internal short-option: -%c)",
+			       long_options[i].val);
+		printf("\n");
+	}
+	printf("\n");
+}
+
+#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
+__u64 gettime(void)
+{
+	struct timespec t;
+	int res;
+
+	res = clock_gettime(CLOCK_MONOTONIC, &t);
+	if (res < 0) {
+		fprintf(stderr, "Error with gettimeofday! (%i)\n", res);
+		exit(EXIT_FAILURE);
+	}
+	return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec;
+}
+
+enum {
+	REDIR_SUCCESS = 0,
+	REDIR_ERROR = 1,
+};
+#define REDIR_RES_MAX 2
+static const char *redir_names[REDIR_RES_MAX] = {
+	[REDIR_SUCCESS]	= "Success",
+	[REDIR_ERROR]	= "Error",
+};
+static const char *err2str(int err)
+{
+	if (err < REDIR_RES_MAX)
+		return redir_names[err];
+	return NULL;
+}
+
+struct record {
+	__u64 counter;
+	__u64 timestamp;
+};
+
+struct stats_record {
+	struct record xdp_redir[REDIR_RES_MAX];
+};
+
+static void stats_print_headers(bool err_only)
+{
+	if (err_only)
+		printf("\n%s\n", __doc_err_only__);
+
+	printf("%-14s %-10s %-18s %-9s\n",
+	       "XDP_REDIRECT", "pps ", "pps-human-readable", "measure-period");
+}
+
+static void stats_print(struct stats_record *rec,
+			struct stats_record *prev,
+			bool err_only)
+{
+	int i = 0;
+
+	if (err_only)
+		i = REDIR_ERROR;
+
+	for (; i < REDIR_RES_MAX; i++) {
+		struct record *r = &rec->xdp_redir[i];
+		struct record *p = &prev->xdp_redir[i];
+		__u64 period  = 0;
+		__u64 packets = 0;
+		double pps = 0;
+		double period_ = 0;
+
+		if (p->timestamp) {
+			packets = r->counter - p->counter;
+			period  = r->timestamp - p->timestamp;
+			if (period > 0) {
+				period_ = ((double) period / NANOSEC_PER_SEC);
+				pps = packets / period_;
+			}
+		}
+
+		printf("%-14s %-10.0f %'-18.0f %f\n",
+		       err2str(i), pps, pps, period_);
+	}
+}
+
+static __u64 get_key32_value64_percpu(int fd, __u32 key)
+{
+	/* For percpu maps, userspace gets a value per possible CPU */
+	unsigned int nr_cpus = bpf_num_possible_cpus();
+	__u64 values[nr_cpus];
+	__u64 sum = 0;
+	int i;
+
+	if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
+		fprintf(stderr,
+			"ERR: bpf_map_lookup_elem failed key:0x%X\n", key);
+		return 0;
+	}
+
+	/* Sum values from each CPU */
+	for (i = 0; i < nr_cpus; i++) {
+		sum += values[i];
+	}
+	return sum;
+}
+
+static bool stats_collect(int fd, struct stats_record *rec)
+{
+	int i;
+
+	/* TODO: Detect if someone unloaded the perf event_fd's, as
+	 * this can happen by someone running perf-record -e
+	 */
+
+	for (i = 0; i < REDIR_RES_MAX; i++) {
+		rec->xdp_redir[i].timestamp = gettime();
+		rec->xdp_redir[i].counter = get_key32_value64_percpu(fd, i);
+	}
+	return true;
+}
+
+static void stats_poll(int interval, bool err_only)
+{
+	struct stats_record rec, prev;
+	int map_fd;
+
+	memset(&rec, 0, sizeof(rec));
+
+	/* Trick to pretty printf with thousands separators use %' */
+	setlocale(LC_NUMERIC, "en_US");
+
+	/* Header */
+	if (verbose)
+		printf("\n%s", __doc__);
+
+	/* TODO Need more advanced stats on error types */
+	if (verbose)
+		printf(" - Stats map: %s\n", map_data[0].name);
+	map_fd = map_data[0].fd;
+
+	stats_print_headers(err_only);
+	fflush(stdout);
+
+	while (1) {
+		memcpy(&prev, &rec, sizeof(rec));
+		stats_collect(map_fd, &rec);
+		stats_print(&rec, &prev, err_only);
+		fflush(stdout);
+		sleep(interval);
+	}
+}
+
+void print_bpf_prog_info(void)
+{
+	int i;
+
+	/* Prog info */
+	printf("Loaded BPF prog have %d bpf program(s)\n", prog_cnt);
+	for (i = 0; i < prog_cnt; i++) {
+		printf(" - prog_fd[%d] = fd(%d)\n", i, prog_fd[i]);
+	}
+
+	/* Maps info */
+	printf("Loaded BPF prog have %d map(s)\n", map_data_count);
+	for (i = 0; i < map_data_count; i++) {
+		char *name = map_data[i].name;
+		int fd     = map_data[i].fd;
+
+		printf(" - map_data[%d] = fd(%d) name:%s\n", i, fd, name);
+	}
+
+	/* Event info */
+	printf("Searching for (max:%d) event file descriptor(s)\n", prog_cnt);
+	for (i = 0; i < prog_cnt; i++) {
+		if (event_fd[i] != -1)
+			printf(" - event_fd[%d] = fd(%d)\n", i, event_fd[i]);
+	}
+}
+
+int main(int argc, char **argv)
+{
+	int longindex = 0, opt;
+	int ret = EXIT_SUCCESS;
+	char bpf_obj_file[256];
+
+	/* Default settings: */
+	bool errors_only = true;
+	int interval = 2;
+
+	snprintf(bpf_obj_file, sizeof(bpf_obj_file), "%s_kern.o", argv[0]);
+
+	/* Parse commands line args */
+	while ((opt = getopt_long(argc, argv, "h",
+				  long_options, &longindex)) != -1) {
+		switch (opt) {
+		case 'D':
+			debug = true;
+			break;
+		case 'S':
+			errors_only = false;
+			break;
+		case 's':
+			interval = atoi(optarg);
+			break;
+		case 'h':
+		default:
+			usage(argv);
+			return EXIT_FAILURE;
+		}
+	}
+
+	if (load_bpf_file(bpf_obj_file)) {
+		printf("ERROR - bpf_log_buf: %s", bpf_log_buf);
+		return 1;
+	}
+	if (!prog_fd[0]) {
+		printf("ERROR - load_bpf_file: %s\n", strerror(errno));
+		return 1;
+	}
+
+	if (debug) {
+		print_bpf_prog_info();
+	}
+
+	/* Unload/stop tracepoint event by closing fd's */
+	if (errors_only) {
+		/* The prog_fd[i] and event_fd[i] depend on the
+		 * order the functions was defined in _kern.c
+		 */
+		close(event_fd[2]); /* tracepoint/xdp/xdp_redirect */
+		close(prog_fd[2]);  /* func: trace_xdp_redirect */
+		close(event_fd[3]); /* tracepoint/xdp/xdp_redirect_map */
+		close(prog_fd[3]);  /* func: trace_xdp_redirect_map */
+	}
+
+	stats_poll(interval, errors_only);
+
+	return ret;
+}
diff --git a/samples/bpf/xdp_redirect_kern.c b/samples/bpf/xdp_redirect_kern.c
index a34ad457a684..1c90288d0203 100644
--- a/samples/bpf/xdp_redirect_kern.c
+++ b/samples/bpf/xdp_redirect_kern.c
@@ -26,6 +26,9 @@ struct bpf_map_def SEC("maps") tx_port = {
 	.max_entries = 1,
 };
 
+/* Count RX packets, as XDP bpf_prog doesn't get direct TX-success
+ * feedback.  Redirect TX errors can be caught via a tracepoint.
+ */
 struct bpf_map_def SEC("maps") rxcnt = {
 	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
 	.key_size = sizeof(u32),
@@ -33,7 +36,6 @@ struct bpf_map_def SEC("maps") rxcnt = {
 	.max_entries = 1,
 };
 
-
 static void swap_src_dst_mac(void *data)
 {
 	unsigned short *p = data;
@@ -78,4 +80,11 @@ int xdp_redirect_prog(struct xdp_md *ctx)
 	return bpf_redirect(*ifindex, 0);
 }
 
+/* Redirect require an XDP bpf_prog loaded on the TX device */
+SEC("xdp_redirect_dummy")
+int xdp_redirect_dummy(struct xdp_md *ctx)
+{
+	return XDP_PASS;
+}
+
 char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_redirect_map_kern.c b/samples/bpf/xdp_redirect_map_kern.c
index 2faf196e17ea..79795d41ad0d 100644
--- a/samples/bpf/xdp_redirect_map_kern.c
+++ b/samples/bpf/xdp_redirect_map_kern.c
@@ -26,6 +26,9 @@ struct bpf_map_def SEC("maps") tx_port = {
 	.max_entries = 100,
 };
 
+/* Count RX packets, as XDP bpf_prog doesn't get direct TX-success
+ * feedback.  Redirect TX errors can be caught via a tracepoint.
+ */
 struct bpf_map_def SEC("maps") rxcnt = {
 	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
 	.key_size = sizeof(u32),
@@ -33,7 +36,6 @@ struct bpf_map_def SEC("maps") rxcnt = {
 	.max_entries = 1,
 };
 
-
 static void swap_src_dst_mac(void *data)
 {
 	unsigned short *p = data;
@@ -80,4 +82,11 @@ int xdp_redirect_map_prog(struct xdp_md *ctx)
 	return bpf_redirect_map(&tx_port, vport, 0);
 }
 
+/* Redirect require an XDP bpf_prog loaded on the TX device */
+SEC("xdp_redirect_dummy")
+int xdp_redirect_dummy(struct xdp_md *ctx)
+{
+	return XDP_PASS;
+}
+
 char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_redirect_map_user.c b/samples/bpf/xdp_redirect_map_user.c
index a1ad00fdaa8a..d4d86a273fba 100644
--- a/samples/bpf/xdp_redirect_map_user.c
+++ b/samples/bpf/xdp_redirect_map_user.c
@@ -16,6 +16,7 @@
 #include <signal.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <stdbool.h>
 #include <string.h>
 #include <unistd.h>
 #include <libgen.h>
@@ -26,17 +27,18 @@
 
 static int ifindex_in;
 static int ifindex_out;
+static bool ifindex_out_xdp_dummy_attached = true;
 
 static __u32 xdp_flags;
 
 static void int_exit(int sig)
 {
 	set_link_xdp_fd(ifindex_in, -1, xdp_flags);
+	if (ifindex_out_xdp_dummy_attached)
+		set_link_xdp_fd(ifindex_out, -1, xdp_flags);
 	exit(0);
 }
 
-/* simple per-protocol drop counter
- */
 static void poll_stats(int interval, int ifindex)
 {
 	unsigned int nr_cpus = bpf_num_possible_cpus();
@@ -70,7 +72,6 @@ static void usage(const char *prog)
 		prog);
 }
 
-
 int main(int argc, char **argv)
 {
 	const char *optstr = "SN";
@@ -112,14 +113,21 @@ int main(int argc, char **argv)
 		return 1;
 	}
 
-	signal(SIGINT, int_exit);
-	signal(SIGTERM, int_exit);
-
 	if (set_link_xdp_fd(ifindex_in, prog_fd[0], xdp_flags) < 0) {
-		printf("link set xdp fd failed\n");
+		printf("ERROR: link set xdp fd failed on %d\n", ifindex_in);
 		return 1;
 	}
 
+	/* Loading dummy XDP prog on out-device */
+	if (set_link_xdp_fd(ifindex_out, prog_fd[1],
+			    (xdp_flags | XDP_FLAGS_UPDATE_IF_NOEXIST)) < 0) {
+		printf("WARN: link set xdp fd failed on %d\n", ifindex_out);
+		ifindex_out_xdp_dummy_attached = false;
+	}
+
+	signal(SIGINT, int_exit);
+	signal(SIGTERM, int_exit);
+
 	printf("map[0] (vports) = %i, map[1] (map) = %i, map[2] (count) = %i\n",
 	       map_fd[0], map_fd[1], map_fd[2]);
diff --git a/samples/bpf/xdp_redirect_user.c b/samples/bpf/xdp_redirect_user.c
index f705a1905d2d..4475d837bf2c 100644
--- a/samples/bpf/xdp_redirect_user.c
+++ b/samples/bpf/xdp_redirect_user.c
@@ -16,6 +16,7 @@
 #include <signal.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <stdbool.h>
 #include <string.h>
 #include <unistd.h>
 #include <libgen.h>
@@ -26,17 +27,18 @@
 
 static int ifindex_in;
 static int ifindex_out;
+static bool ifindex_out_xdp_dummy_attached = true;
 
 static __u32 xdp_flags;
 
 static void int_exit(int sig)
 {
 	set_link_xdp_fd(ifindex_in, -1, xdp_flags);
+	if (ifindex_out_xdp_dummy_attached)
+		set_link_xdp_fd(ifindex_out, -1, xdp_flags);
 	exit(0);
 }
 
-/* simple per-protocol drop counter
- */
 static void poll_stats(int interval, int ifindex)
 {
 	unsigned int nr_cpus = bpf_num_possible_cpus();
@@ -112,14 +114,21 @@ int main(int argc, char **argv)
 		return 1;
 	}
 
-	signal(SIGINT, int_exit);
-	signal(SIGTERM, int_exit);
-
 	if (set_link_xdp_fd(ifindex_in, prog_fd[0], xdp_flags) < 0) {
-		printf("link set xdp fd failed\n");
+		printf("ERROR: link set xdp fd failed on %d\n", ifindex_in);
 		return 1;
 	}
 
+	/* Loading dummy XDP prog on out-device */
+	if (set_link_xdp_fd(ifindex_out, prog_fd[1],
+			    (xdp_flags | XDP_FLAGS_UPDATE_IF_NOEXIST)) < 0) {
+		printf("WARN: link set xdp fd failed on %d\n", ifindex_out);
+		ifindex_out_xdp_dummy_attached = false;
+	}
+
+	signal(SIGINT, int_exit);
+	signal(SIGTERM, int_exit);
+
 	/* bpf redirect port */
 	ret = bpf_map_update_elem(map_fd[0], &key, &ifindex_out, 0);
 	if (ret) {
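
For completeness, reading a per-errno percpu map back from userspace follows
the same pattern as get_key32_value64_percpu() in the new xdp_monitor_user.c
above: look up one key, receive a value per possible CPU, and sum them. A
minimal sketch under the same assumptions as the kern-side sketch earlier
(redirect_errno_cnt and ERRNO_MAX are hypothetical, not part of this commit):

/* Hypothetical readout for the per-errno sketch above; mirrors
 * get_key32_value64_percpu() from xdp_monitor_user.c in this commit.
 */
#include <stdio.h>
#include <string.h>
#include <linux/types.h>

#include "libbpf.h"	/* samples/bpf wrapper, as used by xdp_monitor_user.c */
#include "bpf_util.h"	/* bpf_num_possible_cpus() */

#define ERRNO_MAX 32	/* must match the kern-side assumption */

static __u64 sum_percpu(int map_fd, __u32 key)
{
	/* Percpu maps return one value per possible CPU for a single key */
	unsigned int nr_cpus = bpf_num_possible_cpus();
	__u64 values[nr_cpus];
	__u64 sum = 0;
	unsigned int i;

	if (bpf_map_lookup_elem(map_fd, &key, values) != 0)
		return 0;
	for (i = 0; i < nr_cpus; i++)
		sum += values[i];
	return sum;
}

static void dump_errno_counters(int map_fd)
{
	__u32 key;

	/* key 0 (success) is never recorded by the _err tracepoint progs */
	for (key = 1; key < ERRNO_MAX; key++) {
		__u64 cnt = sum_percpu(map_fd, key);

		if (cnt)
			printf("errno=%u (%s): %llu\n", key,
			       strerror(key), (unsigned long long)cnt);
	}
}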