diff options
Diffstat (limited to 'samples/bpf/xdpsock_user.c')
-rw-r--r-- | samples/bpf/xdpsock_user.c | 366 |
1 files changed, 344 insertions, 22 deletions
diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index 49d7a6ad7e39..aa50864e4415 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -14,6 +14,7 @@ #include <arpa/inet.h> #include <locale.h> #include <net/ethernet.h> +#include <netinet/ether.h> #include <net/if.h> #include <poll.h> #include <pthread.h> @@ -30,12 +31,16 @@ #include <sys/un.h> #include <time.h> #include <unistd.h> +#include <sched.h> #include <bpf/libbpf.h> #include <bpf/xsk.h> #include <bpf/bpf.h> #include "xdpsock.h" +/* libbpf APIs for AF_XDP are deprecated starting from v0.7 */ +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + #ifndef SOL_XDP #define SOL_XDP 283 #endif @@ -53,12 +58,27 @@ #define DEBUG_HEXDUMP 0 +#define VLAN_PRIO_MASK 0xe000 /* Priority Code Point */ +#define VLAN_PRIO_SHIFT 13 +#define VLAN_VID_MASK 0x0fff /* VLAN Identifier */ +#define VLAN_VID__DEFAULT 1 +#define VLAN_PRI__DEFAULT 0 + +#define NSEC_PER_SEC 1000000000UL +#define NSEC_PER_USEC 1000 + +#define SCHED_PRI__DEFAULT 0 + typedef __u64 u64; typedef __u32 u32; typedef __u16 u16; typedef __u8 u8; static unsigned long prev_time; +static long tx_cycle_diff_min; +static long tx_cycle_diff_max; +static double tx_cycle_diff_ave; +static long tx_cycle_cnt; enum benchmark_type { BENCH_RXDROP = 0, @@ -78,14 +98,23 @@ static u32 opt_batch_size = 64; static int opt_pkt_count; static u16 opt_pkt_size = MIN_PKT_SIZE; static u32 opt_pkt_fill_pattern = 0x12345678; +static bool opt_vlan_tag; +static u16 opt_pkt_vlan_id = VLAN_VID__DEFAULT; +static u16 opt_pkt_vlan_pri = VLAN_PRI__DEFAULT; +static struct ether_addr opt_txdmac = {{ 0x3c, 0xfd, 0xfe, + 0x9e, 0x7f, 0x71 }}; +static struct ether_addr opt_txsmac = {{ 0xec, 0xb1, 0xd7, + 0x98, 0x3a, 0xc0 }}; static bool opt_extra_stats; static bool opt_quiet; static bool opt_app_stats; static const char *opt_irq_str = ""; static u32 irq_no; static int irqs_at_init = -1; +static u32 sequence; static int opt_poll; static int opt_interval = 1; +static int opt_retries = 3; static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP; static u32 opt_umem_flags; static int opt_unaligned_chunks; @@ -97,6 +126,27 @@ static u32 opt_num_xsks = 1; static u32 prog_id; static bool opt_busy_poll; static bool opt_reduced_cap; +static clockid_t opt_clock = CLOCK_MONOTONIC; +static unsigned long opt_tx_cycle_ns; +static int opt_schpolicy = SCHED_OTHER; +static int opt_schprio = SCHED_PRI__DEFAULT; +static bool opt_tstamp; + +struct vlan_ethhdr { + unsigned char h_dest[6]; + unsigned char h_source[6]; + __be16 h_vlan_proto; + __be16 h_vlan_TCI; + __be16 h_vlan_encapsulated_proto; +}; + +#define PKTGEN_MAGIC 0xbe9be955 +struct pktgen_hdr { + __be32 pgh_magic; + __be32 seq_num; + __be32 tv_sec; + __be32 tv_usec; +}; struct xsk_ring_stats { unsigned long rx_npkts; @@ -153,15 +203,63 @@ struct xsk_socket_info { u32 outstanding_tx; }; +static const struct clockid_map { + const char *name; + clockid_t clockid; +} clockids_map[] = { + { "REALTIME", CLOCK_REALTIME }, + { "TAI", CLOCK_TAI }, + { "BOOTTIME", CLOCK_BOOTTIME }, + { "MONOTONIC", CLOCK_MONOTONIC }, + { NULL } +}; + +static const struct sched_map { + const char *name; + int policy; +} schmap[] = { + { "OTHER", SCHED_OTHER }, + { "FIFO", SCHED_FIFO }, + { NULL } +}; + static int num_socks; struct xsk_socket_info *xsks[MAX_SOCKS]; int sock; +static int get_clockid(clockid_t *id, const char *name) +{ + const struct clockid_map *clk; + + for (clk = clockids_map; clk->name; clk++) { + if (strcasecmp(clk->name, name) == 0) { + *id = clk->clockid; + return 0; + } + } + + return -1; +} + +static int get_schpolicy(int *policy, const char *name) +{ + const struct sched_map *sch; + + for (sch = schmap; sch->name; sch++) { + if (strcasecmp(sch->name, name) == 0) { + *policy = sch->policy; + return 0; + } + } + + return -1; +} + static unsigned long get_nsecs(void) { struct timespec ts; - clock_gettime(CLOCK_MONOTONIC, &ts); + clock_gettime(opt_clock, &ts); return ts.tv_sec * 1000000000UL + ts.tv_nsec; } @@ -254,6 +352,15 @@ static void dump_app_stats(long dt) xsks[i]->app_stats.prev_tx_wakeup_sendtos = xsks[i]->app_stats.tx_wakeup_sendtos; xsks[i]->app_stats.prev_opt_polls = xsks[i]->app_stats.opt_polls; } + + if (opt_tx_cycle_ns) { + printf("\n%-18s %-10s %-10s %-10s %-10s %-10s\n", + "", "period", "min", "ave", "max", "cycle"); + printf("%-18s %-10lu %-10lu %-10lu %-10lu %-10lu\n", + "Cyclic TX", opt_tx_cycle_ns, tx_cycle_diff_min, + (long)(tx_cycle_diff_ave / tx_cycle_cnt), + tx_cycle_diff_max, tx_cycle_cnt); + } } static bool get_interrupt_number(void) @@ -737,29 +844,69 @@ static inline u16 udp_csum(u32 saddr, u32 daddr, u32 len, #define ETH_FCS_SIZE 4 -#define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \ - sizeof(struct udphdr)) +#define ETH_HDR_SIZE (opt_vlan_tag ? sizeof(struct vlan_ethhdr) : \ + sizeof(struct ethhdr)) +#define PKTGEN_HDR_SIZE (opt_tstamp ? sizeof(struct pktgen_hdr) : 0) +#define PKT_HDR_SIZE (ETH_HDR_SIZE + sizeof(struct iphdr) + \ + sizeof(struct udphdr) + PKTGEN_HDR_SIZE) +#define PKTGEN_HDR_OFFSET (ETH_HDR_SIZE + sizeof(struct iphdr) + \ + sizeof(struct udphdr)) +#define PKTGEN_SIZE_MIN (PKTGEN_HDR_OFFSET + sizeof(struct pktgen_hdr) + \ + ETH_FCS_SIZE) #define PKT_SIZE (opt_pkt_size - ETH_FCS_SIZE) -#define IP_PKT_SIZE (PKT_SIZE - sizeof(struct ethhdr)) +#define IP_PKT_SIZE (PKT_SIZE - ETH_HDR_SIZE) #define UDP_PKT_SIZE (IP_PKT_SIZE - sizeof(struct iphdr)) -#define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr)) +#define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - \ + (sizeof(struct udphdr) + PKTGEN_HDR_SIZE)) static u8 pkt_data[XSK_UMEM__DEFAULT_FRAME_SIZE]; static void gen_eth_hdr_data(void) { - struct udphdr *udp_hdr = (struct udphdr *)(pkt_data + + struct pktgen_hdr *pktgen_hdr; + struct udphdr *udp_hdr; + struct iphdr *ip_hdr; + + if (opt_vlan_tag) { + struct vlan_ethhdr *veth_hdr = (struct vlan_ethhdr *)pkt_data; + u16 vlan_tci = 0; + + udp_hdr = (struct udphdr *)(pkt_data + + sizeof(struct vlan_ethhdr) + + sizeof(struct iphdr)); + ip_hdr = (struct iphdr *)(pkt_data + + sizeof(struct vlan_ethhdr)); + pktgen_hdr = (struct pktgen_hdr *)(pkt_data + + sizeof(struct vlan_ethhdr) + + sizeof(struct iphdr) + + sizeof(struct udphdr)); + /* ethernet & VLAN header */ + memcpy(veth_hdr->h_dest, &opt_txdmac, ETH_ALEN); + memcpy(veth_hdr->h_source, &opt_txsmac, ETH_ALEN); + veth_hdr->h_vlan_proto = htons(ETH_P_8021Q); + vlan_tci = opt_pkt_vlan_id & VLAN_VID_MASK; + vlan_tci |= (opt_pkt_vlan_pri << VLAN_PRIO_SHIFT) & VLAN_PRIO_MASK; + veth_hdr->h_vlan_TCI = htons(vlan_tci); + veth_hdr->h_vlan_encapsulated_proto = htons(ETH_P_IP); + } else { + struct ethhdr *eth_hdr = (struct ethhdr *)pkt_data; + + udp_hdr = (struct udphdr *)(pkt_data + + sizeof(struct ethhdr) + + sizeof(struct iphdr)); + ip_hdr = (struct iphdr *)(pkt_data + + sizeof(struct ethhdr)); + pktgen_hdr = (struct pktgen_hdr *)(pkt_data + sizeof(struct ethhdr) + - sizeof(struct iphdr)); - struct iphdr *ip_hdr = (struct iphdr *)(pkt_data + - sizeof(struct ethhdr)); - struct ethhdr *eth_hdr = (struct ethhdr *)pkt_data; + sizeof(struct iphdr) + + sizeof(struct udphdr)); + /* ethernet header */ + memcpy(eth_hdr->h_dest, &opt_txdmac, ETH_ALEN); + memcpy(eth_hdr->h_source, &opt_txsmac, ETH_ALEN); + eth_hdr->h_proto = htons(ETH_P_IP); + } - /* ethernet header */ - memcpy(eth_hdr->h_dest, "\x3c\xfd\xfe\x9e\x7f\x71", ETH_ALEN); - memcpy(eth_hdr->h_source, "\xec\xb1\xd7\x98\x3a\xc0", ETH_ALEN); - eth_hdr->h_proto = htons(ETH_P_IP); /* IP header */ ip_hdr->version = IPVERSION; @@ -782,6 +929,9 @@ static void gen_eth_hdr_data(void) udp_hdr->dest = htons(0x1000); udp_hdr->len = htons(UDP_PKT_SIZE); + if (opt_tstamp) + pktgen_hdr->pgh_magic = htonl(PKTGEN_MAGIC); + /* UDP data */ memset32_htonl(pkt_data + PKT_HDR_SIZE, opt_pkt_fill_pattern, UDP_PKT_DATA_SIZE); @@ -905,6 +1055,7 @@ static struct option long_options[] = { {"xdp-skb", no_argument, 0, 'S'}, {"xdp-native", no_argument, 0, 'N'}, {"interval", required_argument, 0, 'n'}, + {"retries", required_argument, 0, 'O'}, {"zero-copy", no_argument, 0, 'z'}, {"copy", no_argument, 0, 'c'}, {"frame-size", required_argument, 0, 'f'}, @@ -913,10 +1064,20 @@ static struct option long_options[] = { {"shared-umem", no_argument, 0, 'M'}, {"force", no_argument, 0, 'F'}, {"duration", required_argument, 0, 'd'}, + {"clock", required_argument, 0, 'w'}, {"batch-size", required_argument, 0, 'b'}, {"tx-pkt-count", required_argument, 0, 'C'}, {"tx-pkt-size", required_argument, 0, 's'}, {"tx-pkt-pattern", required_argument, 0, 'P'}, + {"tx-vlan", no_argument, 0, 'V'}, + {"tx-vlan-id", required_argument, 0, 'J'}, + {"tx-vlan-pri", required_argument, 0, 'K'}, + {"tx-dmac", required_argument, 0, 'G'}, + {"tx-smac", required_argument, 0, 'H'}, + {"tx-cycle", required_argument, 0, 'T'}, + {"tstamp", no_argument, 0, 'y'}, + {"policy", required_argument, 0, 'W'}, + {"schpri", required_argument, 0, 'U'}, {"extra-stats", no_argument, 0, 'x'}, {"quiet", no_argument, 0, 'Q'}, {"app-stats", no_argument, 0, 'a'}, @@ -940,6 +1101,7 @@ static void usage(const char *prog) " -S, --xdp-skb=n Use XDP skb-mod\n" " -N, --xdp-native=n Enforce XDP native mode\n" " -n, --interval=n Specify statistics update interval (default 1 sec).\n" + " -O, --retries=n Specify time-out retries (1s interval) attempt (default 3).\n" " -z, --zero-copy Force zero-copy mode.\n" " -c, --copy Force copy mode.\n" " -m, --no-need-wakeup Turn off use of driver need wakeup flag.\n" @@ -949,6 +1111,7 @@ static void usage(const char *prog) " -F, --force Force loading the XDP prog\n" " -d, --duration=n Duration in secs to run command.\n" " Default: forever.\n" + " -w, --clock=CLOCK Clock NAME (default MONOTONIC).\n" " -b, --batch-size=n Batch size for sending or receiving\n" " packets. Default: %d\n" " -C, --tx-pkt-count=n Number of packets to send.\n" @@ -957,6 +1120,15 @@ static void usage(const char *prog) " (Default: %d bytes)\n" " Min size: %d, Max size %d.\n" " -P, --tx-pkt-pattern=nPacket fill pattern. Default: 0x%x\n" + " -V, --tx-vlan Send VLAN tagged packets (For -t|--txonly)\n" + " -J, --tx-vlan-id=n Tx VLAN ID [1-4095]. Default: %d (For -V|--tx-vlan)\n" + " -K, --tx-vlan-pri=n Tx VLAN Priority [0-7]. Default: %d (For -V|--tx-vlan)\n" + " -G, --tx-dmac=<MAC> Dest MAC addr of TX frame in aa:bb:cc:dd:ee:ff format (For -V|--tx-vlan)\n" + " -H, --tx-smac=<MAC> Src MAC addr of TX frame in aa:bb:cc:dd:ee:ff format (For -V|--tx-vlan)\n" + " -T, --tx-cycle=n Tx cycle time in micro-seconds (For -t|--txonly).\n" + " -y, --tstamp Add time-stamp to packet (For -t|--txonly).\n" + " -W, --policy=POLICY Schedule policy. Default: SCHED_OTHER\n" + " -U, --schpri=n Schedule priority. Default: %d\n" " -x, --extra-stats Display extra statistics.\n" " -Q, --quiet Do not display any stats.\n" " -a, --app-stats Display application (syscall) statistics.\n" @@ -966,7 +1138,9 @@ static void usage(const char *prog) "\n"; fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE, opt_batch_size, MIN_PKT_SIZE, MIN_PKT_SIZE, - XSK_UMEM__DEFAULT_FRAME_SIZE, opt_pkt_fill_pattern); + XSK_UMEM__DEFAULT_FRAME_SIZE, opt_pkt_fill_pattern, + VLAN_VID__DEFAULT, VLAN_PRI__DEFAULT, + SCHED_PRI__DEFAULT); exit(EXIT_FAILURE); } @@ -978,7 +1152,8 @@ static void parse_command_line(int argc, char **argv) opterr = 0; for (;;) { - c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:xQaI:BR", + c = getopt_long(argc, argv, + "Frtli:q:pSNn:w:O:czf:muMd:b:C:s:P:VJ:K:G:H:T:yW:U:xQaI:BR", long_options, &option_index); if (c == -1) break; @@ -1012,6 +1187,17 @@ static void parse_command_line(int argc, char **argv) case 'n': opt_interval = atoi(optarg); break; + case 'w': + if (get_clockid(&opt_clock, optarg)) { + fprintf(stderr, + "ERROR: Invalid clock %s. Default to CLOCK_MONOTONIC.\n", + optarg); + opt_clock = CLOCK_MONOTONIC; + } + break; + case 'O': + opt_retries = atoi(optarg); + break; case 'z': opt_xdp_bind_flags |= XDP_ZEROCOPY; break; @@ -1059,6 +1245,49 @@ static void parse_command_line(int argc, char **argv) case 'P': opt_pkt_fill_pattern = strtol(optarg, NULL, 16); break; + case 'V': + opt_vlan_tag = true; + break; + case 'J': + opt_pkt_vlan_id = atoi(optarg); + break; + case 'K': + opt_pkt_vlan_pri = atoi(optarg); + break; + case 'G': + if (!ether_aton_r(optarg, + (struct ether_addr *)&opt_txdmac)) { + fprintf(stderr, "Invalid dmac address:%s\n", + optarg); + usage(basename(argv[0])); + } + break; + case 'H': + if (!ether_aton_r(optarg, + (struct ether_addr *)&opt_txsmac)) { + fprintf(stderr, "Invalid smac address:%s\n", + optarg); + usage(basename(argv[0])); + } + break; + case 'T': + opt_tx_cycle_ns = atoi(optarg); + opt_tx_cycle_ns *= NSEC_PER_USEC; + break; + case 'y': + opt_tstamp = 1; + break; + case 'W': + if (get_schpolicy(&opt_schpolicy, optarg)) { + fprintf(stderr, + "ERROR: Invalid policy %s. Default to SCHED_OTHER.\n", + optarg); + opt_schpolicy = SCHED_OTHER; + } + break; + case 'U': + opt_schprio = atoi(optarg); + break; case 'x': opt_extra_stats = 1; break; @@ -1264,16 +1493,22 @@ static void rx_drop_all(void) } } -static void tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size) +static int tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, + int batch_size, unsigned long tx_ns) { - u32 idx; + u32 idx, tv_sec, tv_usec; unsigned int i; while (xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx) < batch_size) { complete_tx_only(xsk, batch_size); if (benchmark_done) - return; + return 0; + } + + if (opt_tstamp) { + tv_sec = (u32)(tx_ns / NSEC_PER_SEC); + tv_usec = (u32)((tx_ns % NSEC_PER_SEC) / 1000); } for (i = 0; i < batch_size; i++) { @@ -1281,6 +1516,21 @@ static void tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size) idx + i); tx_desc->addr = (*frame_nb + i) * opt_xsk_frame_size; tx_desc->len = PKT_SIZE; + + if (opt_tstamp) { + struct pktgen_hdr *pktgen_hdr; + u64 addr = tx_desc->addr; + char *pkt; + + pkt = xsk_umem__get_data(xsk->umem->buffer, addr); + pktgen_hdr = (struct pktgen_hdr *)(pkt + PKTGEN_HDR_OFFSET); + + pktgen_hdr->seq_num = htonl(sequence++); + pktgen_hdr->tv_sec = htonl(tv_sec); + pktgen_hdr->tv_usec = htonl(tv_usec); + + hex_dump(pkt, PKT_SIZE, addr); + } } xsk_ring_prod__submit(&xsk->tx, batch_size); @@ -1289,6 +1539,8 @@ static void tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size) *frame_nb += batch_size; *frame_nb %= NUM_FRAMES; complete_tx_only(xsk, batch_size); + + return batch_size; } static inline int get_batch_size(int pkt_cnt) @@ -1315,23 +1567,48 @@ static void complete_tx_only_all(void) pending = !!xsks[i]->outstanding_tx; } } - } while (pending); + sleep(1); + } while (pending && opt_retries-- > 0); } static void tx_only_all(void) { struct pollfd fds[MAX_SOCKS] = {}; u32 frame_nb[MAX_SOCKS] = {}; + unsigned long next_tx_ns = 0; int pkt_cnt = 0; int i, ret; + if (opt_poll && opt_tx_cycle_ns) { + fprintf(stderr, + "Error: --poll and --tx-cycles are both set\n"); + return; + } + for (i = 0; i < num_socks; i++) { fds[0].fd = xsk_socket__fd(xsks[i]->xsk); fds[0].events = POLLOUT; } + if (opt_tx_cycle_ns) { + /* Align Tx time to micro-second boundary */ + next_tx_ns = (get_nsecs() / NSEC_PER_USEC + 1) * + NSEC_PER_USEC; + next_tx_ns += opt_tx_cycle_ns; + + /* Initialize periodic Tx scheduling variance */ + tx_cycle_diff_min = 1000000000; + tx_cycle_diff_max = 0; + tx_cycle_diff_ave = 0.0; + } + while ((opt_pkt_count && pkt_cnt < opt_pkt_count) || !opt_pkt_count) { int batch_size = get_batch_size(pkt_cnt); + unsigned long tx_ns = 0; + struct timespec next; + int tx_cnt = 0; + long diff; + int err; if (opt_poll) { for (i = 0; i < num_socks; i++) @@ -1344,13 +1621,43 @@ static void tx_only_all(void) continue; } + if (opt_tx_cycle_ns) { + next.tv_sec = next_tx_ns / NSEC_PER_SEC; + next.tv_nsec = next_tx_ns % NSEC_PER_SEC; + err = clock_nanosleep(opt_clock, TIMER_ABSTIME, &next, NULL); + if (err) { + if (err != EINTR) + fprintf(stderr, + "clock_nanosleep failed. Err:%d errno:%d\n", + err, errno); + break; + } + + /* Measure periodic Tx scheduling variance */ + tx_ns = get_nsecs(); + diff = tx_ns - next_tx_ns; + if (diff < tx_cycle_diff_min) + tx_cycle_diff_min = diff; + + if (diff > tx_cycle_diff_max) + tx_cycle_diff_max = diff; + + tx_cycle_diff_ave += (double)diff; + tx_cycle_cnt++; + } else if (opt_tstamp) { + tx_ns = get_nsecs(); + } + for (i = 0; i < num_socks; i++) - tx_only(xsks[i], &frame_nb[i], batch_size); + tx_cnt += tx_only(xsks[i], &frame_nb[i], batch_size, tx_ns); - pkt_cnt += batch_size; + pkt_cnt += tx_cnt; if (benchmark_done) break; + + if (opt_tx_cycle_ns) + next_tx_ns += opt_tx_cycle_ns; } if (opt_pkt_count) @@ -1581,6 +1888,7 @@ int main(int argc, char **argv) struct __user_cap_data_struct data[2] = { { 0 } }; struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; bool rx = false, tx = false; + struct sched_param schparam; struct xsk_umem_info *umem; struct bpf_object *obj; int xsks_map_fd = 0; @@ -1643,6 +1951,9 @@ int main(int argc, char **argv) apply_setsockopt(xsks[i]); if (opt_bench == BENCH_TXONLY) { + if (opt_tstamp && opt_pkt_size < PKTGEN_SIZE_MIN) + opt_pkt_size = PKTGEN_SIZE_MIN; + gen_eth_hdr_data(); for (i = 0; i < NUM_FRAMES; i++) @@ -1682,6 +1993,16 @@ int main(int argc, char **argv) prev_time = get_nsecs(); start_time = prev_time; + /* Configure sched priority for better wake-up accuracy */ + memset(&schparam, 0, sizeof(schparam)); + schparam.sched_priority = opt_schprio; + ret = sched_setscheduler(0, opt_schpolicy, &schparam); + if (ret) { + fprintf(stderr, "Error(%d) in setting priority(%d): %s\n", + errno, opt_schprio, strerror(errno)); + goto out; + } + if (opt_bench == BENCH_RXDROP) rx_drop_all(); else if (opt_bench == BENCH_TXONLY) @@ -1689,6 +2010,7 @@ int main(int argc, char **argv) else l2fwd_all(); +out: benchmark_done = true; if (!opt_quiet) |