diff options
Diffstat (limited to 'tools')
39 files changed, 4567 insertions, 143 deletions
diff --git a/tools/Makefile b/tools/Makefile index 9dfede37c8ff..df6fcb293fbc 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -19,7 +19,7 @@ help: @echo ' kvm_stat - top-like utility for displaying kvm statistics' @echo ' leds - LEDs tools' @echo ' liblockdep - user-space wrapper for kernel locking-validator' - @echo ' net - misc networking tools' + @echo ' bpf - misc BPF tools' @echo ' perf - Linux performance measurement and analysis tool' @echo ' selftests - various kernel selftests' @echo ' spi - spi tools' @@ -57,7 +57,7 @@ acpi: FORCE cpupower: FORCE $(call descend,power/$@) -cgroup firewire hv guest spi usb virtio vm net iio gpio objtool leds: FORCE +cgroup firewire hv guest spi usb virtio vm bpf iio gpio objtool leds: FORCE $(call descend,$@) liblockdep: FORCE @@ -91,7 +91,7 @@ kvm_stat: FORCE all: acpi cgroup cpupower gpio hv firewire liblockdep \ perf selftests spi turbostat usb \ - virtio vm net x86_energy_perf_policy \ + virtio vm bpf x86_energy_perf_policy \ tmon freefall iio objtool kvm_stat acpi_install: @@ -100,7 +100,7 @@ acpi_install: cpupower_install: $(call descend,power/$(@:_install=),install) -cgroup_install firewire_install gpio_install hv_install iio_install perf_install spi_install usb_install virtio_install vm_install net_install objtool_install: +cgroup_install firewire_install gpio_install hv_install iio_install perf_install spi_install usb_install virtio_install vm_install bpf_install objtool_install: $(call descend,$(@:_install=),install) liblockdep_install: @@ -124,7 +124,7 @@ kvm_stat_install: install: acpi_install cgroup_install cpupower_install gpio_install \ hv_install firewire_install iio_install liblockdep_install \ perf_install selftests_install turbostat_install usb_install \ - virtio_install vm_install net_install x86_energy_perf_policy_install \ + virtio_install vm_install bpf_install x86_energy_perf_policy_install \ tmon_install freefall_install objtool_install kvm_stat_install acpi_clean: @@ -133,7 +133,7 @@ acpi_clean: cpupower_clean: $(call descend,power/cpupower,clean) -cgroup_clean hv_clean firewire_clean spi_clean usb_clean virtio_clean vm_clean net_clean iio_clean gpio_clean objtool_clean leds_clean: +cgroup_clean hv_clean firewire_clean spi_clean usb_clean virtio_clean vm_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean: $(call descend,$(@:_clean=),clean) liblockdep_clean: @@ -169,7 +169,7 @@ build_clean: clean: acpi_clean cgroup_clean cpupower_clean hv_clean firewire_clean \ perf_clean selftests_clean turbostat_clean spi_clean usb_clean virtio_clean \ - vm_clean net_clean iio_clean x86_energy_perf_policy_clean tmon_clean \ + vm_clean bpf_clean iio_clean x86_energy_perf_policy_clean tmon_clean \ freefall_clean build_clean libbpf_clean libsubcmd_clean liblockdep_clean \ gpio_clean objtool_clean leds_clean diff --git a/tools/net/Makefile b/tools/bpf/Makefile index ddf888010652..325a35e1c28e 100644 --- a/tools/net/Makefile +++ b/tools/bpf/Makefile @@ -3,6 +3,7 @@ prefix = /usr CC = gcc LEX = flex YACC = bison +MAKE = make CFLAGS += -Wall -O2 CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include @@ -13,7 +14,7 @@ CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include %.lex.c: %.l $(LEX) -o $@ $< -all : bpf_jit_disasm bpf_dbg bpf_asm +all: bpf_jit_disasm bpf_dbg bpf_asm bpftool bpf_jit_disasm : CFLAGS += -DPACKAGE='bpf_jit_disasm' bpf_jit_disasm : LDLIBS = -lopcodes -lbfd -ldl @@ -26,10 +27,21 @@ bpf_asm : LDLIBS = bpf_asm : bpf_asm.o bpf_exp.yacc.o bpf_exp.lex.o bpf_exp.lex.o : bpf_exp.yacc.c -clean : +clean: bpftool_clean rm -rf *.o bpf_jit_disasm bpf_dbg bpf_asm bpf_exp.yacc.* bpf_exp.lex.* -install : +install: bpftool_install install bpf_jit_disasm $(prefix)/bin/bpf_jit_disasm install bpf_dbg $(prefix)/bin/bpf_dbg install bpf_asm $(prefix)/bin/bpf_asm + +bpftool: + $(MAKE) -C bpftool + +bpftool_install: + $(MAKE) -C bpftool install + +bpftool_clean: + $(MAKE) -C bpftool clean + +.PHONY: bpftool FORCE diff --git a/tools/net/bpf_asm.c b/tools/bpf/bpf_asm.c index c15aef097b04..c15aef097b04 100644 --- a/tools/net/bpf_asm.c +++ b/tools/bpf/bpf_asm.c diff --git a/tools/net/bpf_dbg.c b/tools/bpf/bpf_dbg.c index 4f254bcc4423..4f254bcc4423 100644 --- a/tools/net/bpf_dbg.c +++ b/tools/bpf/bpf_dbg.c diff --git a/tools/net/bpf_exp.l b/tools/bpf/bpf_exp.l index bd83149e7be0..bd83149e7be0 100644 --- a/tools/net/bpf_exp.l +++ b/tools/bpf/bpf_exp.l diff --git a/tools/net/bpf_exp.y b/tools/bpf/bpf_exp.y index 56ba1de50784..56ba1de50784 100644 --- a/tools/net/bpf_exp.y +++ b/tools/bpf/bpf_exp.y diff --git a/tools/net/bpf_jit_disasm.c b/tools/bpf/bpf_jit_disasm.c index 422d9abd666a..422d9abd666a 100644 --- a/tools/net/bpf_jit_disasm.c +++ b/tools/bpf/bpf_jit_disasm.c diff --git a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile new file mode 100644 index 000000000000..bde77d7c4390 --- /dev/null +++ b/tools/bpf/bpftool/Documentation/Makefile @@ -0,0 +1,34 @@ +include ../../../scripts/Makefile.include +include ../../../scripts/utilities.mak + +INSTALL ?= install +RM ?= rm -f + +# Make the path relative to DESTDIR, not prefix +ifndef DESTDIR +prefix?=$(HOME) +endif +mandir ?= $(prefix)/share/man +man8dir = $(mandir)/man8 + +MAN8_RST = $(wildcard *.rst) + +_DOC_MAN8 = $(patsubst %.rst,%.8,$(MAN8_RST)) +DOC_MAN8 = $(addprefix $(OUTPUT),$(_DOC_MAN8)) + +man: man8 +man8: $(DOC_MAN8) + +$(OUTPUT)%.8: %.rst + rst2man $< > $@ + +clean: + $(call QUIET_CLEAN, Documentation) $(RM) $(DOC_MAN8) + +install: man + $(call QUIET_INSTALL, Documentation-man) \ + $(INSTALL) -d -m 755 $(DESTDIR)$(man8dir); \ + $(INSTALL) -m 644 $(DOC_MAN8) $(DESTDIR)$(man8dir); + +.PHONY: man man8 clean install +.DEFAULT_GOAL := man diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst new file mode 100644 index 000000000000..ff63e89e4b6c --- /dev/null +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -0,0 +1,110 @@ +================ +bpftool-map +================ +------------------------------------------------------------------------------- +tool for inspection and simple manipulation of eBPF maps +------------------------------------------------------------------------------- + +:Manual section: 8 + +SYNOPSIS +======== + + **bpftool** **map** *COMMAND* + + *COMMANDS* := + { show | dump | update | lookup | getnext | delete | pin | help } + +MAP COMMANDS +============= + +| **bpftool** map show [*MAP*] +| **bpftool** map dump *MAP* +| **bpftool** map update *MAP* key *BYTES* value *VALUE* [*UPDATE_FLAGS*] +| **bpftool** map lookup *MAP* key *BYTES* +| **bpftool** map getnext *MAP* [key *BYTES*] +| **bpftool** map delete *MAP* key *BYTES* +| **bpftool** map pin *MAP* *FILE* +| **bpftool** map help +| +| *MAP* := { id MAP_ID | pinned FILE } +| *VALUE* := { BYTES | MAP | PROGRAM } +| *UPDATE_FLAGS* := { any | exist | noexist } + +DESCRIPTION +=========== + **bpftool map show** [*MAP*] + Show information about loaded maps. If *MAP* is specified + show information only about given map, otherwise list all + maps currently loaded on the system. + + Output will start with map ID followed by map type and + zero or more named attributes (depending on kernel version). + + **bpftool map dump** *MAP* + Dump all entries in a given *MAP*. + + **bpftool map update** *MAP* **key** *BYTES* **value** *VALUE* [*UPDATE_FLAGS*] + Update map entry for a given *KEY*. + + *UPDATE_FLAGS* can be one of: **any** update existing entry + or add if doesn't exit; **exist** update only if entry already + exists; **noexist** update only if entry doesn't exist. + + **bpftool map lookup** *MAP* **key** *BYTES* + Lookup **key** in the map. + + **bpftool map getnext** *MAP* [**key** *BYTES*] + Get next key. If *key* is not specified, get first key. + + **bpftool map delete** *MAP* **key** *BYTES* + Remove entry from the map. + + **bpftool map pin** *MAP* *FILE* + Pin map *MAP* as *FILE*. + + Note: *FILE* must be located in *bpffs* mount. + + **bpftool map help** + Print short help message. + +EXAMPLES +======== +**# bpftool map show** +:: + + 10: hash name some_map flags 0x0 + key 4B value 8B max_entries 2048 memlock 167936B + +**# bpftool map update id 10 key 13 00 07 00 value 02 00 00 00 01 02 03 04** + +**# bpftool map lookup id 10 key 0 1 2 3** + +:: + + key: 00 01 02 03 value: 00 01 02 03 04 05 06 07 + + +**# bpftool map dump id 10** +:: + + key: 00 01 02 03 value: 00 01 02 03 04 05 06 07 + key: 0d 00 07 00 value: 02 00 00 00 01 02 03 04 + Found 2 elements + +**# bpftool map getnext id 10 key 0 1 2 3** +:: + + key: + 00 01 02 03 + next key: + 0d 00 07 00 + +| +| **# mount -t bpf none /sys/fs/bpf/** +| **# bpftool map pin id 10 /sys/fs/bpf/map** +| **# bpftool map del pinned /sys/fs/bpf/map key 13 00 07 00** + +SEE ALSO +======== + **bpftool**\ (8), **bpftool-prog**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst new file mode 100644 index 000000000000..57fc4b9924ea --- /dev/null +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -0,0 +1,79 @@ +================ +bpftool-prog +================ +------------------------------------------------------------------------------- +tool for inspection and simple manipulation of eBPF progs +------------------------------------------------------------------------------- + +:Manual section: 8 + +SYNOPSIS +======== + +| **bpftool** prog show [*PROG*] +| **bpftool** prog dump xlated *PROG* file *FILE* +| **bpftool** prog dump jited *PROG* [file *FILE*] [opcodes] +| **bpftool** prog pin *PROG* *FILE* +| **bpftool** prog help +| +| *PROG* := { id *PROG_ID* | pinned *FILE* | tag *PROG_TAG* } + +DESCRIPTION +=========== + **bpftool prog show** [*PROG*] + Show information about loaded programs. If *PROG* is + specified show information only about given program, otherwise + list all programs currently loaded on the system. + + Output will start with program ID followed by program type and + zero or more named attributes (depending on kernel version). + + **bpftool prog dump xlated** *PROG* **file** *FILE* + Dump eBPF instructions of the program from the kernel to a + file. + + **bpftool prog dump jited** *PROG* [**file** *FILE*] [**opcodes**] + Dump jited image (host machine code) of the program. + If *FILE* is specified image will be written to a file, + otherwise it will be disassembled and printed to stdout. + + **opcodes** controls if raw opcodes will be printed. + + **bpftool prog pin** *PROG* *FILE* + Pin program *PROG* as *FILE*. + + Note: *FILE* must be located in *bpffs* mount. + + **bpftool prog help** + Print short help message. + +EXAMPLES +======== +**# bpftool prog show** +:: + + 10: xdp name some_prog tag 00:5a:3d:21:23:62:0c:8b + loaded_at Sep 29/20:11 uid 0 + xlated 528B jited 370B memlock 4096B map_ids 10 + +| +| **# bpftool prog dump xlated id 10 file /tmp/t** +| **# ls -l /tmp/t** +| -rw------- 1 root root 560 Jul 22 01:42 /tmp/t + +| +| **# bpftool prog dum jited pinned /sys/fs/bpf/prog** + +:: + + push %rbp + mov %rsp,%rbp + sub $0x228,%rsp + sub $0x28,%rbp + mov %rbx,0x0(%rbp) + + + +SEE ALSO +======== + **bpftool**\ (8), **bpftool-map**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst new file mode 100644 index 000000000000..f1df1893fb54 --- /dev/null +++ b/tools/bpf/bpftool/Documentation/bpftool.rst @@ -0,0 +1,34 @@ +================ +BPFTOOL +================ +------------------------------------------------------------------------------- +tool for inspection and simple manipulation of eBPF programs and maps +------------------------------------------------------------------------------- + +:Manual section: 8 + +SYNOPSIS +======== + + **bpftool** *OBJECT* { *COMMAND* | help } + + **bpftool** batch file *FILE* + + *OBJECT* := { **map** | **program** } + + *MAP-COMMANDS* := + { show | dump | update | lookup | getnext | delete | pin | help } + + *PROG-COMMANDS* := { show | dump jited | dump xlated | pin | help } + +DESCRIPTION +=========== + *bpftool* allows for inspection and simple modification of BPF objects + on the system. + + Note that format of the output of all tools is not guaranteed to be + stable and should not be depended upon. + +SEE ALSO +======== + **bpftool-map**\ (8), **bpftool-prog**\ (8) diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile new file mode 100644 index 000000000000..8705ee44664d --- /dev/null +++ b/tools/bpf/bpftool/Makefile @@ -0,0 +1,86 @@ +include ../../scripts/Makefile.include + +include ../../scripts/utilities.mak + +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(CURDIR))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +#$(info Determined 'srctree' to be $(srctree)) +endif + +ifneq ($(objtree),) +#$(info Determined 'objtree' to be $(objtree)) +endif + +ifneq ($(OUTPUT),) +#$(info Determined 'OUTPUT' to be $(OUTPUT)) +# Adding $(OUTPUT) as a directory to look for source files, +# because use generated output files as sources dependency +# for flex/bison parsers. +VPATH += $(OUTPUT) +export VPATH +endif + +ifeq ($(V),1) + Q = +else + Q = @ +endif + +BPF_DIR = $(srctree)/tools/lib/bpf/ + +ifneq ($(OUTPUT),) + BPF_PATH=$(OUTPUT) +else + BPF_PATH=$(BPF_DIR) +endif + +LIBBPF = $(BPF_PATH)libbpf.a + +$(LIBBPF): FORCE + $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(OUTPUT) $(OUTPUT)libbpf.a FEATURES_DUMP=$(FEATURE_DUMP_EXPORT) + +$(LIBBPF)-clean: + $(call QUIET_CLEAN, libbpf) + $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(OUTPUT) clean >/dev/null + +prefix = /usr + +CC = gcc + +CFLAGS += -O2 +CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow +CFLAGS += -D__EXPORTED_HEADERS__ -I$(srctree)/tools/include/uapi -I$(srctree)/tools/include -I$(srctree)/tools/lib/bpf +LIBS = -lelf -lbfd -lopcodes $(LIBBPF) + +include $(wildcard *.d) + +all: $(OUTPUT)bpftool + +SRCS=$(wildcard *.c) +OBJS=$(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) + +$(OUTPUT)bpftool: $(OBJS) $(LIBBPF) + $(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $^ $(LIBS) + +$(OUTPUT)%.o: %.c + $(QUIET_CC)$(COMPILE.c) -MMD -o $@ $< + +clean: $(LIBBPF)-clean + $(call QUIET_CLEAN, bpftool) + $(Q)rm -rf $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d + +install: + install $(OUTPUT)bpftool $(prefix)/sbin/bpftool + +doc: + $(Q)$(MAKE) -C Documentation/ + +doc-install: + $(Q)$(MAKE) -C Documentation/ install + +FORCE: + +.PHONY: all clean FORCE +.DEFAULT_GOAL := all diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c new file mode 100644 index 000000000000..df8396a0c400 --- /dev/null +++ b/tools/bpf/bpftool/common.c @@ -0,0 +1,216 @@ +/* + * Copyright (C) 2017 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* Author: Jakub Kicinski <kubakici@wp.pl> */ + +#include <errno.h> +#include <libgen.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <linux/limits.h> +#include <linux/magic.h> +#include <sys/types.h> +#include <sys/vfs.h> + +#include <bpf.h> + +#include "main.h" + +static bool is_bpffs(char *path) +{ + struct statfs st_fs; + + if (statfs(path, &st_fs) < 0) + return false; + + return (unsigned long)st_fs.f_type == BPF_FS_MAGIC; +} + +int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type) +{ + enum bpf_obj_type type; + int fd; + + fd = bpf_obj_get(path); + if (fd < 0) { + err("bpf obj get (%s): %s\n", path, + errno == EACCES && !is_bpffs(dirname(path)) ? + "directory not in bpf file system (bpffs)" : + strerror(errno)); + return -1; + } + + type = get_fd_type(fd); + if (type < 0) { + close(fd); + return type; + } + if (type != exp_type) { + err("incorrect object type: %s\n", get_fd_type_name(type)); + close(fd); + return -1; + } + + return fd; +} + +int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32)) +{ + unsigned int id; + char *endptr; + int err; + int fd; + + if (!is_prefix(*argv, "id")) { + err("expected 'id' got %s\n", *argv); + return -1; + } + NEXT_ARG(); + + id = strtoul(*argv, &endptr, 0); + if (*endptr) { + err("can't parse %s as ID\n", *argv); + return -1; + } + NEXT_ARG(); + + if (argc != 1) + usage(); + + fd = get_fd_by_id(id); + if (fd < 0) { + err("can't get prog by id (%u): %s\n", id, strerror(errno)); + return -1; + } + + err = bpf_obj_pin(fd, *argv); + close(fd); + if (err) { + err("can't pin the object (%s): %s\n", *argv, + errno == EACCES && !is_bpffs(dirname(*argv)) ? + "directory not in bpf file system (bpffs)" : + strerror(errno)); + return -1; + } + + return 0; +} + +const char *get_fd_type_name(enum bpf_obj_type type) +{ + static const char * const names[] = { + [BPF_OBJ_UNKNOWN] = "unknown", + [BPF_OBJ_PROG] = "prog", + [BPF_OBJ_MAP] = "map", + }; + + if (type < 0 || type >= ARRAY_SIZE(names) || !names[type]) + return names[BPF_OBJ_UNKNOWN]; + + return names[type]; +} + +int get_fd_type(int fd) +{ + char path[PATH_MAX]; + char buf[512]; + ssize_t n; + + snprintf(path, sizeof(path), "/proc/%d/fd/%d", getpid(), fd); + + n = readlink(path, buf, sizeof(buf)); + if (n < 0) { + err("can't read link type: %s\n", strerror(errno)); + return -1; + } + if (n == sizeof(path)) { + err("can't read link type: path too long!\n"); + return -1; + } + + if (strstr(buf, "bpf-map")) + return BPF_OBJ_MAP; + else if (strstr(buf, "bpf-prog")) + return BPF_OBJ_PROG; + + return BPF_OBJ_UNKNOWN; +} + +char *get_fdinfo(int fd, const char *key) +{ + char path[PATH_MAX]; + char *line = NULL; + size_t line_n = 0; + ssize_t n; + FILE *fdi; + + snprintf(path, sizeof(path), "/proc/%d/fdinfo/%d", getpid(), fd); + + fdi = fopen(path, "r"); + if (!fdi) { + err("can't open fdinfo: %s\n", strerror(errno)); + return NULL; + } + + while ((n = getline(&line, &line_n, fdi))) { + char *value; + int len; + + if (!strstr(line, key)) + continue; + + fclose(fdi); + + value = strchr(line, '\t'); + if (!value || !value[1]) { + err("malformed fdinfo!?\n"); + free(line); + return NULL; + } + value++; + + len = strlen(value); + memmove(line, value, len); + line[len - 1] = '\0'; + + return line; + } + + err("key '%s' not found in fdinfo\n", key); + free(line); + fclose(fdi); + return NULL; +} diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c new file mode 100644 index 000000000000..70e480b59e9d --- /dev/null +++ b/tools/bpf/bpftool/jit_disasm.c @@ -0,0 +1,87 @@ +/* + * Based on: + * + * Minimal BPF JIT image disassembler + * + * Disassembles BPF JIT compiler emitted opcodes back to asm insn's for + * debugging or verification purposes. + * + * Copyright 2013 Daniel Borkmann <daniel@iogearbox.net> + * Licensed under the GNU General Public License, version 2.0 (GPLv2) + */ + +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <assert.h> +#include <unistd.h> +#include <string.h> +#include <bfd.h> +#include <dis-asm.h> +#include <sys/types.h> +#include <sys/stat.h> + +static void get_exec_path(char *tpath, size_t size) +{ + ssize_t len; + char *path; + + snprintf(tpath, size, "/proc/%d/exe", (int) getpid()); + tpath[size - 1] = 0; + + path = strdup(tpath); + assert(path); + + len = readlink(path, tpath, size - 1); + assert(len > 0); + tpath[len] = 0; + + free(path); +} + +void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes) +{ + disassembler_ftype disassemble; + struct disassemble_info info; + int count, i, pc = 0; + char tpath[256]; + bfd *bfdf; + + if (!len) + return; + + memset(tpath, 0, sizeof(tpath)); + get_exec_path(tpath, sizeof(tpath)); + + bfdf = bfd_openr(tpath, NULL); + assert(bfdf); + assert(bfd_check_format(bfdf, bfd_object)); + + init_disassemble_info(&info, stdout, (fprintf_ftype) fprintf); + info.arch = bfd_get_arch(bfdf); + info.mach = bfd_get_mach(bfdf); + info.buffer = image; + info.buffer_length = len; + + disassemble_init_for_target(&info); + + disassemble = disassembler(bfdf); + assert(disassemble); + + do { + printf("%4x:\t", pc); + + count = disassemble(pc, &info); + + if (opcodes) { + printf("\n\t"); + for (i = 0; i < count; ++i) + printf("%02x ", (uint8_t) image[pc + i]); + } + printf("\n"); + + pc += count; + } while (count > 0 && pc < len); + + bfd_close(bfdf); +} diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c new file mode 100644 index 000000000000..e02d00d6e00b --- /dev/null +++ b/tools/bpf/bpftool/main.c @@ -0,0 +1,212 @@ +/* + * Copyright (C) 2017 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* Author: Jakub Kicinski <kubakici@wp.pl> */ + +#include <bfd.h> +#include <ctype.h> +#include <errno.h> +#include <linux/bpf.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <bpf.h> + +#include "main.h" + +const char *bin_name; +static int last_argc; +static char **last_argv; +static int (*last_do_help)(int argc, char **argv); + +void usage(void) +{ + last_do_help(last_argc - 1, last_argv + 1); + + exit(-1); +} + +static int do_help(int argc, char **argv) +{ + fprintf(stderr, + "Usage: %s OBJECT { COMMAND | help }\n" + " %s batch file FILE\n" + "\n" + " OBJECT := { prog | map }\n", + bin_name, bin_name); + + return 0; +} + +int cmd_select(const struct cmd *cmds, int argc, char **argv, + int (*help)(int argc, char **argv)) +{ + unsigned int i; + + last_argc = argc; + last_argv = argv; + last_do_help = help; + + if (argc < 1 && cmds[0].func) + return cmds[0].func(argc, argv); + + for (i = 0; cmds[i].func; i++) + if (is_prefix(*argv, cmds[i].cmd)) + return cmds[i].func(argc - 1, argv + 1); + + help(argc - 1, argv + 1); + + return -1; +} + +bool is_prefix(const char *pfx, const char *str) +{ + if (!pfx) + return false; + if (strlen(str) < strlen(pfx)) + return false; + + return !memcmp(str, pfx, strlen(pfx)); +} + +void print_hex(void *arg, unsigned int n, const char *sep) +{ + unsigned char *data = arg; + unsigned int i; + + for (i = 0; i < n; i++) { + const char *pfx = ""; + + if (!i) + /* nothing */; + else if (!(i % 16)) + printf("\n"); + else if (!(i % 8)) + printf(" "); + else + pfx = sep; + + printf("%s%02hhx", i ? pfx : "", data[i]); + } +} + +static int do_batch(int argc, char **argv); + +static const struct cmd cmds[] = { + { "help", do_help }, + { "batch", do_batch }, + { "prog", do_prog }, + { "map", do_map }, + { 0 } +}; + +static int do_batch(int argc, char **argv) +{ + unsigned int lines = 0; + char *n_argv[4096]; + char buf[65536]; + int n_argc; + FILE *fp; + int err; + + if (argc < 2) { + err("too few parameters for batch\n"); + return -1; + } else if (!is_prefix(*argv, "file")) { + err("expected 'file', got: %s\n", *argv); + return -1; + } else if (argc > 2) { + err("too many parameters for batch\n"); + return -1; + } + NEXT_ARG(); + + fp = fopen(*argv, "r"); + if (!fp) { + err("Can't open file (%s): %s\n", *argv, strerror(errno)); + return -1; + } + + while (fgets(buf, sizeof(buf), fp)) { + if (strlen(buf) == sizeof(buf) - 1) { + errno = E2BIG; + break; + } + + n_argc = 0; + n_argv[n_argc] = strtok(buf, " \t\n"); + + while (n_argv[n_argc]) { + n_argc++; + if (n_argc == ARRAY_SIZE(n_argv)) { + err("line %d has too many arguments, skip\n", + lines); + n_argc = 0; + break; + } + n_argv[n_argc] = strtok(NULL, " \t\n"); + } + + if (!n_argc) + continue; + + err = cmd_select(cmds, n_argc, n_argv, do_help); + if (err) + goto err_close; + + lines++; + } + + if (errno && errno != ENOENT) { + perror("reading batch file failed"); + err = -1; + } else { + info("processed %d lines\n", lines); + err = 0; + } +err_close: + fclose(fp); + + return err; +} + +int main(int argc, char **argv) +{ + bin_name = argv[0]; + NEXT_ARG(); + + bfd_init(); + + return cmd_select(cmds, argc, argv, do_help); +} diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h new file mode 100644 index 000000000000..85d2d7870a58 --- /dev/null +++ b/tools/bpf/bpftool/main.h @@ -0,0 +1,99 @@ +/* + * Copyright (C) 2017 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* Author: Jakub Kicinski <kubakici@wp.pl> */ + +#ifndef __BPF_TOOL_H +#define __BPF_TOOL_H + +#include <stdbool.h> +#include <stdio.h> +#include <linux/bpf.h> + +#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) + +#define err(msg...) fprintf(stderr, "Error: " msg) +#define warn(msg...) fprintf(stderr, "Warning: " msg) +#define info(msg...) fprintf(stderr, msg) + +#define ptr_to_u64(ptr) ((__u64)(unsigned long)(ptr)) + +#define min(a, b) \ + ({ typeof(a) _a = (a); typeof(b) _b = (b); _a > _b ? _b : _a; }) +#define max(a, b) \ + ({ typeof(a) _a = (a); typeof(b) _b = (b); _a < _b ? _b : _a; }) + +#define NEXT_ARG() ({ argc--; argv++; if (argc < 0) usage(); }) +#define NEXT_ARGP() ({ (*argc)--; (*argv)++; if (*argc < 0) usage(); }) +#define BAD_ARG() ({ err("what is '%s'?\n", *argv); -1; }) + +#define BPF_TAG_FMT "%02hhx:%02hhx:%02hhx:%02hhx:" \ + "%02hhx:%02hhx:%02hhx:%02hhx" + +#define HELP_SPEC_PROGRAM \ + "PROG := { id PROG_ID | pinned FILE | tag PROG_TAG }" + +enum bpf_obj_type { + BPF_OBJ_UNKNOWN, + BPF_OBJ_PROG, + BPF_OBJ_MAP, +}; + +extern const char *bin_name; + +bool is_prefix(const char *pfx, const char *str); +void print_hex(void *arg, unsigned int n, const char *sep); +void usage(void) __attribute__((noreturn)); + +struct cmd { + const char *cmd; + int (*func)(int argc, char **argv); +}; + +int cmd_select(const struct cmd *cmds, int argc, char **argv, + int (*help)(int argc, char **argv)); + +int get_fd_type(int fd); +const char *get_fd_type_name(enum bpf_obj_type type); +char *get_fdinfo(int fd, const char *key); +int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type); +int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32)); + +int do_prog(int argc, char **arg); +int do_map(int argc, char **arg); + +int prog_parse_fd(int *argc, char ***argv); + +void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes); + +#endif diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c new file mode 100644 index 000000000000..0528a5379e6c --- /dev/null +++ b/tools/bpf/bpftool/map.c @@ -0,0 +1,744 @@ +/* + * Copyright (C) 2017 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* Author: Jakub Kicinski <kubakici@wp.pl> */ + +#include <assert.h> +#include <ctype.h> +#include <errno.h> +#include <fcntl.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> + +#include <bpf.h> + +#include "main.h" + +static const char * const map_type_name[] = { + [BPF_MAP_TYPE_UNSPEC] = "unspec", + [BPF_MAP_TYPE_HASH] = "hash", + [BPF_MAP_TYPE_ARRAY] = "array", + [BPF_MAP_TYPE_PROG_ARRAY] = "prog_array", + [BPF_MAP_TYPE_PERF_EVENT_ARRAY] = "perf_event_array", + [BPF_MAP_TYPE_PERCPU_HASH] = "percpu_hash", + [BPF_MAP_TYPE_PERCPU_ARRAY] = "percpu_array", + [BPF_MAP_TYPE_STACK_TRACE] = "stack_trace", + [BPF_MAP_TYPE_CGROUP_ARRAY] = "cgroup_array", + [BPF_MAP_TYPE_LRU_HASH] = "lru_hash", + [BPF_MAP_TYPE_LRU_PERCPU_HASH] = "lru_percpu_hash", + [BPF_MAP_TYPE_LPM_TRIE] = "lpm_trie", + [BPF_MAP_TYPE_ARRAY_OF_MAPS] = "array_of_maps", + [BPF_MAP_TYPE_HASH_OF_MAPS] = "hash_of_maps", + [BPF_MAP_TYPE_DEVMAP] = "devmap", + [BPF_MAP_TYPE_SOCKMAP] = "sockmap", +}; + +static unsigned int get_possible_cpus(void) +{ + static unsigned int result; + char buf[128]; + long int n; + char *ptr; + int fd; + + if (result) + return result; + + fd = open("/sys/devices/system/cpu/possible", O_RDONLY); + if (fd < 0) { + err("can't open sysfs possible cpus\n"); + exit(-1); + } + + n = read(fd, buf, sizeof(buf)); + if (n < 2) { + err("can't read sysfs possible cpus\n"); + exit(-1); + } + close(fd); + + if (n == sizeof(buf)) { + err("read sysfs possible cpus overflow\n"); + exit(-1); + } + + ptr = buf; + n = 0; + while (*ptr && *ptr != '\n') { + unsigned int a, b; + + if (sscanf(ptr, "%u-%u", &a, &b) == 2) { + n += b - a + 1; + + ptr = strchr(ptr, '-') + 1; + } else if (sscanf(ptr, "%u", &a) == 1) { + n++; + } else { + assert(0); + } + + while (isdigit(*ptr)) + ptr++; + if (*ptr == ',') + ptr++; + } + + result = n; + + return result; +} + +static bool map_is_per_cpu(__u32 type) +{ + return type == BPF_MAP_TYPE_PERCPU_HASH || + type == BPF_MAP_TYPE_PERCPU_ARRAY || + type == BPF_MAP_TYPE_LRU_PERCPU_HASH; +} + +static bool map_is_map_of_maps(__u32 type) +{ + return type == BPF_MAP_TYPE_ARRAY_OF_MAPS || + type == BPF_MAP_TYPE_HASH_OF_MAPS; +} + +static bool map_is_map_of_progs(__u32 type) +{ + return type == BPF_MAP_TYPE_PROG_ARRAY; +} + +static void *alloc_value(struct bpf_map_info *info) +{ + if (map_is_per_cpu(info->type)) + return malloc(info->value_size * get_possible_cpus()); + else + return malloc(info->value_size); +} + +static int map_parse_fd(int *argc, char ***argv) +{ + int fd; + + if (is_prefix(**argv, "id")) { + unsigned int id; + char *endptr; + + NEXT_ARGP(); + + id = strtoul(**argv, &endptr, 0); + if (*endptr) { + err("can't parse %s as ID\n", **argv); + return -1; + } + NEXT_ARGP(); + + fd = bpf_map_get_fd_by_id(id); + if (fd < 0) + err("get map by id (%u): %s\n", id, strerror(errno)); + return fd; + } else if (is_prefix(**argv, "pinned")) { + char *path; + + NEXT_ARGP(); + + path = **argv; + NEXT_ARGP(); + + return open_obj_pinned_any(path, BPF_OBJ_MAP); + } + + err("expected 'id' or 'pinned', got: '%s'?\n", **argv); + return -1; +} + +static int +map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len) +{ + int err; + int fd; + + fd = map_parse_fd(argc, argv); + if (fd < 0) + return -1; + + err = bpf_obj_get_info_by_fd(fd, info, info_len); + if (err) { + err("can't get map info: %s\n", strerror(errno)); + close(fd); + return err; + } + + return fd; +} + +static void print_entry(struct bpf_map_info *info, unsigned char *key, + unsigned char *value) +{ + if (!map_is_per_cpu(info->type)) { + bool single_line, break_names; + + break_names = info->key_size > 16 || info->value_size > 16; + single_line = info->key_size + info->value_size <= 24 && + !break_names; + + printf("key:%c", break_names ? '\n' : ' '); + print_hex(key, info->key_size, " "); + + printf(single_line ? " " : "\n"); + + printf("value:%c", break_names ? '\n' : ' '); + print_hex(value, info->value_size, " "); + + printf("\n"); + } else { + unsigned int i, n; + + n = get_possible_cpus(); + + printf("key:\n"); + print_hex(key, info->key_size, " "); + printf("\n"); + for (i = 0; i < n; i++) { + printf("value (CPU %02d):%c", + i, info->value_size > 16 ? '\n' : ' '); + print_hex(value + i * info->value_size, + info->value_size, " "); + printf("\n"); + } + } +} + +static char **parse_bytes(char **argv, const char *name, unsigned char *val, + unsigned int n) +{ + unsigned int i = 0; + char *endptr; + + while (i < n && argv[i]) { + val[i] = strtoul(argv[i], &endptr, 0); + if (*endptr) { + err("error parsing byte: %s\n", argv[i]); + break; + } + i++; + } + + if (i != n) { + err("%s expected %d bytes got %d\n", name, n, i); + return NULL; + } + + return argv + i; +} + +static int parse_elem(char **argv, struct bpf_map_info *info, + void *key, void *value, __u32 key_size, __u32 value_size, + __u32 *flags, __u32 **value_fd) +{ + if (!*argv) { + if (!key && !value) + return 0; + err("did not find %s\n", key ? "key" : "value"); + return -1; + } + + if (is_prefix(*argv, "key")) { + if (!key) { + if (key_size) + err("duplicate key\n"); + else + err("unnecessary key\n"); + return -1; + } + + argv = parse_bytes(argv + 1, "key", key, key_size); + if (!argv) + return -1; + + return parse_elem(argv, info, NULL, value, key_size, value_size, + flags, value_fd); + } else if (is_prefix(*argv, "value")) { + int fd; + + if (!value) { + if (value_size) + err("duplicate value\n"); + else + err("unnecessary value\n"); + return -1; + } + + argv++; + + if (map_is_map_of_maps(info->type)) { + int argc = 2; + + if (value_size != 4) { + err("value smaller than 4B for map in map?\n"); + return -1; + } + if (!argv[0] || !argv[1]) { + err("not enough value arguments for map in map\n"); + return -1; + } + + fd = map_parse_fd(&argc, &argv); + if (fd < 0) + return -1; + + *value_fd = value; + **value_fd = fd; + } else if (map_is_map_of_progs(info->type)) { + int argc = 2; + + if (value_size != 4) { + err("value smaller than 4B for map of progs?\n"); + return -1; + } + if (!argv[0] || !argv[1]) { + err("not enough value arguments for map of progs\n"); + return -1; + } + + fd = prog_parse_fd(&argc, &argv); + if (fd < 0) + return -1; + + *value_fd = value; + **value_fd = fd; + } else { + argv = parse_bytes(argv, "value", value, value_size); + if (!argv) + return -1; + } + + return parse_elem(argv, info, key, NULL, key_size, value_size, + flags, NULL); + } else if (is_prefix(*argv, "any") || is_prefix(*argv, "noexist") || + is_prefix(*argv, "exist")) { + if (!flags) { + err("flags specified multiple times: %s\n", *argv); + return -1; + } + + if (is_prefix(*argv, "any")) + *flags = BPF_ANY; + else if (is_prefix(*argv, "noexist")) + *flags = BPF_NOEXIST; + else if (is_prefix(*argv, "exist")) + *flags = BPF_EXIST; + + return parse_elem(argv + 1, info, key, value, key_size, + value_size, NULL, value_fd); + } + + err("expected key or value, got: %s\n", *argv); + return -1; +} + +static int show_map_close(int fd, struct bpf_map_info *info) +{ + char *memlock; + + memlock = get_fdinfo(fd, "memlock"); + close(fd); + + printf("%u: ", info->id); + if (info->type < ARRAY_SIZE(map_type_name)) + printf("%s ", map_type_name[info->type]); + else + printf("type %u ", info->type); + + if (*info->name) + printf("name %s ", info->name); + + printf("flags 0x%x\n", info->map_flags); + printf("\tkey %uB value %uB max_entries %u", + info->key_size, info->value_size, info->max_entries); + + if (memlock) + printf(" memlock %sB", memlock); + free(memlock); + + printf("\n"); + + return 0; +} + +static int do_show(int argc, char **argv) +{ + struct bpf_map_info info = {}; + __u32 len = sizeof(info); + __u32 id = 0; + int err; + int fd; + + if (argc == 2) { + fd = map_parse_fd_and_info(&argc, &argv, &info, &len); + if (fd < 0) + return -1; + + return show_map_close(fd, &info); + } + + if (argc) + return BAD_ARG(); + + while (true) { + err = bpf_map_get_next_id(id, &id); + if (err) { + if (errno == ENOENT) + break; + err("can't get next map: %s\n", strerror(errno)); + if (errno == EINVAL) + err("kernel too old?\n"); + return -1; + } + + fd = bpf_map_get_fd_by_id(id); + if (fd < 0) { + err("can't get map by id (%u): %s\n", + id, strerror(errno)); + return -1; + } + + err = bpf_obj_get_info_by_fd(fd, &info, &len); + if (err) { + err("can't get map info: %s\n", strerror(errno)); + close(fd); + return -1; + } + + show_map_close(fd, &info); + } + + return errno == ENOENT ? 0 : -1; +} + +static int do_dump(int argc, char **argv) +{ + void *key, *value, *prev_key; + unsigned int num_elems = 0; + struct bpf_map_info info = {}; + __u32 len = sizeof(info); + int err; + int fd; + + if (argc != 2) + usage(); + + fd = map_parse_fd_and_info(&argc, &argv, &info, &len); + if (fd < 0) + return -1; + + if (map_is_map_of_maps(info.type) || map_is_map_of_progs(info.type)) { + err("Dumping maps of maps and program maps not supported\n"); + close(fd); + return -1; + } + + key = malloc(info.key_size); + value = alloc_value(&info); + if (!key || !value) { + err("mem alloc failed\n"); + err = -1; + goto exit_free; + } + + prev_key = NULL; + while (true) { + err = bpf_map_get_next_key(fd, prev_key, key); + if (err) { + if (errno == ENOENT) + err = 0; + break; + } + + if (!bpf_map_lookup_elem(fd, key, value)) { + print_entry(&info, key, value); + } else { + info("can't lookup element with key: "); + print_hex(key, info.key_size, " "); + printf("\n"); + } + + prev_key = key; + num_elems++; + } + + printf("Found %u element%s\n", num_elems, num_elems != 1 ? "s" : ""); + +exit_free: + free(key); + free(value); + close(fd); + + return err; +} + +static int do_update(int argc, char **argv) +{ + struct bpf_map_info info = {}; + __u32 len = sizeof(info); + __u32 *value_fd = NULL; + __u32 flags = BPF_ANY; + void *key, *value; + int fd, err; + + if (argc < 2) + usage(); + + fd = map_parse_fd_and_info(&argc, &argv, &info, &len); + if (fd < 0) + return -1; + + key = malloc(info.key_size); + value = alloc_value(&info); + if (!key || !value) { + err("mem alloc failed"); + err = -1; + goto exit_free; + } + + err = parse_elem(argv, &info, key, value, info.key_size, + info.value_size, &flags, &value_fd); + if (err) + goto exit_free; + + err = bpf_map_update_elem(fd, key, value, flags); + if (err) { + err("update failed: %s\n", strerror(errno)); + goto exit_free; + } + +exit_free: + if (value_fd) + close(*value_fd); + free(key); + free(value); + close(fd); + + return err; +} + +static int do_lookup(int argc, char **argv) +{ + struct bpf_map_info info = {}; + __u32 len = sizeof(info); + void *key, *value; + int err; + int fd; + + if (argc < 2) + usage(); + + fd = map_parse_fd_and_info(&argc, &argv, &info, &len); + if (fd < 0) + return -1; + + key = malloc(info.key_size); + value = alloc_value(&info); + if (!key || !value) { + err("mem alloc failed"); + err = -1; + goto exit_free; + } + + err = parse_elem(argv, &info, key, NULL, info.key_size, 0, NULL, NULL); + if (err) + goto exit_free; + + err = bpf_map_lookup_elem(fd, key, value); + if (!err) { + print_entry(&info, key, value); + } else if (errno == ENOENT) { + printf("key:\n"); + print_hex(key, info.key_size, " "); + printf("\n\nNot found\n"); + } else { + err("lookup failed: %s\n", strerror(errno)); + } + +exit_free: + free(key); + free(value); + close(fd); + + return err; +} + +static int do_getnext(int argc, char **argv) +{ + struct bpf_map_info info = {}; + __u32 len = sizeof(info); + void *key, *nextkey; + int err; + int fd; + + if (argc < 2) + usage(); + + fd = map_parse_fd_and_info(&argc, &argv, &info, &len); + if (fd < 0) + return -1; + + key = malloc(info.key_size); + nextkey = malloc(info.key_size); + if (!key || !nextkey) { + err("mem alloc failed"); + err = -1; + goto exit_free; + } + + if (argc) { + err = parse_elem(argv, &info, key, NULL, info.key_size, 0, + NULL, NULL); + if (err) + goto exit_free; + } else { + free(key); + key = NULL; + } + + err = bpf_map_get_next_key(fd, key, nextkey); + if (err) { + err("can't get next key: %s\n", strerror(errno)); + goto exit_free; + } + + if (key) { + printf("key:\n"); + print_hex(key, info.key_size, " "); + printf("\n"); + } else { + printf("key: None\n"); + } + + printf("next key:\n"); + print_hex(nextkey, info.key_size, " "); + printf("\n"); + +exit_free: + free(nextkey); + free(key); + close(fd); + + return err; +} + +static int do_delete(int argc, char **argv) +{ + struct bpf_map_info info = {}; + __u32 len = sizeof(info); + void *key; + int err; + int fd; + + if (argc < 2) + usage(); + + fd = map_parse_fd_and_info(&argc, &argv, &info, &len); + if (fd < 0) + return -1; + + key = malloc(info.key_size); + if (!key) { + err("mem alloc failed"); + err = -1; + goto exit_free; + } + + err = parse_elem(argv, &info, key, NULL, info.key_size, 0, NULL, NULL); + if (err) + goto exit_free; + + err = bpf_map_delete_elem(fd, key); + if (err) + err("delete failed: %s\n", strerror(errno)); + +exit_free: + free(key); + close(fd); + + return err; +} + +static int do_pin(int argc, char **argv) +{ + return do_pin_any(argc, argv, bpf_map_get_fd_by_id); +} + +static int do_help(int argc, char **argv) +{ + fprintf(stderr, + "Usage: %s %s show [MAP]\n" + " %s %s dump MAP\n" + " %s %s update MAP key BYTES value VALUE [UPDATE_FLAGS]\n" + " %s %s lookup MAP key BYTES\n" + " %s %s getnext MAP [key BYTES]\n" + " %s %s delete MAP key BYTES\n" + " %s %s pin MAP FILE\n" + " %s %s help\n" + "\n" + " MAP := { id MAP_ID | pinned FILE }\n" + " " HELP_SPEC_PROGRAM "\n" + " VALUE := { BYTES | MAP | PROG }\n" + " UPDATE_FLAGS := { any | exist | noexist }\n" + "", + bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], + bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], + bin_name, argv[-2], bin_name, argv[-2]); + + return 0; +} + +static const struct cmd cmds[] = { + { "show", do_show }, + { "help", do_help }, + { "dump", do_dump }, + { "update", do_update }, + { "lookup", do_lookup }, + { "getnext", do_getnext }, + { "delete", do_delete }, + { "pin", do_pin }, + { 0 } +}; + +int do_map(int argc, char **argv) +{ + return cmd_select(cmds, argc, argv, do_help); +} diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c new file mode 100644 index 000000000000..421ba89ce86a --- /dev/null +++ b/tools/bpf/bpftool/prog.c @@ -0,0 +1,456 @@ +/* + * Copyright (C) 2017 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* Author: Jakub Kicinski <kubakici@wp.pl> */ + +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> + +#include <bpf.h> + +#include "main.h" + +static const char * const prog_type_name[] = { + [BPF_PROG_TYPE_UNSPEC] = "unspec", + [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter", + [BPF_PROG_TYPE_KPROBE] = "kprobe", + [BPF_PROG_TYPE_SCHED_CLS] = "sched_cls", + [BPF_PROG_TYPE_SCHED_ACT] = "sched_act", + [BPF_PROG_TYPE_TRACEPOINT] = "tracepoint", + [BPF_PROG_TYPE_XDP] = "xdp", + [BPF_PROG_TYPE_PERF_EVENT] = "perf_event", + [BPF_PROG_TYPE_CGROUP_SKB] = "cgroup_skb", + [BPF_PROG_TYPE_CGROUP_SOCK] = "cgroup_sock", + [BPF_PROG_TYPE_LWT_IN] = "lwt_in", + [BPF_PROG_TYPE_LWT_OUT] = "lwt_out", + [BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit", + [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops", + [BPF_PROG_TYPE_SK_SKB] = "sk_skb", +}; + +static void print_boot_time(__u64 nsecs, char *buf, unsigned int size) +{ + struct timespec real_time_ts, boot_time_ts; + time_t wallclock_secs; + struct tm load_tm; + + buf[--size] = '\0'; + + if (clock_gettime(CLOCK_REALTIME, &real_time_ts) || + clock_gettime(CLOCK_BOOTTIME, &boot_time_ts)) { + perror("Can't read clocks"); + snprintf(buf, size, "%llu", nsecs / 1000000000); + return; + } + + wallclock_secs = (real_time_ts.tv_sec - boot_time_ts.tv_sec) + + nsecs / 1000000000; + + if (!localtime_r(&wallclock_secs, &load_tm)) { + snprintf(buf, size, "%llu", nsecs / 1000000000); + return; + } + + strftime(buf, size, "%b %d/%H:%M", &load_tm); +} + +static int prog_fd_by_tag(unsigned char *tag) +{ + struct bpf_prog_info info = {}; + __u32 len = sizeof(info); + unsigned int id = 0; + int err; + int fd; + + while (true) { + err = bpf_prog_get_next_id(id, &id); + if (err) { + err("%s\n", strerror(errno)); + return -1; + } + + fd = bpf_prog_get_fd_by_id(id); + if (fd < 0) { + err("can't get prog by id (%u): %s\n", + id, strerror(errno)); + return -1; + } + + err = bpf_obj_get_info_by_fd(fd, &info, &len); + if (err) { + err("can't get prog info (%u): %s\n", + id, strerror(errno)); + close(fd); + return -1; + } + + if (!memcmp(tag, info.tag, BPF_TAG_SIZE)) + return fd; + + close(fd); + } +} + +int prog_parse_fd(int *argc, char ***argv) +{ + int fd; + + if (is_prefix(**argv, "id")) { + unsigned int id; + char *endptr; + + NEXT_ARGP(); + + id = strtoul(**argv, &endptr, 0); + if (*endptr) { + err("can't parse %s as ID\n", **argv); + return -1; + } + NEXT_ARGP(); + + fd = bpf_prog_get_fd_by_id(id); + if (fd < 0) + err("get by id (%u): %s\n", id, strerror(errno)); + return fd; + } else if (is_prefix(**argv, "tag")) { + unsigned char tag[BPF_TAG_SIZE]; + + NEXT_ARGP(); + + if (sscanf(**argv, BPF_TAG_FMT, tag, tag + 1, tag + 2, + tag + 3, tag + 4, tag + 5, tag + 6, tag + 7) + != BPF_TAG_SIZE) { + err("can't parse tag\n"); + return -1; + } + NEXT_ARGP(); + + return prog_fd_by_tag(tag); + } else if (is_prefix(**argv, "pinned")) { + char *path; + + NEXT_ARGP(); + + path = **argv; + NEXT_ARGP(); + + return open_obj_pinned_any(path, BPF_OBJ_PROG); + } + + err("expected 'id', 'tag' or 'pinned', got: '%s'?\n", **argv); + return -1; +} + +static void show_prog_maps(int fd, u32 num_maps) +{ + struct bpf_prog_info info = {}; + __u32 len = sizeof(info); + __u32 map_ids[num_maps]; + unsigned int i; + int err; + + info.nr_map_ids = num_maps; + info.map_ids = ptr_to_u64(map_ids); + + err = bpf_obj_get_info_by_fd(fd, &info, &len); + if (err || !info.nr_map_ids) + return; + + printf(" map_ids "); + for (i = 0; i < info.nr_map_ids; i++) + printf("%u%s", map_ids[i], + i == info.nr_map_ids - 1 ? "" : ","); +} + +static int show_prog(int fd) +{ + struct bpf_prog_info info = {}; + __u32 len = sizeof(info); + char *memlock; + int err; + + err = bpf_obj_get_info_by_fd(fd, &info, &len); + if (err) { + err("can't get prog info: %s\n", strerror(errno)); + return -1; + } + + printf("%u: ", info.id); + if (info.type < ARRAY_SIZE(prog_type_name)) + printf("%s ", prog_type_name[info.type]); + else + printf("type %u ", info.type); + + if (*info.name) + printf("name %s ", info.name); + + printf("tag "); + print_hex(info.tag, BPF_TAG_SIZE, ":"); + printf("\n"); + + if (info.load_time) { + char buf[32]; + + print_boot_time(info.load_time, buf, sizeof(buf)); + + /* Piggy back on load_time, since 0 uid is a valid one */ + printf("\tloaded_at %s uid %u\n", buf, info.created_by_uid); + } + + printf("\txlated %uB", info.xlated_prog_len); + + if (info.jited_prog_len) + printf(" jited %uB", info.jited_prog_len); + else + printf(" not jited"); + + memlock = get_fdinfo(fd, "memlock"); + if (memlock) + printf(" memlock %sB", memlock); + free(memlock); + + if (info.nr_map_ids) + show_prog_maps(fd, info.nr_map_ids); + + printf("\n"); + + return 0; +} + +static int do_show(int argc, char **argv) +{ __u32 id = 0; + int err; + int fd; + + if (argc == 2) { + fd = prog_parse_fd(&argc, &argv); + if (fd < 0) + return -1; + + return show_prog(fd); + } + + if (argc) + return BAD_ARG(); + + while (true) { + err = bpf_prog_get_next_id(id, &id); + if (err) { + if (errno == ENOENT) + break; + err("can't get next program: %s\n", strerror(errno)); + if (errno == EINVAL) + err("kernel too old?\n"); + return -1; + } + + fd = bpf_prog_get_fd_by_id(id); + if (fd < 0) { + err("can't get prog by id (%u): %s\n", + id, strerror(errno)); + return -1; + } + + err = show_prog(fd); + close(fd); + if (err) + return err; + } + + return 0; +} + +static int do_dump(int argc, char **argv) +{ + struct bpf_prog_info info = {}; + __u32 len = sizeof(info); + bool can_disasm = false; + unsigned int buf_size; + char *filepath = NULL; + bool opcodes = false; + unsigned char *buf; + __u32 *member_len; + __u64 *member_ptr; + ssize_t n; + int err; + int fd; + + if (is_prefix(*argv, "jited")) { + member_len = &info.jited_prog_len; + member_ptr = &info.jited_prog_insns; + can_disasm = true; + } else if (is_prefix(*argv, "xlated")) { + member_len = &info.xlated_prog_len; + member_ptr = &info.xlated_prog_insns; + } else { + err("expected 'xlated' or 'jited', got: %s\n", *argv); + return -1; + } + NEXT_ARG(); + + if (argc < 2) + usage(); + + fd = prog_parse_fd(&argc, &argv); + if (fd < 0) + return -1; + + if (is_prefix(*argv, "file")) { + NEXT_ARG(); + if (!argc) { + err("expected file path\n"); + return -1; + } + + filepath = *argv; + NEXT_ARG(); + } else if (is_prefix(*argv, "opcodes")) { + opcodes = true; + NEXT_ARG(); + } + + if (!filepath && !can_disasm) { + err("expected 'file' got %s\n", *argv); + return -1; + } + if (argc) { + usage(); + return -1; + } + + err = bpf_obj_get_info_by_fd(fd, &info, &len); + if (err) { + err("can't get prog info: %s\n", strerror(errno)); + return -1; + } + + if (!*member_len) { + info("no instructions returned\n"); + close(fd); + return 0; + } + + buf_size = *member_len; + + buf = malloc(buf_size); + if (!buf) { + err("mem alloc failed\n"); + close(fd); + return -1; + } + + memset(&info, 0, sizeof(info)); + + *member_ptr = ptr_to_u64(buf); + *member_len = buf_size; + + err = bpf_obj_get_info_by_fd(fd, &info, &len); + close(fd); + if (err) { + err("can't get prog info: %s\n", strerror(errno)); + goto err_free; + } + + if (*member_len > buf_size) { + info("too many instructions returned\n"); + goto err_free; + } + + if (filepath) { + fd = open(filepath, O_WRONLY | O_CREAT | O_TRUNC, 0600); + if (fd < 0) { + err("can't open file %s: %s\n", filepath, + strerror(errno)); + goto err_free; + } + + n = write(fd, buf, *member_len); + close(fd); + if (n != *member_len) { + err("error writing output file: %s\n", + n < 0 ? strerror(errno) : "short write"); + goto err_free; + } + } else { + disasm_print_insn(buf, *member_len, opcodes); + } + + free(buf); + + return 0; + +err_free: + free(buf); + return -1; +} + +static int do_pin(int argc, char **argv) +{ + return do_pin_any(argc, argv, bpf_prog_get_fd_by_id); +} + +static int do_help(int argc, char **argv) +{ + fprintf(stderr, + "Usage: %s %s show [PROG]\n" + " %s %s dump xlated PROG file FILE\n" + " %s %s dump jited PROG [file FILE] [opcodes]\n" + " %s %s pin PROG FILE\n" + " %s %s help\n" + "\n" + " " HELP_SPEC_PROGRAM "\n" + "", + bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], + bin_name, argv[-2], bin_name, argv[-2]); + + return 0; +} + +static const struct cmd cmds[] = { + { "show", do_show }, + { "dump", do_dump }, + { "pin", do_pin }, + { 0 } +}; + +int do_prog(int argc, char **argv) +{ + return cmd_select(cmds, argc, argv, do_help); +} diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 43ab5c402f98..fb4fb81ce5b0 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -92,6 +92,7 @@ enum bpf_cmd { BPF_PROG_GET_FD_BY_ID, BPF_MAP_GET_FD_BY_ID, BPF_OBJ_GET_INFO_BY_FD, + BPF_PROG_QUERY, }; enum bpf_map_type { @@ -143,11 +144,47 @@ enum bpf_attach_type { #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE -/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command - * to the given target_fd cgroup the descendent cgroup will be able to - * override effective bpf program that was inherited from this cgroup +/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command + * + * NONE(default): No further bpf programs allowed in the subtree. + * + * BPF_F_ALLOW_OVERRIDE: If a sub-cgroup installs some bpf program, + * the program in this cgroup yields to sub-cgroup program. + * + * BPF_F_ALLOW_MULTI: If a sub-cgroup installs some bpf program, + * that cgroup program gets run in addition to the program in this cgroup. + * + * Only one program is allowed to be attached to a cgroup with + * NONE or BPF_F_ALLOW_OVERRIDE flag. + * Attaching another program on top of NONE or BPF_F_ALLOW_OVERRIDE will + * release old program and attach the new one. Attach flags has to match. + * + * Multiple programs are allowed to be attached to a cgroup with + * BPF_F_ALLOW_MULTI flag. They are executed in FIFO order + * (those that were attached first, run first) + * The programs of sub-cgroup are executed first, then programs of + * this cgroup and then programs of parent cgroup. + * When children program makes decision (like picking TCP CA or sock bind) + * parent program has a chance to override it. + * + * A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups. + * A cgroup with NONE doesn't allow any programs in sub-cgroups. + * Ex1: + * cgrp1 (MULTI progs A, B) -> + * cgrp2 (OVERRIDE prog C) -> + * cgrp3 (MULTI prog D) -> + * cgrp4 (OVERRIDE prog E) -> + * cgrp5 (NONE prog F) + * the event in cgrp5 triggers execution of F,D,A,B in that order. + * if prog F is detached, the execution is E,D,A,B + * if prog F and D are detached, the execution is E,A,B + * if prog F, E and D are detached, the execution is C,A,B + * + * All eligible programs are executed regardless of return code from + * earlier programs. */ #define BPF_F_ALLOW_OVERRIDE (1U << 0) +#define BPF_F_ALLOW_MULTI (1U << 1) /* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the * verifier will perform strict alignment checking as if the kernel @@ -175,6 +212,11 @@ enum bpf_attach_type { /* Specify numa node during map creation */ #define BPF_F_NUMA_NODE (1U << 2) +/* flags for BPF_PROG_QUERY */ +#define BPF_F_QUERY_EFFECTIVE (1U << 0) + +#define BPF_OBJ_NAME_LEN 16U + union bpf_attr { struct { /* anonymous struct used by BPF_MAP_CREATE command */ __u32 map_type; /* one of enum bpf_map_type */ @@ -188,6 +230,7 @@ union bpf_attr { __u32 numa_node; /* numa node (effective only if * BPF_F_NUMA_NODE is set). */ + char map_name[BPF_OBJ_NAME_LEN]; }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ @@ -210,6 +253,7 @@ union bpf_attr { __aligned_u64 log_buf; /* user supplied buffer */ __u32 kern_version; /* checked when prog_type=kprobe */ __u32 prog_flags; + char prog_name[BPF_OBJ_NAME_LEN]; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -249,6 +293,15 @@ union bpf_attr { __u32 info_len; __aligned_u64 info; } info; + + struct { /* anonymous struct used by BPF_PROG_QUERY command */ + __u32 target_fd; /* container object to query */ + __u32 attach_type; + __u32 query_flags; + __u32 attach_flags; + __aligned_u64 prog_ids; + __u32 prog_cnt; + } query; } __attribute__((aligned(8))); /* BPF helper function descriptions: @@ -582,6 +635,12 @@ union bpf_attr { * @map: pointer to sockmap to update * @key: key to insert/update sock in map * @flags: same flags as map update elem + * + * int bpf_xdp_adjust_meta(xdp_md, delta) + * Adjust the xdp_md.data_meta by delta + * @xdp_md: pointer to xdp_md + * @delta: An positive/negative integer to be added to xdp_md.data_meta + * Return: 0 on success or negative on error */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -638,6 +697,9 @@ union bpf_attr { FN(redirect_map), \ FN(sk_redirect_map), \ FN(sock_map_update), \ + FN(xdp_adjust_meta), \ + FN(perf_event_read_value), \ + FN(perf_prog_read_value), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -715,7 +777,7 @@ struct __sk_buff { __u32 data_end; __u32 napi_id; - /* accessed by BPF_PROG_TYPE_sk_skb types */ + /* Accessed by BPF_PROG_TYPE_sk_skb types from here to ... */ __u32 family; __u32 remote_ip4; /* Stored in network byte order */ __u32 local_ip4; /* Stored in network byte order */ @@ -723,6 +785,9 @@ struct __sk_buff { __u32 local_ip6[4]; /* Stored in network byte order */ __u32 remote_port; /* Stored in network byte order */ __u32 local_port; /* stored in host byte order */ + /* ... here. */ + + __u32 data_meta; }; struct bpf_tunnel_key { @@ -783,6 +848,7 @@ enum xdp_action { struct xdp_md { __u32 data; __u32 data_end; + __u32 data_meta; }; enum sk_action { @@ -801,6 +867,11 @@ struct bpf_prog_info { __u32 xlated_prog_len; __aligned_u64 jited_prog_insns; __aligned_u64 xlated_prog_insns; + __u64 load_time; /* ns since boottime */ + __u32 created_by_uid; + __u32 nr_map_ids; + __aligned_u64 map_ids; + char name[BPF_OBJ_NAME_LEN]; } __attribute__((aligned(8))); struct bpf_map_info { @@ -810,6 +881,7 @@ struct bpf_map_info { __u32 value_size; __u32 max_entries; __u32 map_flags; + char name[BPF_OBJ_NAME_LEN]; } __attribute__((aligned(8))); /* User bpf_sock_ops struct to access socket values and specify request ops diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 1d6907d379c9..5128677e4117 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -46,6 +46,8 @@ # endif #endif +#define min(x, y) ((x) < (y) ? (x) : (y)) + static inline __u64 ptr_to_u64(const void *ptr) { return (__u64) (unsigned long) ptr; @@ -57,10 +59,11 @@ static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, return syscall(__NR_bpf, cmd, attr, size); } -int bpf_create_map_node(enum bpf_map_type map_type, int key_size, - int value_size, int max_entries, __u32 map_flags, - int node) +int bpf_create_map_node(enum bpf_map_type map_type, const char *name, + int key_size, int value_size, int max_entries, + __u32 map_flags, int node) { + __u32 name_len = name ? strlen(name) : 0; union bpf_attr attr; memset(&attr, '\0', sizeof(attr)); @@ -70,6 +73,8 @@ int bpf_create_map_node(enum bpf_map_type map_type, int key_size, attr.value_size = value_size; attr.max_entries = max_entries; attr.map_flags = map_flags; + memcpy(attr.map_name, name, min(name_len, BPF_OBJ_NAME_LEN - 1)); + if (node >= 0) { attr.map_flags |= BPF_F_NUMA_NODE; attr.numa_node = node; @@ -81,14 +86,23 @@ int bpf_create_map_node(enum bpf_map_type map_type, int key_size, int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, int max_entries, __u32 map_flags) { - return bpf_create_map_node(map_type, key_size, value_size, + return bpf_create_map_node(map_type, NULL, key_size, value_size, max_entries, map_flags, -1); } -int bpf_create_map_in_map_node(enum bpf_map_type map_type, int key_size, - int inner_map_fd, int max_entries, +int bpf_create_map_name(enum bpf_map_type map_type, const char *name, + int key_size, int value_size, int max_entries, + __u32 map_flags) +{ + return bpf_create_map_node(map_type, name, key_size, value_size, + max_entries, map_flags, -1); +} + +int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, + int key_size, int inner_map_fd, int max_entries, __u32 map_flags, int node) { + __u32 name_len = name ? strlen(name) : 0; union bpf_attr attr; memset(&attr, '\0', sizeof(attr)); @@ -99,6 +113,8 @@ int bpf_create_map_in_map_node(enum bpf_map_type map_type, int key_size, attr.inner_map_fd = inner_map_fd; attr.max_entries = max_entries; attr.map_flags = map_flags; + memcpy(attr.map_name, name, min(name_len, BPF_OBJ_NAME_LEN - 1)); + if (node >= 0) { attr.map_flags |= BPF_F_NUMA_NODE; attr.numa_node = node; @@ -107,19 +123,24 @@ int bpf_create_map_in_map_node(enum bpf_map_type map_type, int key_size, return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); } -int bpf_create_map_in_map(enum bpf_map_type map_type, int key_size, - int inner_map_fd, int max_entries, __u32 map_flags) +int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name, + int key_size, int inner_map_fd, int max_entries, + __u32 map_flags) { - return bpf_create_map_in_map_node(map_type, key_size, inner_map_fd, - max_entries, map_flags, -1); + return bpf_create_map_in_map_node(map_type, name, key_size, + inner_map_fd, max_entries, map_flags, + -1); } -int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, - size_t insns_cnt, const char *license, - __u32 kern_version, char *log_buf, size_t log_buf_sz) +int bpf_load_program_name(enum bpf_prog_type type, const char *name, + const struct bpf_insn *insns, + size_t insns_cnt, const char *license, + __u32 kern_version, char *log_buf, + size_t log_buf_sz) { int fd; union bpf_attr attr; + __u32 name_len = name ? strlen(name) : 0; bzero(&attr, sizeof(attr)); attr.prog_type = type; @@ -130,6 +151,7 @@ int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, attr.log_size = 0; attr.log_level = 0; attr.kern_version = kern_version; + memcpy(attr.prog_name, name, min(name_len, BPF_OBJ_NAME_LEN - 1)); fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); if (fd >= 0 || !log_buf || !log_buf_sz) @@ -143,6 +165,15 @@ int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, return sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); } +int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, + size_t insns_cnt, const char *license, + __u32 kern_version, char *log_buf, + size_t log_buf_sz) +{ + return bpf_load_program_name(type, NULL, insns, insns_cnt, license, + kern_version, log_buf, log_buf_sz); +} + int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, size_t insns_cnt, int strict_alignment, const char *license, __u32 kern_version, @@ -260,6 +291,38 @@ int bpf_prog_detach(int target_fd, enum bpf_attach_type type) return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr)); } +int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type) +{ + union bpf_attr attr; + + bzero(&attr, sizeof(attr)); + attr.target_fd = target_fd; + attr.attach_bpf_fd = prog_fd; + attr.attach_type = type; + + return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr)); +} + +int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags, + __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt) +{ + union bpf_attr attr; + int ret; + + bzero(&attr, sizeof(attr)); + attr.query.target_fd = target_fd; + attr.query.attach_type = type; + attr.query.query_flags = query_flags; + attr.query.prog_cnt = *prog_cnt; + attr.query.prog_ids = ptr_to_u64(prog_ids); + + ret = sys_bpf(BPF_PROG_QUERY, &attr, sizeof(attr)); + if (attach_flags) + *attach_flags = attr.query.attach_flags; + *prog_cnt = attr.query.prog_cnt; + return ret; +} + int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, void *data_out, __u32 *size_out, __u32 *retval, __u32 *duration) diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index b8ea5843c39e..6534889e2b2f 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -24,19 +24,28 @@ #include <linux/bpf.h> #include <stddef.h> -int bpf_create_map_node(enum bpf_map_type map_type, int key_size, - int value_size, int max_entries, __u32 map_flags, - int node); +int bpf_create_map_node(enum bpf_map_type map_type, const char *name, + int key_size, int value_size, int max_entries, + __u32 map_flags, int node); +int bpf_create_map_name(enum bpf_map_type map_type, const char *name, + int key_size, int value_size, int max_entries, + __u32 map_flags); int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, int max_entries, __u32 map_flags); -int bpf_create_map_in_map_node(enum bpf_map_type map_type, int key_size, - int inner_map_fd, int max_entries, +int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, + int key_size, int inner_map_fd, int max_entries, __u32 map_flags, int node); -int bpf_create_map_in_map(enum bpf_map_type map_type, int key_size, - int inner_map_fd, int max_entries, __u32 map_flags); +int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name, + int key_size, int inner_map_fd, int max_entries, + __u32 map_flags); /* Recommend log buffer size */ #define BPF_LOG_BUF_SIZE 65536 +int bpf_load_program_name(enum bpf_prog_type type, const char *name, + const struct bpf_insn *insns, + size_t insns_cnt, const char *license, + __u32 kern_version, char *log_buf, + size_t log_buf_sz); int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, size_t insns_cnt, const char *license, __u32 kern_version, char *log_buf, @@ -57,6 +66,7 @@ int bpf_obj_get(const char *pathname); int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type, unsigned int flags); int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); +int bpf_prog_detach2(int prog_fd, int attachable_fd, enum bpf_attach_type type); int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, void *data_out, __u32 *size_out, __u32 *retval, __u32 *duration); @@ -65,5 +75,6 @@ int bpf_map_get_next_id(__u32 start_id, __u32 *next_id); int bpf_prog_get_fd_by_id(__u32 id); int bpf_map_get_fd_by_id(__u32 id); int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len); - +int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags, + __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt); #endif diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 35f6dfcdc565..5aa45f89da93 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -171,6 +171,7 @@ int libbpf_strerror(int err, char *buf, size_t size) struct bpf_program { /* Index in elf obj file, for relocation use. */ int idx; + char *name; char *section_name; struct bpf_insn *insns; size_t insns_cnt; @@ -283,6 +284,7 @@ static void bpf_program__exit(struct bpf_program *prog) prog->clear_priv = NULL; bpf_program__unload(prog); + zfree(&prog->name); zfree(&prog->section_name); zfree(&prog->insns); zfree(&prog->reloc_desc); @@ -293,26 +295,27 @@ static void bpf_program__exit(struct bpf_program *prog) } static int -bpf_program__init(void *data, size_t size, char *name, int idx, - struct bpf_program *prog) +bpf_program__init(void *data, size_t size, char *section_name, int idx, + struct bpf_program *prog) { if (size < sizeof(struct bpf_insn)) { - pr_warning("corrupted section '%s'\n", name); + pr_warning("corrupted section '%s'\n", section_name); return -EINVAL; } bzero(prog, sizeof(*prog)); - prog->section_name = strdup(name); + prog->section_name = strdup(section_name); if (!prog->section_name) { - pr_warning("failed to alloc name for prog %s\n", - name); + pr_warning("failed to alloc name for prog under section %s\n", + section_name); goto errout; } prog->insns = malloc(size); if (!prog->insns) { - pr_warning("failed to alloc insns for %s\n", name); + pr_warning("failed to alloc insns for prog under section %s\n", + section_name); goto errout; } prog->insns_cnt = size / sizeof(struct bpf_insn); @@ -331,12 +334,12 @@ errout: static int bpf_object__add_program(struct bpf_object *obj, void *data, size_t size, - char *name, int idx) + char *section_name, int idx) { struct bpf_program prog, *progs; int nr_progs, err; - err = bpf_program__init(data, size, name, idx, &prog); + err = bpf_program__init(data, size, section_name, idx, &prog); if (err) return err; @@ -350,8 +353,8 @@ bpf_object__add_program(struct bpf_object *obj, void *data, size_t size, * is still valid, so don't need special treat for * bpf_close_object(). */ - pr_warning("failed to alloc a new program '%s'\n", - name); + pr_warning("failed to alloc a new program under section '%s'\n", + section_name); bpf_program__exit(&prog); return -ENOMEM; } @@ -364,6 +367,54 @@ bpf_object__add_program(struct bpf_object *obj, void *data, size_t size, return 0; } +static int +bpf_object__init_prog_names(struct bpf_object *obj) +{ + Elf_Data *symbols = obj->efile.symbols; + struct bpf_program *prog; + size_t pi, si; + + for (pi = 0; pi < obj->nr_programs; pi++) { + char *name = NULL; + + prog = &obj->programs[pi]; + + for (si = 0; si < symbols->d_size / sizeof(GElf_Sym) && !name; + si++) { + GElf_Sym sym; + + if (!gelf_getsym(symbols, si, &sym)) + continue; + if (sym.st_shndx != prog->idx) + continue; + + name = elf_strptr(obj->efile.elf, + obj->efile.strtabidx, + sym.st_name); + if (!name) { + pr_warning("failed to get sym name string for prog %s\n", + prog->section_name); + return -LIBBPF_ERRNO__LIBELF; + } + } + + if (!name) { + pr_warning("failed to find sym for prog %s\n", + prog->section_name); + return -EINVAL; + } + + prog->name = strdup(name); + if (!prog->name) { + pr_warning("failed to allocate memory for prog sym %s\n", + name); + return -ENOMEM; + } + } + + return 0; +} + static struct bpf_object *bpf_object__new(const char *path, void *obj_buf, size_t obj_buf_sz) @@ -528,31 +579,6 @@ bpf_object__init_kversion(struct bpf_object *obj, return 0; } -static int -bpf_object__validate_maps(struct bpf_object *obj) -{ - int i; - - /* - * If there's only 1 map, the only error case should have been - * catched in bpf_object__init_maps(). - */ - if (!obj->maps || !obj->nr_maps || (obj->nr_maps == 1)) - return 0; - - for (i = 1; i < obj->nr_maps; i++) { - const struct bpf_map *a = &obj->maps[i - 1]; - const struct bpf_map *b = &obj->maps[i]; - - if (b->offset - a->offset < sizeof(struct bpf_map_def)) { - pr_warning("corrupted map section in %s: map \"%s\" too small\n", - obj->path, a->name); - return -EINVAL; - } - } - return 0; -} - static int compare_bpf_map(const void *_a, const void *_b) { const struct bpf_map *a = _a; @@ -564,7 +590,7 @@ static int compare_bpf_map(const void *_a, const void *_b) static int bpf_object__init_maps(struct bpf_object *obj) { - int i, map_idx, nr_maps = 0; + int i, map_idx, map_def_sz, nr_maps = 0; Elf_Scn *scn; Elf_Data *data; Elf_Data *symbols = obj->efile.symbols; @@ -607,6 +633,15 @@ bpf_object__init_maps(struct bpf_object *obj) if (!nr_maps) return 0; + /* Assume equally sized map definitions */ + map_def_sz = data->d_size / nr_maps; + if (!data->d_size || (data->d_size % nr_maps) != 0) { + pr_warning("unable to determine map definition size " + "section %s, %d maps in %zd bytes\n", + obj->path, nr_maps, data->d_size); + return -EINVAL; + } + obj->maps = calloc(nr_maps, sizeof(obj->maps[0])); if (!obj->maps) { pr_warning("alloc maps for object failed\n"); @@ -639,7 +674,7 @@ bpf_object__init_maps(struct bpf_object *obj) obj->efile.strtabidx, sym.st_name); obj->maps[map_idx].offset = sym.st_value; - if (sym.st_value + sizeof(struct bpf_map_def) > data->d_size) { + if (sym.st_value + map_def_sz > data->d_size) { pr_warning("corrupted maps section in %s: last map \"%s\" too small\n", obj->path, map_name); return -EINVAL; @@ -653,12 +688,40 @@ bpf_object__init_maps(struct bpf_object *obj) pr_debug("map %d is \"%s\"\n", map_idx, obj->maps[map_idx].name); def = (struct bpf_map_def *)(data->d_buf + sym.st_value); - obj->maps[map_idx].def = *def; + /* + * If the definition of the map in the object file fits in + * bpf_map_def, copy it. Any extra fields in our version + * of bpf_map_def will default to zero as a result of the + * calloc above. + */ + if (map_def_sz <= sizeof(struct bpf_map_def)) { + memcpy(&obj->maps[map_idx].def, def, map_def_sz); + } else { + /* + * Here the map structure being read is bigger than what + * we expect, truncate if the excess bits are all zero. + * If they are not zero, reject this map as + * incompatible. + */ + char *b; + for (b = ((char *)def) + sizeof(struct bpf_map_def); + b < ((char *)def) + map_def_sz; b++) { + if (*b != 0) { + pr_warning("maps section in %s: \"%s\" " + "has unrecognized, non-zero " + "options\n", + obj->path, map_name); + return -EINVAL; + } + } + memcpy(&obj->maps[map_idx].def, def, + sizeof(struct bpf_map_def)); + } map_idx++; } qsort(obj->maps, obj->nr_maps, sizeof(obj->maps[0]), compare_bpf_map); - return bpf_object__validate_maps(obj); + return 0; } static int bpf_object__elf_collect(struct bpf_object *obj) @@ -766,8 +829,12 @@ static int bpf_object__elf_collect(struct bpf_object *obj) pr_warning("Corrupted ELF file: index of strtab invalid\n"); return LIBBPF_ERRNO__FORMAT; } - if (obj->efile.maps_shndx >= 0) + if (obj->efile.maps_shndx >= 0) { err = bpf_object__init_maps(obj); + if (err) + goto out; + } + err = bpf_object__init_prog_names(obj); out: return err; } @@ -870,11 +937,12 @@ bpf_object__create_maps(struct bpf_object *obj) struct bpf_map_def *def = &obj->maps[i].def; int *pfd = &obj->maps[i].fd; - *pfd = bpf_create_map(def->type, - def->key_size, - def->value_size, - def->max_entries, - 0); + *pfd = bpf_create_map_name(def->type, + obj->maps[i].name, + def->key_size, + def->value_size, + def->max_entries, + def->map_flags); if (*pfd < 0) { size_t j; int err = *pfd; @@ -982,7 +1050,7 @@ static int bpf_object__collect_reloc(struct bpf_object *obj) } static int -load_program(enum bpf_prog_type type, struct bpf_insn *insns, +load_program(enum bpf_prog_type type, const char *name, struct bpf_insn *insns, int insns_cnt, char *license, u32 kern_version, int *pfd) { int ret; @@ -995,8 +1063,8 @@ load_program(enum bpf_prog_type type, struct bpf_insn *insns, if (!log_buf) pr_warning("Alloc log buffer for bpf loader error, continue without log\n"); - ret = bpf_load_program(type, insns, insns_cnt, license, - kern_version, log_buf, BPF_LOG_BUF_SIZE); + ret = bpf_load_program_name(type, name, insns, insns_cnt, license, + kern_version, log_buf, BPF_LOG_BUF_SIZE); if (ret >= 0) { *pfd = ret; @@ -1021,9 +1089,9 @@ load_program(enum bpf_prog_type type, struct bpf_insn *insns, if (type != BPF_PROG_TYPE_KPROBE) { int fd; - fd = bpf_load_program(BPF_PROG_TYPE_KPROBE, insns, - insns_cnt, license, kern_version, - NULL, 0); + fd = bpf_load_program_name(BPF_PROG_TYPE_KPROBE, name, + insns, insns_cnt, license, + kern_version, NULL, 0); if (fd >= 0) { close(fd); ret = -LIBBPF_ERRNO__PROGTYPE; @@ -1067,8 +1135,8 @@ bpf_program__load(struct bpf_program *prog, pr_warning("Program '%s' is inconsistent: nr(%d) != 1\n", prog->section_name, prog->instances.nr); } - err = load_program(prog->type, prog->insns, prog->insns_cnt, - license, kern_version, &fd); + err = load_program(prog->type, prog->name, prog->insns, + prog->insns_cnt, license, kern_version, &fd); if (!err) prog->instances.fds[0] = fd; goto out; @@ -1096,7 +1164,8 @@ bpf_program__load(struct bpf_program *prog, continue; } - err = load_program(prog->type, result.new_insn_ptr, + err = load_program(prog->type, prog->name, + result.new_insn_ptr, result.new_insn_cnt, license, kern_version, &fd); diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 7959086eb9c9..6e20003109e0 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -207,6 +207,7 @@ struct bpf_map_def { unsigned int key_size; unsigned int value_size; unsigned int max_entries; + unsigned int map_flags; }; /* diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index f4b23d697448..924af8d79bde 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -15,9 +15,10 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test test_align TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \ - test_pkt_md_access.o test_xdp_redirect.o sockmap_parse_prog.o sockmap_verdict_prog.o + test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ + sockmap_verdict_prog.o -TEST_PROGS := test_kmod.sh test_xdp_redirect.sh +TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh include ../lib.mk @@ -34,8 +35,20 @@ $(BPFOBJ): force $(MAKE) -C $(BPFDIR) OUTPUT=$(OUTPUT)/ CLANG ?= clang +LLC ?= llc + +PROBE := $(shell llc -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1) + +# Let newer LLVM versions transparently probe the kernel for availability +# of full BPF instruction set. +ifeq ($(PROBE),) + CPU ?= probe +else + CPU ?= generic +endif %.o: %.c $(CLANG) -I. -I./include/uapi -I../../../include/uapi \ - -Wno-compare-distinct-pointer-types \ - -O2 -target bpf -c $< -o $@ + -Wno-compare-distinct-pointer-types \ + -O2 -target bpf -emit-llvm -c $< -o - | \ + $(LLC) -march=bpf -mcpu=$(CPU) -filetype=obj -o $@ diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 36fb9161b34a..e25dbf6038cf 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -62,6 +62,8 @@ static unsigned long long (*bpf_get_prandom_u32)(void) = (void *) BPF_FUNC_get_prandom_u32; static int (*bpf_xdp_adjust_head)(void *ctx, int offset) = (void *) BPF_FUNC_xdp_adjust_head; +static int (*bpf_xdp_adjust_meta)(void *ctx, int offset) = + (void *) BPF_FUNC_xdp_adjust_meta; static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval, int optlen) = (void *) BPF_FUNC_setsockopt; @@ -70,6 +72,12 @@ static int (*bpf_sk_redirect_map)(void *map, int key, int flags) = static int (*bpf_sock_map_update)(void *map, void *key, void *value, unsigned long long flags) = (void *) BPF_FUNC_sock_map_update; +static int (*bpf_perf_event_read_value)(void *map, unsigned long long flags, + void *buf, unsigned int buf_size) = + (void *) BPF_FUNC_perf_event_read_value; +static int (*bpf_perf_prog_read_value)(void *ctx, void *buf, + unsigned int buf_size) = + (void *) BPF_FUNC_perf_prog_read_value; /* llvm builtin functions that eBPF C program may use to @@ -109,7 +117,47 @@ static int (*bpf_skb_under_cgroup)(void *ctx, void *map, int index) = static int (*bpf_skb_change_head)(void *, int len, int flags) = (void *) BPF_FUNC_skb_change_head; +/* Scan the ARCH passed in from ARCH env variable (see Makefile) */ +#if defined(__TARGET_ARCH_x86) + #define bpf_target_x86 + #define bpf_target_defined +#elif defined(__TARGET_ARCH_s930x) + #define bpf_target_s930x + #define bpf_target_defined +#elif defined(__TARGET_ARCH_arm64) + #define bpf_target_arm64 + #define bpf_target_defined +#elif defined(__TARGET_ARCH_mips) + #define bpf_target_mips + #define bpf_target_defined +#elif defined(__TARGET_ARCH_powerpc) + #define bpf_target_powerpc + #define bpf_target_defined +#elif defined(__TARGET_ARCH_sparc) + #define bpf_target_sparc + #define bpf_target_defined +#else + #undef bpf_target_defined +#endif + +/* Fall back to what the compiler says */ +#ifndef bpf_target_defined #if defined(__x86_64__) + #define bpf_target_x86 +#elif defined(__s390x__) + #define bpf_target_s930x +#elif defined(__aarch64__) + #define bpf_target_arm64 +#elif defined(__mips__) + #define bpf_target_mips +#elif defined(__powerpc__) + #define bpf_target_powerpc +#elif defined(__sparc__) + #define bpf_target_sparc +#endif +#endif + +#if defined(bpf_target_x86) #define PT_REGS_PARM1(x) ((x)->di) #define PT_REGS_PARM2(x) ((x)->si) @@ -122,7 +170,7 @@ static int (*bpf_skb_change_head)(void *, int len, int flags) = #define PT_REGS_SP(x) ((x)->sp) #define PT_REGS_IP(x) ((x)->ip) -#elif defined(__s390x__) +#elif defined(bpf_target_s390x) #define PT_REGS_PARM1(x) ((x)->gprs[2]) #define PT_REGS_PARM2(x) ((x)->gprs[3]) @@ -135,7 +183,7 @@ static int (*bpf_skb_change_head)(void *, int len, int flags) = #define PT_REGS_SP(x) ((x)->gprs[15]) #define PT_REGS_IP(x) ((x)->psw.addr) -#elif defined(__aarch64__) +#elif defined(bpf_target_arm64) #define PT_REGS_PARM1(x) ((x)->regs[0]) #define PT_REGS_PARM2(x) ((x)->regs[1]) @@ -148,7 +196,7 @@ static int (*bpf_skb_change_head)(void *, int len, int flags) = #define PT_REGS_SP(x) ((x)->sp) #define PT_REGS_IP(x) ((x)->pc) -#elif defined(__mips__) +#elif defined(bpf_target_mips) #define PT_REGS_PARM1(x) ((x)->regs[4]) #define PT_REGS_PARM2(x) ((x)->regs[5]) @@ -161,7 +209,7 @@ static int (*bpf_skb_change_head)(void *, int len, int flags) = #define PT_REGS_SP(x) ((x)->regs[29]) #define PT_REGS_IP(x) ((x)->cp0_epc) -#elif defined(__powerpc__) +#elif defined(bpf_target_powerpc) #define PT_REGS_PARM1(x) ((x)->gpr[3]) #define PT_REGS_PARM2(x) ((x)->gpr[4]) @@ -172,7 +220,7 @@ static int (*bpf_skb_change_head)(void *, int len, int flags) = #define PT_REGS_SP(x) ((x)->sp) #define PT_REGS_IP(x) ((x)->nip) -#elif defined(__sparc__) +#elif defined(bpf_target_sparc) #define PT_REGS_PARM1(x) ((x)->u_regs[UREG_I0]) #define PT_REGS_PARM2(x) ((x)->u_regs[UREG_I1]) @@ -182,6 +230,8 @@ static int (*bpf_skb_change_head)(void *, int len, int flags) = #define PT_REGS_RET(x) ((x)->u_regs[UREG_I7]) #define PT_REGS_RC(x) ((x)->u_regs[UREG_I0]) #define PT_REGS_SP(x) ((x)->u_regs[UREG_FP]) + +/* Should this also be a bpf_target check for the sparc case? */ #if defined(__arch64__) #define PT_REGS_IP(x) ((x)->tpc) #else @@ -190,10 +240,10 @@ static int (*bpf_skb_change_head)(void *, int len, int flags) = #endif -#ifdef __powerpc__ +#ifdef bpf_target_powerpc #define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = (ctx)->link; }) #define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP -#elif defined(__sparc__) +#elif bpf_target_sparc #define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = PT_REGS_RET(ctx); }) #define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP #else diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c index e97565243d59..6fedb9fec36b 100644 --- a/tools/testing/selftests/bpf/test_lpm_map.c +++ b/tools/testing/selftests/bpf/test_lpm_map.c @@ -31,6 +31,10 @@ struct tlpm_node { uint8_t key[]; }; +static struct tlpm_node *tlpm_match(struct tlpm_node *list, + const uint8_t *key, + size_t n_bits); + static struct tlpm_node *tlpm_add(struct tlpm_node *list, const uint8_t *key, size_t n_bits) @@ -38,9 +42,17 @@ static struct tlpm_node *tlpm_add(struct tlpm_node *list, struct tlpm_node *node; size_t n; + n = (n_bits + 7) / 8; + + /* 'overwrite' an equivalent entry if one already exists */ + node = tlpm_match(list, key, n_bits); + if (node && node->n_bits == n_bits) { + memcpy(node->key, key, n); + return list; + } + /* add new entry with @key/@n_bits to @list and return new head */ - n = (n_bits + 7) / 8; node = malloc(sizeof(*node) + n); assert(node); @@ -92,6 +104,34 @@ static struct tlpm_node *tlpm_match(struct tlpm_node *list, return best; } +static struct tlpm_node *tlpm_delete(struct tlpm_node *list, + const uint8_t *key, + size_t n_bits) +{ + struct tlpm_node *best = tlpm_match(list, key, n_bits); + struct tlpm_node *node; + + if (!best || best->n_bits != n_bits) + return list; + + if (best == list) { + node = best->next; + free(best); + return node; + } + + for (node = list; node; node = node->next) { + if (node->next == best) { + node->next = best->next; + free(best); + return list; + } + } + /* should never get here */ + assert(0); + return list; +} + static void test_lpm_basic(void) { struct tlpm_node *list = NULL, *t1, *t2; @@ -114,6 +154,13 @@ static void test_lpm_basic(void) assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff, 0xff }, 15)); assert(!tlpm_match(list, (uint8_t[]){ 0x7f, 0xff }, 16)); + list = tlpm_delete(list, (uint8_t[]){ 0xff, 0xff }, 16); + assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff }, 8)); + assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff, 0xff }, 16)); + + list = tlpm_delete(list, (uint8_t[]){ 0xff }, 8); + assert(!tlpm_match(list, (uint8_t[]){ 0xff }, 8)); + tlpm_clear(list); } @@ -158,7 +205,7 @@ static void test_lpm_order(void) static void test_lpm_map(int keysize) { - size_t i, j, n_matches, n_nodes, n_lookups; + size_t i, j, n_matches, n_matches_after_delete, n_nodes, n_lookups; struct tlpm_node *t, *list = NULL; struct bpf_lpm_trie_key *key; uint8_t *data, *value; @@ -170,6 +217,7 @@ static void test_lpm_map(int keysize) */ n_matches = 0; + n_matches_after_delete = 0; n_nodes = 1 << 8; n_lookups = 1 << 16; @@ -223,15 +271,54 @@ static void test_lpm_map(int keysize) } } + /* Remove the first half of the elements in the tlpm and the + * corresponding nodes from the bpf-lpm. Then run the same + * large number of random lookups in both and make sure they match. + * Note: we need to count the number of nodes actually inserted + * since there may have been duplicates. + */ + for (i = 0, t = list; t; i++, t = t->next) + ; + for (j = 0; j < i / 2; ++j) { + key->prefixlen = list->n_bits; + memcpy(key->data, list->key, keysize); + r = bpf_map_delete_elem(map, key); + assert(!r); + list = tlpm_delete(list, list->key, list->n_bits); + assert(list); + } + for (i = 0; i < n_lookups; ++i) { + for (j = 0; j < keysize; ++j) + data[j] = rand() & 0xff; + + t = tlpm_match(list, data, 8 * keysize); + + key->prefixlen = 8 * keysize; + memcpy(key->data, data, keysize); + r = bpf_map_lookup_elem(map, key, value); + assert(!r || errno == ENOENT); + assert(!t == !!r); + + if (t) { + ++n_matches_after_delete; + assert(t->n_bits == value[keysize]); + for (j = 0; j < t->n_bits; ++j) + assert((t->key[j / 8] & (1 << (7 - j % 8))) == + (value[j / 8] & (1 << (7 - j % 8)))); + } + } + close(map); tlpm_clear(list); /* With 255 random nodes in the map, we are pretty likely to match * something on every lookup. For statistics, use this: * - * printf(" nodes: %zu\n" - * "lookups: %zu\n" - * "matches: %zu\n", n_nodes, n_lookups, n_matches); + * printf(" nodes: %zu\n" + * " lookups: %zu\n" + * " matches: %zu\n" + * "matches(delete): %zu\n", + * n_nodes, n_lookups, n_matches, n_matches_after_delete); */ } @@ -331,6 +418,108 @@ static void test_lpm_ipaddr(void) close(map_fd_ipv6); } +static void test_lpm_delete(void) +{ + struct bpf_lpm_trie_key *key; + size_t key_size; + int map_fd; + __u64 value; + + key_size = sizeof(*key) + sizeof(__u32); + key = alloca(key_size); + + map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, + key_size, sizeof(value), + 100, BPF_F_NO_PREALLOC); + assert(map_fd >= 0); + + /* Add nodes: + * 192.168.0.0/16 (1) + * 192.168.0.0/24 (2) + * 192.168.128.0/24 (3) + * 192.168.1.0/24 (4) + * + * (1) + * / \ + * (IM) (3) + * / \ + * (2) (4) + */ + value = 1; + key->prefixlen = 16; + inet_pton(AF_INET, "192.168.0.0", key->data); + assert(bpf_map_update_elem(map_fd, key, &value, 0) == 0); + + value = 2; + key->prefixlen = 24; + inet_pton(AF_INET, "192.168.0.0", key->data); + assert(bpf_map_update_elem(map_fd, key, &value, 0) == 0); + + value = 3; + key->prefixlen = 24; + inet_pton(AF_INET, "192.168.128.0", key->data); + assert(bpf_map_update_elem(map_fd, key, &value, 0) == 0); + + value = 4; + key->prefixlen = 24; + inet_pton(AF_INET, "192.168.1.0", key->data); + assert(bpf_map_update_elem(map_fd, key, &value, 0) == 0); + + /* remove non-existent node */ + key->prefixlen = 32; + inet_pton(AF_INET, "10.0.0.1", key->data); + assert(bpf_map_lookup_elem(map_fd, key, &value) == -1 && + errno == ENOENT); + + /* assert initial lookup */ + key->prefixlen = 32; + inet_pton(AF_INET, "192.168.0.1", key->data); + assert(bpf_map_lookup_elem(map_fd, key, &value) == 0); + assert(value == 2); + + /* remove leaf node */ + key->prefixlen = 24; + inet_pton(AF_INET, "192.168.0.0", key->data); + assert(bpf_map_delete_elem(map_fd, key) == 0); + + key->prefixlen = 32; + inet_pton(AF_INET, "192.168.0.1", key->data); + assert(bpf_map_lookup_elem(map_fd, key, &value) == 0); + assert(value == 1); + + /* remove leaf (and intermediary) node */ + key->prefixlen = 24; + inet_pton(AF_INET, "192.168.1.0", key->data); + assert(bpf_map_delete_elem(map_fd, key) == 0); + + key->prefixlen = 32; + inet_pton(AF_INET, "192.168.1.1", key->data); + assert(bpf_map_lookup_elem(map_fd, key, &value) == 0); + assert(value == 1); + + /* remove root node */ + key->prefixlen = 16; + inet_pton(AF_INET, "192.168.0.0", key->data); + assert(bpf_map_delete_elem(map_fd, key) == 0); + + key->prefixlen = 32; + inet_pton(AF_INET, "192.168.128.1", key->data); + assert(bpf_map_lookup_elem(map_fd, key, &value) == 0); + assert(value == 3); + + /* remove last node */ + key->prefixlen = 24; + inet_pton(AF_INET, "192.168.128.0", key->data); + assert(bpf_map_delete_elem(map_fd, key) == 0); + + key->prefixlen = 32; + inet_pton(AF_INET, "192.168.128.1", key->data); + assert(bpf_map_lookup_elem(map_fd, key, &value) == -1 && + errno == ENOENT); + + close(map_fd); +} + int main(void) { struct rlimit limit = { RLIM_INFINITY, RLIM_INFINITY }; @@ -353,6 +542,8 @@ int main(void) test_lpm_ipaddr(); + test_lpm_delete(); + printf("test_lpm: OK\n"); return 0; } diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 11ee25cea227..69427531408d 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -10,6 +10,7 @@ #include <string.h> #include <assert.h> #include <stdlib.h> +#include <time.h> #include <linux/types.h> typedef __u16 __sum16; @@ -19,6 +20,8 @@ typedef __u16 __sum16; #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/tcp.h> +#include <linux/filter.h> +#include <linux/unistd.h> #include <sys/wait.h> #include <sys/resource.h> @@ -273,16 +276,26 @@ static void test_bpf_obj_id(void) const int nr_iters = 2; const char *file = "./test_obj_id.o"; const char *jit_sysctl = "/proc/sys/net/core/bpf_jit_enable"; + const char *expected_prog_name = "test_obj_id"; + const char *expected_map_name = "test_map_id"; + const __u64 nsec_per_sec = 1000000000; struct bpf_object *objs[nr_iters]; int prog_fds[nr_iters], map_fds[nr_iters]; /* +1 to test for the info_len returned by kernel */ struct bpf_prog_info prog_infos[nr_iters + 1]; struct bpf_map_info map_infos[nr_iters + 1]; + /* Each prog only uses one map. +1 to test nr_map_ids + * returned by kernel. + */ + __u32 map_ids[nr_iters + 1]; char jited_insns[128], xlated_insns[128], zeros[128]; __u32 i, next_id, info_len, nr_id_found, duration = 0; + struct timespec real_time_ts, boot_time_ts; int sysctl_fd, jit_enabled = 0, err = 0; __u64 array_value; + uid_t my_uid = getuid(); + time_t now, load_time; sysctl_fd = open(jit_sysctl, 0, O_RDONLY); if (sysctl_fd != -1) { @@ -307,6 +320,7 @@ static void test_bpf_obj_id(void) /* Check bpf_obj_get_info_by_fd() */ bzero(zeros, sizeof(zeros)); for (i = 0; i < nr_iters; i++) { + now = time(NULL); err = bpf_prog_load(file, BPF_PROG_TYPE_SOCKET_FILTER, &objs[i], &prog_fds[i]); /* test_obj_id.o is a dumb prog. It should never fail @@ -316,6 +330,38 @@ static void test_bpf_obj_id(void) error_cnt++; assert(!err); + /* Insert a magic value to the map */ + map_fds[i] = bpf_find_map(__func__, objs[i], "test_map_id"); + assert(map_fds[i] >= 0); + err = bpf_map_update_elem(map_fds[i], &array_key, + &array_magic_value, 0); + assert(!err); + + /* Check getting map info */ + info_len = sizeof(struct bpf_map_info) * 2; + bzero(&map_infos[i], info_len); + err = bpf_obj_get_info_by_fd(map_fds[i], &map_infos[i], + &info_len); + if (CHECK(err || + map_infos[i].type != BPF_MAP_TYPE_ARRAY || + map_infos[i].key_size != sizeof(__u32) || + map_infos[i].value_size != sizeof(__u64) || + map_infos[i].max_entries != 1 || + map_infos[i].map_flags != 0 || + info_len != sizeof(struct bpf_map_info) || + strcmp((char *)map_infos[i].name, expected_map_name), + "get-map-info(fd)", + "err %d errno %d type %d(%d) info_len %u(%lu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n", + err, errno, + map_infos[i].type, BPF_MAP_TYPE_ARRAY, + info_len, sizeof(struct bpf_map_info), + map_infos[i].key_size, + map_infos[i].value_size, + map_infos[i].max_entries, + map_infos[i].map_flags, + map_infos[i].name, expected_map_name)) + goto done; + /* Check getting prog info */ info_len = sizeof(struct bpf_prog_info) * 2; bzero(&prog_infos[i], info_len); @@ -325,8 +371,16 @@ static void test_bpf_obj_id(void) prog_infos[i].jited_prog_len = sizeof(jited_insns); prog_infos[i].xlated_prog_insns = ptr_to_u64(xlated_insns); prog_infos[i].xlated_prog_len = sizeof(xlated_insns); + prog_infos[i].map_ids = ptr_to_u64(map_ids + i); + prog_infos[i].nr_map_ids = 2; + err = clock_gettime(CLOCK_REALTIME, &real_time_ts); + assert(!err); + err = clock_gettime(CLOCK_BOOTTIME, &boot_time_ts); + assert(!err); err = bpf_obj_get_info_by_fd(prog_fds[i], &prog_infos[i], &info_len); + load_time = (real_time_ts.tv_sec - boot_time_ts.tv_sec) + + (prog_infos[i].load_time / nsec_per_sec); if (CHECK(err || prog_infos[i].type != BPF_PROG_TYPE_SOCKET_FILTER || info_len != sizeof(struct bpf_prog_info) || @@ -334,9 +388,14 @@ static void test_bpf_obj_id(void) (jit_enabled && !memcmp(jited_insns, zeros, sizeof(zeros))) || !prog_infos[i].xlated_prog_len || - !memcmp(xlated_insns, zeros, sizeof(zeros)), + !memcmp(xlated_insns, zeros, sizeof(zeros)) || + load_time < now - 60 || load_time > now + 60 || + prog_infos[i].created_by_uid != my_uid || + prog_infos[i].nr_map_ids != 1 || + *(int *)prog_infos[i].map_ids != map_infos[i].id || + strcmp((char *)prog_infos[i].name, expected_prog_name), "get-prog-info(fd)", - "err %d errno %d i %d type %d(%d) info_len %u(%lu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d\n", + "err %d errno %d i %d type %d(%d) info_len %u(%lu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name %s(%s)\n", err, errno, i, prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER, info_len, sizeof(struct bpf_prog_info), @@ -344,36 +403,12 @@ static void test_bpf_obj_id(void) prog_infos[i].jited_prog_len, prog_infos[i].xlated_prog_len, !!memcmp(jited_insns, zeros, sizeof(zeros)), - !!memcmp(xlated_insns, zeros, sizeof(zeros)))) - goto done; - - map_fds[i] = bpf_find_map(__func__, objs[i], "test_map_id"); - assert(map_fds[i] >= 0); - err = bpf_map_update_elem(map_fds[i], &array_key, - &array_magic_value, 0); - assert(!err); - - /* Check getting map info */ - info_len = sizeof(struct bpf_map_info) * 2; - bzero(&map_infos[i], info_len); - err = bpf_obj_get_info_by_fd(map_fds[i], &map_infos[i], - &info_len); - if (CHECK(err || - map_infos[i].type != BPF_MAP_TYPE_ARRAY || - map_infos[i].key_size != sizeof(__u32) || - map_infos[i].value_size != sizeof(__u64) || - map_infos[i].max_entries != 1 || - map_infos[i].map_flags != 0 || - info_len != sizeof(struct bpf_map_info), - "get-map-info(fd)", - "err %d errno %d type %d(%d) info_len %u(%lu) key_size %u value_size %u max_entries %u map_flags %X\n", - err, errno, - map_infos[i].type, BPF_MAP_TYPE_ARRAY, - info_len, sizeof(struct bpf_map_info), - map_infos[i].key_size, - map_infos[i].value_size, - map_infos[i].max_entries, - map_infos[i].map_flags)) + !!memcmp(xlated_insns, zeros, sizeof(zeros)), + load_time, now, + prog_infos[i].created_by_uid, my_uid, + prog_infos[i].nr_map_ids, 1, + *(int *)prog_infos[i].map_ids, map_infos[i].id, + prog_infos[i].name, expected_prog_name)) goto done; } @@ -382,6 +417,7 @@ static void test_bpf_obj_id(void) next_id = 0; while (!bpf_prog_get_next_id(next_id, &next_id)) { struct bpf_prog_info prog_info = {}; + __u32 saved_map_id; int prog_fd; info_len = sizeof(prog_info); @@ -404,16 +440,33 @@ static void test_bpf_obj_id(void) nr_id_found++; + /* Negative test: + * prog_info.nr_map_ids = 1 + * prog_info.map_ids = NULL + */ + prog_info.nr_map_ids = 1; + err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len); + if (CHECK(!err || errno != EFAULT, + "get-prog-fd-bad-nr-map-ids", "err %d errno %d(%d)", + err, errno, EFAULT)) + break; + bzero(&prog_info, sizeof(prog_info)); + info_len = sizeof(prog_info); + + saved_map_id = *(int *)(prog_infos[i].map_ids); + prog_info.map_ids = prog_infos[i].map_ids; + prog_info.nr_map_ids = 2; err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len); prog_infos[i].jited_prog_insns = 0; prog_infos[i].xlated_prog_insns = 0; CHECK(err || info_len != sizeof(struct bpf_prog_info) || - memcmp(&prog_info, &prog_infos[i], info_len), + memcmp(&prog_info, &prog_infos[i], info_len) || + *(int *)prog_info.map_ids != saved_map_id, "get-prog-info(next_id->fd)", - "err %d errno %d info_len %u(%lu) memcmp %d\n", + "err %d errno %d info_len %u(%lu) memcmp %d map_id %u(%u)\n", err, errno, info_len, sizeof(struct bpf_prog_info), - memcmp(&prog_info, &prog_infos[i], info_len)); - + memcmp(&prog_info, &prog_infos[i], info_len), + *(int *)prog_info.map_ids, saved_map_id); close(prog_fd); } CHECK(nr_id_found != nr_iters, @@ -495,6 +548,75 @@ static void test_pkt_md_access(void) bpf_object__close(obj); } +static void test_obj_name(void) +{ + struct { + const char *name; + int success; + int expected_errno; + } tests[] = { + { "", 1, 0 }, + { "_123456789ABCDE", 1, 0 }, + { "_123456789ABCDEF", 0, EINVAL }, + { "_123456789ABCD\n", 0, EINVAL }, + }; + struct bpf_insn prog[] = { + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + __u32 duration = 0; + int i; + + for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) { + size_t name_len = strlen(tests[i].name) + 1; + union bpf_attr attr; + size_t ncopy; + int fd; + + /* test different attr.prog_name during BPF_PROG_LOAD */ + ncopy = name_len < sizeof(attr.prog_name) ? + name_len : sizeof(attr.prog_name); + bzero(&attr, sizeof(attr)); + attr.prog_type = BPF_PROG_TYPE_SCHED_CLS; + attr.insn_cnt = 2; + attr.insns = ptr_to_u64(prog); + attr.license = ptr_to_u64(""); + memcpy(attr.prog_name, tests[i].name, ncopy); + + fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)); + CHECK((tests[i].success && fd < 0) || + (!tests[i].success && fd != -1) || + (!tests[i].success && errno != tests[i].expected_errno), + "check-bpf-prog-name", + "fd %d(%d) errno %d(%d)\n", + fd, tests[i].success, errno, tests[i].expected_errno); + + if (fd != -1) + close(fd); + + /* test different attr.map_name during BPF_MAP_CREATE */ + ncopy = name_len < sizeof(attr.map_name) ? + name_len : sizeof(attr.map_name); + bzero(&attr, sizeof(attr)); + attr.map_type = BPF_MAP_TYPE_ARRAY; + attr.key_size = 4; + attr.value_size = 4; + attr.max_entries = 1; + attr.map_flags = 0; + memcpy(attr.map_name, tests[i].name, ncopy); + fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr)); + CHECK((tests[i].success && fd < 0) || + (!tests[i].success && fd != -1) || + (!tests[i].success && errno != tests[i].expected_errno), + "check-bpf-map-name", + "fd %d(%d) errno %d(%d)\n", + fd, tests[i].success, errno, tests[i].expected_errno); + + if (fd != -1) + close(fd); + } +} + int main(void) { struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY }; @@ -507,6 +629,7 @@ int main(void) test_tcp_estats(); test_bpf_obj_id(); test_pkt_md_access(); + test_obj_name(); printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt); return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS; diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 26f3250bdcd2..cc91d0159f43 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -6645,6 +6645,325 @@ static struct bpf_test tests[] = { .errstr = "BPF_END uses reserved fields", .result = REJECT, }, + { + "meta access, test1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "meta access, test2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 8), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet, off=-8", + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "meta access, test3", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "meta access, test4", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_4), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "meta access, test5", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_3), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_4, 3), + BPF_MOV64_IMM(BPF_REG_2, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_xdp_adjust_meta), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R3 !read_ok", + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "meta access, test6", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_3), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_0, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "meta access, test7", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_3), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "meta access, test8", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0xFFFF), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "meta access, test9", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0xFFFF), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 1), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "meta access, test10", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_IMM(BPF_REG_5, 42), + BPF_MOV64_IMM(BPF_REG_6, 24), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_5, -8), + BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_6, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -8), + BPF_JMP_IMM(BPF_JGT, BPF_REG_5, 100, 6), + BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_5), + BPF_MOV64_REG(BPF_REG_5, BPF_REG_3), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_5, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "meta access, test11", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_IMM(BPF_REG_5, 42), + BPF_MOV64_IMM(BPF_REG_6, 24), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_5, -8), + BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_6, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -8), + BPF_JMP_IMM(BPF_JGT, BPF_REG_5, 100, 6), + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_5), + BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_5, BPF_REG_5, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "meta access, test12", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_5, BPF_REG_3), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 16), + BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_4, 5), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_3, 0), + BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 16), + BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "bpf_exit with invalid return code. test1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R0 has value (0x0; 0xffffffff)", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, + }, + { + "bpf_exit with invalid return code. test2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, + }, + { + "bpf_exit with invalid return code. test3", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 3), + BPF_EXIT_INSN(), + }, + .errstr = "R0 has value (0x0; 0x3)", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, + }, + { + "bpf_exit with invalid return code. test4", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, + }, + { + "bpf_exit with invalid return code. test5", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .errstr = "R0 has value (0x2; 0x0)", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, + }, + { + "bpf_exit with invalid return code. test6", + .insns = { + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .errstr = "R0 is not a known value (ctx)", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, + }, + { + "bpf_exit with invalid return code. test7", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 4), + BPF_ALU64_REG(BPF_MUL, BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .errstr = "R0 has unknown scalar value", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, + }, }; static int probe_filter_length(const struct bpf_insn *fp) @@ -6692,7 +7011,7 @@ static int create_map_in_map(void) return inner_map_fd; } - outer_map_fd = bpf_create_map_in_map(BPF_MAP_TYPE_ARRAY_OF_MAPS, + outer_map_fd = bpf_create_map_in_map(BPF_MAP_TYPE_ARRAY_OF_MAPS, NULL, sizeof(int), inner_map_fd, 1, 0); if (outer_map_fd < 0) printf("Failed to create array of maps '%s'!\n", diff --git a/tools/testing/selftests/bpf/test_xdp_meta.c b/tools/testing/selftests/bpf/test_xdp_meta.c new file mode 100644 index 000000000000..8d0182650653 --- /dev/null +++ b/tools/testing/selftests/bpf/test_xdp_meta.c @@ -0,0 +1,53 @@ +#include <linux/bpf.h> +#include <linux/if_ether.h> +#include <linux/pkt_cls.h> + +#include "bpf_helpers.h" + +#define __round_mask(x, y) ((__typeof__(x))((y) - 1)) +#define round_up(x, y) ((((x) - 1) | __round_mask(x, y)) + 1) +#define ctx_ptr(ctx, mem) (void *)(unsigned long)ctx->mem + +SEC("t") +int ing_cls(struct __sk_buff *ctx) +{ + __u8 *data, *data_meta, *data_end; + __u32 diff = 0; + + data_meta = ctx_ptr(ctx, data_meta); + data_end = ctx_ptr(ctx, data_end); + data = ctx_ptr(ctx, data); + + if (data + ETH_ALEN > data_end || + data_meta + round_up(ETH_ALEN, 4) > data) + return TC_ACT_SHOT; + + diff |= ((__u32 *)data_meta)[0] ^ ((__u32 *)data)[0]; + diff |= ((__u16 *)data_meta)[2] ^ ((__u16 *)data)[2]; + + return diff ? TC_ACT_SHOT : TC_ACT_OK; +} + +SEC("x") +int ing_xdp(struct xdp_md *ctx) +{ + __u8 *data, *data_meta, *data_end; + int ret; + + ret = bpf_xdp_adjust_meta(ctx, -round_up(ETH_ALEN, 4)); + if (ret < 0) + return XDP_DROP; + + data_meta = ctx_ptr(ctx, data_meta); + data_end = ctx_ptr(ctx, data_end); + data = ctx_ptr(ctx, data); + + if (data + ETH_ALEN > data_end || + data_meta + round_up(ETH_ALEN, 4) > data) + return XDP_DROP; + + __builtin_memcpy(data_meta, data, ETH_ALEN); + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_xdp_meta.sh b/tools/testing/selftests/bpf/test_xdp_meta.sh new file mode 100755 index 000000000000..307aa856cee3 --- /dev/null +++ b/tools/testing/selftests/bpf/test_xdp_meta.sh @@ -0,0 +1,51 @@ +#!/bin/sh + +cleanup() +{ + if [ "$?" = "0" ]; then + echo "selftests: test_xdp_meta [PASS]"; + else + echo "selftests: test_xdp_meta [FAILED]"; + fi + + set +e + ip netns del ns1 2> /dev/null + ip netns del ns2 2> /dev/null +} + +ip link set dev lo xdp off 2>/dev/null > /dev/null +if [ $? -ne 0 ];then + echo "selftests: [SKIP] Could not run test without the ip xdp support" + exit 0 +fi +set -e + +ip netns add ns1 +ip netns add ns2 + +trap cleanup 0 2 3 6 9 + +ip link add veth1 type veth peer name veth2 + +ip link set veth1 netns ns1 +ip link set veth2 netns ns2 + +ip netns exec ns1 ip addr add 10.1.1.11/24 dev veth1 +ip netns exec ns2 ip addr add 10.1.1.22/24 dev veth2 + +ip netns exec ns1 tc qdisc add dev veth1 clsact +ip netns exec ns2 tc qdisc add dev veth2 clsact + +ip netns exec ns1 tc filter add dev veth1 ingress bpf da obj test_xdp_meta.o sec t +ip netns exec ns2 tc filter add dev veth2 ingress bpf da obj test_xdp_meta.o sec t + +ip netns exec ns1 ip link set dev veth1 xdp obj test_xdp_meta.o sec x +ip netns exec ns2 ip link set dev veth2 xdp obj test_xdp_meta.o sec x + +ip netns exec ns1 ip link set dev veth1 up +ip netns exec ns2 ip link set dev veth2 up + +ip netns exec ns1 ping -c 1 10.1.1.22 +ip netns exec ns2 ping -c 1 10.1.1.11 + +exit 0 diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 57b5ff576240..e8c86c416ed0 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -15,6 +15,14 @@ check_err() fi } +# same but inverted -- used when command must fail for test to pass +check_fail() +{ + if [ $1 -eq 0 ]; then + ret=1 + fi +} + kci_add_dummy() { ip link add name "$devdummy" type dummy @@ -235,6 +243,196 @@ kci_test_addrlabel() echo "PASS: ipv6 addrlabel" } +kci_test_ifalias() +{ + ret=0 + namewant=$(uuidgen) + syspathname="/sys/class/net/$devdummy/ifalias" + + ip link set dev "$devdummy" alias "$namewant" + check_err $? + + if [ $ret -ne 0 ]; then + echo "FAIL: cannot set interface alias of $devdummy to $namewant" + return 1 + fi + + ip link show "$devdummy" | grep -q "alias $namewant" + check_err $? + + if [ -r "$syspathname" ] ; then + read namehave < "$syspathname" + if [ "$namewant" != "$namehave" ]; then + echo "FAIL: did set ifalias $namewant but got $namehave" + return 1 + fi + + namewant=$(uuidgen) + echo "$namewant" > "$syspathname" + ip link show "$devdummy" | grep -q "alias $namewant" + check_err $? + + # sysfs interface allows to delete alias again + echo "" > "$syspathname" + + ip link show "$devdummy" | grep -q "alias $namewant" + check_fail $? + + for i in $(seq 1 100); do + uuidgen > "$syspathname" & + done + + wait + + # re-add the alias -- kernel should free mem when dummy dev is removed + ip link set dev "$devdummy" alias "$namewant" + check_err $? + fi + + if [ $ret -ne 0 ]; then + echo "FAIL: set interface alias $devdummy to $namewant" + return 1 + fi + + echo "PASS: set ifalias $namewant for $devdummy" +} + +kci_test_vrf() +{ + vrfname="test-vrf" + ret=0 + + ip link show type vrf 2>/dev/null + if [ $? -ne 0 ]; then + echo "SKIP: vrf: iproute2 too old" + return 0 + fi + + ip link add "$vrfname" type vrf table 10 + check_err $? + if [ $ret -ne 0 ];then + echo "FAIL: can't add vrf interface, skipping test" + return 0 + fi + + ip -br link show type vrf | grep -q "$vrfname" + check_err $? + if [ $ret -ne 0 ];then + echo "FAIL: created vrf device not found" + return 1 + fi + + ip link set dev "$vrfname" up + check_err $? + + ip link set dev "$devdummy" master "$vrfname" + check_err $? + ip link del dev "$vrfname" + check_err $? + + if [ $ret -ne 0 ];then + echo "FAIL: vrf" + return 1 + fi + + echo "PASS: vrf" +} + +kci_test_encap_vxlan() +{ + ret=0 + vxlan="test-vxlan0" + vlan="test-vlan0" + testns="$1" + + ip netns exec "$testns" ip link add "$vxlan" type vxlan id 42 group 239.1.1.1 \ + dev "$devdummy" dstport 4789 2>/dev/null + if [ $? -ne 0 ]; then + echo "FAIL: can't add vxlan interface, skipping test" + return 0 + fi + check_err $? + + ip netns exec "$testns" ip addr add 10.2.11.49/24 dev "$vxlan" + check_err $? + + ip netns exec "$testns" ip link set up dev "$vxlan" + check_err $? + + ip netns exec "$testns" ip link add link "$vxlan" name "$vlan" type vlan id 1 + check_err $? + + ip netns exec "$testns" ip link del "$vxlan" + check_err $? + + if [ $ret -ne 0 ]; then + echo "FAIL: vxlan" + return 1 + fi + echo "PASS: vxlan" +} + +kci_test_encap_fou() +{ + ret=0 + name="test-fou" + testns="$1" + + ip fou help 2>&1 |grep -q 'Usage: ip fou' + if [ $? -ne 0 ];then + echo "SKIP: fou: iproute2 too old" + return 1 + fi + + ip netns exec "$testns" ip fou add port 7777 ipproto 47 2>/dev/null + if [ $? -ne 0 ];then + echo "FAIL: can't add fou port 7777, skipping test" + return 1 + fi + + ip netns exec "$testns" ip fou add port 8888 ipproto 4 + check_err $? + + ip netns exec "$testns" ip fou del port 9999 2>/dev/null + check_fail $? + + ip netns exec "$testns" ip fou del port 7777 + check_err $? + + if [ $ret -ne 0 ]; then + echo "FAIL: fou" + return 1 + fi + + echo "PASS: fou" +} + +# test various encap methods, use netns to avoid unwanted interference +kci_test_encap() +{ + testns="testns" + ret=0 + + ip netns add "$testns" + if [ $? -ne 0 ]; then + echo "SKIP encap tests: cannot add net namespace $testns" + return 1 + fi + + ip netns exec "$testns" ip link set lo up + check_err $? + + ip netns exec "$testns" ip link add name "$devdummy" type dummy + check_err $? + ip netns exec "$testns" ip link set "$devdummy" up + check_err $? + + kci_test_encap_vxlan "$testns" + kci_test_encap_fou "$testns" + + ip netns del "$testns" +} + kci_test_rtnl() { kci_add_dummy @@ -249,6 +447,9 @@ kci_test_rtnl() kci_test_gre kci_test_bridge kci_test_addrlabel + kci_test_ifalias + kci_test_vrf + kci_test_encap kci_del_dummy } diff --git a/tools/testing/vsock/.gitignore b/tools/testing/vsock/.gitignore new file mode 100644 index 000000000000..dc5f11faf530 --- /dev/null +++ b/tools/testing/vsock/.gitignore @@ -0,0 +1,2 @@ +*.d +vsock_diag_test diff --git a/tools/testing/vsock/Makefile b/tools/testing/vsock/Makefile new file mode 100644 index 000000000000..66ba0924194d --- /dev/null +++ b/tools/testing/vsock/Makefile @@ -0,0 +1,9 @@ +all: test +test: vsock_diag_test +vsock_diag_test: vsock_diag_test.o timeout.o control.o + +CFLAGS += -g -O2 -Werror -Wall -I. -I../../include/uapi -I../../include -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -D_GNU_SOURCE +.PHONY: all test clean +clean: + ${RM} *.o *.d vsock_diag_test +-include *.d diff --git a/tools/testing/vsock/README b/tools/testing/vsock/README new file mode 100644 index 000000000000..2cc6d7302db6 --- /dev/null +++ b/tools/testing/vsock/README @@ -0,0 +1,36 @@ +AF_VSOCK test suite +------------------- +These tests exercise net/vmw_vsock/ host<->guest sockets for VMware, KVM, and +Hyper-V. + +The following tests are available: + + * vsock_diag_test - vsock_diag.ko module for listing open sockets + +The following prerequisite steps are not automated and must be performed prior +to running tests: + +1. Build the kernel and these tests. +2. Install the kernel and tests on the host. +3. Install the kernel and tests inside the guest. +4. Boot the guest and ensure that the AF_VSOCK transport is enabled. + +Invoke test binaries in both directions as follows: + + # host=server, guest=client + (host)# $TEST_BINARY --mode=server \ + --control-port=1234 \ + --peer-cid=3 + (guest)# $TEST_BINARY --mode=client \ + --control-host=$HOST_IP \ + --control-port=1234 \ + --peer-cid=2 + + # host=client, guest=server + (guest)# $TEST_BINARY --mode=server \ + --control-port=1234 \ + --peer-cid=2 + (host)# $TEST_BINARY --mode=client \ + --control-port=$GUEST_IP \ + --control-port=1234 \ + --peer-cid=3 diff --git a/tools/testing/vsock/control.c b/tools/testing/vsock/control.c new file mode 100644 index 000000000000..90fd47f0e422 --- /dev/null +++ b/tools/testing/vsock/control.c @@ -0,0 +1,219 @@ +/* Control socket for client/server test execution + * + * Copyright (C) 2017 Red Hat, Inc. + * + * Author: Stefan Hajnoczi <stefanha@redhat.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +/* The client and server may need to coordinate to avoid race conditions like + * the client attempting to connect to a socket that the server is not + * listening on yet. The control socket offers a communications channel for + * such coordination tasks. + * + * If the client calls control_expectln("LISTENING"), then it will block until + * the server calls control_writeln("LISTENING"). This provides a simple + * mechanism for coordinating between the client and the server. + */ + +#include <errno.h> +#include <netdb.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/socket.h> + +#include "timeout.h" +#include "control.h" + +static int control_fd = -1; + +/* Open the control socket, either in server or client mode */ +void control_init(const char *control_host, + const char *control_port, + bool server) +{ + struct addrinfo hints = { + .ai_socktype = SOCK_STREAM, + }; + struct addrinfo *result = NULL; + struct addrinfo *ai; + int ret; + + ret = getaddrinfo(control_host, control_port, &hints, &result); + if (ret != 0) { + fprintf(stderr, "%s\n", gai_strerror(ret)); + exit(EXIT_FAILURE); + } + + for (ai = result; ai; ai = ai->ai_next) { + int fd; + int val = 1; + + fd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol); + if (fd < 0) + continue; + + if (!server) { + if (connect(fd, ai->ai_addr, ai->ai_addrlen) < 0) + goto next; + control_fd = fd; + printf("Control socket connected to %s:%s.\n", + control_host, control_port); + break; + } + + if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, + &val, sizeof(val)) < 0) { + perror("setsockopt"); + exit(EXIT_FAILURE); + } + + if (bind(fd, ai->ai_addr, ai->ai_addrlen) < 0) + goto next; + if (listen(fd, 1) < 0) + goto next; + + printf("Control socket listening on %s:%s\n", + control_host, control_port); + fflush(stdout); + + control_fd = accept(fd, NULL, 0); + close(fd); + + if (control_fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + printf("Control socket connection accepted...\n"); + break; + +next: + close(fd); + } + + if (control_fd < 0) { + fprintf(stderr, "Control socket initialization failed. Invalid address %s:%s?\n", + control_host, control_port); + exit(EXIT_FAILURE); + } + + freeaddrinfo(result); +} + +/* Free resources */ +void control_cleanup(void) +{ + close(control_fd); + control_fd = -1; +} + +/* Write a line to the control socket */ +void control_writeln(const char *str) +{ + ssize_t len = strlen(str); + ssize_t ret; + + timeout_begin(TIMEOUT); + + do { + ret = send(control_fd, str, len, MSG_MORE); + timeout_check("send"); + } while (ret < 0 && errno == EINTR); + + if (ret != len) { + perror("send"); + exit(EXIT_FAILURE); + } + + do { + ret = send(control_fd, "\n", 1, 0); + timeout_check("send"); + } while (ret < 0 && errno == EINTR); + + if (ret != 1) { + perror("send"); + exit(EXIT_FAILURE); + } + + timeout_end(); +} + +/* Return the next line from the control socket (without the trailing newline). + * + * The program terminates if a timeout occurs. + * + * The caller must free() the returned string. + */ +char *control_readln(void) +{ + char *buf = NULL; + size_t idx = 0; + size_t buflen = 0; + + timeout_begin(TIMEOUT); + + for (;;) { + ssize_t ret; + + if (idx >= buflen) { + char *new_buf; + + new_buf = realloc(buf, buflen + 80); + if (!new_buf) { + perror("realloc"); + exit(EXIT_FAILURE); + } + + buf = new_buf; + buflen += 80; + } + + do { + ret = recv(control_fd, &buf[idx], 1, 0); + timeout_check("recv"); + } while (ret < 0 && errno == EINTR); + + if (ret == 0) { + fprintf(stderr, "unexpected EOF on control socket\n"); + exit(EXIT_FAILURE); + } + + if (ret != 1) { + perror("recv"); + exit(EXIT_FAILURE); + } + + if (buf[idx] == '\n') { + buf[idx] = '\0'; + break; + } + + idx++; + } + + timeout_end(); + + return buf; +} + +/* Wait until a given line is received or a timeout occurs */ +void control_expectln(const char *str) +{ + char *line; + + line = control_readln(); + if (strcmp(str, line) != 0) { + fprintf(stderr, "expected \"%s\" on control socket, got \"%s\"\n", + str, line); + exit(EXIT_FAILURE); + } + + free(line); +} diff --git a/tools/testing/vsock/control.h b/tools/testing/vsock/control.h new file mode 100644 index 000000000000..54a07efd267c --- /dev/null +++ b/tools/testing/vsock/control.h @@ -0,0 +1,13 @@ +#ifndef CONTROL_H +#define CONTROL_H + +#include <stdbool.h> + +void control_init(const char *control_host, const char *control_port, + bool server); +void control_cleanup(void); +void control_writeln(const char *str); +char *control_readln(void); +void control_expectln(const char *str); + +#endif /* CONTROL_H */ diff --git a/tools/testing/vsock/timeout.c b/tools/testing/vsock/timeout.c new file mode 100644 index 000000000000..c49b3003b2db --- /dev/null +++ b/tools/testing/vsock/timeout.c @@ -0,0 +1,64 @@ +/* Timeout API for single-threaded programs that use blocking + * syscalls (read/write/send/recv/connect/accept). + * + * Copyright (C) 2017 Red Hat, Inc. + * + * Author: Stefan Hajnoczi <stefanha@redhat.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +/* Use the following pattern: + * + * timeout_begin(TIMEOUT); + * do { + * ret = accept(...); + * timeout_check("accept"); + * } while (ret < 0 && ret == EINTR); + * timeout_end(); + */ + +#include <stdlib.h> +#include <stdbool.h> +#include <unistd.h> +#include <stdio.h> +#include "timeout.h" + +static volatile bool timeout; + +/* SIGALRM handler function. Do not use sleep(2), alarm(2), or + * setitimer(2) while using this API - they may interfere with each + * other. + */ +void sigalrm(int signo) +{ + timeout = true; +} + +/* Start a timeout. Call timeout_check() to verify that the timeout hasn't + * expired. timeout_end() must be called to stop the timeout. Timeouts cannot + * be nested. + */ +void timeout_begin(unsigned int seconds) +{ + alarm(seconds); +} + +/* Exit with an error message if the timeout has expired */ +void timeout_check(const char *operation) +{ + if (timeout) { + fprintf(stderr, "%s timed out\n", operation); + exit(EXIT_FAILURE); + } +} + +/* Stop a timeout */ +void timeout_end(void) +{ + alarm(0); + timeout = false; +} diff --git a/tools/testing/vsock/timeout.h b/tools/testing/vsock/timeout.h new file mode 100644 index 000000000000..77db9ce9860a --- /dev/null +++ b/tools/testing/vsock/timeout.h @@ -0,0 +1,14 @@ +#ifndef TIMEOUT_H +#define TIMEOUT_H + +enum { + /* Default timeout */ + TIMEOUT = 10 /* seconds */ +}; + +void sigalrm(int signo); +void timeout_begin(unsigned int seconds); +void timeout_check(const char *operation); +void timeout_end(void); + +#endif /* TIMEOUT_H */ diff --git a/tools/testing/vsock/vsock_diag_test.c b/tools/testing/vsock/vsock_diag_test.c new file mode 100644 index 000000000000..e896a4af52f4 --- /dev/null +++ b/tools/testing/vsock/vsock_diag_test.c @@ -0,0 +1,681 @@ +/* + * vsock_diag_test - vsock_diag.ko test suite + * + * Copyright (C) 2017 Red Hat, Inc. + * + * Author: Stefan Hajnoczi <stefanha@redhat.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +#include <getopt.h> +#include <stdio.h> +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <signal.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <linux/list.h> +#include <linux/net.h> +#include <linux/netlink.h> +#include <linux/sock_diag.h> +#include <netinet/tcp.h> + +#include "../../../include/uapi/linux/vm_sockets.h" +#include "../../../include/uapi/linux/vm_sockets_diag.h" + +#include "timeout.h" +#include "control.h" + +enum test_mode { + TEST_MODE_UNSET, + TEST_MODE_CLIENT, + TEST_MODE_SERVER +}; + +/* Per-socket status */ +struct vsock_stat { + struct list_head list; + struct vsock_diag_msg msg; +}; + +static const char *sock_type_str(int type) +{ + switch (type) { + case SOCK_DGRAM: + return "DGRAM"; + case SOCK_STREAM: + return "STREAM"; + default: + return "INVALID TYPE"; + } +} + +static const char *sock_state_str(int state) +{ + switch (state) { + case TCP_CLOSE: + return "UNCONNECTED"; + case TCP_SYN_SENT: + return "CONNECTING"; + case TCP_ESTABLISHED: + return "CONNECTED"; + case TCP_CLOSING: + return "DISCONNECTING"; + case TCP_LISTEN: + return "LISTEN"; + default: + return "INVALID STATE"; + } +} + +static const char *sock_shutdown_str(int shutdown) +{ + switch (shutdown) { + case 1: + return "RCV_SHUTDOWN"; + case 2: + return "SEND_SHUTDOWN"; + case 3: + return "RCV_SHUTDOWN | SEND_SHUTDOWN"; + default: + return "0"; + } +} + +static void print_vsock_addr(FILE *fp, unsigned int cid, unsigned int port) +{ + if (cid == VMADDR_CID_ANY) + fprintf(fp, "*:"); + else + fprintf(fp, "%u:", cid); + + if (port == VMADDR_PORT_ANY) + fprintf(fp, "*"); + else + fprintf(fp, "%u", port); +} + +static void print_vsock_stat(FILE *fp, struct vsock_stat *st) +{ + print_vsock_addr(fp, st->msg.vdiag_src_cid, st->msg.vdiag_src_port); + fprintf(fp, " "); + print_vsock_addr(fp, st->msg.vdiag_dst_cid, st->msg.vdiag_dst_port); + fprintf(fp, " %s %s %s %u\n", + sock_type_str(st->msg.vdiag_type), + sock_state_str(st->msg.vdiag_state), + sock_shutdown_str(st->msg.vdiag_shutdown), + st->msg.vdiag_ino); +} + +static void print_vsock_stats(FILE *fp, struct list_head *head) +{ + struct vsock_stat *st; + + list_for_each_entry(st, head, list) + print_vsock_stat(fp, st); +} + +static struct vsock_stat *find_vsock_stat(struct list_head *head, int fd) +{ + struct vsock_stat *st; + struct stat stat; + + if (fstat(fd, &stat) < 0) { + perror("fstat"); + exit(EXIT_FAILURE); + } + + list_for_each_entry(st, head, list) + if (st->msg.vdiag_ino == stat.st_ino) + return st; + + fprintf(stderr, "cannot find fd %d\n", fd); + exit(EXIT_FAILURE); +} + +static void check_no_sockets(struct list_head *head) +{ + if (!list_empty(head)) { + fprintf(stderr, "expected no sockets\n"); + print_vsock_stats(stderr, head); + exit(1); + } +} + +static void check_num_sockets(struct list_head *head, int expected) +{ + struct list_head *node; + int n = 0; + + list_for_each(node, head) + n++; + + if (n != expected) { + fprintf(stderr, "expected %d sockets, found %d\n", + expected, n); + print_vsock_stats(stderr, head); + exit(EXIT_FAILURE); + } +} + +static void check_socket_state(struct vsock_stat *st, __u8 state) +{ + if (st->msg.vdiag_state != state) { + fprintf(stderr, "expected socket state %#x, got %#x\n", + state, st->msg.vdiag_state); + exit(EXIT_FAILURE); + } +} + +static void send_req(int fd) +{ + struct sockaddr_nl nladdr = { + .nl_family = AF_NETLINK, + }; + struct { + struct nlmsghdr nlh; + struct vsock_diag_req vreq; + } req = { + .nlh = { + .nlmsg_len = sizeof(req), + .nlmsg_type = SOCK_DIAG_BY_FAMILY, + .nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP, + }, + .vreq = { + .sdiag_family = AF_VSOCK, + .vdiag_states = ~(__u32)0, + }, + }; + struct iovec iov = { + .iov_base = &req, + .iov_len = sizeof(req), + }; + struct msghdr msg = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = &iov, + .msg_iovlen = 1, + }; + + for (;;) { + if (sendmsg(fd, &msg, 0) < 0) { + if (errno == EINTR) + continue; + + perror("sendmsg"); + exit(EXIT_FAILURE); + } + + return; + } +} + +static ssize_t recv_resp(int fd, void *buf, size_t len) +{ + struct sockaddr_nl nladdr = { + .nl_family = AF_NETLINK, + }; + struct iovec iov = { + .iov_base = buf, + .iov_len = len, + }; + struct msghdr msg = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = &iov, + .msg_iovlen = 1, + }; + ssize_t ret; + + do { + ret = recvmsg(fd, &msg, 0); + } while (ret < 0 && errno == EINTR); + + if (ret < 0) { + perror("recvmsg"); + exit(EXIT_FAILURE); + } + + return ret; +} + +static void add_vsock_stat(struct list_head *sockets, + const struct vsock_diag_msg *resp) +{ + struct vsock_stat *st; + + st = malloc(sizeof(*st)); + if (!st) { + perror("malloc"); + exit(EXIT_FAILURE); + } + + st->msg = *resp; + list_add_tail(&st->list, sockets); +} + +/* + * Read vsock stats into a list. + */ +static void read_vsock_stat(struct list_head *sockets) +{ + long buf[8192 / sizeof(long)]; + int fd; + + fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG); + if (fd < 0) { + perror("socket"); + exit(EXIT_FAILURE); + } + + send_req(fd); + + for (;;) { + const struct nlmsghdr *h; + ssize_t ret; + + ret = recv_resp(fd, buf, sizeof(buf)); + if (ret == 0) + goto done; + if (ret < sizeof(*h)) { + fprintf(stderr, "short read of %zd bytes\n", ret); + exit(EXIT_FAILURE); + } + + h = (struct nlmsghdr *)buf; + + while (NLMSG_OK(h, ret)) { + if (h->nlmsg_type == NLMSG_DONE) + goto done; + + if (h->nlmsg_type == NLMSG_ERROR) { + const struct nlmsgerr *err = NLMSG_DATA(h); + + if (h->nlmsg_len < NLMSG_LENGTH(sizeof(*err))) + fprintf(stderr, "NLMSG_ERROR\n"); + else { + errno = -err->error; + perror("NLMSG_ERROR"); + } + + exit(EXIT_FAILURE); + } + + if (h->nlmsg_type != SOCK_DIAG_BY_FAMILY) { + fprintf(stderr, "unexpected nlmsg_type %#x\n", + h->nlmsg_type); + exit(EXIT_FAILURE); + } + if (h->nlmsg_len < + NLMSG_LENGTH(sizeof(struct vsock_diag_msg))) { + fprintf(stderr, "short vsock_diag_msg\n"); + exit(EXIT_FAILURE); + } + + add_vsock_stat(sockets, NLMSG_DATA(h)); + + h = NLMSG_NEXT(h, ret); + } + } + +done: + close(fd); +} + +static void free_sock_stat(struct list_head *sockets) +{ + struct vsock_stat *st; + struct vsock_stat *next; + + list_for_each_entry_safe(st, next, sockets, list) + free(st); +} + +static void test_no_sockets(unsigned int peer_cid) +{ + LIST_HEAD(sockets); + + read_vsock_stat(&sockets); + + check_no_sockets(&sockets); + + free_sock_stat(&sockets); +} + +static void test_listen_socket_server(unsigned int peer_cid) +{ + union { + struct sockaddr sa; + struct sockaddr_vm svm; + } addr = { + .svm = { + .svm_family = AF_VSOCK, + .svm_port = 1234, + .svm_cid = VMADDR_CID_ANY, + }, + }; + LIST_HEAD(sockets); + struct vsock_stat *st; + int fd; + + fd = socket(AF_VSOCK, SOCK_STREAM, 0); + + if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) { + perror("bind"); + exit(EXIT_FAILURE); + } + + if (listen(fd, 1) < 0) { + perror("listen"); + exit(EXIT_FAILURE); + } + + read_vsock_stat(&sockets); + + check_num_sockets(&sockets, 1); + st = find_vsock_stat(&sockets, fd); + check_socket_state(st, TCP_LISTEN); + + close(fd); + free_sock_stat(&sockets); +} + +static void test_connect_client(unsigned int peer_cid) +{ + union { + struct sockaddr sa; + struct sockaddr_vm svm; + } addr = { + .svm = { + .svm_family = AF_VSOCK, + .svm_port = 1234, + .svm_cid = peer_cid, + }, + }; + int fd; + int ret; + LIST_HEAD(sockets); + struct vsock_stat *st; + + control_expectln("LISTENING"); + + fd = socket(AF_VSOCK, SOCK_STREAM, 0); + + timeout_begin(TIMEOUT); + do { + ret = connect(fd, &addr.sa, sizeof(addr.svm)); + timeout_check("connect"); + } while (ret < 0 && errno == EINTR); + timeout_end(); + + if (ret < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + read_vsock_stat(&sockets); + + check_num_sockets(&sockets, 1); + st = find_vsock_stat(&sockets, fd); + check_socket_state(st, TCP_ESTABLISHED); + + control_expectln("DONE"); + control_writeln("DONE"); + + close(fd); + free_sock_stat(&sockets); +} + +static void test_connect_server(unsigned int peer_cid) +{ + union { + struct sockaddr sa; + struct sockaddr_vm svm; + } addr = { + .svm = { + .svm_family = AF_VSOCK, + .svm_port = 1234, + .svm_cid = VMADDR_CID_ANY, + }, + }; + union { + struct sockaddr sa; + struct sockaddr_vm svm; + } clientaddr; + socklen_t clientaddr_len = sizeof(clientaddr.svm); + LIST_HEAD(sockets); + struct vsock_stat *st; + int fd; + int client_fd; + + fd = socket(AF_VSOCK, SOCK_STREAM, 0); + + if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) { + perror("bind"); + exit(EXIT_FAILURE); + } + + if (listen(fd, 1) < 0) { + perror("listen"); + exit(EXIT_FAILURE); + } + + control_writeln("LISTENING"); + + timeout_begin(TIMEOUT); + do { + client_fd = accept(fd, &clientaddr.sa, &clientaddr_len); + timeout_check("accept"); + } while (client_fd < 0 && errno == EINTR); + timeout_end(); + + if (client_fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + if (clientaddr.sa.sa_family != AF_VSOCK) { + fprintf(stderr, "expected AF_VSOCK from accept(2), got %d\n", + clientaddr.sa.sa_family); + exit(EXIT_FAILURE); + } + if (clientaddr.svm.svm_cid != peer_cid) { + fprintf(stderr, "expected peer CID %u from accept(2), got %u\n", + peer_cid, clientaddr.svm.svm_cid); + exit(EXIT_FAILURE); + } + + read_vsock_stat(&sockets); + + check_num_sockets(&sockets, 2); + find_vsock_stat(&sockets, fd); + st = find_vsock_stat(&sockets, client_fd); + check_socket_state(st, TCP_ESTABLISHED); + + control_writeln("DONE"); + control_expectln("DONE"); + + close(client_fd); + close(fd); + free_sock_stat(&sockets); +} + +static struct { + const char *name; + void (*run_client)(unsigned int peer_cid); + void (*run_server)(unsigned int peer_cid); +} test_cases[] = { + { + .name = "No sockets", + .run_server = test_no_sockets, + }, + { + .name = "Listen socket", + .run_server = test_listen_socket_server, + }, + { + .name = "Connect", + .run_client = test_connect_client, + .run_server = test_connect_server, + }, + {}, +}; + +static void init_signals(void) +{ + struct sigaction act = { + .sa_handler = sigalrm, + }; + + sigaction(SIGALRM, &act, NULL); + signal(SIGPIPE, SIG_IGN); +} + +static unsigned int parse_cid(const char *str) +{ + char *endptr = NULL; + unsigned long int n; + + errno = 0; + n = strtoul(str, &endptr, 10); + if (errno || *endptr != '\0') { + fprintf(stderr, "malformed CID \"%s\"\n", str); + exit(EXIT_FAILURE); + } + return n; +} + +static const char optstring[] = ""; +static const struct option longopts[] = { + { + .name = "control-host", + .has_arg = required_argument, + .val = 'H', + }, + { + .name = "control-port", + .has_arg = required_argument, + .val = 'P', + }, + { + .name = "mode", + .has_arg = required_argument, + .val = 'm', + }, + { + .name = "peer-cid", + .has_arg = required_argument, + .val = 'p', + }, + { + .name = "help", + .has_arg = no_argument, + .val = '?', + }, + {}, +}; + +static void usage(void) +{ + fprintf(stderr, "Usage: vsock_diag_test [--help] [--control-host=<host>] --control-port=<port> --mode=client|server --peer-cid=<cid>\n" + "\n" + " Server: vsock_diag_test --control-port=1234 --mode=server --peer-cid=3\n" + " Client: vsock_diag_test --control-host=192.168.0.1 --control-port=1234 --mode=client --peer-cid=2\n" + "\n" + "Run vsock_diag.ko tests. Must be launched in both\n" + "guest and host. One side must use --mode=client and\n" + "the other side must use --mode=server.\n" + "\n" + "A TCP control socket connection is used to coordinate tests\n" + "between the client and the server. The server requires a\n" + "listen address and the client requires an address to\n" + "connect to.\n" + "\n" + "The CID of the other side must be given with --peer-cid=<cid>.\n"); + exit(EXIT_FAILURE); +} + +int main(int argc, char **argv) +{ + const char *control_host = NULL; + const char *control_port = NULL; + int mode = TEST_MODE_UNSET; + unsigned int peer_cid = VMADDR_CID_ANY; + int i; + + init_signals(); + + for (;;) { + int opt = getopt_long(argc, argv, optstring, longopts, NULL); + + if (opt == -1) + break; + + switch (opt) { + case 'H': + control_host = optarg; + break; + case 'm': + if (strcmp(optarg, "client") == 0) + mode = TEST_MODE_CLIENT; + else if (strcmp(optarg, "server") == 0) + mode = TEST_MODE_SERVER; + else { + fprintf(stderr, "--mode must be \"client\" or \"server\"\n"); + return EXIT_FAILURE; + } + break; + case 'p': + peer_cid = parse_cid(optarg); + break; + case 'P': + control_port = optarg; + break; + case '?': + default: + usage(); + } + } + + if (!control_port) + usage(); + if (mode == TEST_MODE_UNSET) + usage(); + if (peer_cid == VMADDR_CID_ANY) + usage(); + + if (!control_host) { + if (mode != TEST_MODE_SERVER) + usage(); + control_host = "0.0.0.0"; + } + + control_init(control_host, control_port, mode == TEST_MODE_SERVER); + + for (i = 0; test_cases[i].name; i++) { + void (*run)(unsigned int peer_cid); + + printf("%s...", test_cases[i].name); + fflush(stdout); + + if (mode == TEST_MODE_CLIENT) + run = test_cases[i].run_client; + else + run = test_cases[i].run_server; + + if (run) + run(peer_cid); + + printf("ok\n"); + } + + control_cleanup(); + return EXIT_SUCCESS; +} |