diff options
Diffstat (limited to 'tools')
79 files changed, 4931 insertions, 376 deletions
diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h index abc418650fec..683ca3af4084 100644 --- a/tools/arch/arm64/include/asm/cputype.h +++ b/tools/arch/arm64/include/asm/cputype.h @@ -41,7 +41,7 @@ (((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT) #define MIDR_CPU_MODEL(imp, partnum) \ - (((imp) << MIDR_IMPLEMENTOR_SHIFT) | \ + ((_AT(u32, imp) << MIDR_IMPLEMENTOR_SHIFT) | \ (0xf << MIDR_ARCHITECTURE_SHIFT) | \ ((partnum) << MIDR_PARTNUM_SHIFT)) @@ -80,6 +80,7 @@ #define ARM_CPU_PART_CORTEX_X1 0xD44 #define ARM_CPU_PART_CORTEX_A510 0xD46 #define ARM_CPU_PART_CORTEX_A710 0xD47 +#define ARM_CPU_PART_CORTEX_A715 0xD4D #define ARM_CPU_PART_CORTEX_X2 0xD48 #define ARM_CPU_PART_NEOVERSE_N2 0xD49 #define ARM_CPU_PART_CORTEX_A78C 0xD4B @@ -123,6 +124,8 @@ #define APPLE_CPU_PART_M1_FIRESTORM_PRO 0x025 #define APPLE_CPU_PART_M1_ICESTORM_MAX 0x028 #define APPLE_CPU_PART_M1_FIRESTORM_MAX 0x029 +#define APPLE_CPU_PART_M2_BLIZZARD 0x032 +#define APPLE_CPU_PART_M2_AVALANCHE 0x033 #define AMPERE_CPU_PART_AMPERE1 0xAC3 @@ -142,6 +145,7 @@ #define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1) #define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510) #define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710) +#define MIDR_CORTEX_A715 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A715) #define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2) #define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2) #define MIDR_CORTEX_A78C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78C) @@ -175,6 +179,8 @@ #define MIDR_APPLE_M1_FIRESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_PRO) #define MIDR_APPLE_M1_ICESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_MAX) #define MIDR_APPLE_M1_FIRESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_MAX) +#define MIDR_APPLE_M2_BLIZZARD MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD) +#define MIDR_APPLE_M2_AVALANCHE MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE) #define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1) /* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */ diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index 316917b98707..a7a857f1784d 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -43,6 +43,7 @@ #define __KVM_HAVE_VCPU_EVENTS #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 +#define KVM_DIRTY_LOG_PAGE_OFFSET 64 #define KVM_REG_SIZE(id) \ (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 649e50a8f9dd..e48deab8901d 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -206,6 +206,8 @@ struct kvm_msr_list { struct kvm_msr_filter_range { #define KVM_MSR_FILTER_READ (1 << 0) #define KVM_MSR_FILTER_WRITE (1 << 1) +#define KVM_MSR_FILTER_RANGE_VALID_MASK (KVM_MSR_FILTER_READ | \ + KVM_MSR_FILTER_WRITE) __u32 flags; __u32 nmsrs; /* number of msrs in bitmap */ __u32 base; /* MSR index the bitmap starts at */ @@ -214,8 +216,11 @@ struct kvm_msr_filter_range { #define KVM_MSR_FILTER_MAX_RANGES 16 struct kvm_msr_filter { +#ifndef __KERNEL__ #define KVM_MSR_FILTER_DEFAULT_ALLOW (0 << 0) +#endif #define KVM_MSR_FILTER_DEFAULT_DENY (1 << 0) +#define KVM_MSR_FILTER_VALID_MASK (KVM_MSR_FILTER_DEFAULT_DENY) __u32 flags; struct kvm_msr_filter_range ranges[KVM_MSR_FILTER_MAX_RANGES]; }; diff --git a/tools/include/linux/build_bug.h b/tools/include/linux/build_bug.h index cc7070c7439b..b4898ff085de 100644 --- a/tools/include/linux/build_bug.h +++ b/tools/include/linux/build_bug.h @@ -79,4 +79,13 @@ #define __static_assert(expr, msg, ...) _Static_assert(expr, msg) #endif // static_assert + +/* + * Compile time check that field has an expected offset + */ +#define ASSERT_STRUCT_OFFSET(type, field, expected_offset) \ + BUILD_BUG_ON_MSG(offsetof(type, field) != (expected_offset), \ + "Offset of " #field " in " #type " has changed.") + + #endif /* _LINUX_BUILD_BUG_H */ diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h index 5fc5b8029bff..7380093ba9e7 100644 --- a/tools/include/nolibc/arch-mips.h +++ b/tools/include/nolibc/arch-mips.h @@ -192,6 +192,7 @@ struct sys_stat_struct { __asm__ (".section .text\n" ".weak __start\n" ".set nomips16\n" + ".set push\n" ".set noreorder\n" ".option pic0\n" ".ent __start\n" @@ -210,6 +211,7 @@ __asm__ (".section .text\n" "li $v0, 4001\n" // NR_exit == 4001 "syscall\n" ".end __start\n" + ".set pop\n" ""); #endif // _NOLIBC_ARCH_MIPS_H diff --git a/tools/include/nolibc/arch-riscv.h b/tools/include/nolibc/arch-riscv.h index ba04771cb3a3..a3bdd9803f8c 100644 --- a/tools/include/nolibc/arch-riscv.h +++ b/tools/include/nolibc/arch-riscv.h @@ -11,13 +11,13 @@ #define O_RDONLY 0 #define O_WRONLY 1 #define O_RDWR 2 -#define O_CREAT 0x100 -#define O_EXCL 0x200 -#define O_NOCTTY 0x400 -#define O_TRUNC 0x1000 -#define O_APPEND 0x2000 -#define O_NONBLOCK 0x4000 -#define O_DIRECTORY 0x200000 +#define O_CREAT 0x40 +#define O_EXCL 0x80 +#define O_NOCTTY 0x100 +#define O_TRUNC 0x200 +#define O_APPEND 0x400 +#define O_NONBLOCK 0x800 +#define O_DIRECTORY 0x10000 struct sys_stat_struct { unsigned long st_dev; /* Device. */ diff --git a/tools/include/nolibc/ctype.h b/tools/include/nolibc/ctype.h index e3000b2992d7..6f90706d0644 100644 --- a/tools/include/nolibc/ctype.h +++ b/tools/include/nolibc/ctype.h @@ -96,4 +96,7 @@ int ispunct(int c) return isgraph(c) && !isalnum(c); } +/* make sure to include all global symbols */ +#include "nolibc.h" + #endif /* _NOLIBC_CTYPE_H */ diff --git a/tools/include/nolibc/errno.h b/tools/include/nolibc/errno.h index 06893d6dfb7a..9dc4919c769b 100644 --- a/tools/include/nolibc/errno.h +++ b/tools/include/nolibc/errno.h @@ -24,4 +24,7 @@ static int errno; */ #define MAX_ERRNO 4095 +/* make sure to include all global symbols */ +#include "nolibc.h" + #endif /* _NOLIBC_ERRNO_H */ diff --git a/tools/include/nolibc/signal.h b/tools/include/nolibc/signal.h index ef47e71e2be3..137552216e46 100644 --- a/tools/include/nolibc/signal.h +++ b/tools/include/nolibc/signal.h @@ -19,4 +19,7 @@ int raise(int signal) return sys_kill(sys_getpid(), signal); } +/* make sure to include all global symbols */ +#include "nolibc.h" + #endif /* _NOLIBC_SIGNAL_H */ diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h index a3cebc4bc3ac..96ac8afc5aee 100644 --- a/tools/include/nolibc/stdio.h +++ b/tools/include/nolibc/stdio.h @@ -303,4 +303,7 @@ void perror(const char *msg) fprintf(stderr, "%s%serrno=%d\n", (msg && *msg) ? msg : "", (msg && *msg) ? ": " : "", errno); } +/* make sure to include all global symbols */ +#include "nolibc.h" + #endif /* _NOLIBC_STDIO_H */ diff --git a/tools/include/nolibc/stdlib.h b/tools/include/nolibc/stdlib.h index 92378c4b9660..a24000d1e822 100644 --- a/tools/include/nolibc/stdlib.h +++ b/tools/include/nolibc/stdlib.h @@ -419,4 +419,7 @@ char *u64toa(uint64_t in) return itoa_buffer; } +/* make sure to include all global symbols */ +#include "nolibc.h" + #endif /* _NOLIBC_STDLIB_H */ diff --git a/tools/include/nolibc/string.h b/tools/include/nolibc/string.h index ad97c0d522b8..fffdaf6ff467 100644 --- a/tools/include/nolibc/string.h +++ b/tools/include/nolibc/string.h @@ -88,8 +88,11 @@ void *memset(void *dst, int b, size_t len) { char *p = dst; - while (len--) + while (len--) { + /* prevent gcc from recognizing memset() here */ + asm volatile(""); *(p++) = b; + } return dst; } @@ -285,4 +288,7 @@ char *strrchr(const char *s, int c) return (char *)ret; } +/* make sure to include all global symbols */ +#include "nolibc.h" + #endif /* _NOLIBC_STRING_H */ diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h index ce3ee03aa679..78473d34e27c 100644 --- a/tools/include/nolibc/sys.h +++ b/tools/include/nolibc/sys.h @@ -1243,5 +1243,7 @@ ssize_t write(int fd, const void *buf, size_t count) return ret; } +/* make sure to include all global symbols */ +#include "nolibc.h" #endif /* _NOLIBC_SYS_H */ diff --git a/tools/include/nolibc/time.h b/tools/include/nolibc/time.h index d18b7661fdd7..84655361b9ad 100644 --- a/tools/include/nolibc/time.h +++ b/tools/include/nolibc/time.h @@ -25,4 +25,7 @@ time_t time(time_t *tptr) return tv.tv_sec; } +/* make sure to include all global symbols */ +#include "nolibc.h" + #endif /* _NOLIBC_TIME_H */ diff --git a/tools/include/nolibc/types.h b/tools/include/nolibc/types.h index 959997034e55..fbbc0e68c001 100644 --- a/tools/include/nolibc/types.h +++ b/tools/include/nolibc/types.h @@ -26,13 +26,13 @@ #define S_IFSOCK 0140000 #define S_IFMT 0170000 -#define S_ISDIR(mode) (((mode) & S_IFDIR) == S_IFDIR) -#define S_ISCHR(mode) (((mode) & S_IFCHR) == S_IFCHR) -#define S_ISBLK(mode) (((mode) & S_IFBLK) == S_IFBLK) -#define S_ISREG(mode) (((mode) & S_IFREG) == S_IFREG) -#define S_ISFIFO(mode) (((mode) & S_IFIFO) == S_IFIFO) -#define S_ISLNK(mode) (((mode) & S_IFLNK) == S_IFLNK) -#define S_ISSOCK(mode) (((mode) & S_IFSOCK) == S_IFSOCK) +#define S_ISDIR(mode) (((mode) & S_IFMT) == S_IFDIR) +#define S_ISCHR(mode) (((mode) & S_IFMT) == S_IFCHR) +#define S_ISBLK(mode) (((mode) & S_IFMT) == S_IFBLK) +#define S_ISREG(mode) (((mode) & S_IFMT) == S_IFREG) +#define S_ISFIFO(mode) (((mode) & S_IFMT) == S_IFIFO) +#define S_ISLNK(mode) (((mode) & S_IFMT) == S_IFLNK) +#define S_ISSOCK(mode) (((mode) & S_IFMT) == S_IFSOCK) /* dirent types */ #define DT_UNKNOWN 0x0 @@ -89,39 +89,46 @@ #define EXIT_SUCCESS 0 #define EXIT_FAILURE 1 +#define FD_SETIDXMASK (8 * sizeof(unsigned long)) +#define FD_SETBITMASK (8 * sizeof(unsigned long)-1) + /* for select() */ typedef struct { - uint32_t fd32[(FD_SETSIZE + 31) / 32]; + unsigned long fds[(FD_SETSIZE + FD_SETBITMASK) / FD_SETIDXMASK]; } fd_set; -#define FD_CLR(fd, set) do { \ - fd_set *__set = (set); \ - int __fd = (fd); \ - if (__fd >= 0) \ - __set->fd32[__fd / 32] &= ~(1U << (__fd & 31)); \ +#define FD_CLR(fd, set) do { \ + fd_set *__set = (set); \ + int __fd = (fd); \ + if (__fd >= 0) \ + __set->fds[__fd / FD_SETIDXMASK] &= \ + ~(1U << (__fd & FX_SETBITMASK)); \ } while (0) -#define FD_SET(fd, set) do { \ - fd_set *__set = (set); \ - int __fd = (fd); \ - if (__fd >= 0) \ - __set->fd32[__fd / 32] |= 1U << (__fd & 31); \ +#define FD_SET(fd, set) do { \ + fd_set *__set = (set); \ + int __fd = (fd); \ + if (__fd >= 0) \ + __set->fds[__fd / FD_SETIDXMASK] |= \ + 1 << (__fd & FD_SETBITMASK); \ } while (0) -#define FD_ISSET(fd, set) ({ \ - fd_set *__set = (set); \ - int __fd = (fd); \ - int __r = 0; \ - if (__fd >= 0) \ - __r = !!(__set->fd32[__fd / 32] & 1U << (__fd & 31)); \ - __r; \ +#define FD_ISSET(fd, set) ({ \ + fd_set *__set = (set); \ + int __fd = (fd); \ + int __r = 0; \ + if (__fd >= 0) \ + __r = !!(__set->fds[__fd / FD_SETIDXMASK] & \ +1U << (__fd & FD_SET_BITMASK)); \ + __r; \ }) -#define FD_ZERO(set) do { \ - fd_set *__set = (set); \ - int __idx; \ - for (__idx = 0; __idx < (FD_SETSIZE+31) / 32; __idx ++) \ - __set->fd32[__idx] = 0; \ +#define FD_ZERO(set) do { \ + fd_set *__set = (set); \ + int __idx; \ + int __size = (FD_SETSIZE+FD_SETBITMASK) / FD_SETIDXMASK;\ + for (__idx = 0; __idx < __size; __idx++) \ + __set->fds[__idx] = 0; \ } while (0) /* for poll() */ @@ -202,4 +209,7 @@ struct stat { }) #endif +/* make sure to include all global symbols */ +#include "nolibc.h" + #endif /* _NOLIBC_TYPES_H */ diff --git a/tools/include/nolibc/unistd.h b/tools/include/nolibc/unistd.h index 1c25e20ee360..1cfcd52106a4 100644 --- a/tools/include/nolibc/unistd.h +++ b/tools/include/nolibc/unistd.h @@ -51,4 +51,7 @@ int tcsetpgrp(int fd, pid_t pid) return ioctl(fd, TIOCSPGRP, &pid); } +/* make sure to include all global symbols */ +#include "nolibc.h" + #endif /* _NOLIBC_UNISTD_H */ diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 20522d4ba1e0..55155e262646 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -1767,6 +1767,7 @@ struct kvm_xen_hvm_attr { __u8 runstate_update_flag; struct { __u64 gfn; +#define KVM_XEN_INVALID_GFN ((__u64)-1) } shared_info; struct { __u32 send_port; @@ -1798,6 +1799,7 @@ struct kvm_xen_hvm_attr { } u; }; + /* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ #define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0 #define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1 @@ -1823,6 +1825,7 @@ struct kvm_xen_vcpu_attr { __u16 pad[3]; union { __u64 gpa; +#define KVM_XEN_INVALID_GPA ((__u64)-1) __u64 pad[8]; struct { __u64 state; diff --git a/tools/net/ynl/samples/cli.py b/tools/net/ynl/samples/cli.py new file mode 100755 index 000000000000..b27159c70710 --- /dev/null +++ b/tools/net/ynl/samples/cli.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python +# SPDX-License-Identifier: BSD-3-Clause + +import argparse +import json +import pprint +import time + +from ynl import YnlFamily + + +def main(): + parser = argparse.ArgumentParser(description='YNL CLI sample') + parser.add_argument('--spec', dest='spec', type=str, required=True) + parser.add_argument('--schema', dest='schema', type=str) + parser.add_argument('--json', dest='json_text', type=str) + parser.add_argument('--do', dest='do', type=str) + parser.add_argument('--dump', dest='dump', type=str) + parser.add_argument('--sleep', dest='sleep', type=int) + parser.add_argument('--subscribe', dest='ntf', type=str) + args = parser.parse_args() + + attrs = {} + if args.json_text: + attrs = json.loads(args.json_text) + + ynl = YnlFamily(args.spec, args.schema) + + if args.ntf: + ynl.ntf_subscribe(args.ntf) + + if args.sleep: + time.sleep(args.sleep) + + if args.do or args.dump: + method = getattr(ynl, args.do if args.do else args.dump) + + reply = method(attrs, dump=bool(args.dump)) + pprint.PrettyPrinter().pprint(reply) + + if args.ntf: + ynl.check_ntf() + pprint.PrettyPrinter().pprint(ynl.async_msg_queue) + + +if __name__ == "__main__": + main() diff --git a/tools/net/ynl/samples/ynl.py b/tools/net/ynl/samples/ynl.py new file mode 100644 index 000000000000..b71523d71d46 --- /dev/null +++ b/tools/net/ynl/samples/ynl.py @@ -0,0 +1,534 @@ +# SPDX-License-Identifier: BSD-3-Clause + +import functools +import jsonschema +import os +import random +import socket +import struct +import yaml + +# +# Generic Netlink code which should really be in some library, but I can't quickly find one. +# + + +class Netlink: + # Netlink socket + SOL_NETLINK = 270 + + NETLINK_ADD_MEMBERSHIP = 1 + NETLINK_CAP_ACK = 10 + NETLINK_EXT_ACK = 11 + + # Netlink message + NLMSG_ERROR = 2 + NLMSG_DONE = 3 + + NLM_F_REQUEST = 1 + NLM_F_ACK = 4 + NLM_F_ROOT = 0x100 + NLM_F_MATCH = 0x200 + NLM_F_APPEND = 0x800 + + NLM_F_CAPPED = 0x100 + NLM_F_ACK_TLVS = 0x200 + + NLM_F_DUMP = NLM_F_ROOT | NLM_F_MATCH + + NLA_F_NESTED = 0x8000 + NLA_F_NET_BYTEORDER = 0x4000 + + NLA_TYPE_MASK = NLA_F_NESTED | NLA_F_NET_BYTEORDER + + # Genetlink defines + NETLINK_GENERIC = 16 + + GENL_ID_CTRL = 0x10 + + # nlctrl + CTRL_CMD_GETFAMILY = 3 + + CTRL_ATTR_FAMILY_ID = 1 + CTRL_ATTR_FAMILY_NAME = 2 + CTRL_ATTR_MAXATTR = 5 + CTRL_ATTR_MCAST_GROUPS = 7 + + CTRL_ATTR_MCAST_GRP_NAME = 1 + CTRL_ATTR_MCAST_GRP_ID = 2 + + # Extack types + NLMSGERR_ATTR_MSG = 1 + NLMSGERR_ATTR_OFFS = 2 + NLMSGERR_ATTR_COOKIE = 3 + NLMSGERR_ATTR_POLICY = 4 + NLMSGERR_ATTR_MISS_TYPE = 5 + NLMSGERR_ATTR_MISS_NEST = 6 + + +class NlAttr: + def __init__(self, raw, offset): + self._len, self._type = struct.unpack("HH", raw[offset:offset + 4]) + self.type = self._type & ~Netlink.NLA_TYPE_MASK + self.payload_len = self._len + self.full_len = (self.payload_len + 3) & ~3 + self.raw = raw[offset + 4:offset + self.payload_len] + + def as_u16(self): + return struct.unpack("H", self.raw)[0] + + def as_u32(self): + return struct.unpack("I", self.raw)[0] + + def as_u64(self): + return struct.unpack("Q", self.raw)[0] + + def as_strz(self): + return self.raw.decode('ascii')[:-1] + + def as_bin(self): + return self.raw + + def __repr__(self): + return f"[type:{self.type} len:{self._len}] {self.raw}" + + +class NlAttrs: + def __init__(self, msg): + self.attrs = [] + + offset = 0 + while offset < len(msg): + attr = NlAttr(msg, offset) + offset += attr.full_len + self.attrs.append(attr) + + def __iter__(self): + yield from self.attrs + + def __repr__(self): + msg = '' + for a in self.attrs: + if msg: + msg += '\n' + msg += repr(a) + return msg + + +class NlMsg: + def __init__(self, msg, offset, attr_space=None): + self.hdr = msg[offset:offset + 16] + + self.nl_len, self.nl_type, self.nl_flags, self.nl_seq, self.nl_portid = \ + struct.unpack("IHHII", self.hdr) + + self.raw = msg[offset + 16:offset + self.nl_len] + + self.error = 0 + self.done = 0 + + extack_off = None + if self.nl_type == Netlink.NLMSG_ERROR: + self.error = struct.unpack("i", self.raw[0:4])[0] + self.done = 1 + extack_off = 20 + elif self.nl_type == Netlink.NLMSG_DONE: + self.done = 1 + extack_off = 4 + + self.extack = None + if self.nl_flags & Netlink.NLM_F_ACK_TLVS and extack_off: + self.extack = dict() + extack_attrs = NlAttrs(self.raw[extack_off:]) + for extack in extack_attrs: + if extack.type == Netlink.NLMSGERR_ATTR_MSG: + self.extack['msg'] = extack.as_strz() + elif extack.type == Netlink.NLMSGERR_ATTR_MISS_TYPE: + self.extack['miss-type'] = extack.as_u32() + elif extack.type == Netlink.NLMSGERR_ATTR_MISS_NEST: + self.extack['miss-nest'] = extack.as_u32() + elif extack.type == Netlink.NLMSGERR_ATTR_OFFS: + self.extack['bad-attr-offs'] = extack.as_u32() + else: + if 'unknown' not in self.extack: + self.extack['unknown'] = [] + self.extack['unknown'].append(extack) + + if attr_space: + # We don't have the ability to parse nests yet, so only do global + if 'miss-type' in self.extack and 'miss-nest' not in self.extack: + miss_type = self.extack['miss-type'] + if len(attr_space.attr_list) > miss_type: + spec = attr_space.attr_list[miss_type] + desc = spec['name'] + if 'doc' in spec: + desc += f" ({spec['doc']})" + self.extack['miss-type'] = desc + + def __repr__(self): + msg = f"nl_len = {self.nl_len} ({len(self.raw)}) nl_flags = 0x{self.nl_flags:x} nl_type = {self.nl_type}\n" + if self.error: + msg += '\terror: ' + str(self.error) + if self.extack: + msg += '\textack: ' + repr(self.extack) + return msg + + +class NlMsgs: + def __init__(self, data, attr_space=None): + self.msgs = [] + + offset = 0 + while offset < len(data): + msg = NlMsg(data, offset, attr_space=attr_space) + offset += msg.nl_len + self.msgs.append(msg) + + def __iter__(self): + yield from self.msgs + + +genl_family_name_to_id = None + + +def _genl_msg(nl_type, nl_flags, genl_cmd, genl_version, seq=None): + # we prepend length in _genl_msg_finalize() + if seq is None: + seq = random.randint(1, 1024) + nlmsg = struct.pack("HHII", nl_type, nl_flags, seq, 0) + genlmsg = struct.pack("bbH", genl_cmd, genl_version, 0) + return nlmsg + genlmsg + + +def _genl_msg_finalize(msg): + return struct.pack("I", len(msg) + 4) + msg + + +def _genl_load_families(): + with socket.socket(socket.AF_NETLINK, socket.SOCK_RAW, Netlink.NETLINK_GENERIC) as sock: + sock.setsockopt(Netlink.SOL_NETLINK, Netlink.NETLINK_CAP_ACK, 1) + + msg = _genl_msg(Netlink.GENL_ID_CTRL, + Netlink.NLM_F_REQUEST | Netlink.NLM_F_ACK | Netlink.NLM_F_DUMP, + Netlink.CTRL_CMD_GETFAMILY, 1) + msg = _genl_msg_finalize(msg) + + sock.send(msg, 0) + + global genl_family_name_to_id + genl_family_name_to_id = dict() + + while True: + reply = sock.recv(128 * 1024) + nms = NlMsgs(reply) + for nl_msg in nms: + if nl_msg.error: + print("Netlink error:", nl_msg.error) + return + if nl_msg.done: + return + + gm = GenlMsg(nl_msg) + fam = dict() + for attr in gm.raw_attrs: + if attr.type == Netlink.CTRL_ATTR_FAMILY_ID: + fam['id'] = attr.as_u16() + elif attr.type == Netlink.CTRL_ATTR_FAMILY_NAME: + fam['name'] = attr.as_strz() + elif attr.type == Netlink.CTRL_ATTR_MAXATTR: + fam['maxattr'] = attr.as_u32() + elif attr.type == Netlink.CTRL_ATTR_MCAST_GROUPS: + fam['mcast'] = dict() + for entry in NlAttrs(attr.raw): + mcast_name = None + mcast_id = None + for entry_attr in NlAttrs(entry.raw): + if entry_attr.type == Netlink.CTRL_ATTR_MCAST_GRP_NAME: + mcast_name = entry_attr.as_strz() + elif entry_attr.type == Netlink.CTRL_ATTR_MCAST_GRP_ID: + mcast_id = entry_attr.as_u32() + if mcast_name and mcast_id is not None: + fam['mcast'][mcast_name] = mcast_id + if 'name' in fam and 'id' in fam: + genl_family_name_to_id[fam['name']] = fam + + +class GenlMsg: + def __init__(self, nl_msg): + self.nl = nl_msg + + self.hdr = nl_msg.raw[0:4] + self.raw = nl_msg.raw[4:] + + self.genl_cmd, self.genl_version, _ = struct.unpack("bbH", self.hdr) + + self.raw_attrs = NlAttrs(self.raw) + + def __repr__(self): + msg = repr(self.nl) + msg += f"\tgenl_cmd = {self.genl_cmd} genl_ver = {self.genl_version}\n" + for a in self.raw_attrs: + msg += '\t\t' + repr(a) + '\n' + return msg + + +class GenlFamily: + def __init__(self, family_name): + self.family_name = family_name + + global genl_family_name_to_id + if genl_family_name_to_id is None: + _genl_load_families() + + self.genl_family = genl_family_name_to_id[family_name] + self.family_id = genl_family_name_to_id[family_name]['id'] + + +# +# YNL implementation details. +# + + +class YnlAttrSpace: + def __init__(self, family, yaml): + self.yaml = yaml + + self.attrs = dict() + self.name = self.yaml['name'] + self.subspace_of = self.yaml['subset-of'] if 'subspace-of' in self.yaml else None + + val = 0 + max_val = 0 + for elem in self.yaml['attributes']: + if 'value' in elem: + val = elem['value'] + else: + elem['value'] = val + if val > max_val: + max_val = val + val += 1 + + self.attrs[elem['name']] = elem + + self.attr_list = [None] * (max_val + 1) + for elem in self.yaml['attributes']: + self.attr_list[elem['value']] = elem + + def __getitem__(self, key): + return self.attrs[key] + + def __contains__(self, key): + return key in self.yaml + + def __iter__(self): + yield from self.attrs + + def items(self): + return self.attrs.items() + + +class YnlFamily: + def __init__(self, def_path, schema=None): + self.include_raw = False + + with open(def_path, "r") as stream: + self.yaml = yaml.safe_load(stream) + + if schema: + with open(schema, "r") as stream: + schema = yaml.safe_load(stream) + + jsonschema.validate(self.yaml, schema) + + self.sock = socket.socket(socket.AF_NETLINK, socket.SOCK_RAW, Netlink.NETLINK_GENERIC) + self.sock.setsockopt(Netlink.SOL_NETLINK, Netlink.NETLINK_CAP_ACK, 1) + self.sock.setsockopt(Netlink.SOL_NETLINK, Netlink.NETLINK_EXT_ACK, 1) + + self._ops = dict() + self._spaces = dict() + self._types = dict() + + for elem in self.yaml['attribute-sets']: + self._spaces[elem['name']] = YnlAttrSpace(self, elem) + + for elem in self.yaml['definitions']: + self._types[elem['name']] = elem + + async_separation = 'async-prefix' in self.yaml['operations'] + self.async_msg_ids = set() + self.async_msg_queue = [] + val = 0 + max_val = 0 + for elem in self.yaml['operations']['list']: + if not (async_separation and ('notify' in elem or 'event' in elem)): + if 'value' in elem: + val = elem['value'] + else: + elem['value'] = val + val += 1 + max_val = max(val, max_val) + + if 'notify' in elem or 'event' in elem: + self.async_msg_ids.add(elem['value']) + + self._ops[elem['name']] = elem + + op_name = elem['name'].replace('-', '_') + + bound_f = functools.partial(self._op, elem['name']) + setattr(self, op_name, bound_f) + + self._op_array = [None] * max_val + for _, op in self._ops.items(): + self._op_array[op['value']] = op + if 'notify' in op: + op['attribute-set'] = self._ops[op['notify']]['attribute-set'] + + self.family = GenlFamily(self.yaml['name']) + + def ntf_subscribe(self, mcast_name): + if mcast_name not in self.family.genl_family['mcast']: + raise Exception(f'Multicast group "{mcast_name}" not present in the family') + + self.sock.bind((0, 0)) + self.sock.setsockopt(Netlink.SOL_NETLINK, Netlink.NETLINK_ADD_MEMBERSHIP, + self.family.genl_family['mcast'][mcast_name]) + + def _add_attr(self, space, name, value): + attr = self._spaces[space][name] + nl_type = attr['value'] + if attr["type"] == 'nest': + nl_type |= Netlink.NLA_F_NESTED + attr_payload = b'' + for subname, subvalue in value.items(): + attr_payload += self._add_attr(attr['nested-attributes'], subname, subvalue) + elif attr["type"] == 'u32': + attr_payload = struct.pack("I", int(value)) + elif attr["type"] == 'string': + attr_payload = str(value).encode('ascii') + b'\x00' + elif attr["type"] == 'binary': + attr_payload = value + else: + raise Exception(f'Unknown type at {space} {name} {value} {attr["type"]}') + + pad = b'\x00' * ((4 - len(attr_payload) % 4) % 4) + return struct.pack('HH', len(attr_payload) + 4, nl_type) + attr_payload + pad + + def _decode_enum(self, rsp, attr_spec): + raw = rsp[attr_spec['name']] + enum = self._types[attr_spec['enum']] + i = attr_spec.get('value-start', 0) + if 'enum-as-flags' in attr_spec and attr_spec['enum-as-flags']: + value = set() + while raw: + if raw & 1: + value.add(enum['entries'][i]) + raw >>= 1 + i += 1 + else: + value = enum['entries'][raw - i] + rsp[attr_spec['name']] = value + + def _decode(self, attrs, space): + attr_space = self._spaces[space] + rsp = dict() + for attr in attrs: + attr_spec = attr_space.attr_list[attr.type] + if attr_spec["type"] == 'nest': + subdict = self._decode(NlAttrs(attr.raw), attr_spec['nested-attributes']) + rsp[attr_spec['name']] = subdict + elif attr_spec['type'] == 'u32': + rsp[attr_spec['name']] = attr.as_u32() + elif attr_spec['type'] == 'u64': + rsp[attr_spec['name']] = attr.as_u64() + elif attr_spec["type"] == 'string': + rsp[attr_spec['name']] = attr.as_strz() + elif attr_spec["type"] == 'binary': + rsp[attr_spec['name']] = attr.as_bin() + else: + raise Exception(f'Unknown {attr.type} {attr_spec["name"]} {attr_spec["type"]}') + + if 'enum' in attr_spec: + self._decode_enum(rsp, attr_spec) + return rsp + + def handle_ntf(self, nl_msg, genl_msg): + msg = dict() + if self.include_raw: + msg['nlmsg'] = nl_msg + msg['genlmsg'] = genl_msg + op = self._op_array[genl_msg.genl_cmd] + msg['name'] = op['name'] + msg['msg'] = self._decode(genl_msg.raw_attrs, op['attribute-set']) + self.async_msg_queue.append(msg) + + def check_ntf(self): + while True: + try: + reply = self.sock.recv(128 * 1024, socket.MSG_DONTWAIT) + except BlockingIOError: + return + + nms = NlMsgs(reply) + for nl_msg in nms: + if nl_msg.error: + print("Netlink error in ntf!?", os.strerror(-nl_msg.error)) + print(nl_msg) + continue + if nl_msg.done: + print("Netlink done while checking for ntf!?") + continue + + gm = GenlMsg(nl_msg) + if gm.genl_cmd not in self.async_msg_ids: + print("Unexpected msg id done while checking for ntf", gm) + continue + + self.handle_ntf(nl_msg, gm) + + def _op(self, method, vals, dump=False): + op = self._ops[method] + + nl_flags = Netlink.NLM_F_REQUEST | Netlink.NLM_F_ACK + if dump: + nl_flags |= Netlink.NLM_F_DUMP + + req_seq = random.randint(1024, 65535) + msg = _genl_msg(self.family.family_id, nl_flags, op['value'], 1, req_seq) + for name, value in vals.items(): + msg += self._add_attr(op['attribute-set'], name, value) + msg = _genl_msg_finalize(msg) + + self.sock.send(msg, 0) + + done = False + rsp = [] + while not done: + reply = self.sock.recv(128 * 1024) + nms = NlMsgs(reply, attr_space=self._spaces[op['attribute-set']]) + for nl_msg in nms: + if nl_msg.error: + print("Netlink error:", os.strerror(-nl_msg.error)) + print(nl_msg) + return + if nl_msg.done: + done = True + break + + gm = GenlMsg(nl_msg) + # Check if this is a reply to our request + if nl_msg.nl_seq != req_seq or gm.genl_cmd != op['value']: + if gm.genl_cmd in self.async_msg_ids: + self.handle_ntf(nl_msg, gm) + continue + else: + print('Unexpected message: ' + repr(gm)) + continue + + rsp.append(self._decode(gm.raw_attrs, op['attribute-set'])) + + if not rsp: + return None + if not dump and len(rsp) == 1: + return rsp[0] + return rsp diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py new file mode 100755 index 000000000000..1aa872e582ab --- /dev/null +++ b/tools/net/ynl/ynl-gen-c.py @@ -0,0 +1,2379 @@ +#!/usr/bin/env python + +import argparse +import collections +import jsonschema +import os +import yaml + + +def c_upper(name): + return name.upper().replace('-', '_') + + +def c_lower(name): + return name.lower().replace('-', '_') + + +class BaseNlLib: + def get_family_id(self): + return 'ys->family_id' + + def parse_cb_run(self, cb, data, is_dump=False, indent=1): + ind = '\n\t\t' + '\t' * indent + ' ' + if is_dump: + return f"mnl_cb_run2(ys->rx_buf, len, 0, 0, {cb}, {data},{ind}ynl_cb_array, NLMSG_MIN_TYPE)" + else: + return f"mnl_cb_run2(ys->rx_buf, len, ys->seq, ys->portid,{ind}{cb}, {data},{ind}" + \ + "ynl_cb_array, NLMSG_MIN_TYPE)" + + +class Type: + def __init__(self, family, attr_set, attr): + self.family = family + self.attr = attr + self.value = attr['value'] + self.name = c_lower(attr['name']) + self.type = attr['type'] + self.checks = attr.get('checks', {}) + + if 'len' in attr: + self.len = attr['len'] + if 'nested-attributes' in attr: + self.nested_attrs = attr['nested-attributes'] + if self.nested_attrs == family.name: + self.nested_render_name = f"{family.name}" + else: + self.nested_render_name = f"{family.name}_{c_lower(self.nested_attrs)}" + + self.enum_name = f"{attr_set.name_prefix}{self.name}" + self.enum_name = c_upper(self.enum_name) + self.c_name = c_lower(self.name) + if self.c_name in _C_KW: + self.c_name += '_' + + def __getitem__(self, key): + return self.attr[key] + + def __contains__(self, key): + return key in self.attr + + def is_multi_val(self): + return None + + def is_scalar(self): + return self.type in {'u8', 'u16', 'u32', 'u64', 's32', 's64'} + + def presence_type(self): + return 'bit' + + def presence_member(self, space, type_filter): + if self.presence_type() != type_filter: + return + + if self.presence_type() == 'bit': + pfx = '__' if space == 'user' else '' + return f"{pfx}u32 {self.c_name}:1;" + + if self.presence_type() == 'len': + pfx = '__' if space == 'user' else '' + return f"{pfx}u32 {self.c_name}_len;" + + def _complex_member_type(self, ri): + return None + + def free_needs_iter(self): + return False + + def free(self, ri, var, ref): + if self.is_multi_val() or self.presence_type() == 'len': + ri.cw.p(f'free({var}->{ref}{self.c_name});') + + def arg_member(self, ri): + member = self._complex_member_type(ri) + if member: + return [member + ' *' + self.c_name] + raise Exception(f"Struct member not implemented for class type {self.type}") + + def struct_member(self, ri): + if self.is_multi_val(): + ri.cw.p(f"unsigned int n_{self.c_name};") + member = self._complex_member_type(ri) + if member: + ptr = '*' if self.is_multi_val() else '' + ri.cw.p(f"{member} {ptr}{self.c_name};") + return + members = self.arg_member(ri) + for one in members: + ri.cw.p(one + ';') + + def _attr_policy(self, policy): + return '{ .type = ' + policy + ', }' + + def attr_policy(self, cw): + policy = c_upper('nla-' + self.attr['type']) + + spec = self._attr_policy(policy) + cw.p(f"\t[{self.enum_name}] = {spec},") + + def _attr_typol(self): + raise Exception(f"Type policy not implemented for class type {self.type}") + + def attr_typol(self, cw): + typol = self._attr_typol() + cw.p(f'[{self.enum_name}] = {"{"} .name = "{self.name}", {typol}{"}"},') + + def _attr_put_line(self, ri, var, line): + if self.presence_type() == 'bit': + ri.cw.p(f"if ({var}->_present.{self.c_name})") + elif self.presence_type() == 'len': + ri.cw.p(f"if ({var}->_present.{self.c_name}_len)") + ri.cw.p(f"{line};") + + def _attr_put_simple(self, ri, var, put_type): + line = f"mnl_attr_put_{put_type}(nlh, {self.enum_name}, {var}->{self.c_name})" + self._attr_put_line(ri, var, line) + + def attr_put(self, ri, var): + raise Exception(f"Put not implemented for class type {self.type}") + + def _attr_get(self, ri, var): + raise Exception(f"Attr get not implemented for class type {self.type}") + + def attr_get(self, ri, var, first): + lines, init_lines, local_vars = self._attr_get(ri, var) + if type(lines) is str: + lines = [lines] + if type(init_lines) is str: + init_lines = [init_lines] + + kw = 'if' if first else 'else if' + ri.cw.block_start(line=f"{kw} (mnl_attr_get_type(attr) == {self.enum_name})") + if local_vars: + for local in local_vars: + ri.cw.p(local) + ri.cw.nl() + + if not self.is_multi_val(): + ri.cw.p("if (ynl_attr_validate(yarg, attr))") + ri.cw.p("return MNL_CB_ERROR;") + if self.presence_type() == 'bit': + ri.cw.p(f"{var}->_present.{self.c_name} = 1;") + + if init_lines: + ri.cw.nl() + for line in init_lines: + ri.cw.p(line) + + for line in lines: + ri.cw.p(line) + ri.cw.block_end() + + def _setter_lines(self, ri, member, presence): + raise Exception(f"Setter not implemented for class type {self.type}") + + def setter(self, ri, space, direction, deref=False, ref=None): + ref = (ref if ref else []) + [self.c_name] + var = "req" + member = f"{var}->{'.'.join(ref)}" + + code = [] + presence = '' + for i in range(0, len(ref)): + presence = f"{var}->{'.'.join(ref[:i] + [''])}_present.{ref[i]}" + if self.presence_type() == 'bit': + code.append(presence + ' = 1;') + code += self._setter_lines(ri, member, presence) + + ri.cw.write_func('static inline void', + f"{op_prefix(ri, direction, deref=deref)}_set_{'_'.join(ref)}", + body=code, + args=[f'{type_name(ri, direction, deref=deref)} *{var}'] + self.arg_member(ri)) + + +class TypeUnused(Type): + def presence_type(self): + return '' + + def _attr_typol(self): + return '.type = YNL_PT_REJECT, ' + + def attr_policy(self, cw): + pass + + +class TypePad(Type): + def presence_type(self): + return '' + + def _attr_typol(self): + return '.type = YNL_PT_REJECT, ' + + def attr_policy(self, cw): + pass + + +class TypeScalar(Type): + def __init__(self, family, attr_set, attr): + super().__init__(family, attr_set, attr) + + self.is_bitfield = False + if 'enum' in self.attr: + self.is_bitfield = family.consts[self.attr['enum']]['type'] == 'flags' + if 'enum-as-flags' in self.attr and self.attr['enum-as-flags']: + self.is_bitfield = True + + if 'enum' in self.attr and not self.is_bitfield: + self.type_name = f"enum {family.name}_{c_lower(self.attr['enum'])}" + else: + self.type_name = '__' + self.type + + self.byte_order_comment = '' + if 'byte-order' in attr: + self.byte_order_comment = f" /* {attr['byte-order']} */" + + def _mnl_type(self): + t = self.type + # mnl does not have a helper for signed types + if t[0] == 's': + t = 'u' + t[1:] + return t + + def _attr_policy(self, policy): + if 'flags-mask' in self.checks or self.is_bitfield: + if self.is_bitfield: + mask = self.family.consts[self.attr['enum']].get_mask() + else: + flags = self.family.consts[self.checks['flags-mask']] + flag_cnt = len(flags['entries']) + mask = (1 << flag_cnt) - 1 + return f"NLA_POLICY_MASK({policy}, 0x{mask:x})" + elif 'min' in self.checks: + return f"NLA_POLICY_MIN({policy}, {self.checks['min']})" + elif 'enum' in self.attr: + enum = self.family.consts[self.attr['enum']] + cnt = len(enum['entries']) + return f"NLA_POLICY_MAX({policy}, {cnt - 1})" + return super()._attr_policy(policy) + + def _attr_typol(self): + return f'.type = YNL_PT_U{self.type[1:]}, ' + + def arg_member(self, ri): + return [f'{self.type_name} {self.c_name}{self.byte_order_comment}'] + + def attr_put(self, ri, var): + self._attr_put_simple(ri, var, self._mnl_type()) + + def _attr_get(self, ri, var): + return f"{var}->{self.c_name} = mnl_attr_get_{self._mnl_type()}(attr);", None, None + + def _setter_lines(self, ri, member, presence): + return [f"{member} = {self.c_name};"] + + +class TypeFlag(Type): + def arg_member(self, ri): + return [] + + def _attr_typol(self): + return '.type = YNL_PT_FLAG, ' + + def attr_put(self, ri, var): + self._attr_put_line(ri, var, f"mnl_attr_put(nlh, {self.enum_name}, 0, NULL)") + + def _attr_get(self, ri, var): + return [], None, None + + def _setter_lines(self, ri, member, presence): + return [] + + +class TypeString(Type): + def arg_member(self, ri): + return [f"const char *{self.c_name}"] + + def presence_type(self): + return 'len' + + def struct_member(self, ri): + ri.cw.p(f"char *{self.c_name};") + + def _attr_typol(self): + return f'.type = YNL_PT_NUL_STR, ' + + def _attr_policy(self, policy): + mem = '{ .type = ' + policy + if 'max-len' in self.checks: + mem += ', .len = ' + str(self.checks['max-len']) + mem += ', }' + return mem + + def attr_policy(self, cw): + if self.checks.get('unterminated-ok', False): + policy = 'NLA_STRING' + else: + policy = 'NLA_NUL_STRING' + + spec = self._attr_policy(policy) + cw.p(f"\t[{self.enum_name}] = {spec},") + + def attr_put(self, ri, var): + self._attr_put_simple(ri, var, 'strz') + + def _attr_get(self, ri, var): + len_mem = var + '->_present.' + self.c_name + '_len' + return [f"{len_mem} = len;", + f"{var}->{self.c_name} = malloc(len + 1);", + f"memcpy({var}->{self.c_name}, mnl_attr_get_str(attr), len);", + f"{var}->{self.c_name}[len] = 0;"], \ + ['len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr));'], \ + ['unsigned int len;'] + + def _setter_lines(self, ri, member, presence): + return [f"free({member});", + f"{presence}_len = strlen({self.c_name});", + f"{member} = malloc({presence}_len + 1);", + f'memcpy({member}, {self.c_name}, {presence}_len);', + f'{member}[{presence}_len] = 0;'] + + +class TypeBinary(Type): + def arg_member(self, ri): + return [f"const void *{self.c_name}", 'size_t len'] + + def presence_type(self): + return 'len' + + def struct_member(self, ri): + ri.cw.p(f"void *{self.c_name};") + + def _attr_typol(self): + return f'.type = YNL_PT_BINARY,' + + def _attr_policy(self, policy): + mem = '{ ' + if len(self.checks) == 1 and 'min-len' in self.checks: + mem += '.len = ' + str(self.checks['min-len']) + elif len(self.checks) == 0: + mem += '.type = NLA_BINARY' + else: + raise Exception('One or more of binary type checks not implemented, yet') + mem += ', }' + return mem + + def attr_put(self, ri, var): + self._attr_put_line(ri, var, f"mnl_attr_put(nlh, {self.enum_name}, " + + f"{var}->_present.{self.c_name}_len, {var}->{self.c_name})") + + def _attr_get(self, ri, var): + len_mem = var + '->_present.' + self.c_name + '_len' + return [f"{len_mem} = len;", + f"{var}->{self.c_name} = malloc(len);", + f"memcpy({var}->{self.c_name}, mnl_attr_get_payload(attr), len);"], \ + ['len = mnl_attr_get_payload_len(attr);'], \ + ['unsigned int len;'] + + def _setter_lines(self, ri, member, presence): + return [f"free({member});", + f"{member} = malloc({presence}_len);", + f'memcpy({member}, {self.c_name}, {presence}_len);'] + + +class TypeNest(Type): + def _complex_member_type(self, ri): + return f"struct {self.nested_render_name}" + + def free(self, ri, var, ref): + ri.cw.p(f'{self.nested_render_name}_free(&{var}->{ref}{self.c_name});') + + def _attr_typol(self): + return f'.type = YNL_PT_NEST, .nest = &{self.nested_render_name}_nest, ' + + def _attr_policy(self, policy): + return 'NLA_POLICY_NESTED(' + self.nested_render_name + '_nl_policy)' + + def attr_put(self, ri, var): + self._attr_put_line(ri, var, f"{self.nested_render_name}_put(nlh, " + + f"{self.enum_name}, &{var}->{self.c_name})") + + def _attr_get(self, ri, var): + get_lines = [f"{self.nested_render_name}_parse(&parg, attr);"] + init_lines = [f"parg.rsp_policy = &{self.nested_render_name}_nest;", + f"parg.data = &{var}->{self.c_name};"] + return get_lines, init_lines, None + + def setter(self, ri, space, direction, deref=False, ref=None): + ref = (ref if ref else []) + [self.c_name] + + for _, attr in ri.family.pure_nested_structs[self.nested_attrs].member_list(): + attr.setter(ri, self.nested_attrs, direction, deref=deref, ref=ref) + + +class TypeMultiAttr(Type): + def is_multi_val(self): + return True + + def presence_type(self): + return 'count' + + def _complex_member_type(self, ri): + if 'type' not in self.attr or self.attr['type'] == 'nest': + return f"struct {self.nested_render_name}" + elif self.attr['type'] in scalars: + scalar_pfx = '__' if ri.ku_space == 'user' else '' + return scalar_pfx + self.attr['type'] + else: + raise Exception(f"Sub-type {self.attr['type']} not supported yet") + + def free_needs_iter(self): + return 'type' not in self.attr or self.attr['type'] == 'nest' + + def free(self, ri, var, ref): + if 'type' not in self.attr or self.attr['type'] == 'nest': + ri.cw.p(f"for (i = 0; i < {var}->{ref}n_{self.c_name}; i++)") + ri.cw.p(f'{self.nested_render_name}_free(&{var}->{ref}{self.c_name}[i]);') + + def _attr_typol(self): + if 'type' not in self.attr or self.attr['type'] == 'nest': + return f'.type = YNL_PT_NEST, .nest = &{self.nested_render_name}_nest, ' + elif self.attr['type'] in scalars: + return f".type = YNL_PT_U{self.attr['type'][1:]}, " + else: + raise Exception(f"Sub-type {self.attr['type']} not supported yet") + + def _attr_get(self, ri, var): + return f'{var}->n_{self.c_name}++;', None, None + + +class TypeArrayNest(Type): + def is_multi_val(self): + return True + + def presence_type(self): + return 'count' + + def _complex_member_type(self, ri): + if 'sub-type' not in self.attr or self.attr['sub-type'] == 'nest': + return f"struct {self.nested_render_name}" + elif self.attr['sub-type'] in scalars: + scalar_pfx = '__' if ri.ku_space == 'user' else '' + return scalar_pfx + self.attr['sub-type'] + else: + raise Exception(f"Sub-type {self.attr['sub-type']} not supported yet") + + def _attr_typol(self): + return f'.type = YNL_PT_NEST, .nest = &{self.nested_render_name}_nest, ' + + def _attr_get(self, ri, var): + local_vars = ['const struct nlattr *attr2;'] + get_lines = [f'attr_{self.c_name} = attr;', + 'mnl_attr_for_each_nested(attr2, attr)', + f'\t{var}->n_{self.c_name}++;'] + return get_lines, None, local_vars + + +class TypeNestTypeValue(Type): + def _complex_member_type(self, ri): + return f"struct {self.nested_render_name}" + + def _attr_typol(self): + return f'.type = YNL_PT_NEST, .nest = &{self.nested_render_name}_nest, ' + + def _attr_get(self, ri, var): + prev = 'attr' + tv_args = '' + get_lines = [] + local_vars = [] + init_lines = [f"parg.rsp_policy = &{self.nested_render_name}_nest;", + f"parg.data = &{var}->{self.c_name};"] + if 'type-value' in self.attr: + tv_names = [c_lower(x) for x in self.attr["type-value"]] + local_vars += [f'const struct nlattr *attr_{", *attr_".join(tv_names)};'] + local_vars += [f'__u32 {", ".join(tv_names)};'] + for level in self.attr["type-value"]: + level = c_lower(level) + get_lines += [f'attr_{level} = mnl_attr_get_payload({prev});'] + get_lines += [f'{level} = mnl_attr_get_type(attr_{level});'] + prev = 'attr_' + level + + tv_args = f", {', '.join(tv_names)}" + + get_lines += [f"{self.nested_render_name}_parse(&parg, {prev}{tv_args});"] + return get_lines, init_lines, local_vars + + +class Struct: + def __init__(self, family, space_name, type_list=None, inherited=None): + self.family = family + self.space_name = space_name + self.attr_set = family.attr_sets[space_name] + # Use list to catch comparisons with empty sets + self._inherited = inherited if inherited is not None else [] + self.inherited = [] + + self.nested = type_list is None + if family.name == c_lower(space_name): + self.render_name = f"{family.name}" + else: + self.render_name = f"{family.name}_{c_lower(space_name)}" + self.struct_name = 'struct ' + self.render_name + self.ptr_name = self.struct_name + ' *' + + self.request = False + self.reply = False + + self.attr_list = [] + self.attrs = dict() + if type_list: + for t in type_list: + self.attr_list.append((t, self.attr_set[t]),) + else: + for t in self.attr_set: + self.attr_list.append((t, self.attr_set[t]),) + + max_val = 0 + self.attr_max_val = None + for name, attr in self.attr_list: + if attr.value > max_val: + max_val = attr.value + self.attr_max_val = attr + self.attrs[name] = attr + + def __iter__(self): + yield from self.attrs + + def __getitem__(self, key): + return self.attrs[key] + + def member_list(self): + return self.attr_list + + def set_inherited(self, new_inherited): + if self._inherited != new_inherited: + raise Exception("Inheriting different members not supported") + self.inherited = [c_lower(x) for x in sorted(self._inherited)] + + +class EnumEntry: + def __init__(self, enum_set, yaml, prev, value_start): + if isinstance(yaml, str): + self.name = yaml + yaml = {} + self.doc = '' + else: + self.name = yaml['name'] + self.doc = yaml.get('doc', '') + + self.yaml = yaml + self.enum_set = enum_set + self.c_name = c_upper(enum_set.value_pfx + self.name) + + if 'value' in yaml: + self.value = yaml['value'] + if prev: + self.value_change = (self.value != prev.value + 1) + elif prev: + self.value_change = False + self.value = prev.value + 1 + else: + self.value = value_start + self.value_change = (self.value != 0) + + self.value_change = self.value_change or self.enum_set['type'] == 'flags' + + def __getitem__(self, key): + return self.yaml[key] + + def __contains__(self, key): + return key in self.yaml + + def has_doc(self): + return bool(self.doc) + + # raw value, i.e. the id in the enum, unlike user value which is a mask for flags + def raw_value(self): + return self.value + + # user value, same as raw value for enums, for flags it's the mask + def user_value(self): + if self.enum_set['type'] == 'flags': + return 1 << self.value + else: + return self.value + + +class EnumSet: + def __init__(self, family, yaml): + self.yaml = yaml + self.family = family + + self.render_name = c_lower(family.name + '-' + yaml['name']) + self.enum_name = 'enum ' + self.render_name + + self.value_pfx = yaml.get('name-prefix', f"{family.name}-{yaml['name']}-") + + self.type = yaml['type'] + + prev_entry = None + value_start = self.yaml.get('value-start', 0) + self.entries = {} + self.entry_list = [] + for entry in self.yaml['entries']: + e = EnumEntry(self, entry, prev_entry, value_start) + self.entries[e.name] = e + self.entry_list.append(e) + prev_entry = e + + def __getitem__(self, key): + return self.yaml[key] + + def __contains__(self, key): + return key in self.yaml + + def has_doc(self): + if 'doc' in self.yaml: + return True + for entry in self.entry_list: + if entry.has_doc(): + return True + return False + + def get_mask(self): + mask = 0 + idx = self.yaml.get('value-start', 0) + for _ in self.entry_list: + mask |= 1 << idx + idx += 1 + return mask + + +class AttrSet: + def __init__(self, family, yaml): + self.yaml = yaml + + self.attrs = dict() + self.name = self.yaml['name'] + if 'subset-of' not in yaml: + self.subset_of = None + if 'name-prefix' in yaml: + pfx = yaml['name-prefix'] + elif self.name == family.name: + pfx = family.name + '-a-' + else: + pfx = f"{family.name}-a-{self.name}-" + self.name_prefix = c_upper(pfx) + self.max_name = c_upper(self.yaml.get('attr-max-name', f"{self.name_prefix}max")) + else: + self.subset_of = self.yaml['subset-of'] + self.name_prefix = family.attr_sets[self.subset_of].name_prefix + self.max_name = family.attr_sets[self.subset_of].max_name + + self.c_name = c_lower(self.name) + if self.c_name in _C_KW: + self.c_name += '_' + if self.c_name == family.c_name: + self.c_name = '' + + val = 0 + for elem in self.yaml['attributes']: + if 'value' in elem: + val = elem['value'] + else: + elem['value'] = val + val += 1 + + if 'multi-attr' in elem and elem['multi-attr']: + attr = TypeMultiAttr(family, self, elem) + elif elem['type'] in scalars: + attr = TypeScalar(family, self, elem) + elif elem['type'] == 'unused': + attr = TypeUnused(family, self, elem) + elif elem['type'] == 'pad': + attr = TypePad(family, self, elem) + elif elem['type'] == 'flag': + attr = TypeFlag(family, self, elem) + elif elem['type'] == 'string': + attr = TypeString(family, self, elem) + elif elem['type'] == 'binary': + attr = TypeBinary(family, self, elem) + elif elem['type'] == 'nest': + attr = TypeNest(family, self, elem) + elif elem['type'] == 'array-nest': + attr = TypeArrayNest(family, self, elem) + elif elem['type'] == 'nest-type-value': + attr = TypeNestTypeValue(family, self, elem) + else: + raise Exception(f"No typed class for type {elem['type']}") + + self.attrs[elem['name']] = attr + + def __getitem__(self, key): + return self.attrs[key] + + def __contains__(self, key): + return key in self.yaml + + def __iter__(self): + yield from self.attrs + + def items(self): + return self.attrs.items() + + +class Operation: + def __init__(self, family, yaml, value): + self.yaml = yaml + self.value = value + + self.name = self.yaml['name'] + self.render_name = family.name + '_' + c_lower(self.name) + self.is_async = 'notify' in yaml or 'event' in yaml + if not self.is_async: + self.enum_name = family.op_prefix + c_upper(self.name) + else: + self.enum_name = family.async_op_prefix + c_upper(self.name) + + self.dual_policy = ('do' in yaml and 'request' in yaml['do']) and \ + ('dump' in yaml and 'request' in yaml['dump']) + + def __getitem__(self, key): + return self.yaml[key] + + def __contains__(self, key): + return key in self.yaml + + def add_notification(self, op): + if 'notify' not in self.yaml: + self.yaml['notify'] = dict() + self.yaml['notify']['reply'] = self.yaml['do']['reply'] + self.yaml['notify']['cmds'] = [] + self.yaml['notify']['cmds'].append(op) + + +class Family: + def __init__(self, file_name): + with open(file_name, "r") as stream: + self.yaml = yaml.safe_load(stream) + + self.proto = self.yaml.get('protocol', 'genetlink') + + with open(os.path.dirname(os.path.dirname(file_name)) + + f'/{self.proto}.yaml', "r") as stream: + schema = yaml.safe_load(stream) + + jsonschema.validate(self.yaml, schema) + + if self.yaml.get('protocol', 'genetlink') not in {'genetlink', 'genetlink-c', 'genetlink-legacy'}: + raise Exception("Codegen only supported for genetlink") + + self.fam_key = c_upper(self.yaml.get('c-family-name', self.yaml["name"] + '_FAMILY_NAME')) + self.ver_key = c_upper(self.yaml.get('c-version-name', self.yaml["name"] + '_FAMILY_VERSION')) + + if 'definitions' not in self.yaml: + self.yaml['definitions'] = [] + + self.name = self.yaml['name'] + self.c_name = c_lower(self.name) + if 'uapi-header' in self.yaml: + self.uapi_header = self.yaml['uapi-header'] + else: + self.uapi_header = f"linux/{self.name}.h" + if 'name-prefix' in self.yaml['operations']: + self.op_prefix = c_upper(self.yaml['operations']['name-prefix']) + else: + self.op_prefix = c_upper(self.yaml['name'] + '-cmd-') + if 'async-prefix' in self.yaml['operations']: + self.async_op_prefix = c_upper(self.yaml['operations']['async-prefix']) + else: + self.async_op_prefix = self.op_prefix + + self.mcgrps = self.yaml.get('mcast-groups', {'list': []}) + + self.consts = dict() + # list of all operations + self.msg_list = [] + # dict of operations which have their own message type (have attributes) + self.ops = collections.OrderedDict() + self.attr_sets = dict() + self.attr_sets_list = [] + + self.hooks = dict() + for when in ['pre', 'post']: + self.hooks[when] = dict() + for op_mode in ['do', 'dump']: + self.hooks[when][op_mode] = dict() + self.hooks[when][op_mode]['set'] = set() + self.hooks[when][op_mode]['list'] = [] + + # dict space-name -> 'request': set(attrs), 'reply': set(attrs) + self.root_sets = dict() + # dict space-name -> set('request', 'reply') + self.pure_nested_structs = dict() + self.all_notify = dict() + + self._mock_up_events() + + self._dictify() + self._load_root_sets() + self._load_nested_sets() + self._load_all_notify() + self._load_hooks() + + self.kernel_policy = self.yaml.get('kernel-policy', 'split') + if self.kernel_policy == 'global': + self._load_global_policy() + + def __getitem__(self, key): + return self.yaml[key] + + def get(self, key, default=None): + return self.yaml.get(key, default) + + # Fake a 'do' equivalent of all events, so that we can render their response parsing + def _mock_up_events(self): + for op in self.yaml['operations']['list']: + if 'event' in op: + op['do'] = { + 'reply': { + 'attributes': op['event']['attributes'] + } + } + + def _dictify(self): + for elem in self.yaml['definitions']: + if elem['type'] == 'enum' or elem['type'] == 'flags': + self.consts[elem['name']] = EnumSet(self, elem) + else: + self.consts[elem['name']] = elem + + for elem in self.yaml['attribute-sets']: + attr_set = AttrSet(self, elem) + self.attr_sets[elem['name']] = attr_set + self.attr_sets_list.append((elem['name'], attr_set), ) + + ntf = [] + val = 0 + for elem in self.yaml['operations']['list']: + if 'value' in elem: + val = elem['value'] + + op = Operation(self, elem, val) + val += 1 + + self.msg_list.append(op) + if 'notify' in elem: + ntf.append(op) + continue + if 'attribute-set' not in elem: + continue + self.ops[elem['name']] = op + for n in ntf: + self.ops[n['notify']].add_notification(n) + + def _load_root_sets(self): + for op_name, op in self.ops.items(): + if 'attribute-set' not in op: + continue + + req_attrs = set() + rsp_attrs = set() + for op_mode in ['do', 'dump']: + if op_mode in op and 'request' in op[op_mode]: + req_attrs.update(set(op[op_mode]['request']['attributes'])) + if op_mode in op and 'reply' in op[op_mode]: + rsp_attrs.update(set(op[op_mode]['reply']['attributes'])) + + if op['attribute-set'] not in self.root_sets: + self.root_sets[op['attribute-set']] = {'request': req_attrs, 'reply': rsp_attrs} + else: + self.root_sets[op['attribute-set']]['request'].update(req_attrs) + self.root_sets[op['attribute-set']]['reply'].update(rsp_attrs) + + def _load_nested_sets(self): + for root_set, rs_members in self.root_sets.items(): + for attr, spec in self.attr_sets[root_set].items(): + if 'nested-attributes' in spec: + inherit = set() + nested = spec['nested-attributes'] + if nested not in self.root_sets: + self.pure_nested_structs[nested] = Struct(self, nested, inherited=inherit) + if attr in rs_members['request']: + self.pure_nested_structs[nested].request = True + if attr in rs_members['reply']: + self.pure_nested_structs[nested].reply = True + + if 'type-value' in spec: + if nested in self.root_sets: + raise Exception("Inheriting members to a space used as root not supported") + inherit.update(set(spec['type-value'])) + elif spec['type'] == 'array-nest': + inherit.add('idx') + self.pure_nested_structs[nested].set_inherited(inherit) + + def _load_all_notify(self): + for op_name, op in self.ops.items(): + if not op: + continue + + if 'notify' in op: + self.all_notify[op_name] = op['notify']['cmds'] + + def _load_global_policy(self): + global_set = set() + attr_set_name = None + for op_name, op in self.ops.items(): + if not op: + continue + if 'attribute-set' not in op: + continue + + if attr_set_name is None: + attr_set_name = op['attribute-set'] + if attr_set_name != op['attribute-set']: + raise Exception('For a global policy all ops must use the same set') + + for op_mode in {'do', 'dump'}: + if op_mode in op: + global_set.update(op[op_mode].get('request', [])) + + self.global_policy = [] + self.global_policy_set = attr_set_name + for attr in self.attr_sets[attr_set_name]: + if attr in global_set: + self.global_policy.append(attr) + + def _load_hooks(self): + for op in self.ops.values(): + for op_mode in ['do', 'dump']: + if op_mode not in op: + continue + for when in ['pre', 'post']: + if when not in op[op_mode]: + continue + name = op[op_mode][when] + if name in self.hooks[when][op_mode]['set']: + continue + self.hooks[when][op_mode]['set'].add(name) + self.hooks[when][op_mode]['list'].append(name) + + +class RenderInfo: + def __init__(self, cw, family, ku_space, op, op_name, op_mode, attr_set=None): + self.family = family + self.nl = cw.nlib + self.ku_space = ku_space + self.op = op + self.op_name = op_name + self.op_mode = op_mode + + # 'do' and 'dump' response parsing is identical + if op_mode != 'do' and 'dump' in op and 'do' in op and 'reply' in op['do'] and \ + op["do"]["reply"] == op["dump"]["reply"]: + self.type_consistent = True + else: + self.type_consistent = op_mode == 'event' + + self.attr_set = attr_set + if not self.attr_set: + self.attr_set = op['attribute-set'] + + if op: + self.type_name = c_lower(op_name) + else: + self.type_name = c_lower(attr_set) + + self.cw = cw + + self.struct = dict() + for op_dir in ['request', 'reply']: + if op and op_dir in op[op_mode]: + self.struct[op_dir] = Struct(family, self.attr_set, + type_list=op[op_mode][op_dir]['attributes']) + if op_mode == 'event': + self.struct['reply'] = Struct(family, self.attr_set, type_list=op['event']['attributes']) + + +class CodeWriter: + def __init__(self, nlib, out_file): + self.nlib = nlib + + self._nl = False + self._silent_block = False + self._ind = 0 + self._out = out_file + + @classmethod + def _is_cond(cls, line): + return line.startswith('if') or line.startswith('while') or line.startswith('for') + + def p(self, line, add_ind=0): + if self._nl: + self._out.write('\n') + self._nl = False + ind = self._ind + if line[-1] == ':': + ind -= 1 + if self._silent_block: + ind += 1 + self._silent_block = line.endswith(')') and CodeWriter._is_cond(line) + if add_ind: + ind += add_ind + self._out.write('\t' * ind + line + '\n') + + def nl(self): + self._nl = True + + def block_start(self, line=''): + if line: + line = line + ' ' + self.p(line + '{') + self._ind += 1 + + def block_end(self, line=''): + if line and line[0] not in {';', ','}: + line = ' ' + line + self._ind -= 1 + self.p('}' + line) + + def write_doc_line(self, doc, indent=True): + words = doc.split() + line = ' *' + for word in words: + if len(line) + len(word) >= 79: + self.p(line) + line = ' *' + if indent: + line += ' ' + line += ' ' + word + self.p(line) + + def write_func_prot(self, qual_ret, name, args=None, doc=None, suffix=''): + if not args: + args = ['void'] + + if doc: + self.p('/*') + self.p(' * ' + doc) + self.p(' */') + + oneline = qual_ret + if qual_ret[-1] != '*': + oneline += ' ' + oneline += f"{name}({', '.join(args)}){suffix}" + + if len(oneline) < 80: + self.p(oneline) + return + + v = qual_ret + if len(v) > 3: + self.p(v) + v = '' + elif qual_ret[-1] != '*': + v += ' ' + v += name + '(' + ind = '\t' * (len(v) // 8) + ' ' * (len(v) % 8) + delta_ind = len(v) - len(ind) + v += args[0] + i = 1 + while i < len(args): + next_len = len(v) + len(args[i]) + if v[0] == '\t': + next_len += delta_ind + if next_len > 76: + self.p(v + ',') + v = ind + else: + v += ', ' + v += args[i] + i += 1 + self.p(v + ')' + suffix) + + def write_func_lvar(self, local_vars): + if not local_vars: + return + + if type(local_vars) is str: + local_vars = [local_vars] + + local_vars.sort(key=len, reverse=True) + for var in local_vars: + self.p(var) + self.nl() + + def write_func(self, qual_ret, name, body, args=None, local_vars=None): + self.write_func_prot(qual_ret=qual_ret, name=name, args=args) + self.write_func_lvar(local_vars=local_vars) + + self.block_start() + for line in body: + self.p(line) + self.block_end() + + def writes_defines(self, defines): + longest = 0 + for define in defines: + if len(define[0]) > longest: + longest = len(define[0]) + longest = ((longest + 8) // 8) * 8 + for define in defines: + line = '#define ' + define[0] + line += '\t' * ((longest - len(define[0]) + 7) // 8) + if type(define[1]) is int: + line += str(define[1]) + elif type(define[1]) is str: + line += '"' + define[1] + '"' + self.p(line) + + def write_struct_init(self, members): + longest = max([len(x[0]) for x in members]) + longest += 1 # because we prepend a . + longest = ((longest + 8) // 8) * 8 + for one in members: + line = '.' + one[0] + line += '\t' * ((longest - len(one[0]) - 1 + 7) // 8) + line += '= ' + one[1] + ',' + self.p(line) + + +scalars = {'u8', 'u16', 'u32', 'u64', 's32', 's64'} + +direction_to_suffix = { + 'reply': '_rsp', + 'request': '_req', + '': '' +} + +op_mode_to_wrapper = { + 'do': '', + 'dump': '_list', + 'notify': '_ntf', + 'event': '', +} + +_C_KW = { + 'do' +} + + +def rdir(direction): + if direction == 'reply': + return 'request' + if direction == 'request': + return 'reply' + return direction + + +def op_prefix(ri, direction, deref=False): + suffix = f"_{ri.type_name}" + + if not ri.op_mode or ri.op_mode == 'do': + suffix += f"{direction_to_suffix[direction]}" + else: + if direction == 'request': + suffix += '_req_dump' + else: + if ri.type_consistent: + if deref: + suffix += f"{direction_to_suffix[direction]}" + else: + suffix += op_mode_to_wrapper[ri.op_mode] + else: + suffix += '_rsp' + suffix += '_dump' if deref else '_list' + + return f"{ri.family['name']}{suffix}" + + +def type_name(ri, direction, deref=False): + return f"struct {op_prefix(ri, direction, deref=deref)}" + + +def print_prototype(ri, direction, terminate=True, doc=None): + suffix = ';' if terminate else '' + + fname = ri.op.render_name + if ri.op_mode == 'dump': + fname += '_dump' + + args = ['struct ynl_sock *ys'] + if 'request' in ri.op[ri.op_mode]: + args.append(f"{type_name(ri, direction)} *" + f"{direction_to_suffix[direction][1:]}") + + ret = 'int' + if 'reply' in ri.op[ri.op_mode]: + ret = f"{type_name(ri, rdir(direction))} *" + + ri.cw.write_func_prot(ret, fname, args, doc=doc, suffix=suffix) + + +def print_req_prototype(ri): + print_prototype(ri, "request", doc=ri.op['doc']) + + +def print_dump_prototype(ri): + print_prototype(ri, "request") + + +def put_typol_fwd(cw, struct): + cw.p(f'extern struct ynl_policy_nest {struct.render_name}_nest;') + + +def put_typol(cw, struct): + type_max = struct.attr_set.max_name + cw.block_start(line=f'struct ynl_policy_attr {struct.render_name}_policy[{type_max} + 1] =') + + for _, arg in struct.member_list(): + arg.attr_typol(cw) + + cw.block_end(line=';') + cw.nl() + + cw.block_start(line=f'struct ynl_policy_nest {struct.render_name}_nest =') + cw.p(f'.max_attr = {type_max},') + cw.p(f'.table = {struct.render_name}_policy,') + cw.block_end(line=';') + cw.nl() + + +def put_req_nested(ri, struct): + func_args = ['struct nlmsghdr *nlh', + 'unsigned int attr_type', + f'{struct.ptr_name}obj'] + + ri.cw.write_func_prot('int', f'{struct.render_name}_put', func_args) + ri.cw.block_start() + ri.cw.write_func_lvar('struct nlattr *nest;') + + ri.cw.p("nest = mnl_attr_nest_start(nlh, attr_type);") + + for _, arg in struct.member_list(): + arg.attr_put(ri, "obj") + + ri.cw.p("mnl_attr_nest_end(nlh, nest);") + + ri.cw.nl() + ri.cw.p('return 0;') + ri.cw.block_end() + ri.cw.nl() + + +def _multi_parse(ri, struct, init_lines, local_vars): + if struct.nested: + iter_line = "mnl_attr_for_each_nested(attr, nested)" + else: + iter_line = "mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr))" + + array_nests = set() + multi_attrs = set() + needs_parg = False + for arg, aspec in struct.member_list(): + if aspec['type'] == 'array-nest': + local_vars.append(f'const struct nlattr *attr_{aspec.c_name};') + array_nests.add(arg) + if 'multi-attr' in aspec: + multi_attrs.add(arg) + needs_parg |= 'nested-attributes' in aspec + if array_nests or multi_attrs: + local_vars.append('int i;') + if needs_parg: + local_vars.append('struct ynl_parse_arg parg;') + init_lines.append('parg.ys = yarg->ys;') + + ri.cw.block_start() + ri.cw.write_func_lvar(local_vars) + + for line in init_lines: + ri.cw.p(line) + ri.cw.nl() + + for arg in struct.inherited: + ri.cw.p(f'dst->{arg} = {arg};') + + ri.cw.nl() + ri.cw.block_start(line=iter_line) + + first = True + for _, arg in struct.member_list(): + arg.attr_get(ri, 'dst', first=first) + first = False + + ri.cw.block_end() + ri.cw.nl() + + for anest in sorted(array_nests): + aspec = struct[anest] + + ri.cw.block_start(line=f"if (dst->n_{aspec.c_name})") + ri.cw.p(f"dst->{aspec.c_name} = calloc(dst->n_{aspec.c_name}, sizeof(*dst->{aspec.c_name}));") + ri.cw.p('i = 0;') + ri.cw.p(f"parg.rsp_policy = &{aspec.nested_render_name}_nest;") + ri.cw.block_start(line=f"mnl_attr_for_each_nested(attr, attr_{aspec.c_name})") + ri.cw.p(f"parg.data = &dst->{aspec.c_name}[i];") + ri.cw.p(f"if ({aspec.nested_render_name}_parse(&parg, attr, mnl_attr_get_type(attr)))") + ri.cw.p('return MNL_CB_ERROR;') + ri.cw.p('i++;') + ri.cw.block_end() + ri.cw.block_end() + ri.cw.nl() + + for anest in sorted(multi_attrs): + aspec = struct[anest] + ri.cw.block_start(line=f"if (dst->n_{aspec.c_name})") + ri.cw.p(f"dst->{aspec.c_name} = calloc(dst->n_{aspec.c_name}, sizeof(*dst->{aspec.c_name}));") + ri.cw.p('i = 0;') + if 'nested-attributes' in aspec: + ri.cw.p(f"parg.rsp_policy = &{aspec.nested_render_name}_nest;") + ri.cw.block_start(line=iter_line) + ri.cw.block_start(line=f"if (mnl_attr_get_type(attr) == {aspec.enum_name})") + if 'nested-attributes' in aspec: + ri.cw.p(f"parg.data = &dst->{aspec.c_name}[i];") + ri.cw.p(f"if ({aspec.nested_render_name}_parse(&parg, attr))") + ri.cw.p('return MNL_CB_ERROR;') + elif aspec['type'] in scalars: + t = aspec['type'] + if t[0] == 's': + t = 'u' + t[1:] + ri.cw.p(f"dst->{aspec.c_name}[i] = mnl_attr_get_{t}(attr);") + else: + raise Exception('Nest parsing type not supported yet') + ri.cw.p('i++;') + ri.cw.block_end() + ri.cw.block_end() + ri.cw.block_end() + ri.cw.nl() + + if struct.nested: + ri.cw.p('return 0;') + else: + ri.cw.p('return MNL_CB_OK;') + ri.cw.block_end() + ri.cw.nl() + + +def parse_rsp_nested(ri, struct): + func_args = ['struct ynl_parse_arg *yarg', + 'const struct nlattr *nested'] + for arg in struct.inherited: + func_args.append('__u32 ' + arg) + + local_vars = ['const struct nlattr *attr;', + f'{struct.ptr_name}dst = yarg->data;'] + init_lines = [] + + ri.cw.write_func_prot('int', f'{struct.render_name}_parse', func_args) + + _multi_parse(ri, struct, init_lines, local_vars) + + +def parse_rsp_msg(ri, deref=False): + if 'reply' not in ri.op[ri.op_mode] and ri.op_mode != 'event': + return + + func_args = ['const struct nlmsghdr *nlh', + 'void *data'] + + local_vars = [f'{type_name(ri, "reply", deref=deref)} *dst;', + 'struct ynl_parse_arg *yarg = data;', + 'const struct nlattr *attr;'] + init_lines = ['dst = yarg->data;'] + + ri.cw.write_func_prot('int', f'{op_prefix(ri, "reply", deref=deref)}_parse', func_args) + + _multi_parse(ri, ri.struct["reply"], init_lines, local_vars) + + +def print_req(ri): + ret_ok = '0' + ret_err = '-1' + direction = "request" + local_vars = ['struct nlmsghdr *nlh;', + 'int len, err;'] + + if 'reply' in ri.op[ri.op_mode]: + ret_ok = 'rsp' + ret_err = 'NULL' + local_vars += [f'{type_name(ri, rdir(direction))} *rsp;', + 'struct ynl_parse_arg yarg = { .ys = ys, };'] + + print_prototype(ri, direction, terminate=False) + ri.cw.block_start() + ri.cw.write_func_lvar(local_vars) + + ri.cw.p(f"nlh = ynl_gemsg_start_req(ys, {ri.nl.get_family_id()}, {ri.op.enum_name}, 1);") + + ri.cw.p(f"ys->req_policy = &{ri.struct['request'].render_name}_nest;") + if 'reply' in ri.op[ri.op_mode]: + ri.cw.p(f"yarg.rsp_policy = &{ri.struct['reply'].render_name}_nest;") + ri.cw.nl() + for _, attr in ri.struct["request"].member_list(): + attr.attr_put(ri, "req") + ri.cw.nl() + + ri.cw.p('err = mnl_socket_sendto(ys->sock, nlh, nlh->nlmsg_len);') + ri.cw.p('if (err < 0)') + ri.cw.p(f"return {ret_err};") + ri.cw.nl() + ri.cw.p('len = mnl_socket_recvfrom(ys->sock, ys->rx_buf, MNL_SOCKET_BUFFER_SIZE);') + ri.cw.p('if (len < 0)') + ri.cw.p(f"return {ret_err};") + ri.cw.nl() + + if 'reply' in ri.op[ri.op_mode]: + ri.cw.p('rsp = calloc(1, sizeof(*rsp));') + ri.cw.p('yarg.data = rsp;') + ri.cw.nl() + ri.cw.p(f"err = {ri.nl.parse_cb_run(op_prefix(ri, 'reply') + '_parse', '&yarg', False)};") + ri.cw.p('if (err < 0)') + ri.cw.p('goto err_free;') + ri.cw.nl() + + ri.cw.p('err = ynl_recv_ack(ys, err);') + ri.cw.p('if (err)') + ri.cw.p('goto err_free;') + ri.cw.nl() + ri.cw.p(f"return {ret_ok};") + ri.cw.nl() + ri.cw.p('err_free:') + + if 'reply' in ri.op[ri.op_mode]: + ri.cw.p(f"{call_free(ri, rdir(direction), 'rsp')}") + ri.cw.p(f"return {ret_err};") + ri.cw.block_end() + + +def print_dump(ri): + direction = "request" + print_prototype(ri, direction, terminate=False) + ri.cw.block_start() + local_vars = ['struct ynl_dump_state yds = {};', + 'struct nlmsghdr *nlh;', + 'int len, err;'] + + for var in local_vars: + ri.cw.p(f'{var}') + ri.cw.nl() + + ri.cw.p('yds.ys = ys;') + ri.cw.p(f"yds.alloc_sz = sizeof({type_name(ri, rdir(direction))});") + ri.cw.p(f"yds.cb = {op_prefix(ri, 'reply', deref=True)}_parse;") + ri.cw.p(f"yds.rsp_policy = &{ri.struct['reply'].render_name}_nest;") + ri.cw.nl() + ri.cw.p(f"nlh = ynl_gemsg_start_dump(ys, {ri.nl.get_family_id()}, {ri.op.enum_name}, 1);") + + if "request" in ri.op[ri.op_mode]: + ri.cw.p(f"ys->req_policy = &{ri.struct['request'].render_name}_nest;") + ri.cw.nl() + for _, attr in ri.struct["request"].member_list(): + attr.attr_put(ri, "req") + ri.cw.nl() + + ri.cw.p('err = mnl_socket_sendto(ys->sock, nlh, nlh->nlmsg_len);') + ri.cw.p('if (err < 0)') + ri.cw.p('return NULL;') + ri.cw.nl() + + ri.cw.block_start(line='do') + ri.cw.p('len = mnl_socket_recvfrom(ys->sock, ys->rx_buf, MNL_SOCKET_BUFFER_SIZE);') + ri.cw.p('if (len < 0)') + ri.cw.p('goto free_list;') + ri.cw.nl() + ri.cw.p(f"err = {ri.nl.parse_cb_run('ynl_dump_trampoline', '&yds', False, indent=2)};") + ri.cw.p('if (err < 0)') + ri.cw.p('goto free_list;') + ri.cw.block_end(line='while (err > 0);') + ri.cw.nl() + + ri.cw.p('return yds.first;') + ri.cw.nl() + ri.cw.p('free_list:') + ri.cw.p(call_free(ri, rdir(direction), 'yds.first')) + ri.cw.p('return NULL;') + ri.cw.block_end() + + +def call_free(ri, direction, var): + return f"{op_prefix(ri, direction)}_free({var});" + + +def free_arg_name(direction): + if direction: + return direction_to_suffix[direction][1:] + return 'obj' + + +def print_free_prototype(ri, direction, suffix=';'): + name = op_prefix(ri, direction) + arg = free_arg_name(direction) + ri.cw.write_func_prot('void', f"{name}_free", [f"struct {name} *{arg}"], suffix=suffix) + + +def _print_type(ri, direction, struct): + suffix = f'_{ri.type_name}{direction_to_suffix[direction]}' + + if ri.op_mode == 'dump': + suffix += '_dump' + + ri.cw.block_start(line=f"struct {ri.family['name']}{suffix}") + + meta_started = False + for _, attr in struct.member_list(): + for type_filter in ['len', 'bit']: + line = attr.presence_member(ri.ku_space, type_filter) + if line: + if not meta_started: + ri.cw.block_start(line=f"struct") + meta_started = True + ri.cw.p(line) + if meta_started: + ri.cw.block_end(line='_present;') + ri.cw.nl() + + for arg in struct.inherited: + ri.cw.p(f"__u32 {arg};") + + for _, attr in struct.member_list(): + attr.struct_member(ri) + + ri.cw.block_end(line=';') + ri.cw.nl() + + +def print_type(ri, direction): + _print_type(ri, direction, ri.struct[direction]) + + +def print_type_full(ri, struct): + _print_type(ri, "", struct) + + +def print_type_helpers(ri, direction, deref=False): + print_free_prototype(ri, direction) + + if ri.ku_space == 'user' and direction == 'request': + for _, attr in ri.struct[direction].member_list(): + attr.setter(ri, ri.attr_set, direction, deref=deref) + ri.cw.nl() + + +def print_req_type_helpers(ri): + print_type_helpers(ri, "request") + + +def print_rsp_type_helpers(ri): + if 'reply' not in ri.op[ri.op_mode]: + return + print_type_helpers(ri, "reply") + + +def print_parse_prototype(ri, direction, terminate=True): + suffix = "_rsp" if direction == "reply" else "_req" + term = ';' if terminate else '' + + ri.cw.write_func_prot('void', f"{ri.op.render_name}{suffix}_parse", + ['const struct nlattr **tb', + f"struct {ri.op.render_name}{suffix} *req"], + suffix=term) + + +def print_req_type(ri): + print_type(ri, "request") + + +def print_rsp_type(ri): + if (ri.op_mode == 'do' or ri.op_mode == 'dump') and 'reply' in ri.op[ri.op_mode]: + direction = 'reply' + elif ri.op_mode == 'event': + direction = 'reply' + else: + return + print_type(ri, direction) + + +def print_wrapped_type(ri): + ri.cw.block_start(line=f"{type_name(ri, 'reply')}") + if ri.op_mode == 'dump': + ri.cw.p(f"{type_name(ri, 'reply')} *next;") + elif ri.op_mode == 'notify' or ri.op_mode == 'event': + ri.cw.p('__u16 family;') + ri.cw.p('__u8 cmd;') + ri.cw.p(f"void (*free)({type_name(ri, 'reply')} *ntf);") + ri.cw.p(f"{type_name(ri, 'reply', deref=True)} obj __attribute__ ((aligned (8)));") + ri.cw.block_end(line=';') + ri.cw.nl() + print_free_prototype(ri, 'reply') + ri.cw.nl() + + +def _free_type_members_iter(ri, struct): + for _, attr in struct.member_list(): + if attr.free_needs_iter(): + ri.cw.p('unsigned int i;') + ri.cw.nl() + break + + +def _free_type_members(ri, var, struct, ref=''): + for _, attr in struct.member_list(): + attr.free(ri, var, ref) + + +def _free_type(ri, direction, struct): + var = free_arg_name(direction) + + print_free_prototype(ri, direction, suffix='') + ri.cw.block_start() + _free_type_members_iter(ri, struct) + _free_type_members(ri, var, struct) + if direction: + ri.cw.p(f'free({var});') + ri.cw.block_end() + ri.cw.nl() + + +def free_rsp_nested(ri, struct): + _free_type(ri, "", struct) + + +def print_rsp_free(ri): + if 'reply' not in ri.op[ri.op_mode]: + return + _free_type(ri, 'reply', ri.struct['reply']) + + +def print_dump_type_free(ri): + sub_type = type_name(ri, 'reply') + + print_free_prototype(ri, 'reply', suffix='') + ri.cw.block_start() + ri.cw.p(f"{sub_type} *next = rsp;") + ri.cw.nl() + ri.cw.block_start(line='while (next)') + _free_type_members_iter(ri, ri.struct['reply']) + ri.cw.p('rsp = next;') + ri.cw.p('next = rsp->next;') + ri.cw.nl() + + _free_type_members(ri, 'rsp', ri.struct['reply'], ref='obj.') + ri.cw.p(f'free(rsp);') + ri.cw.block_end() + ri.cw.block_end() + ri.cw.nl() + + +def print_ntf_type_free(ri): + print_free_prototype(ri, 'reply', suffix='') + ri.cw.block_start() + _free_type_members_iter(ri, ri.struct['reply']) + _free_type_members(ri, 'rsp', ri.struct['reply'], ref='obj.') + ri.cw.p(f'free(rsp);') + ri.cw.block_end() + ri.cw.nl() + + +def print_ntf_parse_prototype(family, cw, suffix=';'): + cw.write_func_prot('struct ynl_ntf_base_type *', f"{family['name']}_ntf_parse", + ['struct ynl_sock *ys'], suffix=suffix) + + +def print_ntf_type_parse(family, cw, ku_mode): + print_ntf_parse_prototype(family, cw, suffix='') + cw.block_start() + cw.write_func_lvar(['struct genlmsghdr *genlh;', + 'struct nlmsghdr *nlh;', + 'struct ynl_parse_arg yarg = { .ys = ys, };', + 'struct ynl_ntf_base_type *rsp;', + 'int len, err;', + 'mnl_cb_t parse;']) + cw.p('len = mnl_socket_recvfrom(ys->sock, ys->rx_buf, MNL_SOCKET_BUFFER_SIZE);') + cw.p('if (len < (ssize_t)(sizeof(*nlh) + sizeof(*genlh)))') + cw.p('return NULL;') + cw.nl() + cw.p('nlh = (struct nlmsghdr *)ys->rx_buf;') + cw.p('genlh = mnl_nlmsg_get_payload(nlh);') + cw.nl() + cw.block_start(line='switch (genlh->cmd)') + for ntf_op in sorted(family.all_notify.keys()): + op = family.ops[ntf_op] + ri = RenderInfo(cw, family, ku_mode, op, ntf_op, "notify") + for ntf in op['notify']['cmds']: + cw.p(f"case {ntf.enum_name}:") + cw.p(f"rsp = calloc(1, sizeof({type_name(ri, 'notify')}));") + cw.p(f"parse = {op_prefix(ri, 'reply', deref=True)}_parse;") + cw.p(f"yarg.rsp_policy = &{ri.struct['reply'].render_name}_nest;") + cw.p(f"rsp->free = (void *){op_prefix(ri, 'notify')}_free;") + cw.p('break;') + for op_name, op in family.ops.items(): + if 'event' not in op: + continue + ri = RenderInfo(cw, family, ku_mode, op, op_name, "event") + cw.p(f"case {op.enum_name}:") + cw.p(f"rsp = calloc(1, sizeof({type_name(ri, 'event')}));") + cw.p(f"parse = {op_prefix(ri, 'reply', deref=True)}_parse;") + cw.p(f"yarg.rsp_policy = &{ri.struct['reply'].render_name}_nest;") + cw.p(f"rsp->free = (void *){op_prefix(ri, 'notify')}_free;") + cw.p('break;') + cw.p('default:') + cw.p('ynl_error_unknown_notification(ys, genlh->cmd);') + cw.p('return NULL;') + cw.block_end() + cw.nl() + cw.p('yarg.data = rsp->data;') + cw.nl() + cw.p(f"err = {cw.nlib.parse_cb_run('parse', '&yarg', True)};") + cw.p('if (err < 0)') + cw.p('goto err_free;') + cw.nl() + cw.p('rsp->family = nlh->nlmsg_type;') + cw.p('rsp->cmd = genlh->cmd;') + cw.p('return rsp;') + cw.nl() + cw.p('err_free:') + cw.p('free(rsp);') + cw.p('return NULL;') + cw.block_end() + cw.nl() + + +def print_req_policy_fwd(cw, struct, ri=None, terminate=True): + if terminate and ri and kernel_can_gen_family_struct(struct.family): + return + + if terminate: + prefix = 'extern ' + else: + if kernel_can_gen_family_struct(struct.family) and ri: + prefix = 'static ' + else: + prefix = '' + + suffix = ';' if terminate else ' = {' + + max_attr = struct.attr_max_val + if ri: + name = ri.op.render_name + if ri.op.dual_policy: + name += '_' + ri.op_mode + else: + name = struct.render_name + cw.p(f"{prefix}const struct nla_policy {name}_nl_policy[{max_attr.enum_name} + 1]{suffix}") + + +def print_req_policy(cw, struct, ri=None): + print_req_policy_fwd(cw, struct, ri=ri, terminate=False) + for _, arg in struct.member_list(): + arg.attr_policy(cw) + cw.p("};") + + +def kernel_can_gen_family_struct(family): + return family.proto == 'genetlink' + + +def print_kernel_op_table_fwd(family, cw, terminate): + exported = not kernel_can_gen_family_struct(family) + + if not terminate or exported: + cw.p(f"/* Ops table for {family.name} */") + + pol_to_struct = {'global': 'genl_small_ops', + 'per-op': 'genl_ops', + 'split': 'genl_split_ops'} + struct_type = pol_to_struct[family.kernel_policy] + + if family.kernel_policy == 'split': + cnt = 0 + for op in family.ops.values(): + if 'do' in op: + cnt += 1 + if 'dump' in op: + cnt += 1 + else: + cnt = len(family.ops) + + qual = 'static const' if not exported else 'const' + line = f"{qual} struct {struct_type} {family.name}_nl_ops[{cnt}]" + if terminate: + cw.p(f"extern {line};") + else: + cw.block_start(line=line + ' =') + + if not terminate: + return + + cw.nl() + for name in family.hooks['pre']['do']['list']: + cw.write_func_prot('int', c_lower(name), + ['const struct genl_split_ops *ops', + 'struct sk_buff *skb', 'struct genl_info *info'], suffix=';') + for name in family.hooks['post']['do']['list']: + cw.write_func_prot('void', c_lower(name), + ['const struct genl_split_ops *ops', + 'struct sk_buff *skb', 'struct genl_info *info'], suffix=';') + for name in family.hooks['pre']['dump']['list']: + cw.write_func_prot('int', c_lower(name), + ['struct netlink_callback *cb'], suffix=';') + for name in family.hooks['post']['dump']['list']: + cw.write_func_prot('int', c_lower(name), + ['struct netlink_callback *cb'], suffix=';') + + cw.nl() + + for op_name, op in family.ops.items(): + if op.is_async: + continue + + if 'do' in op: + name = c_lower(f"{family.name}-nl-{op_name}-doit") + cw.write_func_prot('int', name, + ['struct sk_buff *skb', 'struct genl_info *info'], suffix=';') + + if 'dump' in op: + name = c_lower(f"{family.name}-nl-{op_name}-dumpit") + cw.write_func_prot('int', name, + ['struct sk_buff *skb', 'struct netlink_callback *cb'], suffix=';') + cw.nl() + + +def print_kernel_op_table_hdr(family, cw): + print_kernel_op_table_fwd(family, cw, terminate=True) + + +def print_kernel_op_table(family, cw): + print_kernel_op_table_fwd(family, cw, terminate=False) + if family.kernel_policy == 'global' or family.kernel_policy == 'per-op': + for op_name, op in family.ops.items(): + if op.is_async: + continue + + cw.block_start() + members = [('cmd', op.enum_name)] + if 'dont-validate' in op: + members.append(('validate', + ' | '.join([c_upper('genl-dont-validate-' + x) + for x in op['dont-validate']])), ) + for op_mode in ['do', 'dump']: + if op_mode in op: + name = c_lower(f"{family.name}-nl-{op_name}-{op_mode}it") + members.append((op_mode + 'it', name)) + if family.kernel_policy == 'per-op': + struct = Struct(family, op['attribute-set'], + type_list=op['do']['request']['attributes']) + + name = c_lower(f"{family.name}-{op_name}-nl-policy") + members.append(('policy', name)) + members.append(('maxattr', struct.attr_max_val.enum_name)) + if 'flags' in op: + members.append(('flags', ' | '.join([c_upper('genl-' + x) for x in op['flags']]))) + cw.write_struct_init(members) + cw.block_end(line=',') + elif family.kernel_policy == 'split': + cb_names = {'do': {'pre': 'pre_doit', 'post': 'post_doit'}, + 'dump': {'pre': 'start', 'post': 'done'}} + + for op_name, op in family.ops.items(): + for op_mode in ['do', 'dump']: + if op.is_async or op_mode not in op: + continue + + cw.block_start() + members = [('cmd', op.enum_name)] + if 'dont-validate' in op: + members.append(('validate', + ' | '.join([c_upper('genl-dont-validate-' + x) + for x in op['dont-validate']])), ) + name = c_lower(f"{family.name}-nl-{op_name}-{op_mode}it") + if 'pre' in op[op_mode]: + members.append((cb_names[op_mode]['pre'], c_lower(op[op_mode]['pre']))) + members.append((op_mode + 'it', name)) + if 'post' in op[op_mode]: + members.append((cb_names[op_mode]['post'], c_lower(op[op_mode]['post']))) + if 'request' in op[op_mode]: + struct = Struct(family, op['attribute-set'], + type_list=op[op_mode]['request']['attributes']) + + if op.dual_policy: + name = c_lower(f"{family.name}-{op_name}-{op_mode}-nl-policy") + else: + name = c_lower(f"{family.name}-{op_name}-nl-policy") + members.append(('policy', name)) + members.append(('maxattr', struct.attr_max_val.enum_name)) + flags = (op['flags'] if 'flags' in op else []) + ['cmd-cap-' + op_mode] + members.append(('flags', ' | '.join([c_upper('genl-' + x) for x in flags]))) + cw.write_struct_init(members) + cw.block_end(line=',') + + cw.block_end(line=';') + cw.nl() + + +def print_kernel_mcgrp_hdr(family, cw): + if not family.mcgrps['list']: + return + + cw.block_start('enum') + for grp in family.mcgrps['list']: + grp_id = c_upper(f"{family.name}-nlgrp-{grp['name']},") + cw.p(grp_id) + cw.block_end(';') + cw.nl() + + +def print_kernel_mcgrp_src(family, cw): + if not family.mcgrps['list']: + return + + cw.block_start('static const struct genl_multicast_group ' + family.name + '_nl_mcgrps[] =') + for grp in family.mcgrps['list']: + name = grp['name'] + grp_id = c_upper(f"{family.name}-nlgrp-{name}") + cw.p('[' + grp_id + '] = { "' + name + '", },') + cw.block_end(';') + cw.nl() + + +def print_kernel_family_struct_hdr(family, cw): + if not kernel_can_gen_family_struct(family): + return + + cw.p(f"extern struct genl_family {family.name}_nl_family;") + cw.nl() + + +def print_kernel_family_struct_src(family, cw): + if not kernel_can_gen_family_struct(family): + return + + cw.block_start(f"struct genl_family {family.name}_nl_family __ro_after_init =") + cw.p('.name\t\t= ' + family.fam_key + ',') + cw.p('.version\t= ' + family.ver_key + ',') + cw.p('.netnsok\t= true,') + cw.p('.parallel_ops\t= true,') + cw.p('.module\t\t= THIS_MODULE,') + if family.kernel_policy == 'per-op': + cw.p(f'.ops\t\t= {family.name}_nl_ops,') + cw.p(f'.n_ops\t\t= ARRAY_SIZE({family.name}_nl_ops),') + elif family.kernel_policy == 'split': + cw.p(f'.split_ops\t= {family.name}_nl_ops,') + cw.p(f'.n_split_ops\t= ARRAY_SIZE({family.name}_nl_ops),') + if family.mcgrps['list']: + cw.p(f'.mcgrps\t\t= {family.name}_nl_mcgrps,') + cw.p(f'.n_mcgrps\t= ARRAY_SIZE({family.name}_nl_mcgrps),') + cw.block_end(';') + + +def uapi_enum_start(family, cw, obj, ckey='', enum_name='enum-name'): + start_line = 'enum' + if enum_name in obj: + if obj[enum_name]: + start_line = 'enum ' + c_lower(obj[enum_name]) + elif ckey and ckey in obj: + start_line = 'enum ' + family.name + '_' + c_lower(obj[ckey]) + cw.block_start(line=start_line) + + +def render_uapi(family, cw): + hdr_prot = f"_UAPI_LINUX_{family.name.upper()}_H" + cw.p('#ifndef ' + hdr_prot) + cw.p('#define ' + hdr_prot) + cw.nl() + + defines = [(family.fam_key, family["name"]), + (family.ver_key, family.get('version', 1))] + cw.writes_defines(defines) + cw.nl() + + defines = [] + for const in family['definitions']: + if const['type'] != 'const': + cw.writes_defines(defines) + defines = [] + cw.nl() + + # Write kdoc for enum and flags (one day maybe also structs) + if const['type'] == 'enum' or const['type'] == 'flags': + enum = family.consts[const['name']] + + if enum.has_doc(): + cw.p('/**') + doc = '' + if 'doc' in enum: + doc = ' - ' + enum['doc'] + cw.write_doc_line(enum.enum_name + doc) + for entry in enum.entry_list: + if entry.has_doc(): + doc = '@' + entry.c_name + ': ' + entry['doc'] + cw.write_doc_line(doc) + cw.p(' */') + + uapi_enum_start(family, cw, const, 'name') + name_pfx = const.get('name-prefix', f"{family.name}-{const['name']}-") + for entry in enum.entry_list: + suffix = ',' + if entry.value_change: + suffix = f" = {entry.user_value()}" + suffix + cw.p(entry.c_name + suffix) + + if const.get('render-max', False): + cw.nl() + max_name = c_upper(name_pfx + 'max') + cw.p('__' + max_name + ',') + cw.p(max_name + ' = (__' + max_name + ' - 1)') + cw.block_end(line=';') + cw.nl() + elif const['type'] == 'const': + defines.append([c_upper(family.get('c-define-name', + f"{family.name}-{const['name']}")), + const['value']]) + + if defines: + cw.writes_defines(defines) + cw.nl() + + max_by_define = family.get('max-by-define', False) + + for _, attr_set in family.attr_sets_list: + if attr_set.subset_of: + continue + + cnt_name = c_upper(family.get('attr-cnt-name', f"__{attr_set.name_prefix}MAX")) + max_value = f"({cnt_name} - 1)" + + val = 0 + uapi_enum_start(family, cw, attr_set.yaml, 'enum-name') + for _, attr in attr_set.items(): + suffix = ',' + if attr['value'] != val: + suffix = f" = {attr['value']}," + val = attr['value'] + val += 1 + cw.p(attr.enum_name + suffix) + cw.nl() + cw.p(cnt_name + ('' if max_by_define else ',')) + if not max_by_define: + cw.p(f"{attr_set.max_name} = {max_value}") + cw.block_end(line=';') + if max_by_define: + cw.p(f"#define {attr_set.max_name} {max_value}") + cw.nl() + + # Commands + separate_ntf = 'async-prefix' in family['operations'] + + max_name = c_upper(family.get('cmd-max-name', f"{family.op_prefix}MAX")) + cnt_name = c_upper(family.get('cmd-cnt-name', f"__{family.op_prefix}MAX")) + max_value = f"({cnt_name} - 1)" + + uapi_enum_start(family, cw, family['operations'], 'enum-name') + for op in family.msg_list: + if separate_ntf and ('notify' in op or 'event' in op): + continue + + suffix = ',' + if 'value' in op: + suffix = f" = {op['value']}," + cw.p(op.enum_name + suffix) + cw.nl() + cw.p(cnt_name + ('' if max_by_define else ',')) + if not max_by_define: + cw.p(f"{max_name} = {max_value}") + cw.block_end(line=';') + if max_by_define: + cw.p(f"#define {max_name} {max_value}") + cw.nl() + + if separate_ntf: + uapi_enum_start(family, cw, family['operations'], enum_name='async-enum') + for op in family.msg_list: + if separate_ntf and not ('notify' in op or 'event' in op): + continue + + suffix = ',' + if 'value' in op: + suffix = f" = {op['value']}," + cw.p(op.enum_name + suffix) + cw.block_end(line=';') + cw.nl() + + # Multicast + defines = [] + for grp in family.mcgrps['list']: + name = grp['name'] + defines.append([c_upper(grp.get('c-define-name', f"{family.name}-mcgrp-{name}")), + f'{name}']) + cw.nl() + if defines: + cw.writes_defines(defines) + cw.nl() + + cw.p(f'#endif /* {hdr_prot} */') + + +def find_kernel_root(full_path): + sub_path = '' + while True: + sub_path = os.path.join(os.path.basename(full_path), sub_path) + full_path = os.path.dirname(full_path) + maintainers = os.path.join(full_path, "MAINTAINERS") + if os.path.exists(maintainers): + return full_path, sub_path[:-1] + + +def main(): + parser = argparse.ArgumentParser(description='Netlink simple parsing generator') + parser.add_argument('--mode', dest='mode', type=str, required=True) + parser.add_argument('--spec', dest='spec', type=str, required=True) + parser.add_argument('--header', dest='header', action='store_true', default=None) + parser.add_argument('--source', dest='header', action='store_false') + parser.add_argument('--user-header', nargs='+', default=[]) + parser.add_argument('-o', dest='out_file', type=str) + args = parser.parse_args() + + out_file = open(args.out_file, 'w+') if args.out_file else os.sys.stdout + + if args.header is None: + parser.error("--header or --source is required") + + try: + parsed = Family(args.spec) + except yaml.YAMLError as exc: + print(exc) + os.sys.exit(1) + return + + cw = CodeWriter(BaseNlLib(), out_file) + + _, spec_kernel = find_kernel_root(args.spec) + if args.mode == 'uapi': + cw.p('/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */') + else: + if args.header: + cw.p('/* SPDX-License-Identifier: BSD-3-Clause */') + else: + cw.p('// SPDX-License-Identifier: BSD-3-Clause') + cw.p("/* Do not edit directly, auto-generated from: */") + cw.p(f"/*\t{spec_kernel} */") + cw.p(f"/* YNL-GEN {args.mode} {'header' if args.header else 'source'} */") + cw.nl() + + if args.mode == 'uapi': + render_uapi(parsed, cw) + return + + hdr_prot = f"_LINUX_{parsed.name.upper()}_GEN_H" + if args.header: + cw.p('#ifndef ' + hdr_prot) + cw.p('#define ' + hdr_prot) + cw.nl() + + if args.mode == 'kernel': + cw.p('#include <net/netlink.h>') + cw.p('#include <net/genetlink.h>') + cw.nl() + if not args.header: + if args.out_file: + cw.p(f'#include "{os.path.basename(args.out_file[:-2])}.h"') + cw.nl() + headers = [parsed.uapi_header] + for definition in parsed['definitions']: + if 'header' in definition: + headers.append(definition['header']) + for one in headers: + cw.p(f"#include <{one}>") + cw.nl() + + if args.mode == "user": + if not args.header: + cw.p("#include <stdlib.h>") + cw.p("#include <stdio.h>") + cw.p("#include <string.h>") + cw.p("#include <libmnl/libmnl.h>") + cw.p("#include <linux/genetlink.h>") + cw.nl() + for one in args.user_header: + cw.p(f'#include "{one}"') + else: + cw.p('struct ynl_sock;') + cw.nl() + + if args.mode == "kernel": + if args.header: + for _, struct in sorted(parsed.pure_nested_structs.items()): + if struct.request: + cw.p('/* Common nested types */') + break + for attr_set, struct in sorted(parsed.pure_nested_structs.items()): + if struct.request: + print_req_policy_fwd(cw, struct) + cw.nl() + + if parsed.kernel_policy == 'global': + cw.p(f"/* Global operation policy for {parsed.name} */") + + struct = Struct(parsed, parsed.global_policy_set, type_list=parsed.global_policy) + print_req_policy_fwd(cw, struct) + cw.nl() + + if parsed.kernel_policy in {'per-op', 'split'}: + for op_name, op in parsed.ops.items(): + if 'do' in op and 'event' not in op: + ri = RenderInfo(cw, parsed, args.mode, op, op_name, "do") + print_req_policy_fwd(cw, ri.struct['request'], ri=ri) + cw.nl() + + print_kernel_op_table_hdr(parsed, cw) + print_kernel_mcgrp_hdr(parsed, cw) + print_kernel_family_struct_hdr(parsed, cw) + else: + for _, struct in sorted(parsed.pure_nested_structs.items()): + if struct.request: + cw.p('/* Common nested types */') + break + for attr_set, struct in sorted(parsed.pure_nested_structs.items()): + if struct.request: + print_req_policy(cw, struct) + cw.nl() + + if parsed.kernel_policy == 'global': + cw.p(f"/* Global operation policy for {parsed.name} */") + + struct = Struct(parsed, parsed.global_policy_set, type_list=parsed.global_policy) + print_req_policy(cw, struct) + cw.nl() + + for op_name, op in parsed.ops.items(): + if parsed.kernel_policy in {'per-op', 'split'}: + for op_mode in {'do', 'dump'}: + if op_mode in op and 'request' in op[op_mode]: + cw.p(f"/* {op.enum_name} - {op_mode} */") + ri = RenderInfo(cw, parsed, args.mode, op, op_name, op_mode) + print_req_policy(cw, ri.struct['request'], ri=ri) + cw.nl() + + print_kernel_op_table(parsed, cw) + print_kernel_mcgrp_src(parsed, cw) + print_kernel_family_struct_src(parsed, cw) + + if args.mode == "user": + has_ntf = False + if args.header: + cw.p('/* Common nested types */') + for attr_set, struct in sorted(parsed.pure_nested_structs.items()): + ri = RenderInfo(cw, parsed, args.mode, "", "", "", attr_set) + print_type_full(ri, struct) + + for op_name, op in parsed.ops.items(): + cw.p(f"/* ============== {op.enum_name} ============== */") + + if 'do' in op and 'event' not in op: + cw.p(f"/* {op.enum_name} - do */") + ri = RenderInfo(cw, parsed, args.mode, op, op_name, "do") + print_req_type(ri) + print_req_type_helpers(ri) + cw.nl() + print_rsp_type(ri) + print_rsp_type_helpers(ri) + cw.nl() + print_req_prototype(ri) + cw.nl() + + if 'dump' in op: + cw.p(f"/* {op.enum_name} - dump */") + ri = RenderInfo(cw, parsed, args.mode, op, op_name, 'dump') + if 'request' in op['dump']: + print_req_type(ri) + print_req_type_helpers(ri) + if not ri.type_consistent: + print_rsp_type(ri) + print_wrapped_type(ri) + print_dump_prototype(ri) + cw.nl() + + if 'notify' in op: + cw.p(f"/* {op.enum_name} - notify */") + ri = RenderInfo(cw, parsed, args.mode, op, op_name, 'notify') + has_ntf = True + if not ri.type_consistent: + raise Exception('Only notifications with consistent types supported') + print_wrapped_type(ri) + + if 'event' in op: + ri = RenderInfo(cw, parsed, args.mode, op, op_name, 'event') + cw.p(f"/* {op.enum_name} - event */") + print_rsp_type(ri) + cw.nl() + print_wrapped_type(ri) + + if has_ntf: + cw.p('/* --------------- Common notification parsing --------------- */') + print_ntf_parse_prototype(parsed, cw) + cw.nl() + else: + cw.p('/* Policies */') + for name, _ in parsed.attr_sets.items(): + struct = Struct(parsed, name) + put_typol_fwd(cw, struct) + cw.nl() + + for name, _ in parsed.attr_sets.items(): + struct = Struct(parsed, name) + put_typol(cw, struct) + + cw.p('/* Common nested types */') + for attr_set, struct in sorted(parsed.pure_nested_structs.items()): + ri = RenderInfo(cw, parsed, args.mode, "", "", "", attr_set) + + free_rsp_nested(ri, struct) + if struct.request: + put_req_nested(ri, struct) + if struct.reply: + parse_rsp_nested(ri, struct) + + for op_name, op in parsed.ops.items(): + cw.p(f"/* ============== {op.enum_name} ============== */") + if 'do' in op and 'event' not in op: + cw.p(f"/* {op.enum_name} - do */") + ri = RenderInfo(cw, parsed, args.mode, op, op_name, "do") + print_rsp_free(ri) + parse_rsp_msg(ri) + print_req(ri) + cw.nl() + + if 'dump' in op: + cw.p(f"/* {op.enum_name} - dump */") + ri = RenderInfo(cw, parsed, args.mode, op, op_name, "dump") + if not ri.type_consistent: + parse_rsp_msg(ri, deref=True) + print_dump_type_free(ri) + print_dump(ri) + cw.nl() + + if 'notify' in op: + cw.p(f"/* {op.enum_name} - notify */") + ri = RenderInfo(cw, parsed, args.mode, op, op_name, 'notify') + has_ntf = True + if not ri.type_consistent: + raise Exception('Only notifications with consistent types supported') + print_ntf_type_free(ri) + + if 'event' in op: + cw.p(f"/* {op.enum_name} - event */") + has_ntf = True + + ri = RenderInfo(cw, parsed, args.mode, op, op_name, "do") + parse_rsp_msg(ri) + + ri = RenderInfo(cw, parsed, args.mode, op, op_name, "event") + print_ntf_type_free(ri) + + if has_ntf: + cw.p('/* --------------- Common notification parsing --------------- */') + print_ntf_type_parse(parsed, cw, args.mode) + + if args.header: + cw.p(f'#endif /* {hdr_prot} */') + + +if __name__ == "__main__": + main() diff --git a/tools/net/ynl/ynl-regen.sh b/tools/net/ynl/ynl-regen.sh new file mode 100755 index 000000000000..43989ae48ed0 --- /dev/null +++ b/tools/net/ynl/ynl-regen.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# SPDX-License-Identifier: BSD-3-Clause + +TOOL=$(dirname $(realpath $0))/ynl-gen-c.py + +force= + +while [ ! -z "$1" ]; do + case "$1" in + -f ) force=yes; shift ;; + * ) echo "Unrecognized option '$1'"; exit 1 ;; + esac +done + +KDIR=$(dirname $(dirname $(dirname $(dirname $(realpath $0))))) + +files=$(git grep --files-with-matches '^/\* YNL-GEN \(kernel\|uapi\)') +for f in $files; do + # params: 0 1 2 3 + # $YAML YNL-GEN kernel $mode + params=( $(git grep -B1 -h '/\* YNL-GEN' $f | sed 's@/\*\(.*\)\*/@\1@') ) + + if [ $f -nt ${params[0]} -a -z "$force" ]; then + echo -e "\tSKIP $f" + continue + fi + + echo -e "\tGEN ${params[2]}\t$f" + $TOOL --mode ${params[2]} --${params[3]} --spec $KDIR/${params[0]} -o $f +done diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 4350be739f4f..4b7c8b33069e 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -427,6 +427,15 @@ static int decode_instructions(struct objtool_file *file) if (func->type != STT_NOTYPE && func->type != STT_FUNC) continue; + if (func->offset == sec->sh.sh_size) { + /* Heuristic: likely an "end" symbol */ + if (func->type == STT_NOTYPE) + continue; + WARN("%s(): STT_FUNC at end of section", + func->name); + return -1; + } + if (func->return_thunk || func->alias != func) continue; diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile index 6e7b88917ca0..ba5d942e4c6a 100644 --- a/tools/perf/Documentation/Makefile +++ b/tools/perf/Documentation/Makefile @@ -267,7 +267,7 @@ $(OUTPUT)%.xml : %.txt $(ASCIIDOC) -b docbook -d manpage \ $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) \ -aperf_date=$(shell git log -1 --pretty="format:%cd" \ - --date=short $<) \ + --date=short --no-show-signature $<) \ -o $@+ $< && \ mv $@+ $@ diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index c2504c39bdcb..5b8784675903 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -589,6 +589,8 @@ ifndef NO_LIBELF $(call feature_check,libbpf-bpf_program__set_insns) ifeq ($(feature-libbpf-bpf_program__set_insns), 1) CFLAGS += -DHAVE_LIBBPF_BPF_PROGRAM__SET_INSNS + else + dummy := $(error Error: libbpf devel library needs to be >= 0.8.0 to build with LIBBPF_DYNAMIC, update or build statically with the version that comes with the kernel sources); endif $(call feature_check,libbpf-btf__raw_data) ifeq ($(feature-libbpf-btf__raw_data), 1) @@ -602,6 +604,8 @@ ifndef NO_LIBELF dummy := $(error Error: No libbpf devel library found, please install libbpf-devel); endif else + # Libbpf will be built as a static library from tools/lib/bpf. + LIBBPF_STATIC := 1 CFLAGS += -DHAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID CFLAGS += -DHAVE_LIBBPF_BPF_PROG_LOAD CFLAGS += -DHAVE_LIBBPF_BPF_OBJECT__NEXT_PROGRAM @@ -1314,14 +1318,6 @@ tip_instdir_SQ = $(subst ','\'',$(tip_instdir)) export perfexec_instdir_SQ -# If we install to $(HOME) we keep the traceevent default: -# $(HOME)/.traceevent/plugins -# Otherwise we install plugins into the global $(libdir). -ifdef DESTDIR -plugindir=$(libdir)/traceevent/plugins -plugindir_SQ= $(subst ','\'',$(plugindir)) -endif - print_var = $(eval $(print_var_code)) $(info $(MSG)) define print_var_code MSG = $(shell printf '...%40s: %s' $(1) $($(1))) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 13e7d26e77f0..b7d9c4206230 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -303,10 +303,12 @@ ifneq ($(OUTPUT),) else LIBBPF_OUTPUT = $(CURDIR)/libbpf endif -LIBBPF_DESTDIR = $(LIBBPF_OUTPUT) -LIBBPF_INCLUDE = $(LIBBPF_DESTDIR)/include -LIBBPF = $(LIBBPF_OUTPUT)/libbpf.a -CFLAGS += -I$(LIBBPF_OUTPUT)/include +ifdef LIBBPF_STATIC + LIBBPF_DESTDIR = $(LIBBPF_OUTPUT) + LIBBPF_INCLUDE = $(LIBBPF_DESTDIR)/include + LIBBPF = $(LIBBPF_OUTPUT)/libbpf.a + CFLAGS += -I$(LIBBPF_OUTPUT)/include +endif ifneq ($(OUTPUT),) LIBSUBCMD_OUTPUT = $(abspath $(OUTPUT))/libsubcmd @@ -393,10 +395,8 @@ endif export PERL_PATH PERFLIBS = $(LIBAPI) $(LIBPERF) $(LIBSUBCMD) $(LIBSYMBOL) -ifndef NO_LIBBPF - ifndef LIBBPF_DYNAMIC - PERFLIBS += $(LIBBPF) - endif +ifdef LIBBPF_STATIC + PERFLIBS += $(LIBBPF) endif # We choose to avoid "if .. else if .. else .. endif endif" @@ -756,12 +756,15 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc $(arch_errno_name_array) \ $(sync_file_range_arrays) \ $(LIBAPI) \ - $(LIBBPF) \ $(LIBPERF) \ $(LIBSUBCMD) \ $(LIBSYMBOL) \ bpf-skel +ifdef LIBBPF_STATIC +prepare: $(LIBBPF) +endif + $(OUTPUT)%.o: %.c prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ @@ -819,7 +822,7 @@ $(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h) $(LIBAPI): FORCE | $(LIBAPI_OUTPUT) $(Q)$(MAKE) -C $(LIBAPI_DIR) O=$(LIBAPI_OUTPUT) \ - DESTDIR=$(LIBAPI_DESTDIR) prefix= \ + DESTDIR=$(LIBAPI_DESTDIR) prefix= subdir= \ $@ install_headers $(LIBAPI)-clean: @@ -828,7 +831,7 @@ $(LIBAPI)-clean: $(LIBBPF): FORCE | $(LIBBPF_OUTPUT) $(Q)$(MAKE) -C $(LIBBPF_DIR) FEATURES_DUMP=$(FEATURE_DUMP_EXPORT) \ - O= OUTPUT=$(LIBBPF_OUTPUT)/ DESTDIR=$(LIBBPF_DESTDIR) prefix= \ + O= OUTPUT=$(LIBBPF_OUTPUT)/ DESTDIR=$(LIBBPF_DESTDIR) prefix= subdir= \ $@ install_headers $(LIBBPF)-clean: @@ -837,7 +840,7 @@ $(LIBBPF)-clean: $(LIBPERF): FORCE | $(LIBPERF_OUTPUT) $(Q)$(MAKE) -C $(LIBPERF_DIR) O=$(LIBPERF_OUTPUT) \ - DESTDIR=$(LIBPERF_DESTDIR) prefix= \ + DESTDIR=$(LIBPERF_DESTDIR) prefix= subdir= \ $@ install_headers $(LIBPERF)-clean: @@ -846,7 +849,7 @@ $(LIBPERF)-clean: $(LIBSUBCMD): FORCE | $(LIBSUBCMD_OUTPUT) $(Q)$(MAKE) -C $(LIBSUBCMD_DIR) O=$(LIBSUBCMD_OUTPUT) \ - DESTDIR=$(LIBSUBCMD_DESTDIR) prefix= \ + DESTDIR=$(LIBSUBCMD_DESTDIR) prefix= subdir= \ $@ install_headers $(LIBSUBCMD)-clean: @@ -855,7 +858,7 @@ $(LIBSUBCMD)-clean: $(LIBSYMBOL): FORCE | $(LIBSYMBOL_OUTPUT) $(Q)$(MAKE) -C $(LIBSYMBOL_DIR) O=$(LIBSYMBOL_OUTPUT) \ - DESTDIR=$(LIBSYMBOL_DESTDIR) prefix= \ + DESTDIR=$(LIBSYMBOL_DESTDIR) prefix= subdir= \ $@ install_headers $(LIBSYMBOL)-clean: diff --git a/tools/perf/arch/riscv/util/unwind-libdw.c b/tools/perf/arch/riscv/util/unwind-libdw.c index 19536e172850..54a198714eb8 100644 --- a/tools/perf/arch/riscv/util/unwind-libdw.c +++ b/tools/perf/arch/riscv/util/unwind-libdw.c @@ -4,7 +4,7 @@ #include <elfutils/libdwfl.h> #include "../../util/unwind-libdw.h" #include "../../util/perf_regs.h" -#include "../../util/event.h" +#include "../../util/sample.h" bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg) { diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index e20656c431a4..8ae0a1535293 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -26,6 +26,7 @@ #include "util/string2.h" #include <linux/kernel.h> +#include <linux/numa.h> #include <linux/rbtree.h> #include <linux/string.h> #include <linux/zalloc.h> @@ -185,22 +186,33 @@ static int evsel__process_alloc_event(struct evsel *evsel, struct perf_sample *s total_allocated += bytes_alloc; nr_allocs++; - return 0; -} -static int evsel__process_alloc_node_event(struct evsel *evsel, struct perf_sample *sample) -{ - int ret = evsel__process_alloc_event(evsel, sample); + /* + * Commit 11e9734bcb6a ("mm/slab_common: unify NUMA and UMA + * version of tracepoints") adds the field "node" into the + * tracepoints 'kmalloc' and 'kmem_cache_alloc'. + * + * The legacy tracepoints 'kmalloc_node' and 'kmem_cache_alloc_node' + * also contain the field "node". + * + * If the tracepoint contains the field "node" the tool stats the + * cross allocation. + */ + if (evsel__field(evsel, "node")) { + int node1, node2; - if (!ret) { - int node1 = cpu__get_node((struct perf_cpu){.cpu = sample->cpu}), - node2 = evsel__intval(evsel, sample, "node"); + node1 = cpu__get_node((struct perf_cpu){.cpu = sample->cpu}); + node2 = evsel__intval(evsel, sample, "node"); - if (node1 != node2) + /* + * If the field "node" is NUMA_NO_NODE (-1), we don't take it + * as a cross allocation. + */ + if ((node2 != NUMA_NO_NODE) && (node1 != node2)) nr_cross_allocs++; } - return ret; + return 0; } static int ptr_cmp(void *, void *); @@ -1369,8 +1381,8 @@ static int __cmd_kmem(struct perf_session *session) /* slab allocator */ { "kmem:kmalloc", evsel__process_alloc_event, }, { "kmem:kmem_cache_alloc", evsel__process_alloc_event, }, - { "kmem:kmalloc_node", evsel__process_alloc_node_event, }, - { "kmem:kmem_cache_alloc_node", evsel__process_alloc_node_event, }, + { "kmem:kmalloc_node", evsel__process_alloc_event, }, + { "kmem:kmem_cache_alloc_node", evsel__process_alloc_event, }, { "kmem:kfree", evsel__process_free_event, }, { "kmem:kmem_cache_free", evsel__process_free_event, }, /* page allocator */ @@ -1824,6 +1836,19 @@ static int parse_line_opt(const struct option *opt __maybe_unused, return 0; } +static bool slab_legacy_tp_is_exposed(void) +{ + /* + * The tracepoints "kmem:kmalloc_node" and + * "kmem:kmem_cache_alloc_node" have been removed on the latest + * kernel, if the tracepoint "kmem:kmalloc_node" is existed it + * means the tool is running on an old kernel, we need to + * rollback to support these legacy tracepoints. + */ + return IS_ERR(trace_event__tp_format("kmem", "kmalloc_node")) ? + false : true; +} + static int __cmd_record(int argc, const char **argv) { const char * const record_args[] = { @@ -1831,22 +1856,28 @@ static int __cmd_record(int argc, const char **argv) }; const char * const slab_events[] = { "-e", "kmem:kmalloc", - "-e", "kmem:kmalloc_node", "-e", "kmem:kfree", "-e", "kmem:kmem_cache_alloc", - "-e", "kmem:kmem_cache_alloc_node", "-e", "kmem:kmem_cache_free", }; + const char * const slab_legacy_events[] = { + "-e", "kmem:kmalloc_node", + "-e", "kmem:kmem_cache_alloc_node", + }; const char * const page_events[] = { "-e", "kmem:mm_page_alloc", "-e", "kmem:mm_page_free", }; unsigned int rec_argc, i, j; const char **rec_argv; + unsigned int slab_legacy_tp_exposed = slab_legacy_tp_is_exposed(); rec_argc = ARRAY_SIZE(record_args) + argc - 1; - if (kmem_slab) + if (kmem_slab) { rec_argc += ARRAY_SIZE(slab_events); + if (slab_legacy_tp_exposed) + rec_argc += ARRAY_SIZE(slab_legacy_events); + } if (kmem_page) rec_argc += ARRAY_SIZE(page_events) + 1; /* for -g */ @@ -1861,6 +1892,10 @@ static int __cmd_record(int argc, const char **argv) if (kmem_slab) { for (j = 0; j < ARRAY_SIZE(slab_events); j++, i++) rec_argv[i] = strdup(slab_events[j]); + if (slab_legacy_tp_exposed) { + for (j = 0; j < ARRAY_SIZE(slab_legacy_events); j++, i++) + rec_argv[i] = strdup(slab_legacy_events[j]); + } } if (kmem_page) { rec_argv[i++] = strdup("-g"); diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 718b82bfcdff..506c2fe42d52 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -1670,6 +1670,7 @@ static int __cmd_report(bool display_info) /* for lock function check */ symbol_conf.sort_by_name = true; + symbol_conf.allow_aliases = true; symbol__init(&session->header.env); if (!data.is_pipe) { @@ -1757,6 +1758,7 @@ static int __cmd_contention(int argc, const char **argv) /* for lock function check */ symbol_conf.sort_by_name = true; + symbol_conf.allow_aliases = true; symbol__init(&session->header.env); if (use_bpf) { diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 86e06f136f40..d21fe0f32a6d 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -16,7 +16,9 @@ #include "util/record.h" #include <api/fs/tracing_path.h> +#ifdef HAVE_LIBBPF_SUPPORT #include <bpf/bpf.h> +#endif #include "util/bpf_map.h" #include "util/rlimit.h" #include "builtin.h" diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt index 8fcab5ad00c5..e8d2762adade 100644 --- a/tools/perf/command-list.txt +++ b/tools/perf/command-list.txt @@ -16,20 +16,20 @@ perf-ftrace mainporcelain common perf-inject mainporcelain common perf-iostat mainporcelain common perf-kallsyms mainporcelain common -perf-kmem mainporcelain common +perf-kmem mainporcelain traceevent perf-kvm mainporcelain common -perf-kwork mainporcelain common +perf-kwork mainporcelain traceevent perf-list mainporcelain common -perf-lock mainporcelain common +perf-lock mainporcelain traceevent perf-mem mainporcelain common perf-probe mainporcelain full perf-record mainporcelain common perf-report mainporcelain common -perf-sched mainporcelain common +perf-sched mainporcelain traceevent perf-script mainporcelain common perf-stat mainporcelain common perf-test mainporcelain common -perf-timechart mainporcelain common +perf-timechart mainporcelain traceevent perf-top mainporcelain common perf-trace mainporcelain audit perf-version mainporcelain common diff --git a/tools/perf/tests/bpf-script-test-prologue.c b/tools/perf/tests/bpf-script-test-prologue.c index bd83d364cf30..91778b5c6125 100644 --- a/tools/perf/tests/bpf-script-test-prologue.c +++ b/tools/perf/tests/bpf-script-test-prologue.c @@ -20,6 +20,8 @@ # undef if #endif +typedef unsigned int __bitwise fmode_t; + #define FMODE_READ 0x1 #define FMODE_WRITE 0x2 diff --git a/tools/perf/tests/make b/tools/perf/tests/make index 05e818a8bbad..009d6efb673c 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -222,19 +222,7 @@ installed_files_bin := bin/perf installed_files_bin += etc/bash_completion.d/perf installed_files_bin += libexec/perf-core/perf-archive -installed_files_plugins := $(lib)/traceevent/plugins/plugin_cfg80211.so -installed_files_plugins += $(lib)/traceevent/plugins/plugin_scsi.so -installed_files_plugins += $(lib)/traceevent/plugins/plugin_xen.so -installed_files_plugins += $(lib)/traceevent/plugins/plugin_function.so -installed_files_plugins += $(lib)/traceevent/plugins/plugin_sched_switch.so -installed_files_plugins += $(lib)/traceevent/plugins/plugin_mac80211.so -installed_files_plugins += $(lib)/traceevent/plugins/plugin_kvm.so -installed_files_plugins += $(lib)/traceevent/plugins/plugin_kmem.so -installed_files_plugins += $(lib)/traceevent/plugins/plugin_hrtimer.so -installed_files_plugins += $(lib)/traceevent/plugins/plugin_jbd2.so - installed_files_all := $(installed_files_bin) -installed_files_all += $(installed_files_plugins) test_make_install := $(call test_dest_files,$(installed_files_all)) test_make_install_O := $(call test_dest_files,$(installed_files_all)) diff --git a/tools/perf/tests/shell/buildid.sh b/tools/perf/tests/shell/buildid.sh index f05670d1e39e..aaf851108ca3 100755 --- a/tools/perf/tests/shell/buildid.sh +++ b/tools/perf/tests/shell/buildid.sh @@ -77,7 +77,20 @@ check() file=${build_id_dir}/.build-id/${id:0:2}/`readlink ${link}`/elf echo "file: ${file}" - if [ ! -x $file ]; then + # Check for file permission of original file + # in case of pe-file.exe file + echo $1 | grep ".exe" + if [ $? -eq 0 ]; then + if [ -x $1 -a ! -x $file ]; then + echo "failed: file ${file} executable does not exist" + exit 1 + fi + + if [ ! -x $file -a ! -e $file ]; then + echo "failed: file ${file} does not exist" + exit 1 + fi + elif [ ! -x $file ]; then echo "failed: file ${file} does not exist" exit 1 fi diff --git a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh index 34c400ccbe04..57e7a6a470c9 100755 --- a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh +++ b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh @@ -37,6 +37,7 @@ trace_libc_inet_pton_backtrace() { case "$(uname -m)" in s390x) eventattr='call-graph=dwarf,max-stack=4' + echo "text_to_binary_address.*\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$" >> $expected echo "gaih_inet.*\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$" >> $expected echo "(__GI_)?getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$" >> $expected echo "main\+0x[[:xdigit:]]+[[:space:]]\(.*/bin/ping.*\)$" >> $expected @@ -57,7 +58,7 @@ trace_libc_inet_pton_backtrace() { perf_data=`mktemp -u /tmp/perf.data.XXX` perf_script=`mktemp -u /tmp/perf.script.XXX` perf record -e $event_name/$eventattr/ -o $perf_data ping -6 -c 1 ::1 > /dev/null 2>&1 - perf script -i $perf_data > $perf_script + perf script -i $perf_data | tac | grep -m1 ^ping -B9 | tac > $perf_script exec 3<$perf_script exec 4<$expected diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h index de3701a2a212..13c3a237b9c9 100644 --- a/tools/perf/trace/beauty/include/linux/socket.h +++ b/tools/perf/trace/beauty/include/linux/socket.h @@ -33,7 +33,10 @@ typedef __kernel_sa_family_t sa_family_t; struct sockaddr { sa_family_t sa_family; /* address family, AF_xxx */ - char sa_data[14]; /* 14 bytes of protocol address */ + union { + char sa_data_min[14]; /* Minimum 14 bytes of protocol address */ + DECLARE_FLEX_ARRAY(char, sa_data); + }; }; struct linger { diff --git a/tools/perf/util/PERF-VERSION-GEN b/tools/perf/util/PERF-VERSION-GEN index 3cc42821d9b3..d7dc7c28508c 100755 --- a/tools/perf/util/PERF-VERSION-GEN +++ b/tools/perf/util/PERF-VERSION-GEN @@ -19,7 +19,7 @@ TAG= if test -d ../../.git -o -f ../../.git then TAG=$(MAKEFLAGS= make -sC ../.. kernelversion) - CID=$(git log -1 --abbrev=12 --pretty=format:"%h" 2>/dev/null) && CID="-g$CID" + CID=$(git log -1 --abbrev=12 --pretty=format:"%h" --no-show-signature 2>/dev/null) && CID="-g$CID" elif test -f ../../PERF-VERSION-FILE then TAG=$(cut -d' ' -f3 ../../PERF-VERSION-FILE | sed -e 's/\"//g') diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 265d20cc126b..c2e323cd7d49 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -2611,7 +2611,7 @@ static int find_dso_sym(struct dso *dso, const char *sym_name, u64 *start, *size = sym->start - *start; if (idx > 0) { if (*size) - return 1; + return 0; } else if (dso_sym_match(sym, sym_name, &cnt, idx)) { print_duplicate_syms(dso, sym_name); return -EINVAL; diff --git a/tools/perf/util/bpf_counter.h b/tools/perf/util/bpf_counter.h index 4dbf26408b69..c6d21c07b14c 100644 --- a/tools/perf/util/bpf_counter.h +++ b/tools/perf/util/bpf_counter.h @@ -4,9 +4,12 @@ #include <linux/list.h> #include <sys/resource.h> + +#ifdef HAVE_LIBBPF_SUPPORT #include <bpf/bpf.h> #include <bpf/btf.h> #include <bpf/libbpf.h> +#endif struct evsel; struct target; @@ -87,6 +90,8 @@ static inline void set_max_rlimit(void) setrlimit(RLIMIT_MEMLOCK, &rinf); } +#ifdef HAVE_BPF_SKEL + static inline __u32 bpf_link_get_id(int fd) { struct bpf_link_info link_info = { .id = 0, }; @@ -127,5 +132,6 @@ static inline int bperf_trigger_reading(int prog_fd, int cpu) return bpf_prog_test_run_opts(prog_fd, &opts); } +#endif /* HAVE_BPF_SKEL */ #endif /* __PERF_BPF_COUNTER_H */ diff --git a/tools/perf/util/bpf_counter_cgroup.c b/tools/perf/util/bpf_counter_cgroup.c index 3c2df7522f6f..1c82377ed78b 100644 --- a/tools/perf/util/bpf_counter_cgroup.c +++ b/tools/perf/util/bpf_counter_cgroup.c @@ -116,27 +116,19 @@ static int bperf_load_program(struct evlist *evlist) /* open single copy of the events w/o cgroup */ err = evsel__open_per_cpu(evsel, evsel->core.cpus, -1); - if (err) { - pr_err("Failed to open first cgroup events\n"); - goto out; - } + if (err == 0) + evsel->supported = true; map_fd = bpf_map__fd(skel->maps.events); perf_cpu_map__for_each_cpu(cpu, j, evsel->core.cpus) { int fd = FD(evsel, j); __u32 idx = evsel->core.idx * total_cpus + cpu.cpu; - err = bpf_map_update_elem(map_fd, &idx, &fd, - BPF_ANY); - if (err < 0) { - pr_err("Failed to update perf_event fd\n"); - goto out; - } + bpf_map_update_elem(map_fd, &idx, &fd, BPF_ANY); } evsel->cgrp = leader_cgrp; } - evsel->supported = true; if (evsel->cgrp == cgrp) continue; diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index a839b30c981b..ea9c083ab1e3 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -715,9 +715,13 @@ build_id_cache__add(const char *sbuild_id, const char *name, const char *realnam } else if (nsi && nsinfo__need_setns(nsi)) { if (copyfile_ns(name, filename, nsi)) goto out_free; - } else if (link(realname, filename) && errno != EEXIST && - copyfile(name, filename)) - goto out_free; + } else if (link(realname, filename) && errno != EEXIST) { + struct stat f_stat; + + if (!(stat(name, &f_stat) < 0) && + copyfile_mode(name, filename, f_stat.st_mode)) + goto out_free; + } } /* Some binaries are stripped, but have .debug files with their symbol diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index e99b41f9be45..cd978c240e0d 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -224,6 +224,19 @@ static int add_cgroup_name(const char *fpath, const struct stat *sb __maybe_unus return 0; } +static int check_and_add_cgroup_name(const char *fpath) +{ + struct cgroup_name *cn; + + list_for_each_entry(cn, &cgroup_list, list) { + if (!strcmp(cn->name, fpath)) + return 0; + } + + /* pretend if it's added by ftw() */ + return add_cgroup_name(fpath, NULL, FTW_D, NULL); +} + static void release_cgroup_list(void) { struct cgroup_name *cn; @@ -242,7 +255,7 @@ static int list_cgroups(const char *str) struct cgroup_name *cn; char *s; - /* use given name as is - for testing purpose */ + /* use given name as is when no regex is given */ for (;;) { p = strchr(str, ','); e = p ? p : eos; @@ -253,13 +266,13 @@ static int list_cgroups(const char *str) s = strndup(str, e - str); if (!s) return -1; - /* pretend if it's added by ftw() */ - ret = add_cgroup_name(s, NULL, FTW_D, NULL); + + ret = check_and_add_cgroup_name(s); free(s); - if (ret) + if (ret < 0) return -1; } else { - if (add_cgroup_name("", NULL, FTW_D, NULL) < 0) + if (check_and_add_cgroup_name("/") < 0) return -1; } diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index a7f68c309545..fc16299c915f 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -132,6 +132,7 @@ int perf_data__open_dir(struct perf_data *data) file->size = st.st_size; } + closedir(dir); if (!files) return -EINVAL; @@ -140,6 +141,7 @@ int perf_data__open_dir(struct perf_data *data) return 0; out_err: + closedir(dir); close_dir(files, nr); return ret; } diff --git a/tools/perf/util/expr.l b/tools/perf/util/expr.l index 0168a9637330..d47de5f270a8 100644 --- a/tools/perf/util/expr.l +++ b/tools/perf/util/expr.l @@ -42,8 +42,11 @@ static char *normalize(char *str, int runtime) char *dst = str; while (*str) { - if (*str == '\\') + if (*str == '\\') { *dst++ = *++str; + if (!*str) + break; + } else if (*str == '?') { char *paramval; int i = 0; diff --git a/tools/perf/util/generate-cmdlist.sh b/tools/perf/util/generate-cmdlist.sh index c3cef36d4176..1b5140e5ce99 100755 --- a/tools/perf/util/generate-cmdlist.sh +++ b/tools/perf/util/generate-cmdlist.sh @@ -38,7 +38,7 @@ do done echo "#endif /* HAVE_LIBELF_SUPPORT */" -echo "#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT)" +echo "#if defined(HAVE_LIBTRACEEVENT) && (defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT))" sed -n -e 's/^perf-\([^ ]*\)[ ].* audit*/\1/p' command-list.txt | sort | while read cmd @@ -51,5 +51,20 @@ do p }' "Documentation/perf-$cmd.txt" done -echo "#endif /* HAVE_LIBELF_SUPPORT */" +echo "#endif /* HAVE_LIBTRACEEVENT && (HAVE_LIBAUDIT_SUPPORT || HAVE_SYSCALL_TABLE_SUPPORT) */" + +echo "#ifdef HAVE_LIBTRACEEVENT" +sed -n -e 's/^perf-\([^ ]*\)[ ].* traceevent.*/\1/p' command-list.txt | +sort | +while read cmd +do + sed -n ' + /^NAME/,/perf-'"$cmd"'/H + ${ + x + s/.*perf-'"$cmd"' - \(.*\)/ {"'"$cmd"'", "\1"},/ + p + }' "Documentation/perf-$cmd.txt" +done +echo "#endif /* HAVE_LIBTRACEEVENT */" echo "};" diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index e188f74698dd..37662cdec5ee 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -2971,6 +2971,18 @@ static int add_dynamic_entry(struct evlist *evlist, const char *tok, ret = add_all_matching_fields(evlist, field_name, raw_trace, level); goto out; } +#else + evlist__for_each_entry(evlist, evsel) { + if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT) { + pr_err("%s %s", ret ? "," : "This perf binary isn't linked with libtraceevent, can't process", evsel__name(evsel)); + ret = -ENOTSUP; + } + } + + if (ret) { + pr_err("\n"); + goto out; + } #endif evsel = find_evsel(evlist, event_name); diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index add6c5d9531c..9b3cd79cca12 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -4,6 +4,7 @@ #include <stdbool.h> #include <stdio.h> +#include <sys/types.h> #include <linux/types.h> struct evlist; diff --git a/tools/testing/memblock/.gitignore b/tools/testing/memblock/.gitignore index 654338e0be52..4cc7cd5aac2b 100644 --- a/tools/testing/memblock/.gitignore +++ b/tools/testing/memblock/.gitignore @@ -1,4 +1,5 @@ main memblock.c linux/memblock.h +asm/asm.h asm/cmpxchg.h diff --git a/tools/testing/memblock/Makefile b/tools/testing/memblock/Makefile index 2310ac4d080e..7a1ca694a982 100644 --- a/tools/testing/memblock/Makefile +++ b/tools/testing/memblock/Makefile @@ -29,13 +29,14 @@ include: ../../../include/linux/memblock.h ../../include/linux/*.h \ @mkdir -p linux test -L linux/memblock.h || ln -s ../../../../include/linux/memblock.h linux/memblock.h + test -L asm/asm.h || ln -s ../../../arch/x86/include/asm/asm.h asm/asm.h test -L asm/cmpxchg.h || ln -s ../../../arch/x86/include/asm/cmpxchg.h asm/cmpxchg.h memblock.c: $(EXTR_SRC) test -L memblock.c || ln -s $(EXTR_SRC) memblock.c clean: - $(RM) $(TARGETS) $(OFILES) linux/memblock.h memblock.c asm/cmpxchg.h + $(RM) $(TARGETS) $(OFILES) linux/memblock.h memblock.c asm/asm.h asm/cmpxchg.h help: @echo 'Memblock simulator' diff --git a/tools/testing/memblock/internal.h b/tools/testing/memblock/internal.h index fdb7f5db7308..85973e55489e 100644 --- a/tools/testing/memblock/internal.h +++ b/tools/testing/memblock/internal.h @@ -15,6 +15,10 @@ bool mirrored_kernelcore = false; struct page {}; +void __free_pages_core(struct page *page, unsigned int order) +{ +} + void memblock_free_pages(struct page *page, unsigned long pfn, unsigned int order) { diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c index ea0978f22db8..251794f83719 100644 --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c +++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c @@ -241,7 +241,7 @@ int main(int argc, char **argv) while ((opt = getopt(argc, argv, "hp:t:r")) != -1) { switch (opt) { case 'p': - reclaim_period_ms = atoi_non_negative("Reclaim period", optarg); + reclaim_period_ms = atoi_positive("Reclaim period", optarg); break; case 't': token = atoi_paranoid(optarg); diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c index dae510c263b4..13c75dc18c10 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c +++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c @@ -434,6 +434,7 @@ static void *juggle_shinfo_state(void *arg) int main(int argc, char *argv[]) { struct timespec min_ts, max_ts, vm_ts; + struct kvm_xen_hvm_attr evt_reset; struct kvm_vm *vm; pthread_t thread; bool verbose; @@ -962,10 +963,8 @@ int main(int argc, char *argv[]) } done: - struct kvm_xen_hvm_attr evt_reset = { - .type = KVM_XEN_ATTR_TYPE_EVTCHN, - .u.evtchn.flags = KVM_XEN_EVTCHN_RESET, - }; + evt_reset.type = KVM_XEN_ATTR_TYPE_EVTCHN; + evt_reset.u.evtchn.flags = KVM_XEN_EVTCHN_RESET; vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &evt_reset); alarm(0); diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 291144c284fb..f7900e75d230 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -20,7 +20,7 @@ CLANG_TARGET_FLAGS := $(CLANG_TARGET_FLAGS_$(ARCH)) ifeq ($(CROSS_COMPILE),) ifeq ($(CLANG_TARGET_FLAGS),) -$(error Specify CROSS_COMPILE or add '--target=' option to lib.mk +$(error Specify CROSS_COMPILE or add '--target=' option to lib.mk) else CLANG_FLAGS += --target=$(CLANG_TARGET_FLAGS) endif # CLANG_TARGET_FLAGS diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 3007e98a6d64..951bd5342bc6 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -45,6 +45,7 @@ TEST_PROGS += arp_ndisc_untracked_subnets.sh TEST_PROGS += stress_reuseport_listen.sh TEST_PROGS += l2_tos_ttl_inherit.sh TEST_PROGS += bind_bhash.sh +TEST_PROGS += ip_local_port_range.sh TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh TEST_GEN_FILES = socket nettest @@ -75,14 +76,61 @@ TEST_GEN_PROGS += so_incoming_cpu TEST_PROGS += sctp_vrf.sh TEST_GEN_FILES += sctp_hello TEST_GEN_FILES += csum +TEST_GEN_FILES += nat6to4.o +TEST_GEN_FILES += ip_local_port_range TEST_FILES := settings include ../lib.mk -include bpf/Makefile - $(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma $(OUTPUT)/tcp_mmap: LDLIBS += -lpthread $(OUTPUT)/tcp_inq: LDLIBS += -lpthread $(OUTPUT)/bind_bhash: LDLIBS += -lpthread + +# Rules to generate bpf obj nat6to4.o +CLANG ?= clang +SCRATCH_DIR := $(OUTPUT)/tools +BUILD_DIR := $(SCRATCH_DIR)/build +BPFDIR := $(abspath ../../../lib/bpf) +APIDIR := $(abspath ../../../include/uapi) + +CCINCLUDE += -I../bpf +CCINCLUDE += -I../../../../usr/include/ +CCINCLUDE += -I$(SCRATCH_DIR)/include + +BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a + +MAKE_DIRS := $(BUILD_DIR)/libbpf +$(MAKE_DIRS): + mkdir -p $@ + +# Get Clang's default includes on this system, as opposed to those seen by +# '-target bpf'. This fixes "missing" files on some architectures/distros, +# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc. +# +# Use '-idirafter': Don't interfere with include mechanics except where the +# build would have failed anyways. +define get_sys_includes +$(shell $(1) $(2) -v -E - </dev/null 2>&1 \ + | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \ +$(shell $(1) $(2) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}') +endef + +ifneq ($(CROSS_COMPILE),) +CLANG_TARGET_ARCH = --target=$(notdir $(CROSS_COMPILE:%-=%)) +endif + +CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH)) + +$(OUTPUT)/nat6to4.o: nat6to4.c $(BPFOBJ) | $(MAKE_DIRS) + $(CLANG) -O2 -target bpf -c $< $(CCINCLUDE) $(CLANG_SYS_INCLUDES) -o $@ + +$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ + $(APIDIR)/linux/bpf.h \ + | $(BUILD_DIR)/libbpf + $(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \ + EXTRA_CFLAGS='-g -O0' \ + DESTDIR=$(SCRATCH_DIR) prefix= all install_headers + +EXTRA_CLEAN := $(SCRATCH_DIR) diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c index b57e91e1c3f2..532459a15067 100644 --- a/tools/testing/selftests/net/af_unix/test_unix_oob.c +++ b/tools/testing/selftests/net/af_unix/test_unix_oob.c @@ -124,7 +124,7 @@ void producer(struct sockaddr_un *consumer_addr) wait_for_signal(pipefd[0]); if (connect(cfd, (struct sockaddr *)consumer_addr, - sizeof(struct sockaddr)) != 0) { + sizeof(*consumer_addr)) != 0) { perror("Connect failed"); kill(0, SIGTERM); exit(1); diff --git a/tools/testing/selftests/net/bpf/Makefile b/tools/testing/selftests/net/bpf/Makefile deleted file mode 100644 index 4abaf16d2077..000000000000 --- a/tools/testing/selftests/net/bpf/Makefile +++ /dev/null @@ -1,51 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 - -CLANG ?= clang -SCRATCH_DIR := $(OUTPUT)/tools -BUILD_DIR := $(SCRATCH_DIR)/build -BPFDIR := $(abspath ../../../lib/bpf) -APIDIR := $(abspath ../../../include/uapi) - -CCINCLUDE += -I../../bpf -CCINCLUDE += -I../../../../../usr/include/ -CCINCLUDE += -I$(SCRATCH_DIR)/include - -BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a - -MAKE_DIRS := $(BUILD_DIR)/libbpf $(OUTPUT)/bpf -$(MAKE_DIRS): - mkdir -p $@ - -TEST_CUSTOM_PROGS = $(OUTPUT)/bpf/nat6to4.o -all: $(TEST_CUSTOM_PROGS) - -# Get Clang's default includes on this system, as opposed to those seen by -# '-target bpf'. This fixes "missing" files on some architectures/distros, -# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc. -# -# Use '-idirafter': Don't interfere with include mechanics except where the -# build would have failed anyways. -define get_sys_includes -$(shell $(1) $(2) -v -E - </dev/null 2>&1 \ - | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \ -$(shell $(1) $(2) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}') -endef - -ifneq ($(CROSS_COMPILE),) -CLANG_TARGET_ARCH = --target=$(notdir $(CROSS_COMPILE:%-=%)) -endif - -CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH)) - -$(TEST_CUSTOM_PROGS): $(OUTPUT)/%.o: %.c $(BPFOBJ) | $(MAKE_DIRS) - $(CLANG) -O2 -target bpf -c $< $(CCINCLUDE) $(CLANG_SYS_INCLUDES) -o $@ - -$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ - $(APIDIR)/linux/bpf.h \ - | $(BUILD_DIR)/libbpf - $(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \ - EXTRA_CFLAGS='-g -O0' \ - DESTDIR=$(SCRATCH_DIR) prefix= all install_headers - -EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) - diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh index 1e0a62f638fe..919c0dd9fe4b 100755 --- a/tools/testing/selftests/net/forwarding/tc_actions.sh +++ b/tools/testing/selftests/net/forwarding/tc_actions.sh @@ -3,7 +3,8 @@ ALL_TESTS="gact_drop_and_ok_test mirred_egress_redirect_test \ mirred_egress_mirror_test matchall_mirred_egress_mirror_test \ - gact_trap_test mirred_egress_to_ingress_test" + gact_trap_test mirred_egress_to_ingress_test \ + mirred_egress_to_ingress_tcp_test" NUM_NETIFS=4 source tc_common.sh source lib.sh @@ -198,6 +199,52 @@ mirred_egress_to_ingress_test() log_test "mirred_egress_to_ingress ($tcflags)" } +mirred_egress_to_ingress_tcp_test() +{ + local tmpfile=$(mktemp) tmpfile1=$(mktemp) + + RET=0 + dd conv=sparse status=none if=/dev/zero bs=1M count=2 of=$tmpfile + tc filter add dev $h1 protocol ip pref 100 handle 100 egress flower \ + $tcflags ip_proto tcp src_ip 192.0.2.1 dst_ip 192.0.2.2 \ + action ct commit nat src addr 192.0.2.2 pipe \ + action ct clear pipe \ + action ct commit nat dst addr 192.0.2.1 pipe \ + action ct clear pipe \ + action skbedit ptype host pipe \ + action mirred ingress redirect dev $h1 + tc filter add dev $h1 protocol ip pref 101 handle 101 egress flower \ + $tcflags ip_proto icmp \ + action mirred ingress redirect dev $h1 + tc filter add dev $h1 protocol ip pref 102 handle 102 ingress flower \ + ip_proto icmp \ + action drop + + ip vrf exec v$h1 nc --recv-only -w10 -l -p 12345 -o $tmpfile1 & + local rpid=$! + ip vrf exec v$h1 nc -w1 --send-only 192.0.2.2 12345 <$tmpfile + wait -n $rpid + cmp -s $tmpfile $tmpfile1 + check_err $? "server output check failed" + + $MZ $h1 -c 10 -p 64 -a $h1mac -b $h1mac -A 192.0.2.1 -B 192.0.2.1 \ + -t icmp "ping,id=42,seq=5" -q + tc_check_packets "dev $h1 egress" 101 10 + check_err $? "didn't mirred redirect ICMP" + tc_check_packets "dev $h1 ingress" 102 10 + check_err $? "didn't drop mirred ICMP" + local overlimits=$(tc_rule_stats_get ${h1} 101 egress .overlimits) + test ${overlimits} = 10 + check_err $? "wrong overlimits, expected 10 got ${overlimits}" + + tc filter del dev $h1 egress protocol ip pref 100 handle 100 flower + tc filter del dev $h1 egress protocol ip pref 101 handle 101 flower + tc filter del dev $h1 ingress protocol ip pref 102 handle 102 flower + + rm -f $tmpfile $tmpfile1 + log_test "mirred_egress_to_ingress_tcp ($tcflags)" +} + setup_prepare() { h1=${NETIFS[p1]} diff --git a/tools/testing/selftests/net/ip_local_port_range.c b/tools/testing/selftests/net/ip_local_port_range.c new file mode 100644 index 000000000000..75e3fdacdf73 --- /dev/null +++ b/tools/testing/selftests/net/ip_local_port_range.c @@ -0,0 +1,447 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +// Copyright (c) 2023 Cloudflare + +/* Test IP_LOCAL_PORT_RANGE socket option: IPv4 + IPv6, TCP + UDP. + * + * Tests assume that net.ipv4.ip_local_port_range is [40000, 49999]. + * Don't run these directly but with ip_local_port_range.sh script. + */ + +#include <fcntl.h> +#include <netinet/ip.h> + +#include "../kselftest_harness.h" + +#ifndef IP_LOCAL_PORT_RANGE +#define IP_LOCAL_PORT_RANGE 51 +#endif + +static __u32 pack_port_range(__u16 lo, __u16 hi) +{ + return (hi << 16) | (lo << 0); +} + +static void unpack_port_range(__u32 range, __u16 *lo, __u16 *hi) +{ + *lo = range & 0xffff; + *hi = range >> 16; +} + +static int get_so_domain(int fd) +{ + int domain, err; + socklen_t len; + + len = sizeof(domain); + err = getsockopt(fd, SOL_SOCKET, SO_DOMAIN, &domain, &len); + if (err) + return -1; + + return domain; +} + +static int bind_to_loopback_any_port(int fd) +{ + union { + struct sockaddr sa; + struct sockaddr_in v4; + struct sockaddr_in6 v6; + } addr; + socklen_t addr_len; + + memset(&addr, 0, sizeof(addr)); + switch (get_so_domain(fd)) { + case AF_INET: + addr.v4.sin_family = AF_INET; + addr.v4.sin_port = htons(0); + addr.v4.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + addr_len = sizeof(addr.v4); + break; + case AF_INET6: + addr.v6.sin6_family = AF_INET6; + addr.v6.sin6_port = htons(0); + addr.v6.sin6_addr = in6addr_loopback; + addr_len = sizeof(addr.v6); + break; + default: + return -1; + } + + return bind(fd, &addr.sa, addr_len); +} + +static int get_sock_port(int fd) +{ + union { + struct sockaddr sa; + struct sockaddr_in v4; + struct sockaddr_in6 v6; + } addr; + socklen_t addr_len; + int err; + + addr_len = sizeof(addr); + memset(&addr, 0, sizeof(addr)); + err = getsockname(fd, &addr.sa, &addr_len); + if (err) + return -1; + + switch (addr.sa.sa_family) { + case AF_INET: + return ntohs(addr.v4.sin_port); + case AF_INET6: + return ntohs(addr.v6.sin6_port); + default: + errno = EAFNOSUPPORT; + return -1; + } +} + +static int get_ip_local_port_range(int fd, __u32 *range) +{ + socklen_t len; + __u32 val; + int err; + + len = sizeof(val); + err = getsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &val, &len); + if (err) + return -1; + + *range = val; + return 0; +} + +FIXTURE(ip_local_port_range) {}; + +FIXTURE_SETUP(ip_local_port_range) +{ +} + +FIXTURE_TEARDOWN(ip_local_port_range) +{ +} + +FIXTURE_VARIANT(ip_local_port_range) { + int so_domain; + int so_type; + int so_protocol; +}; + +FIXTURE_VARIANT_ADD(ip_local_port_range, ip4_tcp) { + .so_domain = AF_INET, + .so_type = SOCK_STREAM, + .so_protocol = 0, +}; + +FIXTURE_VARIANT_ADD(ip_local_port_range, ip4_udp) { + .so_domain = AF_INET, + .so_type = SOCK_DGRAM, + .so_protocol = 0, +}; + +FIXTURE_VARIANT_ADD(ip_local_port_range, ip4_stcp) { + .so_domain = AF_INET, + .so_type = SOCK_STREAM, + .so_protocol = IPPROTO_SCTP, +}; + +FIXTURE_VARIANT_ADD(ip_local_port_range, ip6_tcp) { + .so_domain = AF_INET6, + .so_type = SOCK_STREAM, + .so_protocol = 0, +}; + +FIXTURE_VARIANT_ADD(ip_local_port_range, ip6_udp) { + .so_domain = AF_INET6, + .so_type = SOCK_DGRAM, + .so_protocol = 0, +}; + +FIXTURE_VARIANT_ADD(ip_local_port_range, ip6_stcp) { + .so_domain = AF_INET6, + .so_type = SOCK_STREAM, + .so_protocol = IPPROTO_SCTP, +}; + +TEST_F(ip_local_port_range, invalid_option_value) +{ + __u16 val16; + __u32 val32; + __u64 val64; + int fd, err; + + fd = socket(variant->so_domain, variant->so_type, variant->so_protocol); + ASSERT_GE(fd, 0) TH_LOG("socket failed"); + + /* Too few bytes */ + val16 = 40000; + err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &val16, sizeof(val16)); + EXPECT_TRUE(err) TH_LOG("expected setsockopt(IP_LOCAL_PORT_RANGE) to fail"); + EXPECT_EQ(errno, EINVAL); + + /* Empty range: low port > high port */ + val32 = pack_port_range(40222, 40111); + err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &val32, sizeof(val32)); + EXPECT_TRUE(err) TH_LOG("expected setsockopt(IP_LOCAL_PORT_RANGE) to fail"); + EXPECT_EQ(errno, EINVAL); + + /* Too many bytes */ + val64 = pack_port_range(40333, 40444); + err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &val64, sizeof(val64)); + EXPECT_TRUE(err) TH_LOG("expected setsockopt(IP_LOCAL_PORT_RANGE) to fail"); + EXPECT_EQ(errno, EINVAL); + + err = close(fd); + ASSERT_TRUE(!err) TH_LOG("close failed"); +} + +TEST_F(ip_local_port_range, port_range_out_of_netns_range) +{ + const struct test { + __u16 range_lo; + __u16 range_hi; + } tests[] = { + { 30000, 39999 }, /* socket range below netns range */ + { 50000, 59999 }, /* socket range above netns range */ + }; + const struct test *t; + + for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { + /* Bind a couple of sockets, not just one, to check + * that the range wasn't clamped to a single port from + * the netns range. That is [40000, 40000] or [49999, + * 49999], respectively for each test case. + */ + int fds[2], i; + + TH_LOG("lo %5hu, hi %5hu", t->range_lo, t->range_hi); + + for (i = 0; i < ARRAY_SIZE(fds); i++) { + int fd, err, port; + __u32 range; + + fd = socket(variant->so_domain, variant->so_type, variant->so_protocol); + ASSERT_GE(fd, 0) TH_LOG("#%d: socket failed", i); + + range = pack_port_range(t->range_lo, t->range_hi); + err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &range, sizeof(range)); + ASSERT_TRUE(!err) TH_LOG("#%d: setsockopt(IP_LOCAL_PORT_RANGE) failed", i); + + err = bind_to_loopback_any_port(fd); + ASSERT_TRUE(!err) TH_LOG("#%d: bind failed", i); + + /* Check that socket port range outside of ephemeral range is ignored */ + port = get_sock_port(fd); + ASSERT_GE(port, 40000) TH_LOG("#%d: expected port within netns range", i); + ASSERT_LE(port, 49999) TH_LOG("#%d: expected port within netns range", i); + + fds[i] = fd; + } + + for (i = 0; i < ARRAY_SIZE(fds); i++) + ASSERT_TRUE(close(fds[i]) == 0) TH_LOG("#%d: close failed", i); + } +} + +TEST_F(ip_local_port_range, single_port_range) +{ + const struct test { + __u16 range_lo; + __u16 range_hi; + __u16 expected; + } tests[] = { + /* single port range within ephemeral range */ + { 45000, 45000, 45000 }, + /* first port in the ephemeral range (clamp from above) */ + { 0, 40000, 40000 }, + /* last port in the ephemeral range (clamp from below) */ + { 49999, 0, 49999 }, + }; + const struct test *t; + + for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { + int fd, err, port; + __u32 range; + + TH_LOG("lo %5hu, hi %5hu, expected %5hu", + t->range_lo, t->range_hi, t->expected); + + fd = socket(variant->so_domain, variant->so_type, variant->so_protocol); + ASSERT_GE(fd, 0) TH_LOG("socket failed"); + + range = pack_port_range(t->range_lo, t->range_hi); + err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &range, sizeof(range)); + ASSERT_TRUE(!err) TH_LOG("setsockopt(IP_LOCAL_PORT_RANGE) failed"); + + err = bind_to_loopback_any_port(fd); + ASSERT_TRUE(!err) TH_LOG("bind failed"); + + port = get_sock_port(fd); + ASSERT_EQ(port, t->expected) TH_LOG("unexpected local port"); + + err = close(fd); + ASSERT_TRUE(!err) TH_LOG("close failed"); + } +} + +TEST_F(ip_local_port_range, exhaust_8_port_range) +{ + __u8 port_set = 0; + int i, fd, err; + __u32 range; + __u16 port; + int fds[8]; + + for (i = 0; i < ARRAY_SIZE(fds); i++) { + fd = socket(variant->so_domain, variant->so_type, variant->so_protocol); + ASSERT_GE(fd, 0) TH_LOG("socket failed"); + + range = pack_port_range(40000, 40007); + err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &range, sizeof(range)); + ASSERT_TRUE(!err) TH_LOG("setsockopt(IP_LOCAL_PORT_RANGE) failed"); + + err = bind_to_loopback_any_port(fd); + ASSERT_TRUE(!err) TH_LOG("bind failed"); + + port = get_sock_port(fd); + ASSERT_GE(port, 40000) TH_LOG("expected port within sockopt range"); + ASSERT_LE(port, 40007) TH_LOG("expected port within sockopt range"); + + port_set |= 1 << (port - 40000); + fds[i] = fd; + } + + /* Check that all every port from the test range is in use */ + ASSERT_EQ(port_set, 0xff) TH_LOG("expected all ports to be busy"); + + /* Check that bind() fails because the whole range is busy */ + fd = socket(variant->so_domain, variant->so_type, variant->so_protocol); + ASSERT_GE(fd, 0) TH_LOG("socket failed"); + + range = pack_port_range(40000, 40007); + err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &range, sizeof(range)); + ASSERT_TRUE(!err) TH_LOG("setsockopt(IP_LOCAL_PORT_RANGE) failed"); + + err = bind_to_loopback_any_port(fd); + ASSERT_TRUE(err) TH_LOG("expected bind to fail"); + ASSERT_EQ(errno, EADDRINUSE); + + err = close(fd); + ASSERT_TRUE(!err) TH_LOG("close failed"); + + for (i = 0; i < ARRAY_SIZE(fds); i++) { + err = close(fds[i]); + ASSERT_TRUE(!err) TH_LOG("close failed"); + } +} + +TEST_F(ip_local_port_range, late_bind) +{ + union { + struct sockaddr sa; + struct sockaddr_in v4; + struct sockaddr_in6 v6; + } addr; + socklen_t addr_len; + const int one = 1; + int fd, err; + __u32 range; + __u16 port; + + if (variant->so_protocol == IPPROTO_SCTP) + SKIP(return, "SCTP doesn't support IP_BIND_ADDRESS_NO_PORT"); + + fd = socket(variant->so_domain, variant->so_type, 0); + ASSERT_GE(fd, 0) TH_LOG("socket failed"); + + range = pack_port_range(40100, 40199); + err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &range, sizeof(range)); + ASSERT_TRUE(!err) TH_LOG("setsockopt(IP_LOCAL_PORT_RANGE) failed"); + + err = setsockopt(fd, SOL_IP, IP_BIND_ADDRESS_NO_PORT, &one, sizeof(one)); + ASSERT_TRUE(!err) TH_LOG("setsockopt(IP_BIND_ADDRESS_NO_PORT) failed"); + + err = bind_to_loopback_any_port(fd); + ASSERT_TRUE(!err) TH_LOG("bind failed"); + + port = get_sock_port(fd); + ASSERT_EQ(port, 0) TH_LOG("getsockname failed"); + + /* Invalid destination */ + memset(&addr, 0, sizeof(addr)); + switch (variant->so_domain) { + case AF_INET: + addr.v4.sin_family = AF_INET; + addr.v4.sin_port = htons(0); + addr.v4.sin_addr.s_addr = htonl(INADDR_ANY); + addr_len = sizeof(addr.v4); + break; + case AF_INET6: + addr.v6.sin6_family = AF_INET6; + addr.v6.sin6_port = htons(0); + addr.v6.sin6_addr = in6addr_any; + addr_len = sizeof(addr.v6); + break; + default: + ASSERT_TRUE(false) TH_LOG("unsupported socket domain"); + } + + /* connect() doesn't need to succeed for late bind to happen */ + connect(fd, &addr.sa, addr_len); + + port = get_sock_port(fd); + ASSERT_GE(port, 40100); + ASSERT_LE(port, 40199); + + err = close(fd); + ASSERT_TRUE(!err) TH_LOG("close failed"); +} + +TEST_F(ip_local_port_range, get_port_range) +{ + __u16 lo, hi; + __u32 range; + int fd, err; + + fd = socket(variant->so_domain, variant->so_type, variant->so_protocol); + ASSERT_GE(fd, 0) TH_LOG("socket failed"); + + /* Get range before it will be set */ + err = get_ip_local_port_range(fd, &range); + ASSERT_TRUE(!err) TH_LOG("getsockopt(IP_LOCAL_PORT_RANGE) failed"); + + unpack_port_range(range, &lo, &hi); + ASSERT_EQ(lo, 0) TH_LOG("unexpected low port"); + ASSERT_EQ(hi, 0) TH_LOG("unexpected high port"); + + range = pack_port_range(12345, 54321); + err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &range, sizeof(range)); + ASSERT_TRUE(!err) TH_LOG("setsockopt(IP_LOCAL_PORT_RANGE) failed"); + + /* Get range after it has been set */ + err = get_ip_local_port_range(fd, &range); + ASSERT_TRUE(!err) TH_LOG("getsockopt(IP_LOCAL_PORT_RANGE) failed"); + + unpack_port_range(range, &lo, &hi); + ASSERT_EQ(lo, 12345) TH_LOG("unexpected low port"); + ASSERT_EQ(hi, 54321) TH_LOG("unexpected high port"); + + /* Unset the port range */ + range = pack_port_range(0, 0); + err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &range, sizeof(range)); + ASSERT_TRUE(!err) TH_LOG("setsockopt(IP_LOCAL_PORT_RANGE) failed"); + + /* Get range after it has been unset */ + err = get_ip_local_port_range(fd, &range); + ASSERT_TRUE(!err) TH_LOG("getsockopt(IP_LOCAL_PORT_RANGE) failed"); + + unpack_port_range(range, &lo, &hi); + ASSERT_EQ(lo, 0) TH_LOG("unexpected low port"); + ASSERT_EQ(hi, 0) TH_LOG("unexpected high port"); + + err = close(fd); + ASSERT_TRUE(!err) TH_LOG("close failed"); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/ip_local_port_range.sh b/tools/testing/selftests/net/ip_local_port_range.sh new file mode 100755 index 000000000000..6c6ad346eaa0 --- /dev/null +++ b/tools/testing/selftests/net/ip_local_port_range.sh @@ -0,0 +1,5 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +./in_netns.sh \ + sh -c 'sysctl -q -w net.ipv4.ip_local_port_range="40000 49999" && ./ip_local_port_range' diff --git a/tools/testing/selftests/net/l2_tos_ttl_inherit.sh b/tools/testing/selftests/net/l2_tos_ttl_inherit.sh index dca1e6f777a8..f11756e7df2f 100755 --- a/tools/testing/selftests/net/l2_tos_ttl_inherit.sh +++ b/tools/testing/selftests/net/l2_tos_ttl_inherit.sh @@ -12,19 +12,27 @@ # In addition this script also checks if forcing a specific field in the # outer header is working. +# Return 4 by default (Kselftest SKIP code) +ERR=4 + if [ "$(id -u)" != "0" ]; then echo "Please run as root." - exit 0 + exit $ERR fi if ! which tcpdump > /dev/null 2>&1; then echo "No tcpdump found. Required for this test." - exit 0 + exit $ERR fi expected_tos="0x00" expected_ttl="0" failed=false +readonly NS0=$(mktemp -u ns0-XXXXXXXX) +readonly NS1=$(mktemp -u ns1-XXXXXXXX) + +RUN_NS0="ip netns exec ${NS0}" + get_random_tos() { # Get a random hex tos value between 0x00 and 0xfc, a multiple of 4 echo "0x$(tr -dc '0-9a-f' < /dev/urandom | head -c 1)\ @@ -61,7 +69,6 @@ setup() { local vlan="$5" local test_tos="0x00" local test_ttl="0" - local ns="ip netns exec testing" # We don't want a test-tos of 0x00, # because this is the value that we get when no tos is set. @@ -94,14 +101,15 @@ setup() { printf "│%7s │%6s │%6s │%13s │%13s │%6s │" \ "$type" "$outer" "$inner" "$tos" "$ttl" "$vlan" - # Create 'testing' netns, veth pair and connect main ns with testing ns - ip netns add testing - ip link add type veth - ip link set veth1 netns testing - ip link set veth0 up - $ns ip link set veth1 up - ip addr flush dev veth0 - $ns ip addr flush dev veth1 + # Create netns NS0 and NS1 and connect them with a veth pair + ip netns add "${NS0}" + ip netns add "${NS1}" + ip link add name veth0 netns "${NS0}" type veth \ + peer name veth1 netns "${NS1}" + ip -netns "${NS0}" link set dev veth0 up + ip -netns "${NS1}" link set dev veth1 up + ip -netns "${NS0}" address flush dev veth0 + ip -netns "${NS1}" address flush dev veth1 local local_addr1="" local local_addr2="" @@ -127,51 +135,59 @@ setup() { if [ "$type" = "gre" ]; then type="gretap" fi - ip addr add 198.18.0.1/24 dev veth0 - $ns ip addr add 198.18.0.2/24 dev veth1 - ip link add name tep0 type $type $local_addr1 remote \ - 198.18.0.2 tos $test_tos ttl $test_ttl $vxlan $geneve - $ns ip link add name tep1 type $type $local_addr2 remote \ - 198.18.0.1 tos $test_tos ttl $test_ttl $vxlan $geneve + ip -netns "${NS0}" address add 198.18.0.1/24 dev veth0 + ip -netns "${NS1}" address add 198.18.0.2/24 dev veth1 + ip -netns "${NS0}" link add name tep0 type $type $local_addr1 \ + remote 198.18.0.2 tos $test_tos ttl $test_ttl \ + $vxlan $geneve + ip -netns "${NS1}" link add name tep1 type $type $local_addr2 \ + remote 198.18.0.1 tos $test_tos ttl $test_ttl \ + $vxlan $geneve elif [ "$outer" = "6" ]; then if [ "$type" = "gre" ]; then type="ip6gretap" fi - ip addr add fdd1:ced0:5d88:3fce::1/64 dev veth0 - $ns ip addr add fdd1:ced0:5d88:3fce::2/64 dev veth1 - ip link add name tep0 type $type $local_addr1 \ - remote fdd1:ced0:5d88:3fce::2 tos $test_tos ttl $test_ttl \ - $vxlan $geneve - $ns ip link add name tep1 type $type $local_addr2 \ - remote fdd1:ced0:5d88:3fce::1 tos $test_tos ttl $test_ttl \ - $vxlan $geneve + ip -netns "${NS0}" address add fdd1:ced0:5d88:3fce::1/64 \ + dev veth0 nodad + ip -netns "${NS1}" address add fdd1:ced0:5d88:3fce::2/64 \ + dev veth1 nodad + ip -netns "${NS0}" link add name tep0 type $type $local_addr1 \ + remote fdd1:ced0:5d88:3fce::2 tos $test_tos \ + ttl $test_ttl $vxlan $geneve + ip -netns "${NS1}" link add name tep1 type $type $local_addr2 \ + remote fdd1:ced0:5d88:3fce::1 tos $test_tos \ + ttl $test_ttl $vxlan $geneve fi # Bring L2-tunnel link up and create VLAN on top - ip link set tep0 up - $ns ip link set tep1 up - ip addr flush dev tep0 - $ns ip addr flush dev tep1 + ip -netns "${NS0}" link set tep0 up + ip -netns "${NS1}" link set tep1 up + ip -netns "${NS0}" address flush dev tep0 + ip -netns "${NS1}" address flush dev tep1 local parent if $vlan; then parent="vlan99-" - ip link add link tep0 name ${parent}0 type vlan id 99 - $ns ip link add link tep1 name ${parent}1 type vlan id 99 - ip link set ${parent}0 up - $ns ip link set ${parent}1 up - ip addr flush dev ${parent}0 - $ns ip addr flush dev ${parent}1 + ip -netns "${NS0}" link add link tep0 name ${parent}0 \ + type vlan id 99 + ip -netns "${NS1}" link add link tep1 name ${parent}1 \ + type vlan id 99 + ip -netns "${NS0}" link set dev ${parent}0 up + ip -netns "${NS1}" link set dev ${parent}1 up + ip -netns "${NS0}" address flush dev ${parent}0 + ip -netns "${NS1}" address flush dev ${parent}1 else parent="tep" fi # Assign inner IPv4/IPv6 addresses if [ "$inner" = "4" ] || [ "$inner" = "other" ]; then - ip addr add 198.19.0.1/24 brd + dev ${parent}0 - $ns ip addr add 198.19.0.2/24 brd + dev ${parent}1 + ip -netns "${NS0}" address add 198.19.0.1/24 brd + dev ${parent}0 + ip -netns "${NS1}" address add 198.19.0.2/24 brd + dev ${parent}1 elif [ "$inner" = "6" ]; then - ip addr add fdd4:96cf:4eae:443b::1/64 dev ${parent}0 - $ns ip addr add fdd4:96cf:4eae:443b::2/64 dev ${parent}1 + ip -netns "${NS0}" address add fdd4:96cf:4eae:443b::1/64 \ + dev ${parent}0 nodad + ip -netns "${NS1}" address add fdd4:96cf:4eae:443b::2/64 \ + dev ${parent}1 nodad fi } @@ -192,10 +208,10 @@ verify() { ping_dst="198.19.0.3" # Generates ARPs which are not IPv4/IPv6 fi if [ "$tos_ttl" = "inherit" ]; then - ping -i 0.1 $ping_dst -Q "$expected_tos" -t "$expected_ttl" \ - 2>/dev/null 1>&2 & ping_pid="$!" + ${RUN_NS0} ping -i 0.1 $ping_dst -Q "$expected_tos" \ + -t "$expected_ttl" 2>/dev/null 1>&2 & ping_pid="$!" else - ping -i 0.1 $ping_dst 2>/dev/null 1>&2 & ping_pid="$!" + ${RUN_NS0} ping -i 0.1 $ping_dst 2>/dev/null 1>&2 & ping_pid="$!" fi local tunnel_type_offset tunnel_type_proto req_proto_offset req_offset if [ "$type" = "gre" ]; then @@ -216,10 +232,12 @@ verify() { req_proto_offset="$((req_proto_offset + 4))" req_offset="$((req_offset + 4))" fi - out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \ - ip[$tunnel_type_offset] = $tunnel_type_proto and \ - ip[$req_proto_offset] = 0x01 and \ - ip[$req_offset] = 0x08 2>/dev/null | head -n 1)" + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip[$tunnel_type_offset] = $tunnel_type_proto and \ + ip[$req_proto_offset] = 0x01 and \ + ip[$req_offset] = 0x08 2>/dev/null \ + | head -n 1)" elif [ "$inner" = "6" ]; then req_proto_offset="44" req_offset="78" @@ -231,10 +249,12 @@ verify() { req_proto_offset="$((req_proto_offset + 4))" req_offset="$((req_offset + 4))" fi - out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \ - ip[$tunnel_type_offset] = $tunnel_type_proto and \ - ip[$req_proto_offset] = 0x3a and \ - ip[$req_offset] = 0x80 2>/dev/null | head -n 1)" + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip[$tunnel_type_offset] = $tunnel_type_proto and \ + ip[$req_proto_offset] = 0x3a and \ + ip[$req_offset] = 0x80 2>/dev/null \ + | head -n 1)" elif [ "$inner" = "other" ]; then req_proto_offset="36" req_offset="45" @@ -250,11 +270,13 @@ verify() { expected_tos="0x00" expected_ttl="64" fi - out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \ - ip[$tunnel_type_offset] = $tunnel_type_proto and \ - ip[$req_proto_offset] = 0x08 and \ - ip[$((req_proto_offset + 1))] = 0x06 and \ - ip[$req_offset] = 0x01 2>/dev/null | head -n 1)" + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip[$tunnel_type_offset] = $tunnel_type_proto and \ + ip[$req_proto_offset] = 0x08 and \ + ip[$((req_proto_offset + 1))] = 0x06 and \ + ip[$req_offset] = 0x01 2>/dev/null \ + | head -n 1)" fi elif [ "$outer" = "6" ]; then if [ "$type" = "gre" ]; then @@ -273,10 +295,12 @@ verify() { req_proto_offset="$((req_proto_offset + 4))" req_offset="$((req_offset + 4))" fi - out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \ - ip6[$tunnel_type_offset] = $tunnel_type_proto and \ - ip6[$req_proto_offset] = 0x01 and \ - ip6[$req_offset] = 0x08 2>/dev/null | head -n 1)" + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip6[$tunnel_type_offset] = $tunnel_type_proto and \ + ip6[$req_proto_offset] = 0x01 and \ + ip6[$req_offset] = 0x08 2>/dev/null \ + | head -n 1)" elif [ "$inner" = "6" ]; then local req_proto_offset="72" local req_offset="106" @@ -288,10 +312,12 @@ verify() { req_proto_offset="$((req_proto_offset + 4))" req_offset="$((req_offset + 4))" fi - out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \ - ip6[$tunnel_type_offset] = $tunnel_type_proto and \ - ip6[$req_proto_offset] = 0x3a and \ - ip6[$req_offset] = 0x80 2>/dev/null | head -n 1)" + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip6[$tunnel_type_offset] = $tunnel_type_proto and \ + ip6[$req_proto_offset] = 0x3a and \ + ip6[$req_offset] = 0x80 2>/dev/null \ + | head -n 1)" elif [ "$inner" = "other" ]; then local req_proto_offset="64" local req_offset="73" @@ -307,15 +333,17 @@ verify() { expected_tos="0x00" expected_ttl="64" fi - out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \ - ip6[$tunnel_type_offset] = $tunnel_type_proto and \ - ip6[$req_proto_offset] = 0x08 and \ - ip6[$((req_proto_offset + 1))] = 0x06 and \ - ip6[$req_offset] = 0x01 2>/dev/null | head -n 1)" + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip6[$tunnel_type_offset] = $tunnel_type_proto and \ + ip6[$req_proto_offset] = 0x08 and \ + ip6[$((req_proto_offset + 1))] = 0x06 and \ + ip6[$req_offset] = 0x01 2>/dev/null \ + | head -n 1)" fi fi kill -9 $ping_pid - wait $ping_pid 2>/dev/null + wait $ping_pid 2>/dev/null || true result="FAIL" if [ "$outer" = "4" ]; then captured_ttl="$(get_field "ttl" "$out")" @@ -351,11 +379,35 @@ verify() { } cleanup() { - ip link del veth0 2>/dev/null - ip netns del testing 2>/dev/null - ip link del tep0 2>/dev/null + ip netns del "${NS0}" 2>/dev/null + ip netns del "${NS1}" 2>/dev/null } +exit_handler() { + # Don't exit immediately if one of the intermediate commands fails. + # We might be called at the end of the script, when the network + # namespaces have already been deleted. So cleanup() may fail, but we + # still need to run until 'exit $ERR' or the script won't return the + # correct error code. + set +e + + cleanup + + exit $ERR +} + +# Restore the default SIGINT handler (just in case) and exit. +# The exit handler will take care of cleaning everything up. +interrupted() { + trap - INT + + exit $ERR +} + +set -e +trap exit_handler EXIT +trap interrupted INT + printf "┌────────┬───────┬───────┬──────────────┬" printf "──────────────┬───────┬────────┐\n" for type in gre vxlan geneve; do @@ -385,6 +437,10 @@ done printf "└────────┴───────┴───────┴──────────────┴" printf "──────────────┴───────┴────────┘\n" +# All tests done. +# Set ERR appropriately: it will be returned by the exit handler. if $failed; then - exit 1 + ERR=1 +else + ERR=0 fi diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 24bcd7b9bdb2..ef628b16fe9b 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -17,6 +17,11 @@ flush_pids() sleep 1.1 ip netns pids "${ns}" | xargs --no-run-if-empty kill -SIGUSR1 &>/dev/null + + for _ in $(seq 10); do + [ -z "$(ip netns pids "${ns}")" ] && break + sleep 0.1 + done } cleanup() @@ -37,15 +42,20 @@ if [ $? -ne 0 ];then exit $ksft_skip fi +get_msk_inuse() +{ + ip netns exec $ns cat /proc/net/protocols | awk '$1~/^MPTCP$/{print $3}' +} + __chk_nr() { - local condition="$1" + local command="$1" local expected=$2 local msg nr shift 2 msg=$* - nr=$(ss -inmHMN $ns | $condition) + nr=$(eval $command) printf "%-50s" "$msg" if [ $nr != $expected ]; then @@ -57,9 +67,17 @@ __chk_nr() test_cnt=$((test_cnt+1)) } +__chk_msk_nr() +{ + local condition=$1 + shift 1 + + __chk_nr "ss -inmHMN $ns | $condition" $* +} + chk_msk_nr() { - __chk_nr "grep -c token:" $* + __chk_msk_nr "grep -c token:" $* } wait_msk_nr() @@ -97,12 +115,12 @@ wait_msk_nr() chk_msk_fallback_nr() { - __chk_nr "grep -c fallback" $* + __chk_msk_nr "grep -c fallback" $* } chk_msk_remote_key_nr() { - __chk_nr "grep -c remote_key" $* + __chk_msk_nr "grep -c remote_key" $* } __chk_listen() @@ -142,6 +160,26 @@ chk_msk_listen() nr=$(ss -Ml $filter | wc -l) } +chk_msk_inuse() +{ + local expected=$1 + local listen_nr + + shift 1 + + listen_nr=$(ss -N "${ns}" -Ml | grep -c LISTEN) + expected=$((expected + listen_nr)) + + for _ in $(seq 10); do + if [ $(get_msk_inuse) -eq $expected ];then + break + fi + sleep 0.1 + done + + __chk_nr get_msk_inuse $expected $* +} + # $1: ns, $2: port wait_local_port_listen() { @@ -195,8 +233,10 @@ wait_connected $ns 10000 chk_msk_nr 2 "after MPC handshake " chk_msk_remote_key_nr 2 "....chk remote_key" chk_msk_fallback_nr 0 "....chk no fallback" +chk_msk_inuse 2 "....chk 2 msk in use" flush_pids +chk_msk_inuse 0 "....chk 0 msk in use after flush" echo "a" | \ timeout ${timeout_test} \ @@ -211,8 +251,11 @@ echo "b" | \ 127.0.0.1 >/dev/null & wait_connected $ns 10001 chk_msk_fallback_nr 1 "check fallback" +chk_msk_inuse 1 "....chk 1 msk in use" flush_pids +chk_msk_inuse 0 "....chk 0 msk in use after flush" + NR_CLIENTS=100 for I in `seq 1 $NR_CLIENTS`; do echo "a" | \ @@ -232,6 +275,9 @@ for I in `seq 1 $NR_CLIENTS`; do done wait_msk_nr $((NR_CLIENTS*2)) "many msk socket present" +chk_msk_inuse $((NR_CLIENTS*2)) "....chk many msk in use" flush_pids +chk_msk_inuse 0 "....chk 0 msk in use after flush" + exit $ret diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index 8a8266957bc5..b25a31445ded 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -627,7 +627,7 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, char rbuf[8192]; ssize_t len; - if (fds.events == 0) + if (fds.events == 0 || quit) break; switch (poll(&fds, 1, poll_timeout)) { @@ -733,7 +733,7 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, } /* leave some time for late join/announce */ - if (cfg_remove) + if (cfg_remove && !quit) usleep(cfg_wait); return 0; diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index d11d3d566608..387abdcec011 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -774,24 +774,17 @@ do_transfer() addr_nr_ns2=${addr_nr_ns2:9} fi - local local_addr - if is_v6 "${connect_addr}"; then - local_addr="::" - else - local_addr="0.0.0.0" - fi - extra_srv_args="$extra_args $extra_srv_args" if [ "$test_link_fail" -gt 1 ];then timeout ${timeout_test} \ ip netns exec ${listener_ns} \ ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \ - $extra_srv_args ${local_addr} < "$sinfail" > "$sout" & + $extra_srv_args "::" < "$sinfail" > "$sout" & else timeout ${timeout_test} \ ip netns exec ${listener_ns} \ ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \ - $extra_srv_args ${local_addr} < "$sin" > "$sout" & + $extra_srv_args "::" < "$sin" > "$sout" & fi local spid=$! @@ -2448,6 +2441,47 @@ v4mapped_tests() fi } +mixed_tests() +{ + if reset "IPv4 sockets do not use IPv6 addresses"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal + run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow + chk_join_nr 0 0 0 + fi + + # Need an IPv6 mptcp socket to allow subflows of both families + if reset "simult IPv4 and IPv6 subflows"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 10.0.1.1 flags signal + run_tests $ns1 $ns2 dead:beef:2::1 0 0 0 slow + chk_join_nr 1 1 1 + fi + + # cross families subflows will not be created even in fullmesh mode + if reset "simult IPv4 and IPv6 subflows, fullmesh 1x1"; then + pm_nl_set_limits $ns1 0 4 + pm_nl_set_limits $ns2 1 4 + pm_nl_add_endpoint $ns2 dead:beef:2::2 flags subflow,fullmesh + pm_nl_add_endpoint $ns1 10.0.1.1 flags signal + run_tests $ns1 $ns2 dead:beef:2::1 0 0 0 slow + chk_join_nr 1 1 1 + fi + + # fullmesh still tries to create all the possibly subflows with + # matching family + if reset "simult IPv4 and IPv6 subflows, fullmesh 2x2"; then + pm_nl_set_limits $ns1 0 4 + pm_nl_set_limits $ns2 2 4 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal + run_tests $ns1 $ns2 dead:beef:1::1 0 0 fullmesh_1 slow + chk_join_nr 4 4 4 + fi +} + backup_tests() { # single subflow, backup @@ -3120,6 +3154,7 @@ all_tests_sorted=( a@add_tests 6@ipv6_tests 4@v4mapped_tests + M@mixed_tests b@backup_tests p@add_addr_ports_tests k@syncookies_tests diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index a29deb9fa024..66c5be25c13d 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -43,41 +43,40 @@ rndh=$(printf %x "$sec")-$(mktemp -u XXXXXX) ns1="ns1-$rndh" ns2="ns2-$rndh" +print_title() +{ + stdbuf -o0 -e0 printf "INFO: %s\n" "${1}" +} + kill_wait() { + [ $1 -eq 0 ] && return 0 + + kill -SIGUSR1 $1 > /dev/null 2>&1 kill $1 > /dev/null 2>&1 wait $1 2>/dev/null } cleanup() { - echo "cleanup" - - rm -rf $file $client_evts $server_evts + print_title "Cleanup" # Terminate the MPTCP connection and related processes - if [ $client4_pid -ne 0 ]; then - kill -SIGUSR1 $client4_pid > /dev/null 2>&1 - fi - if [ $server4_pid -ne 0 ]; then - kill_wait $server4_pid - fi - if [ $client6_pid -ne 0 ]; then - kill -SIGUSR1 $client6_pid > /dev/null 2>&1 - fi - if [ $server6_pid -ne 0 ]; then - kill_wait $server6_pid - fi - if [ $server_evts_pid -ne 0 ]; then - kill_wait $server_evts_pid - fi - if [ $client_evts_pid -ne 0 ]; then - kill_wait $client_evts_pid - fi + local pid + for pid in $client4_pid $server4_pid $client6_pid $server6_pid\ + $server_evts_pid $client_evts_pid + do + kill_wait $pid + done + local netns for netns in "$ns1" "$ns2" ;do ip netns del "$netns" done + + rm -rf $file $client_evts $server_evts + + stdbuf -o0 -e0 printf "Done\n" } trap cleanup EXIT @@ -108,6 +107,7 @@ ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad ip -net "$ns2" addr add dead:beef:2::2/64 dev ns2eth1 nodad ip -net "$ns2" link set ns2eth1 up +print_title "Init" stdbuf -o0 -e0 printf "Created network namespaces ns1, ns2 \t\t\t[OK]\n" make_file() @@ -193,11 +193,16 @@ make_connection() server_serverside=$(grep "type:1," "$server_evts" | sed --unbuffered -n 's/.*\(server_side:\)\([[:digit:]]*\).*$/\2/p;q') + stdbuf -o0 -e0 printf "Established IP%s MPTCP Connection ns2 => ns1 \t\t" $is_v6 if [ "$client_token" != "" ] && [ "$server_token" != "" ] && [ "$client_serverside" = 0 ] && [ "$server_serverside" = 1 ] then - stdbuf -o0 -e0 printf "Established IP%s MPTCP Connection ns2 => ns1 \t\t[OK]\n" $is_v6 + stdbuf -o0 -e0 printf "[OK]\n" else + stdbuf -o0 -e0 printf "[FAIL]\n" + stdbuf -o0 -e0 printf "\tExpected tokens (c:%s - s:%s) and server (c:%d - s:%d)\n" \ + "${client_token}" "${server_token}" \ + "${client_serverside}" "${server_serverside}" exit 1 fi @@ -217,6 +222,48 @@ make_connection() fi } +# $1: var name ; $2: prev ret +check_expected_one() +{ + local var="${1}" + local exp="e_${var}" + local prev_ret="${2}" + + if [ "${!var}" = "${!exp}" ] + then + return 0 + fi + + if [ "${prev_ret}" = "0" ] + then + stdbuf -o0 -e0 printf "[FAIL]\n" + fi + + stdbuf -o0 -e0 printf "\tExpected value for '%s': '%s', got '%s'.\n" \ + "${var}" "${!var}" "${!exp}" + return 1 +} + +# $@: all var names to check +check_expected() +{ + local ret=0 + local var + + for var in "${@}" + do + check_expected_one "${var}" "${ret}" || ret=1 + done + + if [ ${ret} -eq 0 ] + then + stdbuf -o0 -e0 printf "[OK]\n" + return 0 + fi + + exit 1 +} + verify_announce_event() { local evt=$1 @@ -242,19 +289,14 @@ verify_announce_event() fi dport=$(sed --unbuffered -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") id=$(sed --unbuffered -n 's/.*\(rem_id:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") - if [ "$type" = "$e_type" ] && [ "$token" = "$e_token" ] && - [ "$addr" = "$e_addr" ] && [ "$dport" = "$e_dport" ] && - [ "$id" = "$e_id" ] - then - stdbuf -o0 -e0 printf "[OK]\n" - return 0 - fi - stdbuf -o0 -e0 printf "[FAIL]\n" - exit 1 + + check_expected "type" "token" "addr" "dport" "id" } test_announce() { + print_title "Announce tests" + # Capture events on the network namespace running the server :>"$server_evts" @@ -270,7 +312,7 @@ test_announce() then stdbuf -o0 -e0 printf "[OK]\n" else - stdbuf -o0 -e0 printf "[FAIL]\n" + stdbuf -o0 -e0 printf "[FAIL]\n\ttype defined: %s\n" "${type}" exit 1 fi @@ -347,18 +389,14 @@ verify_remove_event() type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") id=$(sed --unbuffered -n 's/.*\(rem_id:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") - if [ "$type" = "$e_type" ] && [ "$token" = "$e_token" ] && - [ "$id" = "$e_id" ] - then - stdbuf -o0 -e0 printf "[OK]\n" - return 0 - fi - stdbuf -o0 -e0 printf "[FAIL]\n" - exit 1 + + check_expected "type" "token" "id" } test_remove() { + print_title "Remove tests" + # Capture events on the network namespace running the server :>"$server_evts" @@ -507,20 +545,13 @@ verify_subflow_events() daddr=$(sed --unbuffered -n 's/.*\(daddr4:\)\([0-9.]*\).*$/\2/p;q' "$evt") fi - if [ "$type" = "$e_type" ] && [ "$token" = "$e_token" ] && - [ "$daddr" = "$e_daddr" ] && [ "$e_dport" = "$dport" ] && - [ "$family" = "$e_family" ] && [ "$saddr" = "$e_saddr" ] && - [ "$e_locid" = "$locid" ] && [ "$e_remid" = "$remid" ] - then - stdbuf -o0 -e0 printf "[OK]\n" - return 0 - fi - stdbuf -o0 -e0 printf "[FAIL]\n" - exit 1 + check_expected "type" "token" "daddr" "dport" "family" "saddr" "locid" "remid" } test_subflows() { + print_title "Subflows v4 or v6 only tests" + # Capture events on the network namespace running the server :>"$server_evts" @@ -752,8 +783,58 @@ test_subflows() "$server4_token" > /dev/null 2>&1 } +test_subflows_v4_v6_mix() +{ + print_title "Subflows v4 and v6 mix tests" + + # Attempt to add a listener at 10.0.2.1:<subflow-port> + ip netns exec "$ns1" ./pm_nl_ctl listen 10.0.2.1\ + $app6_port > /dev/null 2>&1 & + local listener_pid=$! + + # ADD_ADDR4 from server to client machine reusing the subflow port on + # the established v6 connection + :>"$client_evts" + ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server6_token" id\ + $server_addr_id dev ns1eth2 > /dev/null 2>&1 + stdbuf -o0 -e0 printf "ADD_ADDR4 id:%d 10.0.2.1 (ns1) => ns2, reuse port\t\t" $server_addr_id + sleep 0.5 + verify_announce_event "$client_evts" "$ANNOUNCED" "$client6_token" "10.0.2.1"\ + "$server_addr_id" "$app6_port" + + # CREATE_SUBFLOW from client to server machine + :>"$client_evts" + ip netns exec "$ns2" ./pm_nl_ctl csf lip 10.0.2.2 lid 23 rip 10.0.2.1 rport\ + $app6_port token "$client6_token" > /dev/null 2>&1 + sleep 0.5 + verify_subflow_events "$client_evts" "$SUB_ESTABLISHED" "$client6_token"\ + "$AF_INET" "10.0.2.2" "10.0.2.1" "$app6_port" "23"\ + "$server_addr_id" "ns2" "ns1" + + # Delete the listener from the server ns, if one was created + kill_wait $listener_pid + + sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts") + + # DESTROY_SUBFLOW from client to server machine + :>"$client_evts" + ip netns exec "$ns2" ./pm_nl_ctl dsf lip 10.0.2.2 lport "$sport" rip 10.0.2.1 rport\ + $app6_port token "$client6_token" > /dev/null 2>&1 + sleep 0.5 + verify_subflow_events "$client_evts" "$SUB_CLOSED" "$client6_token" \ + "$AF_INET" "10.0.2.2" "10.0.2.1" "$app6_port" "23"\ + "$server_addr_id" "ns2" "ns1" + + # RM_ADDR from server to client machine + ip netns exec "$ns1" ./pm_nl_ctl rem id $server_addr_id token\ + "$server6_token" > /dev/null 2>&1 + sleep 0.5 +} + test_prio() { + print_title "Prio tests" + local count # Send MP_PRIO signal from client to server machine @@ -765,7 +846,7 @@ test_prio() count=$(ip netns exec "$ns2" nstat -as | grep MPTcpExtMPPrioTx | awk '{print $2}') [ -z "$count" ] && count=0 if [ $count != 1 ]; then - stdbuf -o0 -e0 printf "[FAIL]\n" + stdbuf -o0 -e0 printf "[FAIL]\n\tCount != 1: %d\n" "${count}" exit 1 else stdbuf -o0 -e0 printf "[OK]\n" @@ -776,7 +857,7 @@ test_prio() count=$(ip netns exec "$ns1" nstat -as | grep MPTcpExtMPPrioRx | awk '{print $2}') [ -z "$count" ] && count=0 if [ $count != 1 ]; then - stdbuf -o0 -e0 printf "[FAIL]\n" + stdbuf -o0 -e0 printf "[FAIL]\n\tCount != 1: %d\n" "${count}" exit 1 else stdbuf -o0 -e0 printf "[OK]\n" @@ -817,19 +898,13 @@ verify_listener_events() sed --unbuffered -n 's/.*\(saddr4:\)\([0-9.]*\).*$/\2/p;q') fi - if [ $type ] && [ $type = $e_type ] && - [ $family ] && [ $family = $e_family ] && - [ $saddr ] && [ $saddr = $e_saddr ] && - [ $sport ] && [ $sport = $e_sport ]; then - stdbuf -o0 -e0 printf "[OK]\n" - return 0 - fi - stdbuf -o0 -e0 printf "[FAIL]\n" - exit 1 + check_expected "type" "family" "saddr" "sport" } test_listener() { + print_title "Listener tests" + # Capture events on the network namespace running the client :>$client_evts @@ -856,11 +931,14 @@ test_listener() verify_listener_events $client_evts $LISTENER_CLOSED $AF_INET 10.0.2.2 $client4_port } +print_title "Make connections" make_connection make_connection "v6" + test_announce test_remove test_subflows +test_subflows_v4_v6_mix test_prio test_listener diff --git a/tools/testing/selftests/net/bpf/nat6to4.c b/tools/testing/selftests/net/nat6to4.c index ac54c36b25fc..ac54c36b25fc 100644 --- a/tools/testing/selftests/net/bpf/nat6to4.c +++ b/tools/testing/selftests/net/nat6to4.c diff --git a/tools/testing/selftests/net/tcp_mmap.c b/tools/testing/selftests/net/tcp_mmap.c index 00f837c9bc6c..46a02bbd31d0 100644 --- a/tools/testing/selftests/net/tcp_mmap.c +++ b/tools/testing/selftests/net/tcp_mmap.c @@ -137,7 +137,8 @@ static void *mmap_large_buffer(size_t need, size_t *allocated) if (buffer == (void *)-1) { sz = need; buffer = mmap(NULL, sz, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE, + -1, 0); if (buffer != (void *)-1) fprintf(stderr, "MAP_HUGETLB attempt failed, look at /sys/kernel/mm/hugepages for optimal performance\n"); } diff --git a/tools/testing/selftests/net/toeplitz.c b/tools/testing/selftests/net/toeplitz.c index 90026a27eac0..9ba03164d73a 100644 --- a/tools/testing/selftests/net/toeplitz.c +++ b/tools/testing/selftests/net/toeplitz.c @@ -215,7 +215,7 @@ static char *recv_frame(const struct ring_state *ring, char *frame) } /* A single TPACKET_V3 block can hold multiple frames */ -static void recv_block(struct ring_state *ring) +static bool recv_block(struct ring_state *ring) { struct tpacket_block_desc *block; char *frame; @@ -223,7 +223,7 @@ static void recv_block(struct ring_state *ring) block = (void *)(ring->mmap + ring->idx * ring_block_sz); if (!(block->hdr.bh1.block_status & TP_STATUS_USER)) - return; + return false; frame = (char *)block; frame += block->hdr.bh1.offset_to_first_pkt; @@ -235,6 +235,8 @@ static void recv_block(struct ring_state *ring) block->hdr.bh1.block_status = TP_STATUS_KERNEL; ring->idx = (ring->idx + 1) % ring_block_nr; + + return true; } /* simple test: sleep once unconditionally and then process all rings */ @@ -245,7 +247,7 @@ static void process_rings(void) usleep(1000 * cfg_timeout_msec); for (i = 0; i < num_cpus; i++) - recv_block(&rings[i]); + do {} while (recv_block(&rings[i])); fprintf(stderr, "count: pass=%u nohash=%u fail=%u\n", frames_received - frames_nohash - frames_error, @@ -257,12 +259,12 @@ static char *setup_ring(int fd) struct tpacket_req3 req3 = {0}; void *ring; - req3.tp_retire_blk_tov = cfg_timeout_msec; + req3.tp_retire_blk_tov = cfg_timeout_msec / 8; req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH; req3.tp_frame_size = 2048; req3.tp_frame_nr = 1 << 10; - req3.tp_block_nr = 2; + req3.tp_block_nr = 16; req3.tp_block_size = req3.tp_frame_size * req3.tp_frame_nr; req3.tp_block_size /= req3.tp_block_nr; diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh index c9c4b9d65839..0a6359bed0b9 100755 --- a/tools/testing/selftests/net/udpgro_frglist.sh +++ b/tools/testing/selftests/net/udpgro_frglist.sh @@ -40,8 +40,8 @@ run_one() { ip -n "${PEER_NS}" link set veth1 xdp object ${BPF_FILE} section xdp tc -n "${PEER_NS}" qdisc add dev veth1 clsact - tc -n "${PEER_NS}" filter add dev veth1 ingress prio 4 protocol ipv6 bpf object-file ../bpf/nat6to4.o section schedcls/ingress6/nat_6 direct-action - tc -n "${PEER_NS}" filter add dev veth1 egress prio 4 protocol ip bpf object-file ../bpf/nat6to4.o section schedcls/egress4/snat4 direct-action + tc -n "${PEER_NS}" filter add dev veth1 ingress prio 4 protocol ipv6 bpf object-file nat6to4.o section schedcls/ingress6/nat_6 direct-action + tc -n "${PEER_NS}" filter add dev veth1 egress prio 4 protocol ip bpf object-file nat6to4.o section schedcls/egress4/snat4 direct-action echo ${rx_args} ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r & @@ -88,8 +88,8 @@ if [ ! -f ${BPF_FILE} ]; then exit -1 fi -if [ ! -f bpf/nat6to4.o ]; then - echo "Missing nat6to4 helper. Build bpfnat6to4.o selftest first" +if [ ! -f nat6to4.o ]; then + echo "Missing nat6to4 helper. Build bpf nat6to4.o selftest first" exit -1 fi diff --git a/tools/testing/selftests/netfilter/nft_trans_stress.sh b/tools/testing/selftests/netfilter/nft_trans_stress.sh index a7f62ad4f661..2ffba45a78bf 100755 --- a/tools/testing/selftests/netfilter/nft_trans_stress.sh +++ b/tools/testing/selftests/netfilter/nft_trans_stress.sh @@ -10,12 +10,20 @@ ksft_skip=4 testns=testns-$(mktemp -u "XXXXXXXX") +tmp="" tables="foo bar baz quux" global_ret=0 eret=0 lret=0 +cleanup() { + ip netns pids "$testns" | xargs kill 2>/dev/null + ip netns del "$testns" + + rm -f "$tmp" +} + check_result() { local r=$1 @@ -43,6 +51,7 @@ if [ $? -ne 0 ];then exit $ksft_skip fi +trap cleanup EXIT tmp=$(mktemp) for table in $tables; do @@ -139,11 +148,4 @@ done check_result $lret "add/delete with nftrace enabled" -pkill -9 ping - -wait - -rm -f "$tmp" -ip netns del "$testns" - exit $global_ret diff --git a/tools/testing/selftests/netfilter/settings b/tools/testing/selftests/netfilter/settings new file mode 100644 index 000000000000..6091b45d226b --- /dev/null +++ b/tools/testing/selftests/netfilter/settings @@ -0,0 +1 @@ +timeout=120 diff --git a/tools/testing/selftests/proc/proc-empty-vm.c b/tools/testing/selftests/proc/proc-empty-vm.c index d95b1cb43d9d..7588428b8fcd 100644 --- a/tools/testing/selftests/proc/proc-empty-vm.c +++ b/tools/testing/selftests/proc/proc-empty-vm.c @@ -25,6 +25,7 @@ #undef NDEBUG #include <assert.h> #include <errno.h> +#include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -41,7 +42,7 @@ * 1: vsyscall VMA is --xp vsyscall=xonly * 2: vsyscall VMA is r-xp vsyscall=emulate */ -static int g_vsyscall; +static volatile int g_vsyscall; static const char *g_proc_pid_maps_vsyscall; static const char *g_proc_pid_smaps_vsyscall; @@ -147,11 +148,12 @@ static void vsyscall(void) g_vsyscall = 0; /* gettimeofday(NULL, NULL); */ + uint64_t rax = 0xffffffffff600000; asm volatile ( - "call %P0" - : - : "i" (0xffffffffff600000), "D" (NULL), "S" (NULL) - : "rax", "rcx", "r11" + "call *%[rax]" + : [rax] "+a" (rax) + : "D" (NULL), "S" (NULL) + : "rcx", "r11" ); g_vsyscall = 1; diff --git a/tools/testing/selftests/proc/proc-pid-vm.c b/tools/testing/selftests/proc/proc-pid-vm.c index 69551bfa215c..cacbd2a4aec9 100644 --- a/tools/testing/selftests/proc/proc-pid-vm.c +++ b/tools/testing/selftests/proc/proc-pid-vm.c @@ -257,11 +257,12 @@ static void vsyscall(void) g_vsyscall = 0; /* gettimeofday(NULL, NULL); */ + uint64_t rax = 0xffffffffff600000; asm volatile ( - "call %P0" - : - : "i" (0xffffffffff600000), "D" (NULL), "S" (NULL) - : "rax", "rcx", "r11" + "call *%[rax]" + : [rax] "+a" (rax) + : "D" (NULL), "S" (NULL) + : "rcx", "r11" ); g_vsyscall = 1; diff --git a/tools/testing/vsock/Makefile b/tools/testing/vsock/Makefile index f8293c6910c9..43a254f0e14d 100644 --- a/tools/testing/vsock/Makefile +++ b/tools/testing/vsock/Makefile @@ -1,8 +1,9 @@ # SPDX-License-Identifier: GPL-2.0-only -all: test +all: test vsock_perf test: vsock_test vsock_diag_test vsock_test: vsock_test.o timeout.o control.o util.o vsock_diag_test: vsock_diag_test.o timeout.o control.o util.o +vsock_perf: vsock_perf.o CFLAGS += -g -O2 -Werror -Wall -I. -I../../include -I../../../usr/include -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -D_GNU_SOURCE .PHONY: all test clean diff --git a/tools/testing/vsock/README b/tools/testing/vsock/README index 4d5045e7d2c3..84ee217ba8ee 100644 --- a/tools/testing/vsock/README +++ b/tools/testing/vsock/README @@ -35,3 +35,37 @@ Invoke test binaries in both directions as follows: --control-port=$GUEST_IP \ --control-port=1234 \ --peer-cid=3 + +vsock_perf utility +------------------- +'vsock_perf' is a simple tool to measure vsock performance. It works in +sender/receiver modes: sender connect to peer at the specified port and +starts data transmission to the receiver. After data processing is done, +it prints several metrics(see below). + +Usage: +# run as sender +# connect to CID 2, port 1234, send 1G of data, tx buf size is 1M +./vsock_perf --sender 2 --port 1234 --bytes 1G --buf-size 1M + +Output: +tx performance: A Gbits/s + +Output explanation: +A is calculated as "number of bits to send" / "time in tx loop" + +# run as receiver +# listen port 1234, rx buf size is 1M, socket buf size is 1G, SO_RCVLOWAT is 64K +./vsock_perf --port 1234 --buf-size 1M --vsk-size 1G --rcvlowat 64K + +Output: +rx performance: A Gbits/s +total in 'read()': B sec +POLLIN wakeups: C +average in 'read()': D ns + +Output explanation: +A is calculated as "number of received bits" / "time in rx loop". +B is time, spent in 'read()' system call(excluding 'poll()') +C is number of 'poll()' wake ups with POLLIN bit set. +D is B / C, e.g. average amount of time, spent in single 'read()'. diff --git a/tools/testing/vsock/control.c b/tools/testing/vsock/control.c index 4874872fc5a3..d2deb4b15b94 100644 --- a/tools/testing/vsock/control.c +++ b/tools/testing/vsock/control.c @@ -141,6 +141,34 @@ void control_writeln(const char *str) timeout_end(); } +void control_writeulong(unsigned long value) +{ + char str[32]; + + if (snprintf(str, sizeof(str), "%lu", value) >= sizeof(str)) { + perror("snprintf"); + exit(EXIT_FAILURE); + } + + control_writeln(str); +} + +unsigned long control_readulong(void) +{ + unsigned long value; + char *str; + + str = control_readln(); + + if (!str) + exit(EXIT_FAILURE); + + value = strtoul(str, NULL, 10); + free(str); + + return value; +} + /* Return the next line from the control socket (without the trailing newline). * * The program terminates if a timeout occurs. diff --git a/tools/testing/vsock/control.h b/tools/testing/vsock/control.h index 51814b4f9ac1..c1f77fdb2c7a 100644 --- a/tools/testing/vsock/control.h +++ b/tools/testing/vsock/control.h @@ -9,7 +9,9 @@ void control_init(const char *control_host, const char *control_port, void control_cleanup(void); void control_writeln(const char *str); char *control_readln(void); +unsigned long control_readulong(void); void control_expectln(const char *str); bool control_cmpln(char *line, const char *str, bool fail); +void control_writeulong(unsigned long value); #endif /* CONTROL_H */ diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c index 2acbb7703c6a..01b636d3039a 100644 --- a/tools/testing/vsock/util.c +++ b/tools/testing/vsock/util.c @@ -395,3 +395,16 @@ void skip_test(struct test_case *test_cases, size_t test_cases_len, test_cases[test_id].skip = true; } + +unsigned long hash_djb2(const void *data, size_t len) +{ + unsigned long hash = 5381; + int i = 0; + + while (i < len) { + hash = ((hash << 5) + hash) + ((unsigned char *)data)[i]; + i++; + } + + return hash; +} diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h index a3375ad2fb7f..fb99208a95ea 100644 --- a/tools/testing/vsock/util.h +++ b/tools/testing/vsock/util.h @@ -49,4 +49,5 @@ void run_tests(const struct test_case *test_cases, void list_tests(const struct test_case *test_cases); void skip_test(struct test_case *test_cases, size_t test_cases_len, const char *test_id_str); +unsigned long hash_djb2(const void *data, size_t len); #endif /* UTIL_H */ diff --git a/tools/testing/vsock/vsock_perf.c b/tools/testing/vsock/vsock_perf.c new file mode 100644 index 000000000000..a72520338f84 --- /dev/null +++ b/tools/testing/vsock/vsock_perf.c @@ -0,0 +1,427 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * vsock_perf - benchmark utility for vsock. + * + * Copyright (C) 2022 SberDevices. + * + * Author: Arseniy Krasnov <AVKrasnov@sberdevices.ru> + */ +#include <getopt.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <time.h> +#include <stdint.h> +#include <poll.h> +#include <sys/socket.h> +#include <linux/vm_sockets.h> + +#define DEFAULT_BUF_SIZE_BYTES (128 * 1024) +#define DEFAULT_TO_SEND_BYTES (64 * 1024) +#define DEFAULT_VSOCK_BUF_BYTES (256 * 1024) +#define DEFAULT_RCVLOWAT_BYTES 1 +#define DEFAULT_PORT 1234 + +#define BYTES_PER_GB (1024 * 1024 * 1024ULL) +#define NSEC_PER_SEC (1000000000ULL) + +static unsigned int port = DEFAULT_PORT; +static unsigned long buf_size_bytes = DEFAULT_BUF_SIZE_BYTES; +static unsigned long vsock_buf_bytes = DEFAULT_VSOCK_BUF_BYTES; + +static void error(const char *s) +{ + perror(s); + exit(EXIT_FAILURE); +} + +static time_t current_nsec(void) +{ + struct timespec ts; + + if (clock_gettime(CLOCK_REALTIME, &ts)) + error("clock_gettime"); + + return (ts.tv_sec * NSEC_PER_SEC) + ts.tv_nsec; +} + +/* From lib/cmdline.c. */ +static unsigned long memparse(const char *ptr) +{ + char *endptr; + + unsigned long long ret = strtoull(ptr, &endptr, 0); + + switch (*endptr) { + case 'E': + case 'e': + ret <<= 10; + case 'P': + case 'p': + ret <<= 10; + case 'T': + case 't': + ret <<= 10; + case 'G': + case 'g': + ret <<= 10; + case 'M': + case 'm': + ret <<= 10; + case 'K': + case 'k': + ret <<= 10; + endptr++; + default: + break; + } + + return ret; +} + +static void vsock_increase_buf_size(int fd) +{ + if (setsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_MAX_SIZE, + &vsock_buf_bytes, sizeof(vsock_buf_bytes))) + error("setsockopt(SO_VM_SOCKETS_BUFFER_MAX_SIZE)"); + + if (setsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE, + &vsock_buf_bytes, sizeof(vsock_buf_bytes))) + error("setsockopt(SO_VM_SOCKETS_BUFFER_SIZE)"); +} + +static int vsock_connect(unsigned int cid, unsigned int port) +{ + union { + struct sockaddr sa; + struct sockaddr_vm svm; + } addr = { + .svm = { + .svm_family = AF_VSOCK, + .svm_port = port, + .svm_cid = cid, + }, + }; + int fd; + + fd = socket(AF_VSOCK, SOCK_STREAM, 0); + + if (fd < 0) { + perror("socket"); + return -1; + } + + if (connect(fd, &addr.sa, sizeof(addr.svm)) < 0) { + perror("connect"); + close(fd); + return -1; + } + + return fd; +} + +static float get_gbps(unsigned long bits, time_t ns_delta) +{ + return ((float)bits / 1000000000ULL) / + ((float)ns_delta / NSEC_PER_SEC); +} + +static void run_receiver(unsigned long rcvlowat_bytes) +{ + unsigned int read_cnt; + time_t rx_begin_ns; + time_t in_read_ns; + size_t total_recv; + int client_fd; + char *data; + int fd; + union { + struct sockaddr sa; + struct sockaddr_vm svm; + } addr = { + .svm = { + .svm_family = AF_VSOCK, + .svm_port = port, + .svm_cid = VMADDR_CID_ANY, + }, + }; + union { + struct sockaddr sa; + struct sockaddr_vm svm; + } clientaddr; + + socklen_t clientaddr_len = sizeof(clientaddr.svm); + + printf("Run as receiver\n"); + printf("Listen port %u\n", port); + printf("RX buffer %lu bytes\n", buf_size_bytes); + printf("vsock buffer %lu bytes\n", vsock_buf_bytes); + printf("SO_RCVLOWAT %lu bytes\n", rcvlowat_bytes); + + fd = socket(AF_VSOCK, SOCK_STREAM, 0); + + if (fd < 0) + error("socket"); + + if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) + error("bind"); + + if (listen(fd, 1) < 0) + error("listen"); + + client_fd = accept(fd, &clientaddr.sa, &clientaddr_len); + + if (client_fd < 0) + error("accept"); + + vsock_increase_buf_size(client_fd); + + if (setsockopt(client_fd, SOL_SOCKET, SO_RCVLOWAT, + &rcvlowat_bytes, + sizeof(rcvlowat_bytes))) + error("setsockopt(SO_RCVLOWAT)"); + + data = malloc(buf_size_bytes); + + if (!data) { + fprintf(stderr, "'malloc()' failed\n"); + exit(EXIT_FAILURE); + } + + read_cnt = 0; + in_read_ns = 0; + total_recv = 0; + rx_begin_ns = current_nsec(); + + while (1) { + struct pollfd fds = { 0 }; + + fds.fd = client_fd; + fds.events = POLLIN | POLLERR | + POLLHUP | POLLRDHUP; + + if (poll(&fds, 1, -1) < 0) + error("poll"); + + if (fds.revents & POLLERR) { + fprintf(stderr, "'poll()' error\n"); + exit(EXIT_FAILURE); + } + + if (fds.revents & POLLIN) { + ssize_t bytes_read; + time_t t; + + t = current_nsec(); + bytes_read = read(fds.fd, data, buf_size_bytes); + in_read_ns += (current_nsec() - t); + read_cnt++; + + if (!bytes_read) + break; + + if (bytes_read < 0) { + perror("read"); + exit(EXIT_FAILURE); + } + + total_recv += bytes_read; + } + + if (fds.revents & (POLLHUP | POLLRDHUP)) + break; + } + + printf("total bytes received: %zu\n", total_recv); + printf("rx performance: %f Gbits/s\n", + get_gbps(total_recv * 8, current_nsec() - rx_begin_ns)); + printf("total time in 'read()': %f sec\n", (float)in_read_ns / NSEC_PER_SEC); + printf("average time in 'read()': %f ns\n", (float)in_read_ns / read_cnt); + printf("POLLIN wakeups: %i\n", read_cnt); + + free(data); + close(client_fd); + close(fd); +} + +static void run_sender(int peer_cid, unsigned long to_send_bytes) +{ + time_t tx_begin_ns; + time_t tx_total_ns; + size_t total_send; + void *data; + int fd; + + printf("Run as sender\n"); + printf("Connect to %i:%u\n", peer_cid, port); + printf("Send %lu bytes\n", to_send_bytes); + printf("TX buffer %lu bytes\n", buf_size_bytes); + + fd = vsock_connect(peer_cid, port); + + if (fd < 0) + exit(EXIT_FAILURE); + + data = malloc(buf_size_bytes); + + if (!data) { + fprintf(stderr, "'malloc()' failed\n"); + exit(EXIT_FAILURE); + } + + memset(data, 0, buf_size_bytes); + total_send = 0; + tx_begin_ns = current_nsec(); + + while (total_send < to_send_bytes) { + ssize_t sent; + + sent = write(fd, data, buf_size_bytes); + + if (sent <= 0) + error("write"); + + total_send += sent; + } + + tx_total_ns = current_nsec() - tx_begin_ns; + + printf("total bytes sent: %zu\n", total_send); + printf("tx performance: %f Gbits/s\n", + get_gbps(total_send * 8, tx_total_ns)); + printf("total time in 'write()': %f sec\n", + (float)tx_total_ns / NSEC_PER_SEC); + + close(fd); + free(data); +} + +static const char optstring[] = ""; +static const struct option longopts[] = { + { + .name = "help", + .has_arg = no_argument, + .val = 'H', + }, + { + .name = "sender", + .has_arg = required_argument, + .val = 'S', + }, + { + .name = "port", + .has_arg = required_argument, + .val = 'P', + }, + { + .name = "bytes", + .has_arg = required_argument, + .val = 'M', + }, + { + .name = "buf-size", + .has_arg = required_argument, + .val = 'B', + }, + { + .name = "vsk-size", + .has_arg = required_argument, + .val = 'V', + }, + { + .name = "rcvlowat", + .has_arg = required_argument, + .val = 'R', + }, + {}, +}; + +static void usage(void) +{ + printf("Usage: ./vsock_perf [--help] [options]\n" + "\n" + "This is benchmarking utility, to test vsock performance.\n" + "It runs in two modes: sender or receiver. In sender mode, it\n" + "connects to the specified CID and starts data transmission.\n" + "\n" + "Options:\n" + " --help This message\n" + " --sender <cid> Sender mode (receiver default)\n" + " <cid> of the receiver to connect to\n" + " --port <port> Port (default %d)\n" + " --bytes <bytes>KMG Bytes to send (default %d)\n" + " --buf-size <bytes>KMG Data buffer size (default %d). In sender mode\n" + " it is the buffer size, passed to 'write()'. In\n" + " receiver mode it is the buffer size passed to 'read()'.\n" + " --vsk-size <bytes>KMG Socket buffer size (default %d)\n" + " --rcvlowat <bytes>KMG SO_RCVLOWAT value (default %d)\n" + "\n", DEFAULT_PORT, DEFAULT_TO_SEND_BYTES, + DEFAULT_BUF_SIZE_BYTES, DEFAULT_VSOCK_BUF_BYTES, + DEFAULT_RCVLOWAT_BYTES); + exit(EXIT_FAILURE); +} + +static long strtolx(const char *arg) +{ + long value; + char *end; + + value = strtol(arg, &end, 10); + + if (end != arg + strlen(arg)) + usage(); + + return value; +} + +int main(int argc, char **argv) +{ + unsigned long to_send_bytes = DEFAULT_TO_SEND_BYTES; + unsigned long rcvlowat_bytes = DEFAULT_RCVLOWAT_BYTES; + int peer_cid = -1; + bool sender = false; + + while (1) { + int opt = getopt_long(argc, argv, optstring, longopts, NULL); + + if (opt == -1) + break; + + switch (opt) { + case 'V': /* Peer buffer size. */ + vsock_buf_bytes = memparse(optarg); + break; + case 'R': /* SO_RCVLOWAT value. */ + rcvlowat_bytes = memparse(optarg); + break; + case 'P': /* Port to connect to. */ + port = strtolx(optarg); + break; + case 'M': /* Bytes to send. */ + to_send_bytes = memparse(optarg); + break; + case 'B': /* Size of rx/tx buffer. */ + buf_size_bytes = memparse(optarg); + break; + case 'S': /* Sender mode. CID to connect to. */ + peer_cid = strtolx(optarg); + sender = true; + break; + case 'H': /* Help. */ + usage(); + break; + default: + usage(); + } + } + + if (!sender) + run_receiver(rcvlowat_bytes); + else + run_sender(peer_cid, to_send_bytes); + + return 0; +} diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index bb6d691cb30d..67e9f9df3a8c 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -284,10 +284,14 @@ static void test_stream_msg_peek_server(const struct test_opts *opts) close(fd); } -#define MESSAGES_CNT 7 -#define MSG_EOR_IDX (MESSAGES_CNT / 2) +#define SOCK_BUF_SIZE (2 * 1024 * 1024) +#define MAX_MSG_SIZE (32 * 1024) + static void test_seqpacket_msg_bounds_client(const struct test_opts *opts) { + unsigned long curr_hash; + int page_size; + int msg_count; int fd; fd = vsock_seqpacket_connect(opts->peer_cid, 1234); @@ -296,18 +300,79 @@ static void test_seqpacket_msg_bounds_client(const struct test_opts *opts) exit(EXIT_FAILURE); } - /* Send several messages, one with MSG_EOR flag */ - for (int i = 0; i < MESSAGES_CNT; i++) - send_byte(fd, 1, (i == MSG_EOR_IDX) ? MSG_EOR : 0); + /* Wait, until receiver sets buffer size. */ + control_expectln("SRVREADY"); + + curr_hash = 0; + page_size = getpagesize(); + msg_count = SOCK_BUF_SIZE / MAX_MSG_SIZE; + + for (int i = 0; i < msg_count; i++) { + ssize_t send_size; + size_t buf_size; + int flags; + void *buf; + + /* Use "small" buffers and "big" buffers. */ + if (i & 1) + buf_size = page_size + + (rand() % (MAX_MSG_SIZE - page_size)); + else + buf_size = 1 + (rand() % page_size); + + buf = malloc(buf_size); + + if (!buf) { + perror("malloc"); + exit(EXIT_FAILURE); + } + + memset(buf, rand() & 0xff, buf_size); + /* Set at least one MSG_EOR + some random. */ + if (i == (msg_count / 2) || (rand() & 1)) { + flags = MSG_EOR; + curr_hash++; + } else { + flags = 0; + } + + send_size = send(fd, buf, buf_size, flags); + + if (send_size < 0) { + perror("send"); + exit(EXIT_FAILURE); + } + + if (send_size != buf_size) { + fprintf(stderr, "Invalid send size\n"); + exit(EXIT_FAILURE); + } + + /* + * Hash sum is computed at both client and server in + * the same way: + * H += hash('message data') + * Such hash "controls" both data integrity and message + * bounds. After data exchange, both sums are compared + * using control socket, and if message bounds wasn't + * broken - two values must be equal. + */ + curr_hash += hash_djb2(buf, buf_size); + free(buf); + } control_writeln("SENDDONE"); + control_writeulong(curr_hash); close(fd); } static void test_seqpacket_msg_bounds_server(const struct test_opts *opts) { + unsigned long sock_buf_size; + unsigned long remote_hash; + unsigned long curr_hash; int fd; - char buf[16]; + char buf[MAX_MSG_SIZE]; struct msghdr msg = {0}; struct iovec iov = {0}; @@ -317,25 +382,57 @@ static void test_seqpacket_msg_bounds_server(const struct test_opts *opts) exit(EXIT_FAILURE); } + sock_buf_size = SOCK_BUF_SIZE; + + if (setsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_MAX_SIZE, + &sock_buf_size, sizeof(sock_buf_size))) { + perror("setsockopt(SO_VM_SOCKETS_BUFFER_MAX_SIZE)"); + exit(EXIT_FAILURE); + } + + if (setsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE, + &sock_buf_size, sizeof(sock_buf_size))) { + perror("setsockopt(SO_VM_SOCKETS_BUFFER_SIZE)"); + exit(EXIT_FAILURE); + } + + /* Ready to receive data. */ + control_writeln("SRVREADY"); + /* Wait, until peer sends whole data. */ control_expectln("SENDDONE"); iov.iov_base = buf; iov.iov_len = sizeof(buf); msg.msg_iov = &iov; msg.msg_iovlen = 1; - for (int i = 0; i < MESSAGES_CNT; i++) { - if (recvmsg(fd, &msg, 0) != 1) { - perror("message bound violated"); - exit(EXIT_FAILURE); - } + curr_hash = 0; + + while (1) { + ssize_t recv_size; - if ((i == MSG_EOR_IDX) ^ !!(msg.msg_flags & MSG_EOR)) { - perror("MSG_EOR"); + recv_size = recvmsg(fd, &msg, 0); + + if (!recv_size) + break; + + if (recv_size < 0) { + perror("recvmsg"); exit(EXIT_FAILURE); } + + if (msg.msg_flags & MSG_EOR) + curr_hash++; + + curr_hash += hash_djb2(msg.msg_iov[0].iov_base, recv_size); } close(fd); + remote_hash = control_readulong(); + + if (curr_hash != remote_hash) { + fprintf(stderr, "Message bounds broken\n"); + exit(EXIT_FAILURE); + } } #define MESSAGE_TRUNC_SZ 32 @@ -427,7 +524,7 @@ static void test_seqpacket_timeout_client(const struct test_opts *opts) tv.tv_usec = 0; if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (void *)&tv, sizeof(tv)) == -1) { - perror("setsockopt 'SO_RCVTIMEO'"); + perror("setsockopt(SO_RCVTIMEO)"); exit(EXIT_FAILURE); } @@ -472,6 +569,70 @@ static void test_seqpacket_timeout_server(const struct test_opts *opts) close(fd); } +static void test_seqpacket_bigmsg_client(const struct test_opts *opts) +{ + unsigned long sock_buf_size; + ssize_t send_size; + socklen_t len; + void *data; + int fd; + + len = sizeof(sock_buf_size); + + fd = vsock_seqpacket_connect(opts->peer_cid, 1234); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + if (getsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE, + &sock_buf_size, &len)) { + perror("getsockopt"); + exit(EXIT_FAILURE); + } + + sock_buf_size++; + + data = malloc(sock_buf_size); + if (!data) { + perror("malloc"); + exit(EXIT_FAILURE); + } + + send_size = send(fd, data, sock_buf_size, 0); + if (send_size != -1) { + fprintf(stderr, "expected 'send(2)' failure, got %zi\n", + send_size); + exit(EXIT_FAILURE); + } + + if (errno != EMSGSIZE) { + fprintf(stderr, "expected EMSGSIZE in 'errno', got %i\n", + errno); + exit(EXIT_FAILURE); + } + + control_writeln("CLISENT"); + + free(data); + close(fd); +} + +static void test_seqpacket_bigmsg_server(const struct test_opts *opts) +{ + int fd; + + fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + control_expectln("CLISENT"); + + close(fd); +} + #define BUF_PATTERN_1 'a' #define BUF_PATTERN_2 'b' @@ -644,7 +805,7 @@ static void test_stream_poll_rcvlowat_client(const struct test_opts *opts) if (setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT, &lowat_val, sizeof(lowat_val))) { - perror("setsockopt"); + perror("setsockopt(SO_RCVLOWAT)"); exit(EXIT_FAILURE); } @@ -754,6 +915,11 @@ static struct test_case test_cases[] = { .run_client = test_stream_poll_rcvlowat_client, .run_server = test_stream_poll_rcvlowat_server, }, + { + .name = "SOCK_SEQPACKET big message", + .run_client = test_seqpacket_bigmsg_client, + .run_server = test_seqpacket_bigmsg_server, + }, {}, }; @@ -837,6 +1003,7 @@ int main(int argc, char **argv) .peer_cid = VMADDR_CID_ANY, }; + srand(time(NULL)); init_signals(); for (;;) { |